Rewrite null checks in read barrier introspection thunks.

Rely on the implicit null check in the fast path: when the holder
register is null, branch to a label bound in the fast path instead
of a separate slow path that jumps to the kQuickThrowNullPointer
entrypoint. Returning to the LDR with a null base register makes
the load fault, and the implicit null check mechanism then throws
the NPE. The null check is emitted in the thunk only when implicit
null checks are enabled; with explicit null checks, the compiler
already checks the holder before the load.
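
Schematically, for the kField case with holder_reg == base_reg (a
rough sketch of the thunk layout, not the exact emitted instruction
sequence; ARM64 shown, the ARM thunk is analogous):

    Before:
        cbz   holder, throw_npe
        <load lock word; branch to slow_path if the holder is gray>
        <adjust LR back to the LDR, add fake dependency, return>
      slow_path:
        <introspection: load the reference, jump to the entrypoint>
      throw_npe:
        <load kQuickThrowNullPointer from Thread*, jump to it>

    After:
        cbz   holder, throw_npe   // only with implicit null checks
        <load lock word; branch to slow_path if the holder is gray>
      throw_npe:                  // bound in the fast path
        <adjust LR back to the LDR, add fake dependency, return>
        // The base register is null, so the re-executed LDR faults
        // and the implicit null check mechanism throws the NPE.
      slow_path:
        <introspection: load the reference, jump to the entrypoint>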

Test: Manual; run-test --gdb 160, break in the introspection
      entrypoint, find the mf.testField0000 read barrier
      code in the caller (this one has a stack map for the
      null check; most other reads do not need one), break
      there, step into the thunk, overwrite the base register
      with 0 and observe the NPE being thrown. Repeat with --64.
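      Illustrative gdb steps for the above (the entrypoint symbol
      and the x1 register are assumptions for illustration; the
      actual base register depends on the compiled code):
        (gdb) break art_quick_read_barrier_mark_introspection
        (gdb) continue           # stop in the entrypoint; use $lr to
                                 # locate the caller's read barrier code
        (gdb) break *<thunk address>
        (gdb) continue
        (gdb) set $x1 = 0        # x1 standing in for the base register
        (gdb) continue           # expect the NPE to be thrown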
Test: Pixel 2 XL boots.
Test: testrunner.py --target --optimizing
Bug: 36141117
Change-Id: I61f879f22f5697a4108f1021eb0e3add742c8755
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 31887d9..d4cfab8 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -6838,7 +6838,8 @@
 static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
                                      vixl::aarch64::Register base_reg,
                                      vixl::aarch64::MemOperand& lock_word,
-                                     vixl::aarch64::Label* slow_path) {
+                                     vixl::aarch64::Label* slow_path,
+                                     vixl::aarch64::Label* throw_npe = nullptr) {
   // Load the lock word containing the rb_state.
   __ Ldr(ip0.W(), lock_word);
   // Given the numeric representation, it's enough to check the low bit of the rb_state.
@@ -6848,6 +6849,10 @@
   static_assert(
       BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
       "Field and array LDR offsets must be the same to reuse the same code.");
+  // To throw an NPE, we return to the fast path; the artificial dependency below does not matter.
+  if (throw_npe != nullptr) {
+    __ Bind(throw_npe);
+  }
   // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
   static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                 "Field LDR must be 1 instruction (4B) before the return address label; "
@@ -6877,10 +6882,6 @@
   BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
   switch (kind) {
     case BakerReadBarrierKind::kField: {
-      // Check if the holder is gray and, if not, add fake dependency to the base register
-      // and return to the LDR instruction to load the reference. Otherwise, use introspection
-      // to load the reference and call the entrypoint (in IP1) that performs further checks
-      // on the reference and marks it if needed.
       auto base_reg =
           Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
       CheckValidReg(base_reg.GetCode());
@@ -6889,16 +6890,22 @@
       CheckValidReg(holder_reg.GetCode());
       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
       temps.Exclude(ip0, ip1);
-      // If base_reg differs from holder_reg, the offset was too large and we must have
-      // emitted an explicit null check before the load. Otherwise, we need to null-check
-      // the holder as we do not necessarily do that check before going to the thunk.
-      vixl::aarch64::Label throw_npe;
-      if (holder_reg.Is(base_reg)) {
-        __ Cbz(holder_reg.W(), &throw_npe);
+      // If base_reg differs from holder_reg, the offset was too large and we must have emitted
+      // an explicit null check before the load. Otherwise, for implicit null checks, we need to
+      // null-check the holder as we do not necessarily do that check before going to the thunk.
+      vixl::aarch64::Label throw_npe_label;
+      vixl::aarch64::Label* throw_npe = nullptr;
+      if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
+        throw_npe = &throw_npe_label;
+        __ Cbz(holder_reg.W(), throw_npe);
       }
+      // Check if the holder is gray and, if not, add fake dependency to the base register
+      // and return to the LDR instruction to load the reference. Otherwise, use introspection
+      // to load the reference and call the entrypoint that performs further checks on the
+      // reference and marks it if needed.
       vixl::aarch64::Label slow_path;
       MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
-      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe);
       __ Bind(&slow_path);
       MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
       __ Ldr(ip0.W(), ldr_address);         // Load the LDR (immediate) unsigned offset.
@@ -6907,13 +6914,6 @@
       __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2));   // Load the reference.
       // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
       __ Br(ip1);                           // Jump to the entrypoint.
-      if (holder_reg.Is(base_reg)) {
-        // Add null check slow path. The stack map is at the address pointed to by LR.
-        __ Bind(&throw_npe);
-        int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value();
-        __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset));
-        __ Br(ip0);
-      }
       break;
     }
     case BakerReadBarrierKind::kArray: {
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 15d9526..f370586 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -9905,7 +9905,8 @@
                                      vixl32::Register base_reg,
                                      vixl32::MemOperand& lock_word,
                                      vixl32::Label* slow_path,
-                                     int32_t raw_ldr_offset) {
+                                     int32_t raw_ldr_offset,
+                                     vixl32::Label* throw_npe = nullptr) {
   // Load the lock word containing the rb_state.
   __ Ldr(ip, lock_word);
   // Given the numeric representation, it's enough to check the low bit of the rb_state.
@@ -9913,6 +9914,10 @@
   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
   __ B(ne, slow_path, /* is_far_target */ false);
+  // To throw an NPE, we return to the fast path; the artificial dependency below does not matter.
+  if (throw_npe != nullptr) {
+    __ Bind(throw_npe);
+  }
   __ Add(lr, lr, raw_ldr_offset);
   // Introduce a dependency on the lock_word including rb_state,
   // to prevent load-load reordering, and without using
@@ -9926,7 +9931,7 @@
 static void LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler,
                                                        vixl32::Register entrypoint) {
   // The register where the read barrier introspection entrypoint is loaded
-  // is fixed: `Thumb2RelativePatcher::kBakerCcEntrypointRegister` (R4).
+  // is fixed: `kBakerCcEntrypointRegister` (R4).
   DCHECK(entrypoint.Is(kBakerCcEntrypointRegister));
   // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
   DCHECK_EQ(ip.GetCode(), 12u);
@@ -9941,10 +9946,6 @@
   BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
   switch (kind) {
     case BakerReadBarrierKind::kField: {
-      // Check if the holder is gray and, if not, add fake dependency to the base register
-      // and return to the LDR instruction to load the reference. Otherwise, use introspection
-      // to load the reference and call the entrypoint (in kBakerCcEntrypointRegister)
-      // that performs further checks on the reference and marks it if needed.
       vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
       CheckValidReg(base_reg.GetCode());
       vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
@@ -9952,19 +9953,26 @@
       BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
       temps.Exclude(ip);
-      // If base_reg differs from holder_reg, the offset was too large and we must have
-      // emitted an explicit null check before the load. Otherwise, we need to null-check
-      // the holder as we do not necessarily do that check before going to the thunk.
-      vixl32::Label throw_npe;
-      if (holder_reg.Is(base_reg)) {
-        __ CompareAndBranchIfZero(holder_reg, &throw_npe, /* is_far_target */ false);
+      // If base_reg differs from holder_reg, the offset was too large and we must have emitted
+      // an explicit null check before the load. Otherwise, for implicit null checks, we need to
+      // null-check the holder as we do not necessarily do that check before going to the thunk.
+      vixl32::Label throw_npe_label;
+      vixl32::Label* throw_npe = nullptr;
+      if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
+        throw_npe = &throw_npe_label;
+        __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target */ false);
       }
+      // Check if the holder is gray and, if not, add fake dependency to the base register
+      // and return to the LDR instruction to load the reference. Otherwise, use introspection
+      // to load the reference and call the entrypoint that performs further checks on the
+      // reference and marks it if needed.
       vixl32::Label slow_path;
       MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
       const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
           ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
           : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
-      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
+      EmitGrayCheckAndFastPath(
+          assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe);
       __ Bind(&slow_path);
       const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
                                  raw_ldr_offset;
@@ -9986,13 +9994,6 @@
       }
       // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
       __ Bx(ep_reg);                          // Jump to the entrypoint.
-      if (holder_reg.Is(base_reg)) {
-        // Add null check slow path. The stack map is at the address pointed to by LR.
-        __ Bind(&throw_npe);
-        int32_t offset = GetThreadOffset<kArmPointerSize>(kQuickThrowNullPointer).Int32Value();
-        __ Ldr(ip, MemOperand(/* Thread* */ vixl32::r9, offset));
-        __ Bx(ip);
-      }
       break;
     }
     case BakerReadBarrierKind::kArray: {