Instrument ARM generated code to check the Marking Register.

Have the Optimizing compiler generate run-time code checking that
the Marking Register's value matches `self.tls32_.is_gc_marking`
in debug mode (on target; and on host with the JIT, or with AOT
when compiling the core image). If a check fails, abort.
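
For reference, the check emitted by the ARM assembler helper
(GenerateMarkingRegisterCheck, invoked from the new
CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck) is expected
to boil down to a sequence along these lines. This is an
illustrative sketch only, not the helper's actual body; `tr` and
`mr` are the ART aliases for the Thread Register (R9) and the
Marking Register (R8), and `code` here already includes the 0x10
base added by the code generator:

  // Sketch (illustration only): compare MR against the thread-local
  // flag and trap with an identifying BKPT code on mismatch.
  void EmitMarkingRegisterCheckSketch(vixl32::MacroAssembler* masm,
                                      vixl32::Register temp,
                                      int code) {
    vixl32::Label mr_is_ok;
    // temp = self.tls32_.is_gc_marking
    masm->Ldr(temp, MemOperand(
        tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
    masm->Cmp(mr, temp);
    masm->B(eq, &mr_is_ok);
    masm->Bkpt(code);  // Abort; `code` identifies the call site.
    masm->Bind(&mr_is_ok);
  }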

Test: m test-art-target
Test: m test-art-target with tree built with ART_USE_READ_BARRIER=false
Test: ARM device/emulator boot test with libartd
Bug: 37707231
Change-Id: I903f44d385d66ff74d65aa09d7113aa9cb7b9f24
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 430cdde..3d45dd3 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -94,6 +94,9 @@
 // The reserved entrypoint register for link-time generated thunks.
 const vixl32::Register kBakerCcEntrypointRegister = r4;
 
+// Using a base helps identify when we hit Marking Register check breakpoints.
+constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
+
 #ifdef __
 #error "ARM Codegen VIXL macro-assembler macro already defined."
 #endif
@@ -2690,6 +2693,8 @@
     __ Mov(temp, 0);
     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
   }
+
+  MaybeGenerateMarkingRegisterCheck(/* code */ 1);
 }
 
 void CodeGeneratorARMVIXL::GenerateFrameExit() {
@@ -2938,6 +2943,7 @@
   }
   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 2);
   }
   if (!codegen_->GoesToNextBlock(block, successor)) {
     __ B(codegen_->GetLabelOf(successor));
@@ -3655,6 +3661,7 @@
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 3);
 }
 
 void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
@@ -3685,12 +3692,15 @@
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 4);
     return;
   }
 
   LocationSummary* locations = invoke->GetLocations();
   codegen_->GenerateStaticOrDirectCall(
       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 5);
 }
 
 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
@@ -3709,11 +3719,14 @@
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 6);
     return;
   }
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 7);
 }
 
 void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
@@ -3790,6 +3803,8 @@
     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
     DCHECK(!codegen_->IsLeafMethod());
   }
+
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 8);
 }
 
 void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
@@ -3798,6 +3813,7 @@
 
 void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
   codegen_->GenerateInvokePolymorphicCall(invoke);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 9);
 }
 
 void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
@@ -5329,6 +5345,7 @@
     codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
     CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 10);
 }
 
 void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
@@ -5348,6 +5365,7 @@
   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
   DCHECK(!codegen_->IsLeafMethod());
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11);
 }
 
 void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
@@ -6965,6 +6983,7 @@
     return;
   }
   GenerateSuspendCheck(instruction, nullptr);
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 12);
 }
 
 void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
@@ -7326,6 +7345,7 @@
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 13);
     return;
   }
   DCHECK(!cls->NeedsAccessCheck());
@@ -7405,6 +7425,7 @@
     } else {
       __ Bind(slow_path->GetExitLabel());
     }
+    codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 14);
   }
 }
 
@@ -7528,6 +7549,7 @@
       codegen_->AddSlowPath(slow_path);
       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
+      codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 15);
       return;
     }
     case HLoadString::LoadKind::kJitTableAddress: {
@@ -7548,6 +7570,7 @@
   __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 16);
 }
 
 static int32_t GetExceptionTlsOffset() {
@@ -8146,6 +8169,7 @@
   } else {
     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 17);
 }
 
 void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
@@ -8647,6 +8671,7 @@
     // Note that GC roots are not affected by heap poisoning, thus we
     // do not have to unpoison `root_reg` here.
   }
+  codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18);
 }
 
 void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
@@ -8711,31 +8736,34 @@
         base.GetCode(), obj.GetCode(), narrow);
     vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
-    vixl::EmissionCheckScope guard(
-        GetVIXLAssembler(),
-        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
-    vixl32::Label return_address;
-    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
-    __ cmp(mr, Operand(0));
-    EmitPlaceholderBne(this, bne_label);
-    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
-    __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
-    if (needs_null_check) {
-      MaybeRecordImplicitNullCheck(instruction);
-    }
-    // Note: We need a specific width for the unpoisoning NEG.
-    if (kPoisonHeapReferences) {
-      if (narrow) {
-        // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
-        __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
-      } else {
-        __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+    {
+      vixl::EmissionCheckScope guard(
+          GetVIXLAssembler(),
+          (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+      vixl32::Label return_address;
+      EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+      __ cmp(mr, Operand(0));
+      EmitPlaceholderBne(this, bne_label);
+      ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+      __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
+      if (needs_null_check) {
+        MaybeRecordImplicitNullCheck(instruction);
       }
+      // Note: We need a specific width for the unpoisoning NEG.
+      if (kPoisonHeapReferences) {
+        if (narrow) {
+          // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+          __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+        } else {
+          __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+        }
+      }
+      __ Bind(&return_address);
+      DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+                narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+                       : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
     }
-    __ Bind(&return_address);
-    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
-              narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
-                     : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
+    MaybeGenerateMarkingRegisterCheck(/* code */ 19, /* temp_loc */ LocationFrom(ip));
     return;
   }
 
@@ -8796,23 +8824,26 @@
     vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
     __ Add(data_reg, obj, Operand(data_offset));
-    vixl::EmissionCheckScope guard(
-        GetVIXLAssembler(),
-        (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
-    vixl32::Label return_address;
-    EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
-    __ cmp(mr, Operand(0));
-    EmitPlaceholderBne(this, bne_label);
-    ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
-    __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
-    DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
-    // Note: We need a Wide NEG for the unpoisoning.
-    if (kPoisonHeapReferences) {
-      __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+    {
+      vixl::EmissionCheckScope guard(
+          GetVIXLAssembler(),
+          (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+      vixl32::Label return_address;
+      EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+      __ cmp(mr, Operand(0));
+      EmitPlaceholderBne(this, bne_label);
+      ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+      __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+      DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
+      // Note: We need a Wide NEG for the unpoisoning.
+      if (kPoisonHeapReferences) {
+        __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+      }
+      __ Bind(&return_address);
+      DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+                BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
     }
-    __ Bind(&return_address);
-    DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
-              BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+    MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
     return;
   }
 
@@ -8866,6 +8897,7 @@
   // Fast path: the GC is not marking: just load the reference.
   GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
   __ Bind(slow_path->GetExitLabel());
+  MaybeGenerateMarkingRegisterCheck(/* code */ 21);
 }
 
 void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
@@ -8920,6 +8952,7 @@
   // Fast path: the GC is not marking: nothing to do (the field is
   // up-to-date, and we don't need to load the reference).
   __ Bind(slow_path->GetExitLabel());
+  MaybeGenerateMarkingRegisterCheck(/* code */ 22);
 }
 
 void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
@@ -8981,6 +9014,20 @@
   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
 }
 
+void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
+  // The following condition is a compile-time one, so it does not have a run-time cost.
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
+    // The following condition is a run-time one; it is executed after the
+    // previous compile-time test, to avoid penalizing non-debug builds.
+    if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
+      GetAssembler()->GenerateMarkingRegisterCheck(temp,
+                                                   kMarkingRegisterCheckBreakCodeBaseCode + code);
+    }
+  }
+}
+
 void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
                                                    Location out,
                                                    Location ref,
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 7ab2993..5feb33b 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -661,6 +661,28 @@
                                 ScaleFactor scale_factor,
                                 bool needs_null_check);
 
+  // Emit code checking the status of the Marking Register, and
+  // aborting the program if MR does not match the value stored in the
+  // art::Thread object. Code is only emitted in debug mode and if
+  // CompilerOptions::EmitRunTimeChecksInDebugMode returns true.
+  //
+  // Argument `code` is used to identify the different occurrences of
+  // MaybeGenerateMarkingRegisterCheck in the code generator, and is
+  // used together with kMarkingRegisterCheckBreakCodeBaseCode to
+  // create the value passed to the BKPT instruction. Note that unlike
+  // in the ARM64 code generator, where `__LINE__` is passed as the
+  // `code` argument to
+  // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck, we cannot
+  // realistically do that here, as Encoding T1 for the BKPT
+  // instruction only accepts 8-bit immediate values.
+  //
+  // If `temp_loc` is a valid location, it is expected to be a
+  // register and will be used as a temporary to generate code;
+  // otherwise, a temporary will be fetched from the core register
+  // scratch pool.
+  virtual void MaybeGenerateMarkingRegisterCheck(int code,
+                                                 Location temp_loc = Location::NoLocation());
+
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
   //
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index bdd105f..aa4f5da 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -79,6 +79,21 @@
 };
 
 #ifdef ART_ENABLE_CODEGEN_arm
+// Special ARM code generator for codegen testing in a limited code
+// generation environment (i.e. with no runtime support).
+//
+// Note: If we want to exercise certain HIR constructs
+// (e.g. reference field load in Baker read barrier configuration) in
+// codegen tests in the future, we should also:
+// - save the Thread Register (R9) and possibly the Marking Register
+//   (R8) before entering the generated function (both registers are
+//   callee-save in AAPCS);
+// - set these registers to meaningful values before or upon entering
+//   the generated function (so that generated code using them is
+//   correct);
+// - restore their original values before leaving the generated
+//   function.
+
 // Provide our own codegen, that ensures the C calling conventions
 // are preserved. Currently, ART and C do not match as R4 is caller-save
 // in ART, and callee-save in C. Alternatively, we could use or write
@@ -100,6 +115,16 @@
     blocked_core_registers_[arm::R6] = false;
     blocked_core_registers_[arm::R7] = false;
   }
+
+  void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
+                                         Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE {
+    // When turned on, the Marking Register checks in
+    // CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expect the
+    // Thread Register and the Marking Register to be set to
+    // meaningful values. This is not the case in codegen testing, so
+    // just disable them entirely here (by doing nothing in this
+    // method).
+  }
 };
 #endif