arm/arm64: Use marking register in JNI stubs.

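Do not load `is_gc_marking` from the `Thread` when it is
already available in the marking register (r8 on arm, x20 on
arm64), i.e. when Baker read barriers are in use. Other read
barrier configurations still load it from the `Thread`.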

Golem results for art-opt-cc on Odroid-C2 (higher is better):
linux-armv7                     before after
NativeDowncallStaticNormal      5.4429 5.5021 (+1.088%)
NativeDowncallStaticNormal6     5.1163 5.1498 (+0.6554%)
NativeDowncallStaticNormalRefs6 4.8876 4.9188 (+0.6394%)
NativeDowncallStaticFast        15.992 16.505 (+3.207%)
NativeDowncallStaticFast6       13.466 13.705 (+1.775%)
NativeDowncallStaticFastRefs6   11.994 12.183 (+1.578%)
linux-armv8                     before after
NativeDowncallStaticNormal      5.8594 5.9026 (+0.7378%)
NativeDowncallStaticNormal6     5.5198 5.5607 (+0.7414%)
NativeDowncallStaticNormalRefs6 5.1498 5.1862 (+0.7072%)
NativeDowncallStaticFast        17.057 17.439 (+2.242%)
NativeDowncallStaticFast6       14.478 14.757 (+1.922%)
NativeDowncallStaticFastRefs6   12.183 12.376 (+1.584%)

Test: m test-art-host-gtest
Test: run-gtests.sh
Test: testrunner.py --target --optimizing --gcstress
Bug: 172332525
Change-Id: I595cd0e17a480cdfd86c548a4f9853f4b86f4047
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index c7241c1..1ebc850 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -995,15 +995,25 @@
   CHECK(label != nullptr);
 
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  vixl32::Register scratch = temps.Acquire();
+  vixl32::Register test_reg;
   DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
-  ___ Ldr(scratch, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
+  DCHECK(kUseReadBarrier);
+  if (kUseBakerReadBarrier) {
+    if (kIsDebugBuild && emit_run_time_checks_in_debug_mode_) {
+      vixl32::Register temp = temps.Acquire();
+      asm_.GenerateMarkingRegisterCheck(temp);
+    }
+    test_reg = mr;
+  } else {
+    test_reg = temps.Acquire();
+    ___ Ldr(test_reg, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
+  }
   switch (cond) {
     case JNIMacroUnaryCondition::kZero:
-      ___ CompareAndBranchIfZero(scratch, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+      ___ CompareAndBranchIfZero(test_reg, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
       break;
     case JNIMacroUnaryCondition::kNotZero:
-      ___ CompareAndBranchIfNonZero(scratch, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+      ___ CompareAndBranchIfNonZero(test_reg, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
       break;
     default:
       LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond);
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index ff83828..3185f1e 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -720,15 +720,26 @@
   CHECK(label != nullptr);
 
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register test_reg;
   DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
-  Register scratch = temps.AcquireW();
-  ___ Ldr(scratch, MEM_OP(reg_x(TR), Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
+  DCHECK(kUseReadBarrier);
+  if (kUseBakerReadBarrier) {
+    if (kIsDebugBuild && emit_run_time_checks_in_debug_mode_) {
+      Register temp = temps.AcquireW();
+      asm_.GenerateMarkingRegisterCheck(temp);
+    }
+    test_reg = reg_w(MR);
+  } else {
+    test_reg = temps.AcquireW();
+    int32_t is_gc_marking_offset = Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value();
+    ___ Ldr(test_reg, MEM_OP(reg_x(TR), is_gc_marking_offset));
+  }
   switch (cond) {
     case JNIMacroUnaryCondition::kZero:
-      ___ Cbz(scratch, Arm64JNIMacroLabel::Cast(label)->AsArm64());
+      ___ Cbz(test_reg, Arm64JNIMacroLabel::Cast(label)->AsArm64());
       break;
     case JNIMacroUnaryCondition::kNotZero:
-      ___ Cbnz(scratch, Arm64JNIMacroLabel::Cast(label)->AsArm64());
+      ___ Cbnz(test_reg, Arm64JNIMacroLabel::Cast(label)->AsArm64());
       break;
     default:
       LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond);