Use entrypoint switching on x86 & x86-64 for GC root read barriers.

For consistency reason (with the ARM and ARM64 implementations),
check the read barrier marking entrypoint
(`Thread::Current()->pReadBarrierMarkReg ## root.reg()`)
instead of `Thread::Current()->GetIsGcMarking()` to decide whether
to mark a GC root.

This change should have no impact on the performance or the
size of the generated code.

Test: m test-art-host
Bug: 32638713
Change-Id: Ifd71312992fdfd6067447cccb7d95860f3771b57
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 137b554..9276616 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -7136,9 +7136,10 @@
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barrier are used:
       //
-      //   root = *address;
-      //   if (Thread::Current()->GetIsGcMarking()) {
-      //     root = ReadBarrier::Mark(root)
+      //   root = obj.field;
+      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      //   if (temp != null) {
+      //     root = temp(root)
       //   }
 
       // /* GcRoot<mirror::Object> */ root = *address
@@ -7159,8 +7160,11 @@
           instruction, root, /* unpoison_ref_before_marking */ false);
       codegen_->AddSlowPath(slow_path);
 
-      __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()),
-                    Immediate(0));
+      // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(root.reg());
+      __ fs()->cmpl(Address::Absolute(entry_point_offset), Immediate(0));
+      // The entrypoint is null when the GC is not marking.
       __ j(kNotEqual, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
     } else {