Some read barrier clean-up in Optimizing.

These changes make the read barrier instrumentation code
emitted by the compiler more uniform across the ARM, ARM64,
x86 and x86-64 back ends.

Bug: 12687968
Change-Id: I6b1c0cf2bc22ed6cd6b14754136bef4a2a036ea5
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f95cb30..f7ccdd8 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6009,7 +6009,7 @@
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck: {
       // Note that we indeed only call on slow path, but we always go
-      // into the slow path for the unresolved & interface check
+      // into the slow path for the unresolved and interface check
       // cases.
       //
       // We cannot directly call the InstanceofNonTrivial runtime
@@ -6240,8 +6240,8 @@
 
     case TypeCheckKind::kUnresolvedCheck:
     case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved &
-      // interface check cases.
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6520,6 +6520,8 @@
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
     __ movl(root_reg, Address(obj, offset));
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
   }
 }
 
@@ -6582,7 +6584,9 @@
   // Note: the original implementation in ReadBarrier::Barrier is
   // slightly more complex as:
   // - it implements the load-load fence using a data dependency on
-  //   the high-bits of rb_state, which are expected to be all zeroes;
+  //   the high-bits of rb_state, which are expected to be all zeroes
+  //   (we use CodeGeneratorX86::GenerateMemoryBarrier instead here,
+  //   which is a no-op thanks to the x86 memory model);
   // - it performs additional checks that we do not do here for
   //   performance reasons.