Avoid CAS for marking region space bitmap for baker

Only have the GC thread mark it. This occurs when popping from the
mark stack. The race where an object may be pushed to the mark
stack twice is handled by not scanning if it is already marked.

Also avoid checking is_active when marking from the GC.

EAAC: 1263 -> 1253 (average of 30 runs)
GC time: 7.21s -> 6.83s (average of 18 runs)

Timings on 960 mhz N6P.

Bug: 12687968

Change-Id: I47e98c3e258829d2ba0babd803a219c82a36168c
Test: test-art-host, debug N6P booting with baker CC.
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 0495c95..27f8bd7 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -147,6 +147,18 @@
 #endif
 }
 
+inline Object* Object::GetReadBarrierPointerAcquire() {
+#ifdef USE_BAKER_READ_BARRIER
+  DCHECK(kUseBakerReadBarrier);
+  LockWord lw(GetFieldAcquire<uint32_t>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
+  return reinterpret_cast<Object*>(lw.ReadBarrierState());
+#else
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+#endif
+}
+
+
 inline uint32_t Object::GetMarkBit() {
 #ifdef USE_READ_BARRIER
   return GetLockWord(false).MarkBitState();
@@ -814,6 +826,13 @@
   }
 }
 
+template<typename kSize>
+inline kSize Object::GetFieldAcquire(MemberOffset field_offset) {
+  const uint8_t* raw_addr = reinterpret_cast<const uint8_t*>(this) + field_offset.Int32Value();
+  const kSize* addr = reinterpret_cast<const kSize*>(raw_addr);
+  return reinterpret_cast<const Atomic<kSize>*>(addr)->LoadAcquire();
+}
+
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldWeakSequentiallyConsistent64(MemberOffset field_offset,
                                                          int64_t old_value, int64_t new_value) {