Minor improvements for the CC collector.

- Split Mark() and inline its first part.
- Make sure some other routines are inlined.
- Add some UNLIKELY's.
- Use VisitConcurrentRoots().

Ritz EAAC GC time decreased from 28.9 -> 27.6s (-4.5%) on N5.

Bug: 12687968

Change-Id: I7bd13f162e7daa2a5853000fb22c5fefc318994f
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index c32b19e..27726e2 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -93,7 +93,7 @@
     DCHECK(ref != nullptr);
     return IsMarked(ref) == ref;
   }
-  mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
+  ALWAYS_INLINE mirror::Object* Mark(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
   bool IsMarking() const {
     return is_marking_;
@@ -183,6 +183,8 @@
   void DisableMarking() SHARED_REQUIRES(Locks::mutator_lock_);
   void IssueDisableMarkingCheckpoint() SHARED_REQUIRES(Locks::mutator_lock_);
   void ExpandGcMarkStack() SHARED_REQUIRES(Locks::mutator_lock_);
+  mirror::Object* MarkNonMoving(mirror::Object* from_ref) SHARED_REQUIRES(Locks::mutator_lock_)
+      REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_);
 
   space::RegionSpace* region_space_;      // The underlying region space.
   std::unique_ptr<Barrier> gc_barrier_;