Use the card table to speed up the GSS collector.

Scan only dirty cards, rather than the whole space, to find
references from the non-moving spaces to the bump pointer spaces
during bump-pointer-space-only collections.

With this change, the Ritz MemAllocTest speeds up by 8-10% on host and
2-3% on N4. The Ritz EvaluateFibonacci speeds up by 8% and its average
pause time is reduced by 43% on N4.

Bug: 11650816
Change-Id: I1eefe75776bc37e24673b301ffa65a25f9bd4cde
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 533e5df..6cc44c9 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -20,6 +20,8 @@
 #include "heap.h"
 
 #include "debugger.h"
+#include "gc/accounting/card_table-inl.h"
+#include "gc/collector/semi_space.h"
 #include "gc/space/bump_pointer_space-inl.h"
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/large_object_space.h"
@@ -75,6 +77,18 @@
     obj->SetBrooksPointer(obj);
     obj->AssertSelfBrooksPointer();
   }
+  if (collector::SemiSpace::kUseRememberedSet && UNLIKELY(allocator == kAllocatorTypeNonMoving)) {
+    // (Note that this if statement will be constant-folded away for
+    // the fast-path quick entry points.) Because SetClass() has no
+    // write barrier, a non-moving space allocation needs an explicit
+    // write barrier: the class pointer may point into the bump pointer
+    // space (making it an "old-to-young" reference, though this is
+    // rare) under the GSS collector with the remembered set enabled.
+    // We don't need this for the kAllocatorTypeRosAlloc/DlMalloc
+    // cases because we don't directly allocate into the main alloc
+    // space (besides promotions) under the SS/GSS collector.
+    WriteBarrierField(obj, mirror::Object::ClassOffset(), klass);
+  }
   pre_fence_visitor(obj, usable_size);
   if (kIsDebugBuild && Runtime::Current()->IsStarted()) {
     CHECK_LE(obj->SizeOf(), usable_size);