x86/x86-64 read barrier support for concurrent GC in Optimizing.

This first implementation uses slow paths to instrument heap
reference loads and GC root loads for the concurrent copying
collector, respectively calling the artReadBarrierSlow and
artReadBarrierForRootSlow (new) runtime entry points.

Notes:
- This implementation does not instrument HInvokeVirtual
  nor HInvokeInterface instructions (for class reference
  loads), as the corresponding read barriers are not stricly
  required with the current concurrent copying collector.
- Intrinsics which may eventually call (on slow path) are
  disabled when read barriers are enabled, as the current
  slow path infrastructure does not support this case.
- When read barriers are enabled, the code generated for a
  HArraySet instruction always go into the array set slow
  path for object arrays (delegating the operation to the
  runtime), as we are lacking a mechanism to keep a
  temporary register live accross a runtime call (needed for
  the instrumentation of type checking code, which requires
  two successive read barriers).

Bug: 12687968
Change-Id: I14cd6107233c326389120336f93955b28ffbb329
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 5188e11..77d53fc 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -310,7 +310,7 @@
 
 void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
                                              size_t maximum_number_of_live_core_registers,
-                                             size_t maximum_number_of_live_fp_registers,
+                                             size_t maximum_number_of_live_fpu_registers,
                                              size_t number_of_out_slots,
                                              const ArenaVector<HBasicBlock*>& block_order) {
   block_order_ = &block_order;
@@ -324,14 +324,14 @@
       && IsLeafMethod()
       && !RequiresCurrentMethod()) {
     DCHECK_EQ(maximum_number_of_live_core_registers, 0u);
-    DCHECK_EQ(maximum_number_of_live_fp_registers, 0u);
+    DCHECK_EQ(maximum_number_of_live_fpu_registers, 0u);
     SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
   } else {
     SetFrameSize(RoundUp(
         number_of_spill_slots * kVRegSize
         + number_of_out_slots * kVRegSize
         + maximum_number_of_live_core_registers * GetWordSize()
-        + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
+        + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize()
         + FrameEntrySpillSize(),
         kStackAlignment));
   }
@@ -547,15 +547,19 @@
   }
 }
 
+// TODO: Remove argument `code_generator_supports_read_barrier` when
+// all code generators have read barrier support.
 void CodeGenerator::CreateLoadClassLocationSummary(HLoadClass* cls,
                                                    Location runtime_type_index_location,
-                                                   Location runtime_return_location) {
+                                                   Location runtime_return_location,
+                                                   bool code_generator_supports_read_barrier) {
   ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena();
   LocationSummary::CallKind call_kind = cls->NeedsAccessCheck()
       ? LocationSummary::kCall
-      : (cls->CanCallRuntime()
-          ? LocationSummary::kCallOnSlowPath
-          : LocationSummary::kNoCall);
+      : (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) ||
+          cls->CanCallRuntime())
+            ? LocationSummary::kCallOnSlowPath
+            : LocationSummary::kNoCall);
   LocationSummary* locations = new (allocator) LocationSummary(cls, call_kind);
   if (cls->NeedsAccessCheck()) {
     locations->SetInAt(0, Location::NoLocation());
@@ -1320,21 +1324,38 @@
   // coherent with the runtime call generated, and that the GC side effect is
   // set when required.
   if (slow_path == nullptr) {
-    DCHECK(instruction->GetLocations()->WillCall()) << instruction->DebugName();
+    DCHECK(instruction->GetLocations()->WillCall())
+        << "instruction->DebugName()=" << instruction->DebugName();
     DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()))
-        << instruction->DebugName() << instruction->GetSideEffects().ToString();
+        << "instruction->DebugName()=" << instruction->DebugName()
+        << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString();
   } else {
     DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal())
-        << instruction->DebugName() << slow_path->GetDescription();
+        << "instruction->DebugName()=" << instruction->DebugName()
+        << " slow_path->GetDescription()=" << slow_path->GetDescription();
     DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
            // Control flow would not come back into the code if a fatal slow
            // path is taken, so we do not care if it triggers GC.
            slow_path->IsFatal() ||
            // HDeoptimize is a special case: we know we are not coming back from
            // it into the code.
-           instruction->IsDeoptimize())
-        << instruction->DebugName() << instruction->GetSideEffects().ToString()
-        << slow_path->GetDescription();
+           instruction->IsDeoptimize() ||
+           // When read barriers are enabled, some instructions use a
+           // slow path to emit a read barrier, which does not trigger
+           // GC, is not fatal, nor is emitted by HDeoptimize
+           // instructions.
+           (kEmitCompilerReadBarrier &&
+            (instruction->IsInstanceFieldGet() ||
+             instruction->IsStaticFieldGet() ||
+             instruction->IsArraySet() ||
+             instruction->IsArrayGet() ||
+             instruction->IsLoadClass() ||
+             instruction->IsLoadString() ||
+             instruction->IsInstanceOf() ||
+             instruction->IsCheckCast())))
+        << "instruction->DebugName()=" << instruction->DebugName()
+        << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString()
+        << " slow_path->GetDescription()=" << slow_path->GetDescription();
   }
 
   // Check the coherency of leaf information.
@@ -1346,11 +1367,12 @@
 }
 
 void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
-  RegisterSet* register_set = locations->GetLiveRegisters();
+  RegisterSet* live_registers = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+
   for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
     if (!codegen->IsCoreCalleeSaveRegister(i)) {
-      if (register_set->ContainsCoreRegister(i)) {
+      if (live_registers->ContainsCoreRegister(i)) {
         // If the register holds an object, update the stack mask.
         if (locations->RegisterContainsObject(i)) {
           locations->SetStackBit(stack_offset / kVRegSize);
@@ -1365,7 +1387,7 @@
 
   for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
     if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
-      if (register_set->ContainsFloatingPointRegister(i)) {
+      if (live_registers->ContainsFloatingPointRegister(i)) {
         DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
         DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
         saved_fpu_stack_offsets_[i] = stack_offset;
@@ -1376,12 +1398,14 @@
 }
 
 void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
-  RegisterSet* register_set = locations->GetLiveRegisters();
+  RegisterSet* live_registers = locations->GetLiveRegisters();
   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+
   for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
     if (!codegen->IsCoreCalleeSaveRegister(i)) {
-      if (register_set->ContainsCoreRegister(i)) {
+      if (live_registers->ContainsCoreRegister(i)) {
         DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+        DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
         stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
       }
     }
@@ -1389,8 +1413,9 @@
 
   for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
     if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
-      if (register_set->ContainsFloatingPointRegister(i)) {
+      if (live_registers->ContainsFloatingPointRegister(i)) {
         DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+        DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
         stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i);
       }
     }