Add write barriers to optimizing compiler.

Change-Id: I43a40954757f51d49782e70bc28f7c314d6dbe17
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index ee51fcd..d75644d 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -171,6 +171,7 @@
 
 COMPILER_GTEST_HOST_SRC_FILES := \
   $(COMPILER_GTEST_COMMON_SRC_FILES) \
+  compiler/utils/assembler_thumb_test.cc \
   compiler/utils/x86/assembler_x86_test.cc \
   compiler/utils/x86_64/assembler_x86_64_test.cc
 
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index bc1e75b..93e7367 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -17,6 +17,7 @@
 #include "code_generator_arm.h"
 
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "gc/accounting/card_table.h"
 #include "mirror/array.h"
 #include "mirror/art_method.h"
 #include "thread.h"
@@ -1032,6 +1033,11 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
+  // Temporary registers for the write barrier.
+  if (instruction->InputAt(1)->GetType() == Primitive::kPrimNot) {
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
   instruction->SetLocations(locations);
 }
 
@@ -1056,10 +1062,24 @@
       break;
     }
 
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      Register value = locations->InAt(1).AsArm().AsCoreRegister();
+      __ StoreToOffset(kStoreWord, value, obj, offset);
+      break;
+    }
+
     case Primitive::kPrimNot: {
       Register value = locations->InAt(1).AsArm().AsCoreRegister();
       __ StoreToOffset(kStoreWord, value, obj, offset);
+
+      Register temp = locations->GetTemp(0).AsArm().AsCoreRegister();
+      Register card = locations->GetTemp(1).AsArm().AsCoreRegister();
+      Label is_null;
+      __ CompareAndBranchIfZero(value, &is_null);
+      __ LoadFromOffset(kLoadWord, card, TR, Thread::CardTableOffset<kArmWordSize>().Int32Value());
+      __ Lsr(temp, obj, gc::accounting::CardTable::kCardShift);
+      __ strb(card, Address(card, temp));
+      __ Bind(&is_null);
       break;
     }
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index af267d8..c44b761 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -15,6 +15,7 @@
  */
 
 #include "code_generator_x86.h"
+#include "gc/accounting/card_table.h"
 #include "utils/assembler.h"
 #include "utils/x86/assembler_x86.h"
 #include "utils/x86/managed_register_x86.h"
@@ -1009,6 +1010,12 @@
   } else {
     locations->SetInAt(1, Location::RequiresRegister());
   }
+  // Temporary registers for the write barrier.
+  if (instruction->InputAt(1)->GetType() == Primitive::kPrimNot) {
+    locations->AddTemp(Location::RequiresRegister());
+    // Ensure the card is in a byte register.
+    locations->AddTemp(X86CpuLocation(ECX));
+  }
   instruction->SetLocations(locations);
 }
 
@@ -1033,10 +1040,25 @@
       break;
     }
 
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      Register value = locations->InAt(1).AsX86().AsCpuRegister();
+      __ movl(Address(obj, offset), value);
+      break;
+    }
+
     case Primitive::kPrimNot: {
       Register value = locations->InAt(1).AsX86().AsCpuRegister();
       __ movl(Address(obj, offset), value);
+      Label is_null;
+      Register temp = locations->GetTemp(0).AsX86().AsCpuRegister();
+      Register card = locations->GetTemp(1).AsX86().AsCpuRegister();
+      __ testl(value, value);
+      __ j(kEqual, &is_null);
+      __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value()));
+      __ movl(temp, obj);
+      __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
+      __ movb(Address(temp, card, TIMES_1, 0),  locations->GetTemp(1).AsX86().AsByteRegister());
+      __ Bind(&is_null);
       break;
     }
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 4bda082..d20dff0 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -17,6 +17,7 @@
 #include "code_generator_x86_64.h"
 
 #include "entrypoints/quick/quick_entrypoints.h"
+#include "gc/accounting/card_table.h"
 #include "mirror/array.h"
 #include "mirror/art_method.h"
 #include "mirror/object_reference.h"
@@ -871,6 +872,11 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
+  // Temporary registers for the write barrier.
+  if (instruction->InputAt(1)->GetType() == Primitive::kPrimNot) {
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
   instruction->SetLocations(locations);
 }
 
@@ -894,9 +900,24 @@
       break;
     }
 
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      __ movl(Address(obj, offset), value);
+      break;
+    }
+
     case Primitive::kPrimNot: {
       __ movl(Address(obj, offset), value);
+      Label is_null;
+      CpuRegister temp = locations->GetTemp(0).AsX86_64().AsCpuRegister();
+      CpuRegister card = locations->GetTemp(1).AsX86_64().AsCpuRegister();
+      __ testl(value, value);
+      __ j(kEqual, &is_null);
+      __ gs()->movq(card, Address::Absolute(
+          Thread::CardTableOffset<kX86_64WordSize>().Int32Value(), true));
+      __ movq(temp, obj);
+      __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
+      __ movb(Address(temp, card, TIMES_1, 0),  card);
+      __ Bind(&is_null);
       break;
     }
 
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index c693ec0..78ff31a 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -1376,13 +1376,23 @@
       }
 
       if (must_be_32bit) {
-        int32_t encoding = 0x1f << 27 | B22 | (load ? B20 : 0) | static_cast<uint32_t>(rd) << 12 |
+        int32_t encoding = 0x1f << 27 | (load ? B20 : 0) | static_cast<uint32_t>(rd) << 12 |
             ad.encodingThumb(true);
+        if (half) {
+          encoding |= B21;
+        } else if (!byte) {
+          encoding |= B22;
+        }
         Emit32(encoding);
       } else {
         // 16 bit register offset.
         int32_t encoding = B14 | B12 | (load ? B11 : 0) | static_cast<uint32_t>(rd) |
             ad.encodingThumb(false);
+        if (byte) {
+          encoding |= B10;
+        } else if (half) {
+          encoding |= B9;
+        }
         Emit16(encoding);
       }
     }
@@ -2513,12 +2523,22 @@
 
 
 void Thumb2Assembler::CompareAndBranchIfZero(Register r, Label* label) {
-  cbz(r, label);
+  if (!force_32bit_branches_) {
+    cbz(r, label);  // Single compare-and-branch instruction.
+    return;
+  }
+  cmp(r, ShifterOperand(0));  // 32-bit branches forced: compare against zero...
+  b(label, EQ);               // ...and branch to label when equal.
 }
 
 
 void Thumb2Assembler::CompareAndBranchIfNonZero(Register r, Label* label) {
-  cbnz(r, label);
+  if (!force_32bit_branches_) {
+    cbnz(r, label);  // Single compare-and-branch instruction.
+    return;
+  }
+  cmp(r, ShifterOperand(0));  // 32-bit branches forced: compare against zero...
+  b(label, NE);               // ...and branch to label when not equal.
 }
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index fe9349b..1dbef95 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1233,7 +1233,7 @@
 
 
 void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
-  EmitGenericShift(4, reg, imm);
+  EmitGenericShift(/* wide */ false, /* reg_or_opcode */ 4, reg, imm);
 }
 
 
@@ -1243,7 +1243,12 @@
 
 
 void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
-  EmitGenericShift(5, reg, imm);
+  EmitGenericShift(/* wide */ false, /* reg_or_opcode */ 5, reg, imm);
+}
+
+
+void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
+  EmitGenericShift(/* wide */ true, /* reg_or_opcode */ 5, reg, imm);
 }
 
 
@@ -1253,7 +1258,7 @@
 
 
 void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
-  EmitGenericShift(7, reg, imm);
+  EmitGenericShift(/* wide */ false, /* reg_or_opcode */ 7, reg, imm);
 }
 
 
@@ -1569,11 +1574,15 @@
 }
 
 
-void X86_64Assembler::EmitGenericShift(int reg_or_opcode,
-                                    CpuRegister reg,
-                                    const Immediate& imm) {
+void X86_64Assembler::EmitGenericShift(bool wide,
+                                       int reg_or_opcode,
+                                       CpuRegister reg,
+                                       const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int8());
+  if (wide) {
+    EmitRex64(reg);
+  }
   if (imm.value() == 1) {
     EmitUint8(0xD1);
     EmitOperand(reg_or_opcode, Operand(reg));
@@ -1586,8 +1595,8 @@
 
 
 void X86_64Assembler::EmitGenericShift(int reg_or_opcode,
-                                    CpuRegister operand,
-                                    CpuRegister shifter) {
+                                       CpuRegister operand,
+                                       CpuRegister shifter) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK_EQ(shifter.AsRegister(), RCX);
   EmitUint8(0xD3);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 330d2d5..e988029 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -441,6 +441,8 @@
   void sarl(CpuRegister reg, const Immediate& imm);
   void sarl(CpuRegister operand, CpuRegister shifter);
 
+  void shrq(CpuRegister reg, const Immediate& imm);
+
   void negl(CpuRegister reg);
   void notl(CpuRegister reg);
 
@@ -626,7 +628,7 @@
   void EmitLabelLink(Label* label);
   void EmitNearLabelLink(Label* label);
 
-  void EmitGenericShift(int rm, CpuRegister reg, const Immediate& imm);
+  void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
   void EmitGenericShift(int rm, CpuRegister operand, CpuRegister shifter);
 
   // If any input is not false, output the necessary rex prefix.
diff --git a/test/401-optimizing-compiler/src/Main.java b/test/401-optimizing-compiler/src/Main.java
index a5192e1..0d8eeb9 100644
--- a/test/401-optimizing-compiler/src/Main.java
+++ b/test/401-optimizing-compiler/src/Main.java
@@ -75,6 +75,16 @@
     if (m.$opt$TestReturnNewObject(m) == m) {
       throw new Error("Unexpected value returned");
     }
+
+    // Loop enough iterations to hope for a crash if no write barrier
+    // is emitted.
+    for (int j = 0; j < 3; j++) {
+      Main m1 = new Main();
+      $opt$SetFieldInOldObject(m1);
+      for (int i = 0; i < 1000; ++i) {
+        Object o = new byte[1024];
+      }
+    }
   }
 
   static int $opt$TestInvokeIntParameter(int param) {
@@ -169,4 +179,10 @@
   public static void throwStaticMethod() {
     throw new Error("Error");
   }
+
+  public static void $opt$SetFieldInOldObject(Main m) {
+    m.o = new Main();  // Reference-typed field store: the case the write-barrier loop exercises.
+  }
+
+  Object o;
 }