optimizing: Build HConstructorFence for HNewArray/HNewInstance nodes

Also fixes:
* LSE and code_sinking, so that they keep optimizing new-instance where they did before
* Various tests to expect constructor fences after new-instance

Sidenote: new-instance String does not get a ConstructorFence; the
special StringFactory calls are assumed to be self-fencing.

Metric changes on go/lem:
* CodeSize -0.262% in ART-Compile (ARMv8)
* RunTime -0.747% for all (linux-armv8)

(No changes expected on x86, where constructor fences are no-ops.)

The RunTime regression is temporary until art_quick_alloc_* entrypoints have their
DMBs removed in a follow-up CL.

Test: art/test.py
Bug: 36656456
Change-Id: I6a936a6e51c623e1c6b5b22eee5c3c72bebbed35
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index 0b4dcd3..e598e19 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -56,6 +56,17 @@
     return true;
   }
 
+  // Check it is safe to move ConstructorFence.
+  // (Safe to move ConstructorFence for only protecting the new-instance but not for finals.)
+  if (instruction->IsConstructorFence()) {
+    HConstructorFence* ctor_fence = instruction->AsConstructorFence();
+
+    // A fence with "0" inputs is dead and should've been removed in a prior pass.
+    DCHECK_NE(0u, ctor_fence->InputCount());
+
+    return ctor_fence->GetAssociatedAllocation() != nullptr;
+  }
+
   // All other instructions that can throw cannot be moved.
   if (instruction->CanThrow()) {
     return false;
@@ -134,11 +145,11 @@
                             HInstruction* user,
                             const ArenaBitVector& post_dominated) {
   if (instruction->IsNewInstance()) {
-    return user->IsInstanceFieldSet() &&
+    return (user->IsInstanceFieldSet() || user->IsConstructorFence()) &&
         (user->InputAt(0) == instruction) &&
         !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
   } else if (instruction->IsNewArray()) {
-    return user->IsArraySet() &&
+    return (user->IsArraySet() || user->IsConstructorFence()) &&
         (user->InputAt(0) == instruction) &&
         !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
   }
@@ -372,7 +383,9 @@
   // Step (3): Try to move sinking candidates.
   for (HInstruction* instruction : move_in_order) {
     HInstruction* position = nullptr;
-    if (instruction->IsArraySet() || instruction->IsInstanceFieldSet()) {
+    if (instruction->IsArraySet()
+            || instruction->IsInstanceFieldSet()
+            || instruction->IsConstructorFence()) {
       if (!instructions_that_can_move.IsBitSet(instruction->InputAt(0)->GetId())) {
         // A store can trivially move, but it can safely do so only if the heap
         // location it stores to can also move.