optimizing: constructor fence redundancy elimination - remove dmb after LSE

Part one of a few upcoming CLs to optimize constructor fences.

This improves load-store-elimination; all singleton objects that are not
returned will have their associated constructor fence removed.

If the allocation is removed, so is the fence. Even if allocation is not
removed, fences can sometimes be removed.

This change is enabled by tracking the "this" object associated with the
constructor fence as an input. Fence inputs are considered weak; they do not keep
the "this" object alive; if the instructions for "this" are all deleted,
the fence can also be deleted.

Bug: 36656456
Test: art/test.py --host && art/test.py --target
Change-Id: I05659ab07e20d6e2ecd4be051b722726776f4ab1
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 874e357..fbab9df 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -179,6 +179,40 @@
                                      uint16_t class_def_index,
                                      bool requires)
       REQUIRES(!requires_constructor_barrier_lock_);
+
+  // Do the <init> methods for this class require a constructor barrier (prior to the return)?
+  // The answer is "yes", if and only if this class has any instance final fields.
+  // (This must not be called for any non-<init> methods; the answer would be "no").
+  //
+  // ---
+  //
+  // JLS 17.5.1 "Semantics of final fields" mandates that all final fields are frozen at the end
+  // of the invoked constructor. The constructor barrier is a conservative implementation means of
+  // enforcing the freezes happen-before the object being constructed is observable by another
+  // thread.
+  //
+  // Note: This question only makes sense for instance constructors;
+  // static constructors (despite possibly having finals) never need
+  // a barrier.
+  //
+  // JLS 12.4.2 "Detailed Initialization Procedure" approximately describes
+  // class initialization as:
+  //
+  //   lock(class.lock)
+  //     class.state = initializing
+  //   unlock(class.lock)
+  //
+  //   invoke <clinit>
+  //
+  //   lock(class.lock)
+  //     class.state = initialized
+  //   unlock(class.lock)              <-- acts as a release
+  //
+  // The last operation in the above example acts as an atomic release
+  // for any stores in <clinit>, which ends up being stricter
+  // than what a constructor barrier needs.
+  //
+  // See also QuasiAtomic::ThreadFenceForConstructor().
   bool RequiresConstructorBarrier(Thread* self,
                                   const DexFile* dex_file,
                                   uint16_t class_def_index)
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d7cc577..ebd578c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3067,6 +3067,15 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderARM::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index eee832a..78b627a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -5479,6 +5479,15 @@
   }
 }
 
+void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARM64::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index b6678b0..8744cc8 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -3103,6 +3103,15 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index aa030b2..9736626 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -7766,6 +7766,15 @@
   }
 }
 
+void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 19250c6..6f37ed4 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -5653,6 +5653,15 @@
   }
 }
 
+void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 08a752f..1e867dd 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2057,6 +2057,15 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ff6e099..f413739 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -2165,6 +2165,15 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  constructor_fence->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorX86_64::VisitConstructorFence(
+    HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
+  codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+}
+
 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 12340b4..6a14045 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -341,7 +341,12 @@
     const HInstructionList& list = input->IsPhi()
         ? input->GetBlock()->GetPhis()
         : input->GetBlock()->GetInstructions();
-    if (!list.Contains(input)) {
+    if (input->GetBlock() == nullptr) {
+      AddError(StringPrintf("Input %d of instruction %d is not in any "
+                            "basic block of the control-flow graph.",
+                            input->GetId(),
+                            instruction->GetId()));
+    } else if (!list.Contains(input)) {
       AddError(StringPrintf("Input %d of instruction %d is not defined "
                             "in a basic block of the control-flow graph.",
                             input->GetId(),
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 1f8a58c..d71d786 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -1462,8 +1462,13 @@
         }
       }
       if (needs_constructor_barrier) {
-        HMemoryBarrier* barrier = new (graph_->GetArena()) HMemoryBarrier(kStoreStore, kNoDexPc);
-        invoke_instruction->GetBlock()->InsertInstructionBefore(barrier, invoke_instruction);
+        // See CompilerDriver::RequiresConstructorBarrier for more details.
+        DCHECK(obj != nullptr) << "only non-static methods can have a constructor fence";
+
+        HConstructorFence* constructor_fence =
+            new (graph_->GetArena()) HConstructorFence(obj, kNoDexPc, graph_->GetArena());
+        invoke_instruction->GetBlock()->InsertInstructionBefore(constructor_fence,
+                                                                invoke_instruction);
       }
       *return_replacement = nullptr;
       break;
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 978c6a2..8b79da8 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -451,10 +451,13 @@
                                                               referrer_method_id.class_idx_,
                                                               parameter_index++,
                                                               Primitive::kPrimNot,
-                                                              true);
+                                                              /* is_this */ true);
     AppendInstruction(parameter);
     UpdateLocal(locals_index++, parameter);
     number_of_parameters--;
+    current_this_parameter_ = parameter;
+  } else {
+    DCHECK(current_this_parameter_ == nullptr);
   }
 
   const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
@@ -465,7 +468,7 @@
         arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
         parameter_index++,
         Primitive::GetType(shorty[shorty_pos]),
-        false);
+        /* is_this */ false);
     ++shorty_pos;
     AppendInstruction(parameter);
     // Store the parameter value in the local that the dex code will use
@@ -588,6 +591,8 @@
   UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
 }
 
+// Does the method being compiled need any constructor barriers being inserted?
+// (Always 'false' for methods that aren't <init>.)
 static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) {
   // Can be null in unit tests only.
   if (UNLIKELY(cu == nullptr)) {
@@ -596,6 +601,11 @@
 
   Thread* self = Thread::Current();
   return cu->IsConstructor()
+      && !cu->IsStatic()
+      // RequiresConstructorBarrier must only be queried for <init> methods;
+      // it's effectively "false" for every other method.
+      //
+      // See CompilerDriver::RequiresConstructBarrier for more explanation.
       && driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
 }
 
@@ -639,13 +649,24 @@
                                       Primitive::Type type,
                                       uint32_t dex_pc) {
   if (type == Primitive::kPrimVoid) {
+    // Only <init> (which is a return-void) could possibly have a constructor fence.
     // This may insert additional redundant constructor fences from the super constructors.
     // TODO: remove redundant constructor fences (b/36656456).
     if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) {
-      AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc));
+      // Compiling instance constructor.
+      if (kIsDebugBuild) {
+        std::string method_name = graph_->GetMethodName();
+        CHECK_EQ(std::string("<init>"), method_name);
+      }
+
+      HInstruction* fence_target = current_this_parameter_;
+      DCHECK(fence_target != nullptr);
+
+      AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_));
     }
     AppendInstruction(new (arena_) HReturnVoid(dex_pc));
   } else {
+    DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_));
     HInstruction* value = LoadLocal(instruction.VRegA(), type);
     AppendInstruction(new (arena_) HReturn(value, dex_pc));
   }
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 7fdc188..2fb5c7b 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -62,6 +62,7 @@
         current_block_(nullptr),
         current_locals_(nullptr),
         latest_result_(nullptr),
+        current_this_parameter_(nullptr),
         compiler_driver_(driver),
         code_generator_(code_generator),
         dex_compilation_unit_(dex_compilation_unit),
@@ -325,6 +326,11 @@
   HBasicBlock* current_block_;
   ArenaVector<HInstruction*>* current_locals_;
   HInstruction* latest_result_;
+  // Current "this" parameter.
+  // Valid only after InitializeParameters() finishes.
+  // * Null for static methods.
+  // * Non-null for instance methods.
+  HParameterValue* current_this_parameter_;
 
   CompilerDriver* const compiler_driver_;
 
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 48699b3..8d8cc93 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -566,14 +566,22 @@
       store->GetBlock()->RemoveInstruction(store);
     }
 
-    // Eliminate allocations that are not used.
+    // Eliminate singleton-classified instructions:
+    //   * - Constructor fences (they never escape this thread).
+    //   * - Allocations (if they are unused).
     for (HInstruction* new_instance : singleton_new_instances_) {
+      HConstructorFence::RemoveConstructorFences(new_instance);
+
       if (!new_instance->HasNonEnvironmentUses()) {
         new_instance->RemoveEnvironmentUsers();
         new_instance->GetBlock()->RemoveInstruction(new_instance);
       }
     }
     for (HInstruction* new_array : singleton_new_arrays_) {
+      // TODO: Delete constructor fences for new-array
+      // In the future HNewArray instructions will have HConstructorFence's for them.
+      // HConstructorFence::RemoveConstructorFences(new_array);
+
       if (!new_array->HasNonEnvironmentUses()) {
         new_array->RemoveEnvironmentUsers();
         new_array->GetBlock()->RemoveInstruction(new_array);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ca953a1..f250c1a 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -528,6 +528,15 @@
   return cached_current_method_;
 }
 
+const char* HGraph::GetMethodName() const {
+  const DexFile::MethodId& method_id = dex_file_.GetMethodId(method_idx_);
+  return dex_file_.GetMethodName(method_id);
+}
+
+std::string HGraph::PrettyMethod(bool with_signature) const {
+  return dex_file_.PrettyMethod(method_idx_, with_signature);
+}
+
 HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value, uint32_t dex_pc) {
   switch (type) {
     case Primitive::Type::kPrimBoolean:
@@ -1150,6 +1159,81 @@
   }
 }
 
+void HVariableInputSizeInstruction::RemoveAllInputs() {
+  RemoveAsUserOfAllInputs();
+  DCHECK(!HasNonEnvironmentUses());
+
+  inputs_.clear();
+  DCHECK_EQ(0u, InputCount());
+}
+
+void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) {
+  DCHECK(instruction->GetBlock() != nullptr);
+  // Removing constructor fences only makes sense for instructions with an object return type.
+  DCHECK_EQ(Primitive::kPrimNot, instruction->GetType());
+
+  // Efficient implementation that simultaneously (in one pass):
+  // * Scans the uses list for all constructor fences.
+  // * Deletes that constructor fence from the uses list of `instruction`.
+  // * Deletes `instruction` from the constructor fence's inputs.
+  // * Deletes the constructor fence if it now has 0 inputs.
+
+  const HUseList<HInstruction*>& uses = instruction->GetUses();
+  // Warning: Although this is "const", we might mutate the list when calling RemoveInputAt.
+  for (auto it = uses.begin(), end = uses.end(); it != end; ) {
+    const HUseListNode<HInstruction*>& use_node = *it;
+    HInstruction* const use_instruction = use_node.GetUser();
+
+    // Advance the iterator immediately once we fetch the use_node.
+    // Warning: If the input is removed, the current iterator becomes invalid.
+    ++it;
+
+    if (use_instruction->IsConstructorFence()) {
+      HConstructorFence* ctor_fence = use_instruction->AsConstructorFence();
+      size_t input_index = use_node.GetIndex();
+
+      // Process the candidate instruction for removal
+      // from the graph.
+
+      // Constructor fence instructions are never
+      // used by other instructions.
+      //
+      // If we wanted to make this more generic, it
+      // could be a runtime if statement.
+      DCHECK(!ctor_fence->HasUses());
+
+      // A constructor fence's return type is "kPrimVoid"
+      // and therefore it can't have any environment uses.
+      DCHECK(!ctor_fence->HasEnvironmentUses());
+
+      // Remove the inputs first, otherwise removing the instruction
+      // will try to remove its uses while we are already removing uses
+      // and this operation will fail.
+      DCHECK_EQ(instruction, ctor_fence->InputAt(input_index));
+
+      // Removing the input will also remove the `use_node`.
+      // (Do not look at `use_node` after this, it will be a dangling reference).
+      ctor_fence->RemoveInputAt(input_index);
+
+      // Once all inputs are removed, the fence is considered dead and
+      // is removed.
+      if (ctor_fence->InputCount() == 0u) {
+        ctor_fence->GetBlock()->RemoveInstruction(ctor_fence);
+      }
+    }
+  }
+
+  if (kIsDebugBuild) {
+    // Post-condition checks:
+    // * None of the uses of `instruction` are a constructor fence.
+    // * The `instruction` itself did not get removed from a block.
+    for (const HUseListNode<HInstruction*>& use_node : instruction->GetUses()) {
+      CHECK(!use_node.GetUser()->IsConstructorFence());
+    }
+    CHECK(instruction->GetBlock() != nullptr);
+  }
+}
+
 #define DEFINE_ACCEPT(name, super)                                             \
 void H##name::Accept(HGraphVisitor* visitor) {                                 \
   visitor->Visit##name(this);                                                  \
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 36c7df7..e40361e 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -46,6 +46,7 @@
 
 class GraphChecker;
 class HBasicBlock;
+class HConstructorFence;
 class HCurrentMethod;
 class HDoubleConstant;
 class HEnvironment;
@@ -57,6 +58,7 @@
 class HInvoke;
 class HLongConstant;
 class HNullConstant;
+class HParameterValue;
 class HPhi;
 class HSuspendCheck;
 class HTryBoundary;
@@ -537,6 +539,12 @@
     return method_idx_;
   }
 
+  // Get the method name (without the signature), e.g. "<init>"
+  const char* GetMethodName() const;
+
+  // Get the pretty method name (class + name + optionally signature).
+  std::string PrettyMethod(bool with_signature = true) const;
+
   InvokeType GetInvokeType() const {
     return invoke_type_;
   }
@@ -1297,6 +1305,7 @@
   M(ClearException, Instruction)                                        \
   M(ClinitCheck, Instruction)                                           \
   M(Compare, BinaryOperation)                                           \
+  M(ConstructorFence, Instruction)                                      \
   M(CurrentMethod, Instruction)                                         \
   M(ShouldDeoptimizeFlag, Instruction)                                  \
   M(Deoptimize, Instruction)                                            \
@@ -1476,8 +1485,11 @@
 template <typename T>
 class HUseListNode : public ArenaObject<kArenaAllocUseListNode> {
  public:
+  // Get the instruction which has this use as one of the inputs.
   T GetUser() const { return user_; }
+  // Get the position of the input record that this use corresponds to.
   size_t GetIndex() const { return index_; }
+  // Set the position of the input record that this use corresponds to.
   void SetIndex(size_t index) { index_ = index; }
 
   // Hook for the IntrusiveForwardList<>.
@@ -2037,7 +2049,8 @@
         !IsNativeDebugInfo() &&
         !IsParameterValue() &&
         // If we added an explicit barrier then we should keep it.
-        !IsMemoryBarrier();
+        !IsMemoryBarrier() &&
+        !IsConstructorFence();
   }
 
   bool IsDeadAndRemovable() const {
@@ -2431,6 +2444,11 @@
   void InsertInputAt(size_t index, HInstruction* input);
   void RemoveInputAt(size_t index);
 
+  // Removes all the inputs.
+  // Also removes this instructions from each input's use list
+  // (for non-environment uses only).
+  void RemoveAllInputs();
+
  protected:
   HVariableInputSizeInstruction(SideEffects side_effects,
                                 uint32_t dex_pc,
@@ -5069,7 +5087,7 @@
   const DexFile& GetDexFile() const { return dex_file_; }
   dex::TypeIndex GetTypeIndex() const { return type_index_; }
   uint8_t GetIndex() const { return index_; }
-  bool IsThis() const ATTRIBUTE_UNUSED { return GetPackedFlag<kFlagIsThis>(); }
+  bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); }
 
   bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
   void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
@@ -6507,6 +6525,137 @@
   DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier);
 };
 
+// A constructor fence orders all prior stores to fields that could be accessed via a final field of
+// the specified object(s), with respect to any subsequent store that might "publish"
+// (i.e. make visible) the specified object to another thread.
+//
+// JLS 17.5.1 "Semantics of final fields" states that a freeze action happens
+// for all final fields (that were set) at the end of the invoked constructor.
+//
+// The constructor fence models the freeze actions for the final fields of an object
+// being constructed (semantically at the end of the constructor). Constructor fences
+// have a per-object affinity; two separate objects being constructed get two separate
+// constructor fences.
+//
+// (Note: that if calling a super-constructor or forwarding to another constructor,
+// the freezes would happen at the end of *that* constructor being invoked).
+//
+// The memory model guarantees that when the object being constructed is "published" after
+// constructor completion (i.e. escapes the current thread via a store), then any final field
+// writes must be observable on other threads (once they observe that publication).
+//
+// Further, anything written before the freeze, and read by dereferencing through the final field,
+// must also be visible (so final object field could itself have an object with non-final fields;
+// yet the freeze must also extend to them).
+//
+// Constructor example:
+//
+//     class HasFinal {
+//        final int field;                              Optimizing IR for <init>()V:
+//        HasFinal() {
+//          field = 123;                                HInstanceFieldSet(this, HasFinal.field, 123)
+//          // freeze(this.field);                      HConstructorFence(this)
+//        }                                             HReturn
+//     }
+//
+// HConstructorFence can serve double duty as a fence for new-instance/new-array allocations of
+// already-initialized classes; in that case the allocation must act as a "default-initializer"
+// of the object which effectively writes the class pointer "final field".
+//
+// For example, we can model default-initialiation as roughly the equivalent of the following:
+//
+//     class Object {
+//       private final Class header;
+//     }
+//
+//  Java code:                                           Optimizing IR:
+//
+//     T new_instance<T>() {
+//       Object obj = allocate_memory(T.class.size);     obj = HInvoke(art_quick_alloc_object, T)
+//       obj.header = T.class;                           // header write is done by above call.
+//       // freeze(obj.header)                           HConstructorFence(obj)
+//       return (T)obj;
+//     }
+//
+// See also:
+// * CompilerDriver::RequiresConstructorBarrier
+// * QuasiAtomic::ThreadFenceForConstructor
+//
+class HConstructorFence FINAL : public HVariableInputSizeInstruction {
+                                  // A fence has variable inputs because the inputs can be removed
+                                  // after prepare_for_register_allocation phase.
+                                  // (TODO: In the future a fence could freeze multiple objects
+                                  //        after merging two fences together.)
+ public:
+  // `fence_object` is the reference that needs to be protected for correct publication.
+  //
+  // It makes sense in the following situations:
+  // * <init> constructors, it's the "this" parameter (i.e. HParameterValue, s.t. IsThis() == true).
+  // * new-instance-like instructions, it's the return value (i.e. HNewInstance).
+  //
+  // After construction the `fence_object` becomes the 0th input.
+  // This is not an input in a real sense, but just a convenient place to stash the information
+  // about the associated object.
+  HConstructorFence(HInstruction* fence_object,
+                    uint32_t dex_pc,
+                    ArenaAllocator* arena)
+    // We strongly suspect there is not a more accurate way to describe the fine-grained reordering
+    // constraints described in the class header. We claim that these SideEffects constraints
+    // enforce a superset of the real constraints.
+    //
+    // The ordering described above is conservatively modeled with SideEffects as follows:
+    //
+    // * To prevent reordering of the publication stores:
+    // ----> "Reads of objects" is the initial SideEffect.
+    // * For every primitive final field store in the constructor:
+    // ----> Union that field's type as a read (e.g. "Read of T") into the SideEffect.
+    // * If there are any stores to reference final fields in the constructor:
+    // ----> Use a more conservative "AllReads" SideEffect because any stores to any references
+    //       that are reachable from `fence_object` also need to be prevented for reordering
+    //       (and we do not want to do alias analysis to figure out what those stores are).
+    //
+    // In the implementation, this initially starts out as an "all reads" side effect; this is an
+    // even more conservative approach than the one described above, and prevents all of the
+    // above reordering without analyzing any of the instructions in the constructor.
+    //
+    // If in a later phase we discover that there are no writes to reference final fields,
+    // we can refine the side effect to a smaller set of type reads (see above constraints).
+      : HVariableInputSizeInstruction(SideEffects::AllReads(),
+                                      dex_pc,
+                                      arena,
+                                      /* number_of_inputs */ 1,
+                                      kArenaAllocConstructorFenceInputs) {
+    DCHECK(fence_object != nullptr);
+    SetRawInputAt(0, fence_object);
+  }
+
+  // The object associated with this constructor fence.
+  //
+  // (Note: This will be null after the prepare_for_register_allocation phase,
+  // as all constructor fence inputs are removed there).
+  HInstruction* GetFenceObject() const {
+    return InputAt(0);
+  }
+
+  // Find all the HConstructorFence uses (`fence_use`) for `this` and:
+  // - Delete `fence_use` from `this`'s use list.
+  // - Delete `this` from `fence_use`'s inputs list.
+  // - If the `fence_use` is dead, remove it from the graph.
+  //
+  // A fence is considered dead once it no longer has any uses
+  // and all of the inputs are dead.
+  //
+  // This must *not* be called during/after prepare_for_register_allocation,
+  // because that removes all the inputs to the fences but the fence is actually
+  // still considered live.
+  static void RemoveConstructorFences(HInstruction* instruction);
+
+  DECLARE_INSTRUCTION(ConstructorFence);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HConstructorFence);
+};
+
 class HMonitorOperation FINAL : public HTemplateInstruction<1> {
  public:
   enum class OperationKind {
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 66bfea9..c3c141b 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -167,6 +167,13 @@
   }
 }
 
+void PrepareForRegisterAllocation::VisitConstructorFence(HConstructorFence* constructor_fence) {
+  // Delete all the inputs to the constructor fence;
+  // they aren't used by the InstructionCodeGenerator and this lets us avoid creating a
+  // LocationSummary in the LocationsBuilder.
+  constructor_fence->RemoveAllInputs();
+}
+
 void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
   if (invoke->IsStaticWithExplicitClinitCheck()) {
     HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass();
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 7ffbe44..395d4ba 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -43,6 +43,7 @@
   void VisitArraySet(HArraySet* instruction) OVERRIDE;
   void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
   void VisitCondition(HCondition* condition) OVERRIDE;
+  void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE;
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
   void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
 
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 935fd81..136ed12 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -33,6 +33,7 @@
 
 template <bool kCount>
 const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = {
+  // Every name should have the same width and end with a space. Abbreviate if necessary:
   "Misc         ",
   "SwitchTbl    ",
   "SlowPaths    ",
@@ -49,6 +50,7 @@
   "Successors   ",
   "Dominated    ",
   "Instruction  ",
+  "CtorFenceIns ",
   "InvokeInputs ",
   "PhiInputs    ",
   "LoopInfo     ",
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index c39429c..60b6ea8 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -59,6 +59,7 @@
   kArenaAllocSuccessors,
   kArenaAllocDominated,
   kArenaAllocInstruction,
+  kArenaAllocConstructorFenceInputs,
   kArenaAllocInvokeInputs,
   kArenaAllocPhiInputs,
   kArenaAllocLoopInfo,
diff --git a/test/476-checker-ctor-memory-barrier/src/Main.java b/test/476-checker-ctor-memory-barrier/src/Main.java
index 330aa74..a538f52 100644
--- a/test/476-checker-ctor-memory-barrier/src/Main.java
+++ b/test/476-checker-ctor-memory-barrier/src/Main.java
@@ -17,8 +17,8 @@
 // TODO: Add more tests after we can inline functions with calls.
 
 class ClassWithoutFinals {
-  /// CHECK-START: void ClassWithoutFinals.<init>() register (after)
-  /// CHECK-NOT: MemoryBarrier kind:StoreStore
+  /// CHECK-START: void ClassWithoutFinals.<init>() inliner (after)
+  /// CHECK-NOT: ConstructorFence
   public ClassWithoutFinals() {}
 }
 
@@ -33,17 +33,40 @@
     // should not inline this constructor
   }
 
-  /// CHECK-START: void ClassWithFinals.<init>() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void ClassWithFinals.<init>() inliner (after)
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
+
+  /*
+   * Check that the correct assembly instructions are selected for a Store/Store fence.
+   *
+   * - ARM variants:   DMB ISHST (store-store fence for inner shareable domain)
+   * - Intel variants: no-op (store-store does not need a fence).
+   */
+
+  /// CHECK-START-ARM64: void ClassWithFinals.<init>() disassembly (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK-NEXT: dmb ishst
+
+  /// CHECK-START-ARM: void ClassWithFinals.<init>() disassembly (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK-NEXT: dmb ishst
+
+  /// CHECK-START-X86_64: void ClassWithFinals.<init>() disassembly (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK-NOT:  {{[slm]}}fence
+
+  /// CHECK-START-X86: void ClassWithFinals.<init>() disassembly (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK-NOT:  {{[slm]}}fence
   public ClassWithFinals() {
     // Exactly one constructor barrier.
     x = 0;
   }
 
-  /// CHECK-START: void ClassWithFinals.<init>(int) register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void ClassWithFinals.<init>(int) inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
   public ClassWithFinals(int x) {
     // This should have exactly two barriers:
@@ -55,11 +78,11 @@
 }
 
 class InheritFromClassWithFinals extends ClassWithFinals {
-  /// CHECK-START: void InheritFromClassWithFinals.<init>() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void InheritFromClassWithFinals.<init>() inliner (after)
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>() register (after)
+  /// CHECK-START: void InheritFromClassWithFinals.<init>() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public InheritFromClassWithFinals() {
     // Should inline the super constructor.
@@ -67,23 +90,23 @@
     // Exactly one constructor barrier here.
   }
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) register (after)
+  /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) inliner (after)
   /// CHECK:      InvokeStaticOrDirect
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) register (after)
-  /// CHECK-NOT:  MemoryBarrier kind:StoreStore
+  /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) inliner (after)
+  /// CHECK-NOT:  ConstructorFence
   public InheritFromClassWithFinals(boolean cond) {
     super(cond);
     // should not inline the super constructor
   }
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>(int) register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK-NOT:  MemoryBarrier kind:StoreStore
+  /// CHECK-START: void InheritFromClassWithFinals.<init>(int) inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK-NOT:  ConstructorFence
   /// CHECK:      ReturnVoid
 
-  /// CHECK-START: void InheritFromClassWithFinals.<init>(int) register (after)
+  /// CHECK-START: void InheritFromClassWithFinals.<init>(int) inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public InheritFromClassWithFinals(int unused) {
     // Should inline the super constructor and insert a memory barrier.
@@ -96,21 +119,21 @@
 class HaveFinalsAndInheritFromClassWithFinals extends ClassWithFinals {
   final int y;
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() register (after)
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() inliner (after)
   /// CHECK-NOT: InvokeStaticOrDirect
   public HaveFinalsAndInheritFromClassWithFinals() {
     // Should inline the super constructor and keep the memory barrier.
     y = 0;
   }
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(boolean) register (after)
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(boolean) inliner (after)
   /// CHECK:      InvokeStaticOrDirect
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
   public HaveFinalsAndInheritFromClassWithFinals(boolean cond) {
     super(cond);
@@ -118,15 +141,15 @@
     y = 0;
   }
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) register (after)
+  /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public HaveFinalsAndInheritFromClassWithFinals(int unused) {
     // Should inline the super constructor and keep keep both memory barriers.
@@ -141,55 +164,55 @@
 
 public class Main {
 
-  /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() register (after)
+  /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() inliner (after)
   /// CHECK:      InvokeStaticOrDirect
 
-  /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() register (after)
-  /// CHECK-NOT:  MemoryBarrier kind:StoreStore
+  /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() inliner (after)
+  /// CHECK-NOT:  ConstructorFence
   public static ClassWithFinals noInlineNoConstructorBarrier() {
     return new ClassWithFinals(false);
     // should not inline the constructor
   }
 
-  /// CHECK-START: void Main.inlineNew() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void Main.inlineNew() inliner (after)
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void Main.inlineNew() register (after)
+  /// CHECK-START: void Main.inlineNew() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void inlineNew() {
     new ClassWithFinals();
   }
 
-  /// CHECK-START: void Main.inlineNew1() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void Main.inlineNew1() inliner (after)
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void Main.inlineNew1() register (after)
+  /// CHECK-START: void Main.inlineNew1() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void inlineNew1() {
     new InheritFromClassWithFinals();
   }
 
-  /// CHECK-START: void Main.inlineNew2() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void Main.inlineNew2() inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void Main.inlineNew2() register (after)
+  /// CHECK-START: void Main.inlineNew2() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void inlineNew2() {
     new HaveFinalsAndInheritFromClassWithFinals();
   }
 
-  /// CHECK-START: void Main.inlineNew3() register (after)
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
-  /// CHECK:      MemoryBarrier kind:StoreStore
+  /// CHECK-START: void Main.inlineNew3() inliner (after)
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
+  /// CHECK:      ConstructorFence
   /// CHECK-NEXT: ReturnVoid
 
-  /// CHECK-START: void Main.inlineNew3() register (after)
+  /// CHECK-START: void Main.inlineNew3() inliner (after)
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void inlineNew3() {
     new HaveFinalsAndInheritFromClassWithFinals();
diff --git a/test/530-checker-lse-ctor-fences/expected.txt b/test/530-checker-lse-ctor-fences/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/530-checker-lse-ctor-fences/expected.txt
diff --git a/test/530-checker-lse-ctor-fences/info.txt b/test/530-checker-lse-ctor-fences/info.txt
new file mode 100644
index 0000000..ccc7b47
--- /dev/null
+++ b/test/530-checker-lse-ctor-fences/info.txt
@@ -0,0 +1 @@
+Checker test for testing load-store elimination with final fields (constructor fences).
diff --git a/test/530-checker-lse-ctor-fences/src/Main.java b/test/530-checker-lse-ctor-fences/src/Main.java
new file mode 100644
index 0000000..7755875
--- /dev/null
+++ b/test/530-checker-lse-ctor-fences/src/Main.java
@@ -0,0 +1,191 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This base class has a single final field;
+// the constructor should have one fence.
+class Circle {
+  Circle(double radius) {
+    this.radius = radius;
+  }
+  public double getRadius() {
+    return radius;
+  }
+  public double getArea() {
+    return radius * radius * Math.PI;
+  }
+
+  public double getCircumference() {
+    return 2 * Math.PI * radius;
+  }
+
+  private final double radius;
+}
+
+// This subclass adds an extra final field;
+// there should be an extra constructor fence added
+// (for a total of 2 after inlining).
+class Ellipse extends Circle {
+  Ellipse(double vertex, double covertex) {
+    super(vertex);
+
+    this.covertex = covertex;
+  }
+
+  public double getVertex() {
+    return getRadius();
+  }
+
+  public double getCovertex() {
+    return covertex;
+  }
+
+  @Override
+  public double getArea() {
+    return getRadius() * covertex * Math.PI;
+  }
+
+  private final double covertex;
+}
+
+class CalcCircleAreaOrCircumference {
+  public static final int TYPE_AREA = 0;
+  public static final int TYPE_CIRCUMFERENCE = 1;
+
+  double value;
+
+  public CalcCircleAreaOrCircumference(int type) {
+    this.type = type;
+  }
+
+  final int type;
+}
+
+public class Main {
+
+  /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: ConstructorFence
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: ConstructorFence
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Make sure the constructor fence gets eliminated when the allocation is eliminated.
+  static double calcCircleArea(double radius) {
+    return new Circle(radius).getArea();
+  }
+
+  /// CHECK-START: double Main.calcEllipseArea(double, double) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: ConstructorFence
+  /// CHECK: InstanceFieldGet
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: double Main.calcEllipseArea(double, double) load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK-NOT: ConstructorFence
+  /// CHECK-NOT: InstanceFieldGet
+
+  // Multiple constructor fences can accumulate through inheritance, make sure
+  // they are all eliminated when the allocation is eliminated.
+  static double calcEllipseArea(double vertex, double covertex) {
+    return new Ellipse(vertex, covertex).getArea();
+  }
+
+  /// CHECK-START: double Main.calcCircleAreaOrCircumference(double, boolean) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: ConstructorFence
+  /// CHECK: InstanceFieldGet
+
+  /// CHECK-START: double Main.calcCircleAreaOrCircumference(double, boolean) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK-NOT: ConstructorFence
+
+  //
+  // The object allocation will not be eliminated by LSE because of aliased stores.
+  // However the object is still a singleton, so it never escapes the current thread.
+  // There should not be a constructor fence here after LSE.
+  static double calcCircleAreaOrCircumference(double radius, boolean area_or_circumference) {
+    CalcCircleAreaOrCircumference calc =
+      new CalcCircleAreaOrCircumference(
+          area_or_circumference ? CalcCircleAreaOrCircumference.TYPE_AREA :
+          CalcCircleAreaOrCircumference.TYPE_CIRCUMFERENCE);
+
+    if (area_or_circumference) {
+      // Area
+      calc.value = Math.PI * Math.PI * radius;
+    } else {
+      // Circumference
+      calc.value = 2 * Math.PI * radius;
+    }
+
+    return calc.value;
+  }
+
+  /// CHECK-START: Circle Main.makeCircle(double) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: ConstructorFence
+
+  // The object allocation is considered a singleton by LSE,
+  // but we cannot eliminate the new because it is returned.
+  //
+  // The constructor fence must also not be removed because the object could escape the
+  // current thread (in the caller).
+  static Circle makeCircle(double radius) {
+    return new Circle(radius);
+  }
+
+  static void assertIntEquals(int result, int expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  static void assertFloatEquals(float result, float expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  static void assertDoubleEquals(double result, double expected) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  static void assertInstanceOf(Object result, Class<?> expected) {
+    if (result.getClass() != expected) {
+      throw new Error("Expected type: " + expected + ", found : " + result.getClass());
+    }
+  }
+
+  public static void main(String[] args) {
+    assertDoubleEquals(Math.PI * Math.PI * Math.PI, calcCircleArea(Math.PI));
+    assertDoubleEquals(Math.PI * Math.PI * Math.PI, calcEllipseArea(Math.PI, Math.PI));
+    assertDoubleEquals(2 * Math.PI * Math.PI, calcCircleAreaOrCircumference(Math.PI, false));
+    assertInstanceOf(makeCircle(Math.PI), Circle.class);
+  }
+
+  static boolean sFlag;
+}
diff --git a/test/530-checker-lse2/src/Main.java b/test/530-checker-lse2/src/Main.java
index 0fe3d87..491a9a1 100644
--- a/test/530-checker-lse2/src/Main.java
+++ b/test/530-checker-lse2/src/Main.java
@@ -76,16 +76,27 @@
   /// CHECK-DAG: Deoptimize
   /// CHECK-DAG: Deoptimize
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: ConstructorFence
   /// CHECK-DAG: NewInstance
   /// CHECK-DAG: NewInstance
   /// CHECK-DAG: NewInstance
@@ -95,9 +106,14 @@
   /// CHECK-DAG: Deoptimize
   /// CHECK-DAG: Deoptimize
   /// CHECK-NOT: NewInstance
+  /// CHECK-NOT: ConstructorFence
 
   private float testMethod() {
     {
+      // Each of the "new" statements here will initialize an object with final fields,
+      // which after inlining will also retain a constructor fence.
+      //
+      // After LSE we remove the 'new-instance' and the associated constructor fence.
       int lI0 = (-1456058746 << mI);
       mD = ((double)(int)(double) mD);
       for (int i0 = 56 - 1; i0 >= 0; i0--) {
diff --git a/test/569-checker-pattern-replacement/src/Main.java b/test/569-checker-pattern-replacement/src/Main.java
index 345e9fd..26d87b1 100644
--- a/test/569-checker-pattern-replacement/src/Main.java
+++ b/test/569-checker-pattern-replacement/src/Main.java
@@ -331,7 +331,7 @@
 
   /// CHECK-START: double Main.constructBase() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructBase() {
@@ -347,7 +347,7 @@
 
   /// CHECK-START: double Main.constructBase(int) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(int) inliner (after)
   /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
@@ -371,7 +371,7 @@
 
   /// CHECK-START: double Main.constructBaseWith0() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructBaseWith0() {
@@ -387,7 +387,7 @@
 
   /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after)
   /// CHECK-DAG:  <<Value:l\d+>>      ParameterValue
@@ -411,7 +411,7 @@
 
   /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (after)
   /// CHECK-NOT:                      InstanceFieldSet
@@ -431,7 +431,7 @@
 
   /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after)
   /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
@@ -460,7 +460,7 @@
 
   /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after)
   /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
@@ -493,7 +493,7 @@
 
   /// CHECK-START: double Main.constructBaseWith0DoubleNull(double) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBaseWith0DoubleNull(double) inliner (after)
   /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
@@ -543,7 +543,7 @@
 
   /// CHECK-START: double Main.constructBase(double) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(double) inliner (after)
   /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
@@ -567,7 +567,7 @@
 
   /// CHECK-START: double Main.constructBaseWith0d() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructBaseWith0d() {
@@ -605,7 +605,7 @@
 
   /// CHECK-START: double Main.constructBase(int, long) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructBase(int, long) inliner (after)
   /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
@@ -628,7 +628,7 @@
 
   /// CHECK-START: double Main.constructDerived() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerived() {
@@ -644,7 +644,7 @@
 
   /// CHECK-START: double Main.constructDerived(int) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(int) inliner (after)
   /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
@@ -668,7 +668,7 @@
 
   /// CHECK-START: double Main.constructDerivedWith0() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWith0() {
@@ -684,7 +684,7 @@
 
   /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (after)
   /// CHECK-NOT:                      InstanceFieldSet
@@ -702,7 +702,7 @@
 
   /// CHECK-START: double Main.constructDerived(double) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(double) inliner (after)
   /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
@@ -726,7 +726,7 @@
 
   /// CHECK-START: double Main.constructDerivedWith0d() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWith0d() {
@@ -744,7 +744,7 @@
 
   /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after)
   /// CHECK-DAG:  <<DValue:d\d+>>     ParameterValue
@@ -794,7 +794,7 @@
 
   /// CHECK-START: double Main.constructDerived(float) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(float) inliner (after)
   /// CHECK-DAG:  <<Value:f\d+>>      ParameterValue
@@ -821,7 +821,7 @@
 
   /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after)
   /// CHECK-DAG:  <<IValue:i\d+>>     ParameterValue
@@ -852,7 +852,7 @@
 
   /// CHECK-START: int Main.constructBaseWithFinalField() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructBaseWithFinalField() {
@@ -873,7 +873,7 @@
   /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
   /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
   /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
-  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
 
   /// CHECK-START: int Main.constructBaseWithFinalField(int) inliner (after)
   /// CHECK-DAG:                      InstanceFieldSet
@@ -892,7 +892,7 @@
 
   /// CHECK-START: int Main.constructBaseWithFinalFieldWith0() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructBaseWithFinalFieldWith0() {
@@ -907,7 +907,7 @@
 
   /// CHECK-START: double Main.constructDerivedWithFinalField() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWithFinalField() {
@@ -928,7 +928,7 @@
   /// CHECK-DAG:  <<Value:i\d+>>      ParameterValue
   /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
   /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
-  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerivedWithFinalField(int) inliner (after)
   /// CHECK-DAG:                      InstanceFieldSet
@@ -947,7 +947,7 @@
 
   /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWithFinalFieldWith0() {
@@ -968,7 +968,7 @@
   /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
   /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
   /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
-  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerivedWithFinalField(double) inliner (after)
   /// CHECK-DAG:                      InstanceFieldSet
@@ -987,7 +987,7 @@
 
   /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0d() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWithFinalFieldWith0d() {
@@ -1009,7 +1009,7 @@
   /// CHECK-DAG:  <<Value:d\d+>>      ParameterValue
   /// CHECK-DAG:  <<Obj:l\d+>>        NewInstance
   /// CHECK-DAG:                      InstanceFieldSet [<<Obj>>,<<Value>>]
-  /// CHECK-DAG:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
 
   /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (after)
   /// CHECK-DAG:                      InstanceFieldSet
@@ -1017,8 +1017,8 @@
   /// CHECK-NOT:                      InstanceFieldSet
 
   /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (after)
-  /// CHECK-DAG:                      MemoryBarrier
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-DAG:                      ConstructorFence
+  /// CHECK-NOT:                      ConstructorFence
 
   public static double constructDerivedWithFinalField(int intValue, double doubleValue) {
     DerivedWithFinalField d = new DerivedWithFinalField(intValue, doubleValue);
@@ -1034,7 +1034,7 @@
 
   /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0And0d() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static double constructDerivedWithFinalFieldWith0And0d() {
@@ -1049,7 +1049,7 @@
 
   /// CHECK-START: int Main.constructDerivedInSecondDex() inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructDerivedInSecondDex() {
@@ -1070,7 +1070,7 @@
   /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init>
 
   /// CHECK-START: int Main.constructDerivedInSecondDex(int) inliner (after)
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructDerivedInSecondDex(int intValue) {
@@ -1091,7 +1091,7 @@
   /// CHECK-DAG:                      InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init>
 
   /// CHECK-START: int Main.constructDerivedInSecondDexWith0() inliner (after)
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructDerivedInSecondDexWith0() {
@@ -1107,7 +1107,7 @@
 
   /// CHECK-START: int Main.constructDerivedInSecondDex(long) inliner (after)
   /// CHECK-NOT:                      InvokeStaticOrDirect
-  /// CHECK-NOT:                      MemoryBarrier
+  /// CHECK-NOT:                      ConstructorFence
   /// CHECK-NOT:                      InstanceFieldSet
 
   public static int constructDerivedInSecondDex(long dummy) {