optimizing: Build HConstructorFence for HNewArray/HNewInstance nodes
Also fixes:
* LSE, code_sinking to keep optimizing new-instance if it did so before
* Various tests to expect constructor fences after new-instance
Sidenote: new-instance String does not get a ConstructorFence; the
special StringFactory calls are assumed to be self-fencing.
Metric changes on go/lem:
* CodeSize -0.262% in ART-Compile (ARMv8)
* RunTime -0.747% for all (linux-armv8)
(No changes expected on x86; constructor fences are no-ops there.)
The RunTime regression is temporary until the art_quick_alloc_* entrypoints have their
DMBs removed in a follow-up CL.
Test: art/test.py
Bug: 36656456
Change-Id: I6a936a6e51c623e1c6b5b22eee5c3c72bebbed35
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index 0b4dcd3..e598e19 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -56,6 +56,17 @@
return true;
}
+ // Check whether it is safe to move this ConstructorFence.
+ // (A fence is only safely movable when it protects its associated new-instance;
+ // fences that protect final-field writes must not be moved.)
+ if (instruction->IsConstructorFence()) {
+ HConstructorFence* ctor_fence = instruction->AsConstructorFence();
+
+ // A fence with "0" inputs is dead and should've been removed in a prior pass.
+ DCHECK_NE(0u, ctor_fence->InputCount());
+
+ return ctor_fence->GetAssociatedAllocation() != nullptr;
+ }
+
// All other instructions that can throw cannot be moved.
if (instruction->CanThrow()) {
return false;
@@ -134,11 +145,11 @@
HInstruction* user,
const ArenaBitVector& post_dominated) {
if (instruction->IsNewInstance()) {
- return user->IsInstanceFieldSet() &&
+ return (user->IsInstanceFieldSet() || user->IsConstructorFence()) &&
(user->InputAt(0) == instruction) &&
!post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
} else if (instruction->IsNewArray()) {
- return user->IsArraySet() &&
+ return (user->IsArraySet() || user->IsConstructorFence()) &&
(user->InputAt(0) == instruction) &&
!post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
}
@@ -372,7 +383,9 @@
// Step (3): Try to move sinking candidates.
for (HInstruction* instruction : move_in_order) {
HInstruction* position = nullptr;
- if (instruction->IsArraySet() || instruction->IsInstanceFieldSet()) {
+ if (instruction->IsArraySet()
+ || instruction->IsInstanceFieldSet()
+ || instruction->IsConstructorFence()) {
if (!instructions_that_can_move.IsBitSet(instruction->InputAt(0)->GetId())) {
// A store can trivially move, but it can safely do so only if the heap
// location it stores to can also move.
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 8b79da8..40fafb0 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -962,7 +962,7 @@
false /* is_unresolved */);
}
-bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc) {
+HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc) {
ScopedObjectAccess soa(Thread::Current());
HLoadClass* load_class = BuildLoadClass(type_index, dex_pc);
@@ -986,14 +986,65 @@
// Consider classes we haven't resolved as potentially finalizable.
bool finalizable = (klass == nullptr) || klass->IsFinalizable();
- AppendInstruction(new (arena_) HNewInstance(
+ HNewInstance* new_instance = new (arena_) HNewInstance(
cls,
dex_pc,
type_index,
*dex_compilation_unit_->GetDexFile(),
finalizable,
- entrypoint));
- return true;
+ entrypoint);
+ AppendInstruction(new_instance);
+
+ return new_instance;
+}
+
+void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* allocation) {
+  // Emit an HConstructorFence for `allocation`, which must be an HNewInstance or
+  // HNewArray -- i.e. it corresponds to the "new" keyword in the JLS.
+  //
+  // Note: the null check must be a separate DCHECK. `&&` binds tighter than `||`,
+  // so the combined form `a != nullptr && a->IsNewInstance() || a->IsNewArray()`
+  // would dereference a null `allocation` in the right-hand operand of `||`.
+  DCHECK(allocation != nullptr);
+  DCHECK(allocation->IsNewInstance() || allocation->IsNewArray());
+
+  if (allocation->IsNewInstance()) {
+    // STRING SPECIAL HANDLING:
+    // -------------------------------
+    // Strings have a real HNewInstance node but they end up always having 0 uses.
+    // All uses of a String HNewInstance are always transformed to replace their input
+    // of the HNewInstance with an input of the invoke to StringFactory.
+    //
+    // Do not emit an HConstructorFence here since it can inhibit some String new-instance
+    // optimizations (to pass checker tests that rely on those optimizations).
+    HNewInstance* new_inst = allocation->AsNewInstance();
+    HLoadClass* load_class = new_inst->GetLoadClass();
+
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> klass = load_class->GetClass();
+    // Note: Do not use allocation->IsStringAlloc here; it requires a valid
+    // ReferenceTypeInfo, which is not computed until reference type propagation
+    // runs (the instruction builder is too early for that).
+    if (klass != nullptr && klass->IsStringClass()) {
+      return;
+    }
+    // (In terms of correctness, the StringFactory needs to provide its own
+    // default initialization barrier, see below.)
+  }
+
+  // JLS 17.4.5 "Happens-before Order" describes:
+  //
+  //   The default initialization of any object happens-before any other actions (other than
+  //   default-writes) of a program.
+  //
+  // In our implementation the default initialization of an object to type T means
+  // setting all of its initial data (object[0..size)) to 0, and setting the
+  // object's class header (i.e. object.getClass() == T.class).
+  //
+  // In practice this fence ensures that the writes to the object header
+  // are visible to other threads if this object escapes the current thread.
+  // (and in theory the 0-initializing, but that happens automatically
+  // when new memory pages are mapped in by the OS).
+  HConstructorFence* ctor_fence =
+      new (arena_) HConstructorFence(allocation, allocation->GetDexPc(), arena_);
+  AppendInstruction(ctor_fence);
+}
static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
@@ -1522,15 +1573,15 @@
graph_->SetHasBoundsChecks(true);
}
-void HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
- dex::TypeIndex type_index,
- uint32_t number_of_vreg_arguments,
- bool is_range,
- uint32_t* args,
- uint32_t register_index) {
+HNewArray* HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
+ dex::TypeIndex type_index,
+ uint32_t number_of_vreg_arguments,
+ bool is_range,
+ uint32_t* args,
+ uint32_t register_index) {
HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
- HInstruction* object = new (arena_) HNewArray(cls, length, dex_pc);
+ HNewArray* const object = new (arena_) HNewArray(cls, length, dex_pc);
AppendInstruction(object);
const char* descriptor = dex_file_->StringByTypeIdx(type_index);
@@ -1550,6 +1601,8 @@
AppendInstruction(aset);
}
latest_result_ = object;
+
+ return object;
}
template <typename T>
@@ -2534,10 +2587,12 @@
}
case Instruction::NEW_INSTANCE: {
- if (!BuildNewInstance(dex::TypeIndex(instruction.VRegB_21c()), dex_pc)) {
- return false;
- }
+ HNewInstance* new_instance =
+ BuildNewInstance(dex::TypeIndex(instruction.VRegB_21c()), dex_pc);
+ DCHECK(new_instance != nullptr);
+
UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+ BuildConstructorFenceForAllocation(new_instance);
break;
}
@@ -2545,8 +2600,11 @@
dex::TypeIndex type_index(instruction.VRegC_22c());
HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt);
HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
- AppendInstruction(new (arena_) HNewArray(cls, length, dex_pc));
+
+ HNewArray* new_array = new (arena_) HNewArray(cls, length, dex_pc);
+ AppendInstruction(new_array);
UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction());
+ BuildConstructorFenceForAllocation(new_array);
break;
}
@@ -2555,7 +2613,13 @@
dex::TypeIndex type_index(instruction.VRegB_35c());
uint32_t args[5];
instruction.GetVarArgs(args);
- BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
+ HNewArray* new_array = BuildFilledNewArray(dex_pc,
+ type_index,
+ number_of_vreg_arguments,
+ /* is_range */ false,
+ args,
+ /* register_index */ 0);
+ BuildConstructorFenceForAllocation(new_array);
break;
}
@@ -2563,8 +2627,13 @@
uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
dex::TypeIndex type_index(instruction.VRegB_3rc());
uint32_t register_index = instruction.VRegC_3rc();
- BuildFilledNewArray(
- dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
+ HNewArray* new_array = BuildFilledNewArray(dex_pc,
+ type_index,
+ number_of_vreg_arguments,
+ /* is_range */ true,
+ /* args*/ nullptr,
+ register_index);
+ BuildConstructorFenceForAllocation(new_array);
break;
}
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 2fb5c7b..e968760 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -194,12 +194,12 @@
uint32_t register_index);
// Builds a new array node and the instructions that fill it.
- void BuildFilledNewArray(uint32_t dex_pc,
- dex::TypeIndex type_index,
- uint32_t number_of_vreg_arguments,
- bool is_range,
- uint32_t* args,
- uint32_t register_index);
+ HNewArray* BuildFilledNewArray(uint32_t dex_pc,
+ dex::TypeIndex type_index,
+ uint32_t number_of_vreg_arguments,
+ bool is_range,
+ uint32_t* args,
+ uint32_t register_index);
void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
@@ -288,7 +288,11 @@
REQUIRES_SHARED(Locks::mutator_lock_);
// Build a HNewInstance instruction.
- bool BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc);
+ HNewInstance* BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc);
+
+ // Build a HConstructorFence for HNewInstance and HNewArray instructions. This ensures the
+ // happens-before ordering for default-initialization of the object referred to by new_instance.
+ void BuildConstructorFenceForAllocation(HInstruction* allocation);
// Return whether the compiler can assume `cls` is initialized.
bool IsInitialized(Handle<mirror::Class> cls) const
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 8d8cc93..76c9d23 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -578,9 +578,7 @@
}
}
for (HInstruction* new_array : singleton_new_arrays_) {
- // TODO: Delete constructor fences for new-array
- // In the future HNewArray instructions will have HConstructorFence's for them.
- // HConstructorFence::RemoveConstructorFences(new_array);
+ HConstructorFence::RemoveConstructorFences(new_array);
if (!new_array->HasNonEnvironmentUses()) {
new_array->RemoveEnvironmentUsers();
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index f250c1a..1460b26 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1234,6 +1234,20 @@
}
}
+// Returns the HNewInstance/HNewArray this fence protects, or null when the fence
+// is protecting final-field writes instead of a fresh allocation.
+HInstruction* HConstructorFence::GetAssociatedAllocation() {
+ HInstruction* new_instance_inst = GetPrevious();
+ // Check if the immediately preceding instruction is a new-instance/new-array.
+ // Otherwise this fence is for protecting final fields.
+ if (new_instance_inst != nullptr &&
+ (new_instance_inst->IsNewInstance() || new_instance_inst->IsNewArray())) {
+ // The builder emits fences with exactly one input (the allocation), so a
+ // preceding allocation with a multi-input fence would indicate a merged fence
+ // this code does not yet understand.
+ // TODO: Need to update this code to handle multiple inputs.
+ DCHECK_EQ(InputCount(), 1u);
+ return new_instance_inst;
+ } else {
+ return nullptr;
+ }
+}
+
#define DEFINE_ACCEPT(name, super) \
void H##name::Accept(HGraphVisitor* visitor) { \
visitor->Visit##name(this); \
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index e40361e..72521fd 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -6650,6 +6650,14 @@
// still considered live.
static void RemoveConstructorFences(HInstruction* instruction);
+ // Check if this constructor fence is protecting
+ // an HNewInstance or HNewArray that is also the immediate
+ // predecessor of `this`.
+ //
+ // Returns the associated HNewArray or HNewInstance,
+ // or null otherwise.
+ HInstruction* GetAssociatedAllocation();
+
DECLARE_INSTRUCTION(ConstructorFence);
private:
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index c3c141b..aa42fd6 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -168,7 +168,39 @@
}
void PrepareForRegisterAllocation::VisitConstructorFence(HConstructorFence* constructor_fence) {
- // Delete all the inputs to the constructor fence;
+ // Trivially remove redundant HConstructorFence when it immediately follows an HNewInstance
+ // to an uninitialized class. In this special case, the art_quick_alloc_object_resolved
+ // will already have the 'dmb' which is strictly stronger than an HConstructorFence.
+ //
+ // The instruction builder always emits "x = HNewInstance; HConstructorFence(x)" so this
+ // is effectively pattern-matching that particular case and undoing the redundancy the builder
+ // had introduced.
+ //
+ // TODO: Move this to a separate pass.
+ HInstruction* allocation_inst = constructor_fence->GetAssociatedAllocation();
+ if (allocation_inst != nullptr && allocation_inst->IsNewInstance()) {
+ HNewInstance* new_inst = allocation_inst->AsNewInstance();
+ // This relies on the entrypoint already being set to the more optimized version;
+ // as that happens in this pass, this redundancy removal also cannot happen any earlier.
+ if (new_inst != nullptr && new_inst->GetEntrypoint() == kQuickAllocObjectResolved) {
+ // If this was done in an earlier pass, we would want to match that `previous` was an input
+ // to the `constructor_fence`. However, since this pass removes the inputs to the fence,
+ // we can ignore the inputs and just remove the instruction from its block.
+ DCHECK_EQ(1u, constructor_fence->InputCount());
+ // TODO: GetAssociatedAllocation should not care about multiple inputs
+ // if we are in prepare_for_register_allocation pass only.
+ constructor_fence->GetBlock()->RemoveInstruction(constructor_fence);
+ return;
+ // TODO: actually remove the dmb from the .S entrypoints (initialized variants only).
+ }
+
+ // HNewArray does not need this check because the art_quick_alloc_array does not itself
+ // have a dmb in any normal situation (i.e. the array class is never exactly in the
+ // "resolved" state). If the array class is not yet loaded, it will always go from
+ // Unloaded->Initialized state.
+ }
+
+ // Remove all the inputs to the constructor fence;
// they aren't used by the InstructionCodeGenerator and this lets us avoid creating a
// LocationSummary in the LocationsBuilder.
constructor_fence->RemoveAllInputs();