optimizing: Build HConstructorFence for HNewArray/HNewInstance nodes
Also fixes:
* LSE, code_sinking to keep optimizing new-instance if it did so before
* Various tests to expect constructor fences after new-instance
Sidenote: new-instance String does not get a ConstructorFence; the
special StringFactory calls are assumed to be self-fencing.
Metric changes on go/lem:
* CodeSize -0.262% in ART-Compile (ARMv8)
* RunTime -0.747% for all (linux-armv8)
(No changes expected on x86, where constructor fences are no-ops.)
The RunTime regression is temporary until art_quick_alloc_* entrypoints have their
DMBs removed in a follow-up CL.
Test: art/test.py
Bug: 36656456
Change-Id: I6a936a6e51c623e1c6b5b22eee5c3c72bebbed35
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 8b79da8..40fafb0 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -962,7 +962,7 @@
false /* is_unresolved */);
}
-bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc) {
+HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc) {
ScopedObjectAccess soa(Thread::Current());
HLoadClass* load_class = BuildLoadClass(type_index, dex_pc);
@@ -986,14 +986,65 @@
// Consider classes we haven't resolved as potentially finalizable.
bool finalizable = (klass == nullptr) || klass->IsFinalizable();
- AppendInstruction(new (arena_) HNewInstance(
+ HNewInstance* new_instance = new (arena_) HNewInstance(
cls,
dex_pc,
type_index,
*dex_compilation_unit_->GetDexFile(),
finalizable,
- entrypoint));
- return true;
+ entrypoint);
+ AppendInstruction(new_instance);
+
+ return new_instance;
+}
+
+// Emits (via AppendInstruction) an HConstructorFence that takes `allocation` as its fence
+// target, using the allocation's own dex pc.
+//
+// `allocation` must be a non-null HNewInstance or HNewArray. No fence is emitted for an
+// HNewInstance whose load-class resolves to the String class (see below for why).
+void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* allocation) {
+  // Check for null on its own: `&&` binds tighter than `||`, so folding the null test into
+  // an unparenthesized `a && b || c` would still evaluate `c` on a null pointer.
+  DCHECK(allocation != nullptr);
+  DCHECK(allocation->IsNewInstance() ||
+         allocation->IsNewArray());  // corresponding to "new" keyword in JLS.
+
+  if (allocation->IsNewInstance()) {
+    // STRING SPECIAL HANDLING:
+    // -------------------------------
+    // Strings have a real HNewInstance node but they end up always having 0 uses.
+    // All uses of a String HNewInstance are always transformed to replace their input
+    // of the HNewInstance with an input of the invoke to StringFactory.
+    //
+    // Do not emit an HConstructorFence here since it can inhibit some String new-instance
+    // optimizations (to pass checker tests that rely on those optimizations).
+    HNewInstance* new_inst = allocation->AsNewInstance();
+    HLoadClass* load_class = new_inst->GetLoadClass();
+
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    // NOTE(review): `hs` is not referenced below; confirm whether it is still needed.
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Class> klass = load_class->GetClass();
+    if (klass != nullptr && klass->IsStringClass()) {
+      // Note: Do not use allocation->IsStringAlloc which requires
+      // a valid ReferenceTypeInfo, but that doesn't get made until after reference type
+      // propagation (and instruction builder is too early).
+      return;
+    }
+    // (In terms of correctness, the StringFactory needs to provide its own
+    // default initialization barrier, see below.)
+  }
+
+  // JLS 17.4.5 "Happens-before Order" describes:
+  //
+  //   The default initialization of any object happens-before any other actions (other than
+  //   default-writes) of a program.
+  //
+  // In our implementation the default initialization of an object to type T means
+  // setting all of its initial data (object[0..size)) to 0, and setting the
+  // object's class header (i.e. object.getClass() == T.class).
+  //
+  // In practice this fence ensures that the writes to the object header
+  // are visible to other threads if this object escapes the current thread.
+  // (and in theory the 0-initializing, but that happens automatically
+  // when new memory pages are mapped in by the OS).
+  HConstructorFence* ctor_fence =
+      new (arena_) HConstructorFence(allocation, allocation->GetDexPc(), arena_);
+  AppendInstruction(ctor_fence);
 }
static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
@@ -1522,15 +1573,15 @@
graph_->SetHasBoundsChecks(true);
}
-void HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
- dex::TypeIndex type_index,
- uint32_t number_of_vreg_arguments,
- bool is_range,
- uint32_t* args,
- uint32_t register_index) {
+HNewArray* HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
+ dex::TypeIndex type_index,
+ uint32_t number_of_vreg_arguments,
+ bool is_range,
+ uint32_t* args,
+ uint32_t register_index) {
HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
- HInstruction* object = new (arena_) HNewArray(cls, length, dex_pc);
+ HNewArray* const object = new (arena_) HNewArray(cls, length, dex_pc);
AppendInstruction(object);
const char* descriptor = dex_file_->StringByTypeIdx(type_index);
@@ -1550,6 +1601,8 @@
AppendInstruction(aset);
}
latest_result_ = object;
+
+ return object;
}
template <typename T>
@@ -2534,10 +2587,12 @@
}
case Instruction::NEW_INSTANCE: {
- if (!BuildNewInstance(dex::TypeIndex(instruction.VRegB_21c()), dex_pc)) {
- return false;
- }
+ HNewInstance* new_instance =
+ BuildNewInstance(dex::TypeIndex(instruction.VRegB_21c()), dex_pc);
+ DCHECK(new_instance != nullptr);
+
UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+ BuildConstructorFenceForAllocation(new_instance);
break;
}
@@ -2545,8 +2600,11 @@
dex::TypeIndex type_index(instruction.VRegC_22c());
HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt);
HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
- AppendInstruction(new (arena_) HNewArray(cls, length, dex_pc));
+
+ HNewArray* new_array = new (arena_) HNewArray(cls, length, dex_pc);
+ AppendInstruction(new_array);
UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction());
+ BuildConstructorFenceForAllocation(new_array);
break;
}
@@ -2555,7 +2613,13 @@
dex::TypeIndex type_index(instruction.VRegB_35c());
uint32_t args[5];
instruction.GetVarArgs(args);
- BuildFilledNewArray(dex_pc, type_index, number_of_vreg_arguments, false, args, 0);
+ HNewArray* new_array = BuildFilledNewArray(dex_pc,
+ type_index,
+ number_of_vreg_arguments,
+ /* is_range */ false,
+ args,
+ /* register_index */ 0);
+ BuildConstructorFenceForAllocation(new_array);
break;
}
@@ -2563,8 +2627,13 @@
uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
dex::TypeIndex type_index(instruction.VRegB_3rc());
uint32_t register_index = instruction.VRegC_3rc();
- BuildFilledNewArray(
- dex_pc, type_index, number_of_vreg_arguments, true, nullptr, register_index);
+ HNewArray* new_array = BuildFilledNewArray(dex_pc,
+ type_index,
+ number_of_vreg_arguments,
+ /* is_range */ true,
+ /* args*/ nullptr,
+ register_index);
+ BuildConstructorFenceForAllocation(new_array);
break;
}