optimizing: add block-scoped constructor fence merging pass
Introduce a new "Constructor Fence Redundancy Elimination" pass.
The pass currently performs local optimization only, i.e. it operates on
instructions within the same basic block.
All constructor fences that precede a publish (e.g. a store or an invoke)
are merged into a single fence instruction.
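
For illustration, here is a hypothetical Java input (class and field names
are made up); the comments sketch where fences would typically be emitted
after inlining and how this pass merges them:

  class Point {
    final int x;
    final int y;
    Point(int x, int y) { this.x = x; this.y = y; }
  }

  class Holder {
    static Point shared;

    static void publish() {
      Point a = new Point(1, 2);  // fence #1 (new-instance and/or final fields)
      Point b = new Point(3, 4);  // fence #2
      shared = a;                 // publish of 'a': fences #1 and #2 merge into
                                  // a single fence placed just before this store
      shared = b;                 // 'b' is already covered by the merged fence
    }
  }
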
Removes ~91.5% of the 'final' constructor fences in RitzBenchmark:
==============
OptStat#ConstructorFenceGeneratedNew: 43825
OptStat#ConstructorFenceGeneratedFinal: 17631 <+++
OptStat#ConstructorFenceRemovedLSE: 164
OptStat#ConstructorFenceRemovedPFRA: 9391
OptStat#ConstructorFenceRemovedCFRE: 16133 <---
==============
(We do not distinguish the exact reason that a fence was created, so
it's possible some "new" fences were also removed.)
Test: art/test/run-test --host --optimizing 476-checker-ctor-fence-redun-elim
Bug: 36656456
Change-Id: I8020217b448ad96ce9b7640aa312ae784690ad99
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index 6c3a9fd..b558eb1 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -64,6 +64,11 @@
// A fence with "0" inputs is dead and should've been removed in a prior pass.
DCHECK_NE(0u, ctor_fence->InputCount());
+ // TODO: Consider simplifying this to 'return true', since the current check
+ // potentially pessimizes code sinking for inlined constructors with final fields.
+ // TODO: Double-check that if the final field assignments are not moved,
+ // then the fence is not moved either.
+
return ctor_fence->GetAssociatedAllocation() != nullptr;
}
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
new file mode 100644
index 0000000..ff7ce60
--- /dev/null
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "constructor_fence_redundancy_elimination.h"
+
+#include "base/arena_allocator.h"
+
+namespace art {
+
+static constexpr bool kCfreLogFenceInputCount = false;
+
+// TODO: refactor this code by reusing escape analysis.
+class CFREVisitor : public HGraphVisitor {
+ public:
+ CFREVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+ : HGraphVisitor(graph),
+ scoped_allocator_(graph->GetArena()->GetArenaPool()),
+ candidate_fences_(scoped_allocator_.Adapter(kArenaAllocCFRE)),
+ candidate_fence_targets_(scoped_allocator_.Adapter(kArenaAllocCFRE)),
+ stats_(stats) {}
+
+ void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ // Visit all instructions in block.
+ HGraphVisitor::VisitBasicBlock(block);
+
+ // If there are any unmerged candidate fences left, merge them together;
+ // the fence targets are considered 'published' at the end of the block.
+ MergeCandidateFences();
+ }
+
+ void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE {
+ candidate_fences_.push_back(constructor_fence);
+
+ for (size_t input_idx = 0; input_idx < constructor_fence->InputCount(); ++input_idx) {
+ candidate_fence_targets_.Insert(constructor_fence->InputAt(input_idx));
+ }
+ }
+
+ void VisitBoundType(HBoundType* bound_type) OVERRIDE {
+ VisitAlias(bound_type);
+ }
+
+ void VisitNullCheck(HNullCheck* null_check) OVERRIDE {
+ VisitAlias(null_check);
+ }
+
+ void VisitSelect(HSelect* select) OVERRIDE {
+ VisitAlias(select);
+ }
+
+ void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
+ HInstruction* value = instruction->InputAt(1);
+ VisitSetLocation(instruction, value);
+ }
+
+ void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
+ HInstruction* value = instruction->InputAt(1);
+ VisitSetLocation(instruction, value);
+ }
+
+ void VisitArraySet(HArraySet* instruction) OVERRIDE {
+ HInstruction* value = instruction->InputAt(2);
+ VisitSetLocation(instruction, value);
+ }
+
+ void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE {
+ // Pessimize: Merge all fences.
+ MergeCandidateFences();
+ }
+
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+ HandleInvoke(invoke);
+ }
+
+ void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+ HandleInvoke(invoke);
+ }
+
+ void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE {
+ HandleInvoke(invoke);
+ }
+
+ void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE {
+ HandleInvoke(invoke);
+ }
+
+ void VisitInvokePolymorphic(HInvokePolymorphic* invoke) OVERRIDE {
+ HandleInvoke(invoke);
+ }
+
+ void VisitClinitCheck(HClinitCheck* clinit) OVERRIDE {
+ HandleInvoke(clinit);
+ }
+
+ void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) OVERRIDE {
+ // Conservatively treat it as an invocation.
+ HandleInvoke(instruction);
+ }
+
+ void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) OVERRIDE {
+ // Conservatively treat it as an invocation.
+ HandleInvoke(instruction);
+ }
+
+ void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) OVERRIDE {
+ // Conservatively treat it as an invocation.
+ HandleInvoke(instruction);
+ }
+
+ void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) OVERRIDE {
+ // Conservatively treat it as an invocation.
+ HandleInvoke(instruction);
+ }
+
+ private:
+ void HandleInvoke(HInstruction* invoke) {
+ // An object is considered "published" if it escapes into an invoke as any of the parameters.
+ if (HasInterestingPublishTargetAsInput(invoke)) {
+ MergeCandidateFences();
+ }
+ }
+
+ // Called by any instruction visitor that may create an alias.
+ // These instructions may create an alias:
+ // - BoundType
+ // - NullCheck
+ // - Select
+ //
+ // These also create an alias, but are not handled by this function:
+ // - Phi: propagates values across blocks, but we always merge at the end of a block.
+ // - Invoke: this is handled by HandleInvoke.
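+ //
+ // For example, an HBoundType whose input is a fence target creates a second
+ // reference to the same object. This pass does not track such aliases, so it
+ // conservatively treats the aliasing instruction as a publish and merges.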
+ void VisitAlias(HInstruction* aliasing_inst) {
+ // An object is considered "published" if it becomes aliased by other instructions.
+ if (HasInterestingPublishTargetAsInput(aliasing_inst)) {
+ // Note that constructing a "NullCheck" for new-instance, new-array,
+ // or a 'this' (receiver) reference is impossible.
+ //
+ // If for some reason we actually encounter such a NullCheck(FenceTarget),
+ // we LOG(WARNING) (or LOG(FATAL) in a debug build).
+ if (UNLIKELY(aliasing_inst->IsNullCheck())) {
+ LOG(kIsDebugBuild ? FATAL : WARNING)
+ << "Unexpected instruction: NullCheck; should not be legal in graph";
+ // We then do a best-effort to handle this case.
+ }
+ MergeCandidateFences();
+ }
+ }
+
+ void VisitSetLocation(HInstruction* inst ATTRIBUTE_UNUSED, HInstruction* store_input) {
+ // An object is considered "published" if it's stored onto the heap.
+ // Sidenote: A later "LSE" pass can still remove the fence if it proves the
+ // object doesn't actually escape.
+ if (IsInterestingPublishTarget(store_input)) {
+ // Merge all constructor fences that we've seen since
+ // the last interesting store (or since the beginning).
+ MergeCandidateFences();
+ }
+ }
+
+ bool HasInterestingPublishTargetAsInput(HInstruction* inst) {
+ for (size_t input_count = 0; input_count < inst->InputCount(); ++input_count) {
+ if (IsInterestingPublishTarget(inst->InputAt(input_count))) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ // Merges all the candidate fences we've seen so far into the most recent (last) fence.
+ //
+ // This resets the list of candidate fences and their targets back to {}.
+ void MergeCandidateFences() {
+ if (candidate_fences_.empty()) {
+ // Nothing to do, need 1+ fences to merge.
+ return;
+ }
+
+ // The merge target is always the "last" candidate fence.
+ HConstructorFence* merge_target = candidate_fences_[candidate_fences_.size() - 1];
+
+ for (HConstructorFence* fence : candidate_fences_) {
+ MaybeMerge(merge_target, fence);
+ }
+
+ if (kCfreLogFenceInputCount) {
+ LOG(INFO) << "CFRE-MergeCandidateFences: Post-merge fence input count "
+ << merge_target->InputCount();
+ }
+
+ // Each merge acts as a cut-off point. The optimization is reset completely.
+ // In theory, we could push the fence as far as its publish, but in practice
+ // there is no benefit to this extra complexity unless we also reordered
+ // the stores to come later.
+ candidate_fences_.clear();
+ candidate_fence_targets_.Clear();
+ }
+
+ // A publishing 'store' is only interesting if the value being stored
+ // is one of the fence `targets` in `candidate_fences`.
+ bool IsInterestingPublishTarget(HInstruction* store_input) const {
+ return candidate_fence_targets_.Find(store_input) != candidate_fence_targets_.end();
+ }
+
+ void MaybeMerge(HConstructorFence* target, HConstructorFence* src) {
+ if (target == src) {
+ return; // Don't merge a fence into itself.
+ // This is mostly for stats purposes; we don't want to count merge(x,x)
+ // as removing a fence, because it's a no-op.
+ }
+
+ target->Merge(src);
+
+ MaybeRecordStat(stats_, MethodCompilationStat::kConstructorFenceRemovedCFRE);
+ }
+
+ // Phase-local heap memory allocator for CFRE optimizer. Storage obtained
+ // through this allocator is immediately released when the CFRE optimizer is done.
+ ArenaAllocator scoped_allocator_;
+
+ // Set of constructor fences that we've seen in the current block.
+ // Each constructor fence acts as a guard for one or more `targets`.
+ // There exist no stores to any `targets` between any of these fences.
+ //
+ // Fences are in succession order (i.e. fence[i] succeeds fence[i-1]
+ // within the same basic block).
+ ArenaVector<HConstructorFence*> candidate_fences_;
+
+ // Stores a set of the fence targets, to allow faster lookup of whether
+ // a detected publish is a target of one of the candidate fences.
+ ArenaHashSet<HInstruction*> candidate_fence_targets_;
+
+ // Used to record stats about the optimization.
+ OptimizingCompilerStats* const stats_;
+
+ DISALLOW_COPY_AND_ASSIGN(CFREVisitor);
+};
+
+void ConstructorFenceRedundancyElimination::Run() {
+ CFREVisitor cfre_visitor(graph_, stats_);
+
+ // Arbitrarily visit in reverse post order.
+ // The exact block visit order does not matter, as the algorithm
+ // only operates on a single block at a time.
+ cfre_visitor.VisitReversePostOrder();
+}
+
+} // namespace art
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.h b/compiler/optimizing/constructor_fence_redundancy_elimination.h
new file mode 100644
index 0000000..d89210c
--- /dev/null
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_
+#define ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_
+
+#include "optimization.h"
+
+namespace art {
+
+/*
+ * Constructor Fence Redundancy Elimination (CFRE).
+ *
+ * A local optimization pass that merges redundant constructor fences
+ * together within the same basic block.
+ *
+ * Abbreviations:
+ * - CF: Constructor Fence
+ * - CFS: Constructor Fence Set
+ * - CFTargets: The unique set of the inputs of all the instructions in CFS.
+ *
+ * Given any CFS = { CF(x), CF(y), CF(z), ... }, define CFTargets = { x, y, z, ... }.
+ * - No Publish(R) may occur between any of the CFs in CFS, for any R in CFTargets.
+ * - A Publish(R) where R is in CFTargets is called an "interesting publish".
+ *
+ * A Publish(R) is any instruction at which the reference "R" may escape
+ * to another thread (e.g. an invoke, a store, a return, etc.).
+ *
+ * Starting at the beginning of the block:
+ * - Find the largest contiguous CFS.
+ * - If we see an interesting publish, merge all instructions in CFS into a single CF(CFTargets).
+ * - Repeat until the block is fully visited.
+ * - At the end of the block, merge all instructions in CFS into a single CF(CFTargets).
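+ *
+ * A small worked example (illustrative only), within a single basic block:
+ *
+ *   CF(x)
+ *   CF(y)
+ *   Publish(x)     <-- interesting publish: x is in CFTargets = { x, y }
+ *   CF(z)
+ *   <end of block>
+ *
+ * becomes
+ *
+ *   CF(x, y)       <-- CF(x) and CF(y) merged into the last candidate fence
+ *   Publish(x)
+ *   CF(z)          <-- merged trivially at the end of the block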
+ */
+class ConstructorFenceRedundancyElimination : public HOptimization {
+ public:
+ ConstructorFenceRedundancyElimination(HGraph* graph,
+ OptimizingCompilerStats* stats)
+ : HOptimization(graph, kPassName, stats) {}
+
+ void Run() OVERRIDE;
+
+ static constexpr const char* kPassName = "constructor_fence_redundancy_elimination";
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ConstructorFenceRedundancyElimination);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 8644f67..e34d4a2 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1271,18 +1271,59 @@
return remove_count;
}
-HInstruction* HConstructorFence::GetAssociatedAllocation() {
+void HConstructorFence::Merge(HConstructorFence* other) {
+ // Do not delete yourself from the graph.
+ DCHECK(this != other);
+ // Don't try to merge with an instruction not associated with a block.
+ DCHECK(other->GetBlock() != nullptr);
+ // A constructor fence's return type is "kPrimVoid"
+ // and therefore it cannot have any environment uses.
+ DCHECK(!other->HasEnvironmentUses());
+
+ auto has_input = [](HInstruction* haystack, HInstruction* needle) {
+ // Check if `haystack` has `needle` as any of its inputs.
+ for (size_t input_count = 0; input_count < haystack->InputCount(); ++input_count) {
+ if (haystack->InputAt(input_count) == needle) {
+ return true;
+ }
+ }
+ return false;
+ };
+
+ // Add each input from `other` into `this` if it is not already an input.
+ for (size_t input_count = 0; input_count < other->InputCount(); ++input_count) {
+ HInstruction* other_input = other->InputAt(input_count);
+ if (!has_input(this, other_input)) {
+ AddInput(other_input);
+ }
+ }
+
+ other->GetBlock()->RemoveInstruction(other);
+}
+
+HInstruction* HConstructorFence::GetAssociatedAllocation(bool ignore_inputs) {
HInstruction* new_instance_inst = GetPrevious();
// Check if the immediately preceding instruction is a new-instance/new-array.
// Otherwise this fence is for protecting final fields.
if (new_instance_inst != nullptr &&
(new_instance_inst->IsNewInstance() || new_instance_inst->IsNewArray())) {
- // TODO: Need to update this code to handle multiple inputs.
- DCHECK_EQ(InputCount(), 1u);
- return new_instance_inst;
- } else {
- return nullptr;
+ if (ignore_inputs) {
+ // If inputs are ignored, simply check if the predecessor is
+ // *any* HNewInstance/HNewArray.
+ //
+ // Inputs are normally only ignored for prepare_for_register_allocation,
+ // at which point *any* prior HNewInstance/Array can be considered
+ // associated.
+ return new_instance_inst;
+ } else {
+ // Normal case: There must be exactly 1 input and the previous instruction
+ // must be that input.
+ if (InputCount() == 1u && InputAt(0) == new_instance_inst) {
+ return new_instance_inst;
+ }
+ }
}
+ return nullptr;
}
#define DEFINE_ACCEPT(name, super) \
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 29be8ac..3e4928b 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -6634,13 +6634,24 @@
// Returns how many HConstructorFence instructions were removed from graph.
static size_t RemoveConstructorFences(HInstruction* instruction);
+ // Combine all inputs of `this` and the `other` instruction, and remove
+ // `other` from the graph.
+ //
+ // Inputs are unique after the merge.
+ //
+ // Requirement: `this` must not be the same as `other`.
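+ //
+ // For example (illustrative names): `fence_a->Merge(fence_b)` leaves `fence_a`
+ // in place with the union of both fences' inputs, and removes `fence_b` from
+ // its block.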
+ void Merge(HConstructorFence* other);
+
// Check if this constructor fence is protecting
// an HNewInstance or HNewArray that is also the immediate
// predecessor of `this`.
//
+ // If `ignore_inputs` is true, then the immediate predecessor doesn't need
+ // to be one of the inputs of `this`.
+ //
// Returns the associated HNewArray or HNewInstance,
// or null otherwise.
- HInstruction* GetAssociatedAllocation();
+ HInstruction* GetAssociatedAllocation(bool ignore_inputs = false);
DECLARE_INSTRUCTION(ConstructorFence);
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 435ca1c..b45f3c6 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -55,6 +55,7 @@
#include "compiled_method.h"
#include "compiler.h"
#include "constant_folding.h"
+#include "constructor_fence_redundancy_elimination.h"
#include "dead_code_elimination.h"
#include "debug/elf_debug_writer.h"
#include "debug/method_debug_info.h"
@@ -516,6 +517,8 @@
return new (arena) CHAGuardOptimization(graph);
} else if (opt_name == CodeSinking::kCodeSinkingPassName) {
return new (arena) CodeSinking(graph, stats);
+ } else if (opt_name == ConstructorFenceRedundancyElimination::kPassName) {
+ return new (arena) ConstructorFenceRedundancyElimination(graph, stats);
#ifdef ART_ENABLE_CODEGEN_arm
} else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) {
return new (arena) arm::InstructionSimplifierArm(graph, stats);
@@ -786,6 +789,8 @@
IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
+ ConstructorFenceRedundancyElimination* cfre =
+ new (arena) ConstructorFenceRedundancyElimination(graph, stats);
HOptimization* optimizations1[] = {
intrinsics,
@@ -823,6 +828,8 @@
// can satisfy. For example, the code generator does not expect to see a
// HTypeConversion from a type to the same type.
simplify4,
+ cfre, // Eliminate constructor fences after code sinking to avoid
+ // complicating the sinking logic with having to split a fence with many inputs.
};
RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index d6da73c..af7ab2f 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -91,6 +91,7 @@
kConstructorFenceGeneratedFinal,
kConstructorFenceRemovedLSE,
kConstructorFenceRemovedPFRA,
+ kConstructorFenceRemovedCFRE,
kLastStat
};
@@ -211,6 +212,7 @@
case kConstructorFenceGeneratedFinal: name = "ConstructorFenceGeneratedFinal"; break;
case kConstructorFenceRemovedLSE: name = "ConstructorFenceRemovedLSE"; break;
case kConstructorFenceRemovedPFRA: name = "ConstructorFenceRemovedPFRA"; break;
+ case kConstructorFenceRemovedCFRE: name = "ConstructorFenceRemovedCFRE"; break;
case kLastStat:
LOG(FATAL) << "invalid stat "