diff options
author | 2017-09-08 16:16:46 +0000 | |
---|---|---|
committer | 2017-09-08 16:16:46 +0000 | |
commit | 86ce50481f91e3be2e5f2686e18e11babde721ae (patch) | |
tree | 3179155108b883d8c0c4bb3dbac0b70db0d7f698 /compiler/optimizing | |
parent | 88f929ce72a5eabdb1ae1b32e5ec157d0e9f1ef8 (diff) | |
parent | dd018df8a00e841fe38fabe38520b7d297a885c1 (diff) |
Merge "optimizing: add block-scoped constructor fence merging pass"
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/code_sinking.cc | 5 | ||||
-rw-r--r-- | compiler/optimizing/constructor_fence_redundancy_elimination.cc | 261 | ||||
-rw-r--r-- | compiler/optimizing/constructor_fence_redundancy_elimination.h | 64 | ||||
-rw-r--r-- | compiler/optimizing/nodes.cc | 53 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 13 | ||||
-rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 7 | ||||
-rw-r--r-- | compiler/optimizing/optimizing_compiler_stats.h | 2 |
7 files changed, 398 insertions, 7 deletions
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index 6c3a9fd6b5..b558eb17a7 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -64,6 +64,11 @@ static bool IsInterestingInstruction(HInstruction* instruction) { // A fence with "0" inputs is dead and should've been removed in a prior pass. DCHECK_NE(0u, ctor_fence->InputCount()); + // TODO: this should be simplified to 'return true' since it's + // potentially pessimizing any code sinking for inlined constructors with final fields. + // TODO: double check that if the final field assignments are not moved, + // then the fence is not moved either. + return ctor_fence->GetAssociatedAllocation() != nullptr; } diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc new file mode 100644 index 0000000000..ff7ce60905 --- /dev/null +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc @@ -0,0 +1,261 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "constructor_fence_redundancy_elimination.h" + +#include "base/arena_allocator.h" + +namespace art { + +static constexpr bool kCfreLogFenceInputCount = false; + +// TODO: refactor this code by reusing escape analysis. +class CFREVisitor : public HGraphVisitor { + public: + CFREVisitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), + scoped_allocator_(graph->GetArena()->GetArenaPool()), + candidate_fences_(scoped_allocator_.Adapter(kArenaAllocCFRE)), + candidate_fence_targets_(scoped_allocator_.Adapter(kArenaAllocCFRE)), + stats_(stats) {} + + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + // Visit all instructions in block. + HGraphVisitor::VisitBasicBlock(block); + + // If there were any unmerged fences left, merge them together, + // the objects are considered 'published' at the end of the block. + MergeCandidateFences(); + } + + void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE { + candidate_fences_.push_back(constructor_fence); + + for (size_t input_idx = 0; input_idx < constructor_fence->InputCount(); ++input_idx) { + candidate_fence_targets_.Insert(constructor_fence->InputAt(input_idx)); + } + } + + void VisitBoundType(HBoundType* bound_type) OVERRIDE { + VisitAlias(bound_type); + } + + void VisitNullCheck(HNullCheck* null_check) OVERRIDE { + VisitAlias(null_check); + } + + void VisitSelect(HSelect* select) OVERRIDE { + VisitAlias(select); + } + + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { + HInstruction* value = instruction->InputAt(1); + VisitSetLocation(instruction, value); + } + + void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { + HInstruction* value = instruction->InputAt(1); + VisitSetLocation(instruction, value); + } + + void VisitArraySet(HArraySet* instruction) OVERRIDE { + HInstruction* value = instruction->InputAt(2); + VisitSetLocation(instruction, value); + } + + void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) { + // Pessimize: Merge all fences. + MergeCandidateFences(); + } + + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokePolymorphic(HInvokePolymorphic* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitClinitCheck(HClinitCheck* clinit) OVERRIDE { + HandleInvoke(clinit); + } + + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) OVERRIDE { + // Conservatively treat it as an invocation. + HandleInvoke(instruction); + } + + void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) OVERRIDE { + // Conservatively treat it as an invocation. + HandleInvoke(instruction); + } + + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) OVERRIDE { + // Conservatively treat it as an invocation. + HandleInvoke(instruction); + } + + void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) OVERRIDE { + // Conservatively treat it as an invocation. + HandleInvoke(instruction); + } + + private: + void HandleInvoke(HInstruction* invoke) { + // An object is considered "published" if it escapes into an invoke as any of the parameters. + if (HasInterestingPublishTargetAsInput(invoke)) { + MergeCandidateFences(); + } + } + + // Called by any instruction visitor that may create an alias. + // These instructions may create an alias: + // - BoundType + // - NullCheck + // - Select + // + // These also create an alias, but are not handled by this function: + // - Phi: propagates values across blocks, but we always merge at the end of a block. + // - Invoke: this is handled by HandleInvoke. + void VisitAlias(HInstruction* aliasing_inst) { + // An object is considered "published" if it becomes aliased by other instructions. + if (HasInterestingPublishTargetAsInput(aliasing_inst)) { + // Note that constructing a "NullCheck" for new-instance, new-array, + // or a 'this' (receiver) reference is impossible. + // + // If by some reason we actually encounter such a NullCheck(FenceTarget), + // we LOG(WARNING). + if (UNLIKELY(aliasing_inst->IsNullCheck())) { + LOG(kIsDebugBuild ? FATAL : WARNING) + << "Unexpected instruction: NullCheck; should not be legal in graph"; + // We then do a best-effort to handle this case. + } + MergeCandidateFences(); + } + } + + void VisitSetLocation(HInstruction* inst ATTRIBUTE_UNUSED, HInstruction* store_input) { + // An object is considered "published" if it's stored onto the heap. + // Sidenote: A later "LSE" pass can still remove the fence if it proves the + // object doesn't actually escape. + if (IsInterestingPublishTarget(store_input)) { + // Merge all constructor fences that we've seen since + // the last interesting store (or since the beginning). + MergeCandidateFences(); + } + } + + bool HasInterestingPublishTargetAsInput(HInstruction* inst) { + for (size_t input_count = 0; input_count < inst->InputCount(); ++input_count) { + if (IsInterestingPublishTarget(inst->InputAt(input_count))) { + return true; + } + } + + return false; + } + + // Merges all the existing fences we've seen so far into the last-most fence. + // + // This resets the list of candidate fences and their targets back to {}. + void MergeCandidateFences() { + if (candidate_fences_.empty()) { + // Nothing to do, need 1+ fences to merge. + return; + } + + // The merge target is always the "last" candidate fence. + HConstructorFence* merge_target = candidate_fences_[candidate_fences_.size() - 1]; + + for (HConstructorFence* fence : candidate_fences_) { + MaybeMerge(merge_target, fence); + } + + if (kCfreLogFenceInputCount) { + LOG(INFO) << "CFRE-MergeCandidateFences: Post-merge fence input count " + << merge_target->InputCount(); + } + + // Each merge acts as a cut-off point. The optimization is reset completely. + // In theory, we could push the fence as far as its publish, but in practice + // there is no benefit to this extra complexity unless we also reordered + // the stores to come later. + candidate_fences_.clear(); + candidate_fence_targets_.Clear(); + } + + // A publishing 'store' is only interesting if the value being stored + // is one of the fence `targets` in `candidate_fences`. + bool IsInterestingPublishTarget(HInstruction* store_input) const { + return candidate_fence_targets_.Find(store_input) != candidate_fence_targets_.end(); + } + + void MaybeMerge(HConstructorFence* target, HConstructorFence* src) { + if (target == src) { + return; // Don't merge a fence into itself. + // This is mostly for stats-purposes, we don't want to count merge(x,x) + // as removing a fence because it's a no-op. + } + + target->Merge(src); + + MaybeRecordStat(stats_, MethodCompilationStat::kConstructorFenceRemovedCFRE); + } + + // Phase-local heap memory allocator for CFRE optimizer. Storage obtained + // through this allocator is immediately released when the CFRE optimizer is done. + ArenaAllocator scoped_allocator_; + + // Set of constructor fences that we've seen in the current block. + // Each constructor fences acts as a guard for one or more `targets`. + // There exist no stores to any `targets` between any of these fences. + // + // Fences are in succession order (e.g. fence[i] succeeds fence[i-1] + // within the same basic block). + ArenaVector<HConstructorFence*> candidate_fences_; + + // Stores a set of the fence targets, to allow faster lookup of whether + // a detected publish is a target of one of the candidate fences. + ArenaHashSet<HInstruction*> candidate_fence_targets_; + + // Used to record stats about the optimization. + OptimizingCompilerStats* const stats_; + + DISALLOW_COPY_AND_ASSIGN(CFREVisitor); +}; + +void ConstructorFenceRedundancyElimination::Run() { + CFREVisitor cfre_visitor(graph_, stats_); + + // Arbitrarily visit in reverse-post order. + // The exact block visit order does not matter, as the algorithm + // only operates on a single block at a time. + cfre_visitor.VisitReversePostOrder(); +} + +} // namespace art diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.h b/compiler/optimizing/constructor_fence_redundancy_elimination.h new file mode 100644 index 0000000000..d89210cd1c --- /dev/null +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ +#define ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ + +#include "optimization.h" + +namespace art { + +/* + * Constructor Fence Redundancy Elimination (CFRE). + * + * A local optimization pass that merges redundant constructor fences + * together within the same basic block. + * + * Abbreviations: + * - CF: Constructor Fence + * - CFS: Constructor Fence Set + * - CFTargets: The unique set of the inputs of all the instructions in CFS. + * + * Given any CFS = { CF(x), CF(y), CF(z), ... }, define CFTargets = { x, y, z, ... }. + * - Publish(R) must not exist for any R in CFTargets if this Publish(R) is between any CF in CFS. + * - This type of Publish(R) is called an "interesting publish". + * + * A Publish(R) is considered any instruction at which the reference to "R" + * may escape (e.g. invoke, store, return, etc) to another thread. + * + * Starting at the beginning of the block: + * - Find the largest contiguous CFS. + * - If we see an interesting publish, merge all instructions in CFS into a single CF(CFTargets). + * - Repeat until the block is fully visited. + * - At the end of the block, merge all instructions in CFS into a single CF(CFTargets). + */ +class ConstructorFenceRedundancyElimination : public HOptimization { + public: + ConstructorFenceRedundancyElimination(HGraph* graph, + OptimizingCompilerStats* stats) + : HOptimization(graph, kPassName, stats) {} + + void Run() OVERRIDE; + + static constexpr const char* kPassName = "constructor_fence_redundancy_elimination"; + + private: + DISALLOW_COPY_AND_ASSIGN(ConstructorFenceRedundancyElimination); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index ebbea27e08..217a8f29a8 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1289,18 +1289,59 @@ size_t HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { return remove_count; } -HInstruction* HConstructorFence::GetAssociatedAllocation() { +void HConstructorFence::Merge(HConstructorFence* other) { + // Do not delete yourself from the graph. + DCHECK(this != other); + // Don't try to merge with an instruction not associated with a block. + DCHECK(other->GetBlock() != nullptr); + // A constructor fence's return type is "kPrimVoid" + // and therefore it cannot have any environment uses. + DCHECK(!other->HasEnvironmentUses()); + + auto has_input = [](HInstruction* haystack, HInstruction* needle) { + // Check if `haystack` has `needle` as any of its inputs. + for (size_t input_count = 0; input_count < haystack->InputCount(); ++input_count) { + if (haystack->InputAt(input_count) == needle) { + return true; + } + } + return false; + }; + + // Add any inputs from `other` into `this` if it wasn't already an input. + for (size_t input_count = 0; input_count < other->InputCount(); ++input_count) { + HInstruction* other_input = other->InputAt(input_count); + if (!has_input(this, other_input)) { + AddInput(other_input); + } + } + + other->GetBlock()->RemoveInstruction(other); +} + +HInstruction* HConstructorFence::GetAssociatedAllocation(bool ignore_inputs) { HInstruction* new_instance_inst = GetPrevious(); // Check if the immediately preceding instruction is a new-instance/new-array. // Otherwise this fence is for protecting final fields. if (new_instance_inst != nullptr && (new_instance_inst->IsNewInstance() || new_instance_inst->IsNewArray())) { - // TODO: Need to update this code to handle multiple inputs. - DCHECK_EQ(InputCount(), 1u); - return new_instance_inst; - } else { - return nullptr; + if (ignore_inputs) { + // If inputs are ignored, simply check if the predecessor is + // *any* HNewInstance/HNewArray. + // + // Inputs are normally only ignored for prepare_for_register_allocation, + // at which point *any* prior HNewInstance/Array can be considered + // associated. + return new_instance_inst; + } else { + // Normal case: There must be exactly 1 input and the previous instruction + // must be that input. + if (InputCount() == 1u && InputAt(0) == new_instance_inst) { + return new_instance_inst; + } + } } + return nullptr; } #define DEFINE_ACCEPT(name, super) \ diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 93677f66a8..6bf53f7147 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -6643,13 +6643,24 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction { // Returns how many HConstructorFence instructions were removed from graph. static size_t RemoveConstructorFences(HInstruction* instruction); + // Combine all inputs of `this` and `other` instruction and remove + // `other` from the graph. + // + // Inputs are unique after the merge. + // + // Requirement: `this` must not be the same as `other. + void Merge(HConstructorFence* other); + // Check if this constructor fence is protecting // an HNewInstance or HNewArray that is also the immediate // predecessor of `this`. // + // If `ignore_inputs` is true, then the immediate predecessor doesn't need + // to be one of the inputs of `this`. + // // Returns the associated HNewArray or HNewInstance, // or null otherwise. - HInstruction* GetAssociatedAllocation(); + HInstruction* GetAssociatedAllocation(bool ignore_inputs = false); DECLARE_INSTRUCTION(ConstructorFence); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 71d91ae38f..399cd98983 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -53,6 +53,7 @@ #include "compiled_method.h" #include "compiler.h" #include "constant_folding.h" +#include "constructor_fence_redundancy_elimination.h" #include "dead_code_elimination.h" #include "debug/elf_debug_writer.h" #include "debug/method_debug_info.h" @@ -514,6 +515,8 @@ static HOptimization* BuildOptimization( return new (arena) CHAGuardOptimization(graph); } else if (opt_name == CodeSinking::kCodeSinkingPassName) { return new (arena) CodeSinking(graph, stats); + } else if (opt_name == ConstructorFenceRedundancyElimination::kPassName) { + return new (arena) ConstructorFenceRedundancyElimination(graph, stats); #ifdef ART_ENABLE_CODEGEN_arm } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) { return new (arena) arm::InstructionSimplifierArm(graph, stats); @@ -784,6 +787,8 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats); CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph); CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats); + ConstructorFenceRedundancyElimination* cfre = + new (arena) ConstructorFenceRedundancyElimination(graph, stats); HOptimization* optimizations1[] = { intrinsics, @@ -821,6 +826,8 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, // can satisfy. For example, the code generator does not expect to see a // HTypeConversion from a type to the same type. simplify4, + cfre, // Eliminate constructor fences after code sinking to avoid + // complicated sinking logic to split a fence with many inputs. }; RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index ff49056798..07f9635aba 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -93,6 +93,7 @@ enum MethodCompilationStat { kConstructorFenceGeneratedFinal, kConstructorFenceRemovedLSE, kConstructorFenceRemovedPFRA, + kConstructorFenceRemovedCFRE, kLastStat }; @@ -215,6 +216,7 @@ class OptimizingCompilerStats { case kConstructorFenceGeneratedFinal: name = "ConstructorFenceGeneratedFinal"; break; case kConstructorFenceRemovedLSE: name = "ConstructorFenceRemovedLSE"; break; case kConstructorFenceRemovedPFRA: name = "ConstructorFenceRemovedPFRA"; break; + case kConstructorFenceRemovedCFRE: name = "ConstructorFenceRemovedCFRE"; break; case kLastStat: LOG(FATAL) << "invalid stat " |