author | 2016-01-14 21:25:16 +0000
committer | 2016-01-14 21:25:16 +0000
commit | 947cb4f5582d1f57270b48d3c47ea95e7f9085b5 (patch)
tree | 6f6aed8f8cca3177b06521a8db6ca845d18623ad /compiler/optimizing
parent | 7b4199a5fa9f151fbf3af2a34f26d04215a1016c (diff)
parent | 15bd22849ee6a1ffb3fb3630f686c2870bdf1bbc (diff)
Merge "Implement irreducible loop support in optimizing."
Diffstat (limited to 'compiler/optimizing')
26 files changed, 359 insertions, 186 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index d710747e76..eee6116098 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1296,8 +1296,13 @@ class BCEVisitor : public HGraphVisitor {
    */
   bool DynamicBCESeemsProfitable(HLoopInformation* loop, HBasicBlock* block) {
     if (loop != nullptr) {
+      // The loop preheader of an irreducible loop does not dominate all the blocks in
+      // the loop. We would need to find the common dominator of all blocks in the loop.
+      if (loop->IsIrreducible()) {
+        return false;
+      }
       // A try boundary preheader is hard to handle.
-      // TODO: remove this restriction
+      // TODO: remove this restriction.
       if (loop->GetPreHeader()->GetLastInstruction()->IsTryBoundary()) {
         return false;
       }
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index dbeb1ccc22..b7c24ff207 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -42,7 +42,6 @@ class BoundsCheckEliminationTest : public testing::Test {
   void RunBCE() {
     graph_->BuildDominatorTree();
-    graph_->AnalyzeNaturalLoops();
     InstructionSimplifier(graph_).Run();
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 45520b45bf..d64b8784e1 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -6225,8 +6225,16 @@ void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction,
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
     MethodReference target_method) {
-  if (desired_dispatch_info.code_ptr_location ==
-      HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+  // We disable pc-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  if (GetGraph()->HasIrreducibleLoops() &&
+      (dispatch_info.method_load_kind ==
+          HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
+    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+  }
+
+  if (dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
     const DexFile& outer_dex_file = GetGraph()->GetDexFile();
     if (&outer_dex_file != target_method.dex_file) {
       // Calls across dex files are more likely to exceed the available BL range,
@@ -6237,14 +6245,14 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOr
           ? HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup
           : HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
       return HInvokeStaticOrDirect::DispatchInfo {
-        desired_dispatch_info.method_load_kind,
+        dispatch_info.method_load_kind,
         code_ptr_location,
-        desired_dispatch_info.method_load_data,
+        dispatch_info.method_load_data,
         0u
       };
     }
   }
-  return desired_dispatch_info;
+  return dispatch_info;
 }

 Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index c24d25876c..6ab3aaff4b 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4187,19 +4187,28 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
     MethodReference target_method ATTRIBUTE_UNUSED) {
-  switch (desired_dispatch_info.code_ptr_location) {
+  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
+
+  // We disable pc-relative load when there is an irreducible loop, as the optimization
+  // is incompatible with it.
+  if (GetGraph()->HasIrreducibleLoops() &&
+      (dispatch_info.method_load_kind ==
+          HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
+    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+  }
+  switch (dispatch_info.code_ptr_location) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
       // For direct code, we actually prefer to call via the code pointer from ArtMethod*.
       // (Though the direct CALL ptr16:32 is available for consideration).
       return HInvokeStaticOrDirect::DispatchInfo {
-        desired_dispatch_info.method_load_kind,
+        dispatch_info.method_load_kind,
         HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-        desired_dispatch_info.method_load_data,
+        dispatch_info.method_load_data,
         0u
       };
     default:
-      return desired_dispatch_info;
+      return dispatch_info;
   }
 }
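Both code generators above apply the same downgrade before their pre-existing dispatch adjustments. Condensed into one hypothetical helper for clarity (the function is mine; the types and enum values are from the patch):

```cpp
// If the graph contains an irreducible loop, a pc-relative dex-cache base
// could end up live-in at that loop's header, which the register allocator
// now forbids, so fall back to loading the method through ArtMethod*.
HInvokeStaticOrDirect::DispatchInfo DowngradeForIrreducibleLoops(
    const HGraph* graph, HInvokeStaticOrDirect::DispatchInfo info) {
  if (graph->HasIrreducibleLoops() &&
      info.method_load_kind == HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
    info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
  }
  return info;
}
```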
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 67ff87a759..86a695b152 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -81,12 +81,6 @@ static void MarkReachableBlocks(HGraph* graph, ArenaBitVector* visited) {
   }
 }

-static void MarkLoopHeadersContaining(const HBasicBlock& block, ArenaBitVector* set) {
-  for (HLoopInformationOutwardIterator it(block); !it.Done(); it.Advance()) {
-    set->SetBit(it.Current()->GetHeader()->GetBlockId());
-  }
-}
-
 void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) {
   if (stats_ != nullptr) {
     stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction,
@@ -98,36 +92,45 @@ void HDeadCodeElimination::RemoveDeadBlocks() {
   // Classify blocks as reachable/unreachable.
   ArenaAllocator* allocator = graph_->GetArena();
   ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false);
-  ArenaBitVector affected_loops(allocator, graph_->GetBlocks().size(), false);

   MarkReachableBlocks(graph_, &live_blocks);
   bool removed_one_or_more_blocks = false;

+  // If the graph has irreducible loops we need to reset all graph analysis we have done
+  // before: the irreducible loop can be turned into a reducible one.
+  // For simplicity, we do the full computation regardless of the type of the loops.
+  bool rerun_dominance_and_loop_analysis = false;
+
   // Remove all dead blocks. Iterate in post order because removal needs the
   // block's chain of dominators and nested loops need to be updated from the
   // inside out.
   for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
     HBasicBlock* block  = it.Current();
+    if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
+      rerun_dominance_and_loop_analysis = true;
+    }
     int id = block->GetBlockId();
-    if (live_blocks.IsBitSet(id)) {
-      if (affected_loops.IsBitSet(id)) {
-        DCHECK(block->IsLoopHeader());
-        block->GetLoopInformation()->Update();
-      }
-    } else {
+    if (!live_blocks.IsBitSet(id)) {
       MaybeRecordDeadBlock(block);
-      MarkLoopHeadersContaining(*block, &affected_loops);
       block->DisconnectAndDelete();
       removed_one_or_more_blocks = true;
+      if (block->IsInLoop()) {
+        rerun_dominance_and_loop_analysis = true;
+      }
     }
   }

   // If we removed at least one block, we need to recompute the full
   // dominator tree and try block membership.
   if (removed_one_or_more_blocks) {
-    graph_->ClearDominanceInformation();
-    graph_->ComputeDominanceInformation();
-    graph_->ComputeTryBlockInformation();
+    if (rerun_dominance_and_loop_analysis) {
+      graph_->ClearLoopInformation();
+      graph_->ClearDominanceInformation();
+      graph_->BuildDominatorTree();
+    } else {
+      graph_->ClearDominanceInformation();
+      graph_->ComputeDominanceInformation();
+      graph_->ComputeTryBlockInformation();
+    }
   }

   // Connect successive blocks created by dead branches. Order does not matter.
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
index 65820630f8..3db254a004 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -83,6 +83,11 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor {
 };

 void DexCacheArrayFixups::Run() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
   DexCacheArrayFixupsVisitor visitor(graph_);
   visitor.VisitInsertionOrder();
   visitor.MoveBasesIfNeeded();
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index 9b0eb70742..4770fa2eca 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -33,7 +33,6 @@ static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) {
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   builder.BuildGraph(*item);
   graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
   return graph;
 }
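The two test fixups above follow from a structural change in nodes.cc further down: BuildDominatorTree() now performs loop analysis itself, so the separate AnalyzeNaturalLoops() call disappears. A sketch of the updated call sequence a test would use (hypothetical fragment, not from the patch):

```cpp
// After this change a test only needs one call: back edge discovery and
// loop population happen inside BuildDominatorTree().
static HGraph* BuildTestGraph(HGraphBuilder* builder,
                              const DexFile::CodeItem& item,
                              HGraph* graph) {
  builder->BuildGraph(item);
  graph->BuildDominatorTree();  // Also analyzes loops now.
  return graph;
}
```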
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 6d0bdbe19b..d60f3e31a5 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -391,6 +391,15 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
     }
   }

+  // Ensure dominated blocks have `block` as the dominator.
+  for (HBasicBlock* dominated : block->GetDominatedBlocks()) {
+    if (dominated->GetDominator() != block) {
+      AddError(StringPrintf("Block %d should be the dominator of %d.",
+                            block->GetBlockId(),
+                            dominated->GetBlockId()));
+    }
+  }
+
   // Ensure there is no critical edge (i.e., an edge connecting a
   // block with multiple successors to a block with multiple
   // predecessors). Exceptional edges are synthesized and hence
@@ -467,13 +476,7 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
   int id = loop_header->GetBlockId();
   HLoopInformation* loop_information = loop_header->GetLoopInformation();

-  // Ensure the pre-header block is first in the list of predecessors of a loop
-  // header and that the header block is its only successor.
-  if (!loop_header->IsLoopPreHeaderFirstPredecessor()) {
-    AddError(StringPrintf(
-        "Loop pre-header is not the first predecessor of the loop header %d.",
-        id));
-  } else if (loop_information->GetPreHeader()->GetSuccessors().size() != 1) {
+  if (loop_information->GetPreHeader()->GetSuccessors().size() != 1) {
     AddError(StringPrintf(
         "Loop pre-header %d of loop defined by header %d has %zu successors.",
         loop_information->GetPreHeader()->GetBlockId(),
@@ -532,30 +535,39 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
     }
   }

-  // Ensure all blocks in the loop are live and dominated by the loop header.
+  // If this is a nested loop, ensure the outer loops contain a superset of the blocks.
+  for (HLoopInformationOutwardIterator it(*loop_header); !it.Done(); it.Advance()) {
+    HLoopInformation* outer_info = it.Current();
+    if (!loop_blocks.IsSubsetOf(&outer_info->GetBlocks())) {
+      AddError(StringPrintf("Blocks of loop defined by header %d are not a subset of blocks of "
+                            "an outer loop defined by header %d.",
+                            id,
+                            outer_info->GetHeader()->GetBlockId()));
+    }
+  }
+
+  // Ensure the pre-header block is first in the list of predecessors of a loop
+  // header and that the header block is its only successor.
+  if (!loop_header->IsLoopPreHeaderFirstPredecessor()) {
+    AddError(StringPrintf(
+        "Loop pre-header is not the first predecessor of the loop header %d.",
+        id));
+  }
+
+  // Ensure all blocks in the loop are live and dominated by the loop header in
+  // the case of natural loops.
   for (uint32_t i : loop_blocks.Indexes()) {
     HBasicBlock* loop_block = GetGraph()->GetBlocks()[i];
     if (loop_block == nullptr) {
       AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.",
                             id,
                             i));
-    } else if (!loop_header->Dominates(loop_block)) {
+    } else if (!loop_information->IsIrreducible() && !loop_header->Dominates(loop_block)) {
       AddError(StringPrintf("Loop block %d not dominated by loop header %d.",
                             i,
                             id));
     }
   }
-
-  // If this is a nested loop, ensure the outer loops contain a superset of the blocks.
-  for (HLoopInformationOutwardIterator it(*loop_header); !it.Done(); it.Advance()) {
-    HLoopInformation* outer_info = it.Current();
-    if (!loop_blocks.IsSubsetOf(&outer_info->GetBlocks())) {
-      AddError(StringPrintf("Blocks of loop defined by header %d are not a subset of blocks of "
-                            "an outer loop defined by header %d.",
-                            id,
-                            outer_info->GetHeader()->GetBlockId()));
-    }
-  }
 }

 void SSAChecker::VisitInstruction(HInstruction* instruction) {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 5f1328f545..32c3a925e0 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -486,7 +486,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
         StartAttributeStream("is_low") << interval->IsLowInterval();
         StartAttributeStream("is_high") << interval->IsHighInterval();
       }
-    } else if (IsPass(RegisterAllocator::kRegisterAllocatorPassName) && is_after_pass_) {
+    }
+
+    if (IsPass(RegisterAllocator::kRegisterAllocatorPassName) && is_after_pass_) {
       StartAttributeStream("liveness") << instruction->GetLifetimePosition();
       LocationSummary* locations = instruction->GetLocations();
       if (locations != nullptr) {
@@ -498,15 +500,29 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
         attr << inputs << "->";
         DumpLocation(attr, locations->Out());
       }
-    } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
-               || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)) {
+    }
+
+    if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
+        || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)
+        || IsPass(HDeadCodeElimination::kInitialDeadCodeEliminationPassName)
+        || IsPass(SsaBuilder::kSsaBuilderPassName)) {
       HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
       if (info == nullptr) {
         StartAttributeStream("loop") << "none";
       } else {
         StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
+        HLoopInformation* outer = info->GetPreHeader()->GetLoopInformation();
+        if (outer != nullptr) {
+          StartAttributeStream("outer_loop") << "B" << outer->GetHeader()->GetBlockId();
+        } else {
+          StartAttributeStream("outer_loop") << "none";
+        }
+        StartAttributeStream("irreducible")
+            << std::boolalpha << info->IsIrreducible() << std::noboolalpha;
       }
-    } else if ((IsPass(SsaBuilder::kSsaBuilderPassName)
+    }
+
+    if ((IsPass(SsaBuilder::kSsaBuilderPassName)
         || IsPass(HInliner::kInlinerPassName))
         && (instruction->GetType() == Primitive::kPrimNot)) {
       ReferenceTypeInfo info = instruction->IsLoadClass()
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 4af111b784..b4922789d4 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -125,6 +125,14 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> {
     });
   }

+  void Clear() {
+    num_entries_ = 0;
+    for (size_t i = 0; i < num_buckets_; ++i) {
+      buckets_[i] = nullptr;
+    }
+    buckets_owned_.SetInitialBits(num_buckets_);
+  }
+
   // Updates this set by intersecting with instructions in a predecessor's set.
   void IntersectWith(ValueSet* predecessor) {
     if (IsEmpty()) {
@@ -191,14 +199,6 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> {
     return clone_iterator;
   }

-  void Clear() {
-    num_entries_ = 0;
-    for (size_t i = 0; i < num_buckets_; ++i) {
-      buckets_[i] = nullptr;
-    }
-    buckets_owned_.SetInitialBits(num_buckets_);
-  }
-
   // Iterates over buckets with impure instructions (even indices) and deletes
   // the ones on which 'cond' returns true.
   template<typename Functor>
@@ -261,11 +261,14 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> {
     }
   }

-  // Generates a hash code for an instruction. Pure instructions are put into
-  // odd buckets to speed up deletion.
+  // Generates a hash code for an instruction.
   size_t HashCode(HInstruction* instruction) const {
     size_t hash_code = instruction->ComputeHashCode();
-    if (instruction->GetSideEffects().HasDependencies()) {
+    // Pure instructions are put into odd buckets to speed up deletion. Note that in the
+    // case of irreducible loops, we don't put pure instructions in odd buckets, as we
+    // need to delete them when entering the loop.
+    if (instruction->GetSideEffects().HasDependencies() ||
+        instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) {
       return (hash_code << 1) | 0;
     } else {
       return (hash_code << 1) | 1;
@@ -360,8 +363,14 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
   }
   if (!set->IsEmpty()) {
     if (block->IsLoopHeader()) {
-      DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
-      set->Kill(side_effects_.GetLoopEffects(block));
+      if (block->GetLoopInformation()->IsIrreducible()) {
+        // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
+        // loop header.
+        set->Clear();
+      } else {
+        DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
+        set->Kill(side_effects_.GetLoopEffects(block));
+      }
     } else if (predecessors.size() > 1) {
       for (HBasicBlock* predecessor : predecessors) {
         set->IntersectWith(sets_[predecessor->GetBlockId()]);
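The parity trick referred to in the HashCode comment is easier to see in isolation. A toy model (not ART code; it assumes a power-of-two bucket count so the parity bit survives the index computation):

```cpp
#include <cstddef>

// Impure values get even bucket indices, pure ones odd indices, so a pass
// that kills only side-effect-dependent values walks half the buckets. When
// the graph has irreducible loops, everything is forced to the "impure"
// (even) side, because the whole set is cleared at such loop headers anyway.
size_t BucketIndex(size_t hash, bool is_pure, bool has_irreducible_loops, size_t num_buckets) {
  size_t tagged = (is_pure && !has_irreducible_loops) ? ((hash << 1) | 1) : (hash << 1);
  return tagged & (num_buckets - 1);  // num_buckets assumed a power of two.
}
```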
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index eef6cef5f0..37f2d79536 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -76,7 +76,9 @@ void HInductionVarAnalysis::Run() {
   // range analysis on outer loop while visiting inner loops.
   for (HReversePostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) {
     HBasicBlock* graph_block = it_graph.Current();
-    if (graph_block->IsLoopHeader()) {
+    // Don't analyze irreducible loops.
+    // TODO(ajcbik): could/should we remove this restriction?
+    if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) {
       VisitLoop(graph_block->GetLoopInformation());
     }
   }
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 48d32999b7..293282edbb 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -539,7 +539,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
     return false;
   }

-  if (callee_graph->TryBuildingSsa(handles_) != kBuildSsaSuccess) {
+  if (callee_graph->TryBuildingSsa(handles_) != kAnalysisSuccess) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be transformed to SSA";
     return false;
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 02befc011a..a6b4078f46 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -94,6 +94,16 @@ void LICM::Run() {
     SideEffects loop_effects = side_effects_.GetLoopEffects(block);
     HBasicBlock* pre_header = loop_info->GetPreHeader();

+    bool contains_irreducible_loop = false;
+    if (graph_->HasIrreducibleLoops()) {
+      for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
+        if (it_loop.Current()->GetLoopInformation()->IsIrreducible()) {
+          contains_irreducible_loop = true;
+          break;
+        }
+      }
+    }
+
     for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
       HBasicBlock* inner = it_loop.Current();
       DCHECK(inner->IsInLoop());
@@ -104,6 +114,12 @@ void LICM::Run() {
       }
       visited.SetBit(inner->GetBlockId());

+      if (contains_irreducible_loop) {
+        // We cannot licm in an irreducible loop, or in a natural loop containing an
+        // irreducible loop.
+        continue;
+      }
+
       // We can move an instruction that can throw only if it is the first
       // throwing instruction in the loop. Note that the first potentially
       // throwing instruction encountered that is not hoisted stops this
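The new LICM guard scans the loop once up front. Factored into a hypothetical helper (the name is mine; the iterator and types are from the patch), the check reads:

```cpp
// A natural loop is off-limits for hoisting if any block in it belongs to an
// irreducible loop (the loop itself or one of its inner loops): code moved
// to the pre-header would not dominate a body reachable through a side entry.
bool ContainsIrreducibleLoop(HLoopInformation* loop_info) {
  for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
    if (it_loop.Current()->GetLoopInformation()->IsIrreducible()) {
      return true;
    }
  }
  return false;
}
```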
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 2b313f6b81..c4492c8f92 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -582,9 +582,22 @@ class LSEVisitor : public HGraphVisitor {
     DCHECK(block->IsLoopHeader());
     int block_id = block->GetBlockId();
     ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+
+    // Don't eliminate loads in irreducible loops. This is safe for singletons, because
+    // they are always used by the non-eliminated loop-phi.
+    if (block->GetLoopInformation()->IsIrreducible()) {
+      if (kIsDebugBuild) {
+        for (size_t i = 0; i < heap_values.size(); i++) {
+          DCHECK_EQ(heap_values[i], kUnknownHeapValue);
+        }
+      }
+      return;
+    }
+
     HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
     ArenaVector<HInstruction*>& pre_header_heap_values =
         heap_values_for_[pre_header->GetBlockId()];
+
+    // Inherit the values from pre-header.
     for (size_t i = 0; i < heap_values.size(); i++) {
       heap_values[i] = pre_header_heap_values[i];
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 8de9700250..b80c6bde82 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #include "nodes.h"

 #include "code_generator.h"
@@ -90,6 +89,7 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit
   for (size_t i = 0; i < blocks_.size(); ++i) {
     if (!visited.IsBitSet(i)) {
       HBasicBlock* block = blocks_[i];
+      if (block == nullptr) continue;
       DCHECK(block->GetPhis().IsEmpty()) << "Phis are not inserted at this stage";
       for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
         RemoveAsUser(it.Current());
@@ -102,6 +102,7 @@ void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) {
   for (size_t i = 0; i < blocks_.size(); ++i) {
     if (!visited.IsBitSet(i)) {
       HBasicBlock* block = blocks_[i];
+      if (block == nullptr) continue;
       // We only need to update the successor, which might be live.
       for (HBasicBlock* successor : block->GetSuccessors()) {
         successor->RemovePredecessor(block);
@@ -113,7 +114,7 @@ void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) {
   }
 }

-void HGraph::BuildDominatorTree() {
+GraphAnalysisResult HGraph::BuildDominatorTree() {
   // (1) Simplify the CFG so that catch blocks have only exceptional incoming
   //     edges. This invariant simplifies building SSA form because Phis cannot
   //     collect both normal- and exceptional-flow values at the same time.
@@ -130,7 +131,7 @@ void HGraph::BuildDominatorTree() {
   RemoveInstructionsAsUsersFromDeadBlocks(visited);

   // (4) Remove blocks not visited during the initial DFS.
-  //     Step (4) requires dead blocks to be removed from the
+  //     Step (5) requires dead blocks to be removed from the
   //     predecessors list of live blocks.
   RemoveDeadBlocks(visited);

@@ -140,6 +141,20 @@ void HGraph::BuildDominatorTree() {
   // (6) Compute the dominance information and the reverse post order.
   ComputeDominanceInformation();
+
+  // (7) Analyze loops discovered through back edge analysis, and
+  //     set the loop information on each block.
+  GraphAnalysisResult result = AnalyzeLoops();
+  if (result != kAnalysisSuccess) {
+    return result;
+  }
+
+  // (8) Precompute per-block try membership before entering the SSA builder,
+  //     which needs the information to build catch block phis from values of
+  //     locals at throwing instructions inside try blocks.
+  ComputeTryBlockInformation();
+
+  return kAnalysisSuccess;
 }

 void HGraph::ClearDominanceInformation() {
@@ -149,6 +164,17 @@ void HGraph::ClearDominanceInformation() {
   reverse_post_order_.clear();
 }

+void HGraph::ClearLoopInformation() {
+  SetHasIrreducibleLoops(false);
+  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+    HBasicBlock* current = it.Current();
+    if (current->IsLoopHeader()) {
+      current->RemoveInstruction(current->GetLoopInformation()->GetSuspendCheck());
+    }
+    current->SetLoopInformation(nullptr);
+  }
+}
+
 void HBasicBlock::ClearDominanceInformation() {
   dominated_blocks_.clear();
   dominator_ = nullptr;
@@ -190,31 +216,28 @@ void HGraph::ComputeDominanceInformation() {
         // dominator of the block. We can then start visiting its successors.
         if (++visits[successor->GetBlockId()] ==
             successor->GetPredecessors().size() - successor->NumberOfBackEdges()) {
-          successor->GetDominator()->AddDominatedBlock(successor);
           reverse_post_order_.push_back(successor);
           worklist.push_back(successor);
         }
       }
     }
   }
-}

-BuildSsaResult HGraph::TryBuildingSsa(StackHandleScopeCollection* handles) {
-  BuildDominatorTree();
+  // Populate `dominated_blocks_` information after computing all dominators.
+  // The potential presence of irreducible loops requires doing it afterwards.
+  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+    HBasicBlock* block = it.Current();
+    if (!block->IsEntryBlock()) {
+      block->GetDominator()->AddDominatedBlock(block);
+    }
+  }
+}

-  // The SSA builder requires loops to all be natural. Specifically, the dead phi
-  // elimination phase checks the consistency of the graph when doing a post-order
-  // visit for eliminating dead phis: a dead phi can only have loop header phi
-  // users remaining when being visited.
-  BuildSsaResult result = AnalyzeNaturalLoops();
-  if (result != kBuildSsaSuccess) {
+GraphAnalysisResult HGraph::TryBuildingSsa(StackHandleScopeCollection* handles) {
+  GraphAnalysisResult result = BuildDominatorTree();
+  if (result != kAnalysisSuccess) {
     return result;
   }

-  // Precompute per-block try membership before entering the SSA builder,
-  // which needs the information to build catch block phis from values of
-  // locals at throwing instructions inside try blocks.
-  ComputeTryBlockInformation();
-
   // Create the inexact Object reference type and store it in the HGraph.
   ScopedObjectAccess soa(Thread::Current());
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
@@ -224,12 +247,12 @@ BuildSsaResult HGraph::TryBuildingSsa(StackHandleScopeCollection* handles) {
   // Transforms graph to SSA form.
   result = SsaBuilder(this, handles).BuildSsa();
-  if (result != kBuildSsaSuccess) {
+  if (result != kAnalysisSuccess) {
     return result;
   }

   in_ssa_form_ = true;
-  return kBuildSsaSuccess;
+  return kAnalysisSuccess;
 }

 HBasicBlock* HGraph::SplitEdge(HBasicBlock* block, HBasicBlock* successor) {
@@ -331,7 +354,7 @@ void HGraph::SimplifyCatchBlocks() {
   // can be invalidated. We remember the initial size to avoid iterating over the new blocks.
   for (size_t block_id = 0u, end = blocks_.size(); block_id != end; ++block_id) {
     HBasicBlock* catch_block = blocks_[block_id];
-    if (!catch_block->IsCatchBlock()) {
+    if (catch_block == nullptr || !catch_block->IsCatchBlock()) {
       continue;
     }
@@ -438,7 +461,7 @@ void HGraph::SimplifyCFG() {
   }
 }

-BuildSsaResult HGraph::AnalyzeNaturalLoops() const {
+GraphAnalysisResult HGraph::AnalyzeLoops() const {
   // Order does not matter.
   for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
@@ -446,16 +469,26 @@ BuildSsaResult HGraph::AnalyzeNaturalLoops() const {
       if (block->IsCatchBlock()) {
        // TODO: Dealing with exceptional back edges could be tricky because
         //       they only approximate the real control flow. Bail out for now.
-        return kBuildSsaFailThrowCatchLoop;
-      }
-      HLoopInformation* info = block->GetLoopInformation();
-      if (!info->Populate()) {
-        // Abort if the loop is non natural. We currently bailout in such cases.
-        return kBuildSsaFailNonNaturalLoop;
+        return kAnalysisFailThrowCatchLoop;
       }
+      block->GetLoopInformation()->Populate();
     }
   }
-  return kBuildSsaSuccess;
+  return kAnalysisSuccess;
+}
+
+void HLoopInformation::Dump(std::ostream& os) {
+  os << "header: " << header_->GetBlockId() << std::endl;
+  os << "pre header: " << GetPreHeader()->GetBlockId() << std::endl;
+  for (HBasicBlock* block : back_edges_) {
+    os << "back edge: " << block->GetBlockId() << std::endl;
+  }
+  for (HBasicBlock* block : header_->GetPredecessors()) {
+    os << "predecessor: " << block->GetBlockId() << std::endl;
+  }
+  for (uint32_t idx : blocks_.Indexes()) {
+    os << "  in loop: " << idx << std::endl;
+  }
 }

 void HGraph::InsertConstant(HConstant* constant) {
@@ -555,61 +588,65 @@ void HLoopInformation::PopulateRecursive(HBasicBlock* block) {
   }
 }

-bool HLoopInformation::Populate() {
-  DCHECK_EQ(blocks_.NumSetBits(), 0u) << "Loop information has already been populated";
-  for (HBasicBlock* back_edge : GetBackEdges()) {
-    DCHECK(back_edge->GetDominator() != nullptr);
-    if (!header_->Dominates(back_edge)) {
-      // This loop is not natural. Do not bother going further.
-      return false;
-    }
-
-    // Populate this loop: starting with the back edge, recursively add predecessors
-    // that are not already part of that loop. Set the header as part of the loop
-    // to end the recursion.
-    // This is a recursive implementation of the algorithm described in
-    // "Advanced Compiler Design & Implementation" (Muchnick) p192.
-    blocks_.SetBit(header_->GetBlockId());
-    PopulateRecursive(back_edge);
+void HLoopInformation::PopulateIrreducibleRecursive(HBasicBlock* block) {
+  if (blocks_.IsBitSet(block->GetBlockId())) {
+    return;
   }
-  return true;
-}

-void HLoopInformation::Update() {
-  HGraph* graph = header_->GetGraph();
-  for (uint32_t id : blocks_.Indexes()) {
-    HBasicBlock* block = graph->GetBlocks()[id];
-    // Reset loop information of non-header blocks inside the loop, except
-    // members of inner nested loops because those should already have been
-    // updated by their own LoopInformation.
-    if (block->GetLoopInformation() == this && block != header_) {
-      block->SetLoopInformation(nullptr);
+  if (block->IsLoopHeader()) {
+    // If we hit a loop header in an irreducible loop, we first check if the
+    // pre header of that loop belongs to the currently analyzed loop. If it does,
+    // then we visit the back edges.
+    // Note that we cannot use GetPreHeader, as the loop may have not been populated
+    // yet.
+    HBasicBlock* pre_header = block->GetPredecessors()[0];
+    PopulateIrreducibleRecursive(pre_header);
+    if (blocks_.IsBitSet(pre_header->GetBlockId())) {
+      blocks_.SetBit(block->GetBlockId());
+      block->SetInLoop(this);
+      HLoopInformation* info = block->GetLoopInformation();
+      for (HBasicBlock* back_edge : info->GetBackEdges()) {
+        PopulateIrreducibleRecursive(back_edge);
+      }
     }
-  }
-  blocks_.ClearAllBits();
-
-  if (back_edges_.empty()) {
-    // The loop has been dismantled, delete its suspend check and remove info
-    // from the header.
-    DCHECK(HasSuspendCheck());
-    header_->RemoveInstruction(suspend_check_);
-    header_->SetLoopInformation(nullptr);
-    header_ = nullptr;
-    suspend_check_ = nullptr;
   } else {
-    if (kIsDebugBuild) {
-      for (HBasicBlock* back_edge : back_edges_) {
-        DCHECK(header_->Dominates(back_edge));
+    // Visit all predecessors. If one predecessor is part of the loop, this
+    // block is also part of this loop.
+    for (HBasicBlock* predecessor : block->GetPredecessors()) {
+      PopulateIrreducibleRecursive(predecessor);
+      if (blocks_.IsBitSet(predecessor->GetBlockId())) {
+        blocks_.SetBit(block->GetBlockId());
+        block->SetInLoop(this);
       }
     }
-    // This loop still has reachable back edges. Repopulate the list of blocks.
-    bool populate_successful = Populate();
-    DCHECK(populate_successful);
+  }
+}
+
+void HLoopInformation::Populate() {
+  DCHECK_EQ(blocks_.NumSetBits(), 0u) << "Loop information has already been populated";
+  // Populate this loop: starting with the back edge, recursively add predecessors
+  // that are not already part of that loop. Set the header as part of the loop
+  // to end the recursion.
+  // This is a recursive implementation of the algorithm described in
+  // "Advanced Compiler Design & Implementation" (Muchnick) p192.
+  blocks_.SetBit(header_->GetBlockId());
+  header_->SetInLoop(this);
+  for (HBasicBlock* back_edge : GetBackEdges()) {
+    DCHECK(back_edge->GetDominator() != nullptr);
+    if (!header_->Dominates(back_edge)) {
+      irreducible_ = true;
+      header_->GetGraph()->SetHasIrreducibleLoops(true);
+      PopulateIrreducibleRecursive(back_edge);
+    } else {
+      PopulateRecursive(back_edge);
+    }
   }
 }

 HBasicBlock* HLoopInformation::GetPreHeader() const {
-  return header_->GetDominator();
+  HBasicBlock* block = header_->GetPredecessors()[0];
+  DCHECK(irreducible_ || (block == header_->GetDominator()));
+  return block;
 }

 bool HLoopInformation::Contains(const HBasicBlock& block) const {
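Note the change of contract in GetPreHeader(): the pre-header is now defined positionally as the header's first predecessor, and only for natural loops does it still coincide with the header's dominator. An illustrative assertion helper (mine, not in the patch) stating the invariant:

```cpp
// For every populated loop: predecessor 0 of the header is the pre-header;
// for natural (reducible) loops it is additionally the header's dominator.
void CheckPreHeaderContract(HLoopInformation* info) {
  HBasicBlock* header = info->GetHeader();
  HBasicBlock* pre_header = header->GetPredecessors()[0];
  CHECK_EQ(pre_header, info->GetPreHeader());
  if (!info->IsIrreducible()) {
    CHECK_EQ(pre_header, header->GetDominator());
  }
}
```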
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index fdb14fcb07..23132308f0 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -98,11 +98,10 @@ enum IfCondition {
   kCondAE,  // >=
 };

-enum BuildSsaResult {
-  kBuildSsaFailNonNaturalLoop,
-  kBuildSsaFailThrowCatchLoop,
-  kBuildSsaFailAmbiguousArrayOp,
-  kBuildSsaSuccess,
+enum GraphAnalysisResult {
+  kAnalysisFailThrowCatchLoop,
+  kAnalysisFailAmbiguousArrayOp,
+  kAnalysisSuccess,
 };

 class HInstructionList : public ValueObject {
@@ -289,6 +288,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
         temporaries_vreg_slots_(0),
         has_bounds_checks_(false),
         has_try_catch_(false),
+        has_irreducible_loops_(false),
         debuggable_(debuggable),
         current_instruction_id_(start_instruction_id),
         dex_file_(dex_file),
@@ -324,20 +324,20 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   // Try building the SSA form of this graph, with dominance computation and
   // loop recognition. Returns a code specifying that it was successful or the
   // reason for failure.
-  BuildSsaResult TryBuildingSsa(StackHandleScopeCollection* handles);
+  GraphAnalysisResult TryBuildingSsa(StackHandleScopeCollection* handles);

   void ComputeDominanceInformation();
   void ClearDominanceInformation();
-
-  void BuildDominatorTree();
+  void ClearLoopInformation();
+  void FindBackEdges(ArenaBitVector* visited);
+  GraphAnalysisResult BuildDominatorTree();
   void SimplifyCFG();
   void SimplifyCatchBlocks();

   // Analyze all natural loops in this graph. Returns a code specifying that it
   // was successful or the reason for failure. The method will fail if a loop
-  // is not natural, that is the header does not dominate a back edge, or if it
   // is a throw-catch loop, i.e. the header is a catch block.
-  BuildSsaResult AnalyzeNaturalLoops() const;
+  GraphAnalysisResult AnalyzeLoops() const;

   // Iterate over blocks to compute try block membership. Needs reverse post
   // order and loop information.
@@ -482,6 +482,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }

+  bool HasIrreducibleLoops() const { return has_irreducible_loops_; }
+  void SetHasIrreducibleLoops(bool value) { has_irreducible_loops_ = value; }
+
   ArtMethod* GetArtMethod() const { return art_method_; }
   void SetArtMethod(ArtMethod* method) { art_method_ = method; }

@@ -491,7 +494,6 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor);

 private:
-  void FindBackEdges(ArenaBitVector* visited);
   void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
   void RemoveDeadBlocks(const ArenaBitVector& visited);

@@ -558,6 +560,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   // try/catch-related passes if false.
   bool has_try_catch_;

+  // Flag whether there are any irreducible loops in the graph.
+  bool has_irreducible_loops_;
+
   // Indicates whether the graph should be compiled in a way that
   // ensures full debuggability. If false, we can apply more
   // aggressive optimizations that may limit the level of debugging.
@@ -613,12 +618,17 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
   HLoopInformation(HBasicBlock* header, HGraph* graph)
       : header_(header),
         suspend_check_(nullptr),
+        irreducible_(false),
         back_edges_(graph->GetArena()->Adapter(kArenaAllocLoopInfoBackEdges)),
         // Make bit vector growable, as the number of blocks may change.
         blocks_(graph->GetArena(), graph->GetBlocks().size(), true) {
     back_edges_.reserve(kDefaultNumberOfBackEdges);
   }

+  bool IsIrreducible() const { return irreducible_; }
+
+  void Dump(std::ostream& os);
+
   HBasicBlock* GetHeader() const {
     return header_;
   }
@@ -661,15 +671,8 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
     ReplaceElement(back_edges_, existing, new_back_edge);
   }

-  // Finds blocks that are part of this loop. Returns whether the loop is a natural loop,
-  // that is the header dominates the back edge.
-  bool Populate();
-
-  // Reanalyzes the loop by removing loop info from its blocks and re-running
-  // Populate(). If there are no back edges left, the loop info is completely
-  // removed as well as its SuspendCheck instruction. It must be run on nested
-  // inner loops first.
-  void Update();
+  // Finds blocks that are part of this loop.
+  void Populate();

   // Returns whether this loop information contains `block`.
   // Note that this loop information *must* be populated before entering this function.
@@ -690,9 +693,11 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
 private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
+  void PopulateIrreducibleRecursive(HBasicBlock* block);

   HBasicBlock* header_;
   HSuspendCheck* suspend_check_;
+  bool irreducible_;
   ArenaVector<HBasicBlock*> back_edges_;
   ArenaBitVector blocks_;

@@ -1019,6 +1024,11 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
     return GetPredecessors()[0] == GetLoopInformation()->GetPreHeader();
   }

+  bool IsFirstPredecessorBackEdge() const {
+    DCHECK(IsLoopHeader());
+    return GetLoopInformation()->IsBackEdge(*GetPredecessors()[0]);
+  }
+
   HLoopInformation* GetLoopInformation() const {
     return loop_information_;
   }
@@ -1831,7 +1841,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
   void SetBlock(HBasicBlock* block) { block_ = block; }
   bool IsInBlock() const { return block_ != nullptr; }
   bool IsInLoop() const { return block_->IsInLoop(); }
-  bool IsLoopHeaderPhi() { return IsPhi() && block_->IsLoopHeader(); }
+  bool IsLoopHeaderPhi() const { return IsPhi() && block_->IsLoopHeader(); }
+  bool IsIrreducibleLoopHeaderPhi() const {
+    return IsLoopHeaderPhi() && GetBlock()->GetLoopInformation()->IsIrreducible();
+  }

   virtual size_t InputCount() const = 0;
   HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 988e32bc1a..bb840eabdd 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -782,19 +782,16 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
   {
     PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
-    BuildSsaResult result = graph->TryBuildingSsa(&handles);
-    if (result != kBuildSsaSuccess) {
+    GraphAnalysisResult result = graph->TryBuildingSsa(&handles);
+    if (result != kAnalysisSuccess) {
       switch (result) {
-        case kBuildSsaFailNonNaturalLoop:
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledNonNaturalLoop);
-          break;
-        case kBuildSsaFailThrowCatchLoop:
+        case kAnalysisFailThrowCatchLoop:
           MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop);
           break;
-        case kBuildSsaFailAmbiguousArrayOp:
+        case kAnalysisFailAmbiguousArrayOp:
           MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
           break;
-        case kBuildSsaSuccess:
+        case kAnalysisSuccess:
           UNREACHABLE();
       }
       pass_observer.SetGraphInBadState();
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index bca1632e31..f8035aae34 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -38,7 +38,6 @@ enum MethodCompilationStat {
   kRemovedDeadInstruction,
   kRemovedNullCheck,
   kNotCompiledBranchOutsideMethodCode,
-  kNotCompiledNonNaturalLoop,
   kNotCompiledThrowCatchLoop,
   kNotCompiledAmbiguousArrayOp,
   kNotCompiledHugeMethod,
@@ -106,7 +105,6 @@ class OptimizingCompilerStats {
       case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
       case kRemovedNullCheck: name = "RemovedNullCheck"; break;
       case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
-      case kNotCompiledNonNaturalLoop : name = "NotCompiledNonNaturalLoop"; break;
       case kNotCompiledThrowCatchLoop : name = "NotCompiledThrowCatchLoop"; break;
       case kNotCompiledAmbiguousArrayOp : name = "NotCompiledAmbiguousArrayOp"; break;
       case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index af3a005304..5a910433b4 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -117,7 +117,7 @@ inline bool IsRemoved(HInstruction* instruction) {
 inline void TransformToSsa(HGraph* graph) {
   ScopedObjectAccess soa(Thread::Current());
   StackHandleScopeCollection handles(soa.Self());
-  EXPECT_EQ(graph->TryBuildingSsa(&handles), kBuildSsaSuccess);
+  EXPECT_EQ(graph->TryBuildingSsa(&handles), kAnalysisSuccess);
 }

 }  // namespace art
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index a385448104..1394dfaf5d 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -145,6 +145,11 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
 };

 void PcRelativeFixups::Run() {
+  if (graph_->HasIrreducibleLoops()) {
+    // Do not run this optimization, as irreducible loops do not work with an instruction
+    // that can be live-in at the irreducible loop header.
+    return;
+  }
   PCRelativeHandlerVisitor visitor(graph_);
   visitor.VisitInsertionOrder();
   visitor.MoveBaseIfNeeded();
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index eb0419b6e0..2bae4bc5c8 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -179,9 +179,11 @@ void RegisterAllocator::AllocateRegistersInternal() {
       ProcessInstruction(inst_it.Current());
     }

-    if (block->IsCatchBlock()) {
-      // By blocking all registers at the top of each catch block, we force
-      // intervals used after catch to spill.
+    if (block->IsCatchBlock() ||
+        (block->GetLoopInformation() != nullptr &&
+         block->GetLoopInformation()->IsIrreducible())) {
+      // By blocking all registers at the top of each catch block or irreducible loop, we force
+      // intervals belonging to the live-in set of the catch/header block to be spilled.
+      // TODO(ngeoffray): Phis in this block could be allocated in register.
       size_t position = block->GetLifetimeStart();
       BlockRegisters(position, position + 1);
     }
@@ -1870,8 +1872,10 @@ void RegisterAllocator::Resolve() {
   // Resolve non-linear control flow across branches. Order does not matter.
   for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
-    if (block->IsCatchBlock()) {
-      // Instructions live at the top of catch blocks were forced to spill.
+    if (block->IsCatchBlock() ||
+        (block->GetLoopInformation() != nullptr &&
+         block->GetLoopInformation()->IsIrreducible())) {
+      // Instructions live at the top of catch blocks or irreducible loop header
+      // were forced to spill.
       if (kIsDebugBuild) {
         BitVector* live = liveness_.GetLiveInSet(*block);
         for (uint32_t idx : live->Indexes()) {
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 306a457a9c..572faa841e 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -535,7 +535,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
   (*phi)->AddInput(*input2);

   graph->BuildDominatorTree();
-  graph->AnalyzeNaturalLoops();
+  graph->AnalyzeLoops();
   return graph;
 }
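The same condition now appears twice in the register allocator (AllocateRegistersInternal and Resolve). Extracted as a hypothetical predicate for readability (the helper is mine; the calls are from the patch):

```cpp
// Catch blocks and irreducible loop headers get all registers blocked at
// their start, forcing their live-in intervals to spill: both are blocks
// whose live-in values cannot be guaranteed to sit in the same registers
// across every incoming edge.
bool MustSpillLiveIns(HBasicBlock* block) {
  return block->IsCatchBlock() ||
         (block->GetLoopInformation() != nullptr &&
          block->GetLoopInformation()->IsIrreducible());
}
```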
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index f6bab8efcb..207e3f3b44 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -68,7 +68,7 @@ void SsaBuilder::FixNullConstantType() {
       //     should be replaced with a null constant.
       // Both type propagation and redundant phi elimination ensure `int_operand`
       // can only be the 0 constant.
-      DCHECK(int_operand->IsIntConstant());
+      DCHECK(int_operand->IsIntConstant()) << int_operand->DebugName();
       DCHECK_EQ(0, int_operand->AsIntConstant()->GetValue());
       equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), int_operand == right ? 1 : 0);
     }
@@ -422,7 +422,7 @@ bool SsaBuilder::FixAmbiguousArrayOps() {
   return true;
 }

-BuildSsaResult SsaBuilder::BuildSsa() {
+GraphAnalysisResult SsaBuilder::BuildSsa() {
   // 1) Visit in reverse post order. We need to have all predecessors of a block
   // visited (with the exception of loops) in order to create the right environment
   // for that block. For loops, we create phis whose inputs will be set in 2).
@@ -462,7 +462,7 @@ BuildSsaResult SsaBuilder::BuildSsa() {
   // computed the type of the array input, the ambiguity can be resolved and the
   // correct equivalents kept.
   if (!FixAmbiguousArrayOps()) {
-    return kBuildSsaFailAmbiguousArrayOp;
+    return kAnalysisFailAmbiguousArrayOp;
   }

   // 8) Mark dead phis. This will mark phis which are not used by instructions
@@ -497,7 +497,7 @@ BuildSsaResult SsaBuilder::BuildSsa() {
     }
   }

-  return kBuildSsaSuccess;
+  return kAnalysisSuccess;
 }

 ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 0fcc3a1306..743dabde0f 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -49,7 +49,7 @@ static constexpr int kDefaultNumberOfLoops = 2;
  */
 class SsaBuilder : public HGraphVisitor {
  public:
-  explicit SsaBuilder(HGraph* graph, StackHandleScopeCollection* handles)
+  SsaBuilder(HGraph* graph, StackHandleScopeCollection* handles)
       : HGraphVisitor(graph),
         handles_(handles),
         agets_fixed_(false),
@@ -63,7 +63,7 @@ class SsaBuilder : public HGraphVisitor {
     loop_headers_.reserve(kDefaultNumberOfLoops);
   }

-  BuildSsaResult BuildSsa();
+  GraphAnalysisResult BuildSsa();

   // Returns locals vector for `block`. If it is a catch block, the vector will be
   // prepopulated with catch phis for vregs which are defined in `current_locals_`.
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index b9d8731cc2..a5609fc466 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -273,6 +273,18 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
     }

     if (block->IsLoopHeader()) {
+      if (kIsDebugBuild && block->GetLoopInformation()->IsIrreducible()) {
+        // To satisfy our liveness algorithm, we need to ensure loop headers of
+        // irreducible loops do not have any live-in instructions, except constants
+        // and the current method, which can be trivially re-materialized.
+        for (uint32_t idx : live_in->Indexes()) {
+          HInstruction* instruction = GetInstructionFromSsaIndex(idx);
+          DCHECK(instruction->GetBlock()->IsEntryBlock()) << instruction->DebugName();
+          DCHECK(!instruction->IsParameterValue()) << instruction->DebugName();
+          DCHECK(instruction->IsCurrentMethod() || instruction->IsConstant())
+              << instruction->DebugName();
+        }
+      }
       size_t last_position = block->GetLoopInformation()->GetLifetimeEnd();
       // For all live_in instructions at the loop header, we need to create a range
       // that covers the full loop.
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 2eef307295..6816b6a028 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -154,6 +154,7 @@ void SsaRedundantPhiElimination::Run() {
     cycle_worklist.push_back(phi);
     visited_phis_in_cycle.insert(phi->GetId());
     bool catch_phi_in_cycle = phi->IsCatchPhi();
+    bool irreducible_loop_phi_in_cycle = phi->IsIrreducibleLoopHeaderPhi();

     // First do a simple loop over inputs and check if they are all the same.
     for (size_t j = 0; j < phi->InputCount(); ++j) {
@@ -187,6 +188,7 @@ void SsaRedundantPhiElimination::Run() {
           cycle_worklist.push_back(input->AsPhi());
           visited_phis_in_cycle.insert(input->GetId());
           catch_phi_in_cycle |= input->AsPhi()->IsCatchPhi();
+          irreducible_loop_phi_in_cycle |= input->IsIrreducibleLoopHeaderPhi();
         } else {
           // Already visited, nothing to do.
         }
@@ -206,6 +208,15 @@ void SsaRedundantPhiElimination::Run() {
       continue;
     }

+    if (irreducible_loop_phi_in_cycle && !candidate->IsConstant()) {
+      // For irreducible loops, we need to keep the phis to satisfy our linear scan
+      // algorithm.
+      // There is one exception for constants, as the type propagation requires redundant
+      // cyclic phis of a constant to be removed. This is ok for the linear scan as it
+      // has to deal with constants anyway, and they can trivially be rematerialized.
+      continue;
+    }
+
     for (HPhi* current : cycle_worklist) {
       // The candidate may not dominate a phi in a catch block: there may be non-throwing
       // instructions at the beginning of a try range, that may be the first input of