diff options
-rw-r--r-- | compiler/optimizing/loop_analysis.cc | 25 | ||||
-rw-r--r-- | compiler/optimizing/loop_analysis.h | 43 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 115 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.h | 13 | ||||
-rw-r--r-- | compiler/optimizing/superblock_cloner.cc | 21 | ||||
-rw-r--r-- | compiler/optimizing/superblock_cloner.h | 3 |
6 files changed, 159 insertions, 61 deletions
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc index cd3bdaf016..a0760eff69 100644 --- a/compiler/optimizing/loop_analysis.cc +++ b/compiler/optimizing/loop_analysis.cc @@ -16,6 +16,8 @@ #include "loop_analysis.h" +#include "base/bit_vector-inl.h" + namespace art { void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, @@ -33,7 +35,8 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); - if (MakesScalarUnrollingNonBeneficial(instruction)) { + if (MakesScalarPeelingUnrollingNonBeneficial(instruction)) { + analysis_results->has_instructions_preventing_scalar_peeling_ = true; analysis_results->has_instructions_preventing_scalar_unrolling_ = true; } analysis_results->instr_num_++; @@ -42,6 +45,22 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, } } +bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) { + HGraph* graph = loop_info->GetHeader()->GetGraph(); + for (uint32_t block_id : loop_info->GetBlocks().Indexes()) { + HBasicBlock* block = graph->GetBlocks()[block_id]; + DCHECK(block != nullptr); + if (block->EndsWithIf()) { + HIf* hif = block->GetLastInstruction()->AsIf(); + HInstruction* input = hif->InputAt(0); + if (IsLoopExit(loop_info, hif) && !loop_info->Contains(*input->GetBlock())) { + return true; + } + } + } + return false; +} + class Arm64LoopHelper : public ArchDefaultLoopHelper { public: // Scalar loop unrolling parameters and heuristics. @@ -60,7 +79,7 @@ class Arm64LoopHelper : public ArchDefaultLoopHelper { // Loop's maximum instruction count. Loops with higher count will not be unrolled. static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50; - bool IsLoopTooBigForScalarUnrolling(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE { + bool IsLoopTooBigForScalarPeelingUnrolling(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE { size_t instr_num = loop_analysis_info->GetNumberOfInstructions(); size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks(); return (instr_num >= kArm64ScalarHeuristicMaxBodySizeInstr || @@ -77,6 +96,8 @@ class Arm64LoopHelper : public ArchDefaultLoopHelper { return desired_unrolling_factor; } + bool IsLoopPeelingEnabled() const OVERRIDE { return true; } + uint32_t GetSIMDUnrollingFactor(HBasicBlock* block, int64_t trip_count, uint32_t max_peel, diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h index bad406f10b..ece9858136 100644 --- a/compiler/optimizing/loop_analysis.h +++ b/compiler/optimizing/loop_analysis.h @@ -33,6 +33,7 @@ class LoopAnalysisInfo : public ValueObject { : bb_num_(0), instr_num_(0), exits_num_(0), + has_instructions_preventing_scalar_peeling_(false), has_instructions_preventing_scalar_unrolling_(false), loop_info_(loop_info) {} @@ -40,6 +41,10 @@ class LoopAnalysisInfo : public ValueObject { size_t GetNumberOfInstructions() const { return instr_num_; } size_t GetNumberOfExits() const { return exits_num_; } + bool HasInstructionsPreventingScalarPeeling() const { + return has_instructions_preventing_scalar_peeling_; + } + bool HasInstructionsPreventingScalarUnrolling() const { return has_instructions_preventing_scalar_unrolling_; } @@ -53,6 +58,8 @@ class LoopAnalysisInfo : public ValueObject { size_t instr_num_; // Number of loop's exits. size_t exits_num_; + // Whether the loop has instructions which make scalar loop peeling non-beneficial. + bool has_instructions_preventing_scalar_peeling_; // Whether the loop has instructions which make scalar loop unrolling non-beneficial. bool has_instructions_preventing_scalar_unrolling_; @@ -71,22 +78,35 @@ class LoopAnalysis : public ValueObject { static void CalculateLoopBasicProperties(HLoopInformation* loop_info, LoopAnalysisInfo* analysis_results); + // Returns whether the loop has at least one loop invariant exit. + static bool HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info); + + // Returns whether HIf's true or false successor is outside the specified loop. + // + // Prerequisite: HIf must be in the specified loop. + static bool IsLoopExit(HLoopInformation* loop_info, const HIf* hif) { + DCHECK(loop_info->Contains(*hif->GetBlock())); + HBasicBlock* true_succ = hif->IfTrueSuccessor(); + HBasicBlock* false_succ = hif->IfFalseSuccessor(); + return (!loop_info->Contains(*true_succ) || !loop_info->Contains(*false_succ)); + } + private: - // Returns whether an instruction makes scalar loop unrolling non-beneficial. + // Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial. // // If in the loop body we have a dex/runtime call then its contribution to the whole - // loop performance will probably prevail. So unrolling optimization will not bring - // any noticeable performance improvement however will increase the code size. - static bool MakesScalarUnrollingNonBeneficial(HInstruction* instruction) { + // loop performance will probably prevail. So peeling/unrolling optimization will not bring + // any noticeable performance improvement. It will increase the code size. + static bool MakesScalarPeelingUnrollingNonBeneficial(HInstruction* instruction) { return (instruction->IsNewArray() || instruction->IsNewInstance() || instruction->IsUnresolvedInstanceFieldGet() || instruction->IsUnresolvedInstanceFieldSet() || instruction->IsUnresolvedStaticFieldGet() || instruction->IsUnresolvedStaticFieldSet() || - // TODO: Unroll loops with intrinsified invokes. + // TODO: Support loops with intrinsified invokes. instruction->IsInvoke() || - // TODO: Unroll loops with ClinitChecks. + // TODO: Support loops with ClinitChecks. instruction->IsClinitCheck()); } }; @@ -105,14 +125,14 @@ class ArchDefaultLoopHelper : public ArenaObject<kArenaAllocOptimization> { // doesn't support loop peeling and unrolling. static ArchDefaultLoopHelper* Create(InstructionSet isa, ArenaAllocator* allocator); - // Returns whether the loop is too big for loop unrolling by checking its total number of + // Returns whether the loop is too big for loop peeling/unrolling by checking its total number of // basic blocks and instructions. // - // If the loop body has too many instructions then unrolling optimization will not bring + // If the loop body has too many instructions then peeling/unrolling optimization will not bring // any noticeable performance improvement however will increase the code size. // // Returns 'true' by default, should be overridden by particular target loop helper. - virtual bool IsLoopTooBigForScalarUnrolling( + virtual bool IsLoopTooBigForScalarPeelingUnrolling( LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; } // Returns optimal scalar unrolling factor for the loop. @@ -123,6 +143,11 @@ class ArchDefaultLoopHelper : public ArenaObject<kArenaAllocOptimization> { return kNoUnrollingFactor; } + // Returns whether scalar loop peeling is enabled, + // + // Returns 'false' by default, should be overridden by particular target loop helper. + virtual bool IsLoopPeelingEnabled() const { return false; } + // Returns optimal SIMD unrolling factor for the loop. // // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index b41b659083..c0c721de63 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -34,7 +34,7 @@ namespace art { static constexpr bool kEnableVectorization = true; // Enables scalar loop unrolling in the loop optimizer. -static constexpr bool kEnableScalarUnrolling = false; +static constexpr bool kEnableScalarPeelingUnrolling = false; // // Static helpers. @@ -533,6 +533,43 @@ static bool CheckInductionSetFullyRemoved(ScopedArenaSet<HInstruction*>* iset) { return true; } +// Tries to statically evaluate condition of the specified "HIf" for other condition checks. +static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) { + HInstruction* cond = instruction->InputAt(0); + + // If a condition 'cond' is evaluated in an HIf instruction then in the successors of the + // IF_BLOCK we statically know the value of the condition 'cond' (TRUE in TRUE_SUCC, FALSE in + // FALSE_SUCC). Using that we can replace another evaluation (use) EVAL of the same 'cond' + // with TRUE value (FALSE value) if every path from the ENTRY_BLOCK to EVAL_BLOCK contains the + // edge HIF_BLOCK->TRUE_SUCC (HIF_BLOCK->FALSE_SUCC). + // if (cond) { if(cond) { + // if (cond) {} if (1) {} + // } else { =======> } else { + // if (cond) {} if (0) {} + // } } + if (!cond->IsConstant()) { + HBasicBlock* true_succ = instruction->IfTrueSuccessor(); + HBasicBlock* false_succ = instruction->IfFalseSuccessor(); + + DCHECK_EQ(true_succ->GetPredecessors().size(), 1u); + DCHECK_EQ(false_succ->GetPredecessors().size(), 1u); + + const HUseList<HInstruction*>& uses = cond->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + HBasicBlock* user_block = user->GetBlock(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + if (true_succ->Dominates(user_block)) { + user->ReplaceInput(graph->GetIntConstant(1), index); + } else if (false_succ->Dominates(user_block)) { + user->ReplaceInput(graph->GetIntConstant(0), index); + } + } + } +} + // // Public methods. // @@ -851,40 +888,24 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { return TryOptimizeInnerLoopFinite(node) || + TryPeelingForLoopInvariantExitsElimination(node) || TryUnrollingForBranchPenaltyReduction(node); } -void HLoopOptimization::PeelOrUnrollOnce(LoopNode* loop_node, - bool do_unrolling, - SuperblockCloner::HBasicBlockMap* bb_map, - SuperblockCloner::HInstructionMap* hir_map) { - // TODO: peel loop nests. - DCHECK(loop_node->inner == nullptr); - - // Check that loop info is up-to-date. - HLoopInformation* loop_info = loop_node->loop_info; - HBasicBlock* header = loop_info->GetHeader(); - DCHECK(loop_info == header->GetLoopInformation()); - PeelUnrollHelper helper(loop_info, bb_map, hir_map); - DCHECK(helper.IsLoopClonable()); - HBasicBlock* new_header = do_unrolling ? helper.DoUnrolling() : helper.DoPeeling(); - DCHECK(header == new_header); - DCHECK(loop_info == new_header->GetLoopInformation()); -} // // Loop unrolling: generic part methods. // -bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node) { +bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) { // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests) // as InstructionSet is needed. - if (!kEnableScalarUnrolling || compiler_driver_ == nullptr) { + if (!kEnableScalarPeelingUnrolling || compiler_driver_ == nullptr) { return false; } - HLoopInformation* loop_info = loop_node->loop_info; + HLoopInformation* loop_info = node->loop_info; int64_t trip_count = 0; // Only unroll loops with a known tripcount. if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) { @@ -900,7 +921,7 @@ bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* loop_nod LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info); // Check "IsLoopClonable" last as it can be time-consuming. - if (arch_loop_helper_->IsLoopTooBigForScalarUnrolling(&loop_analysis_info) || + if (arch_loop_helper_->IsLoopTooBigForScalarPeelingUnrolling(&loop_analysis_info) || (loop_analysis_info.GetNumberOfExits() > 1) || loop_analysis_info.HasInstructionsPreventingScalarUnrolling() || !PeelUnrollHelper::IsLoopClonable(loop_info)) { @@ -911,21 +932,57 @@ bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* loop_nod DCHECK_EQ(unrolling_factor, 2u); // Perform unrolling. - ArenaAllocator* arena = loop_info->GetHeader()->GetGraph()->GetAllocator(); - SuperblockCloner::HBasicBlockMap bb_map( - std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner)); - SuperblockCloner::HInstructionMap hir_map( - std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner)); - PeelOrUnrollOnce(loop_node, /* unrolling */ true, &bb_map, &hir_map); + PeelUnrollSimpleHelper helper(loop_info); + helper.DoUnrolling(); // Remove the redundant loop check after unrolling. - HIf* copy_hif = bb_map.Get(loop_info->GetHeader())->GetLastInstruction()->AsIf(); + HIf* copy_hif = + helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf(); int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0; copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u); return true; } +bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) { + // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests) + // as InstructionSet is needed. + if (!kEnableScalarPeelingUnrolling || compiler_driver_ == nullptr) { + return false; + } + + HLoopInformation* loop_info = node->loop_info; + // Check 'IsLoopClonable' the last as it might be time-consuming. + if (!arch_loop_helper_->IsLoopPeelingEnabled()) { + return false; + } + + LoopAnalysisInfo loop_analysis_info(loop_info); + LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info); + + // Check "IsLoopClonable" last as it can be time-consuming. + if (arch_loop_helper_->IsLoopTooBigForScalarPeelingUnrolling(&loop_analysis_info) || + loop_analysis_info.HasInstructionsPreventingScalarPeeling() || + !LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) || + !PeelUnrollHelper::IsLoopClonable(loop_info)) { + return false; + } + + // Perform peeling. + PeelUnrollSimpleHelper helper(loop_info); + helper.DoPeeling(); + + const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap(); + for (auto entry : *hir_map) { + HInstruction* copy = entry.second; + if (copy->IsIf()) { + TryToEvaluateIfCondition(copy->AsIf(), graph_); + } + } + + return true; +} + // // Loop vectorization. The implementation is based on the book by Aart J.C. Bik: // "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance." diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 0120cffa56..f9a31a34d4 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -145,19 +145,14 @@ class HLoopOptimization : public HOptimization { // Performs optimizations specific to inner loop. Returns true if anything changed. bool OptimizeInnerLoop(LoopNode* node); - // Performs loop peeling/unrolling once (depends on the 'do_unrolling'); the transformation - // preserves the header and the loop info. - // - // Note: the function records copying information about blocks and instructions. - void PeelOrUnrollOnce(LoopNode* loop_node, - bool do_unrolling, - SuperblockCloner::HBasicBlockMap* bb_map, - SuperblockCloner::HInstructionMap* hir_map); - // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling // opportunities. Returns whether transformation happened. bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node); + // Tries to apply loop peeling for loop invariant exits elimination. Returns whether + // transformation happened. + bool TryPeelingForLoopInvariantExitsElimination(LoopNode* loop_node); + // // Vectorization analysis and synthesis. // diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc index ee74f1001f..fad7729956 100644 --- a/compiler/optimizing/superblock_cloner.cc +++ b/compiler/optimizing/superblock_cloner.cc @@ -28,11 +28,6 @@ using HInstructionMap = SuperblockCloner::HInstructionMap; using HBasicBlockSet = SuperblockCloner::HBasicBlockSet; using HEdgeSet = SuperblockCloner::HEdgeSet; -// When doing peeling we can choose whether to keep original loop (made of original basic blocks) -// and form a peeled iteration of the copy blocks (preserve the header) or transfer original loop -// blocks to the peeled iteration and create new loop from the copy blocks. Similar for unrolling. -static const bool kPeelUnrollPreserveHeader = true; - void HEdge::Dump(std::ostream& stream) const { stream << "(" << from_ << "->" << to_ << ")"; } @@ -926,16 +921,12 @@ void CollectRemappingInfoForPeelUnroll(bool to_unroll, remap_orig_internal->Insert(e); remap_copy_internal->Insert(e); } else { - if (kPeelUnrollPreserveHeader) { - remap_copy_internal->Insert(e); - } else { - remap_orig_internal->Insert(e); - } + remap_copy_internal->Insert(e); } } // Set up remap_incoming edges set. - if (to_unroll != kPeelUnrollPreserveHeader) { + if (!to_unroll) { remap_incoming->Insert(HEdge(loop_info->GetPreHeader(), loop_header)); } } @@ -992,6 +983,9 @@ HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) { DCHECK(!loop_info_->IsIrreducible()); HBasicBlock* loop_header = loop_info_->GetHeader(); + // Check that loop info is up-to-date. + DCHECK(loop_info_ == loop_header->GetLoopInformation()); + HGraph* graph = loop_header->GetGraph(); ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool()); @@ -1009,7 +1003,10 @@ HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) { cloner_.Run(); cloner_.CleanUp(); - return kPeelUnrollPreserveHeader ? loop_header : cloner_.GetBlockCopy(loop_header); + // Check that loop info is preserved. + DCHECK(loop_info_ == loop_header->GetLoopInformation()); + + return loop_header; } PeelUnrollSimpleHelper::PeelUnrollSimpleHelper(HLoopInformation* info) diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h index afd5a5d6e7..e0931674cb 100644 --- a/compiler/optimizing/superblock_cloner.h +++ b/compiler/optimizing/superblock_cloner.h @@ -372,6 +372,9 @@ class PeelUnrollSimpleHelper : public ValueObject { HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); } HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); } + const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; } + const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; } + private: SuperblockCloner::HBasicBlockMap bb_map_; SuperblockCloner::HInstructionMap hir_map_; |