Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/loop_analysis.cc      |  56
-rw-r--r--  compiler/optimizing/loop_analysis.h       |  53
-rw-r--r--  compiler/optimizing/loop_optimization.cc  | 118
-rw-r--r--  compiler/optimizing/loop_optimization.h   |  15
4 files changed, 135 insertions, 107 deletions
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index a2124455e2..efb23e7d3e 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -17,19 +17,34 @@
 #include "loop_analysis.h"
 
 #include "base/bit_vector-inl.h"
+#include "induction_var_range.h"
 
 namespace art {
 
 void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
-                                                LoopAnalysisInfo* analysis_results) {
+                                                LoopAnalysisInfo* analysis_results,
+                                                int64_t trip_count) {
+  analysis_results->trip_count_ = trip_count;
+
   for (HBlocksInLoopIterator block_it(*loop_info); !block_it.Done(); block_it.Advance()) {
     HBasicBlock* block = block_it.Current();
 
+    // Check whether one of the successor is loop exit.
     for (HBasicBlock* successor : block->GetSuccessors()) {
       if (!loop_info->Contains(*successor)) {
         analysis_results->exits_num_++;
+
+        // We track number of invariant loop exits which correspond to HIf instruction and
+        // can be eliminated by loop peeling; other control flow instruction are ignored and will
+        // not cause loop peeling to happen as they either cannot be inside a loop, or by
+        // definition cannot be loop exits (unconditional instructions), or are not beneficial for
+        // the optimization.
+        HIf* hif = block->GetLastInstruction()->AsIf();
+        if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) {
+          analysis_results->invariant_exits_num_++;
+        }
       }
     }
 
@@ -48,20 +63,13 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
   }
 }
 
-bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) {
-  HGraph* graph = loop_info->GetHeader()->GetGraph();
-  for (uint32_t block_id : loop_info->GetBlocks().Indexes()) {
-    HBasicBlock* block = graph->GetBlocks()[block_id];
-    DCHECK(block != nullptr);
-    if (block->EndsWithIf()) {
-      HIf* hif = block->GetLastInstruction()->AsIf();
-      HInstruction* input = hif->InputAt(0);
-      if (IsLoopExit(loop_info, hif) && !loop_info->Contains(*input->GetBlock())) {
-        return true;
-      }
-    }
+int64_t LoopAnalysis::GetLoopTripCount(HLoopInformation* loop_info,
+                                       const InductionVarRange* induction_range) {
+  int64_t trip_count;
+  if (!induction_range->HasKnownTripCount(loop_info, &trip_count)) {
+    trip_count = LoopAnalysisInfo::kUnknownTripCount;
   }
-  return false;
+  return trip_count;
 }
 
 // Default implementation of loop helper; used for all targets unless a custom implementation
@@ -77,18 +85,22 @@ class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper {
   // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
   static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6;
 
-  bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
-    return loop_analysis_info->HasLongTypeInstructions() ||
-           IsLoopTooBig(loop_analysis_info,
+  bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const OVERRIDE {
+    return analysis_info->HasLongTypeInstructions() ||
+           IsLoopTooBig(analysis_info,
                         kScalarHeuristicMaxBodySizeInstr,
                         kScalarHeuristicMaxBodySizeBlocks);
   }
 
-  uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
-                                    uint64_t trip_count) const OVERRIDE {
+  uint32_t GetScalarUnrollingFactor(const LoopAnalysisInfo* analysis_info) const OVERRIDE {
+    int64_t trip_count = analysis_info->GetTripCount();
+    // Unroll only loops with known trip count.
+    if (trip_count == LoopAnalysisInfo::kUnknownTripCount) {
+      return LoopAnalysisInfo::kNoUnrollingFactor;
+    }
     uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor;
     if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
-      return kNoUnrollingFactor;
+      return LoopAnalysisInfo::kNoUnrollingFactor;
     }
 
     return desired_unrolling_factor;
@@ -136,12 +148,12 @@ class Arm64LoopHelper : public ArchDefaultLoopHelper {
     // TODO: Unroll loops with unknown trip count.
     DCHECK_NE(vector_length, 0u);
     if (trip_count < (2 * vector_length + max_peel)) {
-      return kNoUnrollingFactor;
+      return LoopAnalysisInfo::kNoUnrollingFactor;
     }
     // Don't unroll for large loop body size.
     uint32_t instruction_count = block->GetInstructions().CountSize();
     if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) {
-      return kNoUnrollingFactor;
+      return LoopAnalysisInfo::kNoUnrollingFactor;
     }
     // Find a beneficial unroll factor with the following restrictions:
     //  - At least one iteration of the transformed loop should be executed.
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
index 7f321b73c8..bcb7b70494 100644
--- a/compiler/optimizing/loop_analysis.h
+++ b/compiler/optimizing/loop_analysis.h
@@ -21,26 +21,33 @@
 namespace art {
 
+class InductionVarRange;
 class LoopAnalysis;
 
-// No loop unrolling factor (just one copy of the loop-body).
-static constexpr uint32_t kNoUnrollingFactor = 1;
-
 // Class to hold cached information on properties of the loop.
 class LoopAnalysisInfo : public ValueObject {
  public:
+  // No loop unrolling factor (just one copy of the loop-body).
+  static constexpr uint32_t kNoUnrollingFactor = 1;
+  // Used for unknown and non-constant trip counts (see InductionVarRange::HasKnownTripCount).
+  static constexpr int64_t kUnknownTripCount = -1;
+
   explicit LoopAnalysisInfo(HLoopInformation* loop_info)
-      : bb_num_(0),
+      : trip_count_(kUnknownTripCount),
+        bb_num_(0),
         instr_num_(0),
         exits_num_(0),
+        invariant_exits_num_(0),
         has_instructions_preventing_scalar_peeling_(false),
         has_instructions_preventing_scalar_unrolling_(false),
         has_long_type_instructions_(false),
        loop_info_(loop_info) {}
 
+  int64_t GetTripCount() const { return trip_count_; }
   size_t GetNumberOfBasicBlocks() const { return bb_num_; }
   size_t GetNumberOfInstructions() const { return instr_num_; }
   size_t GetNumberOfExits() const { return exits_num_; }
+  size_t GetNumberOfInvariantExits() const { return invariant_exits_num_; }
 
   bool HasInstructionsPreventingScalarPeeling() const {
     return has_instructions_preventing_scalar_peeling_;
@@ -50,19 +57,27 @@ class LoopAnalysisInfo : public ValueObject {
     return has_instructions_preventing_scalar_unrolling_;
   }
 
+  bool HasInstructionsPreventingScalarOpts() const {
+    return HasInstructionsPreventingScalarPeeling() || HasInstructionsPreventingScalarUnrolling();
+  }
+
   bool HasLongTypeInstructions() const {
     return has_long_type_instructions_;
   }
 
-  const HLoopInformation* GetLoopInfo() const { return loop_info_; }
+  HLoopInformation* GetLoopInfo() const { return loop_info_; }
 
  private:
+  // Trip count of the loop if known, kUnknownTripCount otherwise.
+  int64_t trip_count_;
   // Number of basic blocks in the loop body.
   size_t bb_num_;
   // Number of instructions in the loop body.
   size_t instr_num_;
   // Number of loop's exits.
   size_t exits_num_;
+  // Number of "if" loop exits (with HIf instruction) whose condition is loop-invariant.
+  size_t invariant_exits_num_;
   // Whether the loop has instructions which make scalar loop peeling non-beneficial.
   bool has_instructions_preventing_scalar_peeling_;
   // Whether the loop has instructions which make scalar loop unrolling non-beneficial.
@@ -72,7 +87,7 @@ class LoopAnalysisInfo : public ValueObject {
   bool has_long_type_instructions_;
 
   // Corresponding HLoopInformation.
-  const HLoopInformation* loop_info_;
+  HLoopInformation* loop_info_;
 
   friend class LoopAnalysis;
 };
@@ -84,20 +99,12 @@ class LoopAnalysis : public ValueObject {
   // Calculates loops basic properties like body size, exits number, etc. and fills
   // 'analysis_results' with this information.
   static void CalculateLoopBasicProperties(HLoopInformation* loop_info,
-                                           LoopAnalysisInfo* analysis_results);
+                                           LoopAnalysisInfo* analysis_results,
+                                           int64_t trip_count);
 
-  // Returns whether the loop has at least one loop invariant exit.
-  static bool HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info);
-
-  // Returns whether HIf's true or false successor is outside the specified loop.
-  //
-  // Prerequisite: HIf must be in the specified loop.
-  static bool IsLoopExit(HLoopInformation* loop_info, const HIf* hif) {
-    DCHECK(loop_info->Contains(*hif->GetBlock()));
-    HBasicBlock* true_succ = hif->IfTrueSuccessor();
-    HBasicBlock* false_succ = hif->IfFalseSuccessor();
-    return (!loop_info->Contains(*true_succ) || !loop_info->Contains(*false_succ));
-  }
+  // Returns the trip count of the loop if it is known and kUnknownTripCount otherwise.
+  static int64_t GetLoopTripCount(HLoopInformation* loop_info,
+                                  const InductionVarRange* induction_range);
 
  private:
   // Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial.
@@ -143,9 +150,9 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
   // Returns optimal scalar unrolling factor for the loop.
   //
   // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper.
-  virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
-                                            uint64_t trip_count ATTRIBUTE_UNUSED) const {
-    return kNoUnrollingFactor;
+  virtual uint32_t GetScalarUnrollingFactor(
+      const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const {
+    return LoopAnalysisInfo::kNoUnrollingFactor;
   }
 
   // Returns whether scalar loop peeling is enabled,
@@ -160,7 +167,7 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
                                              int64_t trip_count ATTRIBUTE_UNUSED,
                                              uint32_t max_peel ATTRIBUTE_UNUSED,
                                              uint32_t vector_length ATTRIBUTE_UNUSED) const {
-    return kNoUnrollingFactor;
+    return LoopAnalysisInfo::kNoUnrollingFactor;
   }
 };
 
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 72aa25302e..440cd3351e 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -744,64 +744,74 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
 }
 
 bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
-  return TryOptimizeInnerLoopFinite(node) ||
-         TryPeelingForLoopInvariantExitsElimination(node) ||
-         TryUnrollingForBranchPenaltyReduction(node);
+  return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node);
 }
 
 //
-// Loop unrolling: generic part methods.
+// Scalar loop peeling and unrolling: generic part methods.
 //
 
-bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) {
-  // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
-  // as InstructionSet is needed.
-  if (compiler_options_ == nullptr) {
+bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
+                                                              bool generate_code) {
+  if (analysis_info->GetNumberOfExits() > 1) {
     return false;
   }
 
-  HLoopInformation* loop_info = node->loop_info;
-  int64_t trip_count = 0;
-  // Only unroll loops with a known tripcount.
-  if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) {
+  uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(analysis_info);
+  if (unrolling_factor == LoopAnalysisInfo::kNoUnrollingFactor) {
     return false;
   }
 
-  uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count);
-  if (unrolling_factor == kNoUnrollingFactor) {
-    return false;
-  }
+  if (generate_code) {
+    // TODO: support other unrolling factors.
+    DCHECK_EQ(unrolling_factor, 2u);
 
-  LoopAnalysisInfo loop_analysis_info(loop_info);
-  LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
+    // Perform unrolling.
+    HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+    PeelUnrollSimpleHelper helper(loop_info);
+    helper.DoUnrolling();
 
-  // Check "IsLoopClonable" last as it can be time-consuming.
-  if (loop_analysis_info.HasInstructionsPreventingScalarUnrolling() ||
-      arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) ||
-      (loop_analysis_info.GetNumberOfExits() > 1) ||
-      !PeelUnrollHelper::IsLoopClonable(loop_info)) {
-    return false;
+    // Remove the redundant loop check after unrolling.
+    HIf* copy_hif =
+        helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
+    int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
+    copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
   }
+  return true;
+}
 
-  // TODO: support other unrolling factors.
-  DCHECK_EQ(unrolling_factor, 2u);
+bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
+                                                                   bool generate_code) {
+  HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+  if (!arch_loop_helper_->IsLoopPeelingEnabled()) {
+    return false;
+  }
 
-  // Perform unrolling.
-  PeelUnrollSimpleHelper helper(loop_info);
-  helper.DoUnrolling();
+  if (analysis_info->GetNumberOfInvariantExits() == 0) {
+    return false;
+  }
 
-  // Remove the redundant loop check after unrolling.
-  HIf* copy_hif =
-      helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
-  int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
-  copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
+  if (generate_code) {
+    // Perform peeling.
+    PeelUnrollSimpleHelper helper(loop_info);
+    helper.DoPeeling();
+
+    // Statically evaluate loop check after peeling for loop invariant condition.
+    const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
+    for (auto entry : *hir_map) {
+      HInstruction* copy = entry.second;
+      if (copy->IsIf()) {
+        TryToEvaluateIfCondition(copy->AsIf(), graph_);
+      }
+    }
+  }
 
   return true;
 }
 
-bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) {
+bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
   // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
   // as InstructionSet is needed.
   if (compiler_options_ == nullptr) {
@@ -809,35 +819,27 @@ bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* nod
   }
 
   HLoopInformation* loop_info = node->loop_info;
-  // Check 'IsLoopClonable' the last as it might be time-consuming.
-  if (!arch_loop_helper_->IsLoopPeelingEnabled()) {
+  int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_);
+  LoopAnalysisInfo analysis_info(loop_info);
+  LoopAnalysis::CalculateLoopBasicProperties(loop_info, &analysis_info, trip_count);
+
+  if (analysis_info.HasInstructionsPreventingScalarOpts() ||
+      arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&analysis_info)) {
     return false;
   }
 
-  LoopAnalysisInfo loop_analysis_info(loop_info);
-  LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
-
-  // Check "IsLoopClonable" last as it can be time-consuming.
-  if (loop_analysis_info.HasInstructionsPreventingScalarPeeling() ||
-      arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) ||
-      !LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) ||
-      !PeelUnrollHelper::IsLoopClonable(loop_info)) {
+  if (!TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
+      !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) {
     return false;
   }
 
-  // Perform peeling.
-  PeelUnrollSimpleHelper helper(loop_info);
-  helper.DoPeeling();
-
-  const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
-  for (auto entry : *hir_map) {
-    HInstruction* copy = entry.second;
-    if (copy->IsIf()) {
-      TryToEvaluateIfCondition(copy->AsIf(), graph_);
-    }
+  // Run 'IsLoopClonable' the last as it might be time-consuming.
+  if (!PeelUnrollHelper::IsLoopClonable(loop_info)) {
+    return false;
   }
 
-  return true;
+  return TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
+         TryUnrollingForBranchPenaltyReduction(&analysis_info);
 }
 
 //
@@ -1076,7 +1078,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
                     vector_index_,
                     ptc,
                     graph_->GetConstant(induc_type, 1),
-                    kNoUnrollingFactor);
+                    LoopAnalysisInfo::kNoUnrollingFactor);
   }
 
   // Generate vector loop, possibly further unrolled:
@@ -1103,7 +1105,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
                     vector_index_,
                     stc,
                     graph_->GetConstant(induc_type, 1),
-                    kNoUnrollingFactor);
+                    LoopAnalysisInfo::kNoUnrollingFactor);
   }
 
   // Link reductions to their final uses.
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 9743b25259..bc4792458b 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -144,12 +144,19 @@ class HLoopOptimization : public HOptimization {
   bool OptimizeInnerLoop(LoopNode* node);
 
   // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling
-  // opportunities. Returns whether transformation happened.
-  bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node);
+  // opportunities. Returns whether transformation happened. 'generate_code' determines whether the
+  // optimization should be actually applied.
+  bool TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
+                                             bool generate_code = true);
 
   // Tries to apply loop peeling for loop invariant exits elimination. Returns whether
-  // transformation happened.
-  bool TryPeelingForLoopInvariantExitsElimination(LoopNode* loop_node);
+  // transformation happened. 'generate_code' determines whether the optimization should be
+  // actually applied.
+  bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
+                                                  bool generate_code = true);
+
+  // Tries to apply scalar loop peeling and unrolling.
+  bool TryPeelingAndUnrolling(LoopNode* node);
 
   //
   // Vectorization analysis and synthesis.
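
Note on the 'generate_code' convention introduced by this change: the same TryXxx routine is used twice, first as a cheap feasibility check (generate_code == false) and then, only after the shared analysis and the expensive IsLoopClonable check have passed, as the actual transformation. The sketch below is not ART code; it is a minimal standalone C++ illustration of that check-then-apply pattern, with hypothetical AnalysisInfo, CanApply-style checks and ExpensiveClonabilityCheck standing in for LoopAnalysisInfo, the per-target heuristics and PeelUnrollHelper::IsLoopClonable.

#include <iostream>

// Hypothetical stand-in for the cached loop analysis results.
struct AnalysisInfo {
  int invariant_exits;
  int exits;
};

// Mirrors the TryXxx(analysis_info, generate_code) convention: with
// generate_code == false the function only answers "would this apply?",
// with generate_code == true it also performs the transformation.
bool TryPeel(const AnalysisInfo& info, bool generate_code = true) {
  if (info.invariant_exits == 0) {
    return false;  // Peeling cannot eliminate any invariant exit.
  }
  if (generate_code) {
    std::cout << "peeling applied\n";  // Placeholder for DoPeeling().
  }
  return true;
}

bool TryUnroll(const AnalysisInfo& info, bool generate_code = true) {
  if (info.exits > 1) {
    return false;  // Only single-exit loops are unrolled.
  }
  if (generate_code) {
    std::cout << "unrolling applied\n";  // Placeholder for DoUnrolling().
  }
  return true;
}

// Stands in for the costly PeelUnrollHelper::IsLoopClonable() check.
bool ExpensiveClonabilityCheck() { return true; }

// Same shape as the new TryPeelingAndUnrolling(): dry-run both optimizations
// first, pay for the clonability check only if one of them would fire, then
// run the first applicable transformation for real.
bool TryPeelingAndUnrolling(const AnalysisInfo& info) {
  if (!TryPeel(info, /*generate_code=*/ false) && !TryUnroll(info, /*generate_code=*/ false)) {
    return false;
  }
  if (!ExpensiveClonabilityCheck()) {
    return false;
  }
  return TryPeel(info) || TryUnroll(info);
}

int main() {
  // A loop with no invariant exits and a single exit: peeling is rejected
  // cheaply, unrolling passes the dry run and is then applied.
  AnalysisInfo info{/*invariant_exits=*/ 0, /*exits=*/ 1};
  std::cout << (TryPeelingAndUnrolling(info) ? "transformed" : "skipped") << "\n";
  return 0;
}

The split keeps the design of the patch visible: the analysis (trip count, body size, exit counts) is computed once per loop, all cheap rejections happen before the costly clonability check, and code generation runs only for the transformation that was proven applicable.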