summaryrefslogtreecommitdiff
path: root/compiler/optimizing
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--compiler/optimizing/loop_analysis.cc56
-rw-r--r--compiler/optimizing/loop_analysis.h53
-rw-r--r--compiler/optimizing/loop_optimization.cc118
-rw-r--r--compiler/optimizing/loop_optimization.h15
4 files changed, 135 insertions, 107 deletions
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index a2124455e2..efb23e7d3e 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -17,19 +17,34 @@
#include "loop_analysis.h"
#include "base/bit_vector-inl.h"
+#include "induction_var_range.h"
namespace art {
void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
- LoopAnalysisInfo* analysis_results) {
+ LoopAnalysisInfo* analysis_results,
+ int64_t trip_count) {
+ analysis_results->trip_count_ = trip_count;
+
for (HBlocksInLoopIterator block_it(*loop_info);
!block_it.Done();
block_it.Advance()) {
HBasicBlock* block = block_it.Current();
+ // Check whether one of the successors is a loop exit.
for (HBasicBlock* successor : block->GetSuccessors()) {
if (!loop_info->Contains(*successor)) {
analysis_results->exits_num_++;
+
+ // We track number of invariant loop exits which correspond to HIf instruction and
+ // can be eliminated by loop peeling; other control flow instructions are ignored and will
+ // not cause loop peeling to happen as they either cannot be inside a loop, or by
+ // definition cannot be loop exits (unconditional instructions), or are not beneficial for
+ // the optimization.
+ HIf* hif = block->GetLastInstruction()->AsIf();
+ if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) {
+ analysis_results->invariant_exits_num_++;
+ }
}
}
@@ -48,20 +63,13 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
}
}
-bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) {
- HGraph* graph = loop_info->GetHeader()->GetGraph();
- for (uint32_t block_id : loop_info->GetBlocks().Indexes()) {
- HBasicBlock* block = graph->GetBlocks()[block_id];
- DCHECK(block != nullptr);
- if (block->EndsWithIf()) {
- HIf* hif = block->GetLastInstruction()->AsIf();
- HInstruction* input = hif->InputAt(0);
- if (IsLoopExit(loop_info, hif) && !loop_info->Contains(*input->GetBlock())) {
- return true;
- }
- }
+int64_t LoopAnalysis::GetLoopTripCount(HLoopInformation* loop_info,
+ const InductionVarRange* induction_range) {
+ int64_t trip_count;
+ if (!induction_range->HasKnownTripCount(loop_info, &trip_count)) {
+ trip_count = LoopAnalysisInfo::kUnknownTripCount;
}
- return false;
+ return trip_count;
}
// Default implementation of loop helper; used for all targets unless a custom implementation
@@ -77,18 +85,22 @@ class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper {
// Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6;
- bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
- return loop_analysis_info->HasLongTypeInstructions() ||
- IsLoopTooBig(loop_analysis_info,
+ bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const OVERRIDE {
+ return analysis_info->HasLongTypeInstructions() ||
+ IsLoopTooBig(analysis_info,
kScalarHeuristicMaxBodySizeInstr,
kScalarHeuristicMaxBodySizeBlocks);
}
- uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
- uint64_t trip_count) const OVERRIDE {
+ uint32_t GetScalarUnrollingFactor(const LoopAnalysisInfo* analysis_info) const OVERRIDE {
+ int64_t trip_count = analysis_info->GetTripCount();
+ // Unroll only loops with known trip count.
+ if (trip_count == LoopAnalysisInfo::kUnknownTripCount) {
+ return LoopAnalysisInfo::kNoUnrollingFactor;
+ }
uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor;
if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
- return kNoUnrollingFactor;
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
return desired_unrolling_factor;
@@ -136,12 +148,12 @@ class Arm64LoopHelper : public ArchDefaultLoopHelper {
// TODO: Unroll loops with unknown trip count.
DCHECK_NE(vector_length, 0u);
if (trip_count < (2 * vector_length + max_peel)) {
- return kNoUnrollingFactor;
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
// Don't unroll for large loop body size.
uint32_t instruction_count = block->GetInstructions().CountSize();
if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) {
- return kNoUnrollingFactor;
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
// Find a beneficial unroll factor with the following restrictions:
// - At least one iteration of the transformed loop should be executed.
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
index 7f321b73c8..bcb7b70494 100644
--- a/compiler/optimizing/loop_analysis.h
+++ b/compiler/optimizing/loop_analysis.h
@@ -21,26 +21,33 @@
namespace art {
+class InductionVarRange;
class LoopAnalysis;
-// No loop unrolling factor (just one copy of the loop-body).
-static constexpr uint32_t kNoUnrollingFactor = 1;
-
// Class to hold cached information on properties of the loop.
class LoopAnalysisInfo : public ValueObject {
public:
+ // No loop unrolling factor (just one copy of the loop-body).
+ static constexpr uint32_t kNoUnrollingFactor = 1;
+ // Used for unknown and non-constant trip counts (see InductionVarRange::HasKnownTripCount).
+ static constexpr int64_t kUnknownTripCount = -1;
+
explicit LoopAnalysisInfo(HLoopInformation* loop_info)
- : bb_num_(0),
+ : trip_count_(kUnknownTripCount),
+ bb_num_(0),
instr_num_(0),
exits_num_(0),
+ invariant_exits_num_(0),
has_instructions_preventing_scalar_peeling_(false),
has_instructions_preventing_scalar_unrolling_(false),
has_long_type_instructions_(false),
loop_info_(loop_info) {}
+ int64_t GetTripCount() const { return trip_count_; }
size_t GetNumberOfBasicBlocks() const { return bb_num_; }
size_t GetNumberOfInstructions() const { return instr_num_; }
size_t GetNumberOfExits() const { return exits_num_; }
+ size_t GetNumberOfInvariantExits() const { return invariant_exits_num_; }
bool HasInstructionsPreventingScalarPeeling() const {
return has_instructions_preventing_scalar_peeling_;
@@ -50,19 +57,27 @@ class LoopAnalysisInfo : public ValueObject {
return has_instructions_preventing_scalar_unrolling_;
}
+ bool HasInstructionsPreventingScalarOpts() const {
+ return HasInstructionsPreventingScalarPeeling() || HasInstructionsPreventingScalarUnrolling();
+ }
+
bool HasLongTypeInstructions() const {
return has_long_type_instructions_;
}
- const HLoopInformation* GetLoopInfo() const { return loop_info_; }
+ HLoopInformation* GetLoopInfo() const { return loop_info_; }
private:
+ // Trip count of the loop if known, kUnknownTripCount otherwise.
+ int64_t trip_count_;
// Number of basic blocks in the loop body.
size_t bb_num_;
// Number of instructions in the loop body.
size_t instr_num_;
// Number of loop's exits.
size_t exits_num_;
+ // Number of "if" loop exits (with HIf instruction) whose condition is loop-invariant.
+ size_t invariant_exits_num_;
// Whether the loop has instructions which make scalar loop peeling non-beneficial.
bool has_instructions_preventing_scalar_peeling_;
// Whether the loop has instructions which make scalar loop unrolling non-beneficial.
@@ -72,7 +87,7 @@ class LoopAnalysisInfo : public ValueObject {
bool has_long_type_instructions_;
// Corresponding HLoopInformation.
- const HLoopInformation* loop_info_;
+ HLoopInformation* loop_info_;
friend class LoopAnalysis;
};
@@ -84,20 +99,12 @@ class LoopAnalysis : public ValueObject {
// Calculates loops basic properties like body size, exits number, etc. and fills
// 'analysis_results' with this information.
static void CalculateLoopBasicProperties(HLoopInformation* loop_info,
- LoopAnalysisInfo* analysis_results);
+ LoopAnalysisInfo* analysis_results,
+ int64_t trip_count);
- // Returns whether the loop has at least one loop invariant exit.
- static bool HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info);
-
- // Returns whether HIf's true or false successor is outside the specified loop.
- //
- // Prerequisite: HIf must be in the specified loop.
- static bool IsLoopExit(HLoopInformation* loop_info, const HIf* hif) {
- DCHECK(loop_info->Contains(*hif->GetBlock()));
- HBasicBlock* true_succ = hif->IfTrueSuccessor();
- HBasicBlock* false_succ = hif->IfFalseSuccessor();
- return (!loop_info->Contains(*true_succ) || !loop_info->Contains(*false_succ));
- }
+ // Returns the trip count of the loop if it is known and kUnknownTripCount otherwise.
+ static int64_t GetLoopTripCount(HLoopInformation* loop_info,
+ const InductionVarRange* induction_range);
private:
// Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial.
@@ -143,9 +150,9 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
// Returns optimal scalar unrolling factor for the loop.
//
// Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper.
- virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
- uint64_t trip_count ATTRIBUTE_UNUSED) const {
- return kNoUnrollingFactor;
+ virtual uint32_t GetScalarUnrollingFactor(
+ const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const {
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
// Returns whether scalar loop peeling is enabled,
@@ -160,7 +167,7 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
int64_t trip_count ATTRIBUTE_UNUSED,
uint32_t max_peel ATTRIBUTE_UNUSED,
uint32_t vector_length ATTRIBUTE_UNUSED) const {
- return kNoUnrollingFactor;
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
};
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 72aa25302e..440cd3351e 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -744,64 +744,74 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
}
bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
- return TryOptimizeInnerLoopFinite(node) ||
- TryPeelingForLoopInvariantExitsElimination(node) ||
- TryUnrollingForBranchPenaltyReduction(node);
+ return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node);
}
//
-// Loop unrolling: generic part methods.
+// Scalar loop peeling and unrolling: generic part methods.
//
-bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) {
- // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
- // as InstructionSet is needed.
- if (compiler_options_ == nullptr) {
+bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
+ bool generate_code) {
+ if (analysis_info->GetNumberOfExits() > 1) {
return false;
}
- HLoopInformation* loop_info = node->loop_info;
- int64_t trip_count = 0;
- // Only unroll loops with a known tripcount.
- if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) {
+ uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(analysis_info);
+ if (unrolling_factor == LoopAnalysisInfo::kNoUnrollingFactor) {
return false;
}
- uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count);
- if (unrolling_factor == kNoUnrollingFactor) {
- return false;
- }
+ if (generate_code) {
+ // TODO: support other unrolling factors.
+ DCHECK_EQ(unrolling_factor, 2u);
- LoopAnalysisInfo loop_analysis_info(loop_info);
- LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
+ // Perform unrolling.
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ PeelUnrollSimpleHelper helper(loop_info);
+ helper.DoUnrolling();
- // Check "IsLoopClonable" last as it can be time-consuming.
- if (loop_analysis_info.HasInstructionsPreventingScalarUnrolling() ||
- arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) ||
- (loop_analysis_info.GetNumberOfExits() > 1) ||
- !PeelUnrollHelper::IsLoopClonable(loop_info)) {
- return false;
+ // Remove the redundant loop check after unrolling.
+ HIf* copy_hif =
+ helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
+ int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
+ copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
}
+ return true;
+}
- // TODO: support other unrolling factors.
- DCHECK_EQ(unrolling_factor, 2u);
+bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
+ bool generate_code) {
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ if (!arch_loop_helper_->IsLoopPeelingEnabled()) {
+ return false;
+ }
- // Perform unrolling.
- PeelUnrollSimpleHelper helper(loop_info);
- helper.DoUnrolling();
+ if (analysis_info->GetNumberOfInvariantExits() == 0) {
+ return false;
+ }
- // Remove the redundant loop check after unrolling.
- HIf* copy_hif =
- helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
- int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
- copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
+ if (generate_code) {
+ // Perform peeling.
+ PeelUnrollSimpleHelper helper(loop_info);
+ helper.DoPeeling();
+
+ // Statically evaluate loop check after peeling for loop invariant condition.
+ const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
+ for (auto entry : *hir_map) {
+ HInstruction* copy = entry.second;
+ if (copy->IsIf()) {
+ TryToEvaluateIfCondition(copy->AsIf(), graph_);
+ }
+ }
+ }
return true;
}
-bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) {
+bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
// Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
// as InstructionSet is needed.
if (compiler_options_ == nullptr) {
@@ -809,35 +819,27 @@ bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* nod
}
HLoopInformation* loop_info = node->loop_info;
- // Check 'IsLoopClonable' the last as it might be time-consuming.
- if (!arch_loop_helper_->IsLoopPeelingEnabled()) {
+ int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_);
+ LoopAnalysisInfo analysis_info(loop_info);
+ LoopAnalysis::CalculateLoopBasicProperties(loop_info, &analysis_info, trip_count);
+
+ if (analysis_info.HasInstructionsPreventingScalarOpts() ||
+ arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&analysis_info)) {
return false;
}
- LoopAnalysisInfo loop_analysis_info(loop_info);
- LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
-
- // Check "IsLoopClonable" last as it can be time-consuming.
- if (loop_analysis_info.HasInstructionsPreventingScalarPeeling() ||
- arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) ||
- !LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) ||
- !PeelUnrollHelper::IsLoopClonable(loop_info)) {
+ if (!TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
+ !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) {
return false;
}
- // Perform peeling.
- PeelUnrollSimpleHelper helper(loop_info);
- helper.DoPeeling();
-
- const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
- for (auto entry : *hir_map) {
- HInstruction* copy = entry.second;
- if (copy->IsIf()) {
- TryToEvaluateIfCondition(copy->AsIf(), graph_);
- }
+ // Run 'IsLoopClonable' the last as it might be time-consuming.
+ if (!PeelUnrollHelper::IsLoopClonable(loop_info)) {
+ return false;
}
- return true;
+ return TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
+ TryUnrollingForBranchPenaltyReduction(&analysis_info);
}
//
@@ -1076,7 +1078,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
vector_index_,
ptc,
graph_->GetConstant(induc_type, 1),
- kNoUnrollingFactor);
+ LoopAnalysisInfo::kNoUnrollingFactor);
}
// Generate vector loop, possibly further unrolled:
@@ -1103,7 +1105,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
vector_index_,
stc,
graph_->GetConstant(induc_type, 1),
- kNoUnrollingFactor);
+ LoopAnalysisInfo::kNoUnrollingFactor);
}
// Link reductions to their final uses.
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 9743b25259..bc4792458b 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -144,12 +144,19 @@ class HLoopOptimization : public HOptimization {
bool OptimizeInnerLoop(LoopNode* node);
// Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling
- // opportunities. Returns whether transformation happened.
- bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node);
+ // opportunities. Returns whether transformation happened. 'generate_code' determines whether the
+ // optimization should be actually applied.
+ bool TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
+ bool generate_code = true);
// Tries to apply loop peeling for loop invariant exits elimination. Returns whether
- // transformation happened.
- bool TryPeelingForLoopInvariantExitsElimination(LoopNode* loop_node);
+ // transformation happened. 'generate_code' determines whether the optimization should be
+ // actually applied.
+ bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
+ bool generate_code = true);
+
+ // Tries to apply scalar loop peeling and unrolling.
+ bool TryPeelingAndUnrolling(LoopNode* node);
//
// Vectorization analysis and synthesis.