summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Artem Serov <artem.serov@linaro.org> 2018-06-12 10:23:27 +0100
committer Artem Serov <artem.serov@linaro.org> 2018-07-04 13:12:13 +0100
commit0e32908d0ee4be5905cdd409dd3c45331fc98465 (patch)
tree592ffd19b21379b0815345302b04911932fa90f9
parentb5271dd44a30f498689e503340d3c8d01bf31f07 (diff)
ART: Refactor scalar loop optimizations.
Refactor scalar loop peeling and unrolling to eliminate repeated checks and graph traversals, to make the code more readable and to make it easier to add new scalar loop opts. This is a prerequisite for full unrolling patch. Test: 530-checker-peel-unroll. Test: test-art-target, test-art-host. Change-Id: If824a95f304033555085eefac7524e59ed540322
-rw-r--r--compiler/optimizing/loop_analysis.cc56
-rw-r--r--compiler/optimizing/loop_analysis.h53
-rw-r--r--compiler/optimizing/loop_optimization.cc118
-rw-r--r--compiler/optimizing/loop_optimization.h15
4 files changed, 135 insertions, 107 deletions
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index a2124455e2..efb23e7d3e 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -17,19 +17,34 @@
#include "loop_analysis.h"
#include "base/bit_vector-inl.h"
+#include "induction_var_range.h"
namespace art {
void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
- LoopAnalysisInfo* analysis_results) {
+ LoopAnalysisInfo* analysis_results,
+ int64_t trip_count) {
+ analysis_results->trip_count_ = trip_count;
+
for (HBlocksInLoopIterator block_it(*loop_info);
!block_it.Done();
block_it.Advance()) {
HBasicBlock* block = block_it.Current();
+ // Check whether one of the successor is loop exit.
for (HBasicBlock* successor : block->GetSuccessors()) {
if (!loop_info->Contains(*successor)) {
analysis_results->exits_num_++;
+
+ // We track number of invariant loop exits which correspond to HIf instruction and
+ // can be eliminated by loop peeling; other control flow instruction are ignored and will
+ // not cause loop peeling to happen as they either cannot be inside a loop, or by
+ // definition cannot be loop exits (unconditional instructions), or are not beneficial for
+ // the optimization.
+ HIf* hif = block->GetLastInstruction()->AsIf();
+ if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) {
+ analysis_results->invariant_exits_num_++;
+ }
}
}
@@ -48,20 +63,13 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
}
}
-bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) {
- HGraph* graph = loop_info->GetHeader()->GetGraph();
- for (uint32_t block_id : loop_info->GetBlocks().Indexes()) {
- HBasicBlock* block = graph->GetBlocks()[block_id];
- DCHECK(block != nullptr);
- if (block->EndsWithIf()) {
- HIf* hif = block->GetLastInstruction()->AsIf();
- HInstruction* input = hif->InputAt(0);
- if (IsLoopExit(loop_info, hif) && !loop_info->Contains(*input->GetBlock())) {
- return true;
- }
- }
+int64_t LoopAnalysis::GetLoopTripCount(HLoopInformation* loop_info,
+ const InductionVarRange* induction_range) {
+ int64_t trip_count;
+ if (!induction_range->HasKnownTripCount(loop_info, &trip_count)) {
+ trip_count = LoopAnalysisInfo::kUnknownTripCount;
}
- return false;
+ return trip_count;
}
// Default implementation of loop helper; used for all targets unless a custom implementation
@@ -77,18 +85,22 @@ class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper {
// Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6;
- bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
- return loop_analysis_info->HasLongTypeInstructions() ||
- IsLoopTooBig(loop_analysis_info,
+ bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const OVERRIDE {
+ return analysis_info->HasLongTypeInstructions() ||
+ IsLoopTooBig(analysis_info,
kScalarHeuristicMaxBodySizeInstr,
kScalarHeuristicMaxBodySizeBlocks);
}
- uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
- uint64_t trip_count) const OVERRIDE {
+ uint32_t GetScalarUnrollingFactor(const LoopAnalysisInfo* analysis_info) const OVERRIDE {
+ int64_t trip_count = analysis_info->GetTripCount();
+ // Unroll only loops with known trip count.
+ if (trip_count == LoopAnalysisInfo::kUnknownTripCount) {
+ return LoopAnalysisInfo::kNoUnrollingFactor;
+ }
uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor;
if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
- return kNoUnrollingFactor;
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
return desired_unrolling_factor;
@@ -136,12 +148,12 @@ class Arm64LoopHelper : public ArchDefaultLoopHelper {
// TODO: Unroll loops with unknown trip count.
DCHECK_NE(vector_length, 0u);
if (trip_count < (2 * vector_length + max_peel)) {
- return kNoUnrollingFactor;
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
// Don't unroll for large loop body size.
uint32_t instruction_count = block->GetInstructions().CountSize();
if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) {
- return kNoUnrollingFactor;
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
// Find a beneficial unroll factor with the following restrictions:
// - At least one iteration of the transformed loop should be executed.
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
index 7f321b73c8..bcb7b70494 100644
--- a/compiler/optimizing/loop_analysis.h
+++ b/compiler/optimizing/loop_analysis.h
@@ -21,26 +21,33 @@
namespace art {
+class InductionVarRange;
class LoopAnalysis;
-// No loop unrolling factor (just one copy of the loop-body).
-static constexpr uint32_t kNoUnrollingFactor = 1;
-
// Class to hold cached information on properties of the loop.
class LoopAnalysisInfo : public ValueObject {
public:
+ // No loop unrolling factor (just one copy of the loop-body).
+ static constexpr uint32_t kNoUnrollingFactor = 1;
+ // Used for unknown and non-constant trip counts (see InductionVarRange::HasKnownTripCount).
+ static constexpr int64_t kUnknownTripCount = -1;
+
explicit LoopAnalysisInfo(HLoopInformation* loop_info)
- : bb_num_(0),
+ : trip_count_(kUnknownTripCount),
+ bb_num_(0),
instr_num_(0),
exits_num_(0),
+ invariant_exits_num_(0),
has_instructions_preventing_scalar_peeling_(false),
has_instructions_preventing_scalar_unrolling_(false),
has_long_type_instructions_(false),
loop_info_(loop_info) {}
+ int64_t GetTripCount() const { return trip_count_; }
size_t GetNumberOfBasicBlocks() const { return bb_num_; }
size_t GetNumberOfInstructions() const { return instr_num_; }
size_t GetNumberOfExits() const { return exits_num_; }
+ size_t GetNumberOfInvariantExits() const { return invariant_exits_num_; }
bool HasInstructionsPreventingScalarPeeling() const {
return has_instructions_preventing_scalar_peeling_;
@@ -50,19 +57,27 @@ class LoopAnalysisInfo : public ValueObject {
return has_instructions_preventing_scalar_unrolling_;
}
+ bool HasInstructionsPreventingScalarOpts() const {
+ return HasInstructionsPreventingScalarPeeling() || HasInstructionsPreventingScalarUnrolling();
+ }
+
bool HasLongTypeInstructions() const {
return has_long_type_instructions_;
}
- const HLoopInformation* GetLoopInfo() const { return loop_info_; }
+ HLoopInformation* GetLoopInfo() const { return loop_info_; }
private:
+ // Trip count of the loop if known, kUnknownTripCount otherwise.
+ int64_t trip_count_;
// Number of basic blocks in the loop body.
size_t bb_num_;
// Number of instructions in the loop body.
size_t instr_num_;
// Number of loop's exits.
size_t exits_num_;
+ // Number of "if" loop exits (with HIf instruction) whose condition is loop-invariant.
+ size_t invariant_exits_num_;
// Whether the loop has instructions which make scalar loop peeling non-beneficial.
bool has_instructions_preventing_scalar_peeling_;
// Whether the loop has instructions which make scalar loop unrolling non-beneficial.
@@ -72,7 +87,7 @@ class LoopAnalysisInfo : public ValueObject {
bool has_long_type_instructions_;
// Corresponding HLoopInformation.
- const HLoopInformation* loop_info_;
+ HLoopInformation* loop_info_;
friend class LoopAnalysis;
};
@@ -84,20 +99,12 @@ class LoopAnalysis : public ValueObject {
// Calculates loops basic properties like body size, exits number, etc. and fills
// 'analysis_results' with this information.
static void CalculateLoopBasicProperties(HLoopInformation* loop_info,
- LoopAnalysisInfo* analysis_results);
+ LoopAnalysisInfo* analysis_results,
+ int64_t trip_count);
- // Returns whether the loop has at least one loop invariant exit.
- static bool HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info);
-
- // Returns whether HIf's true or false successor is outside the specified loop.
- //
- // Prerequisite: HIf must be in the specified loop.
- static bool IsLoopExit(HLoopInformation* loop_info, const HIf* hif) {
- DCHECK(loop_info->Contains(*hif->GetBlock()));
- HBasicBlock* true_succ = hif->IfTrueSuccessor();
- HBasicBlock* false_succ = hif->IfFalseSuccessor();
- return (!loop_info->Contains(*true_succ) || !loop_info->Contains(*false_succ));
- }
+ // Returns the trip count of the loop if it is known and kUnknownTripCount otherwise.
+ static int64_t GetLoopTripCount(HLoopInformation* loop_info,
+ const InductionVarRange* induction_range);
private:
// Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial.
@@ -143,9 +150,9 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
// Returns optimal scalar unrolling factor for the loop.
//
// Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper.
- virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
- uint64_t trip_count ATTRIBUTE_UNUSED) const {
- return kNoUnrollingFactor;
+ virtual uint32_t GetScalarUnrollingFactor(
+ const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const {
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
// Returns whether scalar loop peeling is enabled,
@@ -160,7 +167,7 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
int64_t trip_count ATTRIBUTE_UNUSED,
uint32_t max_peel ATTRIBUTE_UNUSED,
uint32_t vector_length ATTRIBUTE_UNUSED) const {
- return kNoUnrollingFactor;
+ return LoopAnalysisInfo::kNoUnrollingFactor;
}
};
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 72aa25302e..440cd3351e 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -744,64 +744,74 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
}
bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
- return TryOptimizeInnerLoopFinite(node) ||
- TryPeelingForLoopInvariantExitsElimination(node) ||
- TryUnrollingForBranchPenaltyReduction(node);
+ return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node);
}
//
-// Loop unrolling: generic part methods.
+// Scalar loop peeling and unrolling: generic part methods.
//
-bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) {
- // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
- // as InstructionSet is needed.
- if (compiler_options_ == nullptr) {
+bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
+ bool generate_code) {
+ if (analysis_info->GetNumberOfExits() > 1) {
return false;
}
- HLoopInformation* loop_info = node->loop_info;
- int64_t trip_count = 0;
- // Only unroll loops with a known tripcount.
- if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) {
+ uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(analysis_info);
+ if (unrolling_factor == LoopAnalysisInfo::kNoUnrollingFactor) {
return false;
}
- uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count);
- if (unrolling_factor == kNoUnrollingFactor) {
- return false;
- }
+ if (generate_code) {
+ // TODO: support other unrolling factors.
+ DCHECK_EQ(unrolling_factor, 2u);
- LoopAnalysisInfo loop_analysis_info(loop_info);
- LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
+ // Perform unrolling.
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ PeelUnrollSimpleHelper helper(loop_info);
+ helper.DoUnrolling();
- // Check "IsLoopClonable" last as it can be time-consuming.
- if (loop_analysis_info.HasInstructionsPreventingScalarUnrolling() ||
- arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) ||
- (loop_analysis_info.GetNumberOfExits() > 1) ||
- !PeelUnrollHelper::IsLoopClonable(loop_info)) {
- return false;
+ // Remove the redundant loop check after unrolling.
+ HIf* copy_hif =
+ helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
+ int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
+ copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
}
+ return true;
+}
- // TODO: support other unrolling factors.
- DCHECK_EQ(unrolling_factor, 2u);
+bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
+ bool generate_code) {
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ if (!arch_loop_helper_->IsLoopPeelingEnabled()) {
+ return false;
+ }
- // Perform unrolling.
- PeelUnrollSimpleHelper helper(loop_info);
- helper.DoUnrolling();
+ if (analysis_info->GetNumberOfInvariantExits() == 0) {
+ return false;
+ }
- // Remove the redundant loop check after unrolling.
- HIf* copy_hif =
- helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
- int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
- copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
+ if (generate_code) {
+ // Perform peeling.
+ PeelUnrollSimpleHelper helper(loop_info);
+ helper.DoPeeling();
+
+ // Statically evaluate loop check after peeling for loop invariant condition.
+ const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
+ for (auto entry : *hir_map) {
+ HInstruction* copy = entry.second;
+ if (copy->IsIf()) {
+ TryToEvaluateIfCondition(copy->AsIf(), graph_);
+ }
+ }
+ }
return true;
}
-bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) {
+bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
// Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
// as InstructionSet is needed.
if (compiler_options_ == nullptr) {
@@ -809,35 +819,27 @@ bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* nod
}
HLoopInformation* loop_info = node->loop_info;
- // Check 'IsLoopClonable' the last as it might be time-consuming.
- if (!arch_loop_helper_->IsLoopPeelingEnabled()) {
+ int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_);
+ LoopAnalysisInfo analysis_info(loop_info);
+ LoopAnalysis::CalculateLoopBasicProperties(loop_info, &analysis_info, trip_count);
+
+ if (analysis_info.HasInstructionsPreventingScalarOpts() ||
+ arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&analysis_info)) {
return false;
}
- LoopAnalysisInfo loop_analysis_info(loop_info);
- LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
-
- // Check "IsLoopClonable" last as it can be time-consuming.
- if (loop_analysis_info.HasInstructionsPreventingScalarPeeling() ||
- arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) ||
- !LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) ||
- !PeelUnrollHelper::IsLoopClonable(loop_info)) {
+ if (!TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
+ !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) {
return false;
}
- // Perform peeling.
- PeelUnrollSimpleHelper helper(loop_info);
- helper.DoPeeling();
-
- const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
- for (auto entry : *hir_map) {
- HInstruction* copy = entry.second;
- if (copy->IsIf()) {
- TryToEvaluateIfCondition(copy->AsIf(), graph_);
- }
+ // Run 'IsLoopClonable' the last as it might be time-consuming.
+ if (!PeelUnrollHelper::IsLoopClonable(loop_info)) {
+ return false;
}
- return true;
+ return TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
+ TryUnrollingForBranchPenaltyReduction(&analysis_info);
}
//
@@ -1076,7 +1078,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
vector_index_,
ptc,
graph_->GetConstant(induc_type, 1),
- kNoUnrollingFactor);
+ LoopAnalysisInfo::kNoUnrollingFactor);
}
// Generate vector loop, possibly further unrolled:
@@ -1103,7 +1105,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
vector_index_,
stc,
graph_->GetConstant(induc_type, 1),
- kNoUnrollingFactor);
+ LoopAnalysisInfo::kNoUnrollingFactor);
}
// Link reductions to their final uses.
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 9743b25259..bc4792458b 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -144,12 +144,19 @@ class HLoopOptimization : public HOptimization {
bool OptimizeInnerLoop(LoopNode* node);
// Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling
- // opportunities. Returns whether transformation happened.
- bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node);
+ // opportunities. Returns whether transformation happened. 'generate_code' determines whether the
+ // optimization should be actually applied.
+ bool TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
+ bool generate_code = true);
// Tries to apply loop peeling for loop invariant exits elimination. Returns whether
- // transformation happened.
- bool TryPeelingForLoopInvariantExitsElimination(LoopNode* loop_node);
+ // transformation happened. 'generate_code' determines whether the optimization should be
+ // actually applied.
+ bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
+ bool generate_code = true);
+
+ // Tries to apply scalar loop peeling and unrolling.
+ bool TryPeelingAndUnrolling(LoopNode* node);
//
// Vectorization analysis and synthesis.