ART: Implement loop full unrolling.
Performs whole loop unrolling for small loops with small
trip count to eliminate the loop check overhead, to have
more opportunities for inter-iteration optimizations.
caffeinemark/FloatAtom: 1.2x performance on arm64 Cortex-A57.
Test: 530-checker-peel-unroll.
Test: test-art-host, test-art-target.
Change-Id: Idf3fe3cb611376935d176c60db8c49907222e28a
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index efb23e7..d355ced 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -84,6 +84,8 @@
static constexpr uint32_t kScalarHeuristicMaxBodySizeInstr = 17;
// Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6;
+ // Maximum number of instructions to be created as a result of full unrolling.
+ static constexpr uint32_t kScalarHeuristicFullyUnrolledMaxInstrThreshold = 35;
bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const OVERRIDE {
return analysis_info->HasLongTypeInstructions() ||
@@ -108,6 +110,14 @@
bool IsLoopPeelingEnabled() const OVERRIDE { return true; }
+ bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info) const OVERRIDE {
+ int64_t trip_count = analysis_info->GetTripCount();
+ // We assume that trip count is known.
+ DCHECK_NE(trip_count, LoopAnalysisInfo::kUnknownTripCount);
+ size_t instr_num = analysis_info->GetNumberOfInstructions();
+ return (trip_count * instr_num < kScalarHeuristicFullyUnrolledMaxInstrThreshold);
+ }
+
protected:
bool IsLoopTooBig(LoopAnalysisInfo* loop_analysis_info,
size_t instr_threshold,
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
index bcb7b70..57509ee 100644
--- a/compiler/optimizing/loop_analysis.h
+++ b/compiler/optimizing/loop_analysis.h
@@ -160,6 +160,13 @@
// Returns 'false' by default, should be overridden by particular target loop helper.
virtual bool IsLoopPeelingEnabled() const { return false; }
+ // Returns whether it is beneficial to fully unroll the loop.
+ //
+ // Returns 'false' by default, should be overridden by particular target loop helper.
+ virtual bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const {
+ return false;
+ }
+
// Returns optimal SIMD unrolling factor for the loop.
//
// Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper.
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 440cd33..7d66155 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -422,6 +422,15 @@
}
}
+// Peel the first 'count' iterations of the loop.
+static void PeelByCount(HLoopInformation* loop_info, int count) {
+ for (int i = 0; i < count; i++) {
+ // Perform peeling.
+ PeelUnrollSimpleHelper helper(loop_info);
+ helper.DoPeeling();
+ }
+}
+
//
// Public methods.
//
@@ -811,6 +820,45 @@
return true;
}
+bool HLoopOptimization::TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code) {
+ // Fully unroll loops with a known and small trip count.
+ int64_t trip_count = analysis_info->GetTripCount();
+ if (!arch_loop_helper_->IsLoopPeelingEnabled() ||
+ trip_count == LoopAnalysisInfo::kUnknownTripCount ||
+ !arch_loop_helper_->IsFullUnrollingBeneficial(analysis_info)) {
+ return false;
+ }
+
+ if (generate_code) {
+ // Peeling of the N first iterations (where N equals to the trip count) will effectively
+ // eliminate the loop: after peeling we will have N sequential iterations copied into the loop
+ // preheader and the original loop. The trip count of this loop will be 0 as the sequential
+ // iterations are executed first and there are exactly N of them. Thus we can statically
+ // evaluate the loop exit condition to 'false' and fully eliminate it.
+ //
+ // Here is an example of full unrolling of a loop with a trip count 2:
+ //
+ // loop_cond_1
+ // loop_body_1 <- First iteration.
+ // |
+ // \ v
+ // ==\ loop_cond_2
+ // ==/ loop_body_2 <- Second iteration.
+ // / |
+ // <- v <-
+ // loop_cond \ loop_cond \ <- This cond is always false.
+ // loop_body _/ loop_body _/
+ //
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ PeelByCount(loop_info, trip_count);
+ HIf* loop_hif = loop_info->GetHeader()->GetLastInstruction()->AsIf();
+ int32_t constant = loop_info->Contains(*loop_hif->IfTrueSuccessor()) ? 0 : 1;
+ loop_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
+ }
+
+ return true;
+}
+
bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
// Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
// as InstructionSet is needed.
@@ -828,7 +876,8 @@
return false;
}
- if (!TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
+ if (!TryFullUnrolling(&analysis_info, /*generate_code*/ false) &&
+ !TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
!TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) {
return false;
}
@@ -838,7 +887,8 @@
return false;
}
- return TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
+ return TryFullUnrolling(&analysis_info) ||
+ TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
TryUnrollingForBranchPenaltyReduction(&analysis_info);
}
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index bc47924..644b740 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -155,6 +155,12 @@
bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
bool generate_code = true);
+ // Tries to perform whole loop unrolling for a small loop with a small trip count to eliminate
+ // the loop check overhead and to have more opportunities for inter-iteration optimizations.
+ // Returns whether transformation happened. 'generate_code' determines whether the optimization
+ // should be actually applied.
+ bool TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code = true);
+
// Tries to apply scalar loop peeling and unrolling.
bool TryPeelingAndUnrolling(LoopNode* node);