ART: Implement loop full unrolling.
Performs whole loop unrolling for small loops with small
trip count to eliminate the loop check overhead, to have
more opportunities for inter-iteration optimizations.
caffeinemark/FloatAtom: 1.2x performance on arm64 Cortex-A57.
Test: 530-checker-peel-unroll.
Test: test-art-host, test-art-target.
Change-Id: Idf3fe3cb611376935d176c60db8c49907222e28a
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 440cd33..7d66155 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -422,6 +422,15 @@
}
}
+// Peel the first 'count' iterations of the loop.
+static void PeelByCount(HLoopInformation* loop_info, int count) {
+ for (int i = 0; i < count; i++) {
+ // Perform peeling.
+ PeelUnrollSimpleHelper helper(loop_info);
+ helper.DoPeeling();
+ }
+}
+
//
// Public methods.
//
@@ -811,6 +820,45 @@
return true;
}
+bool HLoopOptimization::TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code) {
+ // Fully unroll loops with a known and small trip count.
+ int64_t trip_count = analysis_info->GetTripCount();
+ if (!arch_loop_helper_->IsLoopPeelingEnabled() ||
+ trip_count == LoopAnalysisInfo::kUnknownTripCount ||
+ !arch_loop_helper_->IsFullUnrollingBeneficial(analysis_info)) {
+ return false;
+ }
+
+ if (generate_code) {
+ // Peeling of the N first iterations (where N equals to the trip count) will effectively
+ // eliminate the loop: after peeling we will have N sequential iterations copied into the loop
+ // preheader and the original loop. The trip count of this loop will be 0 as the sequential
+ // iterations are executed first and there are exactly N of them. Thus we can statically
+ // evaluate the loop exit condition to 'false' and fully eliminate it.
+ //
+ // Here is an example of full unrolling of a loop with a trip count 2:
+ //
+ // loop_cond_1
+ // loop_body_1 <- First iteration.
+ // |
+ // \ v
+ // ==\ loop_cond_2
+ // ==/ loop_body_2 <- Second iteration.
+ // / |
+ // <- v <-
+ // loop_cond \ loop_cond \ <- This cond is always false.
+ // loop_body _/ loop_body _/
+ //
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ PeelByCount(loop_info, trip_count);
+ HIf* loop_hif = loop_info->GetHeader()->GetLastInstruction()->AsIf();
+ int32_t constant = loop_info->Contains(*loop_hif->IfTrueSuccessor()) ? 0 : 1;
+ loop_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
+ }
+
+ return true;
+}
+
bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
// Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
// as InstructionSet is needed.
@@ -828,7 +876,8 @@
return false;
}
- if (!TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
+ if (!TryFullUnrolling(&analysis_info, /*generate_code*/ false) &&
+ !TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
!TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) {
return false;
}
@@ -838,7 +887,8 @@
return false;
}
- return TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
+ return TryFullUnrolling(&analysis_info) ||
+ TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
TryUnrollingForBranchPenaltyReduction(&analysis_info);
}