summaryrefslogtreecommitdiff
path: root/compiler/optimizing/loop_optimization.cc
diff options
context:
space:
mode:
author Aart Bik <ajcbik@google.com> 2017-09-09 10:44:45 -0700
committer Aart Bik <ajcbik@google.com> 2017-09-11 10:02:55 -0700
commit521b50f58f2af8b5a68f821a6c4eac7d86ec01f5 (patch)
tree68e41bad2a2106e27bfd470b627f11706d9b6e92 /compiler/optimizing/loop_optimization.cc
parentd4d11822e349e7e4af0b43cb3fc69e14f1c95475 (diff)
No unrolling for large loop bodies.
Rationale: should yield 1, not 0 Test: test-art-host test-art-target Change-Id: I0ca68b2a5a4dba1c3e41248376002d9635716840
Diffstat (limited to 'compiler/optimizing/loop_optimization.cc')
-rw-r--r--compiler/optimizing/loop_optimization.cc21
1 files changed, 13 insertions, 8 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index e150b65628..baa045390b 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -34,6 +34,9 @@ static constexpr bool kEnableVectorization = true;
// All current SIMD targets want 16-byte alignment.
static constexpr size_t kAlignedBase = 16;
+// No loop unrolling factor (just one copy of the loop-body).
+static constexpr uint32_t kNoUnrollingFactor = 1;
+
// Remove the instruction from the graph. A bit more elaborate than the usual
// instruction removal, since there may be a cycle in the use structure.
static void RemoveFromCycle(HInstruction* instruction) {
@@ -791,7 +794,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
vector_index_,
ptc,
graph_->GetIntConstant(1),
- /*unroll*/ 1);
+ kNoUnrollingFactor);
}
// Generate vector loop, possibly further unrolled:
@@ -818,7 +821,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
vector_index_,
stc,
graph_->GetIntConstant(1),
- /*unroll*/ 1);
+ kNoUnrollingFactor);
}
// Link reductions to their final uses.
@@ -1767,14 +1770,17 @@ static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50;
uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
switch (compiler_driver_->GetInstructionSet()) {
case kArm64: {
- DCHECK_NE(vector_length_, 0u);
+ // Don't unroll with insufficient iterations.
// TODO: Unroll loops with unknown trip count.
+ DCHECK_NE(vector_length_, 0u);
if (trip_count < 2 * vector_length_) {
- return 1;
+ return kNoUnrollingFactor;
}
-
+ // Don't unroll for large loop body size.
uint32_t instruction_count = block->GetInstructions().CountSize();
-
+ if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) {
+ return kNoUnrollingFactor;
+ }
// Find a beneficial unroll factor with the following restrictions:
// - At least one iteration of the transformed loop should be executed.
// - The loop body shouldn't be "too big" (heuristic).
@@ -1783,13 +1789,12 @@ uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_
uint32_t unroll_factor =
TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR}));
DCHECK_GE(unroll_factor, 1u);
-
return unroll_factor;
}
case kX86:
case kX86_64:
default:
- return 1;
+ return kNoUnrollingFactor;
}
}