From 521b50f58f2af8b5a68f821a6c4eac7d86ec01f5 Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Sat, 9 Sep 2017 10:44:45 -0700 Subject: No unrolling for large loop bodies. Rationale: should yield 1, not 0 Test: test-art-host test-art-target Change-Id: I0ca68b2a5a4dba1c3e41248376002d9635716840 --- compiler/optimizing/loop_optimization.cc | 21 +++++++++++++-------- test/623-checker-loop-regressions/src/Main.java | 17 +++++++++++++++++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index e150b65628..baa045390b 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -34,6 +34,9 @@ static constexpr bool kEnableVectorization = true; // All current SIMD targets want 16-byte alignment. static constexpr size_t kAlignedBase = 16; +// No loop unrolling factor (just one copy of the loop-body). +static constexpr uint32_t kNoUnrollingFactor = 1; + // Remove the instruction from the graph. A bit more elaborate than the usual // instruction removal, since there may be a cycle in the use structure. static void RemoveFromCycle(HInstruction* instruction) { @@ -791,7 +794,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_index_, ptc, graph_->GetIntConstant(1), - /*unroll*/ 1); + kNoUnrollingFactor); } // Generate vector loop, possibly further unrolled: @@ -818,7 +821,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_index_, stc, graph_->GetIntConstant(1), - /*unroll*/ 1); + kNoUnrollingFactor); } // Link reductions to their final uses. @@ -1767,14 +1770,17 @@ static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50; uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) { switch (compiler_driver_->GetInstructionSet()) { case kArm64: { - DCHECK_NE(vector_length_, 0u); + // Don't unroll with insufficient iterations. // TODO: Unroll loops with unknown trip count. + DCHECK_NE(vector_length_, 0u); if (trip_count < 2 * vector_length_) { - return 1; + return kNoUnrollingFactor; } - + // Don't unroll for large loop body size. uint32_t instruction_count = block->GetInstructions().CountSize(); - + if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) { + return kNoUnrollingFactor; + } // Find a beneficial unroll factor with the following restrictions: // - At least one iteration of the transformed loop should be executed. // - The loop body shouldn't be "too big" (heuristic). @@ -1783,13 +1789,12 @@ uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_ uint32_t unroll_factor = TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR})); DCHECK_GE(unroll_factor, 1u); - return unroll_factor; } case kX86: case kX86_64: default: - return 1; + return kNoUnrollingFactor; } } diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java index 9229d8110d..418be30aad 100644 --- a/test/623-checker-loop-regressions/src/Main.java +++ b/test/623-checker-loop-regressions/src/Main.java @@ -485,6 +485,18 @@ public class Main { return sum; } + // Large loop body should not break unrolling computation. + static void largeBody(int[] x) { + for (int i = 0; i < 100; i++) { + x[i] = x[i] * 1 + x[i] * 2 + x[i] * 3 + x[i] * 4 + x[i] * 5 + x[i] * 6 + + x[i] * 7 + x[i] * 8 + x[i] * 9 + x[i] * 10 + x[i] * 11 + x[i] * 12 + + x[i] * 13 + x[i] * 14 + x[i] * 15 + x[i] * 1 + x[i] * 2 + x[i] * 3 + x[i] * 4 + + x[i] * 5 + x[i] * 6 + x[i] * 7 + x[i] * 8 + x[i] * 9 + x[i] * 10 + x[i] * 11 + + x[i] * 12 + x[i] * 13 + x[i] * 14 + x[i] * 15 + x[i] * 1 + x[i] * 2 + x[i] * 3 + + x[i] * 4 + x[i] * 5; + } + } + public static void main(String[] args) { expectEquals(10, earlyExitFirst(-1)); for (int i = 0; i <= 10; i++) { @@ -633,6 +645,11 @@ public class Main { } expectEquals(-2080768, sum(x)); + largeBody(f); + for (int i = 0; i < 100; i++) { + expectEquals(2805, f[i]); + } + System.out.println("passed"); } -- cgit v1.2.3-59-g8ed1b