ART: Enable scalar loop peeling and unrolling.

Turn on scalar loop peeling and unrolling by default.

Test: 482-checker-loop-back-edge-use, 530-checker-peel-unroll
Test: test-art-host, test-art-target, boot-to-gui
Change-Id: Ibfe1b54f790a97b281e85396da2985e0f22c2834
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index a0760ef..a212445 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -35,6 +35,9 @@
 
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* instruction = it.Current();
+      if (it.Current()->GetType() == DataType::Type::kInt64) {
+        analysis_results->has_long_type_instructions_ = true;
+      }
       if (MakesScalarPeelingUnrollingNonBeneficial(instruction)) {
         analysis_results->has_instructions_preventing_scalar_peeling_ = true;
         analysis_results->has_instructions_preventing_scalar_unrolling_ = true;
@@ -61,34 +64,29 @@
   return false;
 }
 
-class Arm64LoopHelper : public ArchDefaultLoopHelper {
+// Default implementation of loop helper; used for all targets unless a custom implementation
+// is provided. Enables scalar loop peeling and unrolling with the most conservative heuristics.
+class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper {
  public:
   // Scalar loop unrolling parameters and heuristics.
   //
   // Maximum possible unrolling factor.
-  static constexpr uint32_t kArm64ScalarMaxUnrollFactor = 2;
+  static constexpr uint32_t kScalarMaxUnrollFactor = 2;
   // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled.
-  static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40;
+  static constexpr uint32_t kScalarHeuristicMaxBodySizeInstr = 17;
   // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
-  static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8;
+  static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6;
 
-  // SIMD loop unrolling parameters and heuristics.
-  //
-  // Maximum possible unrolling factor.
-  static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8;
-  // Loop's maximum instruction count. Loops with higher count will not be unrolled.
-  static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50;
-
-  bool IsLoopTooBigForScalarPeelingUnrolling(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
-    size_t instr_num = loop_analysis_info->GetNumberOfInstructions();
-    size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks();
-    return (instr_num >= kArm64ScalarHeuristicMaxBodySizeInstr ||
-            bb_num >= kArm64ScalarHeuristicMaxBodySizeBlocks);
+  bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
+    return loop_analysis_info->HasLongTypeInstructions() ||
+           IsLoopTooBig(loop_analysis_info,
+                        kScalarHeuristicMaxBodySizeInstr,
+                        kScalarHeuristicMaxBodySizeBlocks);
   }
 
   uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
                                     uint64_t trip_count) const OVERRIDE {
-    uint32_t desired_unrolling_factor = kArm64ScalarMaxUnrollFactor;
+    uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor;
     if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
       return kNoUnrollingFactor;
     }
@@ -98,6 +96,38 @@
 
   bool IsLoopPeelingEnabled() const OVERRIDE { return true; }
 
+ protected:
+  bool IsLoopTooBig(LoopAnalysisInfo* loop_analysis_info,
+                    size_t instr_threshold,
+                    size_t bb_threshold) const {
+    size_t instr_num = loop_analysis_info->GetNumberOfInstructions();
+    size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks();
+    return (instr_num >= instr_threshold || bb_num >= bb_threshold);
+  }
+};
+
+// Custom implementation of loop helper for arm64 target. Enables heuristics for scalar loop
+// peeling and unrolling and supports SIMD loop unrolling.
+class Arm64LoopHelper : public ArchDefaultLoopHelper {
+ public:
+  // SIMD loop unrolling parameters and heuristics.
+  //
+  // Maximum possible unrolling factor.
+  static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8;
+  // Loop's maximum instruction count. Loops with higher count will not be unrolled.
+  static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50;
+
+  // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled.
+  static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40;
+  // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
+  static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8;
+
+  bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
+    return IsLoopTooBig(loop_analysis_info,
+                        kArm64ScalarHeuristicMaxBodySizeInstr,
+                        kArm64ScalarHeuristicMaxBodySizeBlocks);
+  }
+
   uint32_t GetSIMDUnrollingFactor(HBasicBlock* block,
                                   int64_t trip_count,
                                   uint32_t max_peel,
@@ -126,8 +156,8 @@
   }
 };
 
-ArchDefaultLoopHelper* ArchDefaultLoopHelper::Create(InstructionSet isa,
-                                                     ArenaAllocator* allocator) {
+ArchNoOptsLoopHelper* ArchNoOptsLoopHelper::Create(InstructionSet isa,
+                                                   ArenaAllocator* allocator) {
   switch (isa) {
     case InstructionSet::kArm64: {
       return new (allocator) Arm64LoopHelper;