ART: Refactor scalar loop optimizations.

Refactor scalar loop peeling and unrolling to eliminate repeated
checks and graph traversals, to make the code more readable and
to make it easier to add new scalar loop opts.

This is a prerequisite for the full unrolling patch.
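
The new flow, condensed from TryPeelingAndUnrolling in this patch (the
benefit checks and other details are elided here):

  // Analyze the loop once and reuse the results for all scalar loop opts.
  int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_);
  LoopAnalysisInfo analysis_info(loop_info);
  LoopAnalysis::CalculateLoopBasicProperties(loop_info, &analysis_info, trip_count);
  // Cheap feasibility checks first (generate_code = false)...
  if (!TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
      !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) {
    return false;
  }
  // ...the time-consuming clonability check last, then generate code.
  if (!PeelUnrollHelper::IsLoopClonable(loop_info)) {
    return false;
  }
  return TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
         TryUnrollingForBranchPenaltyReduction(&analysis_info);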

Test: 530-checker-peel-unroll.
Test: test-art-target, test-art-host.
Change-Id: If824a95f304033555085eefac7524e59ed540322
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index a212445..efb23e7 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -17,19 +17,34 @@
 #include "loop_analysis.h"
 
 #include "base/bit_vector-inl.h"
+#include "induction_var_range.h"
 
 namespace art {
 
 void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
-                                                LoopAnalysisInfo* analysis_results) {
+                                                LoopAnalysisInfo* analysis_results,
+                                                int64_t trip_count) {
+  analysis_results->trip_count_ = trip_count;
+
   for (HBlocksInLoopIterator block_it(*loop_info);
        !block_it.Done();
        block_it.Advance()) {
     HBasicBlock* block = block_it.Current();
 
+    // Check whether one of the successors is a loop exit.
     for (HBasicBlock* successor : block->GetSuccessors()) {
       if (!loop_info->Contains(*successor)) {
         analysis_results->exits_num_++;
+
+        // We track the number of invariant loop exits which correspond to an HIf instruction and
+        // can be eliminated by loop peeling; other control flow instructions are ignored and will
+        // not cause loop peeling to happen as they either cannot be inside a loop, or by
+        // definition cannot be loop exits (unconditional instructions), or are not beneficial for
+        // the optimization.
+        HIf* hif = block->GetLastInstruction()->AsIf();
+        if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) {
+          analysis_results->invariant_exits_num_++;
+        }
       }
     }
 
@@ -48,20 +63,13 @@
   }
 }
 
-bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) {
-  HGraph* graph = loop_info->GetHeader()->GetGraph();
-  for (uint32_t block_id : loop_info->GetBlocks().Indexes()) {
-    HBasicBlock* block = graph->GetBlocks()[block_id];
-    DCHECK(block != nullptr);
-    if (block->EndsWithIf()) {
-      HIf* hif = block->GetLastInstruction()->AsIf();
-      HInstruction* input = hif->InputAt(0);
-      if (IsLoopExit(loop_info, hif) && !loop_info->Contains(*input->GetBlock())) {
-        return true;
-      }
-    }
+int64_t LoopAnalysis::GetLoopTripCount(HLoopInformation* loop_info,
+                                       const InductionVarRange* induction_range) {
+  int64_t trip_count;
+  if (!induction_range->HasKnownTripCount(loop_info, &trip_count)) {
+    trip_count = LoopAnalysisInfo::kUnknownTripCount;
   }
-  return false;
+  return trip_count;
 }
 
 // Default implementation of loop helper; used for all targets unless a custom implementation
@@ -77,18 +85,22 @@
   // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
   static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6;
 
-  bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
-    return loop_analysis_info->HasLongTypeInstructions() ||
-           IsLoopTooBig(loop_analysis_info,
+  bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const OVERRIDE {
+    return analysis_info->HasLongTypeInstructions() ||
+           IsLoopTooBig(analysis_info,
                         kScalarHeuristicMaxBodySizeInstr,
                         kScalarHeuristicMaxBodySizeBlocks);
   }
 
-  uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
-                                    uint64_t trip_count) const OVERRIDE {
+  uint32_t GetScalarUnrollingFactor(const LoopAnalysisInfo* analysis_info) const OVERRIDE {
+    int64_t trip_count = analysis_info->GetTripCount();
+    // Unroll only loops with a known trip count.
+    if (trip_count == LoopAnalysisInfo::kUnknownTripCount) {
+      return LoopAnalysisInfo::kNoUnrollingFactor;
+    }
     uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor;
     if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
-      return kNoUnrollingFactor;
+      return LoopAnalysisInfo::kNoUnrollingFactor;
     }
 
     return desired_unrolling_factor;
@@ -136,12 +148,12 @@
     // TODO: Unroll loops with unknown trip count.
     DCHECK_NE(vector_length, 0u);
     if (trip_count < (2 * vector_length + max_peel)) {
-      return kNoUnrollingFactor;
+      return LoopAnalysisInfo::kNoUnrollingFactor;
     }
     // Don't unroll for large loop body size.
     uint32_t instruction_count = block->GetInstructions().CountSize();
     if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) {
-      return kNoUnrollingFactor;
+      return LoopAnalysisInfo::kNoUnrollingFactor;
     }
     // Find a beneficial unroll factor with the following restrictions:
     //  - At least one iteration of the transformed loop should be executed.
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
index 7f321b7..bcb7b70 100644
--- a/compiler/optimizing/loop_analysis.h
+++ b/compiler/optimizing/loop_analysis.h
@@ -21,26 +21,33 @@
 
 namespace art {
 
+class InductionVarRange;
 class LoopAnalysis;
 
-// No loop unrolling factor (just one copy of the loop-body).
-static constexpr uint32_t kNoUnrollingFactor = 1;
-
 // Class to hold cached information on properties of the loop.
 class LoopAnalysisInfo : public ValueObject {
  public:
+  // No loop unrolling factor (just one copy of the loop-body).
+  static constexpr uint32_t kNoUnrollingFactor = 1;
+  // Used for unknown and non-constant trip counts (see InductionVarRange::HasKnownTripCount).
+  static constexpr int64_t kUnknownTripCount = -1;
+
   explicit LoopAnalysisInfo(HLoopInformation* loop_info)
-      : bb_num_(0),
+      : trip_count_(kUnknownTripCount),
+        bb_num_(0),
         instr_num_(0),
         exits_num_(0),
+        invariant_exits_num_(0),
         has_instructions_preventing_scalar_peeling_(false),
         has_instructions_preventing_scalar_unrolling_(false),
         has_long_type_instructions_(false),
         loop_info_(loop_info) {}
 
+  int64_t GetTripCount() const { return trip_count_; }
   size_t GetNumberOfBasicBlocks() const { return bb_num_; }
   size_t GetNumberOfInstructions() const { return instr_num_; }
   size_t GetNumberOfExits() const { return exits_num_; }
+  size_t GetNumberOfInvariantExits() const { return invariant_exits_num_; }
 
   bool HasInstructionsPreventingScalarPeeling() const {
     return has_instructions_preventing_scalar_peeling_;
@@ -50,19 +57,27 @@
     return has_instructions_preventing_scalar_unrolling_;
   }
 
+  bool HasInstructionsPreventingScalarOpts() const {
+    return HasInstructionsPreventingScalarPeeling() || HasInstructionsPreventingScalarUnrolling();
+  }
+
   bool HasLongTypeInstructions() const {
     return has_long_type_instructions_;
   }
 
-  const HLoopInformation* GetLoopInfo() const { return loop_info_; }
+  HLoopInformation* GetLoopInfo() const { return loop_info_; }
 
  private:
+  // Trip count of the loop if known, kUnknownTripCount otherwise.
+  int64_t trip_count_;
   // Number of basic blocks in the loop body.
   size_t bb_num_;
   // Number of instructions in the loop body.
   size_t instr_num_;
   // Number of loop's exits.
   size_t exits_num_;
+  // Number of "if" loop exits (with HIf instruction) whose condition is loop-invariant.
+  size_t invariant_exits_num_;
   // Whether the loop has instructions which make scalar loop peeling non-beneficial.
   bool has_instructions_preventing_scalar_peeling_;
   // Whether the loop has instructions which make scalar loop unrolling non-beneficial.
@@ -72,7 +87,7 @@
   bool has_long_type_instructions_;
 
   // Corresponding HLoopInformation.
-  const HLoopInformation* loop_info_;
+  HLoopInformation* loop_info_;
 
   friend class LoopAnalysis;
 };
@@ -84,20 +99,12 @@
   // Calculates loops basic properties like body size, exits number, etc. and fills
   // 'analysis_results' with this information.
   static void CalculateLoopBasicProperties(HLoopInformation* loop_info,
-                                           LoopAnalysisInfo* analysis_results);
+                                           LoopAnalysisInfo* analysis_results,
+                                           int64_t trip_count);
 
-  // Returns whether the loop has at least one loop invariant exit.
-  static bool HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info);
-
-  // Returns whether HIf's true or false successor is outside the specified loop.
-  //
-  // Prerequisite: HIf must be in the specified loop.
-  static bool IsLoopExit(HLoopInformation* loop_info, const HIf* hif) {
-    DCHECK(loop_info->Contains(*hif->GetBlock()));
-    HBasicBlock* true_succ = hif->IfTrueSuccessor();
-    HBasicBlock* false_succ = hif->IfFalseSuccessor();
-    return (!loop_info->Contains(*true_succ) || !loop_info->Contains(*false_succ));
-  }
+  // Returns the trip count of the loop if it is known and kUnknownTripCount otherwise.
+  static int64_t GetLoopTripCount(HLoopInformation* loop_info,
+                                  const InductionVarRange* induction_range);
 
  private:
   // Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial.
@@ -143,9 +150,9 @@
   // Returns optimal scalar unrolling factor for the loop.
   //
   // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper.
-  virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
-                                            uint64_t trip_count ATTRIBUTE_UNUSED) const {
-    return kNoUnrollingFactor;
+  virtual uint32_t GetScalarUnrollingFactor(
+      const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const {
+    return LoopAnalysisInfo::kNoUnrollingFactor;
   }
 
   // Returns whether scalar loop peeling is enabled,
@@ -160,7 +167,7 @@
                                           int64_t trip_count ATTRIBUTE_UNUSED,
                                           uint32_t max_peel ATTRIBUTE_UNUSED,
                                           uint32_t vector_length ATTRIBUTE_UNUSED) const {
-    return kNoUnrollingFactor;
+    return LoopAnalysisInfo::kNoUnrollingFactor;
   }
 };
 
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 72aa253..440cd33 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -744,102 +744,104 @@
 }
 
 bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
-  return TryOptimizeInnerLoopFinite(node) ||
-         TryPeelingForLoopInvariantExitsElimination(node) ||
-         TryUnrollingForBranchPenaltyReduction(node);
+  return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node);
 }
 
 
 
 //
-// Loop unrolling: generic part methods.
+// Scalar loop peeling and unrolling: generic part methods.
 //
 
-bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) {
-  // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
-  // as InstructionSet is needed.
-  if (compiler_options_ == nullptr) {
+bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
+                                                              bool generate_code) {
+  if (analysis_info->GetNumberOfExits() > 1) {
     return false;
   }
 
-  HLoopInformation* loop_info = node->loop_info;
-  int64_t trip_count = 0;
-  // Only unroll loops with a known tripcount.
-  if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) {
+  uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(analysis_info);
+  if (unrolling_factor == LoopAnalysisInfo::kNoUnrollingFactor) {
     return false;
   }
 
-  uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count);
-  if (unrolling_factor == kNoUnrollingFactor) {
-    return false;
+  if (generate_code) {
+    // TODO: support other unrolling factors.
+    DCHECK_EQ(unrolling_factor, 2u);
+
+    // Perform unrolling.
+    HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+    PeelUnrollSimpleHelper helper(loop_info);
+    helper.DoUnrolling();
+
+    // Remove the redundant loop check after unrolling.
+    HIf* copy_hif =
+        helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
+    int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
+    copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
   }
-
-  LoopAnalysisInfo loop_analysis_info(loop_info);
-  LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
-
-  // Check "IsLoopClonable" last as it can be time-consuming.
-  if (loop_analysis_info.HasInstructionsPreventingScalarUnrolling() ||
-      arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) ||
-      (loop_analysis_info.GetNumberOfExits() > 1) ||
-      !PeelUnrollHelper::IsLoopClonable(loop_info)) {
-    return false;
-  }
-
-  // TODO: support other unrolling factors.
-  DCHECK_EQ(unrolling_factor, 2u);
-
-  // Perform unrolling.
-  PeelUnrollSimpleHelper helper(loop_info);
-  helper.DoUnrolling();
-
-  // Remove the redundant loop check after unrolling.
-  HIf* copy_hif =
-      helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
-  int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
-  copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
-
   return true;
 }
 
-bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) {
-  // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
-  // as InstructionSet is needed.
-  if (compiler_options_ == nullptr) {
-    return false;
-  }
-
-  HLoopInformation* loop_info = node->loop_info;
-  // Check 'IsLoopClonable' the last as it might be time-consuming.
+bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
+                                                                   bool generate_code) {
+  HLoopInformation* loop_info = analysis_info->GetLoopInfo();
   if (!arch_loop_helper_->IsLoopPeelingEnabled()) {
     return false;
   }
 
-  LoopAnalysisInfo loop_analysis_info(loop_info);
-  LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
-
-  // Check "IsLoopClonable" last as it can be time-consuming.
-  if (loop_analysis_info.HasInstructionsPreventingScalarPeeling() ||
-      arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) ||
-      !LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) ||
-      !PeelUnrollHelper::IsLoopClonable(loop_info)) {
+  if (analysis_info->GetNumberOfInvariantExits() == 0) {
     return false;
   }
 
-  // Perform peeling.
-  PeelUnrollSimpleHelper helper(loop_info);
-  helper.DoPeeling();
+  if (generate_code) {
+    // Perform peeling.
+    PeelUnrollSimpleHelper helper(loop_info);
+    helper.DoPeeling();
 
-  const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
-  for (auto entry : *hir_map) {
-    HInstruction* copy = entry.second;
-    if (copy->IsIf()) {
-      TryToEvaluateIfCondition(copy->AsIf(), graph_);
+    // After peeling, statically evaluate loop checks with a loop-invariant condition.
+    const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
+    for (auto entry : *hir_map) {
+      HInstruction* copy = entry.second;
+      if (copy->IsIf()) {
+        TryToEvaluateIfCondition(copy->AsIf(), graph_);
+      }
     }
   }
 
   return true;
 }
 
+bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
+  // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests)
+  // as InstructionSet is needed.
+  if (compiler_options_ == nullptr) {
+    return false;
+  }
+
+  HLoopInformation* loop_info = node->loop_info;
+  int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_);
+  LoopAnalysisInfo analysis_info(loop_info);
+  LoopAnalysis::CalculateLoopBasicProperties(loop_info, &analysis_info, trip_count);
+
+  if (analysis_info.HasInstructionsPreventingScalarOpts() ||
+      arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&analysis_info)) {
+    return false;
+  }
+
+  if (!TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
+      !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) {
+    return false;
+  }
+
+  // Run 'IsLoopClonable' last as it might be time-consuming.
+  if (!PeelUnrollHelper::IsLoopClonable(loop_info)) {
+    return false;
+  }
+
+  return TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
+         TryUnrollingForBranchPenaltyReduction(&analysis_info);
+}
+
 //
 // Loop vectorization. The implementation is based on the book by Aart J.C. Bik:
 // "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance."
@@ -1076,7 +1078,7 @@
                     vector_index_,
                     ptc,
                     graph_->GetConstant(induc_type, 1),
-                    kNoUnrollingFactor);
+                    LoopAnalysisInfo::kNoUnrollingFactor);
   }
 
   // Generate vector loop, possibly further unrolled:
@@ -1103,7 +1105,7 @@
                     vector_index_,
                     stc,
                     graph_->GetConstant(induc_type, 1),
-                    kNoUnrollingFactor);
+                    LoopAnalysisInfo::kNoUnrollingFactor);
   }
 
   // Link reductions to their final uses.
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 9743b25..bc47924 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -144,12 +144,19 @@
   bool OptimizeInnerLoop(LoopNode* node);
 
   // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling
-  // opportunities. Returns whether transformation happened.
-  bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node);
+  // opportunities. Returns whether the transformation happened. 'generate_code' determines
+  // whether the optimization should actually be applied.
+  bool TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
+                                             bool generate_code = true);
 
   // Tries to apply loop peeling for loop invariant exits elimination. Returns whether
-  // transformation happened.
-  bool TryPeelingForLoopInvariantExitsElimination(LoopNode* loop_node);
+  // the transformation happened. 'generate_code' determines whether the optimization should
+  // actually be applied.
+  bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
+                                                  bool generate_code = true);
+
+  // Tries to apply scalar loop peeling and unrolling.
+  bool TryPeelingAndUnrolling(LoopNode* node);
 
   //
   // Vectorization analysis and synthesis.