ART: Implement scalar loop peeling.

Implement scalar loop peeling for invariant exits elimination
(on arm64). If the loop exit condition is loop invariant then
loop peeling + GVN + DCE can eliminate this exit in the loop
body. Note: GVN and DCE aren't applied during loop optimizations.

Note: this functionality is turned off by default now.

Test: test-art-host, test-art-target, boot-to-gui.

Change-Id: I98d20054a431838b452dc06bd25c075eb445960c
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index b41b659..c0c721d 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -34,7 +34,7 @@
 static constexpr bool kEnableVectorization = true;
 
 // Enables scalar loop unrolling in the loop optimizer.
-static constexpr bool kEnableScalarUnrolling = false;
+static constexpr bool kEnableScalarPeelingUnrolling = false;
 
 //
 // Static helpers.
@@ -533,6 +533,43 @@
   return true;
 }
 
+// Tries to statically evaluate condition of the specified "HIf" for other condition checks.
+static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) {
+  HInstruction* cond = instruction->InputAt(0);
+
+  // If a condition 'cond' is evaluated in an HIf instruction then in the successors of the
+  // IF_BLOCK we statically know the value of the condition 'cond' (TRUE in TRUE_SUCC, FALSE in
+  // FALSE_SUCC). Using that we can replace another evaluation (use) EVAL of the same 'cond'
+  // with TRUE value (FALSE value) if every path from the ENTRY_BLOCK to EVAL_BLOCK contains the
+  // edge HIF_BLOCK->TRUE_SUCC (HIF_BLOCK->FALSE_SUCC).
+  //     if (cond) {               if(cond) {
+  //       if (cond) {}              if (1) {}
+  //     } else {        =======>  } else {
+  //       if (cond) {}              if (0) {}
+  //     }                         }
+  if (!cond->IsConstant()) {
+    HBasicBlock* true_succ = instruction->IfTrueSuccessor();
+    HBasicBlock* false_succ = instruction->IfFalseSuccessor();
+
+    DCHECK_EQ(true_succ->GetPredecessors().size(), 1u);
+    DCHECK_EQ(false_succ->GetPredecessors().size(), 1u);
+
+    const HUseList<HInstruction*>& uses = cond->GetUses();
+    for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+      HInstruction* user = it->GetUser();
+      size_t index = it->GetIndex();
+      HBasicBlock* user_block = user->GetBlock();
+      // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+      ++it;
+      if (true_succ->Dominates(user_block)) {
+        user->ReplaceInput(graph->GetIntConstant(1), index);
+     } else if (false_succ->Dominates(user_block)) {
+        user->ReplaceInput(graph->GetIntConstant(0), index);
+      }
+    }
+  }
+}
+
 //
 // Public methods.
 //
@@ -851,40 +888,24 @@
 
 bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
   return TryOptimizeInnerLoopFinite(node) ||
+         TryPeelingForLoopInvariantExitsElimination(node) ||
          TryUnrollingForBranchPenaltyReduction(node);
 }
 
-void HLoopOptimization::PeelOrUnrollOnce(LoopNode* loop_node,
-                                         bool do_unrolling,
-                                         SuperblockCloner::HBasicBlockMap* bb_map,
-                                         SuperblockCloner::HInstructionMap* hir_map) {
-  // TODO: peel loop nests.
-  DCHECK(loop_node->inner == nullptr);
 
-  // Check that loop info is up-to-date.
-  HLoopInformation* loop_info = loop_node->loop_info;
-  HBasicBlock* header = loop_info->GetHeader();
-  DCHECK(loop_info == header->GetLoopInformation());
-
-  PeelUnrollHelper helper(loop_info, bb_map, hir_map);
-  DCHECK(helper.IsLoopClonable());
-  HBasicBlock* new_header = do_unrolling ? helper.DoUnrolling() : helper.DoPeeling();
-  DCHECK(header == new_header);
-  DCHECK(loop_info == new_header->GetLoopInformation());
-}
 
 //
 // Loop unrolling: generic part methods.
 //
 
-bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node) {
+bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) {
   // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests)
   // as InstructionSet is needed.
-  if (!kEnableScalarUnrolling || compiler_driver_ == nullptr) {
+  if (!kEnableScalarPeelingUnrolling || compiler_driver_ == nullptr) {
     return false;
   }
 
-  HLoopInformation* loop_info = loop_node->loop_info;
+  HLoopInformation* loop_info = node->loop_info;
   int64_t trip_count = 0;
   // Only unroll loops with a known tripcount.
   if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) {
@@ -900,7 +921,7 @@
   LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
 
   // Check "IsLoopClonable" last as it can be time-consuming.
-  if (arch_loop_helper_->IsLoopTooBigForScalarUnrolling(&loop_analysis_info) ||
+  if (arch_loop_helper_->IsLoopTooBigForScalarPeelingUnrolling(&loop_analysis_info) ||
       (loop_analysis_info.GetNumberOfExits() > 1) ||
       loop_analysis_info.HasInstructionsPreventingScalarUnrolling() ||
       !PeelUnrollHelper::IsLoopClonable(loop_info)) {
@@ -911,21 +932,57 @@
   DCHECK_EQ(unrolling_factor, 2u);
 
   // Perform unrolling.
-  ArenaAllocator* arena = loop_info->GetHeader()->GetGraph()->GetAllocator();
-  SuperblockCloner::HBasicBlockMap bb_map(
-      std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
-  SuperblockCloner::HInstructionMap hir_map(
-      std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
-  PeelOrUnrollOnce(loop_node, /* unrolling */ true, &bb_map, &hir_map);
+  PeelUnrollSimpleHelper helper(loop_info);
+  helper.DoUnrolling();
 
   // Remove the redundant loop check after unrolling.
-  HIf* copy_hif = bb_map.Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
+  HIf* copy_hif =
+      helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
   int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
   copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
 
   return true;
 }
 
+bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) {
+  // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests)
+  // as InstructionSet is needed.
+  if (!kEnableScalarPeelingUnrolling || compiler_driver_ == nullptr) {
+    return false;
+  }
+
+  HLoopInformation* loop_info = node->loop_info;
+  // Check 'IsLoopClonable' the last as it might be time-consuming.
+  if (!arch_loop_helper_->IsLoopPeelingEnabled()) {
+    return false;
+  }
+
+  LoopAnalysisInfo loop_analysis_info(loop_info);
+  LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
+
+  // Check "IsLoopClonable" last as it can be time-consuming.
+  if (arch_loop_helper_->IsLoopTooBigForScalarPeelingUnrolling(&loop_analysis_info) ||
+      loop_analysis_info.HasInstructionsPreventingScalarPeeling() ||
+      !LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) ||
+      !PeelUnrollHelper::IsLoopClonable(loop_info)) {
+    return false;
+  }
+
+  // Perform peeling.
+  PeelUnrollSimpleHelper helper(loop_info);
+  helper.DoPeeling();
+
+  const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
+  for (auto entry : *hir_map) {
+    HInstruction* copy = entry.second;
+    if (copy->IsIf()) {
+      TryToEvaluateIfCondition(copy->AsIf(), graph_);
+    }
+  }
+
+  return true;
+}
+
 //
 // Loop vectorization. The implementation is based on the book by Aart J.C. Bik:
 // "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance."