ART: Implement scalar loop peeling.

Implement scalar loop peeling to eliminate loop-invariant exits
(on arm64). If a loop exit condition is loop-invariant, then
loop peeling + GVN + DCE can eliminate this exit from the loop
body. Note: GVN and DCE aren't applied during loop optimizations.

Note: this functionality is currently turned off by default.

Test: test-art-host, test-art-target, boot-to-gui.

Change-Id: I98d20054a431838b452dc06bd25c075eb445960c
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index cd3bdaf..a0760ef 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -16,6 +16,8 @@
 
 #include "loop_analysis.h"
 
+#include "base/bit_vector-inl.h"
+
 namespace art {
 
 void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
@@ -33,7 +35,8 @@
 
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* instruction = it.Current();
-      if (MakesScalarUnrollingNonBeneficial(instruction)) {
+      if (MakesScalarPeelingUnrollingNonBeneficial(instruction)) {
+        analysis_results->has_instructions_preventing_scalar_peeling_ = true;
         analysis_results->has_instructions_preventing_scalar_unrolling_ = true;
       }
       analysis_results->instr_num_++;
@@ -42,6 +45,22 @@
   }
 }
 
+bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) {
+  HGraph* graph = loop_info->GetHeader()->GetGraph();
+  for (uint32_t block_id : loop_info->GetBlocks().Indexes()) {
+    HBasicBlock* block = graph->GetBlocks()[block_id];
+    DCHECK(block != nullptr);
+    if (block->EndsWithIf()) {
+      HIf* hif = block->GetLastInstruction()->AsIf();
+      HInstruction* input = hif->InputAt(0);
+      if (IsLoopExit(loop_info, hif) && !loop_info->Contains(*input->GetBlock())) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 class Arm64LoopHelper : public ArchDefaultLoopHelper {
  public:
   // Scalar loop unrolling parameters and heuristics.
@@ -60,7 +79,7 @@
   // Loop's maximum instruction count. Loops with higher count will not be unrolled.
   static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50;
 
-  bool IsLoopTooBigForScalarUnrolling(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
+  bool IsLoopTooBigForScalarPeelingUnrolling(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
     size_t instr_num = loop_analysis_info->GetNumberOfInstructions();
     size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks();
     return (instr_num >= kArm64ScalarHeuristicMaxBodySizeInstr ||
@@ -77,6 +96,8 @@
     return desired_unrolling_factor;
   }
 
+  bool IsLoopPeelingEnabled() const OVERRIDE { return true; }
+
   uint32_t GetSIMDUnrollingFactor(HBasicBlock* block,
                                   int64_t trip_count,
                                   uint32_t max_peel,