ART: Introduce Loop Versioning in SuberblockCloner.

Support Loop Versioning in SuberblockCloner as a tool to
enable further optimization (e.g. Dynamic Loop Unrolling).
The patch brings the feature in without enabling it.

Replace std::cout with LOG(INFO) for debug dumps.

Test: superblock_cloner_test.
Test: test-art-target.

Change-Id: I303cabfb752b8c3c8597abfc0ac261e8616e8cee
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index 9f7a316..b968491 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -20,7 +20,7 @@
 #include "induction_var_range.h"
 #include "graph_checker.h"
 
-#include <iostream>
+#include <sstream>
 
 namespace art {
 
@@ -227,6 +227,40 @@
   }
 }
 
+bool SuperblockCloner::IsRemapInfoForVersioning() const {
+  return remap_incoming_->empty() &&
+         remap_orig_internal_->empty() &&
+         remap_copy_internal_->empty();
+}
+
+void SuperblockCloner::CopyIncomingEdgesForVersioning() {
+  for (uint32_t orig_block_id : orig_bb_set_.Indexes()) {
+    HBasicBlock* orig_block = GetBlockById(orig_block_id);
+    size_t incoming_edge_count = 0;
+    for (HBasicBlock* orig_pred : orig_block->GetPredecessors()) {
+      uint32_t orig_pred_id = orig_pred->GetBlockId();
+      if (IsInOrigBBSet(orig_pred_id)) {
+        continue;
+      }
+
+      HBasicBlock* copy_block = GetBlockCopy(orig_block);
+      // This corresponds to the requirement on the order of predecessors: all the incoming
+      // edges must be seen before the internal ones. This is always true for natural loops.
+      // TODO: remove this requirement.
+      DCHECK_EQ(orig_block->GetPredecessorIndexOf(orig_pred), incoming_edge_count);
+      for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
+        HPhi* orig_phi = it.Current()->AsPhi();
+        HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+        HInstruction* orig_phi_input = orig_phi->InputAt(incoming_edge_count);
+        // Add the corresponding input of the original phi to the copy one.
+        copy_phi->AddInput(orig_phi_input);
+      }
+      copy_block->AddPredecessor(orig_pred);
+      incoming_edge_count++;
+    }
+  }
+}
+
 //
 // Local versions of CF calculation/adjustment routines.
 //
@@ -452,6 +486,12 @@
 }
 
 void SuperblockCloner::RemapEdgesSuccessors() {
+  // By this stage all the blocks have been copied, copy phis - created with no inputs;
+  // no copy edges have been created so far.
+  if (IsRemapInfoForVersioning()) {
+    CopyIncomingEdgesForVersioning();
+  }
+
   // Redirect incoming edges.
   for (HEdge e : *remap_incoming_) {
     HBasicBlock* orig_block = GetBlockById(e.GetFrom());
@@ -646,25 +686,26 @@
     if (bb == nullptr) {
       continue;
     }
-    std::cout << bb->GetBlockId();
-    std::cout << " <- ";
+    std::ostringstream oss;
+    oss << bb->GetBlockId();
+    oss << " <- ";
     for (HBasicBlock* pred : bb->GetPredecessors()) {
-      std::cout << pred->GetBlockId() << " ";
+      oss << pred->GetBlockId() << " ";
     }
-    std::cout << " -> ";
+    oss << " -> ";
     for (HBasicBlock* succ : bb->GetSuccessors()) {
-      std::cout << succ->GetBlockId()  << " ";
+      oss << succ->GetBlockId()  << " ";
     }
 
     if (bb->GetDominator()) {
-      std::cout << " dom " << bb->GetDominator()->GetBlockId();
+      oss << " dom " << bb->GetDominator()->GetBlockId();
     }
 
     if (bb->GetLoopInformation()) {
-      std::cout <<  "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId();
+      oss <<  "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId();
     }
 
-    std::cout << std::endl;
+    LOG(INFO) << oss.str();
   }
 }
 
@@ -747,35 +788,35 @@
     checker.Run();
     if (!checker.IsValid()) {
       for (const std::string& error : checker.GetErrors()) {
-        std::cout << error << std::endl;
+        LOG(ERROR) << error;
       }
-      LOG(FATAL) << "GraphChecker failed: superblock cloner\n";
+      LOG(FATAL) << "GraphChecker failed: superblock cloner";
     }
   }
 }
 
 void DumpBBSet(const ArenaBitVector* set) {
   for (uint32_t idx : set->Indexes()) {
-    std::cout << idx << "\n";
+    LOG(INFO) << idx;
   }
 }
 
 void SuperblockCloner::DumpInputSets() {
-  std::cout << "orig_bb_set:\n";
+  LOG(INFO) << "orig_bb_set:";
   for (uint32_t idx : orig_bb_set_.Indexes()) {
-    std::cout << idx << "\n";
+    LOG(INFO) << idx;
   }
-  std::cout << "remap_orig_internal:\n";
+  LOG(INFO) << "remap_orig_internal:";
   for (HEdge e : *remap_orig_internal_) {
-    std::cout << e << "\n";
+    LOG(INFO) << e;
   }
-  std::cout << "remap_copy_internal:\n";
+  LOG(INFO) << "remap_copy_internal:";
   for (auto e : *remap_copy_internal_) {
-    std::cout << e << "\n";
+    LOG(INFO) << e;
   }
-  std::cout << "remap_incoming:\n";
+  LOG(INFO) << "remap_incoming:";
   for (auto e : *remap_incoming_) {
-    std::cout << e << "\n";
+    LOG(INFO) << e;
   }
 }
 
@@ -837,8 +878,8 @@
   return true;
 }
 
+// Checks that loop unrolling/peeling/versioning is being conducted.
 bool SuperblockCloner::IsFastCase() const {
-  // Check that loop unrolling/loop peeling is being conducted.
   // Check that all the basic blocks belong to the same loop.
   bool flag = false;
   HLoopInformation* common_loop_info = nullptr;
@@ -854,11 +895,15 @@
     }
   }
 
-  // Check that orig_bb_set_ corresponds to loop peeling/unrolling.
+  // Check that orig_bb_set_ corresponds to loop peeling/unrolling/versioning.
   if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) {
     return false;
   }
 
+  if (IsRemapInfoForVersioning()) {
+    return true;
+  }
+
   bool peeling_or_unrolling = false;
   HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
   HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
@@ -1012,8 +1057,7 @@
     HBasicBlock* copy_block = CloneBasicBlock(orig_block);
     bb_map_->Put(orig_block, copy_block);
     if (kSuperblockClonerLogging) {
-      std::cout << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId() <<
-                   std::endl;
+      LOG(INFO) << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId();
     }
   }
 }
@@ -1089,14 +1133,14 @@
   return current;
 }
 
-bool PeelUnrollHelper::IsLoopClonable(HLoopInformation* loop_info) {
-  PeelUnrollHelper helper(
+bool LoopClonerHelper::IsLoopClonable(HLoopInformation* loop_info) {
+  LoopClonerHelper helper(
       loop_info, /* bb_map= */ nullptr, /* hir_map= */ nullptr, /* induction_range= */ nullptr);
   return helper.IsLoopClonable();
 }
 
-HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) {
-  // For now do peeling only for natural loops.
+HBasicBlock* LoopClonerHelper::DoLoopTransformationImpl(TransformationKind transformation) {
+  // For now do transformations only for natural loops.
   DCHECK(!loop_info_->IsIrreducible());
 
   HBasicBlock* loop_header = loop_info_->GetHeader();
@@ -1105,9 +1149,25 @@
   HGraph* graph = loop_header->GetGraph();
 
   if (kSuperblockClonerLogging) {
-    std::cout << "Method: " << graph->PrettyMethod() << std::endl;
-    std::cout << "Scalar loop " << (to_unroll ? "unrolling" : "peeling") <<
-                 " was applied to the loop <" << loop_header->GetBlockId() << ">." << std::endl;
+    LOG(INFO) << "Method: " << graph->PrettyMethod();
+    std::ostringstream oss;
+    oss << "Scalar loop ";
+    switch (transformation) {
+      case TransformationKind::kPeeling:
+        oss << "peeling";
+        break;
+      case TransformationKind::kUnrolling:
+        oss<< "unrolling";
+        break;
+      case TransformationKind::kVersioning:
+        oss << "versioning";
+        break;
+      default:
+        LOG(FATAL) << "Unreachable";
+        UNREACHABLE();
+    }
+    oss << " was applied to the loop <" << loop_header->GetBlockId() << ">.";
+    LOG(INFO) << oss.str();
   }
 
   ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool());
@@ -1116,11 +1176,14 @@
   HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
   HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
 
-  CollectRemappingInfoForPeelUnroll(to_unroll,
-                                    loop_info_,
-                                    &remap_orig_internal,
-                                    &remap_copy_internal,
-                                    &remap_incoming);
+  // No remapping needed for loop versioning.
+  if (transformation != TransformationKind::kVersioning) {
+    CollectRemappingInfoForPeelUnroll(transformation == TransformationKind::kUnrolling,
+                                      loop_info_,
+                                      &remap_orig_internal,
+                                      &remap_copy_internal,
+                                      &remap_incoming);
+  }
 
   cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
   cloner_.Run();
@@ -1132,7 +1195,7 @@
   return loop_header;
 }
 
-PeelUnrollSimpleHelper::PeelUnrollSimpleHelper(HLoopInformation* info,
+LoopClonerSimpleHelper::LoopClonerSimpleHelper(HLoopInformation* info,
                                                InductionVarRange* induction_range)
   : bb_map_(std::less<HBasicBlock*>(),
             info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)),