ART: Implement loop peeling/unrolling routines.

Implement loop peeling/unrolling routines using SuperblockCloner.
Fixes bug b/74198030 and provides tests for it.

Bug: b/74198030
Test: superblock_cloner_test.cc, loop_optimization_test.cc.

Change-Id: Id0d9a91575c88f8e45186441b14e903b89b007dd
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index db83689..c21bd65 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -227,11 +227,14 @@
   graph_->ClearDominanceInformation();
   graph_->BuildDominatorTree();
 
+  // BuildDominatorTree inserts a block beetween loop header and entry block.
+  EXPECT_EQ(header->GetPredecessors()[0]->GetSinglePredecessor(), entry_block_);
+
   // Check that after optimizations in BuildDominatorTree()/SimplifyCFG() phi inputs
   // are still mapped correctly to the block predecessors.
   for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
     HInstruction* input = phi->InputAt(i);
-    ASSERT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i]));
+    EXPECT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i]));
   }
 }
 
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index a7c23be..04942f9 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -28,6 +28,11 @@
 using HBasicBlockSet = SuperblockCloner::HBasicBlockSet;
 using HEdgeSet = SuperblockCloner::HEdgeSet;
 
+// When doing peeling we can choose whether to keep original loop (made of original basic blocks)
+// and form a peeled iteration of the copy blocks (preserve the header) or transfer original loop
+// blocks to the peeled iteration and create new loop from the copy blocks. Similar for unrolling.
+static const bool kPeelUnrollPreserveHeader = true;
+
 void HEdge::Dump(std::ostream& stream) const {
   stream << "(" << from_ << "->" << to_ << ")";
 }
@@ -70,20 +75,18 @@
   return true;
 }
 
-// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
-// graph.
-static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
-  if (loop1 != nullptr || loop2 != nullptr) {
-    return nullptr;
+// Returns whether two Edge sets are equal (ArenaHashSet doesn't have "Equal" method).
+static bool EdgeHashSetsEqual(const HEdgeSet* set1, const HEdgeSet* set2) {
+  if (set1->Size() != set2->Size()) {
+    return false;
   }
 
-  if (loop1->IsIn(*loop2)) {
-    return loop2;
-  } else if (loop2->IsIn(*loop1)) {
-    return loop1;
+  for (auto e : *set1) {
+    if (set2->Find(e) == set2->end()) {
+      return false;
+    }
   }
-  HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader());
-  return block->GetLoopInformation();
+  return true;
 }
 
 // Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph.
@@ -95,6 +98,21 @@
   }
 }
 
+// Performs DFS on the subgraph (specified by 'bb_set') starting from the specified block; while
+// traversing the function removes basic blocks from the bb_set (instead of traditional DFS
+// 'marking'). So what is left in the 'bb_set' after the traversal is not reachable from the start
+// block.
+static void TraverseSubgraphForConnectivity(HBasicBlock* block, HBasicBlockSet* bb_set) {
+  DCHECK(bb_set->IsBitSet(block->GetBlockId()));
+  bb_set->ClearBit(block->GetBlockId());
+
+  for (HBasicBlock* succ : block->GetSuccessors()) {
+    if (bb_set->IsBitSet(succ->GetBlockId())) {
+      TraverseSubgraphForConnectivity(succ, bb_set);
+    }
+  }
+}
+
 //
 // Helpers for CloneBasicBlock.
 //
@@ -268,7 +286,6 @@
 }
 
 void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) {
-  // TODO: DCHECK that after the transformation the graph is connected.
   HBasicBlock* block_entry = nullptr;
 
   if (outer_loop_ == nullptr) {
@@ -424,6 +441,11 @@
       outer_loop_ = nullptr;
       break;
     }
+    if (outer_loop_ == nullptr) {
+      // We should not use the initial outer_loop_ value 'nullptr' when finding the most outer
+      // common loop.
+      outer_loop_ = loop_exit_loop_info;
+    }
     outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info);
   }
 
@@ -507,6 +529,34 @@
 // Debug and logging methods.
 //
 
+// Debug function to dump graph' BasicBlocks info.
+void DumpBB(HGraph* graph) {
+  for (HBasicBlock* bb : graph->GetBlocks()) {
+    if (bb == nullptr) {
+      continue;
+    }
+    std::cout << bb->GetBlockId();
+    std::cout << " <- ";
+    for (HBasicBlock* pred : bb->GetPredecessors()) {
+      std::cout << pred->GetBlockId() << " ";
+    }
+    std::cout << " -> ";
+    for (HBasicBlock* succ : bb->GetSuccessors()) {
+      std::cout << succ->GetBlockId()  << " ";
+    }
+
+    if (bb->GetDominator()) {
+      std::cout << " dom " << bb->GetDominator()->GetBlockId();
+    }
+
+    if (bb->GetLoopInformation()) {
+      std::cout <<  "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId();
+    }
+
+    std::cout << std::endl;
+  }
+}
+
 void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) {
   DCHECK(!orig_instr->IsPhi());
   HInstruction* copy_instr = GetInstrCopy(orig_instr);
@@ -542,6 +592,82 @@
   }
 }
 
+bool SuperblockCloner::CheckRemappingInfoIsValid() {
+  for (HEdge edge : *remap_orig_internal_) {
+    if (!IsEdgeValid(edge, graph_) ||
+        !IsInOrigBBSet(edge.GetFrom()) ||
+        !IsInOrigBBSet(edge.GetTo())) {
+      return false;
+    }
+  }
+
+  for (auto edge : *remap_copy_internal_) {
+    if (!IsEdgeValid(edge, graph_) ||
+        !IsInOrigBBSet(edge.GetFrom()) ||
+        !IsInOrigBBSet(edge.GetTo())) {
+      return false;
+    }
+  }
+
+  for (auto edge : *remap_incoming_) {
+    if (!IsEdgeValid(edge, graph_) ||
+        IsInOrigBBSet(edge.GetFrom()) ||
+        !IsInOrigBBSet(edge.GetTo())) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+void SuperblockCloner::VerifyGraph() {
+  for (auto it : *hir_map_) {
+    HInstruction* orig_instr = it.first;
+    HInstruction* copy_instr = it.second;
+    if (!orig_instr->IsPhi() && !orig_instr->IsSuspendCheck()) {
+      DCHECK(it.first->GetBlock() != nullptr);
+    }
+    if (!copy_instr->IsPhi() && !copy_instr->IsSuspendCheck()) {
+      DCHECK(it.second->GetBlock() != nullptr);
+    }
+  }
+
+  GraphChecker checker(graph_);
+  checker.Run();
+  if (!checker.IsValid()) {
+    for (const std::string& error : checker.GetErrors()) {
+      std::cout << error << std::endl;
+    }
+    LOG(FATAL) << "GraphChecker failed: superblock cloner\n";
+  }
+}
+
+void DumpBBSet(const ArenaBitVector* set) {
+  for (uint32_t idx : set->Indexes()) {
+    std::cout << idx << "\n";
+  }
+}
+
+void SuperblockCloner::DumpInputSets() {
+  std::cout << graph_->PrettyMethod() << "\n";
+  std::cout << "orig_bb_set:\n";
+  for (uint32_t idx : orig_bb_set_.Indexes()) {
+    std::cout << idx << "\n";
+  }
+  std::cout << "remap_orig_internal:\n";
+  for (HEdge e : *remap_orig_internal_) {
+    std::cout << e << "\n";
+  }
+  std::cout << "remap_copy_internal:\n";
+  for (auto e : *remap_copy_internal_) {
+    std::cout << e << "\n";
+  }
+  std::cout << "remap_incoming:\n";
+  for (auto e : *remap_incoming_) {
+    std::cout << e << "\n";
+  }
+}
+
 //
 // Public methods.
 //
@@ -569,6 +695,7 @@
   remap_orig_internal_ = remap_orig_internal;
   remap_copy_internal_ = remap_copy_internal;
   remap_incoming_ = remap_incoming;
+  DCHECK(CheckRemappingInfoIsValid());
 }
 
 bool SuperblockCloner::IsSubgraphClonable() const {
@@ -602,6 +729,63 @@
   return true;
 }
 
+bool SuperblockCloner::IsFastCase() const {
+  // Check that loop unrolling/loop peeling is being conducted.
+  // Check that all the basic blocks belong to the same loop.
+  bool flag = false;
+  HLoopInformation* common_loop_info = nullptr;
+  for (uint32_t idx : orig_bb_set_.Indexes()) {
+    HBasicBlock* block = GetBlockById(idx);
+    HLoopInformation* block_loop_info = block->GetLoopInformation();
+    if (!flag) {
+      common_loop_info = block_loop_info;
+    } else {
+      if (block_loop_info != common_loop_info) {
+        return false;
+      }
+    }
+  }
+
+  // Check that orig_bb_set_ corresponds to loop peeling/unrolling.
+  if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) {
+    return false;
+  }
+
+  bool peeling_or_unrolling = false;
+  HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+  HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+  HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+
+  // Check whether remapping info corresponds to loop unrolling.
+  CollectRemappingInfoForPeelUnroll(/* to_unroll*/ true,
+                                    common_loop_info,
+                                    &remap_orig_internal,
+                                    &remap_copy_internal,
+                                    &remap_incoming);
+
+  peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) &&
+                          EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) &&
+                          EdgeHashSetsEqual(&remap_incoming, remap_incoming_);
+
+  remap_orig_internal.Clear();
+  remap_copy_internal.Clear();
+  remap_incoming.Clear();
+
+  // Check whether remapping info corresponds to loop peeling.
+  CollectRemappingInfoForPeelUnroll(/* to_unroll*/ false,
+                                    common_loop_info,
+                                    &remap_orig_internal,
+                                    &remap_copy_internal,
+                                    &remap_incoming);
+
+  peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) &&
+                          EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) &&
+                          EdgeHashSetsEqual(&remap_incoming, remap_incoming_);
+
+  return peeling_or_unrolling;
+}
+
 void SuperblockCloner::Run() {
   DCHECK(bb_map_ != nullptr);
   DCHECK(hir_map_ != nullptr);
@@ -609,6 +793,11 @@
          remap_copy_internal_ != nullptr &&
          remap_incoming_ != nullptr);
   DCHECK(IsSubgraphClonable());
+  DCHECK(IsFastCase());
+
+  if (kSuperblockClonerLogging) {
+    DumpInputSets();
+  }
 
   // Find an area in the graph for which control flow information should be adjusted.
   FindAndSetLocalAreaForAdjustments();
@@ -618,6 +807,19 @@
   // Connect the blocks together/remap successors and fix phis which are directly affected my the
   // remapping.
   RemapEdgesSuccessors();
+
+  // Check that the subgraph is connected.
+  if (kIsDebugBuild) {
+    HBasicBlockSet work_set(arena_, orig_bb_set_.GetSizeOf(), true, kArenaAllocSuperblockCloner);
+
+    // Add original and copy blocks of the subgraph to the work set.
+    for (auto iter : *bb_map_) {
+      work_set.SetBit(iter.first->GetBlockId());   // Original block.
+      work_set.SetBit(iter.second->GetBlockId());  // Copy block.
+    }
+    CHECK(IsSubgraphConnected(&work_set, graph_));
+  }
+
   // Recalculate dominance and backedge information which is required by the next stage.
   AdjustControlFlowInfo();
   // Fix data flow of the graph.
@@ -650,6 +852,10 @@
       }
     }
   }
+
+  if (kSuperblockClonerVerify) {
+    VerifyGraph();
+  }
 }
 
 HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) {
@@ -701,4 +907,125 @@
   }
 }
 
+//
+// Stand-alone methods.
+//
+
+void CollectRemappingInfoForPeelUnroll(bool to_unroll,
+                                       HLoopInformation* loop_info,
+                                       HEdgeSet* remap_orig_internal,
+                                       HEdgeSet* remap_copy_internal,
+                                       HEdgeSet* remap_incoming) {
+  DCHECK(loop_info != nullptr);
+  HBasicBlock* loop_header = loop_info->GetHeader();
+  // Set up remap_orig_internal edges set - set is empty.
+  // Set up remap_copy_internal edges set.
+  for (HBasicBlock* back_edge_block : loop_info->GetBackEdges()) {
+    HEdge e = HEdge(back_edge_block, loop_header);
+    if (to_unroll) {
+      remap_orig_internal->Insert(e);
+      remap_copy_internal->Insert(e);
+    } else {
+      if (kPeelUnrollPreserveHeader) {
+        remap_copy_internal->Insert(e);
+      } else {
+        remap_orig_internal->Insert(e);
+      }
+    }
+  }
+
+  // Set up remap_incoming edges set.
+  if (to_unroll != kPeelUnrollPreserveHeader) {
+    remap_incoming->Insert(HEdge(loop_info->GetPreHeader(), loop_header));
+  }
+}
+
+bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph) {
+  ArenaVector<HBasicBlock*> entry_blocks(
+      graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+  // Find subgraph entry blocks.
+  for (uint32_t orig_block_id : work_set->Indexes()) {
+    HBasicBlock* block = graph->GetBlocks()[orig_block_id];
+    for (HBasicBlock* pred : block->GetPredecessors()) {
+      if (!work_set->IsBitSet(pred->GetBlockId())) {
+        entry_blocks.push_back(block);
+        break;
+      }
+    }
+  }
+
+  for (HBasicBlock* entry_block : entry_blocks) {
+    if (work_set->IsBitSet(entry_block->GetBlockId())) {
+      TraverseSubgraphForConnectivity(entry_block, work_set);
+    }
+  }
+
+  // Return whether there are unvisited - unreachable - blocks.
+  return work_set->NumSetBits() == 0;
+}
+
+HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
+  if (loop1 == nullptr || loop2 == nullptr) {
+    return nullptr;
+  }
+
+  if (loop1->IsIn(*loop2)) {
+    return loop2;
+  }
+
+  HLoopInformation* current = loop1;
+  while (current != nullptr && !loop2->IsIn(*current)) {
+    current = current->GetPreHeader()->GetLoopInformation();
+  }
+
+  return current;
+}
+
+bool PeelUnrollHelper::IsLoopClonable(HLoopInformation* loop_info) {
+  PeelUnrollHelper helper(loop_info, nullptr, nullptr);
+  return helper.IsLoopClonable();
+}
+
+HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) {
+  // For now do peeling only for natural loops.
+  DCHECK(!loop_info_->IsIrreducible());
+
+  HBasicBlock* loop_header = loop_info_->GetHeader();
+  HGraph* graph = loop_header->GetGraph();
+  ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool());
+
+  HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+  HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+  HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+  CollectRemappingInfoForPeelUnroll(to_unroll,
+                                    loop_info_,
+                                    &remap_orig_internal,
+                                    &remap_copy_internal,
+                                    &remap_incoming);
+
+  cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
+  cloner_.Run();
+  cloner_.CleanUp();
+
+  return kPeelUnrollPreserveHeader ? loop_header : cloner_.GetBlockCopy(loop_header);
+}
+
+PeelUnrollSimpleHelper::PeelUnrollSimpleHelper(HLoopInformation* info)
+  : bb_map_(std::less<HBasicBlock*>(),
+            info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)),
+    hir_map_(std::less<HInstruction*>(),
+             info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)),
+    helper_(info, &bb_map_, &hir_map_) {}
+
 }  // namespace art
+
+namespace std {
+
+ostream& operator<<(ostream& os, const art::HEdge& e) {
+  e.Dump(os);
+  return os;
+}
+
+}  // namespace std
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index 23de692..19c9dd4 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -152,6 +152,15 @@
   // TODO: Start from small range of graph patterns then extend it.
   bool IsSubgraphClonable() const;
 
+  // Returns whether selected subgraph satisfies the criteria for fast data flow resolution
+  // when iterative DF algorithm is not required and dominators/instructions inputs can be
+  // trivially adjusted.
+  //
+  // TODO: formally describe the criteria.
+  //
+  // Loop peeling and unrolling satisfy the criteria.
+  bool IsFastCase() const;
+
   // Runs the copy algorithm according to the description.
   void Run();
 
@@ -202,11 +211,17 @@
     return IsInOrigBBSet(block->GetBlockId());
   }
 
+  // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops,
+  // dominators) needs to be adjusted.
+  HLoopInformation* GetRegionToBeAdjusted() const {
+    return outer_loop_;
+  }
+
  private:
   // Fills the 'exits' vector with the subgraph exits.
   void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
 
-  // Finds and records information about the area in the graph for which control-flow (back edges,
+  // Finds and records information about the area in the graph for which control flow (back edges,
   // loops, dominators) needs to be adjusted.
   void FindAndSetLocalAreaForAdjustments();
 
@@ -217,7 +232,7 @@
   // phis' nor instructions' inputs values are resolved.
   void RemapEdgesSuccessors();
 
-  // Adjusts control-flow (back edges, loops, dominators) for the local area defined by
+  // Adjusts control flow (back edges, loops, dominators) for the local area defined by
   // FindAndSetLocalAreaForAdjustments.
   void AdjustControlFlowInfo();
 
@@ -272,6 +287,9 @@
   // Debug and logging methods.
   //
   void CheckInstructionInputsRemapping(HInstruction* orig_instr);
+  bool CheckRemappingInfoIsValid();
+  void VerifyGraph();
+  void DumpInputSets();
 
   HBasicBlock* GetBlockById(uint32_t block_id) const {
     DCHECK(block_id < graph_->GetBlocks().size());
@@ -295,15 +313,94 @@
   HBasicBlockMap* bb_map_;
   // Correspondence map for instructions: (original HInstruction, copy HInstruction).
   HInstructionMap* hir_map_;
-  // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted.
+  // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted.
   HLoopInformation* outer_loop_;
   HBasicBlockSet outer_loop_bb_set_;
 
   ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
+  ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);
 
   DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
 };
 
+// Helper class to perform loop peeling/unrolling.
+//
+// This helper should be used when correspondence map between original and copied
+// basic blocks/instructions are demanded.
+class PeelUnrollHelper : public ValueObject {
+ public:
+  explicit PeelUnrollHelper(HLoopInformation* info,
+                            SuperblockCloner::HBasicBlockMap* bb_map,
+                            SuperblockCloner::HInstructionMap* hir_map) :
+      loop_info_(info),
+      cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map) {
+    // For now do peeling/unrolling only for natural loops.
+    DCHECK(!info->IsIrreducible());
+  }
+
+  // Returns whether the loop can be peeled/unrolled (static function).
+  static bool IsLoopClonable(HLoopInformation* loop_info);
+
+  // Returns whether the loop can be peeled/unrolled.
+  bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); }
+
+  HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll */ false); }
+  HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll */ true); }
+  HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); }
+
+ protected:
+  // Applies loop peeling/unrolling for the loop specified by 'loop_info'.
+  //
+  // Depending on 'do_unroll' either unrolls loop by 2 or peels one iteration from it.
+  HBasicBlock* DoPeelUnrollImpl(bool to_unroll);
+
+ private:
+  HLoopInformation* loop_info_;
+  SuperblockCloner cloner_;
+
+  DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper);
+};
+
+// Helper class to perform loop peeling/unrolling.
+//
+// This helper should be used when there is no need to get correspondence information between
+// original and copied basic blocks/instructions.
+class PeelUnrollSimpleHelper : public ValueObject {
+ public:
+  explicit PeelUnrollSimpleHelper(HLoopInformation* info);
+  bool IsLoopClonable() const { return helper_.IsLoopClonable(); }
+  HBasicBlock* DoPeeling() { return helper_.DoPeeling(); }
+  HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); }
+  HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); }
+
+ private:
+  SuperblockCloner::HBasicBlockMap bb_map_;
+  SuperblockCloner::HInstructionMap hir_map_;
+  PeelUnrollHelper helper_;
+
+  DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper);
+};
+
+// Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info.
+void CollectRemappingInfoForPeelUnroll(bool to_unroll,
+                                       HLoopInformation* loop_info,
+                                       SuperblockCloner::HEdgeSet* remap_orig_internal,
+                                       SuperblockCloner::HEdgeSet* remap_copy_internal,
+                                       SuperblockCloner::HEdgeSet* remap_incoming);
+
+// Returns whether blocks from 'work_set' are reachable from the rest of the graph.
+//
+// Returns whether such a set 'outer_entries' of basic blocks exists that:
+// - each block from 'outer_entries' is not from 'work_set'.
+// - each block from 'work_set' is reachable from at least one block from 'outer_entries'.
+//
+// After the function returns work_set contains only blocks from the original 'work_set'
+// which are unreachable from the rest of the graph.
+bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph);
+
+// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
+// graph.
+HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2);
 }  // namespace art
 
 namespace std {
@@ -312,11 +409,12 @@
 struct hash<art::HEdge> {
   size_t operator()(art::HEdge const& x) const noexcept  {
     // Use Cantor pairing function as the hash function.
-    uint32_t a = x.GetFrom();
-    uint32_t b = x.GetTo();
+    size_t a = x.GetFrom();
+    size_t b = x.GetTo();
     return (a + b) * (a + b + 1) / 2 + b;
   }
 };
+ostream& operator<<(ostream& os, const art::HEdge& e);
 
 }  // namespace std
 
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index f1b7bff..df2e517 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -25,54 +25,67 @@
 
 using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
 using HInstructionMap = SuperblockCloner::HInstructionMap;
+using HBasicBlockSet = SuperblockCloner::HBasicBlockSet;
+using HEdgeSet = SuperblockCloner::HEdgeSet;
 
 // This class provides methods and helpers for testing various cloning and copying routines:
 // individual instruction cloning and cloning of the more coarse-grain structures.
 class SuperblockClonerTest : public OptimizingUnitTest {
  public:
-  SuperblockClonerTest()
-      : graph_(CreateGraph()), entry_block_(nullptr), exit_block_(nullptr), parameter_(nullptr) {}
+  SuperblockClonerTest() : graph_(CreateGraph()),
+                           entry_block_(nullptr),
+                           return_block_(nullptr),
+                           exit_block_(nullptr),
+                           parameter_(nullptr) {}
 
-  void CreateBasicLoopControlFlow(/* out */ HBasicBlock** header_p,
-                                  /* out */ HBasicBlock** body_p) {
+  void InitGraph() {
     entry_block_ = new (GetAllocator()) HBasicBlock(graph_);
     graph_->AddBlock(entry_block_);
     graph_->SetEntryBlock(entry_block_);
 
-    HBasicBlock* loop_preheader = new (GetAllocator()) HBasicBlock(graph_);
-    HBasicBlock* loop_header = new (GetAllocator()) HBasicBlock(graph_);
-    HBasicBlock* loop_body = new (GetAllocator()) HBasicBlock(graph_);
-    HBasicBlock* loop_exit = new (GetAllocator()) HBasicBlock(graph_);
-
-    graph_->AddBlock(loop_preheader);
-    graph_->AddBlock(loop_header);
-    graph_->AddBlock(loop_body);
-    graph_->AddBlock(loop_exit);
+    return_block_ = new (GetAllocator()) HBasicBlock(graph_);
+    graph_->AddBlock(return_block_);
 
     exit_block_ = new (GetAllocator()) HBasicBlock(graph_);
     graph_->AddBlock(exit_block_);
     graph_->SetExitBlock(exit_block_);
 
-    entry_block_->AddSuccessor(loop_preheader);
-    loop_preheader->AddSuccessor(loop_header);
-    // Loop exit first to have a proper exit condition/target for HIf.
-    loop_header->AddSuccessor(loop_exit);
-    loop_header->AddSuccessor(loop_body);
-    loop_body->AddSuccessor(loop_header);
-    loop_exit->AddSuccessor(exit_block_);
-
-    *header_p = loop_header;
-    *body_p = loop_body;
+    entry_block_->AddSuccessor(return_block_);
+    return_block_->AddSuccessor(exit_block_);
 
     parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
                                                       dex::TypeIndex(0),
                                                       0,
                                                       DataType::Type::kInt32);
     entry_block_->AddInstruction(parameter_);
-    loop_exit->AddInstruction(new (GetAllocator()) HReturnVoid());
+    return_block_->AddInstruction(new (GetAllocator()) HReturnVoid());
     exit_block_->AddInstruction(new (GetAllocator()) HExit());
   }
 
+  void CreateBasicLoopControlFlow(HBasicBlock* position,
+                                  HBasicBlock* successor,
+                                  /* out */ HBasicBlock** header_p,
+                                  /* out */ HBasicBlock** body_p) {
+    HBasicBlock* loop_preheader = new (GetAllocator()) HBasicBlock(graph_);
+    HBasicBlock* loop_header = new (GetAllocator()) HBasicBlock(graph_);
+    HBasicBlock* loop_body = new (GetAllocator()) HBasicBlock(graph_);
+
+    graph_->AddBlock(loop_preheader);
+    graph_->AddBlock(loop_header);
+    graph_->AddBlock(loop_body);
+
+    position->ReplaceSuccessor(successor, loop_preheader);
+
+    loop_preheader->AddSuccessor(loop_header);
+    // Loop exit first to have a proper exit condition/target for HIf.
+    loop_header->AddSuccessor(successor);
+    loop_header->AddSuccessor(loop_body);
+    loop_body->AddSuccessor(loop_header);
+
+    *header_p = loop_header;
+    *body_p = loop_body;
+  }
+
   void CreateBasicLoopDataFlow(HBasicBlock* loop_header, HBasicBlock* loop_body) {
     uint32_t dex_pc = 0;
 
@@ -84,11 +97,12 @@
     // Header block.
     HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32);
     HInstruction* suspend_check = new (GetAllocator()) HSuspendCheck();
+    HInstruction* loop_check = new (GetAllocator()) HGreaterThanOrEqual(phi, const_128);
 
     loop_header->AddPhi(phi);
     loop_header->AddInstruction(suspend_check);
-    loop_header->AddInstruction(new (GetAllocator()) HGreaterThanOrEqual(phi, const_128));
-    loop_header->AddInstruction(new (GetAllocator()) HIf(parameter_));
+    loop_header->AddInstruction(loop_check);
+    loop_header->AddInstruction(new (GetAllocator()) HIf(loop_check));
 
     // Loop body block.
     HInstruction* null_check = new (GetAllocator()) HNullCheck(parameter_, dex_pc);
@@ -97,8 +111,8 @@
     HInstruction* array_get =
         new (GetAllocator()) HArrayGet(null_check, bounds_check, DataType::Type::kInt32, dex_pc);
     HInstruction* add =  new (GetAllocator()) HAdd(DataType::Type::kInt32, array_get, const_1);
-    HInstruction* array_set =
-        new (GetAllocator()) HArraySet(null_check, bounds_check, add, DataType::Type::kInt32, dex_pc);
+    HInstruction* array_set = new (GetAllocator()) HArraySet(
+        null_check, bounds_check, add, DataType::Type::kInt32, dex_pc);
     HInstruction* induction_inc = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi, const_1);
 
     loop_body->AddInstruction(null_check);
@@ -153,6 +167,7 @@
   HGraph* graph_;
 
   HBasicBlock* entry_block_;
+  HBasicBlock* return_block_;
   HBasicBlock* exit_block_;
 
   HInstruction* parameter_;
@@ -162,10 +177,11 @@
   HBasicBlock* header = nullptr;
   HBasicBlock* loop_body = nullptr;
 
-  CreateBasicLoopControlFlow(&header, &loop_body);
+  InitGraph();
+  CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
   CreateBasicLoopDataFlow(header, loop_body);
   graph_->BuildDominatorTree();
-  ASSERT_TRUE(CheckGraph());
+  EXPECT_TRUE(CheckGraph());
 
   HSuspendCheck* old_suspend_check = header->GetLoopInformation()->GetSuspendCheck();
   CloneAndReplaceInstructionVisitor visitor(graph_);
@@ -193,7 +209,8 @@
   HBasicBlock* loop_body = nullptr;
   ArenaAllocator* arena = graph_->GetAllocator();
 
-  CreateBasicLoopControlFlow(&header, &loop_body);
+  InitGraph();
+  CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
   CreateBasicLoopDataFlow(header, loop_body);
   graph_->BuildDominatorTree();
   ASSERT_TRUE(CheckGraph());
@@ -272,7 +289,8 @@
   HBasicBlock* loop_body = nullptr;
   ArenaAllocator* arena = graph_->GetAllocator();
 
-  CreateBasicLoopControlFlow(&header, &loop_body);
+  InitGraph();
+  CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
   CreateBasicLoopDataFlow(header, loop_body);
   graph_->BuildDominatorTree();
   ASSERT_TRUE(CheckGraph());
@@ -303,4 +321,487 @@
   EXPECT_TRUE(loop_info->IsBackEdge(*loop_body));
 }
 
+// Tests IsSubgraphConnected function for negative case.
+TEST_F(SuperblockClonerTest, IsGraphConnected) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+  ArenaAllocator* arena = graph_->GetAllocator();
+
+  InitGraph();
+  CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* unreachable_block = new (GetAllocator()) HBasicBlock(graph_);
+  graph_->AddBlock(unreachable_block);
+
+  HBasicBlockSet bb_set(
+      arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  bb_set.SetBit(header->GetBlockId());
+  bb_set.SetBit(loop_body->GetBlockId());
+  bb_set.SetBit(unreachable_block->GetBlockId());
+
+  EXPECT_FALSE(IsSubgraphConnected(&bb_set, graph_));
+  EXPECT_EQ(bb_set.NumSetBits(), 1u);
+  EXPECT_TRUE(bb_set.IsBitSet(unreachable_block->GetBlockId()));
+}
+
+// Tests SuperblockCloner for loop peeling case.
+//
+// Control Flow of the example (ignoring critical edges splitting).
+//
+//       Before                    After
+//
+//         |B|                      |B|
+//          |                        |
+//          v                        v
+//         |1|                      |1|
+//          |                        |
+//          v                        v
+//         |2|<-\              (6) |2A|
+//         / \  /                   / \
+//        v   v/                   /   v
+//       |4|  |3|                 /   |3A| (7)
+//        |                      /     /
+//        v                     |     v
+//       |E|                     \   |2|<-\
+//                                \ / \   /
+//                                 v   v /
+//                                |4|  |3|
+//                                 |
+//                                 v
+//                                |E|
+TEST_F(SuperblockClonerTest, LoopPeeling) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+
+  InitGraph();
+  CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  graph_->BuildDominatorTree();
+  EXPECT_TRUE(CheckGraph());
+
+  HBasicBlockMap bb_map(
+      std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+  HInstructionMap hir_map(
+      std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+  HLoopInformation* loop_info = header->GetLoopInformation();
+  PeelUnrollHelper helper(loop_info, &bb_map, &hir_map);
+  EXPECT_TRUE(helper.IsLoopClonable());
+  HBasicBlock* new_header = helper.DoPeeling();
+  HLoopInformation* new_loop_info = new_header->GetLoopInformation();
+
+  EXPECT_TRUE(CheckGraph());
+
+  // Check loop body successors.
+  EXPECT_EQ(loop_body->GetSingleSuccessor(), header);
+  EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header);
+
+  // Check loop structure.
+  EXPECT_EQ(header, new_header);
+  EXPECT_EQ(new_loop_info->GetHeader(), header);
+  EXPECT_EQ(new_loop_info->GetBackEdges().size(), 1u);
+  EXPECT_EQ(new_loop_info->GetBackEdges()[0], loop_body);
+}
+
+// Tests SuperblockCloner for loop unrolling case.
+//
+// Control Flow of the example (ignoring critical edges splitting).
+//
+//       Before                    After
+//
+//         |B|                      |B|
+//          |                        |
+//          v                        v
+//         |1|                      |1|
+//          |                        |
+//          v                        v
+//         |2|<-\               (6) |2A|<-\
+//         / \  /                   / \    \
+//        v   v/                   /   v    \
+//       |4|  |3|                 /(7)|3A|   |
+//        |                      /     /    /
+//        v                     |     v    /
+//       |E|                     \   |2|  /
+//                                \ / \  /
+//                                 v   v/
+//                                |4| |3|
+//                                 |
+//                                 v
+//                                |E|
+TEST_F(SuperblockClonerTest, LoopUnrolling) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+
+  InitGraph();
+  CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  graph_->BuildDominatorTree();
+  EXPECT_TRUE(CheckGraph());
+
+  HBasicBlockMap bb_map(
+      std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+  HInstructionMap hir_map(
+      std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+  HLoopInformation* loop_info = header->GetLoopInformation();
+  PeelUnrollHelper helper(loop_info, &bb_map, &hir_map);
+  EXPECT_TRUE(helper.IsLoopClonable());
+  HBasicBlock* new_header = helper.DoUnrolling();
+
+  EXPECT_TRUE(CheckGraph());
+
+  // Check loop body successors.
+  EXPECT_EQ(loop_body->GetSingleSuccessor(), bb_map.Get(header));
+  EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header);
+
+  // Check loop structure.
+  EXPECT_EQ(header, new_header);
+  EXPECT_EQ(loop_info, new_header->GetLoopInformation());
+  EXPECT_EQ(loop_info->GetHeader(), new_header);
+  EXPECT_EQ(loop_info->GetBackEdges().size(), 1u);
+  EXPECT_EQ(loop_info->GetBackEdges()[0], bb_map.Get(loop_body));
+}
+
+// Checks that loop unrolling works fine for a loop with multiple back edges. Tests that after
+// the transformation the loop has a single preheader.
+TEST_F(SuperblockClonerTest, LoopPeelingMultipleBackEdges) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+
+  InitGraph();
+  CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+
+  // Transform a basic loop to have multiple back edges.
+  HBasicBlock* latch = header->GetSuccessors()[1];
+  HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_);
+  HBasicBlock* temp1 = new (GetAllocator()) HBasicBlock(graph_);
+  graph_->AddBlock(if_block);
+  graph_->AddBlock(temp1);
+  header->ReplaceSuccessor(latch, if_block);
+  if_block->AddSuccessor(latch);
+  if_block->AddSuccessor(temp1);
+  temp1->AddSuccessor(header);
+
+  if_block->AddInstruction(new (GetAllocator()) HIf(parameter_));
+
+  HInstructionIterator it(header->GetPhis());
+  DCHECK(!it.Done());
+  HPhi* loop_phi = it.Current()->AsPhi();
+  HInstruction* temp_add = new (GetAllocator()) HAdd(DataType::Type::kInt32,
+                                                     loop_phi,
+                                                     graph_->GetIntConstant(2));
+  temp1->AddInstruction(temp_add);
+  temp1->AddInstruction(new (GetAllocator()) HGoto());
+  loop_phi->AddInput(temp_add);
+
+  graph_->BuildDominatorTree();
+  EXPECT_TRUE(CheckGraph());
+
+  HLoopInformation* loop_info = header->GetLoopInformation();
+  PeelUnrollSimpleHelper helper(loop_info);
+  HBasicBlock* new_header = helper.DoPeeling();
+  EXPECT_EQ(header, new_header);
+
+  EXPECT_TRUE(CheckGraph());
+  EXPECT_EQ(header->GetPredecessors().size(), 3u);
+}
+
+static void CheckLoopStructureForLoopPeelingNested(HBasicBlock* loop1_header,
+                                                   HBasicBlock* loop2_header,
+                                                   HBasicBlock* loop3_header) {
+  EXPECT_EQ(loop1_header->GetLoopInformation()->GetHeader(), loop1_header);
+  EXPECT_EQ(loop2_header->GetLoopInformation()->GetHeader(), loop2_header);
+  EXPECT_EQ(loop3_header->GetLoopInformation()->GetHeader(), loop3_header);
+  EXPECT_EQ(loop1_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr);
+  EXPECT_EQ(loop2_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr);
+  EXPECT_EQ(loop3_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation()->GetHeader(),
+            loop2_header);
+}
+
+TEST_F(SuperblockClonerTest, LoopPeelingNested) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+
+  InitGraph();
+
+  // Create the following nested structure of loops
+  //   Headers:  1    2 3
+  //             [ ], [ [ ] ]
+  CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop1_header = header;
+
+  CreateBasicLoopControlFlow(header, return_block_, &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop2_header = header;
+
+  CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop3_header = header;
+
+  graph_->BuildDominatorTree();
+  EXPECT_TRUE(CheckGraph());
+
+  HLoopInformation* loop2_info_before = loop2_header->GetLoopInformation();
+  HLoopInformation* loop3_info_before = loop3_header->GetLoopInformation();
+
+  // Check nested loops structure.
+  CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header);
+  PeelUnrollSimpleHelper helper(loop1_header->GetLoopInformation());
+  helper.DoPeeling();
+  // Check that nested loops structure has not changed after the transformation.
+  CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header);
+
+  // Test that the loop info is preserved.
+  EXPECT_EQ(loop2_info_before, loop2_header->GetLoopInformation());
+  EXPECT_EQ(loop3_info_before, loop3_header->GetLoopInformation());
+
+  EXPECT_EQ(loop3_info_before->GetPreHeader()->GetLoopInformation(), loop2_info_before);
+  EXPECT_EQ(loop2_info_before->GetPreHeader()->GetLoopInformation(), nullptr);
+
+  EXPECT_EQ(helper.GetRegionToBeAdjusted(), nullptr);
+
+  EXPECT_TRUE(CheckGraph());
+}
+
+// Checks that the loop population is correctly propagated after an inner loop is peeled.
+TEST_F(SuperblockClonerTest, OuterLoopPopulationAfterInnerPeeled) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+
+  InitGraph();
+
+  // Create the following nested structure of loops
+  //   Headers:  1 2 3        4
+  //             [ [ [ ] ] ], [ ]
+  CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop1_header = header;
+
+  CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop2_header = header;
+
+  CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop3_header = header;
+
+  CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop4_header = header;
+
+  graph_->BuildDominatorTree();
+  EXPECT_TRUE(CheckGraph());
+
+  PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation());
+  helper.DoPeeling();
+  HLoopInformation* loop1 = loop1_header->GetLoopInformation();
+  HLoopInformation* loop2 = loop2_header->GetLoopInformation();
+  HLoopInformation* loop3 = loop3_header->GetLoopInformation();
+  HLoopInformation* loop4 = loop4_header->GetLoopInformation();
+
+  EXPECT_TRUE(loop1->Contains(*loop2_header));
+  EXPECT_TRUE(loop1->Contains(*loop3_header));
+  EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader()));
+
+  // Check that loop4 info has not been touched after local run of AnalyzeLoops.
+  EXPECT_EQ(loop4, loop4_header->GetLoopInformation());
+
+  EXPECT_TRUE(loop1->IsIn(*loop1));
+  EXPECT_TRUE(loop2->IsIn(*loop1));
+  EXPECT_TRUE(loop3->IsIn(*loop1));
+  EXPECT_TRUE(loop3->IsIn(*loop2));
+  EXPECT_TRUE(!loop4->IsIn(*loop1));
+
+  EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), nullptr);
+
+  EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop2);
+
+  EXPECT_TRUE(CheckGraph());
+}
+
+// Checks the case when inner loop have an exit not to its immediate outer_loop but some other loop
+// in the hierarchy. Loop population information must be valid after loop peeling.
+TEST_F(SuperblockClonerTest, NestedCaseExitToOutermost) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+
+  InitGraph();
+
+  // Create the following nested structure of loops then peel loop3.
+  //   Headers:  1 2 3
+  //             [ [ [ ] ] ]
+  CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop1_header = header;
+  HBasicBlock* loop_body1 = loop_body;
+
+  CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+
+  CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop3_header = header;
+  HBasicBlock* loop_body3 = loop_body;
+
+  // Change the loop3 - insert an exit which leads to loop1.
+  HBasicBlock* loop3_extra_if_block = new (GetAllocator()) HBasicBlock(graph_);
+  graph_->AddBlock(loop3_extra_if_block);
+  loop3_extra_if_block->AddInstruction(new (GetAllocator()) HIf(parameter_));
+
+  loop3_header->ReplaceSuccessor(loop_body3, loop3_extra_if_block);
+  loop3_extra_if_block->AddSuccessor(loop_body1);  // Long exit.
+  loop3_extra_if_block->AddSuccessor(loop_body3);
+
+  graph_->BuildDominatorTree();
+  EXPECT_TRUE(CheckGraph());
+
+  HBasicBlock* loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0];
+  EXPECT_TRUE(loop1_header->GetLoopInformation()->Contains(*loop3_long_exit));
+
+  PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation());
+  helper.DoPeeling();
+
+  HLoopInformation* loop1 = loop1_header->GetLoopInformation();
+  // Check that after the transformation the local area for CF adjustments has been chosen
+  // correctly and loop population has been updated.
+  loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0];
+  EXPECT_TRUE(loop1->Contains(*loop3_long_exit));
+
+  EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop1);
+
+  EXPECT_TRUE(loop1->Contains(*loop3_header));
+  EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader()));
+
+  EXPECT_TRUE(CheckGraph());
+}
+
+TEST_F(SuperblockClonerTest, FastCaseCheck) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+  ArenaAllocator* arena = graph_->GetAllocator();
+
+  InitGraph();
+  CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  graph_->BuildDominatorTree();
+
+  HLoopInformation* loop_info = header->GetLoopInformation();
+
+  ArenaBitVector orig_bb_set(
+      arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  orig_bb_set.Union(&loop_info->GetBlocks());
+
+  HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+  HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+  HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+  CollectRemappingInfoForPeelUnroll(true,
+                                    loop_info,
+                                    &remap_orig_internal,
+                                    &remap_copy_internal,
+                                    &remap_incoming);
+
+  // Insert some extra nodes and edges.
+  HBasicBlock* preheader = loop_info->GetPreHeader();
+  orig_bb_set.SetBit(preheader->GetBlockId());
+
+  // Adjust incoming edges.
+  remap_incoming.Clear();
+  remap_incoming.Insert(HEdge(preheader->GetSinglePredecessor(), preheader));
+
+  HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+  HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+
+  SuperblockCloner cloner(graph_,
+                          &orig_bb_set,
+                          &bb_map,
+                          &hir_map);
+  cloner.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
+
+  EXPECT_FALSE(cloner.IsFastCase());
+}
+
+// Helper for FindCommonLoop which also check that FindCommonLoop is symmetric.
+static HLoopInformation* FindCommonLoopCheck(HLoopInformation* loop1, HLoopInformation* loop2) {
+  HLoopInformation* common_loop12 = FindCommonLoop(loop1, loop2);
+  HLoopInformation* common_loop21 = FindCommonLoop(loop2, loop1);
+  EXPECT_EQ(common_loop21, common_loop12);
+  return common_loop12;
+}
+
+// Tests FindCommonLoop function on a loop nest.
+TEST_F(SuperblockClonerTest, FindCommonLoop) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+
+  InitGraph();
+
+  // Create the following nested structure of loops
+  //   Headers:  1 2 3      4      5
+  //             [ [ [ ] ], [ ] ], [ ]
+  CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop1_header = header;
+
+  CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop2_header = header;
+
+  CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop3_header = header;
+
+  CreateBasicLoopControlFlow(loop2_header, loop2_header->GetSuccessors()[0], &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop4_header = header;
+
+  CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  HBasicBlock* loop5_header = header;
+
+  graph_->BuildDominatorTree();
+  EXPECT_TRUE(CheckGraph());
+
+  HLoopInformation* loop1 = loop1_header->GetLoopInformation();
+  HLoopInformation* loop2 = loop2_header->GetLoopInformation();
+  HLoopInformation* loop3 = loop3_header->GetLoopInformation();
+  HLoopInformation* loop4 = loop4_header->GetLoopInformation();
+  HLoopInformation* loop5 = loop5_header->GetLoopInformation();
+
+  EXPECT_TRUE(loop1->IsIn(*loop1));
+  EXPECT_TRUE(loop2->IsIn(*loop1));
+  EXPECT_TRUE(loop3->IsIn(*loop1));
+  EXPECT_TRUE(loop3->IsIn(*loop2));
+  EXPECT_TRUE(loop4->IsIn(*loop1));
+
+  EXPECT_FALSE(loop5->IsIn(*loop1));
+  EXPECT_FALSE(loop4->IsIn(*loop2));
+  EXPECT_FALSE(loop4->IsIn(*loop3));
+
+  EXPECT_EQ(loop1->GetPreHeader()->GetLoopInformation(), nullptr);
+  EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), loop1);
+
+  EXPECT_EQ(FindCommonLoopCheck(nullptr, nullptr), nullptr);
+  EXPECT_EQ(FindCommonLoopCheck(loop2, nullptr), nullptr);
+
+  EXPECT_EQ(FindCommonLoopCheck(loop1, loop1), loop1);
+  EXPECT_EQ(FindCommonLoopCheck(loop1, loop2), loop1);
+  EXPECT_EQ(FindCommonLoopCheck(loop1, loop3), loop1);
+  EXPECT_EQ(FindCommonLoopCheck(loop1, loop4), loop1);
+  EXPECT_EQ(FindCommonLoopCheck(loop1, loop5), nullptr);
+
+  EXPECT_EQ(FindCommonLoopCheck(loop2, loop3), loop2);
+  EXPECT_EQ(FindCommonLoopCheck(loop2, loop4), loop1);
+  EXPECT_EQ(FindCommonLoopCheck(loop2, loop5), nullptr);
+
+  EXPECT_EQ(FindCommonLoopCheck(loop3, loop4), loop1);
+  EXPECT_EQ(FindCommonLoopCheck(loop3, loop5), nullptr);
+
+  EXPECT_EQ(FindCommonLoopCheck(loop4, loop5), nullptr);
+
+  EXPECT_EQ(FindCommonLoopCheck(loop5, loop5), loop5);
+}
+
 }  // namespace art