ART: Implement SuperblockCloner.

SuperblockCloner provides a feature of cloning subgraphs in a
smart, high level way without fine grain manipulation with IR;
data flow and graph properties are resolved/adjusted automatically.
The clone transformation is defined by specifying a set of basic
blocks to copy and a set of rules how to treat edges, remap their
successors. By using this approach such optimizations as Branch
Target Expansion, Loop Peeling, Loop Unrolling can be implemented.

Test: superblock_cloner_test.cc.
Change-Id: Ibeede38195376ca35f44ba9015491e50b3a5b87e
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 727431a..91e475d 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -865,6 +865,15 @@
   graph->SetHasLoops(true);
 }
 
+void HLoopInformation::PopulateInnerLoopUpwards(HLoopInformation* inner_loop) {
+  DCHECK(inner_loop->GetPreHeader()->GetLoopInformation() == this);
+  blocks_.Union(&inner_loop->blocks_);
+  HLoopInformation* outer_loop = GetPreHeader()->GetLoopInformation();
+  if (outer_loop != nullptr) {
+    outer_loop->PopulateInnerLoopUpwards(this);
+  }
+}
+
 HBasicBlock* HLoopInformation::GetPreHeader() const {
   HBasicBlock* block = header_->GetPredecessors()[0];
   DCHECK(irreducible_ || (block == header_->GetDominator()));
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index d4382c6..180893d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -826,6 +826,10 @@
   // Finds blocks that are part of this loop.
   void Populate();
 
+  // Updates blocks population of the loop and all of its outer' ones recursively after the
+  // population of the inner loop is updated.
+  void PopulateInnerLoopUpwards(HLoopInformation* inner_loop);
+
   // Returns whether this loop information contains `block`.
   // Note that this loop information *must* be populated before entering this function.
   bool Contains(const HBasicBlock& block) const;
@@ -856,6 +860,12 @@
 
   bool HasExitEdge() const;
 
+  // Resets back edge and blocks-in-loop data.
+  void ResetBasicBlockData() {
+    back_edges_.clear();
+    ClearAllBlocks();
+  }
+
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
@@ -998,6 +1008,18 @@
     loop_information_->AddBackEdge(back_edge);
   }
 
+  // Registers a back edge; if the block was not a loop header before the call associates a newly
+  // created loop info with it.
+  //
+  // Used in SuperblockCloner to preserve LoopInformation object instead of reseting loop
+  // info for all blocks during back edges recalculation.
+  void AddBackEdgeWhileUpdating(HBasicBlock* back_edge) {
+    if (loop_information_ == nullptr || loop_information_->GetHeader() != this) {
+      loop_information_ = new (graph_->GetAllocator()) HLoopInformation(this, graph_);
+    }
+    loop_information_->AddBackEdge(back_edge);
+  }
+
   HGraph* GetGraph() const { return graph_; }
   void SetGraph(HGraph* graph) { graph_ = graph; }
 
@@ -7298,19 +7320,19 @@
 class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor {
  public:
   explicit CloneAndReplaceInstructionVisitor(HGraph* graph)
-      : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count(0) {}
+      : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {}
 
   void VisitInstruction(HInstruction* instruction) OVERRIDE {
     if (instruction->IsClonable()) {
       ReplaceInstrOrPhiByClone(instruction);
-      instr_replaced_by_clones_count++;
+      instr_replaced_by_clones_count_++;
     }
   }
 
-  size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count; }
+  size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count_; }
 
  private:
-  size_t instr_replaced_by_clones_count;
+  size_t instr_replaced_by_clones_count_;
 
   DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor);
 };
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
new file mode 100644
index 0000000..a7c23be
--- /dev/null
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -0,0 +1,704 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "superblock_cloner.h"
+
+#include "common_dominator.h"
+#include "graph_checker.h"
+
+#include <iostream>
+
+namespace art {
+
+using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
+using HInstructionMap = SuperblockCloner::HInstructionMap;
+using HBasicBlockSet = SuperblockCloner::HBasicBlockSet;
+using HEdgeSet = SuperblockCloner::HEdgeSet;
+
+void HEdge::Dump(std::ostream& stream) const {
+  stream << "(" << from_ << "->" << to_ << ")";
+}
+
+//
+// Static helper methods.
+//
+
+// Returns whether instruction has any uses (regular or environmental) outside the region,
+// defined by basic block set.
+static bool IsUsedOutsideRegion(const HInstruction* instr, const HBasicBlockSet& bb_set) {
+  auto& uses = instr->GetUses();
+  for (auto use_node = uses.begin(), e = uses.end(); use_node != e; ++use_node) {
+    HInstruction* user = use_node->GetUser();
+    if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) {
+      return true;
+    }
+  }
+
+  auto& env_uses = instr->GetEnvUses();
+  for (auto use_node = env_uses.begin(), e = env_uses.end(); use_node != e; ++use_node) {
+    HInstruction* user = use_node->GetUser()->GetHolder();
+    if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+// Returns whether the phi's inputs are the same HInstruction.
+static bool ArePhiInputsTheSame(const HPhi* phi) {
+  HInstruction* first_input = phi->InputAt(0);
+  for (size_t i = 1, e = phi->InputCount(); i < e; i++) {
+    if (phi->InputAt(i) != first_input) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
+// graph.
+static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
+  if (loop1 != nullptr || loop2 != nullptr) {
+    return nullptr;
+  }
+
+  if (loop1->IsIn(*loop2)) {
+    return loop2;
+  } else if (loop2->IsIn(*loop1)) {
+    return loop1;
+  }
+  HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader());
+  return block->GetLoopInformation();
+}
+
+// Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph.
+static void OrderLoopsHeadersPredecessors(HGraph* graph) {
+  for (HBasicBlock* block : graph->GetPostOrder()) {
+    if (block->IsLoopHeader()) {
+      graph->OrderLoopHeaderPredecessors(block);
+    }
+  }
+}
+
+//
+// Helpers for CloneBasicBlock.
+//
+
+void SuperblockCloner::ReplaceInputsWithCopies(HInstruction* copy_instr) {
+  DCHECK(!copy_instr->IsPhi());
+  for (size_t i = 0, e = copy_instr->InputCount(); i < e; i++) {
+    // Copy instruction holds the same input as the original instruction holds.
+    HInstruction* orig_input = copy_instr->InputAt(i);
+    if (!IsInOrigBBSet(orig_input->GetBlock())) {
+      // Defined outside the subgraph.
+      continue;
+    }
+    HInstruction* copy_input = GetInstrCopy(orig_input);
+    // copy_instr will be registered as a user of copy_inputs after returning from this function:
+    // 'copy_block->AddInstruction(copy_instr)'.
+    copy_instr->SetRawInputAt(i, copy_input);
+  }
+}
+
+void SuperblockCloner::DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr,
+                                                         const HEnvironment* orig_env) {
+  if (orig_env->GetParent() != nullptr) {
+    DeepCloneEnvironmentWithRemapping(copy_instr, orig_env->GetParent());
+  }
+  HEnvironment* copy_env = new (arena_) HEnvironment(arena_, *orig_env, copy_instr);
+
+  for (size_t i = 0; i < orig_env->Size(); i++) {
+    HInstruction* env_input = orig_env->GetInstructionAt(i);
+    if (env_input != nullptr && IsInOrigBBSet(env_input->GetBlock())) {
+      env_input = GetInstrCopy(env_input);
+      DCHECK(env_input != nullptr && env_input->GetBlock() != nullptr);
+    }
+    copy_env->SetRawEnvAt(i, env_input);
+    if (env_input != nullptr) {
+      env_input->AddEnvUseAt(copy_env, i);
+    }
+  }
+  // InsertRawEnvironment assumes that instruction already has an environment that's why we use
+  // SetRawEnvironment in the 'else' case.
+  // As this function calls itself recursively with the same copy_instr - this copy_instr may
+  // have partially copied chain of HEnvironments.
+  if (copy_instr->HasEnvironment()) {
+    copy_instr->InsertRawEnvironment(copy_env);
+  } else {
+    copy_instr->SetRawEnvironment(copy_env);
+  }
+}
+
+//
+// Helpers for RemapEdgesSuccessors.
+//
+
+void SuperblockCloner::RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block,
+                                                       HBasicBlock* orig_succ) {
+  DCHECK(IsInOrigBBSet(orig_succ));
+  HBasicBlock* copy_succ = GetBlockCopy(orig_succ);
+
+  size_t this_index = orig_succ->GetPredecessorIndexOf(orig_block);
+  size_t phi_input_count = 0;
+  // This flag reflects whether the original successor has at least one phi and this phi
+  // has been already processed in the loop. Used for validation purposes in DCHECK to check that
+  // in the end all of the phis in the copy successor have the same number of inputs - the number
+  // of copy successor's predecessors.
+  bool first_phi_met = false;
+  for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* orig_phi = it.Current()->AsPhi();
+    HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+    HInstruction* orig_phi_input = orig_phi->InputAt(this_index);
+    // Remove corresponding input for original phi.
+    orig_phi->RemoveInputAt(this_index);
+    // Copy phi doesn't yet have either orig_block as predecessor or the input that corresponds
+    // to orig_block, so add the input at the end of the list.
+    copy_phi->AddInput(orig_phi_input);
+    if (!first_phi_met) {
+      phi_input_count = copy_phi->InputCount();
+      first_phi_met = true;
+    } else {
+      DCHECK_EQ(phi_input_count, copy_phi->InputCount());
+    }
+  }
+  // orig_block will be put at the end of the copy_succ's predecessors list; that corresponds
+  // to the previously added phi inputs position.
+  orig_block->ReplaceSuccessor(orig_succ, copy_succ);
+  DCHECK(!first_phi_met || copy_succ->GetPredecessors().size() == phi_input_count);
+}
+
+void SuperblockCloner::AddCopyInternalEdge(HBasicBlock* orig_block,
+                                           HBasicBlock* orig_succ) {
+  DCHECK(IsInOrigBBSet(orig_succ));
+  HBasicBlock* copy_block = GetBlockCopy(orig_block);
+  HBasicBlock* copy_succ = GetBlockCopy(orig_succ);
+  copy_block->AddSuccessor(copy_succ);
+
+  size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block);
+  for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* orig_phi = it.Current()->AsPhi();
+    HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+    HInstruction* orig_phi_input = orig_phi->InputAt(orig_index);
+    copy_phi->AddInput(orig_phi_input);
+  }
+}
+
+void SuperblockCloner::RemapCopyInternalEdge(HBasicBlock* orig_block,
+                                             HBasicBlock* orig_succ) {
+  DCHECK(IsInOrigBBSet(orig_succ));
+  HBasicBlock* copy_block = GetBlockCopy(orig_block);
+  copy_block->AddSuccessor(orig_succ);
+  DCHECK(copy_block->HasSuccessor(orig_succ));
+
+  size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block);
+  for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* orig_phi = it.Current()->AsPhi();
+    HInstruction* orig_phi_input = orig_phi->InputAt(orig_index);
+    orig_phi->AddInput(orig_phi_input);
+  }
+}
+
+//
+// Local versions of CF calculation/adjustment routines.
+//
+
+// TODO: merge with the original version in nodes.cc. The concern is that we don't want to affect
+// the performance of the base version by checking the local set.
+// TODO: this version works when updating the back edges info for natural loop-based local_set.
+// Check which exactly types of subgraphs can be analysed or rename it to
+// FindBackEdgesInTheNaturalLoop.
+void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set) {
+  ArenaBitVector visited(arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks.
+  DCHECK_EQ(visited.GetHighestBitSet(), -1);
+
+  // Nodes that we're currently visiting, indexed by block id.
+  ArenaBitVector visiting(arena_, graph_->GetBlocks().size(), false, kArenaAllocGraphBuilder);
+  // Number of successors visited from a given node, indexed by block id.
+  ArenaVector<size_t> successors_visited(graph_->GetBlocks().size(),
+                                         0u,
+                                         arena_->Adapter(kArenaAllocGraphBuilder));
+  // Stack of nodes that we're currently visiting (same as marked in "visiting" above).
+  ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder));
+  constexpr size_t kDefaultWorklistSize = 8;
+  worklist.reserve(kDefaultWorklistSize);
+
+  visited.SetBit(entry_block->GetBlockId());
+  visiting.SetBit(entry_block->GetBlockId());
+  worklist.push_back(entry_block);
+
+  while (!worklist.empty()) {
+    HBasicBlock* current = worklist.back();
+    uint32_t current_id = current->GetBlockId();
+    if (successors_visited[current_id] == current->GetSuccessors().size()) {
+      visiting.ClearBit(current_id);
+      worklist.pop_back();
+    } else {
+      HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++];
+      uint32_t successor_id = successor->GetBlockId();
+      if (!local_set->IsBitSet(successor_id)) {
+        continue;
+      }
+
+      if (visiting.IsBitSet(successor_id)) {
+        DCHECK(ContainsElement(worklist, successor));
+        successor->AddBackEdgeWhileUpdating(current);
+      } else if (!visited.IsBitSet(successor_id)) {
+        visited.SetBit(successor_id);
+        visiting.SetBit(successor_id);
+        worklist.push_back(successor);
+      }
+    }
+  }
+}
+
+void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) {
+  // TODO: DCHECK that after the transformation the graph is connected.
+  HBasicBlock* block_entry = nullptr;
+
+  if (outer_loop_ == nullptr) {
+    for (auto block : graph_->GetBlocks()) {
+      if (block != nullptr) {
+        outer_loop_bb_set->SetBit(block->GetBlockId());
+        HLoopInformation* info = block->GetLoopInformation();
+        if (info != nullptr) {
+          info->ResetBasicBlockData();
+        }
+      }
+    }
+    block_entry = graph_->GetEntryBlock();
+  } else {
+    outer_loop_bb_set->Copy(&outer_loop_bb_set_);
+    block_entry = outer_loop_->GetHeader();
+
+    // Add newly created copy blocks.
+    for (auto entry : *bb_map_) {
+      outer_loop_bb_set->SetBit(entry.second->GetBlockId());
+    }
+
+    // Clear loop_info for the whole outer loop.
+    for (uint32_t idx : outer_loop_bb_set->Indexes()) {
+      HBasicBlock* block = GetBlockById(idx);
+      HLoopInformation* info = block->GetLoopInformation();
+      if (info != nullptr) {
+        info->ResetBasicBlockData();
+      }
+    }
+  }
+
+  FindBackEdgesLocal(block_entry, outer_loop_bb_set);
+
+  for (uint32_t idx : outer_loop_bb_set->Indexes()) {
+    HBasicBlock* block = GetBlockById(idx);
+    HLoopInformation* info = block->GetLoopInformation();
+    // Reset LoopInformation for regular blocks and old headers which are no longer loop headers.
+    if (info != nullptr &&
+        (info->GetHeader() != block || info->NumberOfBackEdges() == 0)) {
+      block->SetLoopInformation(nullptr);
+    }
+  }
+}
+
+// This is a modified version of HGraph::AnalyzeLoops.
+GraphAnalysisResult SuperblockCloner::AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set) {
+  // We iterate post order to ensure we visit inner loops before outer loops.
+  // `PopulateRecursive` needs this guarantee to know whether a natural loop
+  // contains an irreducible loop.
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
+    if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) {
+      continue;
+    }
+    if (block->IsLoopHeader()) {
+      if (block->IsCatchBlock()) {
+        // TODO: Dealing with exceptional back edges could be tricky because
+        //       they only approximate the real control flow. Bail out for now.
+        return kAnalysisFailThrowCatchLoop;
+      }
+      block->GetLoopInformation()->Populate();
+    }
+  }
+
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
+    if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) {
+      continue;
+    }
+    if (block->IsLoopHeader()) {
+      HLoopInformation* cur_loop = block->GetLoopInformation();
+      HLoopInformation* outer_loop = cur_loop->GetPreHeader()->GetLoopInformation();
+      if (outer_loop != nullptr) {
+        outer_loop->PopulateInnerLoopUpwards(cur_loop);
+      }
+    }
+  }
+
+  return kAnalysisSuccess;
+}
+
+void SuperblockCloner::CleanUpControlFlow() {
+  // TODO: full control flow clean up for now, optimize it.
+  graph_->ClearDominanceInformation();
+
+  ArenaBitVector outer_loop_bb_set(
+      arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  RecalculateBackEdgesInfo(&outer_loop_bb_set);
+
+  // TODO: do it locally.
+  graph_->SimplifyCFG();
+  graph_->ComputeDominanceInformation();
+
+  // AnalyzeLoopsLocally requires a correct post-ordering information which was calculated just
+  // before in ComputeDominanceInformation.
+  GraphAnalysisResult result = AnalyzeLoopsLocally(&outer_loop_bb_set);
+  DCHECK_EQ(result, kAnalysisSuccess);
+
+  // TODO: do it locally
+  OrderLoopsHeadersPredecessors(graph_);
+
+  graph_->ComputeTryBlockInformation();
+}
+
+//
+// Helpers for ResolveDataFlow
+//
+
+void SuperblockCloner::ResolvePhi(HPhi* phi) {
+  HBasicBlock* phi_block = phi->GetBlock();
+  for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
+    HInstruction* input = phi->InputAt(i);
+    HBasicBlock* input_block = input->GetBlock();
+
+    // Originally defined outside the region.
+    if (!IsInOrigBBSet(input_block)) {
+      continue;
+    }
+    HBasicBlock* corresponding_block = phi_block->GetPredecessors()[i];
+    if (!IsInOrigBBSet(corresponding_block)) {
+      phi->ReplaceInput(GetInstrCopy(input), i);
+    }
+  }
+}
+
+//
+// Main algorithm methods.
+//
+
+void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) {
+  DCHECK(exits->empty());
+  for (uint32_t block_id : orig_bb_set_.Indexes()) {
+    HBasicBlock* block = GetBlockById(block_id);
+    for (HBasicBlock* succ : block->GetSuccessors()) {
+      if (!IsInOrigBBSet(succ)) {
+        exits->push_back(succ);
+      }
+    }
+  }
+}
+
+void SuperblockCloner::FindAndSetLocalAreaForAdjustments() {
+  DCHECK(outer_loop_ == nullptr);
+  ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+  SearchForSubgraphExits(&exits);
+
+  // For a reducible graph we need to update back-edges and dominance information only for
+  // the outermost loop which is affected by the transformation - it can be found by picking
+  // the common most outer loop of loops to which the subgraph exits blocks belong.
+  // Note: it can a loop or the whole graph (outer_loop_ will be nullptr in this case).
+  for (HBasicBlock* exit : exits) {
+    HLoopInformation* loop_exit_loop_info = exit->GetLoopInformation();
+    if (loop_exit_loop_info == nullptr) {
+      outer_loop_ = nullptr;
+      break;
+    }
+    outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info);
+  }
+
+  if (outer_loop_ != nullptr) {
+    // Save the loop population info as it will be changed later.
+    outer_loop_bb_set_.Copy(&outer_loop_->GetBlocks());
+  }
+}
+
+void SuperblockCloner::RemapEdgesSuccessors() {
+  // Redirect incoming edges.
+  for (HEdge e : *remap_incoming_) {
+    HBasicBlock* orig_block = GetBlockById(e.GetFrom());
+    HBasicBlock* orig_succ = GetBlockById(e.GetTo());
+    RemapOrigInternalOrIncomingEdge(orig_block, orig_succ);
+  }
+
+  // Redirect internal edges.
+  for (uint32_t orig_block_id : orig_bb_set_.Indexes()) {
+    HBasicBlock* orig_block = GetBlockById(orig_block_id);
+
+    for (HBasicBlock* orig_succ : orig_block->GetSuccessors()) {
+      uint32_t orig_succ_id = orig_succ->GetBlockId();
+
+      // Check for outgoing edge.
+      if (!IsInOrigBBSet(orig_succ)) {
+        HBasicBlock* copy_block = GetBlockCopy(orig_block);
+        copy_block->AddSuccessor(orig_succ);
+        continue;
+      }
+
+      auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id));
+      auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id));
+
+      // Due to construction all successors of copied block were set to original.
+      if (copy_redir != remap_copy_internal_->end()) {
+        RemapCopyInternalEdge(orig_block, orig_succ);
+      } else {
+        AddCopyInternalEdge(orig_block, orig_succ);
+      }
+
+      if (orig_redir != remap_orig_internal_->end()) {
+        RemapOrigInternalOrIncomingEdge(orig_block, orig_succ);
+      }
+    }
+  }
+}
+
+void SuperblockCloner::AdjustControlFlowInfo() {
+  ArenaBitVector outer_loop_bb_set(
+      arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  RecalculateBackEdgesInfo(&outer_loop_bb_set);
+
+  graph_->ClearDominanceInformation();
+  // TODO: Do it locally.
+  graph_->ComputeDominanceInformation();
+}
+
+// TODO: Current FastCase restriction guarantees that instructions' inputs are already mapped to
+// the valid values; only phis' inputs must be adjusted.
+void SuperblockCloner::ResolveDataFlow() {
+  for (auto entry : *bb_map_) {
+    HBasicBlock* orig_block = entry.first;
+
+    for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
+      HPhi* orig_phi = it.Current()->AsPhi();
+      HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+      ResolvePhi(orig_phi);
+      ResolvePhi(copy_phi);
+    }
+    if (kIsDebugBuild) {
+      // Inputs of instruction copies must be already mapped to correspondent inputs copies.
+      for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) {
+        CheckInstructionInputsRemapping(it.Current());
+      }
+    }
+  }
+}
+
+//
+// Debug and logging methods.
+//
+
+void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) {
+  DCHECK(!orig_instr->IsPhi());
+  HInstruction* copy_instr = GetInstrCopy(orig_instr);
+  for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) {
+    HInstruction* orig_input = orig_instr->InputAt(i);
+    DCHECK(orig_input->GetBlock()->Dominates(orig_instr->GetBlock()));
+
+    // If original input is defined outside the region then it will remain for both original
+    // instruction and the copy after the transformation.
+    if (!IsInOrigBBSet(orig_input->GetBlock())) {
+      continue;
+    }
+    HInstruction* copy_input = GetInstrCopy(orig_input);
+    DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock()));
+  }
+
+  // Resolve environment.
+  if (orig_instr->HasEnvironment()) {
+    HEnvironment* orig_env = orig_instr->GetEnvironment();
+
+    for (size_t i = 0, e = orig_env->Size(); i < e; ++i) {
+      HInstruction* orig_input = orig_env->GetInstructionAt(i);
+
+      // If original input is defined outside the region then it will remain for both original
+      // instruction and the copy after the transformation.
+      if (orig_input == nullptr || !IsInOrigBBSet(orig_input->GetBlock())) {
+        continue;
+      }
+
+      HInstruction* copy_input = GetInstrCopy(orig_input);
+      DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock()));
+    }
+  }
+}
+
+//
+// Public methods.
+//
+
+SuperblockCloner::SuperblockCloner(HGraph* graph,
+                                   const HBasicBlockSet* orig_bb_set,
+                                   HBasicBlockMap* bb_map,
+                                   HInstructionMap* hir_map)
+  : graph_(graph),
+    arena_(graph->GetAllocator()),
+    orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
+    remap_orig_internal_(nullptr),
+    remap_copy_internal_(nullptr),
+    remap_incoming_(nullptr),
+    bb_map_(bb_map),
+    hir_map_(hir_map),
+    outer_loop_(nullptr),
+    outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) {
+  orig_bb_set_.Copy(orig_bb_set);
+}
+
+void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
+                                                 const HEdgeSet* remap_copy_internal,
+                                                 const HEdgeSet* remap_incoming) {
+  remap_orig_internal_ = remap_orig_internal;
+  remap_copy_internal_ = remap_copy_internal;
+  remap_incoming_ = remap_incoming;
+}
+
+bool SuperblockCloner::IsSubgraphClonable() const {
+  // TODO: Support irreducible graphs and graphs with try-catch.
+  if (graph_->HasIrreducibleLoops() || graph_->HasTryCatch()) {
+    return false;
+  }
+
+  // Check that there are no instructions defined in the subgraph and used outside.
+  // TODO: Improve this by accepting graph with such uses but only one exit.
+  for (uint32_t idx : orig_bb_set_.Indexes()) {
+    HBasicBlock* block = GetBlockById(idx);
+
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (!instr->IsClonable() ||
+          IsUsedOutsideRegion(instr, orig_bb_set_)) {
+        return false;
+      }
+    }
+
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (!instr->IsClonable() ||
+          IsUsedOutsideRegion(instr, orig_bb_set_)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+void SuperblockCloner::Run() {
+  DCHECK(bb_map_ != nullptr);
+  DCHECK(hir_map_ != nullptr);
+  DCHECK(remap_orig_internal_ != nullptr &&
+         remap_copy_internal_ != nullptr &&
+         remap_incoming_ != nullptr);
+  DCHECK(IsSubgraphClonable());
+
+  // Find an area in the graph for which control flow information should be adjusted.
+  FindAndSetLocalAreaForAdjustments();
+  // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be
+  // adjusted.
+  CloneBasicBlocks();
+  // Connect the blocks together/remap successors and fix phis which are directly affected my the
+  // remapping.
+  RemapEdgesSuccessors();
+  // Recalculate dominance and backedge information which is required by the next stage.
+  AdjustControlFlowInfo();
+  // Fix data flow of the graph.
+  ResolveDataFlow();
+}
+
+void SuperblockCloner::CleanUp() {
+  CleanUpControlFlow();
+
+  // Remove phis which have all inputs being same.
+  // When a block has a single predecessor it must not have any phis. However after the
+  // transformation it could happen that there is such block with a phi with a single input.
+  // As this is needed to be processed we also simplify phis with multiple same inputs here.
+  for (auto entry : *bb_map_) {
+    HBasicBlock* orig_block = entry.first;
+    for (HInstructionIterator inst_it(orig_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+      HPhi* phi = inst_it.Current()->AsPhi();
+      if (ArePhiInputsTheSame(phi)) {
+        phi->ReplaceWith(phi->InputAt(0));
+        orig_block->RemovePhi(phi);
+      }
+    }
+
+    HBasicBlock* copy_block = GetBlockCopy(orig_block);
+    for (HInstructionIterator inst_it(copy_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
+      HPhi* phi = inst_it.Current()->AsPhi();
+      if (ArePhiInputsTheSame(phi)) {
+        phi->ReplaceWith(phi->InputAt(0));
+        copy_block->RemovePhi(phi);
+      }
+    }
+  }
+}
+
+HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) {
+  HGraph* graph = orig_block->GetGraph();
+  HBasicBlock* copy_block = new (arena_) HBasicBlock(graph, orig_block->GetDexPc());
+  graph->AddBlock(copy_block);
+
+  // Clone all the phis and add them to the map.
+  for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) {
+    HInstruction* orig_instr = it.Current();
+    HInstruction* copy_instr = orig_instr->Clone(arena_);
+    copy_block->AddPhi(copy_instr->AsPhi());
+    copy_instr->AsPhi()->RemoveAllInputs();
+    DCHECK(!orig_instr->HasEnvironment());
+    hir_map_->Put(orig_instr, copy_instr);
+  }
+
+  // Clone all the instructions and add them to the map.
+  for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) {
+    HInstruction* orig_instr = it.Current();
+    HInstruction* copy_instr = orig_instr->Clone(arena_);
+    ReplaceInputsWithCopies(copy_instr);
+    copy_block->AddInstruction(copy_instr);
+    if (orig_instr->HasEnvironment()) {
+      DeepCloneEnvironmentWithRemapping(copy_instr, orig_instr->GetEnvironment());
+    }
+    hir_map_->Put(orig_instr, copy_instr);
+  }
+
+  return copy_block;
+}
+
+void SuperblockCloner::CloneBasicBlocks() {
+  // By this time ReversePostOrder must be valid: in 'CloneBasicBlock' inputs of the copied
+  // instructions might be replaced by copies of the original inputs (depending where those inputs
+  // are defined). So the definitions of the original inputs must be visited before their original
+  // uses. The property of the reducible graphs "if 'A' dom 'B' then rpo_num('A') >= rpo_num('B')"
+  // guarantees that.
+  for (HBasicBlock* orig_block : graph_->GetReversePostOrder()) {
+    if (!IsInOrigBBSet(orig_block)) {
+      continue;
+    }
+    HBasicBlock* copy_block = CloneBasicBlock(orig_block);
+    bb_map_->Put(orig_block, copy_block);
+    if (kSuperblockClonerLogging) {
+      std::cout << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId() <<
+                   std::endl;
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
new file mode 100644
index 0000000..23de692
--- /dev/null
+++ b/compiler/optimizing/superblock_cloner.h
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
+#define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
+
+#include "base/arena_bit_vector.h"
+#include "base/arena_containers.h"
+#include "base/bit_vector-inl.h"
+#include "nodes.h"
+
+namespace art {
+
+static const bool kSuperblockClonerLogging = false;
+static const bool kSuperblockClonerVerify = false;
+
+// Represents an edge between two HBasicBlocks.
+//
+// Note: objects of this class are small - pass them by value.
+class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
+ public:
+  HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
+    DCHECK_NE(to_, kInvalidBlockId);
+    DCHECK_NE(from_, kInvalidBlockId);
+  }
+  HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
+    DCHECK_NE(to_, kInvalidBlockId);
+    DCHECK_NE(from_, kInvalidBlockId);
+  }
+  HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
+
+  uint32_t GetFrom() const { return from_; }
+  uint32_t GetTo() const { return to_; }
+
+  bool operator==(const HEdge& other) const {
+    return this->from_ == other.from_ && this->to_ == other.to_;
+  }
+
+  bool operator!=(const HEdge& other) const { return !operator==(other); }
+  void Dump(std::ostream& stream) const;
+
+  // Returns whether an edge represents a valid edge in CF graph: whether the from_ block
+  // has to_ block as a successor.
+  bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
+
+ private:
+  // Predecessor block id.
+  uint32_t from_;
+  // Successor block id.
+  uint32_t to_;
+};
+
+// Returns whether a HEdge edge corresponds to an existing edge in the graph.
+inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
+  if (!edge.IsValid()) {
+    return false;
+  }
+  uint32_t from = edge.GetFrom();
+  uint32_t to = edge.GetTo();
+  if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
+    return false;
+  }
+
+  HBasicBlock* block_from = graph->GetBlocks()[from];
+  HBasicBlock* block_to = graph->GetBlocks()[to];
+  if (block_from == nullptr || block_to == nullptr) {
+    return false;
+  }
+
+  return block_from->HasSuccessor(block_to, 0);
+}
+
+// SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without
+// fine grain manipulation with IR; data flow and graph properties are resolved/adjusted
+// automatically. The clone transformation is defined by specifying a set of basic blocks to copy
+// and a set of rules how to treat edges, remap their successors. By using this approach such
+// optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented.
+//
+// The idea of the transformation is based on "Superblock cloning" technique described in the book
+// "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University
+// Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective
+// Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al.
+// J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
+//
+// There are two states of the IR graph: original graph (before the transformation) and
+// copy graph (after).
+//
+// Before the transformation:
+// Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original
+// graph into 4 categories/sets (use the following notation for edges: "(pred, succ)",
+// where pred, succ - basic blocks):
+//  - internal - pred, succ are members of ‘orig_bb_set’.
+//  - outside  - pred, succ are not members of ‘orig_bb_set’.
+//  - incoming - pred is not a member of ‘orig_bb_set’, succ is.
+//  - outgoing - pred is a member of ‘orig_bb_set’, succ is not.
+//
+// Transformation:
+//
+// 1. Initial cloning:
+//   1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks
+//        form ‘copy_bb_set’.
+//   1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the
+//        copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge
+//        set.
+//   1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the
+//        copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge
+//        set.
+// 2. Successors remapping.
+//   2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should
+//        be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)).
+//   2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors
+//        should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)).
+//   2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph
+//        whose successors should be remapped to copies nodes: ((X, Y) will be transformed into
+//        (X, Y_1)).
+// 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
+// 4. Fix/resolve data flow.
+// 5. Do cleanups (DCE, critical edges splitting, etc).
+//
+class SuperblockCloner : public ValueObject {
+ public:
+  // TODO: Investigate optimal types for the containers.
+  using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
+  using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
+  using HBasicBlockSet = ArenaBitVector;
+  using HEdgeSet = ArenaHashSet<HEdge>;
+
+  SuperblockCloner(HGraph* graph,
+                   const HBasicBlockSet* orig_bb_set,
+                   HBasicBlockMap* bb_map,
+                   HInstructionMap* hir_map);
+
+  // Sets edge successor remapping info specified by corresponding edge sets.
+  void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
+                                 const HEdgeSet* remap_copy_internal,
+                                 const HEdgeSet* remap_incoming);
+
+  // Returns whether the specified subgraph is copyable.
+  // TODO: Start from small range of graph patterns then extend it.
+  bool IsSubgraphClonable() const;
+
+  // Runs the copy algorithm according to the description.
+  void Run();
+
+  // Cleans up the graph after transformation: splits critical edges, recalculates control flow
+  // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
+  void CleanUp();
+
+  // Returns a clone of a basic block (orig_block).
+  //
+  //  - The copy block will have no successors/predecessors; they should be set up manually.
+  //  - For each instruction in the orig_block a copy is created and inserted into the copy block;
+  //    this correspondence is recorded in the map (old instruction, new instruction).
+  //  - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the
+  //    same, as in the original block, PHIs do not reflect a correct correspondence between the
+  //    value and predecessors (as the copy block has no predecessors by now), etc.
+  HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
+
+  // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_
+  // and hir_map_.
+  void CloneBasicBlocks();
+
+  HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
+    auto copy_input_iter = hir_map_->find(orig_instr);
+    DCHECK(copy_input_iter != hir_map_->end());
+    return copy_input_iter->second;
+  }
+
+  HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
+    HBasicBlock* block = bb_map_->Get(orig_block);
+    DCHECK(block != nullptr);
+    return block;
+  }
+
+  HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
+    for (auto it : *hir_map_) {
+      if (it.second == copy_instr) {
+        return it.first;
+      }
+    }
+    return nullptr;
+  }
+
+  bool IsInOrigBBSet(uint32_t block_id) const {
+    return orig_bb_set_.IsBitSet(block_id);
+  }
+
+  bool IsInOrigBBSet(const HBasicBlock* block) const {
+    return IsInOrigBBSet(block->GetBlockId());
+  }
+
+ private:
+  // Fills the 'exits' vector with the subgraph exits.
+  void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
+
+  // Finds and records information about the area in the graph for which control-flow (back edges,
+  // loops, dominators) needs to be adjusted.
+  void FindAndSetLocalAreaForAdjustments();
+
+  // Remaps edges' successors according to the info specified in the edges sets.
+  //
+  // Only edge successors/predecessors and phis' input records (to have a correspondence between
+  // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
+  // phis' nor instructions' inputs values are resolved.
+  void RemapEdgesSuccessors();
+
+  // Adjusts control-flow (back edges, loops, dominators) for the local area defined by
+  // FindAndSetLocalAreaForAdjustments.
+  void AdjustControlFlowInfo();
+
+  // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in
+  // the SSA form.
+  void ResolveDataFlow();
+
+  //
+  // Helpers for CloneBasicBlock.
+  //
+
+  // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the
+  // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original.
+  void ReplaceInputsWithCopies(HInstruction* copy_instr);
+
+  // Recursively clones the environment for the copy instruction. If the input of the original
+  // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise
+  // leaves it the same as original.
+  void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
+
+  //
+  // Helpers for RemapEdgesSuccessors.
+  //
+
+  // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and
+  // copy_succ.
+  void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+  // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
+  void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+  // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
+  void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
+
+  //
+  // Local versions of control flow calculation/adjustment routines.
+  //
+
+  void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
+  void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
+  GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
+  void CleanUpControlFlow();
+
+  //
+  // Helpers for ResolveDataFlow
+  //
+
+  // Resolves the inputs of the phi.
+  void ResolvePhi(HPhi* phi);
+
+  //
+  // Debug and logging methods.
+  //
+  void CheckInstructionInputsRemapping(HInstruction* orig_instr);
+
+  HBasicBlock* GetBlockById(uint32_t block_id) const {
+    DCHECK(block_id < graph_->GetBlocks().size());
+    HBasicBlock* block = graph_->GetBlocks()[block_id];
+    DCHECK(block != nullptr);
+    return block;
+  }
+
+  HGraph* const graph_;
+  ArenaAllocator* const arena_;
+
+  // Set of basic block in the original graph to be copied.
+  HBasicBlockSet orig_bb_set_;
+
+  // Sets of edges which require successors remapping.
+  const HEdgeSet* remap_orig_internal_;
+  const HEdgeSet* remap_copy_internal_;
+  const HEdgeSet* remap_incoming_;
+
+  // Correspondence map for blocks: (original block, copy block).
+  HBasicBlockMap* bb_map_;
+  // Correspondence map for instructions: (original HInstruction, copy HInstruction).
+  HInstructionMap* hir_map_;
+  // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted.
+  HLoopInformation* outer_loop_;
+  HBasicBlockSet outer_loop_bb_set_;
+
+  ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
+
+  DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
+};
+
+}  // namespace art
+
+namespace std {
+
+template <>
+struct hash<art::HEdge> {
+  size_t operator()(art::HEdge const& x) const noexcept  {
+    // Use Cantor pairing function as the hash function.
+    uint32_t a = x.GetFrom();
+    uint32_t b = x.GetTo();
+    return (a + b) * (a + b + 1) / 2 + b;
+  }
+};
+
+}  // namespace std
+
+#endif  // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index fd77eb8..f1b7bff 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -17,11 +17,15 @@
 #include "graph_checker.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
+#include "superblock_cloner.h"
 
 #include "gtest/gtest.h"
 
 namespace art {
 
+using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
+using HInstructionMap = SuperblockCloner::HInstructionMap;
+
 // This class provides methods and helpers for testing various cloning and copying routines:
 // individual instruction cloning and cloning of the more coarse-grain structures.
 class SuperblockClonerTest : public OptimizingUnitTest {
@@ -182,4 +186,121 @@
   EXPECT_NE(new_suspend_check, nullptr);
 }
 
+// Tests SuperblockCloner::CloneBasicBlocks - check instruction cloning and initial remapping of
+// instructions' inputs.
+TEST_F(SuperblockClonerTest, CloneBasicBlocks) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+  ArenaAllocator* arena = graph_->GetAllocator();
+
+  CreateBasicLoopControlFlow(&header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  graph_->BuildDominatorTree();
+  ASSERT_TRUE(CheckGraph());
+
+  ArenaBitVector orig_bb_set(
+      arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+  HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+  HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+
+  HLoopInformation* loop_info = header->GetLoopInformation();
+  orig_bb_set.Union(&loop_info->GetBlocks());
+
+  SuperblockCloner cloner(graph_,
+                          &orig_bb_set,
+                          &bb_map,
+                          &hir_map);
+  EXPECT_TRUE(cloner.IsSubgraphClonable());
+
+  cloner.CloneBasicBlocks();
+
+  EXPECT_EQ(bb_map.size(), 2u);
+  EXPECT_EQ(hir_map.size(), 12u);
+
+  for (auto it : hir_map) {
+    HInstruction* orig_instr = it.first;
+    HInstruction* copy_instr = it.second;
+
+    EXPECT_EQ(cloner.GetBlockCopy(orig_instr->GetBlock()), copy_instr->GetBlock());
+    EXPECT_EQ(orig_instr->GetKind(), copy_instr->GetKind());
+    EXPECT_EQ(orig_instr->GetType(), copy_instr->GetType());
+
+    if (orig_instr->IsPhi()) {
+      continue;
+    }
+
+    EXPECT_EQ(orig_instr->InputCount(), copy_instr->InputCount());
+
+    // Check that inputs match.
+    for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) {
+      HInstruction* orig_input = orig_instr->InputAt(i);
+      HInstruction* copy_input = copy_instr->InputAt(i);
+      if (cloner.IsInOrigBBSet(orig_input->GetBlock())) {
+        EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input);
+      } else {
+        EXPECT_EQ(orig_input, copy_input);
+      }
+    }
+
+    EXPECT_EQ(orig_instr->HasEnvironment(), copy_instr->HasEnvironment());
+
+    // Check that environments match.
+    if (orig_instr->HasEnvironment()) {
+      HEnvironment* orig_env = orig_instr->GetEnvironment();
+      HEnvironment* copy_env = copy_instr->GetEnvironment();
+
+      EXPECT_EQ(copy_env->GetParent(), nullptr);
+      EXPECT_EQ(orig_env->Size(), copy_env->Size());
+
+      for (size_t i = 0, e = orig_env->Size(); i < e; i++) {
+        HInstruction* orig_input = orig_env->GetInstructionAt(i);
+        HInstruction* copy_input = copy_env->GetInstructionAt(i);
+        if (cloner.IsInOrigBBSet(orig_input->GetBlock())) {
+          EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input);
+        } else {
+          EXPECT_EQ(orig_input, copy_input);
+        }
+      }
+    }
+  }
+}
+
+// SuperblockCloner::CleanUpControlFlow - checks algorithms of local adjustments of the control
+// flow.
+TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+  ArenaAllocator* arena = graph_->GetAllocator();
+
+  CreateBasicLoopControlFlow(&header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  graph_->BuildDominatorTree();
+  ASSERT_TRUE(CheckGraph());
+
+  ArenaBitVector orig_bb_set(
+      arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+
+  HLoopInformation* loop_info = header->GetLoopInformation();
+  orig_bb_set.Union(&loop_info->GetBlocks());
+
+  SuperblockCloner cloner(graph_,
+                          &orig_bb_set,
+                          nullptr,
+                          nullptr);
+  EXPECT_TRUE(cloner.IsSubgraphClonable());
+
+  cloner.FindAndSetLocalAreaForAdjustments();
+  cloner.CleanUpControlFlow();
+
+  EXPECT_TRUE(CheckGraph());
+
+  EXPECT_TRUE(entry_block_->Dominates(header));
+  EXPECT_TRUE(entry_block_->Dominates(exit_block_));
+
+  EXPECT_EQ(header->GetLoopInformation(), loop_info);
+  EXPECT_EQ(loop_info->GetHeader(), header);
+  EXPECT_TRUE(loop_info->Contains(*loop_body));
+  EXPECT_TRUE(loop_info->IsBackEdge(*loop_body));
+}
+
 }  // namespace art