Implement polymorphic inlining.

For example, before:
HInvokeVirtual

After:
If (receiver == Foo) {
  // inlined code.
} else if (receiver == Bar) {
  // inlined code
} else {
  // HInvokeVirtual or HDeoptimize(receiver != Baz)
}

Change-Id: I5ce305aef8f39f8294bf2b2bcfe60e0dddcfdbec
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index a5acab8..498739c 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -350,32 +350,15 @@
   }
 
   // We successfully inlined, now add a guard.
-  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
-      class_linker, receiver, invoke_instruction->GetDexPc());
-
   bool is_referrer =
       (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
-  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
-                                                               class_index,
-                                                               caller_dex_file,
-                                                               is_referrer,
-                                                               invoke_instruction->GetDexPc(),
-                                                               /* needs_access_check */ false,
-                                                               /* is_in_dex_cache */ true);
-
-  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
-  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
-      compare, invoke_instruction->GetDexPc());
-  // TODO: Extend reference type propagation to understand the guard.
-  if (cursor != nullptr) {
-    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
-  } else {
-    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
-  }
-  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
-  bb_cursor->InsertInstructionAfter(compare, load_class);
-  bb_cursor->InsertInstructionAfter(deoptimize, compare);
-  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  AddTypeGuard(receiver,
+               cursor,
+               bb_cursor,
+               class_index,
+               is_referrer,
+               invoke_instruction,
+               /* with_deoptimization */ true);
 
   // Run type propagation to get the guard typed, and eventually propagate the
   // type of the receiver.
@@ -386,11 +369,218 @@
   return true;
 }
 
+HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
+                                     HInstruction* cursor,
+                                     HBasicBlock* bb_cursor,
+                                     uint32_t class_index,
+                                     bool is_referrer,
+                                     HInstruction* invoke_instruction,
+                                     bool with_deoptimization) {
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
+      class_linker, receiver, invoke_instruction->GetDexPc());
+
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  // Note that we will just compare the classes, so we don't need Java semantics access checks.
+  // Also, the caller of `AddTypeGuard` must have guaranteed that the class is in the dex cache.
+  HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),
+                                                               class_index,
+                                                               caller_dex_file,
+                                                               is_referrer,
+                                                               invoke_instruction->GetDexPc(),
+                                                               /* needs_access_check */ false,
+                                                               /* is_in_dex_cache */ true);
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
+  bb_cursor->InsertInstructionAfter(compare, load_class);
+  if (with_deoptimization) {
+    HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+        compare, invoke_instruction->GetDexPc());
+    bb_cursor->InsertInstructionAfter(deoptimize, compare);
+    deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  }
+  return compare;
+}
+
 bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
                                         const InlineCache& ic) {
   DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
       << invoke_instruction->DebugName();
+
+  if (TryInlinePolymorphicCallToSameTarget(invoke_instruction, resolved_method, ic)) {
+    return true;
+  }
+
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+
+  bool all_targets_inlined = true;
+  bool one_target_inlined = false;
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    if (ic.GetTypeAt(i) == nullptr) {
+      break;
+    }
+    ArtMethod* method = nullptr;
+    if (invoke_instruction->IsInvokeInterface()) {
+      method = ic.GetTypeAt(i)->FindVirtualMethodForInterface(
+          resolved_method, pointer_size);
+    } else {
+      DCHECK(invoke_instruction->IsInvokeVirtual());
+      method = ic.GetTypeAt(i)->FindVirtualMethodForVirtual(
+          resolved_method, pointer_size);
+    }
+
+    HInstruction* receiver = invoke_instruction->InputAt(0);
+    HInstruction* cursor = invoke_instruction->GetPrevious();
+    HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+    uint32_t class_index = FindClassIndexIn(ic.GetTypeAt(i), caller_dex_file);
+    HInstruction* return_replacement = nullptr;
+    if (class_index == DexFile::kDexNoIndex ||
+        !TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
+      all_targets_inlined = false;
+    } else {
+      one_target_inlined = true;
+      bool is_referrer = (ic.GetTypeAt(i) == outermost_graph_->GetArtMethod()->GetDeclaringClass());
+
+      // If we have inlined all targets before, and this receiver is the last seen,
+      // we deoptimize instead of keeping the original invoke instruction.
+      bool deoptimize = all_targets_inlined &&
+          (i != InlineCache::kIndividualCacheSize - 1) &&
+          (ic.GetTypeAt(i + 1) == nullptr);
+      HInstruction* compare = AddTypeGuard(
+          receiver, cursor, bb_cursor, class_index, is_referrer, invoke_instruction, deoptimize);
+      if (deoptimize) {
+        if (return_replacement != nullptr) {
+          invoke_instruction->ReplaceWith(return_replacement);
+        }
+        invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+        // Because the inline cache data can be populated concurrently, we force the end of the
+        // iteration. Otherhwise, we could see a new receiver type.
+        break;
+      } else {
+        CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
+      }
+    }
+  }
+
+  if (!one_target_inlined) {
+    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+                   << " from inline cache is not inlined because none"
+                   << " of its targets could be inlined";
+    return false;
+  }
+  MaybeRecordStat(kInlinedPolymorphicCall);
+
+  // Run type propagation to get the guards typed.
+  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false);
+  rtp_fixup.Run();
+  return true;
+}
+
+void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
+                                                        HInstruction* return_replacement,
+                                                        HInstruction* invoke_instruction) {
+  uint32_t dex_pc = invoke_instruction->GetDexPc();
+  HBasicBlock* cursor_block = compare->GetBlock();
+  HBasicBlock* original_invoke_block = invoke_instruction->GetBlock();
+  ArenaAllocator* allocator = graph_->GetArena();
+
+  // Spit the block after the compare: `cursor_block` will now be the start of the diamond,
+  // and the returned block is the start of the then branch (that could contain multiple blocks).
+  HBasicBlock* then = cursor_block->SplitAfterForInlining(compare);
+
+  // Split the block containing the invoke before and after the invoke. The returned block
+  // of the split before will contain the invoke and will be the otherwise branch of
+  // the diamond. The returned block of the split after will be the merge block
+  // of the diamond.
+  HBasicBlock* end_then = invoke_instruction->GetBlock();
+  HBasicBlock* otherwise = end_then->SplitBeforeForInlining(invoke_instruction);
+  HBasicBlock* merge = otherwise->SplitAfterForInlining(invoke_instruction);
+
+  // If the methods we are inlining return a value, we create a phi in the merge block
+  // that will have the `invoke_instruction and the `return_replacement` as inputs.
+  if (return_replacement != nullptr) {
+    HPhi* phi = new (allocator) HPhi(
+        allocator, kNoRegNumber, 0, HPhi::ToPhiType(invoke_instruction->GetType()), dex_pc);
+    merge->AddPhi(phi);
+    invoke_instruction->ReplaceWith(phi);
+    phi->AddInput(return_replacement);
+    phi->AddInput(invoke_instruction);
+  }
+
+  // Add the control flow instructions.
+  otherwise->AddInstruction(new (allocator) HGoto(dex_pc));
+  end_then->AddInstruction(new (allocator) HGoto(dex_pc));
+  cursor_block->AddInstruction(new (allocator) HIf(compare, dex_pc));
+
+  // Add the newly created blocks to the graph.
+  graph_->AddBlock(then);
+  graph_->AddBlock(otherwise);
+  graph_->AddBlock(merge);
+
+  // Set up successor (and implictly predecessor) relations.
+  cursor_block->AddSuccessor(otherwise);
+  cursor_block->AddSuccessor(then);
+  end_then->AddSuccessor(merge);
+  otherwise->AddSuccessor(merge);
+
+  // Set up dominance information.
+  then->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(then);
+  otherwise->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(otherwise);
+  merge->SetDominator(cursor_block);
+  cursor_block->AddDominatedBlock(merge);
+
+  // Update the revert post order.
+  size_t index = IndexOfElement(graph_->reverse_post_order_, cursor_block);
+  MakeRoomFor(&graph_->reverse_post_order_, 1, index);
+  graph_->reverse_post_order_[++index] = then;
+  index = IndexOfElement(graph_->reverse_post_order_, end_then);
+  MakeRoomFor(&graph_->reverse_post_order_, 2, index);
+  graph_->reverse_post_order_[++index] = otherwise;
+  graph_->reverse_post_order_[++index] = merge;
+
+  // Set the loop information of the newly created blocks.
+  HLoopInformation* loop_info = cursor_block->GetLoopInformation();
+  if (loop_info != nullptr) {
+    then->SetLoopInformation(cursor_block->GetLoopInformation());
+    merge->SetLoopInformation(cursor_block->GetLoopInformation());
+    otherwise->SetLoopInformation(cursor_block->GetLoopInformation());
+    for (HLoopInformationOutwardIterator loop_it(*cursor_block);
+         !loop_it.Done();
+         loop_it.Advance()) {
+      loop_it.Current()->Add(then);
+      loop_it.Current()->Add(merge);
+      loop_it.Current()->Add(otherwise);
+    }
+    // In case the original invoke location was a back edge, we need to update
+    // the loop to now have the merge block as a back edge.
+    if (loop_info->IsBackEdge(*original_invoke_block)) {
+      loop_info->RemoveBackEdge(original_invoke_block);
+      loop_info->AddBackEdge(merge);
+    }
+  }
+
+  // Set the try/catch information of the newly created blocks.
+  then->SetTryCatchInformation(cursor_block->GetTryCatchInformation());
+  merge->SetTryCatchInformation(cursor_block->GetTryCatchInformation());
+  otherwise->SetTryCatchInformation(cursor_block->GetTryCatchInformation());
+}
+
+bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction,
+                                                    ArtMethod* resolved_method,
+                                                    const InlineCache& ic) {
   // This optimization only works under JIT for now.
   DCHECK(Runtime::Current()->UseJit());
   if (graph_->GetInstructionSet() == kMips64) {