Implement first kind of polymorphic inlining.

Add HClassTableGet to fetch an ArtMethod from the vtable or imt,
and compare it to the only method the profiling saw.

Change-Id: I76afd3689178f10e3be048aa3ac9a97c6f63295d
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 5f00f0a..1b51508 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -6599,6 +6599,29 @@
   }
 }
 
+void LocationsBuilderARM::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t method_offset = 0;
+  if (instruction->GetTableKind() == HClassTableGet::kVTable) {
+    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kArmPointerSize).SizeValue();
+  } else {
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
+  }
+  __ LoadFromOffset(kLoadWord,
+                    locations->Out().AsRegister<Register>(),
+                    locations->InAt(0).AsRegister<Register>(),
+                    method_offset);
+}
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c0e3959..a8d463e 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -4983,6 +4983,29 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t method_offset = 0;
+  if (instruction->GetTableKind() == HClassTableGet::kVTable) {
+    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kArm64PointerSize).SizeValue();
+  } else {
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
+  }
+  __ Ldr(XRegisterFrom(locations->Out()),
+         MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
+}
+
+
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5bd136a..ebfcfe4 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -5236,6 +5236,14 @@
   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
 }
 
+void LocationsBuilderMIPS::VisitClassTableGet(HClassTableGet*) {
+  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips";
+}
+
+void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet*) {
+  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips";
+}
+
 #undef __
 #undef QUICK_ENTRY_POINT
 
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index e3115f4..3aff4ec 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -4267,5 +4267,14 @@
   }
 }
 
+void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet*) {
+  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+}
+
+void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet*) {
+  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+}
+
 }  // namespace mips64
 }  // namespace art
+
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 50c4ba2..d76c669 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -3957,6 +3957,27 @@
 void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
 }
 
+void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t method_offset = 0;
+  if (instruction->GetTableKind() == HClassTableGet::kVTable) {
+    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kX86PointerSize).SizeValue();
+  } else {
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
+  }
+  __ movl(locations->Out().AsRegister<Register>(),
+          Address(locations->InAt(0).AsRegister<Register>(), method_offset));
+}
+
 void LocationsBuilderX86::VisitNot(HNot* not_) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index fd18834..84f56f1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3959,6 +3959,27 @@
   // Nothing to do, the method is already at its location.
 }
 
+void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t method_offset = 0;
+  if (instruction->GetTableKind() == HClassTableGet::kVTable) {
+    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kX86_64PointerSize).SizeValue();
+  } else {
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
+  }
+  __ movq(locations->Out().AsRegister<CpuRegister>(),
+          Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
+}
+
 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 2e79df1..35109fa 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -296,9 +296,29 @@
   return false;
 }
 
+HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
+                                                   HInstruction* receiver,
+                                                   uint32_t dex_pc) const {
+  ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+  DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
+  return new (graph_->GetArena()) HInstanceFieldGet(
+      receiver,
+      Primitive::kPrimNot,
+      field->GetOffset(),
+      field->IsVolatile(),
+      field->GetDexFieldIndex(),
+      field->GetDeclaringClass()->GetDexClassDefIndex(),
+      *field->GetDexFile(),
+      handles_->NewHandle(field->GetDexCache()),
+      dex_pc);
+}
+
 bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
                                         const InlineCache& ic) {
+  DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
+      << invoke_instruction->DebugName();
+
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
   uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file);
   if (class_index == DexFile::kDexNoIndex) {
@@ -328,18 +348,8 @@
   }
 
   // We successfully inlined, now add a guard.
-  ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
-  DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
-  HInstanceFieldGet* field_get = new (graph_->GetArena()) HInstanceFieldGet(
-      receiver,
-      Primitive::kPrimNot,
-      field->GetOffset(),
-      field->IsVolatile(),
-      field->GetDexFieldIndex(),
-      field->GetDeclaringClass()->GetDexClassDefIndex(),
-      *field->GetDexFile(),
-      handles_->NewHandle(field->GetDexCache()),
-      invoke_instruction->GetDexPc());
+  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
+      class_linker, receiver, invoke_instruction->GetDexPc());
 
   bool is_referrer =
       (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass());
@@ -351,16 +361,16 @@
                                                                /* needs_access_check */ false,
                                                                /* is_in_dex_cache */ true);
 
-  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, field_get);
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
   HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
       compare, invoke_instruction->GetDexPc());
   // TODO: Extend reference type propagation to understand the guard.
   if (cursor != nullptr) {
-    bb_cursor->InsertInstructionAfter(field_get, cursor);
+    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
   } else {
-    bb_cursor->InsertInstructionBefore(field_get, bb_cursor->GetFirstInstruction());
+    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
   }
-  bb_cursor->InsertInstructionAfter(load_class, field_get);
+  bb_cursor->InsertInstructionAfter(load_class, receiver_class);
   bb_cursor->InsertInstructionAfter(compare, load_class);
   bb_cursor->InsertInstructionAfter(deoptimize, compare);
   deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
@@ -374,13 +384,101 @@
   return true;
 }
 
-bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction ATTRIBUTE_UNUSED,
+bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
-                                        const InlineCache& ic ATTRIBUTE_UNUSED) {
-  // TODO
-  VLOG(compiler) << "Unimplemented polymorphic inlining for "
-                 << PrettyMethod(resolved_method);
-  return false;
+                                        const InlineCache& ic) {
+  DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())
+      << invoke_instruction->DebugName();
+  // This optimization only works under JIT for now.
+  DCHECK(Runtime::Current()->UseJit());
+  if (graph_->GetInstructionSet() == kMips || graph_->GetInstructionSet() == kMips64) {
+    // TODO: Support HClassTableGet for mips and mips64.
+    return false;
+  }
+  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
+  size_t pointer_size = class_linker->GetImagePointerSize();
+
+  DCHECK(resolved_method != nullptr);
+  ArtMethod* actual_method = nullptr;
+  // Check whether we are actually calling the same method among
+  // the different types seen.
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    if (ic.GetTypeAt(i) == nullptr) {
+      break;
+    }
+    ArtMethod* new_method = nullptr;
+    if (invoke_instruction->IsInvokeInterface()) {
+      new_method = ic.GetTypeAt(i)->FindVirtualMethodForInterface(
+          resolved_method, pointer_size);
+    } else {
+      DCHECK(invoke_instruction->IsInvokeVirtual());
+      new_method = ic.GetTypeAt(i)->FindVirtualMethodForVirtual(
+          resolved_method, pointer_size);
+    }
+    if (actual_method == nullptr) {
+      actual_method = new_method;
+    } else if (actual_method != new_method) {
+      // Different methods, bailout.
+      return false;
+    }
+  }
+
+  HInstruction* receiver = invoke_instruction->InputAt(0);
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+  if (!TryInline(invoke_instruction, actual_method, /* do_rtp */ false)) {
+    return false;
+  }
+
+  // We successfully inlined, now add a guard.
+  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
+      class_linker, receiver, invoke_instruction->GetDexPc());
+
+  size_t method_offset = invoke_instruction->IsInvokeVirtual()
+      ? actual_method->GetVtableIndex()
+      : invoke_instruction->AsInvokeInterface()->GetImtIndex();
+
+  Primitive::Type type = Is64BitInstructionSet(graph_->GetInstructionSet())
+      ? Primitive::kPrimLong
+      : Primitive::kPrimInt;
+  HClassTableGet* class_table_get = new (graph_->GetArena()) HClassTableGet(
+      receiver_class,
+      type,
+      invoke_instruction->IsInvokeVirtual() ? HClassTableGet::kVTable : HClassTableGet::kIMTable,
+      method_offset,
+      invoke_instruction->GetDexPc());
+
+  HConstant* constant;
+  if (type == Primitive::kPrimLong) {
+    constant = graph_->GetLongConstant(
+        reinterpret_cast<intptr_t>(actual_method), invoke_instruction->GetDexPc());
+  } else {
+    constant = graph_->GetIntConstant(
+        reinterpret_cast<intptr_t>(actual_method), invoke_instruction->GetDexPc());
+  }
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(class_table_get, constant);
+  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+      compare, invoke_instruction->GetDexPc());
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(class_table_get, receiver_class);
+  bb_cursor->InsertInstructionAfter(compare, class_table_get);
+  bb_cursor->InsertInstructionAfter(deoptimize, compare);
+  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+
+  // Run type propagation to get the guard typed.
+  ReferenceTypePropagation rtp_fixup(graph_, handles_);
+  rtp_fixup.Run();
+
+  MaybeRecordStat(kInlinedPolymorphicCall);
+
+  return true;
 }
 
 bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 8de510e..3c01751 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -85,6 +85,11 @@
                          bool same_dex_file,
                          bool do_rtp = true);
 
+  HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
+                                           HInstruction* receiver,
+                                           uint32_t dex_pc) const
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
   HGraph* const outermost_graph_;
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 5246fd1..a98ec2f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1161,6 +1161,7 @@
   M(BoundsCheck, Instruction)                                           \
   M(BoundType, Instruction)                                             \
   M(CheckCast, Instruction)                                             \
+  M(ClassTableGet, Instruction)                                         \
   M(ClearException, Instruction)                                        \
   M(ClinitCheck, Instruction)                                           \
   M(Compare, BinaryOperation)                                           \
@@ -2542,6 +2543,44 @@
   DISALLOW_COPY_AND_ASSIGN(HCurrentMethod);
 };
 
+// Fetches an ArtMethod from the virtual table or the interface method table
+// of a class.
+class HClassTableGet : public HExpression<1> {
+ public:
+  enum TableKind {
+    kVTable,
+    kIMTable,
+  };
+  HClassTableGet(HInstruction* cls,
+                 Primitive::Type type,
+                 TableKind kind,
+                 size_t index,
+                 uint32_t dex_pc)
+      : HExpression(type, SideEffects::None(), dex_pc),
+        index_(index),
+        table_kind_(kind) {
+    SetRawInputAt(0, cls);
+  }
+
+  bool CanBeMoved() const OVERRIDE { return true; }
+  bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+    return other->AsClassTableGet()->GetIndex() == index_ &&
+        other->AsClassTableGet()->GetTableKind() == table_kind_;
+  }
+
+  TableKind GetTableKind() const { return table_kind_; }
+  size_t GetIndex() const { return index_; }
+
+  DECLARE_INSTRUCTION(ClassTableGet);
+
+ private:
+  // The index of the ArtMethod in the table.
+  const size_t index_;
+  const TableKind table_kind_;
+
+  DISALLOW_COPY_AND_ASSIGN(HClassTableGet);
+};
+
 // PackedSwitch (jump table). A block ending with a PackedSwitch instruction will
 // have one successor for each entry in the switch table, and the final successor
 // will be the block containing the next Dex opcode.
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index f8035aa..881beb4 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -51,6 +51,7 @@
   kNotCompiledVerificationError,
   kNotCompiledVerifyAtRuntime,
   kInlinedMonomorphicCall,
+  kInlinedPolymorphicCall,
   kMonomorphicCall,
   kPolymorphicCall,
   kMegamorphicCall,
@@ -118,6 +119,7 @@
       case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break;
       case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break;
       case kInlinedMonomorphicCall: name = "InlinedMonomorphicCall"; break;
+      case kInlinedPolymorphicCall: name = "InlinedPolymorphicCall"; break;
       case kMonomorphicCall: name = "MonomorphicCall"; break;
       case kPolymorphicCall: name = "PolymorphicCall"; break;
       case kMegamorphicCall: name = "kMegamorphicCall"; break;