Record profiling information before Jitting.

- Add a new instrumentation kind to record dynamic invokes.
- Use the JNI entry point field to store the profiling data.
- Record seen receivers for every dynamic invoke.

Change-Id: I2c1738ab2a72052d45964d055dc16b44b906e54c
diff --git a/runtime/jit/ b/runtime/jit/
index 26a4fe4..683b2cf 100644
--- a/runtime/jit/
+++ b/runtime/jit/
@@ -39,6 +39,8 @@
   jit_options->compile_threshold_ =
+  jit_options->warmup_threshold_ =
+      options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
   jit_options->dump_info_on_shutdown_ =
   return jit_options;
@@ -160,17 +162,19 @@
-void Jit::CreateInstrumentationCache(size_t compile_threshold) {
+void Jit::CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold) {
   CHECK_GT(compile_threshold, 0U);
   Runtime* const runtime = Runtime::Current();
   // Add Jit interpreter instrumentation, tells the interpreter when to notify the jit to compile
   // something.
-  instrumentation_cache_.reset(new jit::JitInstrumentationCache(compile_threshold));
+  instrumentation_cache_.reset(
+      new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
       new jit::JitInstrumentationListener(instrumentation_cache_.get()),
       instrumentation::Instrumentation::kMethodEntered |
-      instrumentation::Instrumentation::kBackwardBranch);
+      instrumentation::Instrumentation::kBackwardBranch |
+      instrumentation::Instrumentation::kInvokeVirtualOrInterface);
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index ca6e7ea..643bc23 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -43,13 +43,14 @@
 class Jit {
   static constexpr bool kStressMode = kIsDebugBuild;
-  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 1 : 1000;
+  static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 1000;
+  static constexpr size_t kDefaultWarmupThreshold = kDefaultCompileThreshold / 2;
   virtual ~Jit();
   static Jit* Create(JitOptions* options, std::string* error_msg);
   bool CompileMethod(ArtMethod* method, Thread* self)
-  void CreateInstrumentationCache(size_t compile_threshold);
+  void CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold);
   void CreateThreadPool();
   CompilerCallbacks* GetCompilerCallbacks() {
     return compiler_callbacks_;
@@ -95,6 +96,9 @@
   size_t GetCompileThreshold() const {
     return compile_threshold_;
+  size_t GetWarmupThreshold() const {
+    return warmup_threshold_;
+  }
   size_t GetCodeCacheCapacity() const {
     return code_cache_capacity_;
@@ -112,6 +116,7 @@
   bool use_jit_;
   size_t code_cache_capacity_;
   size_t compile_threshold_;
+  size_t warmup_threshold_;
   bool dump_info_on_shutdown_;
   JitOptions() : use_jit_(false), code_cache_capacity_(0), compile_threshold_(0),
diff --git a/runtime/jit/ b/runtime/jit/
index cd5f4cb..4c53162 100644
--- a/runtime/jit/
+++ b/runtime/jit/
@@ -82,9 +82,19 @@
   return code_cache_ptr_ - size;
+uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size) {
+  MutexLock mu(self, lock_);
+  size = RoundUp(size, sizeof(void*));
+  if (size > DataCacheRemain()) {
+    return nullptr;
+  }
+  data_cache_ptr_ += size;
+  return data_cache_ptr_ - size;
 uint8_t* JitCodeCache::AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end) {
   MutexLock mu(self, lock_);
-  const size_t size = end - begin;
+  const size_t size = RoundUp(end - begin, sizeof(void*));
   if (size > DataCacheRemain()) {
     return nullptr;  // Out of space in the data cache.
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 9707f6f..f485e4a 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -86,6 +86,9 @@
   // Reserve a region of code of size at least "size". Returns null if there is no more room.
   uint8_t* ReserveCode(Thread* self, size_t size) REQUIRES(!lock_);
+  // Reserve a region of data of size at least "size". Returns null if there is no more room.
+  uint8_t* ReserveData(Thread* self, size_t size) REQUIRES(!lock_);
   // Add a data array of size (end - begin) with the associated contents, returns null if there
   // is no more room.
   uint8_t* AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end)
diff --git a/runtime/jit/ b/runtime/jit/
index 258c29d..f485682 100644
--- a/runtime/jit/
+++ b/runtime/jit/
@@ -26,16 +26,12 @@
 class JitCompileTask : public Task {
-  JitCompileTask(ArtMethod* method, JitInstrumentationCache* cache)
-      : method_(method), cache_(cache) {
-  }
+  explicit JitCompileTask(ArtMethod* method) : method_(method) {}
   virtual void Run(Thread* self) OVERRIDE {
     ScopedObjectAccess soa(self);
     VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
-    if (Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
-      cache_->SignalCompiled(self, method_);
-    } else {
+    if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
       VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
@@ -46,13 +42,14 @@
   ArtMethod* const method_;
-  JitInstrumentationCache* const cache_;
-JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold)
-    : lock_("jit instrumentation lock"), hot_method_threshold_(hot_method_threshold) {
+JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
+                                                 size_t warm_method_threshold)
+    : hot_method_threshold_(hot_method_threshold),
+      warm_method_threshold_(warm_method_threshold) {
 void JitInstrumentationCache::CreateThreadPool() {
@@ -60,20 +57,11 @@
 void JitInstrumentationCache::DeleteThreadPool() {
+  DCHECK(Runtime::Current()->IsShuttingDown(Thread::Current()));
-void JitInstrumentationCache::SignalCompiled(Thread* self, ArtMethod* method) {
-  ScopedObjectAccessUnchecked soa(self);
-  jmethodID method_id = soa.EncodeMethod(method);
-  MutexLock mu(self, lock_);
-  auto it = samples_.find(method_id);
-  if (it != samples_.end()) {
-    samples_.erase(it);
-  }
-void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t count) {
+void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
   ScopedObjectAccessUnchecked soa(self);
   // Since we don't have on-stack replacement, some methods can remain in the interpreter longer
   // than we want resulting in samples even after the method is compiled.
@@ -81,34 +69,21 @@
       Runtime::Current()->GetJit()->GetCodeCache()->ContainsMethod(method)) {
-  jmethodID method_id = soa.EncodeMethod(method);
-  bool is_hot = false;
-  {
-    MutexLock mu(self, lock_);
-    size_t sample_count = 0;
-    auto it = samples_.find(method_id);
-    if (it != samples_.end()) {
-      it->second += count;
-      sample_count = it->second;
-    } else {
-      sample_count = count;
-      samples_.insert(std::make_pair(method_id, count));
-    }
-    // If we have enough samples, mark as hot and request Jit compilation.
-    if (sample_count >= hot_method_threshold_ && sample_count - count < hot_method_threshold_) {
-      is_hot = true;
+  if (thread_pool_.get() == nullptr) {
+    DCHECK(Runtime::Current()->IsShuttingDown(self));
+    return;
+  }
+  uint16_t sample_count = method->IncrementCounter();
+  if (sample_count == warm_method_threshold_) {
+    ProfilingInfo* info = method->CreateProfilingInfo();
+    if (info != nullptr) {
+      VLOG(jit) << "Start profiling " << PrettyMethod(method);
-  if (is_hot) {
-    if (thread_pool_.get() != nullptr) {
-      thread_pool_->AddTask(self, new JitCompileTask(
-          method->GetInterfaceMethodIfProxy(sizeof(void*)), this));
-      thread_pool_->StartWorkers(self);
-    } else {
-      VLOG(jit) << "Compiling hot method " << PrettyMethod(method);
-      Runtime::Current()->GetJit()->CompileMethod(
-          method->GetInterfaceMethodIfProxy(sizeof(void*)), self);
-    }
+  if (sample_count == hot_method_threshold_) {
+    thread_pool_->AddTask(self, new JitCompileTask(
+        method->GetInterfaceMethodIfProxy(sizeof(void*))));
+    thread_pool_->StartWorkers(self);
@@ -117,5 +92,17 @@
   CHECK(instrumentation_cache_ != nullptr);
+void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
+                                                          mirror::Object* this_object,
+                                                          ArtMethod* caller,
+                                                          uint32_t dex_pc,
+                                                          ArtMethod* callee ATTRIBUTE_UNUSED) {
+  DCHECK(this_object != nullptr);
+  ProfilingInfo* info = caller->GetProfilingInfo();
+  if (info != nullptr) {
+    info->AddInvokeInfo(thread, dex_pc, this_object->GetClass());
+  }
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 0deaf8a..6fdef65 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -45,18 +45,15 @@
 // Keeps track of which methods are hot.
 class JitInstrumentationCache {
-  explicit JitInstrumentationCache(size_t hot_method_threshold);
+  JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold);
   void AddSamples(Thread* self, ArtMethod* method, size_t samples)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_);
-  void SignalCompiled(Thread* self, ArtMethod* method)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_);
+      SHARED_REQUIRES(Locks::mutator_lock_);
   void CreateThreadPool();
   void DeleteThreadPool();
-  Mutex lock_;
-  std::unordered_map<jmethodID, size_t> samples_;
   size_t hot_method_threshold_;
+  size_t warm_method_threshold_;
   std::unique_ptr<ThreadPool> thread_pool_;
@@ -66,37 +63,43 @@
   explicit JitInstrumentationListener(JitInstrumentationCache* cache);
-  virtual void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
-                             ArtMethod* method, uint32_t /*dex_pc*/)
+  void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
+                     ArtMethod* method, uint32_t /*dex_pc*/)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     instrumentation_cache_->AddSamples(thread, method, 1);
-  virtual void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                            ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                            const JValue& /*return_value*/)
+  void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
+                    ArtMethod* /*method*/, uint32_t /*dex_pc*/,
+                    const JValue& /*return_value*/)
       OVERRIDE { }
-  virtual void MethodUnwind(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                            ArtMethod* /*method*/, uint32_t /*dex_pc*/) OVERRIDE { }
-  virtual void FieldRead(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                         ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                         ArtField* /*field*/) OVERRIDE { }
-  virtual void FieldWritten(Thread* /*thread*/, mirror::Object* /*this_object*/,
-                            ArtMethod* /*method*/, uint32_t /*dex_pc*/,
-                            ArtField* /*field*/, const JValue& /*field_value*/)
+  void MethodUnwind(Thread* /*thread*/, mirror::Object* /*this_object*/,
+                    ArtMethod* /*method*/, uint32_t /*dex_pc*/) OVERRIDE { }
+  void FieldRead(Thread* /*thread*/, mirror::Object* /*this_object*/,
+                 ArtMethod* /*method*/, uint32_t /*dex_pc*/,
+                 ArtField* /*field*/) OVERRIDE { }
+  void FieldWritten(Thread* /*thread*/, mirror::Object* /*this_object*/,
+                    ArtMethod* /*method*/, uint32_t /*dex_pc*/,
+                    ArtField* /*field*/, const JValue& /*field_value*/)
       OVERRIDE { }
-  virtual void ExceptionCaught(Thread* /*thread*/,
-                               mirror::Throwable* /*exception_object*/) OVERRIDE { }
+  void ExceptionCaught(Thread* /*thread*/,
+                       mirror::Throwable* /*exception_object*/) OVERRIDE { }
-  virtual void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
-                          ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
+  void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
+                  ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
-  // We only care about how many dex instructions were executed in the Jit.
-  virtual void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+  void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     CHECK_LE(dex_pc_offset, 0);
     instrumentation_cache_->AddSamples(thread, method, 1);
+  void InvokeVirtualOrInterface(Thread* thread,
+                                mirror::Object* this_object,
+                                ArtMethod* caller,
+                                uint32_t dex_pc,
+                                ArtMethod* callee)
+      OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
   JitInstrumentationCache* const instrumentation_cache_;
diff --git a/runtime/jit/ b/runtime/jit/
new file mode 100644
index 0000000..0c039f2
--- /dev/null
+++ b/runtime/jit/
@@ -0,0 +1,117 @@
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "profiling_info.h"
+#include "art_method-inl.h"
+#include "dex_instruction.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"
+namespace art {
+ProfilingInfo* ProfilingInfo::Create(ArtMethod* method) {
+  // Walk over the dex instructions of the method and keep track of
+  // instructions we are interested in profiling.
+  const uint16_t* code_ptr = nullptr;
+  const uint16_t* code_end = nullptr;
+  {
+    ScopedObjectAccess soa(Thread::Current());
+    DCHECK(!method->IsNative());
+    const DexFile::CodeItem& code_item = *method->GetCodeItem();
+    code_ptr = code_item.insns_;
+    code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
+  }
+  uint32_t dex_pc = 0;
+  std::vector<uint32_t> entries;
+  while (code_ptr < code_end) {
+    const Instruction& instruction = *Instruction::At(code_ptr);
+    switch (instruction.Opcode()) {
+      case Instruction::INVOKE_VIRTUAL:
+      case Instruction::INVOKE_VIRTUAL_RANGE:
+      case Instruction::INVOKE_VIRTUAL_QUICK:
+      case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
+      case Instruction::INVOKE_INTERFACE:
+      case Instruction::INVOKE_INTERFACE_RANGE:
+        entries.push_back(dex_pc);
+        break;
+      default:
+        break;
+    }
+    dex_pc += instruction.SizeInCodeUnits();
+    code_ptr += instruction.SizeInCodeUnits();
+  }
+  // If there is no instruction we are interested in, there is no need to create a
+  // `ProfilingInfo` object: it would never be filled.
+  if (entries.empty()) {
+    return nullptr;
+  }
+  // Allocate the `ProfilingInfo` object in the JIT's data space.
+  jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
+  size_t profile_info_size = sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size();
+  uint8_t* data = code_cache->ReserveData(Thread::Current(), profile_info_size);
+  if (data == nullptr) {
+    VLOG(jit) << "Cannot allocate profiling info anymore";
+    return nullptr;
+  }
+  return new (data) ProfilingInfo(entries);
+void ProfilingInfo::AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls) {
+  InlineCache* cache = nullptr;
+  // TODO: binary search if array is too long.
+  for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+    if (cache_[i].dex_pc == dex_pc) {
+      cache = &cache_[i];
+      break;
+    }
+  }
+  DCHECK(cache != nullptr);
+  ScopedObjectAccess soa(self);
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    mirror::Class* existing = cache->classes_[i].Read<kWithoutReadBarrier>();
+    if (existing == cls) {
+      // Receiver type is already in the cache, nothing else to do.
+      return;
+    } else if (existing == nullptr) {
+      // Cache entry is empty, try to put `cls` in it.
+      GcRoot<mirror::Class> expected_root(nullptr);
+      GcRoot<mirror::Class> desired_root(cls);
+      if (!reinterpret_cast<Atomic<GcRoot<mirror::Class>>*>(&cache->classes_[i])->
+              CompareExchangeStrongSequentiallyConsistent(expected_root, desired_root)) {
+        // Some other thread put a class in the cache, continue iteration starting at this
+        // entry in case the entry contains `cls`.
+        --i;
+      } else {
+        // We successfully set `cls`, just return.
+        return;
+      }
+    }
+  }
+  // Unsuccessful - cache is full, making it megamorphic.
+  DCHECK(cache->IsMegamorphic());
+}  // namespace art
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
new file mode 100644
index 0000000..73ca41a
--- /dev/null
+++ b/runtime/jit/profiling_info.h
@@ -0,0 +1,106 @@
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vector>
+#include "base/macros.h"
+#include "gc_root.h"
+namespace art {
+class ArtMethod;
+namespace mirror {
+class Class;
+ * Profiling info for a method, created and filled by the interpreter once the
+ * method is warm, and used by the compiler to drive optimizations.
+ */
+class ProfilingInfo {
+ public:
+  static ProfilingInfo* Create(ArtMethod* method);
+  // Add information from an executed INVOKE instruction to the profile.
+  void AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls);
+  // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
+  template<typename RootVisitorType>
+  void VisitRoots(RootVisitorType& visitor) NO_THREAD_SAFETY_ANALYSIS {
+    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+      InlineCache* cache = &cache_[i];
+      for (size_t j = 0; j < InlineCache::kIndividualCacheSize; ++j) {
+        visitor.VisitRootIfNonNull(cache->classes_[j].AddressWithoutBarrier());
+      }
+    }
+  }
+ private:
+  // Structure to store the classes seen at runtime for a specific instruction.
+  // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
+  struct InlineCache {
+    bool IsMonomorphic() const {
+      DCHECK_GE(kIndividualCacheSize, 2);
+      return !classes_[0].IsNull() && classes_[1].IsNull();
+    }
+    bool IsMegamorphic() const {
+      for (size_t i = 0; i < kIndividualCacheSize; ++i) {
+        if (classes_[i].IsNull()) {
+          return false;
+        }
+      }
+      return true;
+    }
+    bool IsUnitialized() const {
+      return classes_[0].IsNull();
+    }
+    bool IsPolymorphic() const {
+      DCHECK_GE(kIndividualCacheSize, 3);
+      return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
+    }
+    static constexpr uint16_t kIndividualCacheSize = 5;
+    uint32_t dex_pc;
+    GcRoot<mirror::Class> classes_[kIndividualCacheSize];
+  };
+  explicit ProfilingInfo(const std::vector<uint32_t>& entries)
+      : number_of_inline_caches_(entries.size()) {
+    memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
+    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+      cache_[i].dex_pc = entries[i];
+    }
+  }
+  // Number of instructions we are profiling in the ArtMethod.
+  const uint32_t number_of_inline_caches_;
+  // Dynamically allocated array of size `number_of_inline_caches_`.
+  InlineCache cache_[0];
+}  // namespace art