Record profiling information before JIT compilation.
- Add a new instrumentation kind to record dynamic invokes.
- Use the JNI entry point field to store the profiling data.
- Record the receiver classes seen at every dynamic invoke.
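
In outline, the recording path this change sets up (a sketch of the call
flow, using the names introduced in this patch):

  interpreter executes an invoke-virtual or invoke-interface
    -> Instrumentation::kInvokeVirtualOrInterface event
    -> JitInstrumentationListener::InvokeVirtualOrInterface(...)
    -> ProfilingInfo::AddInvokeInfo(thread, dex_pc, this_object->GetClass())
    -> lock-free CAS of the receiver class into the InlineCache for dex_pc
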
Change-Id: I2c1738ab2a72052d45964d055dc16b44b906e54c
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 26a4fe4..683b2cf 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -39,6 +39,8 @@
options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheCapacity);
jit_options->compile_threshold_ =
options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
+ jit_options->warmup_threshold_ =
+ options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
jit_options->dump_info_on_shutdown_ =
options.Exists(RuntimeArgumentMap::DumpJITInfoOnShutdown);
return jit_options;
@@ -160,17 +162,19 @@
}
}
-void Jit::CreateInstrumentationCache(size_t compile_threshold) {
+void Jit::CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold) {
CHECK_GT(compile_threshold, 0U);
Runtime* const runtime = Runtime::Current();
runtime->GetThreadList()->SuspendAll(__FUNCTION__);
// Add Jit interpreter instrumentation, tells the interpreter when to notify the jit to compile
// something.
- instrumentation_cache_.reset(new jit::JitInstrumentationCache(compile_threshold));
+ instrumentation_cache_.reset(
+ new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
runtime->GetInstrumentation()->AddListener(
new jit::JitInstrumentationListener(instrumentation_cache_.get()),
instrumentation::Instrumentation::kMethodEntered |
- instrumentation::Instrumentation::kBackwardBranch);
+ instrumentation::Instrumentation::kBackwardBranch |
+ instrumentation::Instrumentation::kInvokeVirtualOrInterface);
runtime->GetThreadList()->ResumeAll();
}
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index ca6e7ea..643bc23 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -43,13 +43,14 @@
class Jit {
public:
static constexpr bool kStressMode = kIsDebugBuild;
- static constexpr size_t kDefaultCompileThreshold = kStressMode ? 1 : 1000;
+ static constexpr size_t kDefaultCompileThreshold = kStressMode ? 2 : 1000;
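+  // Methods start being profiled (become warm) halfway to the compile
+  // threshold.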
+ static constexpr size_t kDefaultWarmupThreshold = kDefaultCompileThreshold / 2;
virtual ~Jit();
static Jit* Create(JitOptions* options, std::string* error_msg);
bool CompileMethod(ArtMethod* method, Thread* self)
SHARED_REQUIRES(Locks::mutator_lock_);
- void CreateInstrumentationCache(size_t compile_threshold);
+ void CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold);
void CreateThreadPool();
CompilerCallbacks* GetCompilerCallbacks() {
return compiler_callbacks_;
@@ -95,6 +96,9 @@
size_t GetCompileThreshold() const {
return compile_threshold_;
}
+ size_t GetWarmupThreshold() const {
+ return warmup_threshold_;
+ }
size_t GetCodeCacheCapacity() const {
return code_cache_capacity_;
}
@@ -112,6 +116,7 @@
bool use_jit_;
size_t code_cache_capacity_;
size_t compile_threshold_;
+ size_t warmup_threshold_;
bool dump_info_on_shutdown_;
JitOptions() : use_jit_(false), code_cache_capacity_(0), compile_threshold_(0),
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index cd5f4cb..4c53162 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -82,9 +82,19 @@
return code_cache_ptr_ - size;
}
+uint8_t* JitCodeCache::ReserveData(Thread* self, size_t size) {
+ MutexLock mu(self, lock_);
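+  // Pointer-align the size so objects constructed in the data cache (such as
+  // ProfilingInfo) start at a suitably aligned address.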
+ size = RoundUp(size, sizeof(void*));
+ if (size > DataCacheRemain()) {
+ return nullptr;
+ }
+ data_cache_ptr_ += size;
+ return data_cache_ptr_ - size;
+}
+
uint8_t* JitCodeCache::AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end) {
MutexLock mu(self, lock_);
- const size_t size = end - begin;
+ const size_t size = RoundUp(end - begin, sizeof(void*));
if (size > DataCacheRemain()) {
return nullptr; // Out of space in the data cache.
}
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 9707f6f..f485e4a 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -86,6 +86,9 @@
// Reserve a region of code of size at least "size". Returns null if there is no more room.
uint8_t* ReserveCode(Thread* self, size_t size) REQUIRES(!lock_);
+ // Reserve a region of data of size at least "size". Returns null if there is no more room.
+ uint8_t* ReserveData(Thread* self, size_t size) REQUIRES(!lock_);
+
// Add a data array of size (end - begin) with the associated contents, returns null if there
// is no more room.
uint8_t* AddDataArray(Thread* self, const uint8_t* begin, const uint8_t* end)
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index 258c29d..f485682 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -26,16 +26,12 @@
class JitCompileTask : public Task {
public:
- JitCompileTask(ArtMethod* method, JitInstrumentationCache* cache)
- : method_(method), cache_(cache) {
- }
+ explicit JitCompileTask(ArtMethod* method) : method_(method) {}
virtual void Run(Thread* self) OVERRIDE {
ScopedObjectAccess soa(self);
VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
- if (Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
- cache_->SignalCompiled(self, method_);
- } else {
+ if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
}
}
@@ -46,13 +42,14 @@
private:
ArtMethod* const method_;
- JitInstrumentationCache* const cache_;
DISALLOW_IMPLICIT_CONSTRUCTORS(JitCompileTask);
};
-JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold)
- : lock_("jit instrumentation lock"), hot_method_threshold_(hot_method_threshold) {
+JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
+ size_t warm_method_threshold)
+ : hot_method_threshold_(hot_method_threshold),
+ warm_method_threshold_(warm_method_threshold) {
}
void JitInstrumentationCache::CreateThreadPool() {
@@ -60,20 +57,11 @@
}
void JitInstrumentationCache::DeleteThreadPool() {
+ DCHECK(Runtime::Current()->IsShuttingDown(Thread::Current()));
thread_pool_.reset();
}
-void JitInstrumentationCache::SignalCompiled(Thread* self, ArtMethod* method) {
- ScopedObjectAccessUnchecked soa(self);
- jmethodID method_id = soa.EncodeMethod(method);
- MutexLock mu(self, lock_);
- auto it = samples_.find(method_id);
- if (it != samples_.end()) {
- samples_.erase(it);
- }
-}
-
-void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t count) {
+void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t) {
ScopedObjectAccessUnchecked soa(self);
// Since we don't have on-stack replacement, some methods can remain in the interpreter longer
// than we want resulting in samples even after the method is compiled.
@@ -81,34 +69,21 @@
Runtime::Current()->GetJit()->GetCodeCache()->ContainsMethod(method)) {
return;
}
- jmethodID method_id = soa.EncodeMethod(method);
- bool is_hot = false;
- {
- MutexLock mu(self, lock_);
- size_t sample_count = 0;
- auto it = samples_.find(method_id);
- if (it != samples_.end()) {
- it->second += count;
- sample_count = it->second;
- } else {
- sample_count = count;
- samples_.insert(std::make_pair(method_id, count));
- }
- // If we have enough samples, mark as hot and request Jit compilation.
- if (sample_count >= hot_method_threshold_ && sample_count - count < hot_method_threshold_) {
- is_hot = true;
+ if (thread_pool_.get() == nullptr) {
+ DCHECK(Runtime::Current()->IsShuttingDown(self));
+ return;
+ }
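+  // Each sample moves the method along the interpreted -> warm -> hot path:
+  // crossing the warm threshold starts profiling, and crossing the hot
+  // threshold queues the method for compilation.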
+ uint16_t sample_count = method->IncrementCounter();
+ if (sample_count == warm_method_threshold_) {
+ ProfilingInfo* info = method->CreateProfilingInfo();
+ if (info != nullptr) {
+ VLOG(jit) << "Start profiling " << PrettyMethod(method);
}
}
- if (is_hot) {
- if (thread_pool_.get() != nullptr) {
- thread_pool_->AddTask(self, new JitCompileTask(
- method->GetInterfaceMethodIfProxy(sizeof(void*)), this));
- thread_pool_->StartWorkers(self);
- } else {
- VLOG(jit) << "Compiling hot method " << PrettyMethod(method);
- Runtime::Current()->GetJit()->CompileMethod(
- method->GetInterfaceMethodIfProxy(sizeof(void*)), self);
- }
+ if (sample_count == hot_method_threshold_) {
+ thread_pool_->AddTask(self, new JitCompileTask(
+ method->GetInterfaceMethodIfProxy(sizeof(void*))));
+ thread_pool_->StartWorkers(self);
}
}
@@ -117,5 +92,17 @@
CHECK(instrumentation_cache_ != nullptr);
}
+void JitInstrumentationListener::InvokeVirtualOrInterface(Thread* thread,
+ mirror::Object* this_object,
+ ArtMethod* caller,
+ uint32_t dex_pc,
+ ArtMethod* callee ATTRIBUTE_UNUSED) {
+ DCHECK(this_object != nullptr);
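+  // `callee` is deliberately unused: the inline cache records the dynamic
+  // class of the receiver at this call site, in the caller's profile.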
+ ProfilingInfo* info = caller->GetProfilingInfo();
+ if (info != nullptr) {
+ info->AddInvokeInfo(thread, dex_pc, this_object->GetClass());
+ }
+}
+
} // namespace jit
} // namespace art
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 0deaf8a..6fdef65 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -45,18 +45,15 @@
// Keeps track of which methods are hot.
class JitInstrumentationCache {
public:
- explicit JitInstrumentationCache(size_t hot_method_threshold);
+ JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold);
void AddSamples(Thread* self, ArtMethod* method, size_t samples)
- SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_);
- void SignalCompiled(Thread* self, ArtMethod* method)
- SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_);
+ SHARED_REQUIRES(Locks::mutator_lock_);
void CreateThreadPool();
void DeleteThreadPool();
private:
- Mutex lock_;
- std::unordered_map<jmethodID, size_t> samples_;
size_t hot_method_threshold_;
+ size_t warm_method_threshold_;
std::unique_ptr<ThreadPool> thread_pool_;
DISALLOW_IMPLICIT_CONSTRUCTORS(JitInstrumentationCache);
@@ -66,37 +63,43 @@
public:
explicit JitInstrumentationListener(JitInstrumentationCache* cache);
- virtual void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
- ArtMethod* method, uint32_t /*dex_pc*/)
+ void MethodEntered(Thread* thread, mirror::Object* /*this_object*/,
+ ArtMethod* method, uint32_t /*dex_pc*/)
OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
instrumentation_cache_->AddSamples(thread, method, 1);
}
- virtual void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
- ArtMethod* /*method*/, uint32_t /*dex_pc*/,
- const JValue& /*return_value*/)
+ void MethodExited(Thread* /*thread*/, mirror::Object* /*this_object*/,
+ ArtMethod* /*method*/, uint32_t /*dex_pc*/,
+ const JValue& /*return_value*/)
OVERRIDE { }
- virtual void MethodUnwind(Thread* /*thread*/, mirror::Object* /*this_object*/,
- ArtMethod* /*method*/, uint32_t /*dex_pc*/) OVERRIDE { }
- virtual void FieldRead(Thread* /*thread*/, mirror::Object* /*this_object*/,
- ArtMethod* /*method*/, uint32_t /*dex_pc*/,
- ArtField* /*field*/) OVERRIDE { }
- virtual void FieldWritten(Thread* /*thread*/, mirror::Object* /*this_object*/,
- ArtMethod* /*method*/, uint32_t /*dex_pc*/,
- ArtField* /*field*/, const JValue& /*field_value*/)
+ void MethodUnwind(Thread* /*thread*/, mirror::Object* /*this_object*/,
+ ArtMethod* /*method*/, uint32_t /*dex_pc*/) OVERRIDE { }
+ void FieldRead(Thread* /*thread*/, mirror::Object* /*this_object*/,
+ ArtMethod* /*method*/, uint32_t /*dex_pc*/,
+ ArtField* /*field*/) OVERRIDE { }
+ void FieldWritten(Thread* /*thread*/, mirror::Object* /*this_object*/,
+ ArtMethod* /*method*/, uint32_t /*dex_pc*/,
+ ArtField* /*field*/, const JValue& /*field_value*/)
OVERRIDE { }
- virtual void ExceptionCaught(Thread* /*thread*/,
- mirror::Throwable* /*exception_object*/) OVERRIDE { }
+ void ExceptionCaught(Thread* /*thread*/,
+ mirror::Throwable* /*exception_object*/) OVERRIDE { }
- virtual void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
- ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
+ void DexPcMoved(Thread* /*self*/, mirror::Object* /*this_object*/,
+ ArtMethod* /*method*/, uint32_t /*new_dex_pc*/) OVERRIDE { }
- // We only care about how many dex instructions were executed in the Jit.
- virtual void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
+ void BackwardBranch(Thread* thread, ArtMethod* method, int32_t dex_pc_offset)
OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
CHECK_LE(dex_pc_offset, 0);
instrumentation_cache_->AddSamples(thread, method, 1);
}
+ void InvokeVirtualOrInterface(Thread* thread,
+ mirror::Object* this_object,
+ ArtMethod* caller,
+ uint32_t dex_pc,
+ ArtMethod* callee)
+ OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_);
+
private:
JitInstrumentationCache* const instrumentation_cache_;
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
new file mode 100644
index 0000000..0c039f2
--- /dev/null
+++ b/runtime/jit/profiling_info.cc
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "profiling_info.h"
+
+#include "art_method-inl.h"
+#include "dex_instruction.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "scoped_thread_state_change.h"
+#include "thread.h"
+
+namespace art {
+
+ProfilingInfo* ProfilingInfo::Create(ArtMethod* method) {
+ // Walk over the dex instructions of the method and keep track of
+ // instructions we are interested in profiling.
+ const uint16_t* code_ptr = nullptr;
+ const uint16_t* code_end = nullptr;
+ {
+ ScopedObjectAccess soa(Thread::Current());
+ DCHECK(!method->IsNative());
+ const DexFile::CodeItem& code_item = *method->GetCodeItem();
+ code_ptr = code_item.insns_;
+ code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
+ }
+
+ uint32_t dex_pc = 0;
+ std::vector<uint32_t> entries;
+ while (code_ptr < code_end) {
+ const Instruction& instruction = *Instruction::At(code_ptr);
+ switch (instruction.Opcode()) {
+ case Instruction::INVOKE_VIRTUAL:
+ case Instruction::INVOKE_VIRTUAL_RANGE:
+ case Instruction::INVOKE_VIRTUAL_QUICK:
+ case Instruction::INVOKE_VIRTUAL_RANGE_QUICK:
+ case Instruction::INVOKE_INTERFACE:
+ case Instruction::INVOKE_INTERFACE_RANGE:
+ entries.push_back(dex_pc);
+ break;
+
+ default:
+ break;
+ }
+ dex_pc += instruction.SizeInCodeUnits();
+ code_ptr += instruction.SizeInCodeUnits();
+ }
+
+  // If there is no instruction we are interested in, there is no need to
+  // create a `ProfilingInfo` object: it would never be filled.
+ if (entries.empty()) {
+ return nullptr;
+ }
+
+  // Allocate the `ProfilingInfo` object in the JIT's data space.
+ jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
+ size_t profile_info_size = sizeof(ProfilingInfo) + sizeof(InlineCache) * entries.size();
+ uint8_t* data = code_cache->ReserveData(Thread::Current(), profile_info_size);
+
+ if (data == nullptr) {
+ VLOG(jit) << "Cannot allocate profiling info anymore";
+ return nullptr;
+ }
+
+ return new (data) ProfilingInfo(entries);
+}
+
+void ProfilingInfo::AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls) {
+ InlineCache* cache = nullptr;
+ // TODO: binary search if array is too long.
+ for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+ if (cache_[i].dex_pc == dex_pc) {
+ cache = &cache_[i];
+ break;
+ }
+ }
+ DCHECK(cache != nullptr);
+
+ ScopedObjectAccess soa(self);
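+  // Lock-free insert: scan the fixed-size slots and CAS `cls` into the first
+  // empty one, so concurrent interpreter threads never block here.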
+ for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+ mirror::Class* existing = cache->classes_[i].Read<kWithoutReadBarrier>();
+ if (existing == cls) {
+ // Receiver type is already in the cache, nothing else to do.
+ return;
+ } else if (existing == nullptr) {
+ // Cache entry is empty, try to put `cls` in it.
+ GcRoot<mirror::Class> expected_root(nullptr);
+ GcRoot<mirror::Class> desired_root(cls);
+ if (!reinterpret_cast<Atomic<GcRoot<mirror::Class>>*>(&cache->classes_[i])->
+ CompareExchangeStrongSequentiallyConsistent(expected_root, desired_root)) {
+        // Some other thread put a class in the cache; re-examine this entry
+        // in case it now contains `cls`.
+ --i;
+ } else {
+ // We successfully set `cls`, just return.
+ return;
+ }
+ }
+ }
+  // Unsuccessful: the cache is full, i.e. the call site is now megamorphic.
+ DCHECK(cache->IsMegamorphic());
+}
+
+} // namespace art
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
new file mode 100644
index 0000000..73ca41a
--- /dev/null
+++ b/runtime/jit/profiling_info.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_PROFILING_INFO_H_
+#define ART_RUNTIME_JIT_PROFILING_INFO_H_
+
+#include <vector>
+
+#include "base/macros.h"
+#include "gc_root.h"
+
+namespace art {
+
+class ArtMethod;
+
+namespace mirror {
+class Class;
+}
+
+/**
+ * Profiling info for a method, created and filled by the interpreter once the
+ * method is warm, and used by the compiler to drive optimizations.
+ */
+class ProfilingInfo {
+ public:
+ static ProfilingInfo* Create(ArtMethod* method);
+
+ // Add information from an executed INVOKE instruction to the profile.
+ void AddInvokeInfo(Thread* self, uint32_t dex_pc, mirror::Class* cls);
+
+ // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
+ template<typename RootVisitorType>
+ void VisitRoots(RootVisitorType& visitor) NO_THREAD_SAFETY_ANALYSIS {
+ for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+ InlineCache* cache = &cache_[i];
+ for (size_t j = 0; j < InlineCache::kIndividualCacheSize; ++j) {
+ visitor.VisitRootIfNonNull(cache->classes_[j].AddressWithoutBarrier());
+ }
+ }
+ }
+
+ private:
+ // Structure to store the classes seen at runtime for a specific instruction.
+ // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
+ struct InlineCache {
+ bool IsMonomorphic() const {
+ DCHECK_GE(kIndividualCacheSize, 2);
+ return !classes_[0].IsNull() && classes_[1].IsNull();
+ }
+
+ bool IsMegamorphic() const {
+ for (size_t i = 0; i < kIndividualCacheSize; ++i) {
+ if (classes_[i].IsNull()) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+    bool IsUninitialized() const {
+ return classes_[0].IsNull();
+ }
+
+ bool IsPolymorphic() const {
+ DCHECK_GE(kIndividualCacheSize, 3);
+ return !classes_[1].IsNull() && classes_[kIndividualCacheSize - 1].IsNull();
+ }
+
+ static constexpr uint16_t kIndividualCacheSize = 5;
+ uint32_t dex_pc;
+ GcRoot<mirror::Class> classes_[kIndividualCacheSize];
+ };
+
+ explicit ProfilingInfo(const std::vector<uint32_t>& entries)
+ : number_of_inline_caches_(entries.size()) {
+ memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
+ for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+ cache_[i].dex_pc = entries[i];
+ }
+ }
+
+ // Number of instructions we are profiling in the ArtMethod.
+ const uint32_t number_of_inline_caches_;
+
+ // Dynamically allocated array of size `number_of_inline_caches_`.
+ InlineCache cache_[0];
+
+ DISALLOW_COPY_AND_ASSIGN(ProfilingInfo);
+};
+
+} // namespace art
+
+#endif // ART_RUNTIME_JIT_PROFILING_INFO_H_