Diffstat (limited to 'runtime/interpreter/interpreter_cache.h')
-rw-r--r--  runtime/interpreter/interpreter_cache.h  80
1 file changed, 47 insertions, 33 deletions
diff --git a/runtime/interpreter/interpreter_cache.h b/runtime/interpreter/interpreter_cache.h
index 0ada562438..af025cecbd 100644
--- a/runtime/interpreter/interpreter_cache.h
+++ b/runtime/interpreter/interpreter_cache.h
@@ -20,17 +20,22 @@
#include <array>
#include <atomic>
+#include "base/atomic_pair.h"
#include "base/bit_utils.h"
#include "base/macros.h"
namespace art {
+class Instruction;
class Thread;
// Small fast thread-local cache for the interpreter.
-// It can hold arbitrary pointer-sized key-value pair.
-// The interpretation of the value depends on the key.
+//
+// The key is an absolute pointer to a dex instruction.
+//
+// The value depends on the opcode of the dex instruction.
// Presence of entry might imply some pre-conditions.
+//
// All operations must be done from the owning thread,
// or at a point when the owning thread is suspended.
//
@@ -46,52 +51,61 @@ class Thread;
// from assembly (it ensures that the offset is valid immediate value).
class ALIGNED(16) InterpreterCache {
public:
- // Aligned since we load the whole entry in single assembly instruction.
- typedef std::pair<const void*, size_t> Entry ALIGNED(2 * sizeof(size_t));
+ using Entry = AtomicPair<size_t>;
- // 2x size increase/decrease corresponds to ~0.5% interpreter performance change.
- // Value of 256 has around 75% cache hit rate.
- static constexpr size_t kSize = 256;
+ static constexpr size_t kThreadLocalSize = 256; // Value of 256 has around 75% cache hit rate.
+ static constexpr size_t kSharedSize = 16 * 1024; // Value of 16k has around 90% cache hit rate.
+ static constexpr size_t kHashShift = 2; // Number of trailing dex pc bits to drop.
- InterpreterCache() {
- // We can not use the Clear() method since the constructor will not
- // be called from the owning thread.
- data_.fill(Entry{});
- }
+ InterpreterCache();
- // Clear the whole cache. It requires the owning thread for DCHECKs.
- void Clear(Thread* owning_thread);
+ void ClearThreadLocal(Thread* owning_thread);
- ALWAYS_INLINE bool Get(const void* key, /* out */ size_t* value) {
- DCHECK(IsCalledFromOwningThread());
- Entry& entry = data_[IndexOf(key)];
- if (LIKELY(entry.first == key)) {
- *value = entry.second;
- return true;
- }
- return false;
- }
+ static void ClearShared();
- ALWAYS_INLINE void Set(const void* key, size_t value) {
- DCHECK(IsCalledFromOwningThread());
- data_[IndexOf(key)] = Entry{key, value};
+ template<bool kSkipThreadLocal = false>
+ ALWAYS_INLINE bool Get(Thread* self, const void* dex_instruction, /* out */ size_t* value);
+
+ ALWAYS_INLINE void Set(Thread* self, const void* dex_instruction, size_t value);
+
+ template<typename Callback>
+ void ForEachThreadLocalEntry(Callback&& callback) {
+ for (Entry& entry : thread_local_array_) {
+ callback(reinterpret_cast<const Instruction*>(entry.first), entry.second);
+ }
}
- std::array<Entry, kSize>& GetArray() {
- return data_;
+ template<typename Callback>
+ static void ForEachSharedEntry(Callback&& callback) {
+ for (std::atomic<Entry>& atomic_entry : shared_array_) {
+ Entry old_entry = AtomicPairLoadAcquire(&atomic_entry);
+ Entry new_entry = old_entry;
+ callback(reinterpret_cast<const Instruction*>(new_entry.first), new_entry.second);
+ if (old_entry.second != new_entry.second) {
+ AtomicPairStoreRelease(&atomic_entry, new_entry);
+ }
+ }
}
private:
- bool IsCalledFromOwningThread();
-
- static ALWAYS_INLINE size_t IndexOf(const void* key) {
+ template<size_t kSize>
+ static ALWAYS_INLINE size_t IndexOf(size_t key) {
static_assert(IsPowerOfTwo(kSize), "Size must be power of two");
- size_t index = (reinterpret_cast<uintptr_t>(key) >> 2) & (kSize - 1);
+ size_t index = (key >> kHashShift) & (kSize - 1);
DCHECK_LT(index, kSize);
return index;
}
- std::array<Entry, kSize> data_;
+ // Small cache of fixed size which is always present for every thread.
+ // It is stored directly (without indirection) inside the Thread object.
+ // This makes it as fast as possible to access from assembly fast-path.
+ std::array<Entry, kThreadLocalSize> thread_local_array_;
+
+ // Larger cache which is shared by all threads.
+ // It is used as the next cache level if the lookup in the local array fails.
+ // It needs to be accessed using atomic operations, and is contended,
+ // but the sharing allows it to be larger than the per-thread cache.
+ static std::array<std::atomic<Entry>, kSharedSize> shared_array_;
};
} // namespace art
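
For readers skimming the diff, here is a stand-alone sketch of the two-level lookup this change introduces: a small per-thread array probed without atomics, backed by a larger shared array probed with an acquire load. This is a simplification, not ART's implementation: everything in namespace sketch is invented for illustration, the real Get/Set take a Thread* and are only declared in this header, ART accesses 16-byte entries through its AtomicPair helpers rather than std::atomic<Entry> (which may not be lock-free), and both the promotion of shared hits into the thread-local level and the double write in Set are assumptions about behavior the header does not show.

#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <optional>

namespace sketch {

struct Entry {
  uintptr_t key;  // Absolute pointer to a dex instruction.
  size_t value;   // Interpretation depends on the instruction's opcode.
};

constexpr size_t kThreadLocalSize = 256;   // ~75% hit rate per the comments.
constexpr size_t kSharedSize = 16 * 1024;  // ~90% hit rate per the comments.
constexpr size_t kHashShift = 2;           // Drop trailing dex pc bits.

template <size_t kSize>
size_t IndexOf(uintptr_t key) {
  static_assert((kSize & (kSize - 1)) == 0, "Size must be power of two");
  return (key >> kHashShift) & (kSize - 1);
}

// Per-thread fast level and process-wide shared level (zero-initialized).
thread_local std::array<Entry, kThreadLocalSize> thread_local_array;
std::array<std::atomic<Entry>, kSharedSize> shared_array;

std::optional<size_t> Get(const void* dex_instruction) {
  uintptr_t key = reinterpret_cast<uintptr_t>(dex_instruction);
  // Fast path: plain (non-atomic) read of the owning thread's array.
  Entry& local = thread_local_array[IndexOf<kThreadLocalSize>(key)];
  if (local.key == key) {
    return local.value;
  }
  // Slow path: acquire load from the contended shared array.
  Entry shared =
      shared_array[IndexOf<kSharedSize>(key)].load(std::memory_order_acquire);
  if (shared.key == key) {
    local = shared;  // Assumed: promote the hit into the thread-local level.
    return shared.value;
  }
  return std::nullopt;
}

void Set(const void* dex_instruction, size_t value) {
  uintptr_t key = reinterpret_cast<uintptr_t>(dex_instruction);
  Entry entry{key, value};
  // Assumed: install into both levels so later lookups hit the fast path.
  thread_local_array[IndexOf<kThreadLocalSize>(key)] = entry;
  shared_array[IndexOf<kSharedSize>(key)].store(entry,
                                                std::memory_order_release);
}

}  // namespace sketch

int main() {
  int dummy = 0;  // Stand-in for a dex instruction address.
  sketch::Set(&dummy, 42);
  std::optional<size_t> hit = sketch::Get(&dummy);
  return (hit && *hit == 42) ? 0 : 1;
}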
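
The new ForEachSharedEntry shows the pattern for mutating the shared array safely: load each 16-byte entry with acquire semantics, let the callback rewrite the value through a reference, and store the entry back with release semantics only if the value actually changed. Continuing the sketch above, with the same caveats (ART uses AtomicPairLoadAcquire/AtomicPairStoreRelease for guaranteed single 16-byte accesses, while std::atomic<Entry> may fall back to a lock):

namespace sketch {

// Visit every shared entry; write back only entries the callback changed,
// so untouched cache lines are not needlessly invalidated on other cores.
template <typename Callback>
void ForEachSharedEntry(Callback&& callback) {
  for (std::atomic<Entry>& atomic_entry : shared_array) {
    Entry old_entry = atomic_entry.load(std::memory_order_acquire);
    Entry new_entry = old_entry;
    callback(reinterpret_cast<const void*>(new_entry.key), new_entry.value);
    if (old_entry.value != new_entry.value) {
      atomic_entry.store(new_entry, std::memory_order_release);
    }
  }
}

}  // namespace sketch

A hypothetical caller would pass a lambda taking the value by reference, e.g. sketch::ForEachSharedEntry([](const void* instruction, size_t& value) { /* rewrite value if needed */ });. Skipping the store for unchanged values matters because the comments describe the shared array as contended. Note that, as in the diff, there is no compare-and-swap, so a concurrent Set between the load and the store can be overwritten, which is presumably acceptable for a cache.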