diff options
| author | 2022-01-17 01:32:55 +0000 | |
|---|---|---|
| committer | 2022-01-17 01:32:55 +0000 | |
| commit | 3d2f148fe040b60452d5d9be7d08dec693132078 (patch) | |
| tree | 6a8a0aa66c68e8a4c49833b2a93b263985259aed /runtime/interpreter/interpreter_cache.h | |
| parent | fa40e6e318b21d4a1885a6ffea6efc3c0b5cc1cd (diff) | |
Revert "Add thread-shared interpreter cache"
This reverts commit fa40e6e318b21d4a1885a6ffea6efc3c0b5cc1cd.
Reason for revert: Seeing several different failures that appear related, both test failures and b/214850618. And it appears a potentially significant unresolved comment was overlooked.
Change-Id: I2b5260ac7f2168831f0d1b0d7c76b70ecc1fb77d
Diffstat (limited to 'runtime/interpreter/interpreter_cache.h')
| -rw-r--r-- | runtime/interpreter/interpreter_cache.h | 80 |
1 file changed, 33 insertions, 47 deletions
diff --git a/runtime/interpreter/interpreter_cache.h b/runtime/interpreter/interpreter_cache.h index af025cecbd..0ada562438 100644 --- a/runtime/interpreter/interpreter_cache.h +++ b/runtime/interpreter/interpreter_cache.h @@ -20,22 +20,17 @@ #include <array> #include <atomic> -#include "base/atomic_pair.h" #include "base/bit_utils.h" #include "base/macros.h" namespace art { -class Instruction; class Thread; // Small fast thread-local cache for the interpreter. -// -// The key is an absolute pointer to a dex instruction. -// -// The value depends on the opcode of the dex instruction. +// It can hold arbitrary pointer-sized key-value pair. +// The interpretation of the value depends on the key. // Presence of entry might imply some pre-conditions. -// // All operations must be done from the owning thread, // or at a point when the owning thread is suspended. // @@ -51,61 +46,52 @@ class Thread; // from assembly (it ensures that the offset is valid immediate value). class ALIGNED(16) InterpreterCache { public: - using Entry = AtomicPair<size_t>; - - static constexpr size_t kThreadLocalSize = 256; // Value of 256 has around 75% cache hit rate. - static constexpr size_t kSharedSize = 16 * 1024; // Value of 16k has around 90% cache hit rate. - static constexpr size_t kHashShift = 2; // Number of tailing dex pc bits to drop. - - InterpreterCache(); - - void ClearThreadLocal(Thread* owning_thread); + // Aligned since we load the whole entry in single assembly instruction. + typedef std::pair<const void*, size_t> Entry ALIGNED(2 * sizeof(size_t)); - static void ClearShared(); + // 2x size increase/decrease corresponds to ~0.5% interpreter performance change. + // Value of 256 has around 75% cache hit rate. 
+ static constexpr size_t kSize = 256; - template<bool kSkipThreadLocal = false> - ALWAYS_INLINE bool Get(Thread* self, const void* dex_instruction, /* out */ size_t* value); + InterpreterCache() { + // We can not use the Clear() method since the constructor will not + // be called from the owning thread. + data_.fill(Entry{}); + } - ALWAYS_INLINE void Set(Thread* self, const void* dex_instruction, size_t value); + // Clear the whole cache. It requires the owning thread for DCHECKs. + void Clear(Thread* owning_thread); - template<typename Callback> - void ForEachTheadLocalEntry(Callback&& callback) { - for (Entry& entry : thread_local_array_) { - callback(reinterpret_cast<const Instruction*>(entry.first), entry.second); + ALWAYS_INLINE bool Get(const void* key, /* out */ size_t* value) { + DCHECK(IsCalledFromOwningThread()); + Entry& entry = data_[IndexOf(key)]; + if (LIKELY(entry.first == key)) { + *value = entry.second; + return true; } + return false; } - template<typename Callback> - static void ForEachSharedEntry(Callback&& callback) { - for (std::atomic<Entry>& atomic_entry : shared_array_) { - Entry old_entry = AtomicPairLoadAcquire(&atomic_entry); - Entry new_entry = old_entry; - callback(reinterpret_cast<const Instruction*>(new_entry.first), new_entry.second); - if (old_entry.second != new_entry.second) { - AtomicPairStoreRelease(&atomic_entry, new_entry); - } - } + ALWAYS_INLINE void Set(const void* key, size_t value) { + DCHECK(IsCalledFromOwningThread()); + data_[IndexOf(key)] = Entry{key, value}; + } + + std::array<Entry, kSize>& GetArray() { + return data_; } private: - template<size_t kSize> - static ALWAYS_INLINE size_t IndexOf(size_t key) { + bool IsCalledFromOwningThread(); + + static ALWAYS_INLINE size_t IndexOf(const void* key) { static_assert(IsPowerOfTwo(kSize), "Size must be power of two"); - size_t index = (key >> kHashShift) & (kSize - 1); + size_t index = (reinterpret_cast<uintptr_t>(key) >> 2) & (kSize - 1); DCHECK_LT(index, kSize); 
return index; } - // Small cache of fixed size which is always present for every thread. - // It is stored directly (without indrection) inside the Thread object. - // This makes it as fast as possible to access from assembly fast-path. - std::array<Entry, kThreadLocalSize> thread_local_array_; - - // Larger cache which is shared by all threads. - // It is used as next cache level if lookup in the local array fails. - // It needs to be accessed using atomic operations, and is contended, - // but the sharing allows it to be larger then the per-thread cache. - static std::array<std::atomic<Entry>, kSharedSize> shared_array_; + std::array<Entry, kSize> data_; }; } // namespace art |