summaryrefslogtreecommitdiff
path: root/runtime/interpreter/interpreter_cache.h
diff options
context:
space:
mode:
author Hans Boehm <hboehm@google.com> 2022-01-17 01:32:55 +0000
committer Hans Boehm <hboehm@google.com> 2022-01-17 01:32:55 +0000
commit3d2f148fe040b60452d5d9be7d08dec693132078 (patch)
tree6a8a0aa66c68e8a4c49833b2a93b263985259aed /runtime/interpreter/interpreter_cache.h
parentfa40e6e318b21d4a1885a6ffea6efc3c0b5cc1cd (diff)
Revert "Add thread-shared interpreter cache"
This reverts commit fa40e6e318b21d4a1885a6ffea6efc3c0b5cc1cd. Reason for revert: Seeing several different failures that appear related, both test failures and b/214850618. And it appears a potentially significant unresolved comment was overlooked. Change-Id: I2b5260ac7f2168831f0d1b0d7c76b70ecc1fb77d
Diffstat (limited to 'runtime/interpreter/interpreter_cache.h')
-rw-r--r--runtime/interpreter/interpreter_cache.h80
1 file changed, 33 insertions(+), 47 deletions(-)
diff --git a/runtime/interpreter/interpreter_cache.h b/runtime/interpreter/interpreter_cache.h
index af025cecbd..0ada562438 100644
--- a/runtime/interpreter/interpreter_cache.h
+++ b/runtime/interpreter/interpreter_cache.h
@@ -20,22 +20,17 @@
#include <array>
#include <atomic>
-#include "base/atomic_pair.h"
#include "base/bit_utils.h"
#include "base/macros.h"
namespace art {
-class Instruction;
class Thread;
// Small fast thread-local cache for the interpreter.
-//
-// The key is an absolute pointer to a dex instruction.
-//
-// The value depends on the opcode of the dex instruction.
+// It can hold arbitrary pointer-sized key-value pair.
+// The interpretation of the value depends on the key.
// Presence of entry might imply some pre-conditions.
-//
// All operations must be done from the owning thread,
// or at a point when the owning thread is suspended.
//
@@ -51,61 +46,52 @@ class Thread;
// from assembly (it ensures that the offset is valid immediate value).
class ALIGNED(16) InterpreterCache {
public:
- using Entry = AtomicPair<size_t>;
-
- static constexpr size_t kThreadLocalSize = 256; // Value of 256 has around 75% cache hit rate.
- static constexpr size_t kSharedSize = 16 * 1024; // Value of 16k has around 90% cache hit rate.
- static constexpr size_t kHashShift = 2; // Number of tailing dex pc bits to drop.
-
- InterpreterCache();
-
- void ClearThreadLocal(Thread* owning_thread);
+ // Aligned since we load the whole entry in a single assembly instruction.
+ typedef std::pair<const void*, size_t> Entry ALIGNED(2 * sizeof(size_t));
- static void ClearShared();
+ // 2x size increase/decrease corresponds to ~0.5% interpreter performance change.
+ // Value of 256 has around 75% cache hit rate.
+ static constexpr size_t kSize = 256;
- template<bool kSkipThreadLocal = false>
- ALWAYS_INLINE bool Get(Thread* self, const void* dex_instruction, /* out */ size_t* value);
+ InterpreterCache() {
+ // We can not use the Clear() method since the constructor will not
+ // be called from the owning thread.
+ data_.fill(Entry{});
+ }
- ALWAYS_INLINE void Set(Thread* self, const void* dex_instruction, size_t value);
+ // Clear the whole cache. It requires the owning thread for DCHECKs.
+ void Clear(Thread* owning_thread);
- template<typename Callback>
- void ForEachTheadLocalEntry(Callback&& callback) {
- for (Entry& entry : thread_local_array_) {
- callback(reinterpret_cast<const Instruction*>(entry.first), entry.second);
+ ALWAYS_INLINE bool Get(const void* key, /* out */ size_t* value) {
+ DCHECK(IsCalledFromOwningThread());
+ Entry& entry = data_[IndexOf(key)];
+ if (LIKELY(entry.first == key)) {
+ *value = entry.second;
+ return true;
}
+ return false;
}
- template<typename Callback>
- static void ForEachSharedEntry(Callback&& callback) {
- for (std::atomic<Entry>& atomic_entry : shared_array_) {
- Entry old_entry = AtomicPairLoadAcquire(&atomic_entry);
- Entry new_entry = old_entry;
- callback(reinterpret_cast<const Instruction*>(new_entry.first), new_entry.second);
- if (old_entry.second != new_entry.second) {
- AtomicPairStoreRelease(&atomic_entry, new_entry);
- }
- }
+ ALWAYS_INLINE void Set(const void* key, size_t value) {
+ DCHECK(IsCalledFromOwningThread());
+ data_[IndexOf(key)] = Entry{key, value};
+ }
+
+ std::array<Entry, kSize>& GetArray() {
+ return data_;
}
private:
- template<size_t kSize>
- static ALWAYS_INLINE size_t IndexOf(size_t key) {
+ bool IsCalledFromOwningThread();
+
+ static ALWAYS_INLINE size_t IndexOf(const void* key) {
static_assert(IsPowerOfTwo(kSize), "Size must be power of two");
- size_t index = (key >> kHashShift) & (kSize - 1);
+ size_t index = (reinterpret_cast<uintptr_t>(key) >> 2) & (kSize - 1);
DCHECK_LT(index, kSize);
return index;
}
- // Small cache of fixed size which is always present for every thread.
- // It is stored directly (without indrection) inside the Thread object.
- // This makes it as fast as possible to access from assembly fast-path.
- std::array<Entry, kThreadLocalSize> thread_local_array_;
-
- // Larger cache which is shared by all threads.
- // It is used as next cache level if lookup in the local array fails.
- // It needs to be accessed using atomic operations, and is contended,
- // but the sharing allows it to be larger then the per-thread cache.
- static std::array<std::atomic<Entry>, kSharedSize> shared_array_;
+ std::array<Entry, kSize> data_;
};
} // namespace art