summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David Srbecky <dsrbecky@google.com> 2018-09-08 12:22:58 +0100
committer David Srbecky <dsrbecky@google.com> 2018-09-27 15:16:09 +0100
commit 912f36c954a91bdc7d9801a111ba089ec2a23681 (patch)
tree 87e576e86867140c478d6959ce20a261daaad60a
parent 26f048f48cdb1e884aab2b6fddf26d58346d29ad (diff)
Add small thread-local cache for use by the interpreter.
Small (one page) cache which can be used on the hottest paths in the interpreter and which does not require synchronisation. This CL adds the code but it does not use it for anything yet. Test: test-art-host-gtest Change-Id: I41d4e7a86a0f62f7a4efc165b8934232b4e766c7
-rw-r--r-- libartbase/base/macros.h 1
-rw-r--r-- runtime/Android.bp 1
-rw-r--r-- runtime/asm_support.h 4
-rw-r--r-- runtime/generated/asm_support_gen.h 2
-rw-r--r-- runtime/interpreter/interpreter_cache.cc 32
-rw-r--r-- runtime/interpreter/interpreter_cache.h 88
-rw-r--r-- runtime/native/dalvik_system_DexFile.cc 3
-rw-r--r-- runtime/thread.cc 9
-rw-r--r-- runtime/thread.h 29
-rw-r--r-- tools/cpp-define-generator/constant_thread.def 3
10 files changed, 170 insertions, 2 deletions
diff --git a/libartbase/base/macros.h b/libartbase/base/macros.h
index 33866bba08..315f4d265d 100644
--- a/libartbase/base/macros.h
+++ b/libartbase/base/macros.h
@@ -48,6 +48,7 @@ template<typename T> ART_FRIEND_TEST(test_set_name, individual_test)
#define OFFSETOF_MEMBERPTR(t, f) \
(reinterpret_cast<uintptr_t>(&(reinterpret_cast<t*>(16)->*f)) - static_cast<uintptr_t>(16)) // NOLINT
+#define ALIGNED(x) __attribute__ ((__aligned__(x)))
#define PACKED(x) __attribute__ ((__aligned__(x), __packed__))
// Stringify the argument.
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 15ccb70df0..f4b8697470 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -93,6 +93,7 @@ libart_cc_defaults {
"instrumentation.cc",
"intern_table.cc",
"interpreter/interpreter.cc",
+ "interpreter/interpreter_cache.cc",
"interpreter/interpreter_common.cc",
"interpreter/interpreter_intrinsics.cc",
"interpreter/interpreter_switch_impl.cc",
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index e65c19495e..00c9360ba4 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -96,6 +96,10 @@ ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET,
#define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 17 * __SIZEOF_POINTER__)
ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,
art::Thread::ThreadLocalAllocStackEndOffset<POINTER_SIZE>().Int32Value())
+// Offset of field Thread::interpreter_cache_.
+#define THREAD_INTERPRETER_CACHE_OFFSET (144 + 312 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_INTERPRETER_CACHE_OFFSET,
+ art::Thread::InterpreterCacheOffset<POINTER_SIZE>().Int32Value())
// Offsets within ShadowFrame.
#define SHADOWFRAME_LINK_OFFSET 0
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 464c2b749f..ae31a542b7 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -164,6 +164,8 @@ DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CHECKPOINT_REQUEST), (static_cast<in
DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_EMPTY_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kEmptyCheckpointRequest))))
#define THREAD_SUSPEND_OR_CHECKPOINT_REQUEST 7
DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest))))
+#define THREAD_INTERPRETER_CACHE_SIZE_LOG2 8
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_INTERPRETER_CACHE_SIZE_LOG2), (static_cast<int32_t>((art::Thread::InterpreterCacheSizeLog2()))))
#define JIT_CHECK_OSR (-1)
DEFINE_CHECK_EQ(static_cast<int16_t>(JIT_CHECK_OSR), (static_cast<int16_t>((art::jit::kJitCheckForOSR))))
#define JIT_HOTNESS_DISABLE (-2)
diff --git a/runtime/interpreter/interpreter_cache.cc b/runtime/interpreter/interpreter_cache.cc
new file mode 100644
index 0000000000..e43fe318cc
--- /dev/null
+++ b/runtime/interpreter/interpreter_cache.cc
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "interpreter_cache.h"
+#include "thread-inl.h"
+
+namespace art {
+
+void InterpreterCache::Clear(Thread* owning_thread) {  // Resets the whole cache; owning_thread is only used by the DCHECKs below.
+ DCHECK(owning_thread->GetInterpreterCache() == this);  // The thread passed in must be the one whose cache this is.
+ DCHECK(owning_thread == Thread::Current() || owning_thread->IsSuspended());  // Caller is the owner, or the owner is suspended.
+ data_.fill(Entry{});  // Overwrite every slot with a value-initialized entry (null key, zero value).
+}
+
+bool InterpreterCache::IsCalledFromOwningThread() {  // Used by the DCHECKs in Get() and Set().
+ return Thread::Current()->GetInterpreterCache() == this;  // True when the calling thread's cache is this very object.
+}
+
+} // namespace art
diff --git a/runtime/interpreter/interpreter_cache.h b/runtime/interpreter/interpreter_cache.h
new file mode 100644
index 0000000000..c25222eff3
--- /dev/null
+++ b/runtime/interpreter/interpreter_cache.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_H_
+
+#include <array>
+#include <atomic>
+
+#include "base/bit_utils.h"
+#include "base/macros.h"
+
+namespace art {
+
+class Instruction;
+class Thread;
+
+// Small fast thread-local cache for the interpreter.
+// The key for the cache is the dex instruction pointer.
+// The interpretation of the value depends on the opcode.
+// Presence of entry might imply some performance pre-conditions.
+// All operations must be done from the owning thread,
+// or at a point when the owning thread is suspended.
+//
+// Aligned to 16 bytes to make it easier to get the address of the cache
+// from assembly (it ensures that the offset is a valid immediate value).
+class ALIGNED(16) InterpreterCache {
+ // Aligned to 2 * sizeof(size_t) since the whole entry is loaded in a single assembly instruction.
+ typedef std::pair<const Instruction*, size_t> Entry ALIGNED(2 * sizeof(size_t));  // first = key (dex instruction pointer), second = cached value.
+
+ public:
+ // 2x size increase/decrease corresponds to ~0.5% interpreter performance change.
+ // Value of 256 has around 75% cache hit rate.
+ static constexpr size_t kSize = 256;  // Number of entries; IndexOf() statically asserts this is a power of two.
+
+ InterpreterCache() {
+ // We cannot use the Clear() method here: its DCHECKs require the owning thread
+ // (or a suspended owner), and the constructor is not called from the owning thread.
+ data_.fill(Entry{});  // Start with every slot value-initialized (null key, zero value).
+ }
+
+ // Clear the whole cache. It requires the owning thread for DCHECKs.
+ void Clear(Thread* owning_thread);
+
+ ALWAYS_INLINE bool Get(const Instruction* key, /* out */ size_t* value) {  // Looks up `key`; returns true and fills `*value` on a hit.
+ DCHECK(IsCalledFromOwningThread());
+ Entry& entry = data_[IndexOf(key)];  // The single slot this key can occupy.
+ if (LIKELY(entry.first == key)) {  // Hit only if the slot currently holds exactly this key.
+ *value = entry.second;
+ return true;
+ }
+ return false;  // Miss: `*value` is left untouched.
+ }
+
+ ALWAYS_INLINE void Set(const Instruction* key, size_t value) {  // Stores `value` for `key`.
+ DCHECK(IsCalledFromOwningThread());
+ data_[IndexOf(key)] = Entry{key, value};  // Unconditionally replaces whatever entry occupied the slot.
+ }
+
+ private:
+ bool IsCalledFromOwningThread();  // Debug helper: is this cache the current thread's cache?
+
+ static ALWAYS_INLINE size_t IndexOf(const Instruction* key) {  // Maps a key to its slot index in data_.
+ static_assert(IsPowerOfTwo(kSize), "Size must be power of two");  // Needed for the mask below to act as a modulo.
+ size_t index = (reinterpret_cast<uintptr_t>(key) >> 2) & (kSize - 1);  // Drop the two lowest address bits, keep the next log2(kSize) bits.
+ DCHECK_LT(index, kSize);
+ return index;
+ }
+
+ std::array<Entry, kSize> data_;  // Direct-mapped table: each key maps to exactly one slot (see IndexOf).
+};
+
+} // namespace art
+
+#endif // ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_H_
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 71fabd0250..0d1fe44725 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -323,6 +323,9 @@ static jboolean DexFile_closeDexFile(JNIEnv* env, jclass, jobject cookie) {
}
Runtime* const runtime = Runtime::Current();
bool all_deleted = true;
+ // We need to clear the caches since they may contain pointers to the dex instructions.
+ // A different dex file can later be loaded at the same memory location by chance.
+ Thread::ClearAllInterpreterCaches();
{
ScopedObjectAccess soa(env);
ObjPtr<mirror::Object> dex_files_object = soa.Decode<mirror::Object>(cookie);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 8a8f53743e..497b146f17 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -4076,4 +4076,13 @@ void Thread::SetReadBarrierEntrypoints() {
UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, /* is_active*/ true);
}
+void Thread::ClearAllInterpreterCaches() {  // Clears the interpreter cache of the threads reached by the checkpoint below.
+ static struct ClearInterpreterCacheClosure : Closure {  // Closure type with no data members; a single static instance is reused across calls.
+ virtual void Run(Thread* thread) {  // NOTE(review): presumably invoked once per thread by RunCheckpoint — confirm against ThreadList.
+ thread->GetInterpreterCache()->Clear(thread);  // Clear() DCHECKs that `thread` is current or suspended.
+ }
+ } closure;
+ Runtime::Current()->GetThreadList()->RunCheckpoint(&closure);  // Hand the closure to the runtime's thread list.
+}
+
} // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index d169a62198..3c85b80976 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -38,6 +38,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "handle_scope.h"
#include "instrumentation.h"
+#include "interpreter/interpreter_cache.h"
#include "jvalue.h"
#include "managed_stack.h"
#include "offsets.h"
@@ -1299,6 +1300,29 @@ class Thread {
jobject thread_group)
REQUIRES_SHARED(Locks::mutator_lock_);
+ ALWAYS_INLINE InterpreterCache* GetInterpreterCache() {  // Accessor for this thread's interpreter cache.
+ return &interpreter_cache_;  // The cache is a by-value member of Thread, so the pointer is never null.
+ }
+
+ // Clear all thread-local interpreter caches.
+ //
+ // Since the caches are keyed by memory pointer to dex instructions, this must be
+ // called when any dex code is unloaded (before different code gets loaded at the
+ // same memory location).
+ //
+ // If presence of cache entry implies some pre-conditions, this must also be
+ // called if the pre-conditions might no longer hold true.
+ static void ClearAllInterpreterCaches();
+
+ template<PointerSize pointer_size>
+ static ThreadOffset<pointer_size> InterpreterCacheOffset() {  // Checked against THREAD_INTERPRETER_CACHE_OFFSET in asm_support.h.
+ return ThreadOffset<pointer_size>(OFFSETOF_MEMBER(Thread, interpreter_cache_));  // Offset of the interpreter_cache_ field within Thread.
+ }
+
+ static int InterpreterCacheSizeLog2() {  // Exported to assembly as THREAD_INTERPRETER_CACHE_SIZE_LOG2 (8 for kSize == 256).
+ return WhichPowerOf2(InterpreterCache::kSize);  // log2 of the number of cache entries.
+ }
+
private:
explicit Thread(bool daemon);
~Thread() REQUIRES(!Locks::mutator_lock_, !Locks::thread_suspend_count_lock_);
@@ -1788,6 +1812,11 @@ class Thread {
// be false for threads where '!can_call_into_java_'.
bool can_be_suspended_by_user_code_;
+ // Small thread-local cache to be used from the interpreter.
+ // It is keyed by dex instruction pointer.
+ // The value is opcode-dependent (e.g. field offset).
+ InterpreterCache interpreter_cache_;
+
friend class Dbg; // For SetStateUnsafe.
friend class gc::collector::SemiSpace; // For getting stack traces.
friend class Runtime; // For CreatePeer.
diff --git a/tools/cpp-define-generator/constant_thread.def b/tools/cpp-define-generator/constant_thread.def
index 1364b558ec..7e1df6b267 100644
--- a/tools/cpp-define-generator/constant_thread.def
+++ b/tools/cpp-define-generator/constant_thread.def
@@ -27,5 +27,4 @@ DEFINE_THREAD_CONSTANT(SUSPEND_REQUEST, int32_t, art::kSuspendRequest)
DEFINE_THREAD_CONSTANT(CHECKPOINT_REQUEST, int32_t, art::kCheckpointRequest)
DEFINE_THREAD_CONSTANT(EMPTY_CHECKPOINT_REQUEST, int32_t, art::kEmptyCheckpointRequest)
DEFINE_THREAD_CONSTANT(SUSPEND_OR_CHECKPOINT_REQUEST, int32_t, art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest)
-
-#undef DEFINE_THREAD_CONSTANT
+DEFINE_THREAD_CONSTANT(INTERPRETER_CACHE_SIZE_LOG2, int32_t, art::Thread::InterpreterCacheSizeLog2())