summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--libartbase/base/macros.h1
-rw-r--r--runtime/Android.bp1
-rw-r--r--runtime/asm_support.h4
-rw-r--r--runtime/generated/asm_support_gen.h2
-rw-r--r--runtime/interpreter/interpreter_cache.cc32
-rw-r--r--runtime/interpreter/interpreter_cache.h88
-rw-r--r--runtime/native/dalvik_system_DexFile.cc3
-rw-r--r--runtime/thread.cc9
-rw-r--r--runtime/thread.h29
-rw-r--r--tools/cpp-define-generator/constant_thread.def3
10 files changed, 170 insertions, 2 deletions
diff --git a/libartbase/base/macros.h b/libartbase/base/macros.h
index 33866bba08..315f4d265d 100644
--- a/libartbase/base/macros.h
+++ b/libartbase/base/macros.h
@@ -48,6 +48,7 @@ template<typename T> ART_FRIEND_TEST(test_set_name, individual_test)
#define OFFSETOF_MEMBERPTR(t, f) \
(reinterpret_cast<uintptr_t>(&(reinterpret_cast<t*>(16)->*f)) - static_cast<uintptr_t>(16)) // NOLINT
+// Applies the compiler's `aligned` attribute with an alignment of x bytes to the annotated
+// declaration. Unlike PACKED(x), it does not add the `packed` attribute.
+#define ALIGNED(x) __attribute__ ((__aligned__(x)))
#define PACKED(x) __attribute__ ((__aligned__(x), __packed__))
// Stringify the argument.
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 15ccb70df0..f4b8697470 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -93,6 +93,7 @@ libart_cc_defaults {
"instrumentation.cc",
"intern_table.cc",
"interpreter/interpreter.cc",
+ "interpreter/interpreter_cache.cc",
"interpreter/interpreter_common.cc",
"interpreter/interpreter_intrinsics.cc",
"interpreter/interpreter_switch_impl.cc",
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index e65c19495e..00c9360ba4 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -96,6 +96,10 @@ ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET,
#define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 17 * __SIZEOF_POINTER__)
ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,
art::Thread::ThreadLocalAllocStackEndOffset<POINTER_SIZE>().Int32Value())
+// Offset of field Thread::interpreter_cache_.
+// The value is written by hand as a fixed byte count plus a number of pointer-sized slots and
+// is compared against art::Thread::InterpreterCacheOffset<POINTER_SIZE>() just below.
+// NOTE(review): presumably this constant has to be updated whenever a field placed before
+// Thread::interpreter_cache_ is added, removed or resized - confirm.
+#define THREAD_INTERPRETER_CACHE_OFFSET (144 + 312 * __SIZEOF_POINTER__)
+ADD_TEST_EQ(THREAD_INTERPRETER_CACHE_OFFSET,
+ art::Thread::InterpreterCacheOffset<POINTER_SIZE>().Int32Value())
// Offsets within ShadowFrame.
#define SHADOWFRAME_LINK_OFFSET 0
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 464c2b749f..ae31a542b7 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -164,6 +164,8 @@ DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CHECKPOINT_REQUEST), (static_cast<in
DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_EMPTY_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kEmptyCheckpointRequest))))
#define THREAD_SUSPEND_OR_CHECKPOINT_REQUEST 7
DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest))))
+#define THREAD_INTERPRETER_CACHE_SIZE_LOG2 8
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_INTERPRETER_CACHE_SIZE_LOG2), (static_cast<int32_t>((art::Thread::InterpreterCacheSizeLog2()))))
#define JIT_CHECK_OSR (-1)
DEFINE_CHECK_EQ(static_cast<int16_t>(JIT_CHECK_OSR), (static_cast<int16_t>((art::jit::kJitCheckForOSR))))
#define JIT_HOTNESS_DISABLE (-2)
diff --git a/runtime/interpreter/interpreter_cache.cc b/runtime/interpreter/interpreter_cache.cc
new file mode 100644
index 0000000000..e43fe318cc
--- /dev/null
+++ b/runtime/interpreter/interpreter_cache.cc
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "interpreter_cache.h"
+#include "thread-inl.h"
+
+namespace art {
+
+// Resets every slot of the cache to a value-initialized (empty) entry.
+// |owning_thread| is only consulted by the debug checks: its cache must be this object, and
+// it must either be the calling thread or be suspended.
+void InterpreterCache::Clear(Thread* owning_thread) {
+  DCHECK(this == owning_thread->GetInterpreterCache());
+  DCHECK(Thread::Current() == owning_thread || owning_thread->IsSuspended());
+  for (Entry& slot : data_) {
+    slot = Entry{};
+  }
+}
+
+// Returns true when the interpreter cache of the calling thread is this very object.
+bool InterpreterCache::IsCalledFromOwningThread() {
+  Thread* const self = Thread::Current();
+  const InterpreterCache* const current_cache = self->GetInterpreterCache();
+  return current_cache == this;
+}
+
+} // namespace art
diff --git a/runtime/interpreter/interpreter_cache.h b/runtime/interpreter/interpreter_cache.h
new file mode 100644
index 0000000000..c25222eff3
--- /dev/null
+++ b/runtime/interpreter/interpreter_cache.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_H_
+
+#include <array>
+#include <atomic>
+
+#include "base/bit_utils.h"
+#include "base/macros.h"
+
+namespace art {
+
+class Instruction;
+class Thread;
+
+// Small fast thread-local cache for the interpreter.
+// The key for the cache is the dex instruction pointer.
+// The interpretation of the value depends on the opcode.
+// Presence of entry might imply some performance pre-conditions.
+// All operations must be done from the owning thread,
+// or at a point when the owning thread is suspended.
+//
+// Aligned to 16-bytes to make it easier to get the address of the cache
+// from assembly (it ensures that the offset is valid immediate value).
+class ALIGNED(16) InterpreterCache {
+  // One cache slot: the dex instruction pointer (key) paired with its value.
+  // Aligned to 2 * sizeof(size_t) since the whole entry is loaded in a single
+  // assembly instruction.
+  typedef std::pair<const Instruction*, size_t> Entry ALIGNED(2 * sizeof(size_t));
+
+ public:
+  // Number of slots in the cache.
+  // Doubling or halving it corresponds to a ~0.5% interpreter performance change;
+  // a value of 256 has around 75% cache hit rate.
+  static constexpr size_t kSize = 256;
+
+  // Creates a cache in which every slot is empty.
+  InterpreterCache() {
+    // Clear() cannot be used here since the constructor will not be called from
+    // the owning thread.
+    for (Entry& slot : data_) {
+      slot = Entry{};
+    }
+  }
+
+  // Clear the whole cache. The owning thread is needed only for the DCHECKs.
+  void Clear(Thread* owning_thread);
+
+  // Looks up |key|. On a hit, stores the cached value in |*value| and returns true.
+  // On a miss, returns false and leaves |*value| untouched.
+  ALWAYS_INLINE bool Get(const Instruction* key, /* out */ size_t* value) {
+    DCHECK(IsCalledFromOwningThread());
+    const Entry& slot = data_[IndexOf(key)];
+    const bool hit = LIKELY(slot.first == key);
+    if (hit) {
+      *value = slot.second;
+    }
+    return hit;
+  }
+
+  // Stores |value| under |key|, overwriting whatever occupied the slot that |key| maps to.
+  ALWAYS_INLINE void Set(const Instruction* key, size_t value) {
+    DCHECK(IsCalledFromOwningThread());
+    Entry& slot = data_[IndexOf(key)];
+    slot.first = key;
+    slot.second = value;
+  }
+
+ private:
+  // Returns true if the calling thread's interpreter cache is this object.
+  bool IsCalledFromOwningThread();
+
+  // Maps an instruction pointer to a slot index: drops the two lowest address bits and
+  // keeps the low bits selected by (kSize - 1).
+  static ALWAYS_INLINE size_t IndexOf(const Instruction* key) {
+    static_assert(IsPowerOfTwo(kSize), "Size must be power of two");
+    constexpr size_t kIndexMask = kSize - 1;
+    const uintptr_t address = reinterpret_cast<uintptr_t>(key);
+    const size_t index = (address >> 2) & kIndexMask;
+    DCHECK_LT(index, kSize);
+    return index;
+  }
+
+  // Slot storage, indexed by IndexOf(key).
+  std::array<Entry, kSize> data_;
+};
+
+} // namespace art
+
+#endif // ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_H_
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 71fabd0250..0d1fe44725 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -323,6 +323,9 @@ static jboolean DexFile_closeDexFile(JNIEnv* env, jclass, jobject cookie) {
}
Runtime* const runtime = Runtime::Current();
bool all_deleted = true;
+ // We need to clear the caches since they may contain pointers to the dex instructions.
+ // A different dex file may later be loaded at the same memory location by chance.
+ Thread::ClearAllInterpreterCaches();
{
ScopedObjectAccess soa(env);
ObjPtr<mirror::Object> dex_files_object = soa.Decode<mirror::Object>(cookie);
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 8a8f53743e..497b146f17 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -4076,4 +4076,13 @@ void Thread::SetReadBarrierEntrypoints() {
UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, /* is_active*/ true);
}
+// Clears the interpreter cache of every thread by handing a closure to the runtime's
+// thread list via RunCheckpoint(); the closure calls InterpreterCache::Clear() on the
+// cache of the thread it is run for.
+void Thread::ClearAllInterpreterCaches() {
+  // The closure carries no state, so one shared instance is sufficient.
+  // NOTE(review): presumably it is static so that it stays alive in case RunCheckpoint()
+  // returns before every thread has executed it - confirm against ThreadList::RunCheckpoint.
+  static struct ClearInterpreterCacheClosure : Closure {
+    // `override` makes the compiler verify that this really overrides Closure::Run.
+    void Run(Thread* thread) override {
+      thread->GetInterpreterCache()->Clear(thread);
+    }
+  } closure;
+  Runtime::Current()->GetThreadList()->RunCheckpoint(&closure);
+}
+
} // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index d169a62198..3c85b80976 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -38,6 +38,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "handle_scope.h"
#include "instrumentation.h"
+#include "interpreter/interpreter_cache.h"
#include "jvalue.h"
#include "managed_stack.h"
#include "offsets.h"
@@ -1299,6 +1300,29 @@ class Thread {
jobject thread_group)
REQUIRES_SHARED(Locks::mutator_lock_);
+ // Returns a pointer to the interpreter cache stored inside this Thread object.
+ ALWAYS_INLINE InterpreterCache* GetInterpreterCache() {
+   InterpreterCache* const cache = &interpreter_cache_;
+   return cache;
+ }
+
+ // Clear all thread-local interpreter caches.
+ //
+ // Since the caches are keyed by memory pointer to dex instructions, this must be
+ // called when any dex code is unloaded (before different code gets loaded at the
+ // same memory location).
+ //
+ // If presence of cache entry implies some pre-conditions, this must also be
+ // called if the pre-conditions might no longer hold true.
+ static void ClearAllInterpreterCaches();
+
+ // Returns the offset of the interpreter_cache_ field within Thread, wrapped in a
+ // ThreadOffset of the requested pointer size.
+ template<PointerSize pointer_size>
+ static ThreadOffset<pointer_size> InterpreterCacheOffset() {
+   const auto field_offset = OFFSETOF_MEMBER(Thread, interpreter_cache_);
+   return ThreadOffset<pointer_size>(field_offset);
+ }
+
+ // Returns WhichPowerOf2() of the number of entries in the interpreter cache
+ // (InterpreterCache::kSize).
+ static int InterpreterCacheSizeLog2() {
+   const int size_log2 = WhichPowerOf2(InterpreterCache::kSize);
+   return size_log2;
+ }
+
private:
explicit Thread(bool daemon);
~Thread() REQUIRES(!Locks::mutator_lock_, !Locks::thread_suspend_count_lock_);
@@ -1788,6 +1812,11 @@ class Thread {
// be false for threads where '!can_call_into_java_'.
bool can_be_suspended_by_user_code_;
+ // Small thread-local cache to be used from the interpreter.
+ // It is keyed by dex instruction pointer.
+ // The value is opcode-dependent (e.g. field offset).
+ InterpreterCache interpreter_cache_;
+
friend class Dbg; // For SetStateUnsafe.
friend class gc::collector::SemiSpace; // For getting stack traces.
friend class Runtime; // For CreatePeer.
diff --git a/tools/cpp-define-generator/constant_thread.def b/tools/cpp-define-generator/constant_thread.def
index 1364b558ec..7e1df6b267 100644
--- a/tools/cpp-define-generator/constant_thread.def
+++ b/tools/cpp-define-generator/constant_thread.def
@@ -27,5 +27,4 @@ DEFINE_THREAD_CONSTANT(SUSPEND_REQUEST, int32_t, art::kSuspendRequest)
DEFINE_THREAD_CONSTANT(CHECKPOINT_REQUEST, int32_t, art::kCheckpointRequest)
DEFINE_THREAD_CONSTANT(EMPTY_CHECKPOINT_REQUEST, int32_t, art::kEmptyCheckpointRequest)
DEFINE_THREAD_CONSTANT(SUSPEND_OR_CHECKPOINT_REQUEST, int32_t, art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest)
-
-#undef DEFINE_THREAD_CONSTANT
+DEFINE_THREAD_CONSTANT(INTERPRETER_CACHE_SIZE_LOG2, int32_t, art::Thread::InterpreterCacheSizeLog2())