diff options
| -rw-r--r-- | libartbase/base/macros.h | 1 | ||||
| -rw-r--r-- | runtime/Android.bp | 1 | ||||
| -rw-r--r-- | runtime/asm_support.h | 4 | ||||
| -rw-r--r-- | runtime/generated/asm_support_gen.h | 2 | ||||
| -rw-r--r-- | runtime/interpreter/interpreter_cache.cc | 32 | ||||
| -rw-r--r-- | runtime/interpreter/interpreter_cache.h | 88 | ||||
| -rw-r--r-- | runtime/native/dalvik_system_DexFile.cc | 3 | ||||
| -rw-r--r-- | runtime/thread.cc | 9 | ||||
| -rw-r--r-- | runtime/thread.h | 29 | ||||
| -rw-r--r-- | tools/cpp-define-generator/constant_thread.def | 3 |
10 files changed, 170 insertions, 2 deletions
diff --git a/libartbase/base/macros.h b/libartbase/base/macros.h index 33866bba08..315f4d265d 100644 --- a/libartbase/base/macros.h +++ b/libartbase/base/macros.h @@ -48,6 +48,7 @@ template<typename T> ART_FRIEND_TEST(test_set_name, individual_test) #define OFFSETOF_MEMBERPTR(t, f) \ (reinterpret_cast<uintptr_t>(&(reinterpret_cast<t*>(16)->*f)) - static_cast<uintptr_t>(16)) // NOLINT +#define ALIGNED(x) __attribute__ ((__aligned__(x))) #define PACKED(x) __attribute__ ((__aligned__(x), __packed__)) // Stringify the argument. diff --git a/runtime/Android.bp b/runtime/Android.bp index 15ccb70df0..f4b8697470 100644 --- a/runtime/Android.bp +++ b/runtime/Android.bp @@ -93,6 +93,7 @@ libart_cc_defaults { "instrumentation.cc", "intern_table.cc", "interpreter/interpreter.cc", + "interpreter/interpreter_cache.cc", "interpreter/interpreter_common.cc", "interpreter/interpreter_intrinsics.cc", "interpreter/interpreter_switch_impl.cc", diff --git a/runtime/asm_support.h b/runtime/asm_support.h index e65c19495e..00c9360ba4 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -96,6 +96,10 @@ ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET, #define THREAD_LOCAL_ALLOC_STACK_END_OFFSET (THREAD_ROSALLOC_RUNS_OFFSET + 17 * __SIZEOF_POINTER__) ADD_TEST_EQ(THREAD_LOCAL_ALLOC_STACK_END_OFFSET, art::Thread::ThreadLocalAllocStackEndOffset<POINTER_SIZE>().Int32Value()) +// Offset of field Thread::interpreter_cache_. +#define THREAD_INTERPRETER_CACHE_OFFSET (144 + 312 * __SIZEOF_POINTER__) +ADD_TEST_EQ(THREAD_INTERPRETER_CACHE_OFFSET, + art::Thread::InterpreterCacheOffset<POINTER_SIZE>().Int32Value()) // Offsets within ShadowFrame. 
#define SHADOWFRAME_LINK_OFFSET 0 diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h index 464c2b749f..ae31a542b7 100644 --- a/runtime/generated/asm_support_gen.h +++ b/runtime/generated/asm_support_gen.h @@ -164,6 +164,8 @@ DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CHECKPOINT_REQUEST), (static_cast<in DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_EMPTY_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kEmptyCheckpointRequest)))) #define THREAD_SUSPEND_OR_CHECKPOINT_REQUEST 7 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest)))) +#define THREAD_INTERPRETER_CACHE_SIZE_LOG2 8 +DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_INTERPRETER_CACHE_SIZE_LOG2), (static_cast<int32_t>((art::Thread::InterpreterCacheSizeLog2())))) #define JIT_CHECK_OSR (-1) DEFINE_CHECK_EQ(static_cast<int16_t>(JIT_CHECK_OSR), (static_cast<int16_t>((art::jit::kJitCheckForOSR)))) #define JIT_HOTNESS_DISABLE (-2) diff --git a/runtime/interpreter/interpreter_cache.cc b/runtime/interpreter/interpreter_cache.cc new file mode 100644 index 0000000000..e43fe318cc --- /dev/null +++ b/runtime/interpreter/interpreter_cache.cc @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "interpreter_cache.h" +#include "thread-inl.h" + +namespace art { + +void InterpreterCache::Clear(Thread* owning_thread) { + DCHECK(owning_thread->GetInterpreterCache() == this); + DCHECK(owning_thread == Thread::Current() || owning_thread->IsSuspended()); + data_.fill(Entry{}); +} + +bool InterpreterCache::IsCalledFromOwningThread() { + return Thread::Current()->GetInterpreterCache() == this; +} + +} // namespace art diff --git a/runtime/interpreter/interpreter_cache.h b/runtime/interpreter/interpreter_cache.h new file mode 100644 index 0000000000..c25222eff3 --- /dev/null +++ b/runtime/interpreter/interpreter_cache.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_H_ +#define ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_H_ + +#include <array> +#include <atomic> + +#include "base/bit_utils.h" +#include "base/macros.h" + +namespace art { + +class Instruction; +class Thread; + +// Small fast thread-local cache for the interpreter. +// The key for the cache is the dex instruction pointer. +// The interpretation of the value depends on the opcode. +// Presence of entry might imply some performance pre-conditions. +// All operations must be done from the owning thread, +// or at a point when the owning thread is suspended. 
+// +// Aligned to 16 bytes to make it easier to get the address of the cache +// from assembly (it ensures that the offset is a valid immediate value). +class ALIGNED(16) InterpreterCache { + // Aligned since we load the whole entry in a single assembly instruction. + typedef std::pair<const Instruction*, size_t> Entry ALIGNED(2 * sizeof(size_t)); + + public: + // 2x size increase/decrease corresponds to ~0.5% interpreter performance change. + // A value of 256 has around 75% cache hit rate. + static constexpr size_t kSize = 256; + + InterpreterCache() { + // We cannot use the Clear() method since the constructor will not + // be called from the owning thread. + data_.fill(Entry{}); + } + + // Clear the whole cache. It requires the owning thread for DCHECKs. + void Clear(Thread* owning_thread); + + ALWAYS_INLINE bool Get(const Instruction* key, /* out */ size_t* value) { + DCHECK(IsCalledFromOwningThread()); + Entry& entry = data_[IndexOf(key)]; + if (LIKELY(entry.first == key)) { + *value = entry.second; + return true; + } + return false; + } + + ALWAYS_INLINE void Set(const Instruction* key, size_t value) { + DCHECK(IsCalledFromOwningThread()); + data_[IndexOf(key)] = Entry{key, value}; + } + + private: + bool IsCalledFromOwningThread(); + + static ALWAYS_INLINE size_t IndexOf(const Instruction* key) { + static_assert(IsPowerOfTwo(kSize), "Size must be power of two"); + size_t index = (reinterpret_cast<uintptr_t>(key) >> 2) & (kSize - 1); + DCHECK_LT(index, kSize); + return index; + } + + std::array<Entry, kSize> data_; +}; + +} // namespace art + +#endif // ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_H_ diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index 71fabd0250..0d1fe44725 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -323,6 +323,9 @@ static jboolean DexFile_closeDexFile(JNIEnv* env, jclass, jobject cookie) { } Runtime* const runtime = Runtime::Current(); bool 
all_deleted = true; + // We need to clear the caches since they may contain pointers to the dex instructions. + // A different dex file can be loaded at the same memory location later by chance. + Thread::ClearAllInterpreterCaches(); { ScopedObjectAccess soa(env); ObjPtr<mirror::Object> dex_files_object = soa.Decode<mirror::Object>(cookie); diff --git a/runtime/thread.cc b/runtime/thread.cc index 8a8f53743e..497b146f17 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -4076,4 +4076,13 @@ void Thread::SetReadBarrierEntrypoints() { UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, /* is_active*/ true); } +void Thread::ClearAllInterpreterCaches() { + static struct ClearInterpreterCacheClosure : Closure { + virtual void Run(Thread* thread) { + thread->GetInterpreterCache()->Clear(thread); + } + } closure; + Runtime::Current()->GetThreadList()->RunCheckpoint(&closure); +} + } // namespace art diff --git a/runtime/thread.h b/runtime/thread.h index d169a62198..3c85b80976 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -38,6 +38,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "handle_scope.h" #include "instrumentation.h" +#include "interpreter/interpreter_cache.h" #include "jvalue.h" #include "managed_stack.h" #include "offsets.h" @@ -1299,6 +1300,29 @@ class Thread { jobject thread_group) REQUIRES_SHARED(Locks::mutator_lock_); + ALWAYS_INLINE InterpreterCache* GetInterpreterCache() { + return &interpreter_cache_; + } + + // Clear all thread-local interpreter caches. + // + // Since the caches are keyed by memory pointer to dex instructions, this must be + // called when any dex code is unloaded (before different code gets loaded at the + // same memory location). + // + // If the presence of a cache entry implies some pre-conditions, this must also be + // called if the pre-conditions might no longer hold true. 
+ static void ClearAllInterpreterCaches(); + + template<PointerSize pointer_size> + static ThreadOffset<pointer_size> InterpreterCacheOffset() { + return ThreadOffset<pointer_size>(OFFSETOF_MEMBER(Thread, interpreter_cache_)); + } + + static int InterpreterCacheSizeLog2() { + return WhichPowerOf2(InterpreterCache::kSize); + } + private: explicit Thread(bool daemon); ~Thread() REQUIRES(!Locks::mutator_lock_, !Locks::thread_suspend_count_lock_); @@ -1788,6 +1812,11 @@ class Thread { // be false for threads where '!can_call_into_java_'. bool can_be_suspended_by_user_code_; + // Small thread-local cache to be used from the interpreter. + // It is keyed by dex instruction pointer. + // The value is opcode-dependent (e.g. field offset). + InterpreterCache interpreter_cache_; + friend class Dbg; // For SetStateUnsafe. friend class gc::collector::SemiSpace; // For getting stack traces. friend class Runtime; // For CreatePeer. diff --git a/tools/cpp-define-generator/constant_thread.def b/tools/cpp-define-generator/constant_thread.def index 1364b558ec..7e1df6b267 100644 --- a/tools/cpp-define-generator/constant_thread.def +++ b/tools/cpp-define-generator/constant_thread.def @@ -27,5 +27,4 @@ DEFINE_THREAD_CONSTANT(SUSPEND_REQUEST, int32_t, art::kSuspendRequest) DEFINE_THREAD_CONSTANT(CHECKPOINT_REQUEST, int32_t, art::kCheckpointRequest) DEFINE_THREAD_CONSTANT(EMPTY_CHECKPOINT_REQUEST, int32_t, art::kEmptyCheckpointRequest) DEFINE_THREAD_CONSTANT(SUSPEND_OR_CHECKPOINT_REQUEST, int32_t, art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest) - -#undef DEFINE_THREAD_CONSTANT +DEFINE_THREAD_CONSTANT(INTERPRETER_CACHE_SIZE_LOG2, int32_t, art::Thread::InterpreterCacheSizeLog2()) |