author Hans Boehm <hboehm@google.com> 2022-01-17 01:32:55 +0000
committer Hans Boehm <hboehm@google.com> 2022-01-17 01:32:55 +0000
commit 3d2f148fe040b60452d5d9be7d08dec693132078 (patch)
tree 6a8a0aa66c68e8a4c49833b2a93b263985259aed
parent fa40e6e318b21d4a1885a6ffea6efc3c0b5cc1cd (diff)
Revert "Add thread-shared interpreter cache"
This reverts commit fa40e6e318b21d4a1885a6ffea6efc3c0b5cc1cd.

Reason for revert: Seeing several different failures that appear related, both test failures and b/214850618. And it appears a potentially significant unresolved comment was overlooked.

Change-Id: I2b5260ac7f2168831f0d1b0d7c76b70ecc1fb77d
-rw-r--r--  openjdkjvmti/ti_redefine.cc                    3
-rw-r--r--  runtime/base/atomic_pair.h                    85
-rw-r--r--  runtime/interpreter/interpreter_cache-inl.h   67
-rw-r--r--  runtime/interpreter/interpreter_cache.cc      24
-rw-r--r--  runtime/interpreter/interpreter_cache.h       80
-rw-r--r--  runtime/interpreter/interpreter_common.h       5
-rw-r--r--  runtime/interpreter/mterp/nterp.cc            64
-rw-r--r--  runtime/mirror/dex_cache-inl.h                26
-rw-r--r--  runtime/mirror/dex_cache.cc                   18
-rw-r--r--  runtime/mirror/dex_cache.h                    57
-rw-r--r--  runtime/runtime.cc                             2
-rw-r--r--  runtime/thread.cc                             27
-rw-r--r--  runtime/thread.h                               7
-rw-r--r--  runtime/thread_list.cc                         7
-rw-r--r--  runtime/thread_list.h                          4
-rw-r--r--  tools/cpp-define-generator/thread.def          7
16 files changed, 179 insertions, 304 deletions
diff --git a/openjdkjvmti/ti_redefine.cc b/openjdkjvmti/ti_redefine.cc
index 16baf935db..37a61d3c54 100644
--- a/openjdkjvmti/ti_redefine.cc
+++ b/openjdkjvmti/ti_redefine.cc
@@ -2929,9 +2929,8 @@ void Redefiner::ClassRedefinition::UpdateClassStructurally(const RedefinitionDat
// TODO We might be able to avoid doing this but given the rather unstructured nature of the
// interpreter cache it's probably not worth the effort.
art::MutexLock mu(driver_->self_, *art::Locks::thread_list_lock_);
- art::InterpreterCache::ClearShared();
driver_->runtime_->GetThreadList()->ForEach(
- [](art::Thread* t) { t->GetInterpreterCache()->ClearThreadLocal(t); });
+ [](art::Thread* t) { t->GetInterpreterCache()->Clear(t); });
}
if (art::kIsDebugBuild) {
diff --git a/runtime/base/atomic_pair.h b/runtime/base/atomic_pair.h
deleted file mode 100644
index 4e43d3062c..0000000000
--- a/runtime/base/atomic_pair.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (C) 2021 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_BASE_ATOMIC_PAIR_H_
-#define ART_RUNTIME_BASE_ATOMIC_PAIR_H_
-
-#include "base/macros.h"
-
-#include <type_traits>
-
-namespace art {
-
-// std::pair<> is not trivially copyable and as such it is unsuitable for atomic operations.
-template <typename IntType>
-struct PACKED(2 * sizeof(IntType)) AtomicPair {
- static_assert(std::is_integral<IntType>::value);
-
- constexpr AtomicPair() : first(0), second(0) { }
- AtomicPair(IntType f, IntType s) : first(f), second(s) { }
- AtomicPair(const AtomicPair&) = default;
- AtomicPair& operator=(const AtomicPair&) = default;
-
- IntType first;
- IntType second;
-};
-
-template <typename IntType>
-ALWAYS_INLINE static inline AtomicPair<IntType> AtomicPairLoadAcquire(
- std::atomic<AtomicPair<IntType>>* target) {
- static_assert(std::atomic<AtomicPair<IntType>>::is_always_lock_free);
- return target->load(std::memory_order_acquire);
-}
-
-template <typename IntType>
-ALWAYS_INLINE static inline void AtomicPairStoreRelease(
- std::atomic<AtomicPair<IntType>>* target, AtomicPair<IntType> value) {
- static_assert(std::atomic<AtomicPair<IntType>>::is_always_lock_free);
- target->store(value, std::memory_order_release);
-}
-
-// llvm does not implement 16-byte atomic operations on x86-64.
-#if defined(__x86_64__)
-ALWAYS_INLINE static inline AtomicPair<uint64_t> AtomicPairLoadAcquire(
- std::atomic<AtomicPair<uint64_t>>* target) {
- uint64_t first, second;
- __asm__ __volatile__(
- "lock cmpxchg16b (%2)"
- : "=&a"(first), "=&d"(second)
- : "r"(target), "a"(0), "d"(0), "b"(0), "c"(0)
- : "cc");
- return {first, second};
-}
-
-ALWAYS_INLINE static inline void AtomicPairStoreRelease(
- std::atomic<AtomicPair<uint64_t>>* target, AtomicPair<uint64_t> value) {
- uint64_t first, second;
- __asm__ __volatile__ (
- "movq (%2), %%rax\n\t"
- "movq 8(%2), %%rdx\n\t"
- "1:\n\t"
- "lock cmpxchg16b (%2)\n\t"
- "jnz 1b"
- : "=&a"(first), "=&d"(second)
- : "r"(target), "b"(value.first), "c"(value.second)
- : "cc");
-}
-#endif // defined(__x86_64__)
-
-} // namespace art
-
-#endif // ART_RUNTIME_BASE_ATOMIC_PAIR_H_
-
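Note: the deleted header's own comment states the rationale: std::pair<> is not trivially copyable, while std::atomic<T> requires a trivially copyable T. That claim is easy to check standalone; the following is a minimal sketch (not part of the commit):

    #include <atomic>
    #include <cstdint>
    #include <type_traits>
    #include <utility>

    // A plain aggregate of integers is trivially copyable, so std::atomic
    // can wrap it; this is what the deleted AtomicPair provided.
    struct Pair {
      uint64_t first;
      uint64_t second;
    };
    static_assert(std::is_trivially_copyable<Pair>::value, "ok for std::atomic");
    // Reflects common library implementations (libstdc++, libc++), where
    // std::pair has a user-provided assignment operator:
    static_assert(!std::is_trivially_copyable<std::pair<uint64_t, uint64_t>>::value,
                  "std::pair is not trivially copyable here");

    std::atomic<Pair> slot;  // well-formed; lock-freedom still depends on the target

Whether a 16-byte std::atomic is actually lock-free depends on the target and compiler flags, which is why the header also carried the hand-written cmpxchg16b paths for x86-64 shown above; the dex_cache.h hunk further down restores the same routines.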
diff --git a/runtime/interpreter/interpreter_cache-inl.h b/runtime/interpreter/interpreter_cache-inl.h
deleted file mode 100644
index 249df23b27..0000000000
--- a/runtime/interpreter/interpreter_cache-inl.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (C) 2022 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_INL_H_
-#define ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_INL_H_
-
-#include "interpreter_cache.h"
-#include "thread.h"
-
-namespace art {
-
-template<bool kSkipThreadLocal>
-inline bool InterpreterCache::Get(Thread* self, const void* dex_instr, /* out */ size_t* value) {
- DCHECK(self->GetInterpreterCache() == this) << "Must be called from owning thread";
- size_t key = reinterpret_cast<size_t>(dex_instr);
- Entry& local_entry = thread_local_array_[IndexOf<kThreadLocalSize>(key)];
- if (kSkipThreadLocal) {
- DCHECK_NE(local_entry.first, key) << "Expected cache miss";
- } else {
- if (LIKELY(local_entry.first == key)) {
- *value = local_entry.second;
- return true;
- }
- }
- Entry shared_entry = AtomicPairLoadAcquire(&shared_array_[IndexOf<kSharedSize>(key)]);
- if (LIKELY(shared_entry.first == key)) {
- // For simplicity, only update the cache if weak ref accesses are enabled. If
- // they are disabled, this means the GC is processing the cache, and is
- // reading it concurrently.
- if (self->GetWeakRefAccessEnabled()) {
- local_entry = shared_entry; // Copy to local array to make future lookup faster.
- }
- *value = shared_entry.second;
- return true;
- }
- return false;
-}
-
-inline void InterpreterCache::Set(Thread* self, const void* dex_instr, size_t value) {
- DCHECK(self->GetInterpreterCache() == this) << "Must be called from owning thread";
-
- // For simplicity, only update the cache if weak ref accesses are enabled. If
- // they are disabled, this means the GC is processing the cache, and is
- // reading it concurrently.
- if (self->GetWeakRefAccessEnabled()) {
- size_t key = reinterpret_cast<size_t>(dex_instr);
- thread_local_array_[IndexOf<kThreadLocalSize>(key)] = {key, value};
- AtomicPairStoreRelease(&shared_array_[IndexOf<kSharedSize>(key)], {key, value});
- }
-}
-
-} // namespace art
-
-#endif // ART_RUNTIME_INTERPRETER_INTERPRETER_CACHE_INL_H_
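Taken together, the deleted Get/Set implement a two-level direct-mapped cache: a plain per-thread array probed first, backed by a larger process-wide array accessed with acquire/release atomics, with shared hits promoted into the local level. Below is a simplified standalone sketch of that lookup with assumed names; the real code additionally refused to update either level while Thread::GetWeakRefAccessEnabled() was false, since the GC may then be reading the cache, and the wide atomic load may not be lock-free without target support (hence atomic_pair.h's x86-64 asm):

    #include <array>
    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    struct Entry { uintptr_t key; uintptr_t value; };

    constexpr size_t kThreadLocalSize = 256;   // ~75% hit rate per the header
    constexpr size_t kSharedSize = 16 * 1024;  // ~90% hit rate per the header

    template <size_t kSize>
    size_t IndexOf(uintptr_t key) {
      return (key >> 2) & (kSize - 1);  // direct-mapped; kSize is a power of two
    }

    std::array<Entry, kThreadLocalSize> local_array;           // owning thread only
    std::array<std::atomic<Entry>, kSharedSize> shared_array;  // shared by all threads

    bool Get(uintptr_t key, uintptr_t* value) {
      Entry& local = local_array[IndexOf<kThreadLocalSize>(key)];
      if (local.key == key) {        // level 1: plain loads, no synchronization
        *value = local.value;
        return true;
      }
      Entry shared = shared_array[IndexOf<kSharedSize>(key)]
                         .load(std::memory_order_acquire);
      if (shared.key == key) {       // level 2: atomic wide load
        local_array[IndexOf<kThreadLocalSize>(key)] = shared;  // promote
        *value = shared.value;
        return true;
      }
      return false;                  // miss: caller resolves, then calls Set()
    }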
diff --git a/runtime/interpreter/interpreter_cache.cc b/runtime/interpreter/interpreter_cache.cc
index 1a35c038e9..e43fe318cc 100644
--- a/runtime/interpreter/interpreter_cache.cc
+++ b/runtime/interpreter/interpreter_cache.cc
@@ -19,30 +19,14 @@
namespace art {
-std::array<std::atomic<InterpreterCache::Entry>,
- InterpreterCache::kSharedSize> InterpreterCache::shared_array_;
-
-InterpreterCache::InterpreterCache() {
- // We can not use the ClearThreadLocal method since the constructor will not
- // be called from the owning thread.
- thread_local_array_.fill(Entry{});
-}
-
-void InterpreterCache::ClearThreadLocal(Thread* owning_thread) {
- // Must be called from the owning thread or when the owning thread is suspended.
+void InterpreterCache::Clear(Thread* owning_thread) {
DCHECK(owning_thread->GetInterpreterCache() == this);
DCHECK(owning_thread == Thread::Current() || owning_thread->IsSuspended());
-
- thread_local_array_.fill(Entry{});
+ data_.fill(Entry{});
}
-void InterpreterCache::ClearShared() {
- // Can be called from any thread since the writes are atomic.
- // The static shared cache isn't bound to specific thread in the first place.
-
- for (std::atomic<Entry>& entry : shared_array_) {
- AtomicPairStoreRelease(&entry, Entry{});
- }
+bool InterpreterCache::IsCalledFromOwningThread() {
+ return Thread::Current()->GetInterpreterCache() == this;
}
} // namespace art
diff --git a/runtime/interpreter/interpreter_cache.h b/runtime/interpreter/interpreter_cache.h
index af025cecbd..0ada562438 100644
--- a/runtime/interpreter/interpreter_cache.h
+++ b/runtime/interpreter/interpreter_cache.h
@@ -20,22 +20,17 @@
#include <array>
#include <atomic>
-#include "base/atomic_pair.h"
#include "base/bit_utils.h"
#include "base/macros.h"
namespace art {
-class Instruction;
class Thread;
// Small fast thread-local cache for the interpreter.
-//
-// The key is an absolute pointer to a dex instruction.
-//
-// The value depends on the opcode of the dex instruction.
+// It can hold arbitrary pointer-sized key-value pairs.
+// The interpretation of the value depends on the key.
// Presence of entry might imply some pre-conditions.
-//
// All operations must be done from the owning thread,
// or at a point when the owning thread is suspended.
//
@@ -51,61 +46,52 @@ class Thread;
// from assembly (it ensures that the offset is valid immediate value).
class ALIGNED(16) InterpreterCache {
public:
- using Entry = AtomicPair<size_t>;
-
- static constexpr size_t kThreadLocalSize = 256; // Value of 256 has around 75% cache hit rate.
- static constexpr size_t kSharedSize = 16 * 1024; // Value of 16k has around 90% cache hit rate.
- static constexpr size_t kHashShift = 2; // Number of tailing dex pc bits to drop.
-
- InterpreterCache();
-
- void ClearThreadLocal(Thread* owning_thread);
+ // Aligned since we load the whole entry in single assembly instruction.
+ typedef std::pair<const void*, size_t> Entry ALIGNED(2 * sizeof(size_t));
- static void ClearShared();
+ // 2x size increase/decrease corresponds to ~0.5% interpreter performance change.
+ // Value of 256 has around 75% cache hit rate.
+ static constexpr size_t kSize = 256;
- template<bool kSkipThreadLocal = false>
- ALWAYS_INLINE bool Get(Thread* self, const void* dex_instruction, /* out */ size_t* value);
+ InterpreterCache() {
+ // We can not use the Clear() method since the constructor will not
+ // be called from the owning thread.
+ data_.fill(Entry{});
+ }
- ALWAYS_INLINE void Set(Thread* self, const void* dex_instruction, size_t value);
+ // Clear the whole cache. It requires the owning thread for DCHECKs.
+ void Clear(Thread* owning_thread);
- template<typename Callback>
- void ForEachTheadLocalEntry(Callback&& callback) {
- for (Entry& entry : thread_local_array_) {
- callback(reinterpret_cast<const Instruction*>(entry.first), entry.second);
+ ALWAYS_INLINE bool Get(const void* key, /* out */ size_t* value) {
+ DCHECK(IsCalledFromOwningThread());
+ Entry& entry = data_[IndexOf(key)];
+ if (LIKELY(entry.first == key)) {
+ *value = entry.second;
+ return true;
}
+ return false;
}
- template<typename Callback>
- static void ForEachSharedEntry(Callback&& callback) {
- for (std::atomic<Entry>& atomic_entry : shared_array_) {
- Entry old_entry = AtomicPairLoadAcquire(&atomic_entry);
- Entry new_entry = old_entry;
- callback(reinterpret_cast<const Instruction*>(new_entry.first), new_entry.second);
- if (old_entry.second != new_entry.second) {
- AtomicPairStoreRelease(&atomic_entry, new_entry);
- }
- }
+ ALWAYS_INLINE void Set(const void* key, size_t value) {
+ DCHECK(IsCalledFromOwningThread());
+ data_[IndexOf(key)] = Entry{key, value};
+ }
+
+ std::array<Entry, kSize>& GetArray() {
+ return data_;
}
private:
- template<size_t kSize>
- static ALWAYS_INLINE size_t IndexOf(size_t key) {
+ bool IsCalledFromOwningThread();
+
+ static ALWAYS_INLINE size_t IndexOf(const void* key) {
static_assert(IsPowerOfTwo(kSize), "Size must be power of two");
- size_t index = (key >> kHashShift) & (kSize - 1);
+ size_t index = (reinterpret_cast<uintptr_t>(key) >> 2) & (kSize - 1);
DCHECK_LT(index, kSize);
return index;
}
- // Small cache of fixed size which is always present for every thread.
- // It is stored directly (without indirection) inside the Thread object.
- // This makes it as fast as possible to access from assembly fast-path.
- std::array<Entry, kThreadLocalSize> thread_local_array_;
-
- // Larger cache which is shared by all threads.
- // It is used as next cache level if lookup in the local array fails.
- // It needs to be accessed using atomic operations, and is contended,
- // but the sharing allows it to be larger than the per-thread cache.
- static std::array<std::atomic<Entry>, kSharedSize> shared_array_;
+ std::array<Entry, kSize> data_;
};
} // namespace art
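The restored header drops the shared level and returns to a single direct-mapped per-thread array of 256 entries (per its comment, each 2x size change moves interpreter performance by roughly 0.5%). Two details carry the design: Entry is aligned to 2 * sizeof(size_t) so the assembly fast path can read key and value in one naturally aligned wide load, and IndexOf shifts out two low alignment bits of the dex pc before masking. A worked standalone example of the indexing, assuming a 64-bit target:

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kSize = 256;  // must be a power of two for the mask trick

    size_t IndexOf(const void* key) {
      // Drop two low (alignment) bits, then keep log2(kSize) bits.
      return (reinterpret_cast<uintptr_t>(key) >> 2) & (kSize - 1);
    }

    // e.g. for a dex pc of 0x7f3a00051e2c:
    //   ((0x7f3a00051e2c >> 2) & 0xff) == 0x8b   -> slot 139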
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 4cbe81d0c2..1809227f2c 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -46,7 +46,6 @@
#include "dex/dex_instruction-inl.h"
#include "entrypoints/entrypoint_utils-inl.h"
#include "handle_scope-inl.h"
-#include "interpreter_cache-inl.h"
#include "interpreter_switch_impl.h"
#include "jit/jit-inl.h"
#include "mirror/call_site.h"
@@ -239,7 +238,7 @@ static ALWAYS_INLINE bool DoInvoke(Thread* self,
InterpreterCache* tls_cache = self->GetInterpreterCache();
size_t tls_value;
ArtMethod* resolved_method;
- if (!IsNterpSupported() && LIKELY(tls_cache->Get(self, inst, &tls_value))) {
+ if (!IsNterpSupported() && LIKELY(tls_cache->Get(inst, &tls_value))) {
resolved_method = reinterpret_cast<ArtMethod*>(tls_value);
} else {
ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
@@ -253,7 +252,7 @@ static ALWAYS_INLINE bool DoInvoke(Thread* self,
return false;
}
if (!IsNterpSupported()) {
- tls_cache->Set(self, inst, reinterpret_cast<size_t>(resolved_method));
+ tls_cache->Set(inst, reinterpret_cast<size_t>(resolved_method));
}
}
diff --git a/runtime/interpreter/mterp/nterp.cc b/runtime/interpreter/mterp/nterp.cc
index d49260e56a..670ae1b0c2 100644
--- a/runtime/interpreter/mterp/nterp.cc
+++ b/runtime/interpreter/mterp/nterp.cc
@@ -24,7 +24,6 @@
#include "dex/dex_instruction_utils.h"
#include "debugger.h"
#include "entrypoints/entrypoint_utils-inl.h"
-#include "interpreter/interpreter_cache-inl.h"
#include "interpreter/interpreter_common.h"
#include "interpreter/interpreter_intrinsics.h"
#include "interpreter/shadow_frame-inl.h"
@@ -94,7 +93,12 @@ inline void UpdateHotness(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock
template<typename T>
inline void UpdateCache(Thread* self, uint16_t* dex_pc_ptr, T value) {
DCHECK(kUseReadBarrier) << "Nterp only works with read barriers";
- self->GetInterpreterCache()->Set(self, dex_pc_ptr, value);
+ // For simplicity, only update the cache if weak ref accesses are enabled. If
+ // they are disabled, this means the GC is processing the cache, and is
+ // reading it concurrently.
+ if (self->GetWeakRefAccessEnabled()) {
+ self->GetInterpreterCache()->Set(dex_pc_ptr, value);
+ }
}
template<typename T>
@@ -248,7 +252,7 @@ extern "C" const char* NterpGetShortyFromInvokeCustom(ArtMethod* caller, uint16_
}
FLATTEN
-extern "C" size_t NterpGetMethodSlowPath(Thread* self, ArtMethod* caller, uint16_t* dex_pc_ptr)
+extern "C" size_t NterpGetMethod(Thread* self, ArtMethod* caller, uint16_t* dex_pc_ptr)
REQUIRES_SHARED(Locks::mutator_lock_) {
UpdateHotness(caller);
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -406,16 +410,6 @@ extern "C" size_t NterpGetMethodSlowPath(Thread* self, ArtMethod* caller, uint16
}
}
-extern "C" size_t NterpGetMethod(Thread* self, ArtMethod* caller, uint16_t* dex_pc_ptr)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- InterpreterCache* cache = self->GetInterpreterCache();
- size_t cached_value;
- if (LIKELY(cache->Get</*kSkipThreadLocal=*/true>(self, dex_pc_ptr, &cached_value))) {
- return cached_value;
- }
- return NterpGetMethodSlowPath(self, caller, dex_pc_ptr); // Tail call.
-}
-
FLATTEN
static ArtField* ResolveFieldWithAccessChecks(Thread* self,
ClassLinker* class_linker,
@@ -465,10 +459,10 @@ static ArtField* ResolveFieldWithAccessChecks(Thread* self,
return resolved_field;
}
-extern "C" size_t NterpGetStaticFieldSlowPath(Thread* self,
- ArtMethod* caller,
- uint16_t* dex_pc_ptr,
- size_t resolve_field_type) // Resolve if not zero
+extern "C" size_t NterpGetStaticField(Thread* self,
+ ArtMethod* caller,
+ uint16_t* dex_pc_ptr,
+ size_t resolve_field_type) // Resolve if not zero
REQUIRES_SHARED(Locks::mutator_lock_) {
UpdateHotness(caller);
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -508,25 +502,10 @@ extern "C" size_t NterpGetStaticFieldSlowPath(Thread* self,
}
}
-extern "C" size_t NterpGetStaticField(Thread* self,
- ArtMethod* caller,
- uint16_t* dex_pc_ptr,
- size_t resolve_field_type) // Resolve if not zero
- REQUIRES_SHARED(Locks::mutator_lock_) {
- InterpreterCache* cache = self->GetInterpreterCache();
- size_t cached_value;
- if (LIKELY(cache->Get</*kSkipThreadLocal=*/true>(self, dex_pc_ptr, &cached_value))) {
- return cached_value;
- }
- return NterpGetStaticFieldSlowPath(self, caller, dex_pc_ptr, resolve_field_type);
-}
-
-
-extern "C" uint32_t NterpGetInstanceFieldOffsetSlowPath(
- Thread* self,
- ArtMethod* caller,
- uint16_t* dex_pc_ptr,
- size_t resolve_field_type) // Resolve if not zero
+extern "C" uint32_t NterpGetInstanceFieldOffset(Thread* self,
+ ArtMethod* caller,
+ uint16_t* dex_pc_ptr,
+ size_t resolve_field_type) // Resolve if not zero
REQUIRES_SHARED(Locks::mutator_lock_) {
UpdateHotness(caller);
const Instruction* inst = Instruction::At(dex_pc_ptr);
@@ -553,19 +532,6 @@ extern "C" uint32_t NterpGetInstanceFieldOffsetSlowPath(
return resolved_field->GetOffset().Uint32Value();
}
-extern "C" uint32_t NterpGetInstanceFieldOffset(Thread* self,
- ArtMethod* caller,
- uint16_t* dex_pc_ptr,
- size_t resolve_field_type) // Resolve if not zero
- REQUIRES_SHARED(Locks::mutator_lock_) {
- InterpreterCache* cache = self->GetInterpreterCache();
- size_t cached_value;
- if (LIKELY(cache->Get</*kSkipThreadLocal=*/true>(self, dex_pc_ptr, &cached_value))) {
- return cached_value;
- }
- return NterpGetInstanceFieldOffsetSlowPath(self, caller, dex_pc_ptr, resolve_field_type);
-}
-
extern "C" mirror::Object* NterpGetClassOrAllocateObject(Thread* self,
ArtMethod* caller,
uint16_t* dex_pc_ptr)
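The three deleted fast-path entry points (NterpGetMethod, NterpGetStaticField, NterpGetInstanceFieldOffset) all had the same shape, enabled by the deleted Get</*kSkipThreadLocal=*/true>: the nterp assembly only calls into C++ after missing in the thread-local array, so the wrapper probed just the shared level and otherwise tail-called the slow path, which resolves and caches. A generic standalone sketch of that shape, with names and the Get signature simplified:

    #include <cstddef>

    // Cache is assumed to provide:
    //   template <bool kSkipThreadLocal> bool Get(const void*, size_t*);
    template <typename Cache, typename SlowPathFn>
    size_t LookupOrResolve(Cache& cache, const void* dex_pc_ptr, SlowPathFn slow_path) {
      size_t cached_value;
      // The assembly caller already missed the thread-local level,
      // so skip straight to the shared array.
      if (cache.template Get</*kSkipThreadLocal=*/true>(dex_pc_ptr, &cached_value)) {
        return cached_value;
      }
      return slow_path(dex_pc_ptr);  // resolves, calls Set(), returns the value
    }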
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 8a1ed71197..31f2bd2d7b 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -357,18 +357,32 @@ inline void DexCache::SetResolvedMethod(uint32_t method_idx, ArtMethod* method)
template <typename T>
NativeDexCachePair<T> DexCache::GetNativePair(std::atomic<NativeDexCachePair<T>>* pair_array,
size_t idx) {
- auto* array = reinterpret_cast<std::atomic<AtomicPair<size_t>>*>(pair_array);
- AtomicPair<size_t> value = AtomicPairLoadAcquire(&array[idx]);
- return NativeDexCachePair<T>(reinterpret_cast<T*>(value.first), value.second);
+ if (kRuntimePointerSize == PointerSize::k64) {
+ auto* array = reinterpret_cast<std::atomic<ConversionPair64>*>(pair_array);
+ ConversionPair64 value = AtomicLoadRelaxed16B(&array[idx]);
+ return NativeDexCachePair<T>(reinterpret_cast64<T*>(value.first),
+ dchecked_integral_cast<size_t>(value.second));
+ } else {
+ auto* array = reinterpret_cast<std::atomic<ConversionPair32>*>(pair_array);
+ ConversionPair32 value = array[idx].load(std::memory_order_relaxed);
+ return NativeDexCachePair<T>(reinterpret_cast32<T*>(value.first), value.second);
+ }
}
template <typename T>
void DexCache::SetNativePair(std::atomic<NativeDexCachePair<T>>* pair_array,
size_t idx,
NativeDexCachePair<T> pair) {
- auto* array = reinterpret_cast<std::atomic<AtomicPair<size_t>>*>(pair_array);
- AtomicPair<size_t> v(reinterpret_cast<size_t>(pair.object), pair.index);
- AtomicPairStoreRelease(&array[idx], v);
+ if (kRuntimePointerSize == PointerSize::k64) {
+ auto* array = reinterpret_cast<std::atomic<ConversionPair64>*>(pair_array);
+ ConversionPair64 v(reinterpret_cast64<uint64_t>(pair.object), pair.index);
+ AtomicStoreRelease16B(&array[idx], v);
+ } else {
+ auto* array = reinterpret_cast<std::atomic<ConversionPair32>*>(pair_array);
+ ConversionPair32 v(reinterpret_cast32<uint32_t>(pair.object),
+ dchecked_integral_cast<uint32_t>(pair.index));
+ array[idx].store(v, std::memory_order_release);
+ }
}
template <typename T,
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index b7f8ee7a07..c80f9dfe2f 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -126,5 +126,23 @@ ObjPtr<ClassLoader> DexCache::GetClassLoader() {
return GetFieldObject<ClassLoader>(OFFSET_OF_OBJECT_MEMBER(DexCache, class_loader_));
}
+#if !defined(__aarch64__) && !defined(__x86_64__)
+static pthread_mutex_t dex_cache_slow_atomic_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+DexCache::ConversionPair64 DexCache::AtomicLoadRelaxed16B(std::atomic<ConversionPair64>* target) {
+ pthread_mutex_lock(&dex_cache_slow_atomic_mutex);
+ DexCache::ConversionPair64 value = *reinterpret_cast<ConversionPair64*>(target);
+ pthread_mutex_unlock(&dex_cache_slow_atomic_mutex);
+ return value;
+}
+
+void DexCache::AtomicStoreRelease16B(std::atomic<ConversionPair64>* target,
+ ConversionPair64 value) {
+ pthread_mutex_lock(&dex_cache_slow_atomic_mutex);
+ *reinterpret_cast<ConversionPair64*>(target) = value;
+ pthread_mutex_unlock(&dex_cache_slow_atomic_mutex);
+}
+#endif
+
} // namespace mirror
} // namespace art
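On targets with neither aarch64's atomic 16-byte accesses nor x86-64's cmpxchg16b, the restored code above funnels every 16-byte access through one process-wide pthread mutex. The pattern is only sound because both GetNativePair and SetNativePair route through these two functions; a single raw access to the same slot elsewhere would race with them. A standalone sketch of the idiom:

    #include <pthread.h>
    #include <cstdint>

    struct Pair64 { uint64_t first, second; };

    static pthread_mutex_t slow_atomic_mutex = PTHREAD_MUTEX_INITIALIZER;

    Pair64 LoadPair(const Pair64* target) {
      pthread_mutex_lock(&slow_atomic_mutex);
      Pair64 value = *target;  // 16-byte read, made atomic by mutual exclusion
      pthread_mutex_unlock(&slow_atomic_mutex);
      return value;
    }

    void StorePair(Pair64* target, Pair64 value) {
      pthread_mutex_lock(&slow_atomic_mutex);
      *target = value;         // 16-byte write under the same lock
      pthread_mutex_unlock(&slow_atomic_mutex);
    }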
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 19197095aa..26fc520cd7 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -149,7 +149,7 @@ class MANAGED DexCache final : public Object {
"String dex cache size is not a power of 2.");
// Size of field dex cache. Needs to be a power of 2 for entrypoint assumptions to hold.
- static constexpr size_t kDexCacheFieldCacheSize = 512;
+ static constexpr size_t kDexCacheFieldCacheSize = 1024;
static_assert(IsPowerOfTwo(kDexCacheFieldCacheSize),
"Field dex cache size is not a power of 2.");
@@ -448,6 +448,19 @@ class MANAGED DexCache final : public Object {
T* AllocArray(MemberOffset obj_offset, MemberOffset num_offset, size_t num)
REQUIRES_SHARED(Locks::mutator_lock_);
+ // std::pair<> is not trivially copyable and as such it is unsuitable for atomic operations,
+ // so we use a custom pair class for loading and storing the NativeDexCachePair<>.
+ template <typename IntType>
+ struct PACKED(2 * sizeof(IntType)) ConversionPair {
+ ConversionPair(IntType f, IntType s) : first(f), second(s) { }
+ ConversionPair(const ConversionPair&) = default;
+ ConversionPair& operator=(const ConversionPair&) = default;
+ IntType first;
+ IntType second;
+ };
+ using ConversionPair32 = ConversionPair<uint32_t>;
+ using ConversionPair64 = ConversionPair<uint64_t>;
+
// Visit instance fields of the dex cache as well as its associated arrays.
template <bool kVisitNativeRoots,
VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -456,6 +469,48 @@ class MANAGED DexCache final : public Object {
void VisitReferences(ObjPtr<Class> klass, const Visitor& visitor)
REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
+ // Due to lack of 16-byte atomics support, we use hand-crafted routines.
+#if defined(__aarch64__)
+ // 16-byte atomics are supported on aarch64.
+ ALWAYS_INLINE static ConversionPair64 AtomicLoadRelaxed16B(
+ std::atomic<ConversionPair64>* target) {
+ return target->load(std::memory_order_relaxed);
+ }
+
+ ALWAYS_INLINE static void AtomicStoreRelease16B(
+ std::atomic<ConversionPair64>* target, ConversionPair64 value) {
+ target->store(value, std::memory_order_release);
+ }
+#elif defined(__x86_64__)
+ ALWAYS_INLINE static ConversionPair64 AtomicLoadRelaxed16B(
+ std::atomic<ConversionPair64>* target) {
+ uint64_t first, second;
+ __asm__ __volatile__(
+ "lock cmpxchg16b (%2)"
+ : "=&a"(first), "=&d"(second)
+ : "r"(target), "a"(0), "d"(0), "b"(0), "c"(0)
+ : "cc");
+ return ConversionPair64(first, second);
+ }
+
+ ALWAYS_INLINE static void AtomicStoreRelease16B(
+ std::atomic<ConversionPair64>* target, ConversionPair64 value) {
+ uint64_t first, second;
+ __asm__ __volatile__ (
+ "movq (%2), %%rax\n\t"
+ "movq 8(%2), %%rdx\n\t"
+ "1:\n\t"
+ "lock cmpxchg16b (%2)\n\t"
+ "jnz 1b"
+ : "=&a"(first), "=&d"(second)
+ : "r"(target), "b"(value.first), "c"(value.second)
+ : "cc");
+ }
+#else
+ static ConversionPair64 AtomicLoadRelaxed16B(std::atomic<ConversionPair64>* target);
+ static void AtomicStoreRelease16B(std::atomic<ConversionPair64>* target, ConversionPair64 value);
+#endif
+
HeapReference<ClassLoader> class_loader_;
HeapReference<String> location_;
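The restored x86-64 routines lean on a property of lock cmpxchg16b: it compares rdx:rax against the 16-byte memory operand and, on mismatch, loads the current contents into rdx:rax without writing. A compare-exchange against zero therefore doubles as an atomic 16-byte load (a slot that genuinely holds zero is rewritten with zero, a no-op), and an unconditional store becomes a CAS retry loop. The same semantics can be expressed with the GCC/Clang __sync builtins, assuming x86-64 and -mcx16; this is a sketch, not the commit's code:

    using uint128_t = unsigned __int128;

    uint128_t Load16B(uint128_t* p) {
      // CAS(p, 0, 0): if *p != 0, it fails and returns the old value; if
      // *p == 0, it "stores" 0, which changes nothing. Either way we get
      // an atomic 16-byte read.
      return __sync_val_compare_and_swap(p, uint128_t{0}, uint128_t{0});
    }

    void Store16B(uint128_t* p, uint128_t v) {
      uint128_t expected = *p;  // plain read; the CAS below validates it
      while (!__sync_bool_compare_and_swap(p, expected, v)) {
        expected = *p;          // lost a race: reload and retry
      }
    }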
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 2a3afa8c99..54e9d38b3c 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -739,7 +739,7 @@ void Runtime::SweepSystemWeaks(IsMarkedVisitor* visitor) {
// from mutators. See b/32167580.
GetJit()->GetCodeCache()->SweepRootTables(visitor);
}
- Thread::SweepInterpreterCaches(visitor);
+ thread_list_->SweepInterpreterCaches(visitor);
// All other generic system-weak holders.
for (gc::AbstractSystemWeakHolder* holder : system_weak_holders_) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 2b3f47298e..25d493f0a5 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -4240,28 +4240,27 @@ void Thread::VisitRoots(RootVisitor* visitor) {
}
#pragma GCC diagnostic pop
-void Thread::SweepInterpreterCaches(IsMarkedVisitor* visitor) {
- Thread* self = Thread::Current();
- auto visit = [visitor, self](const Instruction* inst, size_t& value) {
- Locks::mutator_lock_->AssertSharedHeld(self);
+void Thread::SweepInterpreterCache(IsMarkedVisitor* visitor) {
+ for (InterpreterCache::Entry& entry : GetInterpreterCache()->GetArray()) {
+ const Instruction* inst = reinterpret_cast<const Instruction*>(entry.first);
if (inst != nullptr) {
if (inst->Opcode() == Instruction::NEW_INSTANCE ||
inst->Opcode() == Instruction::CHECK_CAST ||
inst->Opcode() == Instruction::INSTANCE_OF ||
inst->Opcode() == Instruction::NEW_ARRAY ||
inst->Opcode() == Instruction::CONST_CLASS) {
- mirror::Class* cls = reinterpret_cast<mirror::Class*>(value);
+ mirror::Class* cls = reinterpret_cast<mirror::Class*>(entry.second);
if (cls == nullptr || cls == Runtime::GetWeakClassSentinel()) {
// Entry got deleted in a previous sweep.
- return;
+ continue;
}
Runtime::ProcessWeakClass(
- reinterpret_cast<GcRoot<mirror::Class>*>(&value),
+ reinterpret_cast<GcRoot<mirror::Class>*>(&entry.second),
visitor,
Runtime::GetWeakClassSentinel());
} else if (inst->Opcode() == Instruction::CONST_STRING ||
inst->Opcode() == Instruction::CONST_STRING_JUMBO) {
- mirror::Object* object = reinterpret_cast<mirror::Object*>(value);
+ mirror::Object* object = reinterpret_cast<mirror::Object*>(entry.second);
mirror::Object* new_object = visitor->IsMarked(object);
// We know the string is marked because it's a strongly-interned string that
// is always alive (see b/117621117 for trying to make those strings weak).
@@ -4269,16 +4268,11 @@ void Thread::SweepInterpreterCaches(IsMarkedVisitor* visitor) {
// null for newly allocated objects, but we know those haven't moved. Therefore,
// only update the entry if we get a different non-null string.
if (new_object != nullptr && new_object != object) {
- value = reinterpret_cast<size_t>(new_object);
+ entry.second = reinterpret_cast<size_t>(new_object);
}
}
}
- };
- InterpreterCache::ForEachSharedEntry(visit);
- MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
- Runtime::Current()->GetThreadList()->ForEach([&visit](Thread* thread) {
- thread->GetInterpreterCache()->ForEachTheadLocalEntry(visit);
- });
+ }
}
// FIXME: clang-r433403 reports the below function exceeds frame size limit.
@@ -4493,10 +4487,9 @@ void Thread::SetReadBarrierEntrypoints() {
void Thread::ClearAllInterpreterCaches() {
static struct ClearInterpreterCacheClosure : Closure {
void Run(Thread* thread) override {
- thread->GetInterpreterCache()->ClearThreadLocal(thread);
+ thread->GetInterpreterCache()->Clear(thread);
}
} closure;
- InterpreterCache::ClearShared();
Runtime::Current()->GetThreadList()->RunCheckpoint(&closure);
}
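SweepInterpreterCache above shows why the cache is GC-sensitive: the meaning of entry.second depends on the opcode at entry.first, and for class- and string-producing opcodes it is a weak reference the collector must forward or clear. A minimal standalone model of the value-forwarding step, where IsMarkedVisitor stands in for ART's interface of the same name:

    #include <cstdint>

    struct Object;

    // Stand-in for ART's IsMarkedVisitor: returns the object's possibly
    // moved address, or nullptr if the object is not marked.
    struct IsMarkedVisitor {
      virtual Object* IsMarked(Object* obj) = 0;
      virtual ~IsMarkedVisitor() = default;
    };

    void SweepValue(uintptr_t* value, IsMarkedVisitor* visitor) {
      Object* object = reinterpret_cast<Object*>(*value);
      if (object == nullptr) {
        return;  // entry was already cleared by a previous sweep
      }
      Object* new_object = visitor->IsMarked(object);
      // Per the comment in the diff: IsMarked() may return null for newly
      // allocated objects, which are known not to have moved, so only
      // rewrite the entry on a genuine move.
      if (new_object != nullptr && new_object != object) {
        *value = reinterpret_cast<uintptr_t>(new_object);
      }
    }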
diff --git a/runtime/thread.h b/runtime/thread.h
index ec971a564f..1085a563c4 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1348,6 +1348,10 @@ class Thread {
return ThreadOffset<pointer_size>(OFFSETOF_MEMBER(Thread, interpreter_cache_));
}
+ static constexpr int InterpreterCacheSizeLog2() {
+ return WhichPowerOf2(InterpreterCache::kSize);
+ }
+
static constexpr uint32_t AllThreadFlags() {
return enum_cast<uint32_t>(ThreadFlag::kLastFlag) |
(enum_cast<uint32_t>(ThreadFlag::kLastFlag) - 1u);
@@ -1522,8 +1526,7 @@ class Thread {
template <bool kPrecise>
void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
- static void SweepInterpreterCaches(IsMarkedVisitor* visitor)
- REQUIRES_SHARED(Locks::mutator_lock_);
+ void SweepInterpreterCache(IsMarkedVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
static bool IsAotCompiler();
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 6482e72417..4e3b40ba7a 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -1407,6 +1407,13 @@ void ThreadList::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) const {
}
}
+void ThreadList::SweepInterpreterCaches(IsMarkedVisitor* visitor) const {
+ MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+ for (const auto& thread : list_) {
+ thread->SweepInterpreterCache(visitor);
+ }
+}
+
void ThreadList::VisitReflectiveTargets(ReflectiveValueVisitor *visitor) const {
MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
for (const auto& thread : list_) {
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 29b0c52186..f5b58a0c54 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -179,6 +179,10 @@ class ThreadList {
return empty_checkpoint_barrier_.get();
}
+ void SweepInterpreterCaches(IsMarkedVisitor* visitor) const
+ REQUIRES(!Locks::thread_list_lock_)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
void WaitForOtherNonDaemonThreadsToExit(bool check_no_birth = true)
REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_,
!Locks::mutator_lock_);
diff --git a/tools/cpp-define-generator/thread.def b/tools/cpp-define-generator/thread.def
index 07ca8841f8..ec8e28b977 100644
--- a/tools/cpp-define-generator/thread.def
+++ b/tools/cpp-define-generator/thread.def
@@ -30,12 +30,11 @@ ASM_DEFINE(THREAD_ID_OFFSET,
ASM_DEFINE(THREAD_INTERPRETER_CACHE_OFFSET,
art::Thread::InterpreterCacheOffset<art::kRuntimePointerSize>().Int32Value())
ASM_DEFINE(THREAD_INTERPRETER_CACHE_SIZE_LOG2,
- art::WhichPowerOf2(art::InterpreterCache::kThreadLocalSize))
+ art::Thread::InterpreterCacheSizeLog2())
ASM_DEFINE(THREAD_INTERPRETER_CACHE_SIZE_MASK,
- (sizeof(art::InterpreterCache::Entry) * (art::InterpreterCache::kThreadLocalSize - 1)))
+ (sizeof(art::InterpreterCache::Entry) * (art::InterpreterCache::kSize - 1)))
ASM_DEFINE(THREAD_INTERPRETER_CACHE_SIZE_SHIFT,
- (art::WhichPowerOf2(sizeof(art::InterpreterCache::Entry)) -
- art::InterpreterCache::kHashShift))
+ 2)
ASM_DEFINE(THREAD_IS_GC_MARKING_OFFSET,
art::Thread::IsGcMarkingOffset<art::kRuntimePointerSize>().Int32Value())
ASM_DEFINE(THREAD_LOCAL_ALLOC_STACK_END_OFFSET,
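For reference, these constants presumably combine in the nterp assembly roughly as follows (derived from the definitions above, not quoted from ART's .S files). With the restored values on a 64-bit target, sizeof(Entry) == 16 and kSize == 256, so the shift is 2 and the mask is 16 * 255 == 0xFF0: one shift and one mask turn a dex pc into the byte offset of its cache entry. The hardcoded shift of 2 equals the pre-revert expression WhichPowerOf2(sizeof(Entry)) - kHashShift on 64-bit (4 - 2).

    #include <cstdint>

    // Byte offset of the entry for a dex pc, relative to the cache base:
    //   slot     = (dex_pc >> 2) & (kSize - 1)    // IndexOf in C++
    //   byte_off = slot * sizeof(Entry)
    //            = (dex_pc << SHIFT) & MASK       // what the defines encode
    uintptr_t EntryByteOffset(uintptr_t dex_pc) {
      return (dex_pc << 2) & 0xFF0;  // e.g. 0x...1e2c -> 0x8b0 (slot 139 * 16)
    }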