diff options
author | 2023-05-16 17:07:47 +0100 | |
---|---|---|
committer | 2023-06-01 10:22:47 +0000 | |
commit | 80cb41997a3010ff25022f726c6a11634f5ae1b1 (patch) | |
tree | 163598ac6021a50cdcbefeb4a65dbe1c98cc8fa1 | |
parent | 3092d17a21cd6cd4e28231bcb271ac2a17225413 (diff) |
Use seq-lock for DexCache
Avoid the need for 16-byte atomics.
Test: ./art/test.py -b -r --host --optimizing
Change-Id: I818916dd0429de9b0ef364579264868db1d4082c
-rw-r--r-- | runtime/base/atomic_pair.h | 85 | ||||
-rw-r--r-- | runtime/mirror/dex_cache-inl.h | 4 | ||||
-rw-r--r-- | runtime/mirror/dex_cache.h | 8 |
3 files changed, 57 insertions, 40 deletions
diff --git a/runtime/base/atomic_pair.h b/runtime/base/atomic_pair.h index 1523b3b049..802627b041 100644 --- a/runtime/base/atomic_pair.h +++ b/runtime/base/atomic_pair.h @@ -17,65 +17,82 @@ #ifndef ART_RUNTIME_BASE_ATOMIC_PAIR_H_ #define ART_RUNTIME_BASE_ATOMIC_PAIR_H_ -#include "base/macros.h" +#include <android-base/logging.h> +#include <atomic> #include <type_traits> +#include "base/macros.h" + namespace art { +// Implement 16-byte atomic pair using the seq-lock synchronization algorithm. +// This is currently only used for DexCache. +// +// This uses top 4-bytes of the key as version counter and lock bit, +// which means the stored pair key can not use those bytes. +// +// This allows us to read the cache without exclusive access to the cache line. +// +// The 8-byte atomic pair uses the normal single-instruction implementation. +// +static constexpr uint64_t kSeqMask = (0xFFFFFFFFull << 32); +static constexpr uint64_t kSeqLock = (0x80000000ull << 32); +static constexpr uint64_t kSeqIncr = (0x00000001ull << 32); + // std::pair<> is not trivially copyable and as such it is unsuitable for atomic operations. template <typename IntType> struct PACKED(2 * sizeof(IntType)) AtomicPair { static_assert(std::is_integral_v<IntType>); - constexpr AtomicPair() : first(0), second(0) { } - AtomicPair(IntType f, IntType s) : first(f), second(s) { } - AtomicPair(const AtomicPair&) = default; - AtomicPair& operator=(const AtomicPair&) = default; + AtomicPair(IntType f, IntType s) : key(f), val(s) {} - IntType first; - IntType second; + IntType key; + IntType val; }; template <typename IntType> -ALWAYS_INLINE static inline AtomicPair<IntType> AtomicPairLoadAcquire( - std::atomic<AtomicPair<IntType>>* target) { +ALWAYS_INLINE static inline AtomicPair<IntType> AtomicPairLoadAcquire(AtomicPair<IntType>* pair) { + static_assert(std::is_trivially_copyable<AtomicPair<IntType>>::value); + auto* target = reinterpret_cast<std::atomic<AtomicPair<IntType>>*>(pair); return target->load(std::memory_order_acquire); } template <typename IntType> -ALWAYS_INLINE static inline void AtomicPairStoreRelease(std::atomic<AtomicPair<IntType>>* target, +ALWAYS_INLINE static inline void AtomicPairStoreRelease(AtomicPair<IntType>* pair, AtomicPair<IntType> value) { + static_assert(std::is_trivially_copyable<AtomicPair<IntType>>::value); + auto* target = reinterpret_cast<std::atomic<AtomicPair<IntType>>*>(pair); target->store(value, std::memory_order_release); } -// LLVM uses generic lock-based implementation for x86_64, we can do better with CMPXCHG16B. -#if defined(__x86_64__) -ALWAYS_INLINE static inline AtomicPair<uint64_t> AtomicPairLoadAcquire( - std::atomic<AtomicPair<uint64_t>>* target) { - uint64_t first, second; - __asm__ __volatile__( - "lock cmpxchg16b (%2)" - : "=&a"(first), "=&d"(second) - : "r"(target), "a"(0), "d"(0), "b"(0), "c"(0) - : "cc"); - return {first, second}; +ALWAYS_INLINE static inline AtomicPair<uint64_t> AtomicPairLoadAcquire(AtomicPair<uint64_t>* pair) { + auto* key_ptr = reinterpret_cast<std::atomic_uint64_t*>(&pair->key); + auto* val_ptr = reinterpret_cast<std::atomic_uint64_t*>(&pair->val); + while (true) { + uint64_t key0 = key_ptr->load(std::memory_order_acquire); + uint64_t val = val_ptr->load(std::memory_order_acquire); + uint64_t key1 = key_ptr->load(std::memory_order_relaxed); + uint64_t key = key0 & ~kSeqMask; + if (LIKELY((key0 & kSeqLock) == 0 && key0 == key1)) { + return {key, val}; + } + } } -ALWAYS_INLINE static inline void AtomicPairStoreRelease( - std::atomic<AtomicPair<uint64_t>>* target, AtomicPair<uint64_t> value) { - uint64_t first, second; - __asm__ __volatile__ ( - "movq (%2), %%rax\n\t" - "movq 8(%2), %%rdx\n\t" - "1:\n\t" - "lock cmpxchg16b (%2)\n\t" - "jnz 1b" - : "=&a"(first), "=&d"(second) - : "r"(target), "b"(value.first), "c"(value.second) - : "cc"); +ALWAYS_INLINE static inline void AtomicPairStoreRelease(AtomicPair<uint64_t>* pair, + AtomicPair<uint64_t> value) { + DCHECK((value.key & kSeqMask) == 0) << "Key=0x" << std::hex << value.key; + auto* key_ptr = reinterpret_cast<std::atomic_uint64_t*>(&pair->key); + auto* val_ptr = reinterpret_cast<std::atomic_uint64_t*>(&pair->val); + uint64_t key = key_ptr->load(std::memory_order_relaxed); + do { + key &= ~kSeqLock; // Ensure that the CAS below fails if the lock bit is already set. + } while (!key_ptr->compare_exchange_weak(key, key | kSeqLock)); + key = (((key & kSeqMask) + kSeqIncr) & ~kSeqLock) | (value.key & ~kSeqMask); + val_ptr->store(value.val, std::memory_order_release); + key_ptr->store(key, std::memory_order_release); } -#endif // defined(__x86_64__) } // namespace art diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h index 8b8eecc0cc..dbc091d2af 100644 --- a/runtime/mirror/dex_cache-inl.h +++ b/runtime/mirror/dex_cache-inl.h @@ -110,8 +110,8 @@ inline void NativeDexCachePair<T>::Initialize(std::atomic<NativeDexCachePair<T>> first_elem.object = nullptr; first_elem.index = InvalidIndexForSlot(0); - auto* array = reinterpret_cast<std::atomic<AtomicPair<uintptr_t>>*>(dex_cache); - AtomicPair<uintptr_t> v(reinterpret_cast<size_t>(first_elem.object), first_elem.index); + auto* array = reinterpret_cast<AtomicPair<uintptr_t>*>(dex_cache); + AtomicPair<uintptr_t> v(first_elem.index, reinterpret_cast<size_t>(first_elem.object)); AtomicPairStoreRelease(&array[0], v); } diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h index 20e3e6c253..2b5aa8e911 100644 --- a/runtime/mirror/dex_cache.h +++ b/runtime/mirror/dex_cache.h @@ -142,16 +142,16 @@ template <typename T, size_t size> class NativeDexCachePairArray { private: NativeDexCachePair<T> GetNativePair(std::atomic<NativeDexCachePair<T>>* pair_array, size_t idx) { - auto* array = reinterpret_cast<std::atomic<AtomicPair<uintptr_t>>*>(pair_array); + auto* array = reinterpret_cast<AtomicPair<uintptr_t>*>(pair_array); AtomicPair<uintptr_t> value = AtomicPairLoadAcquire(&array[idx]); - return NativeDexCachePair<T>(reinterpret_cast<T*>(value.first), value.second); + return NativeDexCachePair<T>(reinterpret_cast<T*>(value.val), value.key); } void SetNativePair(std::atomic<NativeDexCachePair<T>>* pair_array, size_t idx, NativeDexCachePair<T> pair) { - auto* array = reinterpret_cast<std::atomic<AtomicPair<uintptr_t>>*>(pair_array); - AtomicPair<uintptr_t> v(reinterpret_cast<size_t>(pair.object), pair.index); + auto* array = reinterpret_cast<AtomicPair<uintptr_t>*>(pair_array); + AtomicPair<uintptr_t> v(pair.index, reinterpret_cast<size_t>(pair.object)); AtomicPairStoreRelease(&array[idx], v); } |