Use seq-lock for DexCache
Avoid the need for 16-byte atomics: LLVM lowers them to a generic
lock-based implementation on x86-64, which the old code worked around
with hand-written cmpxchg16b assembly.
Test: ./art/test.py -b -r --host --optimizing
Change-Id: I818916dd0429de9b0ef364579264868db1d4082c
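For orientation before the diff: the replacement is the classic seq-lock
pattern. A writer sets a lock bit and bumps a version counter around each
update; a reader retries whenever it observed the lock bit, or a version
change, between its two reads of the counter. A minimal stand-alone sketch
of the pattern (illustrative only, not part of the patch; SeqLocked, Read,
and Write are hypothetical names, and the version lives in a separate word
here, whereas the patch packs it into the key's top bytes):

  #include <atomic>
  #include <cstdint>

  struct SeqLocked {
    std::atomic<uint32_t> seq{0};   // Odd while a write is in progress.
    std::atomic<uint64_t> data{0};
  };

  uint64_t Read(SeqLocked* s) {
    while (true) {
      uint32_t seq0 = s->seq.load(std::memory_order_acquire);
      uint64_t data = s->data.load(std::memory_order_acquire);
      uint32_t seq1 = s->seq.load(std::memory_order_relaxed);
      if ((seq0 & 1u) == 0 && seq0 == seq1) {
        return data;  // No writer raced with us; the value is consistent.
      }
    }
  }

  void Write(SeqLocked* s, uint64_t value) {
    uint32_t seq = s->seq.load(std::memory_order_relaxed);
    do {
      seq &= ~1u;  // Make the CAS fail if another writer holds the odd bit.
    } while (!s->seq.compare_exchange_weak(seq, seq | 1u));
    s->data.store(value, std::memory_order_release);
    s->seq.store(seq + 2, std::memory_order_release);  // New even version.
  }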
diff --git a/runtime/base/atomic_pair.h b/runtime/base/atomic_pair.h
index 1523b3b..802627b 100644
--- a/runtime/base/atomic_pair.h
+++ b/runtime/base/atomic_pair.h
@@ -17,65 +17,86 @@
#ifndef ART_RUNTIME_BASE_ATOMIC_PAIR_H_
#define ART_RUNTIME_BASE_ATOMIC_PAIR_H_
-#include "base/macros.h"
+#include <android-base/logging.h>
+#include <atomic>
#include <type_traits>
+#include "base/macros.h"
+
namespace art {
+// Implement a 16-byte atomic pair using the seq-lock synchronization algorithm.
+// This is currently used only for DexCache.
+//
+// The top 4 bytes of the key serve as a version counter and lock bit,
+// which means the stored pair key cannot use those bytes.
+//
+// This allows us to read the cache without exclusive access to the cache line.
+//
+// The 8-byte atomic pair uses the normal single-instruction implementation.
+//
+static constexpr uint64_t kSeqMask = (0xFFFFFFFFull << 32);
+static constexpr uint64_t kSeqLock = (0x80000000ull << 32);
+static constexpr uint64_t kSeqIncr = (0x00000001ull << 32);
+
// std::pair<> is not trivially copyable and as such it is unsuitable for atomic operations.
template <typename IntType>
struct PACKED(2 * sizeof(IntType)) AtomicPair {
static_assert(std::is_integral_v<IntType>);
- constexpr AtomicPair() : first(0), second(0) { }
- AtomicPair(IntType f, IntType s) : first(f), second(s) { }
- AtomicPair(const AtomicPair&) = default;
- AtomicPair& operator=(const AtomicPair&) = default;
+ AtomicPair(IntType f, IntType s) : key(f), val(s) {}
- IntType first;
- IntType second;
+ IntType key;
+ IntType val;
};
template <typename IntType>
-ALWAYS_INLINE static inline AtomicPair<IntType> AtomicPairLoadAcquire(
- std::atomic<AtomicPair<IntType>>* target) {
+ALWAYS_INLINE static inline AtomicPair<IntType> AtomicPairLoadAcquire(AtomicPair<IntType>* pair) {
+ static_assert(std::is_trivially_copyable<AtomicPair<IntType>>::value);
+ auto* target = reinterpret_cast<std::atomic<AtomicPair<IntType>>*>(pair);
return target->load(std::memory_order_acquire);
}
template <typename IntType>
-ALWAYS_INLINE static inline void AtomicPairStoreRelease(std::atomic<AtomicPair<IntType>>* target,
+ALWAYS_INLINE static inline void AtomicPairStoreRelease(AtomicPair<IntType>* pair,
AtomicPair<IntType> value) {
+ static_assert(std::is_trivially_copyable<AtomicPair<IntType>>::value);
+ auto* target = reinterpret_cast<std::atomic<AtomicPair<IntType>>*>(pair);
target->store(value, std::memory_order_release);
}
-// LLVM uses generic lock-based implementation for x86_64, we can do better with CMPXCHG16B.
-#if defined(__x86_64__)
-ALWAYS_INLINE static inline AtomicPair<uint64_t> AtomicPairLoadAcquire(
- std::atomic<AtomicPair<uint64_t>>* target) {
- uint64_t first, second;
- __asm__ __volatile__(
- "lock cmpxchg16b (%2)"
- : "=&a"(first), "=&d"(second)
- : "r"(target), "a"(0), "d"(0), "b"(0), "c"(0)
- : "cc");
- return {first, second};
+ALWAYS_INLINE static inline AtomicPair<uint64_t> AtomicPairLoadAcquire(AtomicPair<uint64_t>* pair) {
+ auto* key_ptr = reinterpret_cast<std::atomic_uint64_t*>(&pair->key);
+ auto* val_ptr = reinterpret_cast<std::atomic_uint64_t*>(&pair->val);
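+ // Read the key word before and after reading the value. If a writer held
+ // the lock or bumped the version in between, the pair may be torn; retry.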
+ while (true) {
+ uint64_t key0 = key_ptr->load(std::memory_order_acquire);
+ uint64_t val = val_ptr->load(std::memory_order_acquire);
+ uint64_t key1 = key_ptr->load(std::memory_order_relaxed);
+ uint64_t key = key0 & ~kSeqMask;
+ if (LIKELY((key0 & kSeqLock) == 0 && key0 == key1)) {
+ return {key, val};
+ }
+ }
}
-ALWAYS_INLINE static inline void AtomicPairStoreRelease(
- std::atomic<AtomicPair<uint64_t>>* target, AtomicPair<uint64_t> value) {
- uint64_t first, second;
- __asm__ __volatile__ (
- "movq (%2), %%rax\n\t"
- "movq 8(%2), %%rdx\n\t"
- "1:\n\t"
- "lock cmpxchg16b (%2)\n\t"
- "jnz 1b"
- : "=&a"(first), "=&d"(second)
- : "r"(target), "b"(value.first), "c"(value.second)
- : "cc");
+ALWAYS_INLINE static inline void AtomicPairStoreRelease(AtomicPair<uint64_t>* pair,
+ AtomicPair<uint64_t> value) {
+ DCHECK((value.key & kSeqMask) == 0) << "Key=0x" << std::hex << value.key;
+ auto* key_ptr = reinterpret_cast<std::atomic_uint64_t*>(&pair->key);
+ auto* val_ptr = reinterpret_cast<std::atomic_uint64_t*>(&pair->val);
+ uint64_t key = key_ptr->load(std::memory_order_relaxed);
+ do {
+ key &= ~kSeqLock; // Ensure that the CAS below fails if the lock bit is already set.
+ } while (!key_ptr->compare_exchange_weak(key, key | kSeqLock));
+ key = (((key & kSeqMask) + kSeqIncr) & ~kSeqLock) | (value.key & ~kSeqMask);
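+ // Publish the new value first; the subsequent release store of the key word
+ // clears the lock bit and installs the new version in one atomic step.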
+ val_ptr->store(value.val, std::memory_order_release);
+ key_ptr->store(key, std::memory_order_release);
}
-#endif // defined(__x86_64__)
} // namespace art
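For reference, the bit layout the new constants impose on the 64-bit key
word, plus one worked store, written as a comment sketch (the concrete
numbers are examples, not values from the runtime):

  static_assert(kSeqMask == 0xFFFF'FFFF'0000'0000ull);  // Version + lock bits.
  static_assert(kSeqLock == 0x8000'0000'0000'0000ull);  // Bit 63: writer lock.
  static_assert(kSeqIncr == 0x0000'0001'0000'0000ull);  // +1 in the version field.

  // Worked store: old key word 0x0000'0005'0000'0007 (version 5, payload 7),
  // new payload 9:
  //   lock:    CAS installs      0x8000'0005'0000'0007
  //   publish: store val, then   ((0x0000'0005'0000'0000 + kSeqIncr) & ~kSeqLock) | 9
  //                            == 0x0000'0006'0000'0009  (version 6, unlocked)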
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 8b8eecc..dbc091d 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -110,8 +110,8 @@
first_elem.object = nullptr;
first_elem.index = InvalidIndexForSlot(0);
- auto* array = reinterpret_cast<std::atomic<AtomicPair<uintptr_t>>*>(dex_cache);
- AtomicPair<uintptr_t> v(reinterpret_cast<size_t>(first_elem.object), first_elem.index);
+ auto* array = reinterpret_cast<AtomicPair<uintptr_t>*>(dex_cache);
+ AtomicPair<uintptr_t> v(first_elem.index, reinterpret_cast<size_t>(first_elem.object));
AtomicPairStoreRelease(&array[0], v);
}
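The constructor arguments flip here because the two slots are no longer
symmetric: only the key word must keep its top 32 bits free for the
seq-lock, so the small slot index becomes the key and the object pointer,
which may use all 64 bits, becomes the val. A sketch of the invariant this
relies on (the check is illustrative; the patch enforces it via the DCHECK
in AtomicPairStoreRelease):

  // On 64-bit targets (the only ones taking the seq-lock path), slot indices
  // are well below 2^32, so the version/lock bytes of the key stay clear.
  uint64_t key = first_elem.index;
  DCHECK_EQ(key & kSeqMask, 0u);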
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 20e3e6c..2b5aa8e 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -142,16 +142,16 @@
private:
NativeDexCachePair<T> GetNativePair(std::atomic<NativeDexCachePair<T>>* pair_array, size_t idx) {
- auto* array = reinterpret_cast<std::atomic<AtomicPair<uintptr_t>>*>(pair_array);
+ auto* array = reinterpret_cast<AtomicPair<uintptr_t>*>(pair_array);
AtomicPair<uintptr_t> value = AtomicPairLoadAcquire(&array[idx]);
- return NativeDexCachePair<T>(reinterpret_cast<T*>(value.first), value.second);
+ return NativeDexCachePair<T>(reinterpret_cast<T*>(value.val), value.key);
}
void SetNativePair(std::atomic<NativeDexCachePair<T>>* pair_array,
size_t idx,
NativeDexCachePair<T> pair) {
- auto* array = reinterpret_cast<std::atomic<AtomicPair<uintptr_t>>*>(pair_array);
- AtomicPair<uintptr_t> v(reinterpret_cast<size_t>(pair.object), pair.index);
+ auto* array = reinterpret_cast<AtomicPair<uintptr_t>*>(pair_array);
+ AtomicPair<uintptr_t> v(pair.index, reinterpret_cast<size_t>(pair.object));
AtomicPairStoreRelease(&array[idx], v);
}
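A minimal usage sketch of the resulting API (stand-alone test-style code,
not part of the patch):

  #include "base/atomic_pair.h"

  void Example() {
    art::AtomicPair<uint64_t> slot(0u, 0u);

    // Writer: publish (index, pointer-sized value) as one atomic pair.
    art::AtomicPairStoreRelease(&slot, art::AtomicPair<uint64_t>(42u, 0xdeadbeefu));

    // Reader: always sees a matching (key, val) pair, never a torn mix.
    art::AtomicPair<uint64_t> seen = art::AtomicPairLoadAcquire(&slot);
    CHECK_EQ(seen.key, 42u);
    CHECK_EQ(seen.val, 0xdeadbeefu);
  }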