Use seq-lock for DexCache

Avoid the need for 16-byte atomics.

Test: ./art/test.py -b -r --host --optimizing
Change-Id: I818916dd0429de9b0ef364579264868db1d4082c
author: David Srbecky <dsrbecky@google.com> 2023-05-16 17:07:47 +0100
committer: David Srbecky <dsrbecky@google.com> 2023-06-01 10:22:47 +0000
commit: 80cb41997a3010ff25022f726c6a11634f5ae1b1 (patch)
tree: 163598ac6021a50cdcbefeb4a65dbe1c98cc8fa1
parent: 3092d17a21cd6cd4e28231bcb271ac2a17225413 (diff)
3 files changed, 57 insertions, 40 deletions
diff --git a/runtime/base/atomic_pair.h b/runtime/base/atomic_pair.h
index 1523b3b049..802627b041 100644
--- a/runtime/base/atomic_pair.h
+++ b/runtime/base/atomic_pair.h
@@ -17,65 +17,82 @@
 #ifndef ART_RUNTIME_BASE_ATOMIC_PAIR_H_
 #define ART_RUNTIME_BASE_ATOMIC_PAIR_H_
 
-#include "base/macros.h"
+#include <android-base/logging.h>
 
+#include <atomic>
 #include <type_traits>
 
+#include "base/macros.h"
+
 namespace art {
 
+// Implement 16-byte atomic pair using the seq-lock synchronization algorithm.
+// This is currently only used for DexCache.
+//
+// This uses top 4-bytes of the key as version counter and lock bit,
+// which means the stored pair key can not use those bytes.
+//
+// This allows us to read the cache without exclusive access to the cache line.
+//
+// The 8-byte atomic pair uses the normal single-instruction implementation.
+//
+static constexpr uint64_t kSeqMask = (0xFFFFFFFFull << 32);
+static constexpr uint64_t kSeqLock = (0x80000000ull << 32);
+static constexpr uint64_t kSeqIncr = (0x00000001ull << 32);
+
 // std::pair<> is not trivially copyable and as such it is unsuitable for atomic operations.
 template <typename IntType>
 struct PACKED(2 * sizeof(IntType)) AtomicPair {
   static_assert(std::is_integral_v<IntType>);
 
-  constexpr AtomicPair() : first(0), second(0) { }
-  AtomicPair(IntType f, IntType s) : first(f), second(s) { }
-  AtomicPair(const AtomicPair&) = default;
-  AtomicPair& operator=(const AtomicPair&) = default;
+  AtomicPair(IntType f, IntType s) : key(f), val(s) {}
 
-  IntType first;
-  IntType second;
+  IntType key;
+  IntType val;
 };
 
 template <typename IntType>
-ALWAYS_INLINE static inline AtomicPair<IntType> AtomicPairLoadAcquire(
-    std::atomic<AtomicPair<IntType>>* target) {
+ALWAYS_INLINE static inline AtomicPair<IntType> AtomicPairLoadAcquire(AtomicPair<IntType>* pair) {
+  static_assert(std::is_trivially_copyable<AtomicPair<IntType>>::value);
+  auto* target = reinterpret_cast<std::atomic<AtomicPair<IntType>>*>(pair);
   return target->load(std::memory_order_acquire);
 }
 
 template <typename IntType>
-ALWAYS_INLINE static inline void AtomicPairStoreRelease(std::atomic<AtomicPair<IntType>>* target,
+ALWAYS_INLINE static inline void AtomicPairStoreRelease(AtomicPair<IntType>* pair,
                                                         AtomicPair<IntType> value) {
+  static_assert(std::is_trivially_copyable<AtomicPair<IntType>>::value);
+  auto* target = reinterpret_cast<std::atomic<AtomicPair<IntType>>*>(pair);
   target->store(value, std::memory_order_release);
 }
 
-// LLVM uses generic lock-based implementation for x86_64, we can do better with CMPXCHG16B.
-#if defined(__x86_64__)
-ALWAYS_INLINE static inline AtomicPair<uint64_t> AtomicPairLoadAcquire(
-    std::atomic<AtomicPair<uint64_t>>* target) {
-  uint64_t first, second;
-  __asm__ __volatile__(
-      "lock cmpxchg16b (%2)"
-      : "=&a"(first), "=&d"(second)
-      : "r"(target), "a"(0), "d"(0), "b"(0), "c"(0)
-      : "cc");
-  return {first, second};
+ALWAYS_INLINE static inline AtomicPair<uint64_t> AtomicPairLoadAcquire(AtomicPair<uint64_t>* pair) {
+  auto* key_ptr = reinterpret_cast<std::atomic_uint64_t*>(&pair->key);
+  auto* val_ptr = reinterpret_cast<std::atomic_uint64_t*>(&pair->val);
+  while (true) {
+    uint64_t key0 = key_ptr->load(std::memory_order_acquire);
+    uint64_t val = val_ptr->load(std::memory_order_acquire);
+    uint64_t key1 = key_ptr->load(std::memory_order_relaxed);
+    uint64_t key = key0 & ~kSeqMask;
+    if (LIKELY((key0 & kSeqLock) == 0 && key0 == key1)) {
+      return {key, val};
+    }
+  }
 }
 
-ALWAYS_INLINE static inline void AtomicPairStoreRelease(
-    std::atomic<AtomicPair<uint64_t>>* target, AtomicPair<uint64_t> value) {
-  uint64_t first, second;
-  __asm__ __volatile__ (
-      "movq (%2), %%rax\n\t"
-      "movq 8(%2), %%rdx\n\t"
-      "1:\n\t"
-      "lock cmpxchg16b (%2)\n\t"
-      "jnz 1b"
-      : "=&a"(first), "=&d"(second)
-      : "r"(target), "b"(value.first), "c"(value.second)
-      : "cc");
+ALWAYS_INLINE static inline void AtomicPairStoreRelease(AtomicPair<uint64_t>* pair,
+                                                        AtomicPair<uint64_t> value) {
+  DCHECK((value.key & kSeqMask) == 0) << "Key=0x" << std::hex << value.key;
+  auto* key_ptr = reinterpret_cast<std::atomic_uint64_t*>(&pair->key);
+  auto* val_ptr = reinterpret_cast<std::atomic_uint64_t*>(&pair->val);
+  uint64_t key = key_ptr->load(std::memory_order_relaxed);
+  do {
+    key &= ~kSeqLock;  // Ensure that the CAS below fails if the lock bit is already set.
+  } while (!key_ptr->compare_exchange_weak(key, key | kSeqLock));
+  key = (((key & kSeqMask) + kSeqIncr) & ~kSeqLock) | (value.key & ~kSeqMask);
+  val_ptr->store(value.val, std::memory_order_release);
+  key_ptr->store(key, std::memory_order_release);
 }
-#endif  // defined(__x86_64__)
 
 }  // namespace art
 
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 8b8eecc0cc..dbc091d2af 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -110,8 +110,8 @@ inline void NativeDexCachePair<T>::Initialize(std::atomic<NativeDexCachePair<T>>
   first_elem.object = nullptr;
   first_elem.index = InvalidIndexForSlot(0);
 
-  auto* array = reinterpret_cast<std::atomic<AtomicPair<uintptr_t>>*>(dex_cache);
-  AtomicPair<uintptr_t> v(reinterpret_cast<size_t>(first_elem.object), first_elem.index);
+  auto* array = reinterpret_cast<AtomicPair<uintptr_t>*>(dex_cache);
+  AtomicPair<uintptr_t> v(first_elem.index, reinterpret_cast<size_t>(first_elem.object));
   AtomicPairStoreRelease(&array[0], v);
 }
 
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 20e3e6c253..2b5aa8e911 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -142,16 +142,16 @@ template <typename T, size_t size> class NativeDexCachePairArray {
 
  private:
   NativeDexCachePair<T> GetNativePair(std::atomic<NativeDexCachePair<T>>* pair_array, size_t idx) {
-    auto* array = reinterpret_cast<std::atomic<AtomicPair<uintptr_t>>*>(pair_array);
+    auto* array = reinterpret_cast<AtomicPair<uintptr_t>*>(pair_array);
     AtomicPair<uintptr_t> value = AtomicPairLoadAcquire(&array[idx]);
-    return NativeDexCachePair<T>(reinterpret_cast<T*>(value.first), value.second);
+    return NativeDexCachePair<T>(reinterpret_cast<T*>(value.val), value.key);
   }
 
   void SetNativePair(std::atomic<NativeDexCachePair<T>>* pair_array,
                      size_t idx,
                      NativeDexCachePair<T> pair) {
-    auto* array = reinterpret_cast<std::atomic<AtomicPair<uintptr_t>>*>(pair_array);
-    AtomicPair<uintptr_t> v(reinterpret_cast<size_t>(pair.object), pair.index);
+    auto* array = reinterpret_cast<AtomicPair<uintptr_t>*>(pair_array);
+    AtomicPair<uintptr_t> v(pair.index, reinterpret_cast<size_t>(pair.object));
     AtomicPairStoreRelease(&array[idx], v);
   }
author	David Srbecky <dsrbecky@google.com>	2023-05-16 17:07:47 +0100
committer	David Srbecky <dsrbecky@google.com>	2023-06-01 10:22:47 +0000
commit	80cb41997a3010ff25022f726c6a11634f5ae1b1 (patch)
tree	163598ac6021a50cdcbefeb4a65dbe1c98cc8fa1
parent	3092d17a21cd6cd4e28231bcb271ac2a17225413 (diff)