Port my AOSP QuasiAtomic rewrite to art.
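
A minimal usage sketch of the new interface (illustrative only; the
hypothetical Example() is not part of this change, and the real wiring
is in the runtime.cc hunks below):

  #include "atomic.h"

  void Example() {
    art::QuasiAtomic::Startup();  // Once, before any 64-bit quasi-atomic ops.
    volatile int64_t value = 0;
    art::QuasiAtomic::Swap64(42, &value);  // Returns the previous value; no barriers.
    if (art::QuasiAtomic::Cas64(42, 43, &value) == 0) {
      // The swap happened: Cas64 returns 0 on success, nonzero on failure.
    }
    int64_t current = art::QuasiAtomic::Read64(&value);  // 43 if the CAS succeeded.
    art::QuasiAtomic::Shutdown();  // Once, at teardown.
  }
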
Change-Id: I9e8fe487b15083cfc441a90ec1ec0eb5e645229e
diff --git a/src/atomic.cc b/src/atomic.cc
index 0625f1f..b923f91 100644
--- a/src/atomic.cc
+++ b/src/atomic.cc
@@ -16,73 +16,54 @@
#include "atomic.h"
-#include <sched.h>
+#include <pthread.h>
+
+#include <vector>
+
+#include "mutex.h"
+#include "stl_util.h"
+#include "stringprintf.h"
+
+#if defined(__APPLE__)
+#include <libkern/OSAtomic.h>
+#endif
+#if defined(__arm__)
+#include <machine/cpu-features.h>
+#endif
namespace art {
-/*
- * Quasi-atomic 64-bit operations, for platforms that lack the real thing.
- *
- * TODO: unify ARMv6/x86/sh implementations using the to-be-written
- * spin lock implementation. We don't want to rely on mutex innards,
- * and it would be great if all platforms were running the same code.
- */
-
#if defined(HAVE_MACOSX_IPC)
-
-#include <libkern/OSAtomic.h>
-
-#if defined(__ppc__) \
- || defined(__PPC__) \
- || defined(__powerpc__) \
- || defined(__powerpc) \
- || defined(__POWERPC__) \
- || defined(_M_PPC) \
- || defined(__PPC)
-#define NEED_QUASIATOMICS 1
-#else
-
-int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
- return OSAtomicCompareAndSwap64Barrier(old_value, new_value, const_cast<int64_t*>(addr)) == 0;
-}
-
-static inline int64_t QuasiAtomicSwap64Impl(int64_t value, volatile int64_t* addr) {
- int64_t old_value;
- do {
- old_value = *addr;
- } while (QuasiAtomicCas64(old_value, value, addr));
- return old_value;
-}
-
-int64_t QuasiAtomicSwap64(int64_t value, volatile int64_t* addr) {
- return QuasiAtomicSwap64Impl(value, addr);
-}
-
-int64_t QuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr) {
- ANDROID_MEMBAR_STORE();
- int64_t old_value = QuasiAtomicSwap64Impl(value, addr);
- /* TUNING: barriers can be avoided on some architectures */
- ANDROID_MEMBAR_FULL();
- return old_value;
-}
-
-int64_t QuasiAtomicRead64(volatile const int64_t* addr) {
- return OSAtomicAdd64Barrier(0, const_cast<volatile int64_t*>(addr));
-}
-#endif
+#define NEED_MAC_QUASI_ATOMICS 1
#elif defined(__i386__) || defined(__x86_64__)
-#define NEED_QUASIATOMICS 1
+#define NEED_PTHREADS_QUASI_ATOMICS 1
-#elif __arm__
-#include <machine/cpu-features.h>
+#elif defined(__mips__)
+#define NEED_PTHREADS_QUASI_ATOMICS 1
-#ifdef __ARM_HAVE_LDREXD
+#elif defined(__arm__)
+
+#if defined(__ARM_HAVE_LDREXD)
+#define NEED_ARM_LDREXD_QUASI_ATOMICS 1
+#else
+#define NEED_PTHREADS_QUASI_ATOMICS 1
+#endif
+
+#elif defined(__sh__)
+#define NEED_PTHREADS_QUASI_ATOMICS 1
+
+#else
+#error "QuasiAtomic unsupported on this platform"
+#endif
+
+// *****************************************************************************
+
+#if NEED_ARM_LDREXD_QUASI_ATOMICS
+
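+// ARMv7's ldrexd/strexd form a 64-bit exclusive load/store pair: strexd
+// writes 0 to its status register only if no other observer touched the
+// location in between, so the loops below retry until the store succeeds.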
static inline int64_t QuasiAtomicSwap64Impl(int64_t new_value, volatile int64_t* addr) {
int64_t prev;
int status;
do {
- __asm__ __volatile__("@ QuasiAtomicSwap64\n"
+ __asm__ __volatile__("@ QuasiAtomic::Swap64\n"
"ldrexd %0, %H0, [%3]\n"
"strexd %1, %4, %H4, [%3]"
: "=&r" (prev), "=&r" (status), "+m"(*addr)
@@ -92,22 +73,31 @@
return prev;
}
-int64_t QuasiAtomicSwap64(int64_t new_value, volatile int64_t* addr) {
+int64_t QuasiAtomic::Swap64(int64_t new_value, volatile int64_t* addr) {
return QuasiAtomicSwap64Impl(new_value, addr);
}
-int64_t QuasiAtomicSwap64Sync(int64_t new_value, volatile int64_t* addr) {
+int64_t QuasiAtomic::Swap64Sync(int64_t new_value, volatile int64_t* addr) {
ANDROID_MEMBAR_STORE();
int64_t old_value = QuasiAtomicSwap64Impl(new_value, addr);
ANDROID_MEMBAR_FULL();
return old_value;
}
-int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
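+// A single ldrexd is architecturally a single-copy atomic 64-bit load,
+// so Read64 needs no paired strexd or retry loop.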
+int64_t QuasiAtomic::Read64(volatile const int64_t* addr) {
+ int64_t value;
+ __asm__ __volatile__("@ QuasiAtomic::Read64\n"
+ "ldrexd %0, %H0, [%1]"
+ : "=&r" (value)
+ : "r" (addr));
+ return value;
+}
+
+int QuasiAtomic::Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
int64_t prev;
int status;
do {
- __asm__ __volatile__("@ QuasiAtomicCas64\n"
+ __asm__ __volatile__("@ QuasiAtomic::Cas64\n"
"ldrexd %0, %H0, [%3]\n"
"mov %1, #0\n"
"teq %0, %4\n"
@@ -120,180 +110,101 @@
return prev != old_value;
}
-int64_t QuasiAtomicRead64(volatile const int64_t* addr) {
- int64_t value;
- __asm__ __volatile__("@ QuasiAtomicRead64\n"
- "ldrexd %0, %H0, [%1]"
- : "=&r" (value)
- : "r" (addr));
- return value;
-}
+#endif
-#else
+// *****************************************************************************
-// on the device, we implement the 64-bit atomic operations through
-// mutex locking. normally, this is bad because we must initialize
-// a pthread_mutex_t before being able to use it, and this means
-// having to do an initialization check on each function call, and
-// that's where really ugly things begin...
-//
-// BUT, as a special twist, we take advantage of the fact that in our
-// pthread library, a mutex is simply a volatile word whose value is always
-// initialized to 0. In other words, simply declaring a static mutex
-// object initializes it !
-//
-// another twist is that we use a small array of mutexes to dispatch
-// the contention locks from different memory addresses
-//
+#if NEED_MAC_QUASI_ATOMICS
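+// OSAtomic offers 64-bit compare-and-swap and add, but no 64-bit exchange,
+// so Swap64 below is synthesized from a Cas64 retry loop.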
-#include <pthread.h>
-
-#define SWAP_LOCK_COUNT 32U
-static pthread_mutex_t _swap_locks[SWAP_LOCK_COUNT];
-
-#define SWAP_LOCK(addr) &_swap_locks[((unsigned)(void*)(addr) >> 3U) % SWAP_LOCK_COUNT]
-
-int64_t QuasiAtomicSwap64(int64_t value, volatile int64_t* addr) {
- pthread_mutex_t* lock = SWAP_LOCK(addr);
-
- pthread_mutex_lock(lock);
-
- int64_t old_value = *addr;
- *addr = value;
-
- pthread_mutex_unlock(lock);
+static inline int64_t QuasiAtomicSwap64Impl(int64_t value, volatile int64_t* addr) {
+ int64_t old_value;
+ do {
+ old_value = *addr;
+ } while (QuasiAtomic::Cas64(old_value, value, addr));
return old_value;
}
-int64_t QuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr) {
- // Same as QuasiAtomicSwap64 - mutex handles barrier.
- return QuasiAtomicSwap64(value, addr);
+int64_t QuasiAtomic::Swap64(int64_t value, volatile int64_t* addr) {
+ return QuasiAtomicSwap64Impl(value, addr);
}
-int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
- int result;
- pthread_mutex_t* lock = SWAP_LOCK(addr);
+int64_t QuasiAtomic::Swap64Sync(int64_t value, volatile int64_t* addr) {
+ ANDROID_MEMBAR_STORE();
+ int64_t old_value = QuasiAtomicSwap64Impl(value, addr);
+ // TUNING: barriers can be avoided on some architectures.
+ ANDROID_MEMBAR_FULL();
+ return old_value;
+}
- pthread_mutex_lock(lock);
+int64_t QuasiAtomic::Read64(volatile const int64_t* addr) {
+ return OSAtomicAdd64Barrier(0, const_cast<volatile int64_t*>(addr));
+}
+int QuasiAtomic::Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
+ return OSAtomicCompareAndSwap64Barrier(old_value, new_value, const_cast<int64_t*>(addr)) == 0;
+}
+
+#endif
+
+// *****************************************************************************
+
+#if NEED_PTHREADS_QUASI_ATOMICS
+
+// In the absence of a better implementation, we implement the 64-bit atomic
+// operations through mutex locking.
+
+// We stripe across a bunch of different mutexes to reduce contention.
+static const size_t kSwapLockCount = 32;
+static std::vector<Mutex*>* gSwapLocks;
+
+void QuasiAtomic::Startup() {
+ gSwapLocks = new std::vector<Mutex*>;
+ for (size_t i = 0; i < kSwapLockCount; ++i) {
+    gSwapLocks->push_back(new Mutex(StringPrintf("QuasiAtomic stripe %zu", i).c_str()));
+ }
+}
+
+void QuasiAtomic::Shutdown() {
+ STLDeleteElements(gSwapLocks);
+ delete gSwapLocks;
+}
+
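+// int64_t fields are normally 8-byte aligned, so the low three address bits
+// carry no information; shift them off before choosing a stripe.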
+static inline Mutex& GetSwapLock(const volatile int64_t* addr) {
+  return *(*gSwapLocks)[(reinterpret_cast<uintptr_t>(addr) >> 3U) % kSwapLockCount];
+}
+
+int64_t QuasiAtomic::Swap64(int64_t value, volatile int64_t* addr) {
+ MutexLock mu(GetSwapLock(addr));
+ int64_t old_value = *addr;
+ *addr = value;
+ return old_value;
+}
+
+int64_t QuasiAtomic::Swap64Sync(int64_t value, volatile int64_t* addr) {
+  // Same as QuasiAtomic::Swap64 - mutex handles barrier.
+ return QuasiAtomic::Swap64(value, addr);
+}
+
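+// Returns 0 on success and nonzero on failure, matching the
+// android_atomic_cmpxchg() convention.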
+int QuasiAtomic::Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
+ MutexLock mu(GetSwapLock(addr));
if (*addr == old_value) {
*addr = new_value;
- result = 0;
- } else {
- result = 1;
+ return 0;
}
- pthread_mutex_unlock(lock);
- return result;
+ return 1;
}
-int64_t QuasiAtomicRead64(volatile const int64_t* addr) {
- int64_t result;
- pthread_mutex_t* lock = SWAP_LOCK(addr);
-
- pthread_mutex_lock(lock);
- result = *addr;
- pthread_mutex_unlock(lock);
- return result;
+int64_t QuasiAtomic::Read64(volatile const int64_t* addr) {
+ MutexLock mu(GetSwapLock(addr));
+ return *addr;
}
-#endif /*__ARM_HAVE_LDREXD*/
-
-/*****************************************************************************/
-#elif __sh__
-#define NEED_QUASIATOMICS 1
-
#else
-#error "Unsupported atomic operations for this platform"
+
+// The other implementations don't need any special setup.
+void QuasiAtomic::Startup() {}
+void QuasiAtomic::Shutdown() {}
+
#endif
-
-#if NEED_QUASIATOMICS
-
-/* Note that a spinlock is *not* a good idea in general
- * since they can introduce subtle issues. For example,
- * a real-time thread trying to acquire a spinlock already
- * acquired by another thread will never yeld, making the
- * CPU loop endlessly!
- *
- * However, this code is only used on the Linux simulator
- * so it's probably ok for us.
- *
- * The alternative is to use a pthread mutex, but
- * these must be initialized before being used, and
- * then you have the problem of lazily initializing
- * a mutex without any other synchronization primitive.
- *
- * TODO: these currently use sched_yield(), which is not guaranteed to
- * do anything at all. We need to use dvmIterativeSleep or a wait /
- * notify mechanism if the initial attempt fails.
- */
-
-/* global spinlock for all 64-bit quasiatomic operations */
-static int32_t quasiatomic_spinlock = 0;
-
-int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
- int result;
-
- while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
- Sleep(0);
-#else
- sched_yield();
-#endif
- }
-
- if (*addr == old_value) {
- *addr = new_value;
- result = 0;
- } else {
- result = 1;
- }
-
- android_atomic_release_store(0, &quasiatomic_spinlock);
-
- return result;
-}
-
-int64_t QuasiAtomicRead64(volatile const int64_t* addr) {
- int64_t result;
-
- while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
- Sleep(0);
-#else
- sched_yield();
-#endif
- }
-
- result = *addr;
- android_atomic_release_store(0, &quasiatomic_spinlock);
-
- return result;
-}
-
-int64_t QuasiAtomicSwap64(int64_t value, volatile int64_t* addr) {
- int64_t result;
-
- while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
- Sleep(0);
-#else
- sched_yield();
-#endif
- }
-
- result = *addr;
- *addr = value;
- android_atomic_release_store(0, &quasiatomic_spinlock);
-
- return result;
-}
-
-int64_t QuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr) {
- // Same as QuasiAtomicSwap64 - syscall handles barrier.
- return QuasiAtomicSwap64(value, addr);
-}
-
-#endif /*NEED_QUASIATOMICS*/
-
} // namespace art
diff --git a/src/atomic.h b/src/atomic.h
index dab625e..c6c0f7d 100644
--- a/src/atomic.h
+++ b/src/atomic.h
@@ -17,44 +17,46 @@
#ifndef ART_SRC_ATOMIC_H_
#define ART_SRC_ATOMIC_H_
-#include <cutils/atomic.h> /* use common Android atomic ops */
-#include <cutils/atomic-inline.h> /* and some uncommon ones */
+#include <stdint.h>
+
+#include "cutils/atomic.h"
+#include "cutils/atomic-inline.h"
+#include "macros.h"
namespace art {
-/*
- * NOTE: Two "quasiatomic" operations on the exact same memory address
- * are guaranteed to operate atomically with respect to each other,
- * but no guarantees are made about quasiatomic operations mixed with
- * non-quasiatomic operations on the same address, nor about
- * quasiatomic operations that are performed on partially-overlapping
- * memory.
- *
- * Only the "Sync" functions provide a memory barrier.
- */
+// NOTE: Two "quasiatomic" operations on the exact same memory address
+// are guaranteed to operate atomically with respect to each other,
+// but no guarantees are made about quasiatomic operations mixed with
+// non-quasiatomic operations on the same address, nor about
+// quasiatomic operations that are performed on partially-overlapping
+// memory.
+//
+// Only the "Sync" functions provide a memory barrier.
+class QuasiAtomic {
+ public:
+ static void Startup();
-/*
- * Swap the 64-bit value at "addr" with "value". Returns the previous
- * value. No memory barriers.
- */
-int64_t QuasiAtomicSwap64(int64_t value, volatile int64_t* addr);
+ static void Shutdown();
-/*
- * Swap the 64-bit value at "addr" with "value". Returns the previous
- * value. Provides memory barriers.
- */
-int64_t QuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr);
+ // Swaps the 64-bit value at "addr" with "value". Returns the previous
+ // value. No memory barriers.
+ static int64_t Swap64(int64_t value, volatile int64_t* addr);
-/*
- * Read the 64-bit value at "addr".
- */
-int64_t QuasiAtomicRead64(volatile const int64_t* addr);
+ // Swaps the 64-bit value at "addr" with "value". Returns the previous
+ // value. Provides memory barriers.
+ static int64_t Swap64Sync(int64_t value, volatile int64_t* addr);
-/*
- * If the value at "addr" is equal to "old_value", replace it with "new_value"
- * and return 0. Otherwise, don't swap, and return nonzero.
- */
-int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr);
+ // Reads the 64-bit value at "addr".
+ static int64_t Read64(volatile const int64_t* addr);
+
+ // If the value at "addr" is equal to "old_value", replace it with "new_value"
+ // and return 0. Otherwise, don't swap, and return nonzero.
+ static int Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(QuasiAtomic);
+};
} // namespace art
diff --git a/src/jdwp/jdwp_handler.cc b/src/jdwp/jdwp_handler.cc
index 355fc5e..73c70cb 100644
--- a/src/jdwp/jdwp_handler.cc
+++ b/src/jdwp/jdwp_handler.cc
@@ -1629,7 +1629,7 @@
* so waitForDebugger() doesn't return if we stall for a bit here.
*/
Dbg::GoActive();
- QuasiAtomicSwap64(0, &lastActivityWhen);
+ QuasiAtomic::Swap64(0, &lastActivityWhen);
}
/*
@@ -1698,7 +1698,7 @@
* the initial setup. Only update if this is a non-DDMS packet.
*/
if (pHeader->cmdSet != kJDWPDdmCmdSet) {
- QuasiAtomicSwap64(MilliTime(), &lastActivityWhen);
+ QuasiAtomic::Swap64(MilliTime(), &lastActivityWhen);
}
/* tell the VM that GC is okay again */
diff --git a/src/jdwp/jdwp_main.cc b/src/jdwp/jdwp_main.cc
index a820cc1..df24b8c 100644
--- a/src/jdwp/jdwp_main.cc
+++ b/src/jdwp/jdwp_main.cc
@@ -416,7 +416,7 @@
return -1;
}
- int64_t last = QuasiAtomicRead64(&lastActivityWhen);
+ int64_t last = QuasiAtomic::Read64(&lastActivityWhen);
/* initializing or in the middle of something? */
if (last == 0) {
diff --git a/src/native/sun_misc_Unsafe.cc b/src/native/sun_misc_Unsafe.cc
index 214771b..8cc549a 100644
--- a/src/native/sun_misc_Unsafe.cc
+++ b/src/native/sun_misc_Unsafe.cc
@@ -54,7 +54,7 @@
byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
volatile int64_t* address = reinterpret_cast<volatile int64_t*>(raw_addr);
// Note: android_atomic_cmpxchg() returns 0 on success, not failure.
- int result = QuasiAtomicCas64(expectedValue, newValue, address);
+ int result = QuasiAtomic::Cas64(expectedValue, newValue, address);
return (result == 0);
}
diff --git a/src/object.h b/src/object.h
index 5e67f62..f5970f6 100644
--- a/src/object.h
+++ b/src/object.h
@@ -361,7 +361,7 @@
const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
const int64_t* addr = reinterpret_cast<const int64_t*>(raw_addr);
if (UNLIKELY(is_volatile)) {
- uint64_t result = QuasiAtomicRead64(addr);
+ uint64_t result = QuasiAtomic::Read64(addr);
ANDROID_MEMBAR_FULL();
return result;
} else {
@@ -375,7 +375,7 @@
int64_t* addr = reinterpret_cast<int64_t*>(raw_addr);
if (UNLIKELY(is_volatile)) {
ANDROID_MEMBAR_STORE();
- QuasiAtomicSwap64(new_value, addr);
+ QuasiAtomic::Swap64(new_value, addr);
// Post-store barrier not required due to use of atomic op or mutex.
} else {
*addr = new_value;
diff --git a/src/runtime.cc b/src/runtime.cc
index a94a93a..aabd86f 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -108,6 +108,8 @@
delete intern_table_;
delete java_vm_;
Thread::Shutdown();
+ QuasiAtomic::Shutdown();
+
// TODO: acquire a static mutex on Runtime to avoid racing.
CHECK(instance_ == NULL || instance_ == this);
instance_ = NULL;
@@ -615,6 +617,8 @@
}
VLOG(startup) << "Runtime::Init -verbose:startup enabled";
+ QuasiAtomic::Startup();
+
SetJniGlobalsMax(options->jni_globals_max_);
Monitor::Init(options->lock_profiling_threshold_, options->hook_is_sensitive_thread_);