Implement Intel QuasiAtomics.
Don't use striped locks for 64-bit atomics on x86.

Replace QuasiAtomic::Swap64 with QuasiAtomic::Write64, which matches how the
existing callers use Swap64 (none of them need the previous value) and is
closer to Intel's implementation.

Report via AtomicLong's VMSupportsCS8 that MIPS doesn't support a lockless
64-bit compare-and-exchange.

Set the SSE2 flag for host and target Intel ART builds, since our codegen
assumes it.
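
For reference, a minimal sketch of how callers use the reworked interface.
The helper functions below are illustrative only and not part of this change;
they assume QuasiAtomic::Startup() has already run on platforms that still
need the striped mutexes:

  #include "atomic.h"

  namespace art {

  // Illustrative: bump a 64-bit counter with the new bool-returning Cas64.
  bool ExampleIncrement64(volatile int64_t* addr) {
    int64_t old_value = QuasiAtomic::Read64(addr);  // non-tearing read
    // Cas64 now returns true when the swap happened (it used to return 0 on success).
    return QuasiAtomic::Cas64(old_value, old_value + 1, addr);
  }

  // Illustrative: plain non-tearing store, replacing the Swap64 call sites that
  // ignored the returned value.
  void ExampleReset64(volatile int64_t* addr) {
    QuasiAtomic::Write64(addr, 0);
  }

  }  // namespace art
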
Change-Id: Ic1cd5c3b06838e42c6f94e0dd91e77a2d0bb5868
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 2556fa2..f2f38e1 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -87,7 +87,11 @@
ART_HOST_CFLAGS := $(art_cflags) -DANDROID_SMP=1 -DART_BASE_ADDRESS=$(IMG_HOST_BASE_ADDRESS)
# The host GCC isn't necessarily new enough to support -Wthread-safety (GCC 4.4).
-ART_HOST_CFLAGS := $(filter-out -Wthread-safety,$(ART_HOST_CFLAGS))
+ART_HOST_CFLAGS := $(filter-out -Wthread-safety,$(ART_HOST_CFLAGS)) -msse2
ART_TARGET_CFLAGS := $(art_cflags) -DART_TARGET -DART_BASE_ADDRESS=$(IMG_TARGET_BASE_ADDRESS)
+ifeq ($(TARGET_ARCH),x86)
+ART_TARGET_CFLAGS += -msse2
+endif
ifeq ($(TARGET_CPU_SMP),true)
diff --git a/src/atomic.cc b/src/atomic.cc
index 5bbbb4f..e7bab09 100644
--- a/src/atomic.cc
+++ b/src/atomic.cc
@@ -16,84 +16,94 @@
#include "atomic.h"
-#include <pthread.h>
+#define NEED_SWAP_MUTEXES !defined(__arm__) && !defined(__i386__)
+#if NEED_SWAP_MUTEXES
#include <vector>
-
#include "base/mutex.h"
#include "base/stl_util.h"
#include "base/stringprintf.h"
#include "thread.h"
-
-#if defined(__APPLE__)
-#include <libkern/OSAtomic.h>
-#endif
-#if defined(__arm__)
-#include <machine/cpu-features.h>
#endif
namespace art {
-#if defined(HAVE_MACOSX_IPC)
-#define NEED_MAC_QUASI_ATOMICS 1
+#if NEED_SWAP_MUTEXES
+// We stripe across a bunch of different mutexes to reduce contention.
+static const size_t kSwapMutexCount = 32;
+static std::vector<Mutex*>* gSwapMutexes;
-#elif defined(__i386__) || defined(__x86_64__)
-#define NEED_PTHREADS_QUASI_ATOMICS 1
-
-#elif defined(__mips__)
-#define NEED_PTHREADS_QUASI_ATOMICS 1
-
-#elif defined(__arm__)
-
-#if defined(__ARM_HAVE_LDREXD)
-#define NEED_ARM_LDREXD_QUASI_ATOMICS 1
-#else
-#define NEED_PTHREADS_QUASI_ATOMICS 1
+static Mutex& GetSwapMutex(const volatile int64_t* addr) {
+ return *(*gSwapMutexes)[((unsigned)(void*)(addr) >> 3U) % kSwapMutexCount];
+}
#endif
-#else
-#error "QuasiAtomic unsupported on this platform"
+void QuasiAtomic::Startup() {
+#if NEED_SWAP_MUTEXES
+ gSwapMutexes = new std::vector<Mutex*>;
+ for (size_t i = 0; i < kSwapMutexCount; ++i) {
+ gSwapMutexes->push_back(new Mutex(StringPrintf("QuasiAtomic stripe %d", i).c_str()));
+ }
#endif
-
-// *****************************************************************************
-
-#if NEED_ARM_LDREXD_QUASI_ATOMICS
-
-static inline int64_t QuasiAtomicSwap64Impl(int64_t new_value, volatile int64_t* addr) {
- int64_t prev;
- int status;
- do {
- __asm__ __volatile__("@ QuasiAtomic::Swap64\n"
- "ldrexd %0, %H0, [%3]\n"
- "strexd %1, %4, %H4, [%3]"
- : "=&r" (prev), "=&r" (status), "+m"(*addr)
- : "r" (addr), "r" (new_value)
- : "cc");
- } while (__builtin_expect(status != 0, 0));
- return prev;
}
-int64_t QuasiAtomic::Swap64(int64_t new_value, volatile int64_t* addr) {
- return QuasiAtomicSwap64Impl(new_value, addr);
-}
-
-int64_t QuasiAtomic::Swap64Sync(int64_t new_value, volatile int64_t* addr) {
- ANDROID_MEMBAR_STORE();
- int64_t old_value = QuasiAtomicSwap64Impl(new_value, addr);
- ANDROID_MEMBAR_FULL();
- return old_value;
+void QuasiAtomic::Shutdown() {
+#if NEED_SWAP_MUTEXES
+ STLDeleteElements(gSwapMutexes);
+ delete gSwapMutexes;
+#endif
}
int64_t QuasiAtomic::Read64(volatile const int64_t* addr) {
int64_t value;
+#if defined(__arm__)
+  // Exclusive loads are defined not to tear, so clearing the exclusive state isn't necessary. If
+  // we have LPAE (such as Cortex-A15) then an ldrd would suffice.
__asm__ __volatile__("@ QuasiAtomic::Read64\n"
"ldrexd %0, %H0, [%1]"
: "=&r" (value)
: "r" (addr));
+#elif defined(__i386__)
+ __asm__ __volatile__(
+ "movq %1, %0\n"
+ : "=x" (value)
+ : "m" (*addr));
+#else
+ MutexLock mu(Thread::Current(), GetSwapMutex(addr));
+ return *addr;
+#endif
return value;
}
-int QuasiAtomic::Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
+void QuasiAtomic::Write64(volatile int64_t* addr, int64_t value) {
+#if defined(__arm__)
+  // The write is done as a swap so that the cache line is in the exclusive state for the store.
+  // If we know the ARM architecture has LPAE (such as Cortex-A15), this isn't necessary and an
+  // strd will suffice.
+ int64_t prev;
+ int status;
+ do {
+ __asm__ __volatile__("@ QuasiAtomic::Write64\n"
+ "ldrexd %0, %H0, [%3]\n"
+ "strexd %1, %4, %H4, [%3]"
+ : "=&r" (prev), "=&r" (status), "+m"(*addr)
+ : "r" (addr), "r" (value)
+ : "cc");
+ } while (__builtin_expect(status != 0, 0));
+#elif defined(__i386__)
+ __asm__ __volatile__(
+ "movq %1, %0"
+ : "=m" (*addr)
+ : "x" (value));
+#else
+ MutexLock mu(Thread::Current(), GetSwapMutex(addr));
+ *addr = value;
+#endif
+}
+
+
+bool QuasiAtomic::Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
+#if defined(__arm__)
int64_t prev;
int status;
do {
@@ -108,103 +118,37 @@
: "cc");
} while (__builtin_expect(status != 0, 0));
-  return prev != old_value;
+  return prev == old_value;
-}
-
-#endif
-
-// *****************************************************************************
-
-#if NEED_MAC_QUASI_ATOMICS
-
-static inline int64_t QuasiAtomicSwap64Impl(int64_t value, volatile int64_t* addr) {
- int64_t old_value;
- do {
- old_value = *addr;
- } while (QuasiAtomic::Cas64(old_value, value, addr));
- return old_value;
-}
-
-int64_t QuasiAtomic::Swap64(int64_t value, volatile int64_t* addr) {
- return QuasiAtomicSwap64Impl(value, addr);
-}
-
-int64_t QuasiAtomic::Swap64Sync(int64_t value, volatile int64_t* addr) {
- ANDROID_MEMBAR_STORE();
- int64_t old_value = QuasiAtomicSwap64Impl(value, addr);
- // TUNING: barriers can be avoided on some architectures.
- ANDROID_MEMBAR_FULL();
- return old_value;
-}
-
-int64_t QuasiAtomic::Read64(volatile const int64_t* addr) {
- return OSAtomicAdd64Barrier(0, const_cast<volatile int64_t*>(addr));
-}
-
-int QuasiAtomic::Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
- return OSAtomicCompareAndSwap64Barrier(old_value, new_value, const_cast<int64_t*>(addr)) == 0;
-}
-
-#endif
-
-// *****************************************************************************
-
-#if NEED_PTHREADS_QUASI_ATOMICS
-
-// In the absence of a better implementation, we implement the 64-bit atomic
-// operations through mutex locking.
-
-// We stripe across a bunch of different mutexes to reduce contention.
-static const size_t kSwapLockCount = 32;
-static std::vector<Mutex*>* gSwapLocks;
-
-void QuasiAtomic::Startup() {
- gSwapLocks = new std::vector<Mutex*>;
- for (size_t i = 0; i < kSwapLockCount; ++i) {
- gSwapLocks->push_back(new Mutex(StringPrintf("QuasiAtomic stripe %d", i).c_str()));
- }
-}
-
-void QuasiAtomic::Shutdown() {
- STLDeleteElements(gSwapLocks);
- delete gSwapLocks;
-}
-
-static inline Mutex& GetSwapLock(const volatile int64_t* addr) {
- return *(*gSwapLocks)[((unsigned)(void*)(addr) >> 3U) % kSwapLockCount];
-}
-
-int64_t QuasiAtomic::Swap64(int64_t value, volatile int64_t* addr) {
- MutexLock mu(Thread::Current(), GetSwapLock(addr));
- int64_t old_value = *addr;
- *addr = value;
- return old_value;
-}
-
-int64_t QuasiAtomic::Swap64Sync(int64_t value, volatile int64_t* addr) {
- // Same as QuasiAtomicSwap64 - mutex handles barrier.
- return QuasiAtomic::Swap64(value, addr);
-}
-
-int QuasiAtomic::Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
- MutexLock mu(Thread::Current(), GetSwapLock(addr));
- if (*addr == old_value) {
- *addr = new_value;
- return 0;
- }
- return 1;
-}
-
-int64_t QuasiAtomic::Read64(volatile const int64_t* addr) {
- MutexLock mu(Thread::Current(), GetSwapLock(addr));
- return *addr;
-}
-
+#elif defined(__i386__)
+  // cmpxchg8b implicitly uses %ebx, which is also the PIC register, so it is saved and restored
+  // around the instruction; the low and high words of new_value are loaded into %ebx:%ecx by hand.
+ int8_t status;
+ __asm__ __volatile__ (
+ "pushl %%ebx\n"
+ "movl (%3), %%ebx\n"
+ "movl 4(%3), %%ecx\n"
+ "lock cmpxchg8b %1\n"
+ "sete %0\n"
+ "popl %%ebx"
+ : "=R" (status), "+m" (*addr)
+ : "A"(old_value), "D" (&new_value)
+ : "%ecx"
+ );
+ return status != 0;
#else
-
-// The other implementations don't need any special setup.
-void QuasiAtomic::Startup() {}
-void QuasiAtomic::Shutdown() {}
-
+ MutexLock mu(Thread::Current(), GetSwapMutex(addr));
+ if (*addr == old_value) {
+ *addr = new_value;
+ return true;
+ }
+ return false;
#endif
+}
+
+bool QuasiAtomic::LongAtomicsUseMutexes() {
+#if NEED_SWAP_MUTEXES
+ return true;
+#else
+ return false;
+#endif
+}
} // namespace art
diff --git a/src/atomic.h b/src/atomic.h
index c69a9d1..d340dc5 100644
--- a/src/atomic.h
+++ b/src/atomic.h
@@ -20,8 +20,6 @@
#include <stdint.h>
#include "base/macros.h"
-#include "cutils/atomic.h"
-#include "cutils/atomic-inline.h"
namespace art {
@@ -31,28 +29,24 @@
// non-quasiatomic operations on the same address, nor about
// quasiatomic operations that are performed on partially-overlapping
// memory.
-//
-// Only the "Sync" functions provide a memory barrier.
class QuasiAtomic {
public:
static void Startup();
static void Shutdown();
- // Swaps the 64-bit value at "addr" with "value". Returns the previous
- // value. No memory barriers.
- static int64_t Swap64(int64_t value, volatile int64_t* addr);
-
- // Swaps the 64-bit value at "addr" with "value". Returns the previous
- // value. Provides memory barriers.
- static int64_t Swap64Sync(int64_t value, volatile int64_t* addr);
-
- // Reads the 64-bit value at "addr".
+ // Reads the 64-bit value at "addr" without tearing.
static int64_t Read64(volatile const int64_t* addr);
- // If the value at "addr" is equal to "old_value", replace it with "new_value"
- // and return 0. Otherwise, don't swap, and return nonzero.
- static int Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr);
+ // Writes to the 64-bit value at "addr" without tearing.
+ static void Write64(volatile int64_t* addr, int64_t val);
+
+  // Atomically compare the value at "addr" to "old_value"; if they are equal, replace it with
+  // "new_value" and return true. Otherwise, don't swap, and return false.
+ static bool Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr);
+
+ // Does the architecture provide reasonable atomic long operations or do we fall back on mutexes?
+ static bool LongAtomicsUseMutexes();
private:
DISALLOW_COPY_AND_ASSIGN(QuasiAtomic);
diff --git a/src/jdwp/jdwp_handler.cc b/src/jdwp/jdwp_handler.cc
index bd50c61..cb13695 100644
--- a/src/jdwp/jdwp_handler.cc
+++ b/src/jdwp/jdwp_handler.cc
@@ -1842,7 +1842,7 @@
* so waitForDebugger() doesn't return if we stall for a bit here.
*/
Dbg::GoActive();
- QuasiAtomic::Swap64(0, &last_activity_time_ms_);
+ QuasiAtomic::Write64(&last_activity_time_ms_, 0);
}
/*
@@ -1912,7 +1912,7 @@
* the initial setup. Only update if this is a non-DDMS packet.
*/
if (pHeader->cmdSet != kJDWPDdmCmdSet) {
- QuasiAtomic::Swap64(MilliTime(), &last_activity_time_ms_);
+ QuasiAtomic::Write64(&last_activity_time_ms_, MilliTime());
}
/* tell the VM that GC is okay again */
diff --git a/src/native/java_util_concurrent_atomic_AtomicLong.cc b/src/native/java_util_concurrent_atomic_AtomicLong.cc
index 7caa23f..bf92e12 100644
--- a/src/native/java_util_concurrent_atomic_AtomicLong.cc
+++ b/src/native/java_util_concurrent_atomic_AtomicLong.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
+#include "atomic.h"
#include "jni_internal.h"
-#include "object.h"
namespace art {
static jboolean AtomicLong_VMSupportsCS8(JNIEnv*, jclass) {
- return JNI_TRUE;
+ return QuasiAtomic::LongAtomicsUseMutexes() ? JNI_FALSE : JNI_TRUE;
}
static JNINativeMethod gMethods[] = {
diff --git a/src/native/sun_misc_Unsafe.cc b/src/native/sun_misc_Unsafe.cc
index 5dc32b0..cb06a0b 100644
--- a/src/native/sun_misc_Unsafe.cc
+++ b/src/native/sun_misc_Unsafe.cc
@@ -27,7 +27,7 @@
volatile int32_t* address = reinterpret_cast<volatile int32_t*>(raw_addr);
// Note: android_atomic_release_cas() returns 0 on success, not failure.
int result = android_atomic_release_cas(expectedValue, newValue, address);
- return (result == 0);
+ return (result == 0) ? JNI_TRUE : JNI_FALSE;
}
static jboolean Unsafe_compareAndSwapLong(JNIEnv* env, jobject, jobject javaObj, jlong offset, jlong expectedValue, jlong newValue) {
@@ -36,8 +36,8 @@
byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
volatile int64_t* address = reinterpret_cast<volatile int64_t*>(raw_addr);
-  // Note: android_atomic_cmpxchg() returns 0 on success, not failure.
+  // Note: QuasiAtomic::Cas64 returns true on success, false on failure.
- int result = QuasiAtomic::Cas64(expectedValue, newValue, address);
- return (result == 0);
+ bool success = QuasiAtomic::Cas64(expectedValue, newValue, address);
+ return success ? JNI_TRUE : JNI_FALSE;
}
static jboolean Unsafe_compareAndSwapObject(JNIEnv* env, jobject, jobject javaObj, jlong offset, jobject javaExpectedValue, jobject javaNewValue) {
@@ -53,7 +53,7 @@
if (result == 0) {
Runtime::Current()->GetHeap()->WriteBarrierField(obj, MemberOffset(offset), newValue);
}
- return (result == 0);
+ return (result == 0) ? JNI_TRUE : JNI_FALSE;
}
static jint Unsafe_getInt(JNIEnv* env, jobject, jobject javaObj, jlong offset) {
diff --git a/src/object.h b/src/object.h
index 07bcde1..f02e312 100644
--- a/src/object.h
+++ b/src/object.h
@@ -315,7 +315,7 @@
int64_t* addr = reinterpret_cast<int64_t*>(raw_addr);
if (UNLIKELY(is_volatile)) {
ANDROID_MEMBAR_STORE();
- QuasiAtomic::Swap64(new_value, addr);
+ QuasiAtomic::Write64(addr, new_value);
// Post-store barrier not required due to use of atomic op or mutex.
} else {
*addr = new_value;