-rw-r--r--  src/atomic.cc                  | 301
-rw-r--r--  src/atomic.h                   |  72
-rw-r--r--  src/jdwp/jdwp_handler.cc       |   4
-rw-r--r--  src/jdwp/jdwp_main.cc          |   2
-rw-r--r--  src/native/sun_misc_Unsafe.cc  |   2
-rw-r--r--  src/object.h                   |   4
-rw-r--r--  src/runtime.cc                 |   4
-rw-r--r--  src/thread.cc                  |   6
8 files changed, 156 insertions, 239 deletions
diff --git a/src/atomic.cc b/src/atomic.cc
index 0625f1f7cb..b923f91ebb 100644
--- a/src/atomic.cc
+++ b/src/atomic.cc
@@ -16,73 +16,54 @@
 #include "atomic.h"
 
-#include <sched.h>
+#include <pthread.h>
 
-namespace art {
+#include "mutex.h"
+#include "stl_util.h"
+#include "stringprintf.h"
 
-/*
- * Quasi-atomic 64-bit operations, for platforms that lack the real thing.
- *
- * TODO: unify ARMv6/x86/sh implementations using the to-be-written
- * spin lock implementation. We don't want to rely on mutex innards,
- * and it would be great if all platforms were running the same code.
- */
+#if defined(__APPLE__)
+#include <libkern/OSAtomic.h>
+#endif
+#if defined(__arm__)
+#include <machine/cpu-features.h>
+#endif
+
+namespace art {
 
 #if defined(HAVE_MACOSX_IPC)
+#define NEED_MAC_QUASI_ATOMICS 1
 
-#include <libkern/OSAtomic.h>
+#elif defined(__i386__) || defined(__x86_64__)
+#define NEED_PTHREADS_QUASI_ATOMICS 1
 
-#if defined(__ppc__) \
-    || defined(__PPC__) \
-    || defined(__powerpc__) \
-    || defined(__powerpc) \
-    || defined(__POWERPC__) \
-    || defined(_M_PPC) \
-    || defined(__PPC)
-#define NEED_QUASIATOMICS 1
-#else
+#elif defined(__mips__)
+#define NEED_PTHREADS_QUASI_ATOMICS 1
 
-int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
-  return OSAtomicCompareAndSwap64Barrier(old_value, new_value, const_cast<int64_t*>(addr)) == 0;
-}
+#elif defined(__arm__)
 
-static inline int64_t QuasiAtomicSwap64Impl(int64_t value, volatile int64_t* addr) {
-  int64_t old_value;
-  do {
-    old_value = *addr;
-  } while (QuasiAtomicCas64(old_value, value, addr));
-  return old_value;
-}
+#if defined(__ARM_HAVE_LDREXD)
+#define NEED_ARM_LDREXD_QUASI_ATOMICS 1
+#else
+#define NEED_PTHREADS_QUASI_ATOMICS 1
+#endif
 
-int64_t QuasiAtomicSwap64(int64_t value, volatile int64_t* addr) {
-  return QuasiAtomicSwap64Impl(value, addr);
-}
+#elif defined(__sh__)
+#define NEED_PTHREADS_QUASI_ATOMICS 1
 
-int64_t QuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr) {
-  ANDROID_MEMBAR_STORE();
-  int64_t old_value = QuasiAtomicSwap64Impl(value, addr);
-  /* TUNING: barriers can be avoided on some architectures */
-  ANDROID_MEMBAR_FULL();
-  return old_value;
-}
-
-int64_t QuasiAtomicRead64(volatile const int64_t* addr) {
-  return OSAtomicAdd64Barrier(0, const_cast<volatile int64_t*>(addr));
-}
+#else
+#error "QuasiAtomic unsupported on this platform"
 #endif
 
-#elif defined(__i386__) || defined(__x86_64__)
-#define NEED_QUASIATOMICS 1
+// *****************************************************************************
 
-#elif __arm__
-#include <machine/cpu-features.h>
+#if NEED_ARM_LDREXD_QUASI_ATOMICS
 
-#ifdef __ARM_HAVE_LDREXD
 static inline int64_t QuasiAtomicSwap64Impl(int64_t new_value, volatile int64_t* addr) {
   int64_t prev;
   int status;
   do {
-    __asm__ __volatile__("@ QuasiAtomicSwap64\n"
+    __asm__ __volatile__("@ QuasiAtomic::Swap64\n"
         "ldrexd %0, %H0, [%3]\n"
         "strexd %1, %4, %H4, [%3]"
         : "=&r" (prev), "=&r" (status), "+m"(*addr)
@@ -92,22 +73,31 @@ static inline int64_t QuasiAtomicSwap64Impl(int64_t new_value, volatile int64_t*
   return prev;
 }
 
-int64_t QuasiAtomicSwap64(int64_t new_value, volatile int64_t* addr) {
+int64_t QuasiAtomic::Swap64(int64_t new_value, volatile int64_t* addr) {
   return QuasiAtomicSwap64Impl(new_value, addr);
 }
 
-int64_t QuasiAtomicSwap64Sync(int64_t new_value, volatile int64_t* addr) {
+int64_t QuasiAtomic::Swap64Sync(int64_t new_value, volatile int64_t* addr) {
   ANDROID_MEMBAR_STORE();
   int64_t old_value = QuasiAtomicSwap64Impl(new_value, addr);
   ANDROID_MEMBAR_FULL();
   return old_value;
 }
 
-int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
+int64_t QuasiAtomic::Read64(volatile const int64_t* addr) {
+  int64_t value;
+  __asm__ __volatile__("@ QuasiAtomic::Read64\n"
+      "ldrexd %0, %H0, [%1]"
+      : "=&r" (value)
+      : "r" (addr));
+  return value;
+}
+
+int QuasiAtomic::Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
   int64_t prev;
   int status;
   do {
-    __asm__ __volatile__("@ QuasiAtomicCas64\n"
+    __asm__ __volatile__("@ QuasiAtomic::Cas64\n"
        "ldrexd %0, %H0, [%3]\n"
        "mov %1, #0\n"
        "teq %0, %4\n"
@@ -120,180 +110,101 @@ int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* add
   return prev != old_value;
 }
 
-int64_t QuasiAtomicRead64(volatile const int64_t* addr) {
-  int64_t value;
-  __asm__ __volatile__("@ QuasiAtomicRead64\n"
-      "ldrexd %0, %H0, [%1]"
-      : "=&r" (value)
-      : "r" (addr));
-  return value;
-}
-
-#else
-
-// on the device, we implement the 64-bit atomic operations through
-// mutex locking. normally, this is bad because we must initialize
-// a pthread_mutex_t before being able to use it, and this means
-// having to do an initialization check on each function call, and
-// that's where really ugly things begin...
-//
-// BUT, as a special twist, we take advantage of the fact that in our
-// pthread library, a mutex is simply a volatile word whose value is always
-// initialized to 0. In other words, simply declaring a static mutex
-// object initializes it !
-//
-// another twist is that we use a small array of mutexes to dispatch
-// the contention locks from different memory addresses
-//
-
-#include <pthread.h>
-
-#define SWAP_LOCK_COUNT 32U
-static pthread_mutex_t _swap_locks[SWAP_LOCK_COUNT];
-
-#define SWAP_LOCK(addr) &_swap_locks[((unsigned)(void*)(addr) >> 3U) % SWAP_LOCK_COUNT]
-
-int64_t QuasiAtomicSwap64(int64_t value, volatile int64_t* addr) {
-  pthread_mutex_t* lock = SWAP_LOCK(addr);
+#endif
 
-  pthread_mutex_lock(lock);
+// *****************************************************************************
 
-  int64_t old_value = *addr;
-  *addr = value;
+#if NEED_MAC_QUASI_ATOMICS
 
-  pthread_mutex_unlock(lock);
+static inline int64_t QuasiAtomicSwap64Impl(int64_t value, volatile int64_t* addr) {
+  int64_t old_value;
+  do {
+    old_value = *addr;
+  } while (QuasiAtomic::Cas64(old_value, value, addr));
   return old_value;
 }
 
-int64_t QuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr) {
-  // Same as QuasiAtomicSwap64 - mutex handles barrier.
-  return QuasiAtomicSwap64(value, addr);
+int64_t QuasiAtomic::Swap64(int64_t value, volatile int64_t* addr) {
+  return QuasiAtomicSwap64Impl(value, addr);
 }
 
-int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
-  int result;
-  pthread_mutex_t* lock = SWAP_LOCK(addr);
-
-  pthread_mutex_lock(lock);
-
-  if (*addr == old_value) {
-    *addr = new_value;
-    result = 0;
-  } else {
-    result = 1;
-  }
-  pthread_mutex_unlock(lock);
-  return result;
+int64_t QuasiAtomic::Swap64Sync(int64_t value, volatile int64_t* addr) {
+  ANDROID_MEMBAR_STORE();
+  int64_t old_value = QuasiAtomicSwap64Impl(value, addr);
+  // TUNING: barriers can be avoided on some architectures.
+  ANDROID_MEMBAR_FULL();
+  return old_value;
 }
 
-int64_t QuasiAtomicRead64(volatile const int64_t* addr) {
-  int64_t result;
-  pthread_mutex_t* lock = SWAP_LOCK(addr);
-
-  pthread_mutex_lock(lock);
-  result = *addr;
-  pthread_mutex_unlock(lock);
-  return result;
+int64_t QuasiAtomic::Read64(volatile const int64_t* addr) {
+  return OSAtomicAdd64Barrier(0, const_cast<volatile int64_t*>(addr));
 }
 
-#endif /*__ARM_HAVE_LDREXD*/
-
-/*****************************************************************************/
-#elif __sh__
-#define NEED_QUASIATOMICS 1
+int QuasiAtomic::Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
+  return OSAtomicCompareAndSwap64Barrier(old_value, new_value, const_cast<int64_t*>(addr)) == 0;
+}
 
-#else
-#error "Unsupported atomic operations for this platform"
 #endif
 
+// *****************************************************************************
 
-#if NEED_QUASIATOMICS
+#if NEED_PTHREADS_QUASI_ATOMICS
 
-/* Note that a spinlock is *not* a good idea in general
- * since they can introduce subtle issues. For example,
- * a real-time thread trying to acquire a spinlock already
- * acquired by another thread will never yeld, making the
- * CPU loop endlessly!
- *
- * However, this code is only used on the Linux simulator
- * so it's probably ok for us.
- *
- * The alternative is to use a pthread mutex, but
- * these must be initialized before being used, and
- * then you have the problem of lazily initializing
- * a mutex without any other synchronization primitive.
- *
- * TODO: these currently use sched_yield(), which is not guaranteed to
- * do anything at all. We need to use dvmIterativeSleep or a wait /
- * notify mechanism if the initial attempt fails.
- */
+// In the absence of a better implementation, we implement the 64-bit atomic
+// operations through mutex locking.
 
-/* global spinlock for all 64-bit quasiatomic operations */
-static int32_t quasiatomic_spinlock = 0;
+// We stripe across a bunch of different mutexes to reduce contention.
+static const size_t kSwapLockCount = 32;
+static std::vector<Mutex*>* gSwapLocks;
 
-int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
-  int result;
-
-  while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
-    Sleep(0);
-#else
-    sched_yield();
-#endif
+void QuasiAtomic::Startup() {
+  gSwapLocks = new std::vector<Mutex*>;
+  for (size_t i = 0; i < kSwapLockCount; ++i) {
+    gSwapLocks->push_back(new Mutex(StringPrintf("QuasiAtomic stripe %d", i).c_str()));
   }
+}
 
-  if (*addr == old_value) {
-    *addr = new_value;
-    result = 0;
-  } else {
-    result = 1;
-  }
+void QuasiAtomic::Shutdown() {
+  STLDeleteElements(gSwapLocks);
+  delete gSwapLocks;
+}
 
-  android_atomic_release_store(0, &quasiatomic_spinlock);
+static inline Mutex& GetSwapLock(const volatile int64_t* addr) {
+  return *(*gSwapLocks)[((unsigned)(void*)(addr) >> 3U) % kSwapLockCount];
+}
 
-  return result;
+int64_t QuasiAtomic::Swap64(int64_t value, volatile int64_t* addr) {
+  MutexLock mu(GetSwapLock(addr));
+  int64_t old_value = *addr;
+  *addr = value;
+  return old_value;
 }
 
-int64_t QuasiAtomicRead64(volatile const int64_t* addr) {
-  int64_t result;
+int64_t QuasiAtomic::Swap64Sync(int64_t value, volatile int64_t* addr) {
+  // Same as QuasiAtomicSwap64 - mutex handles barrier.
+  return QuasiAtomic::Swap64(value, addr);
+}
 
-  while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
-    Sleep(0);
-#else
-    sched_yield();
-#endif
+int QuasiAtomic::Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
+  MutexLock mu(GetSwapLock(addr));
+  if (*addr == old_value) {
+    *addr = new_value;
+    return 0;
   }
-
-  result = *addr;
-  android_atomic_release_store(0, &quasiatomic_spinlock);
-
-  return result;
+  return 1;
 }
 
-int64_t QuasiAtomicSwap64(int64_t value, volatile int64_t* addr) {
-  int64_t result;
+int64_t QuasiAtomic::Read64(volatile const int64_t* addr) {
+  MutexLock mu(GetSwapLock(addr));
+  return *addr;
+}
 
-  while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
-    Sleep(0);
 #else
-    sched_yield();
-#endif
-  }
-
-  result = *addr;
-  *addr = value;
-  android_atomic_release_store(0, &quasiatomic_spinlock);
-  return result;
-}
+// The other implementations don't need any special setup.
+void QuasiAtomic::Startup() {}
+void QuasiAtomic::Shutdown() {}
 
-int64_t QuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr) {
-  // Same as QuasiAtomicSwap64 - syscall handles barrier.
-  return QuasiAtomicSwap64(value, addr);
-}
-
-#endif /*NEED_QUASIATOMICS*/
+#endif
 
 }  // namespace art
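The lock-striping fallback above hashes the target address to one of kSwapLockCount mutexes, so two quasi-atomic operations on the same 64-bit location always serialize on the same lock, while operations on unrelated addresses usually do not contend. A minimal standalone sketch of the same idea, using std::mutex and a fixed array instead of ART's Mutex and the Startup()-allocated vector (both substitutions are assumptions for illustration, not code from this change):

#include <cstddef>
#include <cstdint>
#include <mutex>

namespace {

constexpr size_t kStripeCount = 32;   // mirrors kSwapLockCount in the patch
std::mutex gStripes[kStripeCount];    // hypothetical stand-in for gSwapLocks

// Drop the low three bits (identical for every aligned int64_t), then
// pick a stripe; the same address always maps to the same mutex.
std::mutex& StripeFor(const volatile int64_t* addr) {
  return gStripes[(reinterpret_cast<uintptr_t>(addr) >> 3) % kStripeCount];
}

}  // namespace

// Same convention as QuasiAtomic::Cas64: returns 0 on success, non-zero otherwise.
int StripedCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
  std::lock_guard<std::mutex> lock(StripeFor(addr));
  if (*addr == old_value) {
    *addr = new_value;
    return 0;
  }
  return 1;
}

The main difference from the sketch is initialization: the patch allocates its Mutex objects lazily in QuasiAtomic::Startup(), giving each a name (presumably for logging and lock diagnostics), whereas plain std::mutex can simply be statically constructed.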
diff --git a/src/atomic.h b/src/atomic.h
index dab625e4b6..c6c0f7d5ca 100644
--- a/src/atomic.h
+++ b/src/atomic.h
@@ -17,44 +17,46 @@
 #ifndef ART_SRC_ATOMIC_H_
 #define ART_SRC_ATOMIC_H_
 
-#include <cutils/atomic.h>         /* use common Android atomic ops */
-#include <cutils/atomic-inline.h>  /* and some uncommon ones */
+#include <stdint.h>
 
-namespace art {
-
-/*
- * NOTE: Two "quasiatomic" operations on the exact same memory address
- * are guaranteed to operate atomically with respect to each other,
- * but no guarantees are made about quasiatomic operations mixed with
- * non-quasiatomic operations on the same address, nor about
- * quasiatomic operations that are performed on partially-overlapping
- * memory.
- *
- * Only the "Sync" functions provide a memory barrier.
- */
-
-/*
- * Swap the 64-bit value at "addr" with "value". Returns the previous
- * value. No memory barriers.
- */
-int64_t QuasiAtomicSwap64(int64_t value, volatile int64_t* addr);
+#include "cutils/atomic.h"
+#include "cutils/atomic-inline.h"
+#include "macros.h"
 
-/*
- * Swap the 64-bit value at "addr" with "value". Returns the previous
- * value. Provides memory barriers.
- */
-int64_t QuasiAtomicSwap64Sync(int64_t value, volatile int64_t* addr);
-
-/*
- * Read the 64-bit value at "addr".
- */
-int64_t QuasiAtomicRead64(volatile const int64_t* addr);
+namespace art {
 
-/*
- * If the value at "addr" is equal to "old_value", replace it with "new_value"
- * and return 0. Otherwise, don't swap, and return nonzero.
- */
-int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr);
+// NOTE: Two "quasiatomic" operations on the exact same memory address
+// are guaranteed to operate atomically with respect to each other,
+// but no guarantees are made about quasiatomic operations mixed with
+// non-quasiatomic operations on the same address, nor about
+// quasiatomic operations that are performed on partially-overlapping
+// memory.
+//
+// Only the "Sync" functions provide a memory barrier.
+class QuasiAtomic {
+ public:
+  static void Startup();
+
+  static void Shutdown();
+
+  // Swaps the 64-bit value at "addr" with "value". Returns the previous
+  // value. No memory barriers.
+  static int64_t Swap64(int64_t value, volatile int64_t* addr);
+
+  // Swaps the 64-bit value at "addr" with "value". Returns the previous
+  // value. Provides memory barriers.
+  static int64_t Swap64Sync(int64_t value, volatile int64_t* addr);
+
+  // Reads the 64-bit value at "addr".
+  static int64_t Read64(volatile const int64_t* addr);
+
+  // If the value at "addr" is equal to "old_value", replace it with "new_value"
+  // and return 0. Otherwise, don't swap, and return nonzero.
+  static int Cas64(int64_t old_value, int64_t new_value, volatile int64_t* addr);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(QuasiAtomic);
+};
 
 }  // namespace art
diff --git a/src/jdwp/jdwp_handler.cc b/src/jdwp/jdwp_handler.cc
index 355fc5e743..73c70cb735 100644
--- a/src/jdwp/jdwp_handler.cc
+++ b/src/jdwp/jdwp_handler.cc
@@ -1629,7 +1629,7 @@ void JdwpState::ProcessRequest(const JdwpReqHeader* pHeader, const uint8_t* buf,
      * so waitForDebugger() doesn't return if we stall for a bit here.
      */
     Dbg::GoActive();
-    QuasiAtomicSwap64(0, &lastActivityWhen);
+    QuasiAtomic::Swap64(0, &lastActivityWhen);
   }
 
   /*
@@ -1698,7 +1698,7 @@ void JdwpState::ProcessRequest(const JdwpReqHeader* pHeader, const uint8_t* buf,
    * the initial setup. Only update if this is a non-DDMS packet.
    */
   if (pHeader->cmdSet != kJDWPDdmCmdSet) {
-    QuasiAtomicSwap64(MilliTime(), &lastActivityWhen);
+    QuasiAtomic::Swap64(MilliTime(), &lastActivityWhen);
   }
 
   /* tell the VM that GC is okay again */
diff --git a/src/jdwp/jdwp_main.cc b/src/jdwp/jdwp_main.cc
index a820cc1931..df24b8c285 100644
--- a/src/jdwp/jdwp_main.cc
+++ b/src/jdwp/jdwp_main.cc
@@ -416,7 +416,7 @@ int64_t JdwpState::LastDebuggerActivity() {
     return -1;
   }
 
-  int64_t last = QuasiAtomicRead64(&lastActivityWhen);
+  int64_t last = QuasiAtomic::Read64(&lastActivityWhen);
 
   /* initializing or in the middle of something? */
   if (last == 0) {
diff --git a/src/native/sun_misc_Unsafe.cc b/src/native/sun_misc_Unsafe.cc
index 214771b2a3..8cc549acfd 100644
--- a/src/native/sun_misc_Unsafe.cc
+++ b/src/native/sun_misc_Unsafe.cc
@@ -54,7 +54,7 @@ static jboolean Unsafe_compareAndSwapLong(JNIEnv* env, jobject, jobject javaObj,
   byte* raw_addr = reinterpret_cast<byte*>(obj) + offset;
   volatile int64_t* address = reinterpret_cast<volatile int64_t*>(raw_addr);
   // Note: android_atomic_cmpxchg() returns 0 on success, not failure.
-  int result = QuasiAtomicCas64(expectedValue, newValue, address);
+  int result = QuasiAtomic::Cas64(expectedValue, newValue, address);
   return (result == 0);
 }
diff --git a/src/object.h b/src/object.h
index 5e67f625db..f5970f65df 100644
--- a/src/object.h
+++ b/src/object.h
@@ -361,7 +361,7 @@ class MANAGED Object {
     const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
     const int64_t* addr = reinterpret_cast<const int64_t*>(raw_addr);
     if (UNLIKELY(is_volatile)) {
-      uint64_t result = QuasiAtomicRead64(addr);
+      uint64_t result = QuasiAtomic::Read64(addr);
       ANDROID_MEMBAR_FULL();
       return result;
     } else {
@@ -375,7 +375,7 @@
     int64_t* addr = reinterpret_cast<int64_t*>(raw_addr);
     if (UNLIKELY(is_volatile)) {
       ANDROID_MEMBAR_STORE();
-      QuasiAtomicSwap64(new_value, addr);
+      QuasiAtomic::Swap64(new_value, addr);
       // Post-store barrier not required due to use of atomic op or mutex.
     } else {
       *addr = new_value;
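The object.h hunks keep the existing barrier discipline around volatile 64-bit fields and only rename the calls: a volatile read is a quasi-atomic 64-bit load followed by a full barrier, and a volatile write is a store barrier followed by a quasi-atomic swap, after which no further barrier is needed because the swap is either a genuine atomic operation or mutex-protected. Pulled out of the surrounding field-offset arithmetic, the pattern looks roughly like the following (illustrative free functions, not code from the patch; ANDROID_MEMBAR_* come in through atomic.h's cutils includes):

#include <stdint.h>

#include "atomic.h"  // QuasiAtomic and, via cutils/atomic-inline.h, ANDROID_MEMBAR_*

namespace art {

inline int64_t ReadVolatile64(const volatile int64_t* addr) {
  int64_t result = QuasiAtomic::Read64(addr);  // atomic 64-bit load
  ANDROID_MEMBAR_FULL();                       // order the load before later accesses
  return result;
}

inline void WriteVolatile64(volatile int64_t* addr, int64_t new_value) {
  ANDROID_MEMBAR_STORE();                // order earlier stores before the write
  QuasiAtomic::Swap64(new_value, addr);  // atomic 64-bit store; previous value ignored
  // No post-store barrier: the swap is an atomic op or holds a mutex.
}

}  // namespace art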
diff --git a/src/runtime.cc b/src/runtime.cc
index a94a93a047..aabd86f80e 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -108,6 +108,8 @@ Runtime::~Runtime() {
   delete intern_table_;
   delete java_vm_;
   Thread::Shutdown();
+  QuasiAtomic::Shutdown();
+
   // TODO: acquire a static mutex on Runtime to avoid racing.
   CHECK(instance_ == NULL || instance_ == this);
   instance_ = NULL;
@@ -615,6 +617,8 @@ bool Runtime::Init(const Options& raw_options, bool ignore_unrecognized) {
   }
   VLOG(startup) << "Runtime::Init -verbose:startup enabled";
 
+  QuasiAtomic::Startup();
+
   SetJniGlobalsMax(options->jni_globals_max_);
   Monitor::Init(options->lock_profiling_threshold_, options->hook_is_sensitive_thread_);
diff --git a/src/thread.cc b/src/thread.cc
index e5e985c366..4554cee4b6 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -412,9 +412,9 @@ void Thread::InitStackHwm() {
     size_t old_stack_size = stack_size_;
     stack_size_ = default_stack_size;
     stack_begin_ += (old_stack_size - stack_size_);
-    LOG(WARNING) << "Limiting unlimited stack (reported as " << PrettySize(old_stack_size) << ")"
-                 << " to " << PrettySize(stack_size_)
-                 << " with base " << reinterpret_cast<void*>(stack_begin_);
+    VLOG(threads) << "Limiting unlimited stack (reported as " << PrettySize(old_stack_size) << ")"
+                  << " to " << PrettySize(stack_size_)
+                  << " with base " << reinterpret_cast<void*>(stack_begin_);
   }
 }
 #endif
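Finally, the runtime.cc hunks tie the new Startup()/Shutdown() pair to the runtime's lifetime, which matters on platforms that fall back to the mutex-striped implementation: the swap locks only exist between those two calls. A hypothetical standalone caller (test-style code, not part of this change) would bracket its quasi-atomic use the same way:

#include <stdint.h>

#include "atomic.h"

int main() {
  art::QuasiAtomic::Startup();  // allocates the swap-lock stripes where needed

  volatile int64_t counter = 0;
  // Cas64 returns 0 on success, non-zero if *addr no longer equals old_value.
  int64_t old_value;
  do {
    old_value = art::QuasiAtomic::Read64(&counter);
  } while (art::QuasiAtomic::Cas64(old_value, old_value + 1, &counter) != 0);

  art::QuasiAtomic::Shutdown();  // must come after the last quasi-atomic call
  return 0;
}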