Support 64-bit volatiles and reference/pointer volatiles.

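The new QuasiAtomic*64 helpers back the 64-bit volatile field accessors;
reference and pointer fields now route through GetField32/SetField32 so
they pick up the existing 32-bit volatile handling. A rough sketch of the
intended pattern (illustrative only; the real code is in
Object::GetField64/SetField64 in src/object.h):

    // volatile 64-bit store: barrier first, then an atomic swap
    ANDROID_MEMBAR_STORE();
    QuasiAtomicSwap64(new_value, addr);

    // volatile 64-bit load: atomic read, then a full barrier
    int64_t value = QuasiAtomicRead64(addr);
    ANDROID_MEMBAR_FULL();
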
Change-Id: I62c0751747767c02d6c57afd0fce3db6c8b02510
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 7a31ceb..933fdf5 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -55,6 +55,7 @@
 	src/assembler.cc \
 	src/assembler_arm.cc \
 	src/assembler_x86.cc \
+	src/atomic.cc.arm \
 	src/calling_convention.cc \
 	src/calling_convention_arm.cc \
 	src/calling_convention_x86.cc \
diff --git a/src/atomic.cc b/src/atomic.cc
new file mode 100644
index 0000000..f03cbbc
--- /dev/null
+++ b/src/atomic.cc
@@ -0,0 +1,266 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "atomic.h"
+
+#include <sched.h>
+
+namespace art {
+
+/*
+ * Quasi-atomic 64-bit operations, for platforms that lack the real thing.
+ *
+ * TODO: unify ARMv6/x86/sh implementations using the to-be-written
+ * spin lock implementation.  We don't want to rely on mutex innards,
+ * and it would be great if all platforms were running the same code.
+ */
+
+#if defined(HAVE_MACOSX_IPC)
+
+#include <libkern/OSAtomic.h>
+
+#if defined(__ppc__)        \
+    || defined(__PPC__)     \
+    || defined(__powerpc__) \
+    || defined(__powerpc)   \
+    || defined(__POWERPC__) \
+    || defined(_M_PPC)      \
+    || defined(__PPC)
+#define NEED_QUASIATOMICS 1
+#else
+
+int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
+  return OSAtomicCompareAndSwap64Barrier(old_value, new_value, (int64_t*)addr) == 0;
+}
+
+int64_t QuasiAtomicSwap64(int64_t value, volatile int64_t* addr) {
+  int64_t oldValue;
+  do {
+    oldValue = *addr;
+  } while (QuasiAtomicCas64(oldValue, value, addr));  // CAS returns non-zero on failure; retry.
+  return oldValue;
+}
+
+int64_t QuasiAtomicRead64(volatile const int64_t* addr) {
+  // OSAtomicAdd64Barrier takes a non-const pointer, so cast away const.
+  return OSAtomicAdd64Barrier(0, const_cast<volatile int64_t*>(addr));
+}
+#endif
+
+#elif defined(__i386__) || defined(__x86_64__)
+#define NEED_QUASIATOMICS 1
+
+#elif defined(__arm__)
+#include <machine/cpu-features.h>
+
+#ifdef __ARM_HAVE_LDREXD
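+
+// These rely on the LDREXD/STREXD exclusive load/store pair (ARMv6K and
+// later): the exclusive store sets status to non-zero if the reservation
+// was lost, so each operation retries until the store succeeds.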
+int64_t QuasiAtomicSwap64(int64_t new_value, volatile int64_t* addr) {
+  int64_t prev;
+  int status;
+  do {
+    __asm__ __volatile__ ("@ QuasiAtomicSwap64\n"
+        "ldrexd     %0, %H0, [%3]\n"
+        "strexd     %1, %4, %H4, [%3]"
+        : "=&r" (prev), "=&r" (status), "+m"(*addr)
+        : "r" (addr), "r" (new_value)
+        : "cc");
+  } while (__builtin_expect(status != 0, 0));
+  return prev;
+}
+
+int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
+  int64_t prev;
+  int status;
+  do {
+    __asm__ __volatile__ ("@ QuasiAtomicCas64\n"
+        "ldrexd     %0, %H0, [%3]\n"
+        "mov        %1, #0\n"
+        "teq        %0, %4\n"
+        "teqeq      %H0, %H4\n"
+        "strexdeq   %1, %5, %H5, [%3]"
+        : "=&r" (prev), "=&r" (status), "+m"(*addr)
+        : "r" (addr), "Ir" (old_value), "r" (new_value)
+        : "cc");
+  } while (__builtin_expect(status != 0, 0));
+  return prev != old_value;
+}
+
+int64_t QuasiAtomicRead64(volatile const int64_t* addr) {
+  int64_t value;
+  __asm__ __volatile__ ("@ QuasiAtomicRead64\n"
+      "ldrexd     %0, %H0, [%1]"
+      : "=&r" (value)
+      : "r" (addr));
+  return value;
+}
+
+#else
+
+// On the device, we implement the 64-bit atomic operations through
+// mutex locking. Normally this is bad because we must initialize
+// a pthread_mutex_t before being able to use it, which means
+// having to do an initialization check on each function call, and
+// that's where really ugly things begin...
+//
+// BUT, as a special twist, we take advantage of the fact that in our
+// pthread library, a mutex is simply a volatile word whose value is always
+// initialized to 0. In other words, simply declaring a static mutex
+// object initializes it!
+//
+// Another twist is that we use a small array of mutexes to dispatch
+// the contention locks from different memory addresses.
+//
+
+#include <pthread.h>
+
+#define  SWAP_LOCK_COUNT  32U
+static pthread_mutex_t  _swap_locks[SWAP_LOCK_COUNT];
+
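+// Pick a lock for an address: shift out the low three bits (the target is
+// an 8-byte value) and take the result modulo SWAP_LOCK_COUNT.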
+#define  SWAP_LOCK(addr) &_swap_locks[((unsigned)(void*)(addr) >> 3U) % SWAP_LOCK_COUNT]
+
+int64_t QuasiAtomicSwap64(int64_t value, volatile int64_t* addr) {
+  pthread_mutex_t*  lock = SWAP_LOCK(addr);
+
+  pthread_mutex_lock(lock);
+
+  int64_t oldValue = *addr;
+  *addr = value;
+
+  pthread_mutex_unlock(lock);
+  return oldValue;
+}
+
+int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
+  int result;
+  pthread_mutex_t*  lock = SWAP_LOCK(addr);
+
+  pthread_mutex_lock(lock);
+
+  if (*addr == old_value) {
+    *addr  = new_value;
+    result = 0;
+  } else {
+    result = 1;
+  }
+  pthread_mutex_unlock(lock);
+  return result;
+}
+
+int64_t QuasiAtomicRead64(volatile const int64_t* addr) {
+  int64_t result;
+  pthread_mutex_t*  lock = SWAP_LOCK(addr);
+
+  pthread_mutex_lock(lock);
+  result = *addr;
+  pthread_mutex_unlock(lock);
+  return result;
+}
+
+#endif /*__ARM_HAVE_LDREXD*/
+
+/*****************************************************************************/
+#elif defined(__sh__)
+#define NEED_QUASIATOMICS 1
+
+#else
+#error "Unsupported atomic operations for this platform"
+#endif
+
+
+#if NEED_QUASIATOMICS
+
+/* Note that a spinlock is *not* a good idea in general
+ * since they can introduce subtle issues. For example,
+ * a real-time thread trying to acquire a spinlock already
+ * acquired by another thread will never yield, making the
+ * CPU loop endlessly!
+ *
+ * However, this code is only used on the Linux simulator
+ * so it's probably ok for us.
+ *
+ * The alternative is to use a pthread mutex, but
+ * these must be initialized before being used, and
+ * then you have the problem of lazily initializing
+ * a mutex without any other synchronization primitive.
+ *
+ * TODO: these currently use sched_yield(), which is not guaranteed to
+ * do anything at all.  We need to use dvmIterativeSleep or a wait /
+ * notify mechanism if the initial attempt fails.
+ */
+
+/* global spinlock for all 64-bit quasiatomic operations */
+static int32_t quasiatomic_spinlock = 0;
+
+int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr) {
+  int result;
+
+  while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
+#ifdef HAVE_WIN32_THREADS
+    Sleep(0);
+#else
+    sched_yield();
+#endif
+  }
+
+  if (*addr == old_value) {
+    *addr = new_value;
+    result = 0;
+  } else {
+    result = 1;
+  }
+
+  android_atomic_release_store(0, &quasiatomic_spinlock);
+
+  return result;
+}
+
+int64_t QuasiAtomicRead64(volatile const int64_t* addr) {
+  int64_t result;
+
+  while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
+#ifdef HAVE_WIN32_THREADS
+    Sleep(0);
+#else
+    sched_yield();
+#endif
+  }
+
+  result = *addr;
+  android_atomic_release_store(0, &quasiatomic_spinlock);
+
+  return result;
+}
+
+int64_t QuasiAtomicSwap64(int64_t value, volatile int64_t* addr) {
+  int64_t result;
+
+  while (android_atomic_acquire_cas(0, 1, &quasiatomic_spinlock)) {
+#ifdef HAVE_WIN32_THREADS
+    Sleep(0);
+#else
+    sched_yield();
+#endif
+  }
+
+  result = *addr;
+  *addr = value;
+  android_atomic_release_store(0, &quasiatomic_spinlock);
+
+  return result;
+}
+
+#endif /*NEED_QUASIATOMICS*/
+
+}  // namespace art
diff --git a/src/atomic.h b/src/atomic.h
new file mode 100644
index 0000000..e3e4fc0
--- /dev/null
+++ b/src/atomic.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_SRC_ATOMIC_H_
+#define ART_SRC_ATOMIC_H_
+
+#include <cutils/atomic.h>          /* use common Android atomic ops */
+#include <cutils/atomic-inline.h>   /* and some uncommon ones */
+
+namespace art {
+
+/*
+ * NOTE: Two "quasiatomic" operations on the exact same memory address
+ * are guaranteed to operate atomically with respect to each other,
+ * but no guarantees are made about quasiatomic operations mixed with
+ * non-quasiatomic operations on the same address, nor about
+ * quasiatomic operations that are performed on partially-overlapping
+ * memory.
+ *
+ * None of these provide a memory barrier.
+ */
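+
+/*
+ * Callers that need ordering add barriers themselves; see
+ * Object::GetField64/SetField64 in object.h, which pair these operations
+ * with ANDROID_MEMBAR_FULL and ANDROID_MEMBAR_STORE respectively.
+ */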
+
+/*
+ * Swap the 64-bit value at "addr" with "value".  Returns the previous
+ * value.
+ */
+int64_t QuasiAtomicSwap64(int64_t value, volatile int64_t* addr);
+
+/*
+ * Read the 64-bit value at "addr".
+ */
+int64_t QuasiAtomicRead64(volatile const int64_t* addr);
+
+/*
+ * If the value at "addr" is equal to "old_value", replace it with "new_value"
+ * and return 0.  Otherwise, don't swap, and return nonzero.
+ */
+int QuasiAtomicCas64(int64_t old_value, int64_t new_value, volatile int64_t* addr);
+
+}  // namespace art
+
+#endif  // ART_SRC_ATOMIC_H_
diff --git a/src/heap.cc b/src/heap.cc
index 6228fcc..eb15f91 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -156,6 +156,10 @@
   return obj;
 }
 
+void Heap::WriteBarrier(const Object* obj) {
+  // TODO: mark obj's card.
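+  // (When a card table exists, this will dirty the card covering obj so the
+  // GC can cheaply find objects with recently written reference fields.)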
+}
+
 bool Heap::IsHeapAddress(const Object* obj) {
   // Note: we deliberately don't take the lock here, and mustn't test anything that would
   // require taking the lock.
diff --git a/src/heap.h b/src/heap.h
index 52c1f10..773789b 100644
--- a/src/heap.h
+++ b/src/heap.h
@@ -137,6 +137,10 @@
   // Callers must hold the heap lock.
   static void RecordFreeLocked(Space* space, const Object* object);
 
+  // Must be called if a field of an Object in the heap changes, and before any GC safe-point.
+  // The call is not needed if NULL is stored in the field.
+  static void WriteBarrier(const Object* object);
+
  private:
   // Allocates uninitialized storage.
   static Object* AllocateLocked(size_t num_bytes);
diff --git a/src/mutex.cc b/src/mutex.cc
index 4dba6b7..f9e471b 100644
--- a/src/mutex.cc
+++ b/src/mutex.cc
@@ -18,6 +18,7 @@
 
 #include <errno.h>
 
+#include "heap.h" // for VERIFY_OBJECT_ENABLED
 #include "logging.h"
 #include "utils.h"
 
diff --git a/src/object.cc b/src/object.cc
index d78eb24..373c8c2 100644
--- a/src/object.cc
+++ b/src/object.cc
@@ -50,8 +50,6 @@
 }
 
 Class* Field::GetType() const {
-  DCHECK(Runtime::Current()->IsStarted())
-      << "Can't call GetType without an initialized runtime";
   // Do full linkage (which sets dex cache value to speed next call)
   return Runtime::Current()->GetClassLinker()->ResolveType(GetTypeIdx(), this);
 }
@@ -474,7 +472,7 @@
     // Set the low-order bit so a BLX will switch to Thumb mode
     address |= 0x1;
   }
-  SetFieldPtr<uintptr_t>(OFFSET_OF_OBJECT_MEMBER(Method, code_), address, false);
+  SetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(Method, code_), reinterpret_cast<const void*>(address), false);
 }
 
 void Method::SetInvokeStub(const ByteArray* invoke_stub_array) {
diff --git a/src/object.h b/src/object.h
index ed41494..b9cab1d 100644
--- a/src/object.h
+++ b/src/object.h
@@ -17,12 +17,10 @@
 #ifndef ART_SRC_OBJECT_H_
 #define ART_SRC_OBJECT_H_
 
-#include <cutils/atomic.h>
-#include <cutils/atomic-inline.h>
-
 #include <vector>
 
 #include "UniquePtr.h"
+#include "atomic.h"
 #include "casts.h"
 #include "constants.h"
 #include "globals.h"
@@ -349,32 +347,18 @@
   // Accessors for Java type fields
   template<class T>
   T GetFieldObject(MemberOffset field_offset, bool is_volatile) const {
-    Heap::VerifyObject(this);
-    DCHECK(Thread::Current() == NULL ||
-           Thread::Current()->CanAccessDirectReferences());
-    const byte* raw_addr = reinterpret_cast<const byte*>(this) +
-        field_offset.Int32Value();
-    if (is_volatile) {
-      UNIMPLEMENTED(WARNING);
-    }
-    T result = *reinterpret_cast<T const *>(raw_addr);
+    DCHECK(Thread::Current() == NULL || Thread::Current()->CanAccessDirectReferences());
+    T result = reinterpret_cast<T>(GetField32(field_offset, is_volatile));
     Heap::VerifyObject(result);
     return result;
   }
 
-  void SetFieldObject(MemberOffset offset, const Object* new_value,
-                      bool is_volatile) {
-    // Avoid verifying this when initializing the Class*
-    if (offset.Int32Value() != ClassOffset().Int32Value()) {
-      Heap::VerifyObject(this);
-    }
+  void SetFieldObject(MemberOffset field_offset, const Object* new_value, bool is_volatile,
+                      bool this_is_valid = true) {
     Heap::VerifyObject(new_value);
-    byte* raw_addr = reinterpret_cast<byte*>(this) + offset.Int32Value();
-    if (is_volatile) {
-      UNIMPLEMENTED(WARNING);
+    SetField32(field_offset, reinterpret_cast<uint32_t>(new_value), is_volatile, this_is_valid);
+    if (new_value != NULL) {
+      Heap::WriteBarrier(this);
     }
-    *reinterpret_cast<const Object**>(raw_addr) = new_value;
-    // TODO: write barrier
   }
 
   uint32_t GetField32(MemberOffset field_offset, bool is_volatile) const {
@@ -388,9 +372,11 @@
     }
   }
 
-  void SetField32(MemberOffset offset, uint32_t new_value, bool is_volatile) {
-    Heap::VerifyObject(this);
-    byte* raw_addr = reinterpret_cast<byte*>(this) + offset.Int32Value();
+  void SetField32(MemberOffset field_offset, uint32_t new_value, bool is_volatile, bool this_is_valid = true) {
+    if (this_is_valid) {
+      Heap::VerifyObject(this);
+    }
+    byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
     uint32_t* word_addr = reinterpret_cast<uint32_t*>(raw_addr);
     if (is_volatile) {
       /*
@@ -409,42 +395,39 @@
   uint64_t GetField64(MemberOffset field_offset, bool is_volatile) const {
     Heap::VerifyObject(this);
     const byte* raw_addr = reinterpret_cast<const byte*>(this) + field_offset.Int32Value();
+    const int64_t* addr = reinterpret_cast<const int64_t*>(raw_addr);
     if (is_volatile) {
-      UNIMPLEMENTED(WARNING);
+      uint64_t result = QuasiAtomicRead64(addr);
+      ANDROID_MEMBAR_FULL();
+      return result;
+    } else {
+      return *addr;
     }
-    return *reinterpret_cast<const uint64_t*>(raw_addr);
   }
 
-  void SetField64(MemberOffset offset, uint64_t new_value, bool is_volatile) {
+  void SetField64(MemberOffset field_offset, uint64_t new_value, bool is_volatile) {
     Heap::VerifyObject(this);
-    byte* raw_addr = reinterpret_cast<byte*>(this) + offset.Int32Value();
+    byte* raw_addr = reinterpret_cast<byte*>(this) + field_offset.Int32Value();
+    int64_t* addr = reinterpret_cast<int64_t*>(raw_addr);
     if (is_volatile) {
-      UNIMPLEMENTED(WARNING);
+      ANDROID_MEMBAR_STORE();
+      QuasiAtomicSwap64(new_value, addr);
+      // Post-store barrier not required due to use of atomic op or mutex.
+    } else {
+      *addr = new_value;
     }
-    *reinterpret_cast<uint64_t*>(raw_addr) = new_value;
   }
 
  protected:
   // Accessors for non-Java type fields
   template<class T>
   T GetFieldPtr(MemberOffset field_offset, bool is_volatile) const {
-    Heap::VerifyObject(this);
-    const byte* raw_addr = reinterpret_cast<const byte*>(this) +
-        field_offset.Int32Value();
-    if (is_volatile) {
-      UNIMPLEMENTED(WARNING);
-    }
-    return *reinterpret_cast<T const *>(raw_addr);
+    return reinterpret_cast<T>(GetField32(field_offset, is_volatile));
   }
 
   template<typename T>
-  void SetFieldPtr(MemberOffset offset, T new_value, bool is_volatile) {
-    Heap::VerifyObject(this);
-    byte* raw_addr = reinterpret_cast<byte*>(this) + offset.Int32Value();
-    if (is_volatile) {
-      UNIMPLEMENTED(WARNING);
-    }
-    *reinterpret_cast<T*>(raw_addr) = new_value;
+  void SetFieldPtr(MemberOffset field_offset, T new_value, bool is_volatile) {
+    SetField32(field_offset, reinterpret_cast<uint32_t>(new_value), is_volatile);
   }
 
  private:
@@ -1793,8 +1776,7 @@
   }
 
   void SetIfTable(ObjectArray<InterfaceEntry>* new_iftable) {
-    SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, iftable_),
-		   new_iftable, false);
+    SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, iftable_), new_iftable, false);
   }
 
   // Get instance fields
@@ -2091,7 +2073,7 @@
 
 inline void Object::SetClass(Class* new_klass) {
   // new_klass may be NULL prior to class linker initialization
-  SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Object, klass_), new_klass, false);
+  SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Object, klass_), new_klass, false, false);
 }
 
 inline bool Object::InstanceOf(const Class* klass) const {