Move thread state to art::Atomic.

Leaves the CAS operations relaxed, although art::Atomic treats a relaxed
CAS as a strong CAS when not compiling with clang.

Also packs Atomic to the size of its element type so that the new
AtomicInteger member can overlay the existing int32_t in the
StateAndFlags union without changing its size; a COMPILE_ASSERT now
checks that sizeof(StateAndFlags) == sizeof(int32_t).
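
As a minimal standalone sketch of the new pattern (illustration only:
std::atomic stands in for art::Atomic, assuming CompareExchangeWeakRelaxed
maps to compare_exchange_weak with std::memory_order_relaxed; the bit
layout and names below are made up, not the real StateAndFlags):

    #include <atomic>
    #include <cstdint>

    // Stand-in for tls32_.state_and_flags.as_atomic_int.
    std::atomic<int32_t> state_and_flags{0};

    void SetStateSketch(int32_t new_state) {
      int32_t old_value = state_and_flags.load(std::memory_order_relaxed);
      while (true) {
        // Preserve the flag bits, replace the state bits.
        int32_t new_value = (old_value & ~0xffff) | (new_state & 0xffff);
        // Relaxed suffices only when the caller orders the transition via
        // a lock acquire/release. A weak CAS may fail spuriously, and
        // compare_exchange_weak refreshes old_value on failure, so retry.
        if (state_and_flags.compare_exchange_weak(old_value, new_value,
                                                  std::memory_order_relaxed)) {
          break;
        }
      }
    }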

Change-Id: I6d37c22173540d166b624385e52e4ad05e592adc
diff --git a/runtime/atomic.h b/runtime/atomic.h
index ed83a33..5cfdbcc 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -415,7 +415,7 @@
 };
 
 template<typename T>
-class Atomic {
+class PACKED(sizeof(T)) Atomic {
  private:
   COMPILE_ASSERT(sizeof(T) <= 4 || sizeof(T) == 8, bad_atomic_arg);
 
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index b1180bd..38f1307 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -21,8 +21,6 @@
 
 #include <pthread.h>
 
-#include "cutils/atomic-inline.h"
-
 #include "base/casts.h"
 #include "base/mutex-inl.h"
 #include "gc/heap.h"
@@ -99,9 +97,12 @@
     DCHECK_EQ((old_state_and_flags.as_struct.flags & kCheckpointRequest), 0);
     new_state_and_flags.as_struct.flags = old_state_and_flags.as_struct.flags;
     new_state_and_flags.as_struct.state = new_state;
-    int status = android_atomic_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
-                                       &tls32_.state_and_flags.as_int);
-    if (LIKELY(status == 0)) {
+
+    // CAS the value without memory ordering; the required ordering is provided by the lock release below.
+    bool done =
+        tls32_.state_and_flags.as_atomic_int.CompareExchangeWeakRelaxed(old_state_and_flags.as_int,
+                                                                        new_state_and_flags.as_int);
+    if (LIKELY(done)) {
       break;
     }
   }
@@ -141,9 +142,10 @@
       union StateAndFlags new_state_and_flags;
       new_state_and_flags.as_int = old_state_and_flags.as_int;
       new_state_and_flags.as_struct.state = kRunnable;
-      // CAS the value without a memory barrier, that occurred in the lock above.
-      done = android_atomic_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
-                                &tls32_.state_and_flags.as_int) == 0;
+      // CAS the value without memory ordering; the required ordering is provided by the lock acquisition above.
+      done =
+          tls32_.state_and_flags.as_atomic_int.CompareExchangeWeakRelaxed(old_state_and_flags.as_int,
+                                                                          new_state_and_flags.as_int);
     }
     if (UNLIKELY(!done)) {
       // Failed to transition to Runnable. Release shared mutator_lock_ access and try again.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index d60fb49..4985583 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -34,8 +34,6 @@
 #include "base/mutex.h"
 #include "class_linker.h"
 #include "class_linker-inl.h"
-#include "cutils/atomic.h"
-#include "cutils/atomic-inline.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils.h"
diff --git a/runtime/thread.h b/runtime/thread.h
index 7cd86de..0640b38 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -24,6 +24,7 @@
 #include <memory>
 #include <string>
 
+#include "atomic.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "entrypoints/interpreter/interpreter_entrypoints.h"
@@ -864,6 +865,7 @@
       // change to Runnable as a GC or other operation is in progress.
       volatile uint16_t state;
     } as_struct;
+    AtomicInteger as_atomic_int;
     volatile int32_t as_int;
 
    private:
@@ -871,6 +873,7 @@
     // See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47409
     DISALLOW_COPY_AND_ASSIGN(StateAndFlags);
   };
+  COMPILE_ASSERT(sizeof(StateAndFlags) == sizeof(int32_t), weird_state_and_flags_size);
 
   static void ThreadExitCallback(void* arg);