Always access Thread state and flags as a 32-bit location.

Rewrite access to Thread's state and flags to use 32-bit
atomic operations. Avoid `volatile` accesses that prevent
compiler optimizations.

Change `ThreadState` and `ThreadFlag` to `enum class`es.

Golem results for art-opt-cc (higher is better):
linux-ia32                       before after
NativeDowncallStaticNormal       28.162 35.323 (+25.43%)
NativeDowncallStaticNormal6      26.447 32.951 (+24.59%)
NativeDowncallStaticNormalRefs6
NativeDowncallVirtualNormal      27.972 35.027 (+25.22%)
NativeDowncallVirtualNormal6     26.096 32.131 (+23.13%)
NativeDowncallVirtualNormalRefs6 25.922 31.873 (+22.95%)
linux-x64                        before after
NativeDowncallStaticNormal       26.987 34.380 (+27.40%)
NativeDowncallStaticNormal6      25.424 31.096 (+22.31%)
NativeDowncallStaticNormalRefs6  25.086 30.602 (+21.99%)
NativeDowncallVirtualNormal      26.812 33.234 (+23.95%)
NativeDowncallVirtualNormal6     25.086 30.617 (+22.05%)
NativeDowncallVirtualNormalRefs6 25.086 30.602 (+21.99%)
linux-armv7                      before after
NativeDowncallStaticNormal       7.2394 7.9523 (+9.848%)
NativeDowncallStaticNormal6      6.8527 7.4888 (+9.283%)
NativeDowncallStaticNormalRefs6  6.3976 6.9444 (+8.547%)
NativeDowncallVirtualNormal      7.2081 7.9130 (+9.779%)
NativeDowncallVirtualNormal6     6.8527 7.4888 (+9.283%)
NativeDowncallVirtualNormalRefs6 6.3168 6.8527 (+8.483%)
linux-armv8                      before after
NativeDowncallStaticNormal       7.0389 7.5973 (+7.933%)
NativeDowncallStaticNormal6      6.8527 7.3783 (+7.670%)
NativeDowncallStaticNormalRefs6  6.2924 6.8226 (+8.427%)
NativeDowncallVirtualNormal      6.8527 7.3783 (+7.670%)
NativeDowncallVirtualNormal6     6.5604 7.0423 (+7.344%)
NativeDowncallVirtualNormalRefs6 6.1408 6.5329 (+6.386%)

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing --interpreter
Bug: 172332525
Bug: 143299880
Change-Id: Ib55d457ad8f5d9e1159b681dfd279d1f9cfb2af7
diff --git a/tools/cpp-define-generator/thread.def b/tools/cpp-define-generator/thread.def
index d472cc4..fff5755 100644
--- a/tools/cpp-define-generator/thread.def
+++ b/tools/cpp-define-generator/thread.def
@@ -22,9 +22,9 @@
 ASM_DEFINE(THREAD_CARD_TABLE_OFFSET,
            art::Thread::CardTableOffset<art::kRuntimePointerSize>().Int32Value())
 ASM_DEFINE(THREAD_CHECKPOINT_REQUEST,
-           art::kCheckpointRequest)
+           static_cast<uint32_t>(art::ThreadFlag::kCheckpointRequest))
 ASM_DEFINE(THREAD_EMPTY_CHECKPOINT_REQUEST,
-           art::kEmptyCheckpointRequest)
+           static_cast<uint32_t>(art::ThreadFlag::kEmptyCheckpointRequest))
 ASM_DEFINE(THREAD_EXCEPTION_OFFSET,
            art::Thread::ExceptionOffset<art::kRuntimePointerSize>().Int32Value())
 ASM_DEFINE(THREAD_FLAGS_OFFSET,
@@ -56,9 +56,11 @@
 ASM_DEFINE(THREAD_SELF_OFFSET,
            art::Thread::SelfOffset<art::kRuntimePointerSize>().Int32Value())
 ASM_DEFINE(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST,
-           art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest)
+           static_cast<uint32_t>(art::ThreadFlag::kSuspendRequest) |
+               static_cast<uint32_t>(art::ThreadFlag::kCheckpointRequest) |
+               static_cast<uint32_t>(art::ThreadFlag::kEmptyCheckpointRequest))
 ASM_DEFINE(THREAD_SUSPEND_REQUEST,
-           art::kSuspendRequest)
+           static_cast<uint32_t>(art::ThreadFlag::kSuspendRequest))
 ASM_DEFINE(THREAD_TOP_QUICK_FRAME_OFFSET,
            art::Thread::TopOfManagedStackOffset<art::kRuntimePointerSize>().Int32Value())
 ASM_DEFINE(THREAD_ALLOC_OBJECT_ENTRYPOINT_OFFSET,