Move thread flags and state into 32 bits.

We need to ensure that transitions to Runnable are atomic with respect
to a thread modifying the suspend count. Currently this is achieved by
holding the thread_suspend_count_lock_. This change creates a set of bit
flags that summarize that the suspend_count_ is raised, along with other
flags that signify that managed code should go into a slow path.

The effects of this change are two-fold:
1) transitions from suspended to runnable can CAS the thread state
rather than holding the suspend_count_lock_. This will make JNI
transitions cheaper.
2) the exception/suspend/interpreter poll needed for shadow frames can
be rolled into a single compare of the bit fields against 0.

Change-Id: I589f84e3dca396c3db448bf32d814565acf3d11f
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index 99a76da..6868d0b 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -1260,6 +1260,20 @@
   storeValue(cUnit, rlDest, rlResult);
 }
 
+void genMoveException(CompilationUnit* cUnit, RegLocation rlDest)  // Calls pGetAndClearException, stores result in rlDest.
+{
+  oatFlushAllRegs(cUnit);  /* Flush everything to its home location before calling the helper */
+  int funcOffset = ENTRYPOINT_OFFSET(pGetAndClearException);  // Offset of the runtime entrypoint to call.
+#if defined(TARGET_X86)
+  // On x86 the runtime helper loads the argument itself, so rARG0 is named here.
+  callRuntimeHelperReg(cUnit, funcOffset, rARG0, false);
+#else
+  callRuntimeHelperReg(cUnit, funcOffset, rSELF, false);  // Non-x86 targets pass rSELF as the argument.
+#endif
+  RegLocation rlResult = oatGetReturn(cUnit, false);  // Presumably the helper's return location - confirm.
+  storeValue(cUnit, rlDest, rlResult);  // Copy the returned value into the destination.
+}
+
 void genThrow(CompilationUnit* cUnit, RegLocation rlSrc)
 {
   oatFlushAllRegs(cUnit);
@@ -2527,7 +2541,7 @@
     newLIR2(cUnit, kThumbSubRI8, rSUSPEND, 1);
     branch = opCondBranch(cUnit, kCondEq, NULL);
 #elif defined(TARGET_X86)
-    newLIR2(cUnit, kX86Cmp32TI8, Thread::SuspendCountOffset().Int32Value(), 0);
+    newLIR2(cUnit, kX86Cmp16TI8, Thread::ThreadFlagsOffset().Int32Value(), 0);
     branch = opCondBranch(cUnit, kCondNe, NULL);
 #else
     opRegImm(cUnit, kOpSub, rSUSPEND, 1);
@@ -2557,7 +2571,7 @@
     newLIR2(cUnit, kThumbSubRI8, rSUSPEND, 1);
     opCondBranch(cUnit, kCondNe, target);
 #elif defined(TARGET_X86)
-    newLIR2(cUnit, kX86Cmp32TI8, Thread::SuspendCountOffset().Int32Value(), 0);
+    newLIR2(cUnit, kX86Cmp16TI8, Thread::ThreadFlagsOffset().Int32Value(), 0);
     opCondBranch(cUnit, kCondEq, target);
 #else
     opRegImm(cUnit, kOpSub, rSUSPEND, 1);