Move thread flags and state into 32 bits.

We need to ensure that transitions to Runnable are atomic with respect
to a thread modifying the suspend count. Currently this is achieved by
holding the thread_suspend_count_lock_. This change creates a set of bit
flags that summarize that the suspend_count_ is raised, and also other
flags that signify that managed code should go into a slow path.

The effects of this change are two-fold:
1) transitions from suspended to runnable can CAS the thread state
rather than holding the suspend_count_lock_. This will make JNI
transitions cheaper.
2) the exception/suspend/interpreter poll needed for shadow frames can
be rolled into a single compare of the bit fields against 0.

Change-Id: I589f84e3dca396c3db448bf32d814565acf3d11f
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index 99a76da..6868d0b 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -1260,6 +1260,20 @@
   storeValue(cUnit, rlDest, rlResult);
 }
 
+void genMoveException(CompilationUnit* cUnit, RegLocation rlDest)
+{
+  oatFlushAllRegs(cUnit);  /* Flush everything to its home location before the helper call */
+  int funcOffset = ENTRYPOINT_OFFSET(pGetAndClearException);
+#if defined(TARGET_X86)
+  // On x86 the runtime helper loads its argument itself; other targets pass rSELF explicitly.
+  callRuntimeHelperReg(cUnit, funcOffset, rARG0, false);
+#else
+  callRuntimeHelperReg(cUnit, funcOffset, rSELF, false);
+#endif
+  RegLocation rlResult = oatGetReturn(cUnit, false);
+  storeValue(cUnit, rlDest, rlResult);
+}
+
 void genThrow(CompilationUnit* cUnit, RegLocation rlSrc)
 {
   oatFlushAllRegs(cUnit);
@@ -2527,7 +2541,7 @@
     newLIR2(cUnit, kThumbSubRI8, rSUSPEND, 1);
     branch = opCondBranch(cUnit, kCondEq, NULL);
 #elif defined(TARGET_X86)
-    newLIR2(cUnit, kX86Cmp32TI8, Thread::SuspendCountOffset().Int32Value(), 0);
+    newLIR2(cUnit, kX86Cmp16TI8, Thread::ThreadFlagsOffset().Int32Value(), 0);
     branch = opCondBranch(cUnit, kCondNe, NULL);
 #else
     opRegImm(cUnit, kOpSub, rSUSPEND, 1);
@@ -2557,7 +2571,7 @@
     newLIR2(cUnit, kThumbSubRI8, rSUSPEND, 1);
     opCondBranch(cUnit, kCondNe, target);
 #elif defined(TARGET_X86)
-    newLIR2(cUnit, kX86Cmp32TI8, Thread::SuspendCountOffset().Int32Value(), 0);
+    newLIR2(cUnit, kX86Cmp16TI8, Thread::ThreadFlagsOffset().Int32Value(), 0);
     opCondBranch(cUnit, kCondEq, target);
 #else
     opRegImm(cUnit, kOpSub, rSUSPEND, 1);
diff --git a/src/compiler/codegen/MethodBitcode.cc b/src/compiler/codegen/MethodBitcode.cc
index c50d74d..682de7a 100644
--- a/src/compiler/codegen/MethodBitcode.cc
+++ b/src/compiler/codegen/MethodBitcode.cc
@@ -2663,21 +2663,8 @@
 
 void cvtMoveException(CompilationUnit* cUnit, llvm::CallInst* callInst)
 {
-  DCHECK_EQ(callInst->getNumArgOperands(), 0U);
-  int exOffset = Thread::ExceptionOffset().Int32Value();
   RegLocation rlDest = getLoc(cUnit, callInst);
-  RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-#if defined(TARGET_X86)
-  newLIR2(cUnit, kX86Mov32RT, rlResult.lowReg, exOffset);
-  newLIR2(cUnit, kX86Mov32TI, exOffset, 0);
-#else
-  int resetReg = oatAllocTemp(cUnit);
-  loadWordDisp(cUnit, rSELF, exOffset, rlResult.lowReg);
-  loadConstant(cUnit, resetReg, 0);
-  storeWordDisp(cUnit, rSELF, exOffset, resetReg);
-  oatFreeTemp(cUnit, resetReg);
-#endif
-  storeValue(cUnit, rlDest, rlResult);
+  genMoveException(cUnit, rlDest);
 }
 
 void cvtSget(CompilationUnit* cUnit, llvm::CallInst* callInst, bool isWide,
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index 8269f8b..7227487 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -260,22 +260,9 @@
     case Instruction::NOP:
       break;
 
-    case Instruction::MOVE_EXCEPTION: {
-      int exOffset = Thread::ExceptionOffset().Int32Value();
-      rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-#if defined(TARGET_X86)
-      newLIR2(cUnit, kX86Mov32RT, rlResult.lowReg, exOffset);
-      newLIR2(cUnit, kX86Mov32TI, exOffset, 0);
-#else
-      int resetReg = oatAllocTemp(cUnit);
-      loadWordDisp(cUnit, rSELF, exOffset, rlResult.lowReg);
-      loadConstant(cUnit, resetReg, 0);
-      storeWordDisp(cUnit, rSELF, exOffset, resetReg);
-      oatFreeTemp(cUnit, resetReg);
-#endif
-      storeValue(cUnit, rlDest, rlResult);
+    case Instruction::MOVE_EXCEPTION:
+      genMoveException(cUnit, rlDest);
       break;
-    }
     case Instruction::RETURN_VOID:
       if (!(cUnit->attrs & METHOD_IS_LEAF)) {
         genSuspendTest(cUnit, optFlags);
diff --git a/src/compiler/codegen/x86/X86/Factory.cc b/src/compiler/codegen/x86/X86/Factory.cc
index e88f7dc..9538931 100644
--- a/src/compiler/codegen/x86/X86/Factory.cc
+++ b/src/compiler/codegen/x86/X86/Factory.cc
@@ -372,56 +372,16 @@
 
 LIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask)
 {
-  UNIMPLEMENTED(WARNING) << "loadMultiple";
+  UNIMPLEMENTED(FATAL) << "loadMultiple";
   newLIR0(cUnit, kX86Bkpt);
   return NULL;
-#if 0
-  int i;
-  int loadCnt = 0;
-  LIR *res = NULL ;
-  genBarrier(cUnit);
-
-  for (i = 0; i < 8; i++, rMask >>= 1) {
-    if (rMask & 0x1) {
-      newLIR3(cUnit, kX86Lw, i+r_A0, loadCnt*4, rBase);
-      loadCnt++;
-    }
-  }
-
-  if (loadCnt) {/* increment after */
-    newLIR3(cUnit, kX86Addiu, rBase, rBase, loadCnt*4);
-  }
-
-  genBarrier(cUnit);
-  return res; /* NULL always returned which should be ok since no callers use it */
-#endif
 }
 
 LIR *storeMultiple(CompilationUnit *cUnit, int rBase, int rMask)
 {
-  UNIMPLEMENTED(WARNING) << "storeMultiple";
+  UNIMPLEMENTED(FATAL) << "storeMultiple";
   newLIR0(cUnit, kX86Bkpt);
   return NULL;
-#if 0
-  int i;
-  int storeCnt = 0;
-  LIR *res = NULL ;
-  genBarrier(cUnit);
-
-  for (i = 0; i < 8; i++, rMask >>= 1) {
-    if (rMask & 0x1) {
-      newLIR3(cUnit, kX86Sw, i+r_A0, storeCnt*4, rBase);
-      storeCnt++;
-    }
-  }
-
-  if (storeCnt) { /* increment after */
-    newLIR3(cUnit, kX86Addiu, rBase, rBase, storeCnt*4);
-  }
-
-  genBarrier(cUnit);
-  return res; /* NULL always returned which should be ok since no callers use it */
-#endif
 }
 
 LIR* loadBaseIndexedDisp(CompilationUnit *cUnit,