10 files changed, 187 insertions, 189 deletions
diff --git a/src/compiler/codegen/GenInvoke.cc b/src/compiler/codegen/GenInvoke.cc
index e6714aa807..9f1d58e304 100644
--- a/src/compiler/codegen/GenInvoke.cc
+++ b/src/compiler/codegen/GenInvoke.cc
@@ -752,7 +752,7 @@ bool genInlinedAbsLong(CompilationUnit *cUnit, CallInfo* info)
 
 bool genInlinedFloatCvt(CompilationUnit *cUnit, CallInfo* info)
 {
-#if defined(TARGET_ARM)
+#if defined(TARGET_ARM) || defined(TARGET_X86)
   RegLocation rlSrc = info->args[0];
   RegLocation rlDest = inlineTarget(cUnit, info);
   storeValue(cUnit, rlDest, rlSrc);
@@ -764,7 +764,7 @@ bool genInlinedFloatCvt(CompilationUnit *cUnit, CallInfo* info)
 
 bool genInlinedDoubleCvt(CompilationUnit *cUnit, CallInfo* info)
 {
-#if defined(TARGET_ARM)
+#if defined(TARGET_ARM) || defined(TARGET_X86)
   RegLocation rlSrc = info->args[0];
   RegLocation rlDest = inlineTargetWide(cUnit, info);
   storeValueWide(cUnit, rlDest, rlSrc);
diff --git a/src/compiler/codegen/LocalOptimizations.cc b/src/compiler/codegen/LocalOptimizations.cc
index faab3e0046..2fc7ae0ccd 100644
--- a/src/compiler/codegen/LocalOptimizations.cc
+++ b/src/compiler/codegen/LocalOptimizations.cc
@@ -226,6 +226,15 @@ void applyLoadStoreElimination(CompilationUnit* cUnit, LIR* headLIR,
       }
 
       if (stopHere == true) {
+#if defined(TARGET_X86)
+        // Prevent stores from being sunk between ops that generate ccodes and
+        // ops that use them.
+        int flags = EncodingMap[checkLIR->opcode].flags;
+        if (sinkDistance > 0 && (flags & IS_BRANCH) && (flags & USES_CCODES)) {
+          checkLIR = PREV_LIR(checkLIR);
+          sinkDistance--;
+        }
+#endif
         DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR, "REG CLOBBERED"));
         /* Only sink store instructions */
         if (sinkDistance && !isThisLIRLoad) {
diff --git a/src/compiler/codegen/MethodBitcode.cc b/src/compiler/codegen/MethodBitcode.cc
index 8e9f15fa76..a8da1cde44 100644
--- a/src/compiler/codegen/MethodBitcode.cc
+++ b/src/compiler/codegen/MethodBitcode.cc
@@ -51,7 +51,7 @@ void defineValue(CompilationUnit* cUnit, llvm::Value* val, int sReg)
   llvm::Value* placeholder = getLLVMValue(cUnit, sReg);
   if (placeholder == NULL) {
     // This can happen on instruction rewrite on verification failure
-    LOG(WARNING) << "Null placeholder";
+    LOG(WARNING) << "Null placeholder - invalid CFG";
     return;
   }
   placeholder->replaceAllUsesWith(val);
@@ -307,6 +307,7 @@ void convertThrow(CompilationUnit* cUnit, RegLocation rlSrc)
   llvm::Function* func = cUnit->intrinsic_helper->GetIntrinsicFunction(
       greenland::IntrinsicHelper::Throw);
   cUnit->irb->CreateCall(func, src);
+  cUnit->irb->CreateUnreachable();
 }
 
 void convertMonitorEnterExit(CompilationUnit* cUnit, int optFlags,
@@ -803,7 +804,6 @@ bool convertMIRNode(CompilationUnit* cUnit, MIR* mir, BasicBlock* bb,
   bool res = false;   // Assume success
   RegLocation rlSrc[3];
   RegLocation rlDest = badLoc;
-  RegLocation rlResult = badLoc;
   Instruction::Code opcode = mir->dalvikInsn.opcode;
   uint32_t vA = mir->dalvikInsn.vA;
   uint32_t vB = mir->dalvikInsn.vB;
@@ -812,15 +812,6 @@ bool convertMIRNode(CompilationUnit* cUnit, MIR* mir, BasicBlock* bb,
 
   bool objectDefinition = false;
 
-  if (cUnit->printMe) {
-    if ((int)opcode < kMirOpFirst) {
-      LOG(INFO) << ".. " << Instruction::Name(opcode) << " 0x"
-                << std::hex << (int)opcode;
-    } else {
-      LOG(INFO) << ".. opcode 0x" << std::hex << (int)opcode;
-    }
-  }
-
   /* Prep Src and Dest locations */
   int nextSreg = 0;
   int nextLoc = 0;
@@ -1301,24 +1292,10 @@ bool convertMIRNode(CompilationUnit* cUnit, MIR* mir, BasicBlock* bb,
 
    case Instruction::THROW:
       convertThrow(cUnit, rlSrc[0]);
-      /*
-       * If this throw is standalone, terminate.
-       * If it might rethrow, force termination
-       * of the following block.
-       */
-      if (bb->fallThrough == NULL) {
-        cUnit->irb->CreateUnreachable();
-      } else {
-        bb->fallThrough->fallThrough = NULL;
-        bb->fallThrough->taken = NULL;
-      }
       break;
 
    case Instruction::THROW_VERIFICATION_ERROR:
       convertThrowVerificationError(cUnit, vA, vB);
-      UNIMPLEMENTED(WARNING) << "Need dead code elimination pass"
-                             << " - disabling bitcode verification";
-      cUnit->enableDebug &= ~(1 << kDebugVerifyBitcode);
       break;
 
     case Instruction::MOVE_RESULT_WIDE:
@@ -1329,8 +1306,7 @@ bool convertMIRNode(CompilationUnit* cUnit, MIR* mir, BasicBlock* bb,
        * Instruction rewriting on verification failure can eliminate
        * the invoke that feeds this move0result.  It won't ever be reached,
        * so we can ignore it.
-       * TODO: verify that previous instruction is THROW_VERIFICATION_ERROR,
-       * or better, add dead-code elimination.
+       * TODO: verify that previous instruction if THROW_VERIFICATION_ERROR
        */
       UNIMPLEMENTED(WARNING) << "Need to verify previous inst was rewritten";
 #else
@@ -1673,13 +1649,6 @@ void convertExtendedMIR(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
       UNIMPLEMENTED(WARNING) << "unimp kMirOpPhi";
       break;
     }
-    case kMirOpNop:
-      if ((mir == bb->lastMIRInsn) && (bb->taken == NULL) &&
-          (bb->fallThrough == NULL)) {
-        cUnit->irb->CreateUnreachable();
-      }
-      break;
-
 #if defined(TARGET_ARM)
     case kMirOpFusedCmplFloat:
       UNIMPLEMENTED(WARNING) << "unimp kMirOpFusedCmpFloat";
@@ -1751,16 +1720,6 @@ bool methodBlockBitcodeConversion(CompilationUnit* cUnit, BasicBlock* bb)
   cUnit->irb->SetInsertPoint(llvmBB);
   setDexOffset(cUnit, bb->startOffset);
 
-  if (cUnit->printMe) {
-    LOG(INFO) << "................................";
-    LOG(INFO) << "Block id " << bb->id;
-    if (llvmBB != NULL) {
-      LOG(INFO) << "label " << llvmBB->getName().str().c_str();
-    } else {
-      LOG(INFO) << "llvmBB is NULL";
-    }
-  }
-
   if (bb->blockType == kEntryBlock) {
     setMethodInfo(cUnit);
     bool *canBeRef = (bool*)  oatNew(cUnit, sizeof(bool) *
@@ -1800,6 +1759,8 @@ bool methodBlockBitcodeConversion(CompilationUnit* cUnit, BasicBlock* bb)
     /*
      * Because we're deferring null checking, delete the associated empty
      * exception block.
+     * TODO: add new block type for exception blocks that we generate
+     * greenland code for.
      */
     llvmBB->eraseFromParent();
     return false;
@@ -1809,9 +1770,8 @@ bool methodBlockBitcodeConversion(CompilationUnit* cUnit, BasicBlock* bb)
 
     setDexOffset(cUnit, mir->offset);
 
-    int opcode = mir->dalvikInsn.opcode;
-    Instruction::Format dalvikFormat =
-        Instruction::FormatOf(mir->dalvikInsn.opcode);
+    Instruction::Code dalvikOpcode = mir->dalvikInsn.opcode;
+    Instruction::Format dalvikFormat = Instruction::FormatOf(dalvikOpcode);
 
     /* If we're compiling for the debugger, generate an update callout */
     if (cUnit->genDebugger) {
@@ -1819,43 +1779,7 @@ bool methodBlockBitcodeConversion(CompilationUnit* cUnit, BasicBlock* bb)
       //genDebuggerUpdate(cUnit, mir->offset);
     }
 
-    if (opcode == kMirOpCheck) {
-      // Combine check and work halves of throwing instruction.
-      MIR* workHalf = mir->meta.throwInsn;
-      mir->dalvikInsn.opcode = workHalf->dalvikInsn.opcode;
-      opcode = mir->dalvikInsn.opcode;
-      SSARepresentation* ssaRep = workHalf->ssaRep;
-      workHalf->ssaRep = mir->ssaRep;
-      mir->ssaRep = ssaRep;
-      workHalf->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-      if (bb->successorBlockList.blockListType == kCatch) {
-        llvm::Function* intr = cUnit->intrinsic_helper->GetIntrinsicFunction(
-            greenland::IntrinsicHelper::CatchTargets);
-        llvm::Value* switchKey =
-            cUnit->irb->CreateCall(intr, cUnit->irb->getInt32(mir->offset));
-        GrowableListIterator iter;
-        oatGrowableListIteratorInit(&bb->successorBlockList.blocks, &iter);
-        // New basic block to use for work half
-        llvm::BasicBlock* workBB =
-            llvm::BasicBlock::Create(*cUnit->context, "", cUnit->func);
-        llvm::SwitchInst* sw =
-            cUnit->irb->CreateSwitch(switchKey, workBB,
-                                     bb->successorBlockList.blocks.numUsed);
-        while (true) {
-          SuccessorBlockInfo *successorBlockInfo =
-              (SuccessorBlockInfo *) oatGrowableListIteratorNext(&iter);
-          if (successorBlockInfo == NULL) break;
-          llvm::BasicBlock *target =
-              getLLVMBlock(cUnit, successorBlockInfo->block->id);
-          int typeIndex = successorBlockInfo->key;
-          sw->addCase(cUnit->irb->getInt32(typeIndex), target);
-        }
-        llvmBB = workBB;
-        cUnit->irb->SetInsertPoint(llvmBB);
-      }
-    }
-
-    if (opcode >= kMirOpFirst) {
+    if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) {
       convertExtendedMIR(cUnit, bb, mir, llvmBB);
       continue;
     }
@@ -1863,9 +1787,8 @@ bool methodBlockBitcodeConversion(CompilationUnit* cUnit, BasicBlock* bb)
     bool notHandled = convertMIRNode(cUnit, mir, bb, llvmBB,
                                      NULL /* labelList */);
     if (notHandled) {
-      Instruction::Code dalvikOpcode = static_cast<Instruction::Code>(opcode);
       LOG(WARNING) << StringPrintf("%#06x: Op %#x (%s) / Fmt %d not handled",
-                                   mir->offset, opcode,
+                                   mir->offset, dalvikOpcode,
                                    Instruction::Name(dalvikOpcode),
                                    dalvikFormat);
     }
@@ -2063,14 +1986,7 @@ void oatMethodMIR2Bitcode(CompilationUnit* cUnit)
   cUnit->irb->SetInsertPoint(cUnit->entryBB);
   cUnit->irb->CreateBr(cUnit->entryTargetBB);
 
-  if (cUnit->enableDebug & (1 << kDebugVerifyBitcode)) {
-     if (llvm::verifyFunction(*cUnit->func, llvm::PrintMessageAction)) {
-       LOG(INFO) << "Bitcode verification FAILED for "
-                 << PrettyMethod(cUnit->method_idx, *cUnit->dex_file)
-                 << " of size " << cUnit->insnsSize;
-       cUnit->enableDebug |= (1 << kDebugDumpBitcodeFile);
-     }
-  }
+  //llvm::verifyFunction(*cUnit->func, llvm::PrintMessageAction);
 
   if (cUnit->enableDebug & (1 << kDebugDumpBitcodeFile)) {
     // Write bitcode to file
@@ -3218,24 +3134,6 @@ bool methodBitcodeBlockCodeGen(CompilationUnit* cUnit, llvm::BasicBlock* bb)
               cvtShiftOp(cUnit, Instruction::USHR_INT, callInst);
               break;
 
-            case greenland::IntrinsicHelper::CatchTargets: {
-                llvm::SwitchInst* swInst =
-                    llvm::dyn_cast<llvm::SwitchInst>(nextIt);
-                DCHECK(swInst != NULL);
-                /*
-                 * Discard the edges and the following conditional branch.
-                 * Do a direct branch to the default target (which is the
-                 * "work" portion of the pair.
-                 * TODO: awful code layout - rework
-                 */
-                 llvm::BasicBlock* targetBB = swInst->getDefaultDest();
-                 DCHECK(targetBB != NULL);
-                 opUnconditionalBranch(cUnit,
-                                       cUnit->blockToLabelMap.Get(targetBB));
-                 ++it;
-              }
-              break;
-
             default:
               LOG(FATAL) << "Unexpected intrinsic " << (int)id << ", "
                          << cUnit->intrinsic_helper->GetName(id);
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index 3c5fb23259..b93cbd94b1 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -179,10 +179,6 @@ CallInfo* oatNewCallInfo(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
 {
   CallInfo* info = (CallInfo*)oatNew(cUnit, sizeof(CallInfo), true,
                                          kAllocMisc);
-//FIXME: Disable fusing  for x86
-#if defined(TARGET_X86)
-  info->result.location = kLocInvalid;
-#else
   MIR* moveResultMIR = oatFindMoveResult(cUnit, bb, mir);
   if (moveResultMIR == NULL) {
     info->result.location = kLocInvalid;
@@ -190,7 +186,6 @@ CallInfo* oatNewCallInfo(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
     info->result = oatGetRawDest(cUnit, moveResultMIR);
     moveResultMIR->dalvikInsn.opcode = Instruction::NOP;
   }
-#endif
   info->numArgWords = mir->ssaRep->numUses;
   info->args = (info->numArgWords == 0) ? NULL : (RegLocation*)
       oatNew(cUnit, sizeof(RegLocation) * info->numArgWords, false, kAllocMisc);
@@ -819,10 +814,9 @@ const char* extendedMIROpNames[kMirOpLast - kMirOpFirst] = {
   "kMirFusedCmpgDouble",
   "kMirFusedCmpLong",
   "kMirNop",
-  "kMirOpNullCheck",
-  "kMirOpRangeCheck",
-  "kMirOpDivZeroCheck",
-  "kMirOpCheck",
+  "kMirOpNullNRangeUpCheck",
+  "kMirOpNullNRangeDownCheck",
+  "kMirOpLowerBound",
 };
 
 /* Extended MIR instructions like PHI */
@@ -853,7 +847,6 @@ void handleExtendedMethodMIR(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir)
       storeValue(cUnit, rlDest, rlSrc);
       break;
     }
-#if defined(TARGET_ARM)
     case kMirOpFusedCmplFloat:
       genFusedFPCmpBranch(cUnit, bb, mir, false /*gt bias*/, false /*double*/);
       break;
@@ -869,7 +862,6 @@ void handleExtendedMethodMIR(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir)
     case kMirOpFusedCmpLong:
       genFusedLongCmpBranch(cUnit, bb, mir);
       break;
-#endif
     default:
       break;
   }
@@ -959,17 +951,7 @@ bool methodBlockCodeGen(CompilationUnit* cUnit, BasicBlock* bb)
       newLIR1(cUnit, kPseudoSSARep, (int) ssaString);
     }
 
-    if ((int)dalvikOpcode == (int)kMirOpCheck) {
-      // Combine check and work halves of throwing instruction.
-      MIR* workHalf = mir->meta.throwInsn;
-      mir->dalvikInsn.opcode = workHalf->dalvikInsn.opcode;
-      SSARepresentation* ssaRep = workHalf->ssaRep;
-      workHalf->ssaRep = mir->ssaRep;
-      mir->ssaRep = ssaRep;
-      workHalf->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
-    }
-
-    if ((int)dalvikOpcode >= (int)kMirOpFirst) {
+    if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) {
       handleExtendedMethodMIR(cUnit, bb, mir);
       continue;
     }
diff --git a/src/compiler/codegen/mips/FP/MipsFP.cc b/src/compiler/codegen/mips/FP/MipsFP.cc
index a57d34a6d0..2bf26e448b 100644
--- a/src/compiler/codegen/mips/FP/MipsFP.cc
+++ b/src/compiler/codegen/mips/FP/MipsFP.cc
@@ -210,4 +210,10 @@ static bool genCmpFP(CompilationUnit *cUnit, Instruction::Code opcode, RegLocati
   return false;
 }
 
+void genFusedFPCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+                                bool gtBias, bool isDouble)
+{
+  UNIMPLEMENTED(FATAL) << "Need codegen for fused fp cmp branch";
+}
+
 } //  namespace art
diff --git a/src/compiler/codegen/mips/Mips32/Gen.cc b/src/compiler/codegen/mips/Mips32/Gen.cc
index 77129851f5..c3048366b8 100644
--- a/src/compiler/codegen/mips/Mips32/Gen.cc
+++ b/src/compiler/codegen/mips/Mips32/Gen.cc
@@ -517,4 +517,9 @@ void opRegCopyWide(CompilationUnit *cUnit, int destLo, int destHi,
 #endif
 }
 
+void genFusedLongCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir)
+{
+  UNIMPLEMENTED(FATAL) << "Need codegen for fused long cmp branch";
+}
+
 }  // namespace art
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index a245660d7e..0c5d3cf481 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -298,6 +298,14 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
 
   EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF, REG_DEF0 | SETS_CCODES),
   EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF, REG_DEF0 | SETS_CCODES),
+
+  { kX86CmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "!0r,!1r" },
+  { kX86CmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1d],!2r" },
+  { kX86CmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86LockCmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "!0r,!1r" },
+  { kX86LockCmpxchgMR, kMemReg,   IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1d],!2r" },
+  { kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
+
   EXT_0F_ENCODING_MAP(Movzx8,  0x00, 0xB6, REG_DEF0),
   EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0),
   EXT_0F_ENCODING_MAP(Movsx8,  0x00, 0xBE, REG_DEF0),
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index 8cd32b45fd..be628db39e 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -21,7 +21,6 @@ static bool genArithOpFloat(CompilationUnit *cUnit, Instruction::Code opcode,
                             RegLocation rlSrc2) {
   X86OpCode op = kX86Nop;
   RegLocation rlResult;
-  int tempReg;
 
   /*
    * Don't attempt to optimize register usage since these opcodes call out to
@@ -45,19 +44,9 @@ static bool genArithOpFloat(CompilationUnit *cUnit, Instruction::Code opcode,
       op = kX86MulssRR;
       break;
     case Instruction::NEG_FLOAT:
-      // TODO: Make this an XorpsRM where the memory location holds 0x80000000
-      rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
-      rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
-      tempReg = oatAllocTemp(cUnit);
-      loadConstant(cUnit, tempReg, 0x80000000);
-      newLIR2(cUnit, kX86MovdxrRR, rlResult.lowReg, tempReg);
-      newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlSrc1.lowReg);
-      storeValue(cUnit, rlDest, rlResult);
-      return false;
     case Instruction::REM_FLOAT_2ADDR:
-    case Instruction::REM_FLOAT: {
+    case Instruction::REM_FLOAT:
       return genArithOpFloatPortable(cUnit, opcode, rlDest, rlSrc1, rlSrc2);
-    }
     default:
       return true;
   }
@@ -67,7 +56,7 @@ static bool genArithOpFloat(CompilationUnit *cUnit, Instruction::Code opcode,
   int rDest = rlResult.lowReg;
   int rSrc1 = rlSrc1.lowReg;
   int rSrc2 = rlSrc2.lowReg;
-  if (rSrc2 == rDest) {
+  if (rDest == rSrc2) {
     rSrc2 = oatAllocTempFloat(cUnit);
     opRegCopy(cUnit, rSrc2, rDest);
   }
@@ -83,7 +72,6 @@ static bool genArithOpDouble(CompilationUnit *cUnit, Instruction::Code opcode,
                              RegLocation rlSrc2) {
   X86OpCode op = kX86Nop;
   RegLocation rlResult;
-  int tempReg;
 
   switch (opcode) {
     case Instruction::ADD_DOUBLE_2ADDR:
@@ -103,20 +91,9 @@ static bool genArithOpDouble(CompilationUnit *cUnit, Instruction::Code opcode,
       op = kX86MulsdRR;
       break;
     case Instruction::NEG_DOUBLE:
-      // TODO: Make this an XorpdRM where the memory location holds 0x8000000000000000
-      rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
-      rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
-      tempReg = oatAllocTemp(cUnit);
-      loadConstant(cUnit, tempReg, 0x80000000);
-      newLIR2(cUnit, kX86MovdxrRR, rlResult.lowReg, tempReg);
-      newLIR2(cUnit, kX86PsllqRI, rlResult.lowReg, 32);
-      newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlSrc1.lowReg);
-      storeValueWide(cUnit, rlDest, rlResult);
-      return false;
     case Instruction::REM_DOUBLE_2ADDR:
-    case Instruction::REM_DOUBLE: {
+    case Instruction::REM_DOUBLE:
       return genArithOpDoublePortable(cUnit, opcode, rlDest, rlSrc1, rlSrc2);
-    }
     default:
       return true;
   }
@@ -207,9 +184,7 @@ static bool genConversion(CompilationUnit *cUnit, Instruction::Code opcode,
     }
     case Instruction::LONG_TO_DOUBLE:
     case Instruction::LONG_TO_FLOAT:
-      // These can be implemented inline by using memory as a 64-bit source.
-      // However, this can't be done easily if the register has been promoted.
-      UNIMPLEMENTED(WARNING) << "inline l2[df] " << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
+      // TODO: inline by using memory as a 64-bit source. Be careful about promoted registers.
     case Instruction::FLOAT_TO_LONG:
     case Instruction::DOUBLE_TO_LONG:
       return genConversionPortable(cUnit, opcode, rlDest, rlSrc);
@@ -286,4 +261,74 @@ static bool genCmpFP(CompilationUnit *cUnit, Instruction::Code code, RegLocation
   return false;
 }
 
+void genFusedFPCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+                                bool gtBias, bool isDouble) {
+  LIR* labelList = cUnit->blockLabelList;
+  LIR* taken = &labelList[bb->taken->id];
+  LIR* notTaken = &labelList[bb->fallThrough->id];
+  LIR* branch = NULL;
+  RegLocation rlSrc1;
+  RegLocation rlSrc2;
+  if (isDouble) {
+    rlSrc1 = oatGetSrcWide(cUnit, mir, 0);
+    rlSrc2 = oatGetSrcWide(cUnit, mir, 2);
+    rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
+    rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
+    newLIR2(cUnit, kX86UcomisdRR, S2D(rlSrc1.lowReg, rlSrc1.highReg),
+            S2D(rlSrc2.lowReg, rlSrc2.highReg));
+  } else {
+    rlSrc1 = oatGetSrc(cUnit, mir, 0);
+    rlSrc2 = oatGetSrc(cUnit, mir, 1);
+    rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
+    rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
+    newLIR2(cUnit, kX86UcomissRR, rlSrc1.lowReg, rlSrc2.lowReg);
+  }
+  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  switch (ccode) {
+    case kCondEq:
+      if (gtBias) {
+        branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondPE);
+        branch->target = notTaken;
+      }
+      break;
+    case kCondNe:
+      if (!gtBias) {
+        branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondPE);
+        branch->target = taken;
+      }
+      break;
+    case kCondLt:
+      if (gtBias) {
+        branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondPE);
+        branch->target = notTaken;
+      }
+      ccode = kCondCs;
+      break;
+    case kCondLe:
+      if (gtBias) {
+        branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondPE);
+        branch->target = notTaken;
+      }
+      ccode = kCondLs;
+      break;
+    case kCondGt:
+      if (gtBias) {
+        branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondPE);
+        branch->target = taken;
+      }
+      ccode = kCondHi;
+      break;
+    case kCondGe:
+      if (gtBias) {
+        branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondPE);
+        branch->target = taken;
+      }
+      ccode = kCondCc;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << (int)ccode;
+  }
+  opCondBranch(cUnit, ccode, taken);
+}
+
 } //  namespace art
diff --git a/src/compiler/codegen/x86/X86/Gen.cc b/src/compiler/codegen/x86/X86/Gen.cc
index adad05b3f3..4bfc5310a6 100644
--- a/src/compiler/codegen/x86/X86/Gen.cc
+++ b/src/compiler/codegen/x86/X86/Gen.cc
@@ -181,60 +181,63 @@ void genFillArrayData(CompilationUnit* cUnit, uint32_t tableOffset,
 
 void genNegFloat(CompilationUnit *cUnit, RegLocation rlDest, RegLocation rlSrc)
 {
-  UNIMPLEMENTED(WARNING) << "genNegFloat "
-                         << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
-  newLIR0(cUnit, kX86Bkpt);
-#if 0
   RegLocation rlResult;
   rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
   rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
   opRegRegImm(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, 0x80000000);
   storeValue(cUnit, rlDest, rlResult);
-#endif
 }
 
 void genNegDouble(CompilationUnit *cUnit, RegLocation rlDest, RegLocation rlSrc)
 {
-  UNIMPLEMENTED(WARNING) << "genNegDouble"
-                         << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
-  newLIR0(cUnit, kX86Bkpt);
-#if 0
   RegLocation rlResult;
   rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg);
   rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
   opRegRegImm(cUnit, kOpAdd, rlResult.highReg, rlSrc.highReg, 0x80000000);
   opRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg);
   storeValueWide(cUnit, rlDest, rlResult);
-#endif
 }
 
 LIR* genNullCheck(CompilationUnit* cUnit, int sReg, int mReg, int optFlags);
 void callRuntimeHelperReg(CompilationUnit* cUnit, int helperOffset, int arg0);
 
-/*
- * TODO: implement fast path to short-circuit thin-lock case
- */
 void genMonitorEnter(CompilationUnit* cUnit, int optFlags, RegLocation rlSrc)
 {
   oatFlushAllRegs(cUnit);
-  loadValueDirectFixed(cUnit, rlSrc, rARG0);  // Get obj
+  loadValueDirectFixed(cUnit, rlSrc, rCX);  // Get obj
   oatLockCallTemps(cUnit);  // Prepare for explicit register usage
-  genNullCheck(cUnit, rlSrc.sRegLow, rARG0, optFlags);
-  // Go expensive route - artLockObjectFromCode(self, obj);
-  callRuntimeHelperReg(cUnit, ENTRYPOINT_OFFSET(pLockObjectFromCode), rARG0);
+  genNullCheck(cUnit, rlSrc.sRegLow, rCX, optFlags);
+  // If lock is unheld, try to grab it quickly with compare and exchange
+  // TODO: copy and clear hash state?
+  newLIR2(cUnit, kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value());
+  newLIR2(cUnit, kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT);
+  newLIR2(cUnit, kX86Xor32RR, rAX, rAX);
+  newLIR3(cUnit, kX86LockCmpxchgMR, rCX, Object::MonitorOffset().Int32Value(), rDX);
+  LIR* branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondEq);
+  // If lock is held, go the expensive route - artLockObjectFromCode(self, obj);
+  callRuntimeHelperReg(cUnit, ENTRYPOINT_OFFSET(pLockObjectFromCode), rCX);
+  branch->target = newLIR0(cUnit, kPseudoTargetLabel);
 }
 
-/*
- * TODO: implement fast path to short-circuit thin-lock case
- */
 void genMonitorExit(CompilationUnit* cUnit, int optFlags, RegLocation rlSrc)
 {
   oatFlushAllRegs(cUnit);
-  loadValueDirectFixed(cUnit, rlSrc, rARG0);  // Get obj
+  loadValueDirectFixed(cUnit, rlSrc, rAX);  // Get obj
   oatLockCallTemps(cUnit);  // Prepare for explicit register usage
-  genNullCheck(cUnit, rlSrc.sRegLow, rARG0, optFlags);
-  // Go expensive route - UnlockObjectFromCode(obj);
-  callRuntimeHelperReg(cUnit, ENTRYPOINT_OFFSET(pUnlockObjectFromCode), rARG0);
+  genNullCheck(cUnit, rlSrc.sRegLow, rAX, optFlags);
+  // If lock is held by the current thread, clear it to quickly release it
+  // TODO: clear hash state?
+  newLIR2(cUnit, kX86Mov32RT, rDX, Thread::ThinLockIdOffset().Int32Value());
+  newLIR2(cUnit, kX86Sal32RI, rDX, LW_LOCK_OWNER_SHIFT);
+  newLIR3(cUnit, kX86Mov32RM, rCX, rAX, Object::MonitorOffset().Int32Value());
+  opRegReg(cUnit, kOpSub, rCX, rDX);
+  LIR* branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondNe);
+  newLIR3(cUnit, kX86Mov32MR, rAX, Object::MonitorOffset().Int32Value(), rCX);
+  LIR* branch2 = newLIR1(cUnit, kX86Jmp8, 0);
+  branch->target = newLIR0(cUnit, kPseudoTargetLabel);
+  // Otherwise, go the expensive route - UnlockObjectFromCode(obj);
+  callRuntimeHelperReg(cUnit, ENTRYPOINT_OFFSET(pUnlockObjectFromCode), rAX);
+  branch2->target = newLIR0(cUnit, kPseudoTargetLabel);
 }
 
 /*
@@ -377,4 +380,44 @@ void opRegCopyWide(CompilationUnit *cUnit, int destLo, int destHi,
   }
 }
 
+void genFusedLongCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir) {
+  LIR* labelList = cUnit->blockLabelList;
+  LIR* taken = &labelList[bb->taken->id];
+  RegLocation rlSrc1 = oatGetSrcWide(cUnit, mir, 0);
+  RegLocation rlSrc2 = oatGetSrcWide(cUnit, mir, 2);
+  oatFlushAllRegs(cUnit);
+  oatLockCallTemps(cUnit);  // Prepare for explicit register usage
+  loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
+  loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
+  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  // Swap operands and condition code to prevent use of zero flag.
+  if (ccode == kCondLe || ccode == kCondGt) {
+    // Compute (r3:r2) = (r3:r2) - (r1:r0)
+    opRegReg(cUnit, kOpSub, r2, r0);  // r2 = r2 - r0
+    opRegReg(cUnit, kOpSbc, r3, r1);  // r3 = r3 - r1 - CF
+  } else {
+    // Compute (r1:r0) = (r1:r0) - (r3:r2)
+    opRegReg(cUnit, kOpSub, r0, r2);  // r0 = r0 - r2
+    opRegReg(cUnit, kOpSbc, r1, r3);  // r1 = r1 - r3 - CF
+  }
+  switch (ccode) {
+    case kCondEq:
+    case kCondNe:
+      opRegReg(cUnit, kOpOr, r0, r1);  // r0 = r0 | r1
+      break;
+    case kCondLe:
+      ccode = kCondGe;
+      break;
+    case kCondGt:
+      ccode = kCondLt;
+      break;
+    case kCondLt:
+    case kCondGe:
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << (int)ccode;
+  }
+  opCondBranch(cUnit, ccode, taken);
+}
+
 }  // namespace art
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index 5bf4dd9cf3..72c8c03dea 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -445,6 +445,8 @@ enum X86OpCode {
   kX86Mfence,                   // memory barrier
   Binary0fOpCode(kX86Imul16),   // 16bit multiply
   Binary0fOpCode(kX86Imul32),   // 32bit multiply
+  kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR,// compare and exchange
+  kX86LockCmpxchgRR, kX86LockCmpxchgMR, kX86LockCmpxchgAR,// locked compare and exchange
   Binary0fOpCode(kX86Movzx8),   // zero-extend 8-bit value
   Binary0fOpCode(kX86Movzx16),  // zero-extend 16-bit value
   Binary0fOpCode(kX86Movsx8),   // sign-extend 8-bit value