Fixes to x86 compilation.

- div-long and rem-long were calling each other's helper methods (pLdiv
  and pLdivmod were swapped).
- genCmpFP was grabbing new src and destination registers instead of
  using the ones it was passed. Also, it wasn't writing its result back.
- gave mul-long its own assembly helper method to help it marshal its
  sources properly.
- fixed assembly of shifts for longs.
- updated IntMath's testFloatCompare to exit on the first error.

Change-Id: Iacecf07d3c7135d865728c18551c7989e7e0276b
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index 9e21aea..14eaf1d 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -2193,13 +2193,13 @@
       callOut = true;
       checkZero = true;
       retReg = rRET0;
-      funcOffset = ENTRYPOINT_OFFSET(pLdivmod);
+      funcOffset = ENTRYPOINT_OFFSET(pLdiv);
       break;
     case Instruction::REM_LONG:
     case Instruction::REM_LONG_2ADDR:
       callOut = true;
       checkZero = true;
-      funcOffset = ENTRYPOINT_OFFSET(pLdiv);
+      funcOffset = ENTRYPOINT_OFFSET(pLdivmod);
 #if defined(TARGET_ARM)
       /* NOTE - result is in rARG2/rARG3 instead of rRET0/rRET1 */
       retReg = rARG2;
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index f45a099..f5a030b 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -190,21 +190,16 @@
   int srcReg1;
   int srcReg2;
   if (single) {
-    rlSrc1 = oatGetSrc(cUnit, mir, 0);
     rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
     srcReg1 = rlSrc1.lowReg;
-    rlSrc2 = oatGetSrc(cUnit, mir, 0);
-    rlSrc2 = loadValue(cUnit, rlSrc1, kFPReg);
-    srcReg2 = rlSrc1.lowReg;
+    rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
+    srcReg2 = rlSrc2.lowReg;
   } else {
-    rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1);
     rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
     srcReg1 = S2D(rlSrc1.lowReg, rlSrc1.highReg);
-    rlSrc2 = oatGetSrcWide(cUnit, mir, 0, 1);
     rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
     srcReg2 = S2D(rlSrc2.lowReg, rlSrc2.highReg);
   }
-  rlDest = oatGetDest(cUnit, mir, 0);
   RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
   loadConstantNoClobber(cUnit, rlResult.lowReg, unorderedGt ? 1 : 0);
   if (single) {
@@ -221,6 +216,7 @@
   if (unorderedGt) {
     branch->target = newLIR0(cUnit, kPseudoTargetLabel);
   }
+  storeValue(cUnit, rlDest, rlResult);
   return false;
 }
 
diff --git a/src/compiler/codegen/x86/X86/Gen.cc b/src/compiler/codegen/x86/X86/Gen.cc
index 62ff3ad..46c98ad 100644
--- a/src/compiler/codegen/x86/X86/Gen.cc
+++ b/src/compiler/codegen/x86/X86/Gen.cc
@@ -255,12 +255,12 @@
   oatFlushAllRegs(cUnit);
   oatLockCallTemps(cUnit);  // Prepare for explicit register usage
   loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
-  loadValueDirectWideFixed(cUnit, rlSrc1, r2, r3);
-  // Compute (r1:r0) = (r1:r0) - (r2:r3)
+  loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
+  // Compute (r1:r0) = (r1:r0) - (r3:r2)
   opRegReg(cUnit, kOpSub, r0, r2);  // r0 = r0 - r2
   opRegReg(cUnit, kOpSbc, r1, r3);  // r1 = r1 - r3 - CF
   opRegReg(cUnit, kOpOr, r0, r1);   // r0 = high | low - sets ZF
-  newLIR2(cUnit, kX86Set8R, r0, kX86CondNz);  // r0 = (r1:r0) != (r2:r3) ? 1 : 0
+  newLIR2(cUnit, kX86Set8R, r0, kX86CondNz);  // r0 = (r1:r0) != (r3:r2) ? 1 : 0
   newLIR2(cUnit, kX86Movzx8RR, r0, r0);
   opRegImm(cUnit, kOpAsr, r1, 31);  // r1 = high >> 31
   opRegReg(cUnit, kOpOr, r0, r1);   // r0 holds result