diff options
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/dex/quick/arm64/fp_arm64.cc | 2 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/assemble_x86.cc | 15 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 1 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/fp_x86.cc | 127 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 7 |
5 files changed, 125 insertions, 27 deletions
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc index 265e8d2020..9814cb4a7a 100644 --- a/compiler/dex/quick/arm64/fp_arm64.cc +++ b/compiler/dex/quick/arm64/fp_arm64.cc @@ -45,7 +45,6 @@ void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest case Instruction::REM_FLOAT_2ADDR: case Instruction::REM_FLOAT: FlushAllRegs(); // Send everything to home location - // TODO: Fix xSELF. CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2, false); rl_result = GetReturn(kFPReg); @@ -89,7 +88,6 @@ void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode, case Instruction::REM_DOUBLE_2ADDR: case Instruction::REM_DOUBLE: FlushAllRegs(); // Send everything to home location - // TODO: Fix xSELF. { ThreadOffset<8> helper_offset = QUICK_ENTRYPOINT_OFFSET(8, pFmod); RegStorage r_tgt = CallHelperSetup(helper_offset);
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index c7e289d704..3f54798b7e 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -407,10 +407,17 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1, false }, "PslldRI", "!0r,!1d" }, { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1, false }, "PsllqRI", "!0r,!1d" }, - { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M", "[!0r,!1d]" }, - { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M", "[!0r,!1d]" }, - { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0, false }, "FstpsM", "[!0r,!1d]" }, - { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0, false }, "FstpdM", "[!0r,!1d]" }, + { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M", "[!0r,!1d]" }, + { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M", "[!0r,!1d]" }, + { kX86Fld32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 0, 0, 0, false }, "Fld32M", "[!0r,!1d]" }, + { kX86Fld64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 0, 0, 0, false }, "Fld64M", "[!0r,!1d]" }, + { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0, false }, "Fstps32M", "[!0r,!1d]" }, + { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0, false }, "Fstpd64M", "[!0r,!1d]" }, + { kX86Fst32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xD9, 0x00, 0, 2, 0, 0, false }, "Fsts32M", "[!0r,!1d]" }, + { kX86Fst64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xDD, 0x00, 0, 2, 0, 0, false }, "Fstd64M", "[!0r,!1d]" }, + { kX86Fprem, kNullary, NO_OPERAND | USE_FP_STACK, { 0xD9, 0, 0xF8, 0, 0, 0, 0, 0, false }, "Fprem64", "" }, + { kX86Fucompp, kNullary, NO_OPERAND | USE_FP_STACK, { 0xDA, 0, 0xE9, 0, 0, 0, 0, 0, false }, "Fucompp", "" }, + { kX86Fstsw16R, kNullary, NO_OPERAND, { 0x9B, 0xDF, 0xE0, 0, 0, 0, 0, 0, false }, "Fstsw16R", "ax" }, EXT_0F_ENCODING_MAP(Mova128, 0x66, 0x6F, REG_DEF0), { kX86Mova128MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128MR", "[!0r+!1d],!2r" },
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 3540843705..d874aaa829 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -148,6 +148,7 @@ class X86Mir2Lir : public Mir2Lir { RegLocation rl_src2); void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double); void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index 61623d0051..458f9c6691 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -48,16 +48,7 @@ void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode, break; case Instruction::REM_FLOAT_2ADDR: case Instruction::REM_FLOAT: - FlushAllRegs(); // Send everything to home location - if (cu_->target64) { - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2, - false); - } else { - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2, - false); - } - rl_result = GetReturn(kFPReg); - StoreValue(rl_dest, rl_result); + GenRemFP(rl_dest, rl_src1, rl_src2, false /* is_double */); return; case Instruction::NEG_FLOAT: GenNegFloat(rl_dest, rl_src1); @@ -110,16 +101,7 @@ void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode, break; case Instruction::REM_DOUBLE_2ADDR: case Instruction::REM_DOUBLE: - FlushAllRegs(); // Send everything to home location - if (cu_->target64) { - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2, - false); - } else { - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2, - false); - } - rl_result = GetReturnWide(kFPReg); - StoreValueWide(rl_dest, rl_result); + GenRemFP(rl_dest, rl_src1, rl_src2, true /* is_double */); return; case Instruction::NEG_DOUBLE: GenNegDouble(rl_dest, rl_src1); @@ -356,6 +338,110 @@ void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, } } +void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double) { + // Compute offsets to the source and destination VRs on stack. + int src1_v_reg_offset = SRegOffset(rl_src1.s_reg_low); + int src2_v_reg_offset = SRegOffset(rl_src2.s_reg_low); + int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low); + + // Update the in-register state of sources. + rl_src1 = is_double ? UpdateLocWide(rl_src1) : UpdateLoc(rl_src1); + rl_src2 = is_double ? UpdateLocWide(rl_src2) : UpdateLoc(rl_src2); + + // All memory accesses below reference dalvik regs. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + + // If the source is in physical register, then put it in its location on stack. + if (rl_src1.location == kLocPhysReg) { + RegisterInfo* reg_info = GetRegInfo(rl_src1.reg); + + if (reg_info != nullptr && reg_info->IsTemp()) { + // Calling FlushSpecificReg because it will only write back VR if it is dirty. + FlushSpecificReg(reg_info); + // ResetDef to prevent NullifyRange from removing stores. + ResetDef(rl_src1.reg); + } else { + // It must have been register promoted if it is not a temp but is still in physical + // register. Since we need it to be in memory to convert, we place it there now. + StoreBaseDisp(TargetReg(kSp), src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32); + } + } + + if (rl_src2.location == kLocPhysReg) { + RegisterInfo* reg_info = GetRegInfo(rl_src2.reg); + if (reg_info != nullptr && reg_info->IsTemp()) { + FlushSpecificReg(reg_info); + ResetDef(rl_src2.reg); + } else { + StoreBaseDisp(TargetReg(kSp), src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32); + } + } + + int fld_opcode = is_double ? kX86Fld64M : kX86Fld32M; + + // Push the source virtual registers onto the x87 stack. + LIR *fld_2 = NewLIR2NoDest(fld_opcode, TargetReg(kSp).GetReg(), + src2_v_reg_offset + LOWORD_OFFSET); + AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2, + true /* is_load */, is_double /* is64bit */); + + LIR *fld_1 = NewLIR2NoDest(fld_opcode, TargetReg(kSp).GetReg(), + src1_v_reg_offset + LOWORD_OFFSET); + AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2, + true /* is_load */, is_double /* is64bit */); + + FlushReg(rs_rAX); + Clobber(rs_rAX); + LockTemp(rs_rAX); + + LIR* retry = NewLIR0(kPseudoTargetLabel); + + // Divide ST(0) by ST(1) and place result to ST(0). + NewLIR0(kX86Fprem); + + // Move FPU status word to AX. + NewLIR0(kX86Fstsw16R); + + // Check if reduction is complete. + OpRegImm(kOpAnd, rs_rAX, 0x400); + + // If no then continue to compute remainder. + LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); + branch->target = retry; + + FreeTemp(rs_rAX); + + // Now store result in the destination VR's stack location. + int displacement = dest_v_reg_offset + LOWORD_OFFSET; + int opcode = is_double ? kX86Fst64M : kX86Fst32M; + LIR *fst = NewLIR2NoDest(opcode, TargetReg(kSp).GetReg(), displacement); + AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */); + + // Pop ST(1) and ST(0). + NewLIR0(kX86Fucompp); + + /* + * The result is in a physical register if it was in a temp or was register + * promoted. For that reason it is enough to check if it is in physical + * register. If it is, then we must do all of the bookkeeping necessary to + * invalidate temp (if needed) and load in promoted register (if needed). + * If the result's location is in memory, then we do not need to do anything + * more since the fstp has already placed the correct value in memory. + */ + RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest, kFPReg) : + UpdateLocTyped(rl_dest, kFPReg); + if (rl_result.location == kLocPhysReg) { + rl_result = EvalLoc(rl_dest, kFPReg, true); + if (is_double) { + LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64); + StoreFinalValueWide(rl_dest, rl_result); + } else { + Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg); + StoreFinalValue(rl_dest, rl_result); + } + } +} + void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT); @@ -502,5 +588,4 @@ bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) { } - } // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index f1b5811a33..28b9dca193 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -572,8 +572,15 @@ enum X86OpCode { kX86PsllqRI, // left shift of floating point registers 64 bits x 2 kX86Fild32M, // push 32-bit integer on x87 stack kX86Fild64M, // push 64-bit integer on x87 stack + kX86Fld32M, // push float on x87 stack + kX86Fld64M, // push double on x87 stack kX86Fstp32M, // pop top x87 fp stack and do 32-bit store kX86Fstp64M, // pop top x87 fp stack and do 64-bit store + kX86Fst32M, // do 32-bit store + kX86Fst64M, // do 64-bit store + kX86Fprem, // remainder from dividing of two floating point values + kX86Fucompp, // compare floating point values and pop x87 fp stack twice + kX86Fstsw16R, // store FPU status word Binary0fOpCode(kX86Mova128), // move 128 bits aligned kX86Mova128MR, kX86Mova128AR, // store 128 bit aligned from xmm1 to m128 Binary0fOpCode(kX86Movups), // load unaligned packed single FP values from xmm2/m128 to xmm1 |