Generate long-to-FP conversion bytecodes inline for x86

long-to-float and long-to-double are now generated inline instead of calling
a helper routine. The conversion is done by using x87.

Change-Id: I196e526afec1be212898baceca8527549c3655b6
Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index f567b5c..c2d12f6 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -98,6 +98,16 @@
   return insn;
 }
 
+inline LIR* Mir2Lir::NewLIR2NoDest(int opcode, int src, int info) {
+  // Two-operand LIR with no destination operand; the opcode must be a pseudo or unary op.
+  DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_UNARY_OP))
+      << GetTargetInstName(opcode) << " " << opcode << " "
+      << PrettyMethod(cu_->method_idx, *cu_->dex_file) << " " << current_dalvik_offset_;
+  LIR* lir = RawLIR(current_dalvik_offset_, opcode, src, info);
+  AppendLIR(lir);
+  return lir;
+}
+
 inline LIR* Mir2Lir::NewLIR3(int opcode, int dest, int src1, int src2) {
   DCHECK(IsPseudoLirOp(opcode) || (GetTargetInstFlags(opcode) & IS_TERTIARY_OP))
       << GetTargetInstName(opcode) << " " << opcode << " "
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 6115953..5d4439f 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -401,6 +401,7 @@
     LIR* NewLIR0(int opcode);
     LIR* NewLIR1(int opcode, int dest);
     LIR* NewLIR2(int opcode, int dest, int src1);
+    LIR* NewLIR2NoDest(int opcode, int src, int info);
     LIR* NewLIR3(int opcode, int dest, int src1, int src2);
     LIR* NewLIR4(int opcode, int dest, int src1, int src2, int info);
     LIR* NewLIR5(int opcode, int dest, int src1, int src2, int info1, int info2);
@@ -480,6 +481,7 @@
     virtual void ResetDefLocWide(RegLocation rl);
     void ResetDefTracking();
     void ClobberAllRegs();
+    void FlushSpecificReg(RegisterInfo* info);
     void FlushAllRegsBody(RegisterInfo* info, int num_regs);
     void FlushAllRegs();
     bool RegClassMatches(int reg_class, int reg);
diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc
index eb70d8c..0a65171 100644
--- a/compiler/dex/quick/ralloc_util.cc
+++ b/compiler/dex/quick/ralloc_util.cc
@@ -545,15 +545,19 @@
   }
 }
 
+void Mir2Lir::FlushSpecificReg(RegisterInfo* info) {
+  if (info->pair) {  // Half of a register pair: flush it together with its partner.
+    FlushRegWide(info->reg, info->partner);
+  } else {  // Stand-alone register.
+    FlushReg(info->reg);
+  }
+}
+
 // Make sure nothing is live and dirty
 void Mir2Lir::FlushAllRegsBody(RegisterInfo* info, int num_regs) {
   for (int i = 0; i < num_regs; i++) {
     if (info[i].live && info[i].dirty) {
-      if (info[i].pair) {
-        FlushRegWide(info[i].reg, info[i].partner);
-      } else {
-        FlushReg(info[i].reg);
-      }
+      FlushSpecificReg(&info[i]);
     }
   }
 }
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 321c6a7..6481589 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -295,7 +295,11 @@
   { kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" },
   { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" },
   { kX86SqrtsdRR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0xF2, 0, 0x0F, 0x51, 0, 0, 0, 0 }, "SqrtsdRR", "!0r,!1r" },
-  { kX86FstpdM, kMem, IS_STORE | IS_BINARY_OP | REG_USE0, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
+
+  { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0 }, "Fild32M", "[!0r,!1d]" },  // DB /0: fild m32int
+  { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0 }, "Fild64M", "[!0r,!1d]" },  // DF /5: fild m64int
+  { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0 }, "Fstp32M", "[!0r,!1d]" },  // D9 /3: fstp m32fp
+  { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "Fstp64M", "[!0r,!1d]" },  // DD /3: fstp m64fp
 
   EXT_0F_ENCODING_MAP(Movups,    0x0, 0x10, REG_DEF0),
   { kX86MovupsMR, kMemReg,      IS_STORE | IS_TERTIARY_OP | REG_USE02,  { 0x0, 0, 0x0F, 0x11, 0, 0, 0, 0 }, "MovupsMR", "[!0r+!1d],!2r" },
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 22e36d5..70263d8 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -452,6 +452,7 @@
      */
     LIR* OpCmpMemImmBranch(ConditionCode cond, int temp_reg, int base_reg,
                            int offset, int check_value, LIR* target);
+
     /*
      * Can this operation be using core registers without temporaries?
      * @param rl_lhs Left hand operand.
@@ -460,6 +461,14 @@
      */
     bool IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs);
 
+    /**
+     * @brief Generates inline code for conversion of long to FP by using x87.
+     * @param rl_dest The destination of the FP.
+     * @param rl_src The source of the long.
+     * @param is_double 'true' if dealing with double, 'false' for float.
+     */
+    void GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double);
+
     /*
      * @brief Perform MIR analysis before compiling method.
      * @note Invokes Mir2LiR::Materialize after analysis.
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 006fe76..4c2ecc0 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -130,6 +130,70 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
+  // Frame offsets of the source and destination virtual registers (VRs) on the stack.
+  int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);
+  int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low);
+
+  // Update the in-register state of source.
+  rl_src = UpdateLocWide(rl_src);
+
+  // fild reads from memory, so a source held in physical registers is written to its stack slot.
+  if (rl_src.location == kLocPhysReg) {
+    RegisterInfo* lo_info = GetRegInfo(rl_src.low_reg);
+
+    if (lo_info != nullptr && lo_info->is_temp) {
+      // Temp: FlushSpecificReg writes the VR back to the stack only if the register is dirty.
+      FlushSpecificReg(lo_info);
+    } else {
+      // Not a temp yet still in a physical register, so it must have been register promoted.
+      // The conversion needs the value in memory, so store it to the VR's stack slot now.
+      StoreBaseDispWide(TargetReg(kSp), src_v_reg_offset, rl_src.low_reg, rl_src.high_reg);
+    }
+  }
+
+  // Push the 64-bit integer source from its stack slot onto the x87 stack (fild).
+  LIR *fild64 = NewLIR2NoDest(kX86Fild64M, TargetReg(kSp), src_v_reg_offset + LOWORD_OFFSET);
+  AnnotateDalvikRegAccess(fild64, (src_v_reg_offset + LOWORD_OFFSET) >> 2,
+      true /* is_load */, true /* is64bit */);
+
+  // Pop the x87 stack top into the destination VR's stack slot as a 64-bit or 32-bit FP store.
+  int opcode = is_double ? kX86Fstp64M : kX86Fstp32M;
+  int displacement = is_double ? dest_v_reg_offset + LOWORD_OFFSET : dest_v_reg_offset;
+  LIR *fstp = NewLIR2NoDest(opcode, TargetReg(kSp), displacement);
+  AnnotateDalvikRegAccess(fstp, displacement >> 2, false /* is_load */, is_double);
+
+  /*
+   * The result is in a physical register if it was in a temp or was register
+   * promoted. For that reason it is enough to check if it is in a physical
+   * register. If it is, then we must do all of the bookkeeping necessary to
+   * invalidate the temp (if needed) and load the promoted register (if needed).
+   * If the result's location is in memory, then we do not need to do anything
+   * more since the fstp has already placed the correct value in memory.
+   */
+  RegLocation rl_result = is_double ? UpdateLocWide(rl_dest) : UpdateLoc(rl_dest);
+  if (rl_result.location == kLocPhysReg) {
+    /*
+     * We already know that the result is in a physical register but do not know if it is the
+     * right class. So we call EvalLoc(Wide) first, which ensures that it gets moved to the
+     * correct register class, then reload the converted value from the stack slot.
+     */
+    if (is_double) {
+      rl_result = EvalLocWide(rl_dest, kFPReg, true);
+
+      LoadBaseDispWide(TargetReg(kSp), dest_v_reg_offset, rl_result.low_reg, rl_result.high_reg, INVALID_SREG);
+
+      StoreValueWide(rl_dest, rl_result);
+    } else {
+      rl_result = EvalLoc(rl_dest, kFPReg, true);
+
+      LoadWordDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.low_reg);
+
+      StoreValue(rl_dest, rl_result);
+    }
+  }
+}
+
 void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
                                RegLocation rl_src) {
   RegisterClass rcSrc = kFPReg;
@@ -198,11 +262,10 @@
       return;
     }
     case Instruction::LONG_TO_DOUBLE:
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pL2d), rl_dest, rl_src);
+      GenLongToFP(rl_dest, rl_src, true /* is_double */);
       return;
     case Instruction::LONG_TO_FLOAT:
-      // TODO: inline by using memory as a 64-bit source. Be careful about promoted registers.
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pL2f), rl_dest, rl_src);
+      GenLongToFP(rl_dest, rl_src, false /* is_double */);
       return;
     case Instruction::FLOAT_TO_LONG:
       GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src);
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index c49f627..e75da0d 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -356,7 +356,10 @@
   kX86PsrlqRI,                  // right shift of floating point registers
   kX86PsllqRI,                  // left shift of floating point registers
   kX86SqrtsdRR,                 // sqrt of floating point register
-  kX86FstpdM,                   // Store and pop top x87 fp stack
+  kX86Fild32M,                  // push 32-bit integer on x87 stack
+  kX86Fild64M,                  // push 64-bit integer on x87 stack
+  kX86Fstp32M,                  // pop top x87 fp stack and do 32-bit store
+  kX86Fstp64M,                  // pop top x87 fp stack and do 64-bit store
   Binary0fOpCode(kX86Movups),   // load unaligned packed single FP values from xmm2/m128 to xmm1
   kX86MovupsMR, kX86MovupsAR,   // store unaligned packed single FP values from xmm1 to m128
   Binary0fOpCode(kX86Movaps),   // load aligned packed single FP values from xmm2/m128 to xmm1
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 903d755..b6ddc95 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -672,6 +672,13 @@
     has_modrm = true;
     reg_is_opcode = true;
     break;
+  case 0xDB:
+    static const char* db_opcodes[] = {"fildl", "unknown-db", "unknown-db", "unknown-db", "unknown-db", "unknown-db", "unknown-db", "unknown-db"};
+    modrm_opcodes = db_opcodes;
+    load = true;
+    has_modrm = true;
+    reg_is_opcode = true;
+    break;
   case 0xDD:
     static const char* dd_opcodes[] = {"fldl", "fisttp", "fstl", "fstpl", "frstor", "unknown-dd", "fnsave", "fnstsw"};
     modrm_opcodes = dd_opcodes;
@@ -679,6 +686,13 @@
     has_modrm = true;
     reg_is_opcode = true;
     break;
+  case 0xDF:
+    static const char* df_opcodes[] = {"fild", "unknown-df", "unknown-df", "unknown-df", "unknown-df", "fildll", "unknown-df", "unknown-df"};
+    modrm_opcodes = df_opcodes;
+    load = true;
+    has_modrm = true;
+    reg_is_opcode = true;
+    break;
   case 0xE8: opcode << "call"; branch_bytes = 4; break;
   case 0xE9: opcode << "jmp"; branch_bytes = 4; break;
   case 0xEB: opcode << "jmp"; branch_bytes = 1; break;