Implement GenInlinedSqrt for x86
Implemented this using the hardware instruction, which handles
NaN properly.
Tested manually using host mode.
Change-Id: I082aa20041c933ae5fc78f12ddf491d1c775c683
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 96dc6ee..e4e345c 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -288,6 +288,8 @@
{ kX86PsrlqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 2, 0, 1 }, "PsrlqRI", "!0r,!1d" },
{ kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1 }, "PsllqRI", "!0r,!1d" },
+ { kX86SqrtsdRR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0xF2, 0, 0x0F, 0x51, 0, 0, 0, 0 }, "SqrtsdRR", "!0r,!1r" },
+ { kX86FSTPdM, kMem, IS_STORE | IS_BINARY_OP | REG_USE0, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0 }, "FstpdM", "[!0r,!1d]" },
EXT_0F_ENCODING_MAP(Movdxr, 0x66, 0x6E, REG_DEF0),
{ kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" },
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index c9d6bfc..1731703 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -369,8 +369,14 @@
}
bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) {
- DCHECK_NE(cu_->instruction_set, kThumb2);
- return false;
+ RegLocation rl_src = info->args[0];
+ RegLocation rl_dest = InlineTargetWide(info); // double place for result
+ rl_src = LoadValueWide(rl_src, kFPReg);
+ RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
+ NewLIR2(kX86SqrtsdRR, S2d(rl_result.low_reg, rl_result.high_reg),
+ S2d(rl_src.low_reg, rl_src.high_reg));
+ StoreValueWide(rl_dest, rl_result);
+ return true;
}
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 5fe76fe..ca5a234 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -348,6 +348,8 @@
Binary0fOpCode(kX86Divss), // float divide
kX86PsrlqRI, // right shift of floating point registers
kX86PsllqRI, // left shift of floating point registers
+ kX86SqrtsdRR, // sqrt of floating point register
+ kX86FSTPdM, // Store and pop top x87 fp stack
Binary0fOpCode(kX86Movdxr), // move into xmm from gpr
kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR, // move into reg from xmm
kX86Set8R, kX86Set8M, kX86Set8A, // set byte depending on condition operand
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 6a67079..b1d031a 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -176,6 +176,7 @@
// points->pCmplDouble = NULL; // Not needed on x86.
// points->pCmplFloat = NULL; // Not needed on x86.
qpoints->pFmod = art_quick_fmod;
+ // qpoints->pSqrt = NULL; // Not needed on x86.
qpoints->pL2d = art_quick_l2d;
qpoints->pFmodf = art_quick_fmodf;
qpoints->pL2f = art_quick_l2f;