diff options
| -rw-r--r-- | src/compiler/codegen/x86/Assemble.cc | 4 | ||||
| -rw-r--r-- | src/compiler/codegen/x86/FP/X86FP.cc | 48 | ||||
| -rw-r--r-- | src/compiler/codegen/x86/X86LIR.h | 2 | ||||
| -rw-r--r-- | src/oat/runtime/x86/oat_support_entrypoints_x86.cc | 8 | ||||
| -rw-r--r-- | src/oat/runtime/x86/runtime_support_x86.S | 34 |
5 files changed, 70 insertions, 26 deletions
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc index b9605cc13b..671e72863a 100644 --- a/src/compiler/codegen/x86/Assemble.cc +++ b/src/compiler/codegen/x86/Assemble.cc @@ -262,8 +262,8 @@ ENCODING_MAP(Cmp, IS_LOAD, EXT_0F_ENCODING_MAP(Addss, 0xF3, 0x58), EXT_0F_ENCODING_MAP(Mulsd, 0xF2, 0x59), EXT_0F_ENCODING_MAP(Mulss, 0xF3, 0x59), - EXT_0F_ENCODING_MAP(Cvtss2sd, 0xF2, 0x5A), - EXT_0F_ENCODING_MAP(Cvtsd2ss, 0xF3, 0x5A), + EXT_0F_ENCODING_MAP(Cvtsd2ss, 0xF2, 0x5A), + EXT_0F_ENCODING_MAP(Cvtss2sd, 0xF3, 0x5A), EXT_0F_ENCODING_MAP(Subsd, 0xF2, 0x5C), EXT_0F_ENCODING_MAP(Subss, 0xF3, 0x5C), EXT_0F_ENCODING_MAP(Divsd, 0xF2, 0x5E), diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc index 24cd7d31e3..e6b47d2a37 100644 --- a/src/compiler/codegen/x86/FP/X86FP.cc +++ b/src/compiler/codegen/x86/FP/X86FP.cc @@ -44,8 +44,12 @@ static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, op = kX86MulssRR; break; case Instruction::NEG_FLOAT: - UNIMPLEMENTED(WARNING) << "inline fneg"; // pxor xmm, [0x80000000] - // fall-through + rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg); + rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true); + newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlResult.lowReg); + newLIR2(cUnit, kX86SubssRR, rlResult.lowReg, rlSrc1.lowReg); + storeValue(cUnit, rlDest, rlResult); + return false; case Instruction::REM_FLOAT_2ADDR: case Instruction::REM_FLOAT: { return genArithOpFloatPortable(cUnit, mir, rlDest, rlSrc1, rlSrc2); @@ -91,6 +95,12 @@ static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, op = kX86MulsdRR; break; case Instruction::NEG_DOUBLE: + rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg); + rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true); + newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlResult.lowReg); + newLIR2(cUnit, kX86SubsdRR, rlResult.lowReg, rlSrc1.lowReg); + storeValueWide(cUnit, rlDest, rlResult); + return false; case Instruction::REM_DOUBLE_2ADDR: case Instruction::REM_DOUBLE: { return genArithOpDoublePortable(cUnit, mir, rlDest, rlSrc1, rlSrc2); @@ -124,9 +134,7 @@ static bool genConversion(CompilationUnit *cUnit, MIR *mir) { RegLocation rlDest; X86OpCode op = kX86Nop; int srcReg; - int tempReg; RegLocation rlResult; - LIR* branch = NULL; switch (opcode) { case Instruction::INT_TO_FLOAT: longSrc = false; @@ -152,40 +160,52 @@ static bool genConversion(CompilationUnit *cUnit, MIR *mir) { rcSrc = kCoreReg; op = kX86Cvtsi2sdRR; break; - case Instruction::FLOAT_TO_INT: + case Instruction::FLOAT_TO_INT: { rlSrc = oatGetSrc(cUnit, mir, 0); rlSrc = loadValue(cUnit, rlSrc, kFPReg); srcReg = rlSrc.lowReg; rlDest = oatGetDest(cUnit, mir, 0); oatClobberSReg(cUnit, rlDest.sRegLow); rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - tempReg = oatAllocTempFloat(cUnit); + int tempReg = oatAllocTempFloat(cUnit); loadConstant(cUnit, rlResult.lowReg, 0x7fffffff); newLIR2(cUnit, kX86Cvtsi2ssRR, tempReg, rlResult.lowReg); newLIR2(cUnit, kX86ComissRR, srcReg, tempReg); - branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA); - newLIR2(cUnit, kX86Cvtss2siRR, rlResult.lowReg, srcReg); - branch->target = newLIR0(cUnit, kPseudoTargetLabel); + LIR* branchPosOverflow = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA); + LIR* branchNaN = newLIR2(cUnit, kX86Jcc8, 0, kX86CondP); + newLIR2(cUnit, kX86Cvttss2siRR, rlResult.lowReg, srcReg); + LIR* branchNormal = newLIR1(cUnit, kX86Jmp8, 0); + branchNaN->target = newLIR0(cUnit, kPseudoTargetLabel); + newLIR2(cUnit, kX86Xor32RR, rlResult.lowReg, rlResult.lowReg); + branchPosOverflow->target = newLIR0(cUnit, kPseudoTargetLabel); + branchNormal->target = newLIR0(cUnit, kPseudoTargetLabel); storeValue(cUnit, rlDest, rlResult); return false; - case Instruction::DOUBLE_TO_INT: + } + case Instruction::DOUBLE_TO_INT: { rlSrc = oatGetSrcWide(cUnit, mir, 0, 1); rlSrc = loadValueWide(cUnit, rlSrc, kFPReg); srcReg = rlSrc.lowReg; rlDest = oatGetDest(cUnit, mir, 0); oatClobberSReg(cUnit, rlDest.sRegLow); rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - tempReg = oatAllocTempDouble(cUnit); + int tempReg = oatAllocTempDouble(cUnit); loadConstant(cUnit, rlResult.lowReg, 0x7fffffff); newLIR2(cUnit, kX86Cvtsi2sdRR, tempReg, rlResult.lowReg); newLIR2(cUnit, kX86ComisdRR, srcReg, tempReg); - branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA); - newLIR2(cUnit, kX86Cvtsd2siRR, rlResult.lowReg, srcReg); - branch->target = newLIR0(cUnit, kPseudoTargetLabel); + LIR* branchPosOverflow = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA); + LIR* branchNaN = newLIR2(cUnit, kX86Jcc8, 0, kX86CondP); + newLIR2(cUnit, kX86Cvttsd2siRR, rlResult.lowReg, srcReg); + LIR* branchNormal = newLIR1(cUnit, kX86Jmp8, 0); + branchNaN->target = newLIR0(cUnit, kPseudoTargetLabel); + newLIR2(cUnit, kX86Xor32RR, rlResult.lowReg, rlResult.lowReg); + branchPosOverflow->target = newLIR0(cUnit, kPseudoTargetLabel); + branchNormal->target = newLIR0(cUnit, kPseudoTargetLabel); storeValue(cUnit, rlDest, rlResult); return false; + } case Instruction::LONG_TO_DOUBLE: case Instruction::LONG_TO_FLOAT: // These can be implemented inline by using memory as a 64-bit source. diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h index 36e459cf29..3ec1112c3d 100644 --- a/src/compiler/codegen/x86/X86LIR.h +++ b/src/compiler/codegen/x86/X86LIR.h @@ -427,8 +427,8 @@ enum X86OpCode { Binary0fOpCode(kX86Addss), // float add Binary0fOpCode(kX86Mulsd), // double multiply Binary0fOpCode(kX86Mulss), // float multiply - Binary0fOpCode(kX86Cvtss2sd), // float to double Binary0fOpCode(kX86Cvtsd2ss), // double to float + Binary0fOpCode(kX86Cvtss2sd), // float to double Binary0fOpCode(kX86Subsd), // double subtract Binary0fOpCode(kX86Subss), // float subtract Binary0fOpCode(kX86Divsd), // double divide diff --git a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc index 605024e303..a28a898cf4 100644 --- a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc +++ b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc @@ -67,6 +67,8 @@ extern "C" void art_lock_object_from_code(void*); extern "C" void art_unlock_object_from_code(void*); // Math entrypoints. +extern "C" double art_fmod_from_code(double, double); +extern "C" float art_fmodf_from_code(float, float); extern "C" double art_l2d_from_code(int64_t); extern "C" float art_l2f_from_code(int64_t); extern "C" int64_t art_d2l_from_code(double); @@ -74,7 +76,7 @@ extern "C" int64_t art_f2l_from_code(float); extern "C" int32_t art_idivmod_from_code(int32_t, int32_t); extern "C" int64_t art_ldiv_from_code(int64_t, int64_t); extern "C" int64_t art_ldivmod_from_code(int64_t, int64_t); -extern "C" int64_t art_lmul_from_code(int64_t a, int64_t b); +extern "C" int64_t art_lmul_from_code(int64_t, int64_t); extern "C" uint64_t art_lshl_from_code(uint64_t, uint32_t); extern "C" uint64_t art_lshr_from_code(uint64_t, uint32_t); extern "C" uint64_t art_lushr_from_code(uint64_t, uint32_t); @@ -168,13 +170,13 @@ void InitEntryPoints(EntryPoints* points) { //points->pDmul = NULL; // Not needed on x86. //points->pDsub = NULL; // Not needed on x86. //points->pF2d = NULL; - //points->pFmod = NULL; + points->pFmod = art_fmod_from_code; //points->pI2d = NULL; points->pL2d = art_l2d_from_code; //points->pD2f = NULL; //points->pFadd = NULL; // Not needed on x86. //points->pFdiv = NULL; // Not needed on x86. - //points->pFmodf = NULL; + points->pFmodf = art_fmodf_from_code; //points->pFmul = NULL; // Not needed on x86. //points->pFsub = NULL; // Not needed on x86. //points->pI2f = NULL; diff --git a/src/oat/runtime/x86/runtime_support_x86.S b/src/oat/runtime/x86/runtime_support_x86.S index 028d7ec442..d37fa5cb22 100644 --- a/src/oat/runtime/x86/runtime_support_x86.S +++ b/src/oat/runtime/x86/runtime_support_x86.S @@ -395,12 +395,34 @@ TWO_ARG_DOWNCALL art_can_put_array_element_from_code, artCanPutArrayElementFromC NO_ARG_DOWNCALL art_test_suspend, artTestSuspendFromCode, ret +DEFINE_FUNCTION art_fmod_from_code + movl %ebx, -4(%esp) // put hi arg2 into memory + movl %edx, -8(%esp) // put lo arg2 into memory + fldl -8(%esp) // push arg2 onto fp stack + movl %ecx, -4(%esp) // put hi arg1 into memory + movl %eax, -8(%esp) // put lo arg1 into memory + fldl -8(%esp) // push arg1 onto fp stack + fprem1 // calculate IEEE remainder + fstpl -8(%esp) // pop return value off fp stack + movsd -8(%esp), %xmm0 // place into %xmm0 + ret + +DEFINE_FUNCTION art_fmodf_from_code + movl %ecx, -4(%esp) // put arg2 into memory + fld -4(%esp) // push arg2 onto fp stack + movl %eax, -4(%esp) // put arg1 into memory + fld -4(%esp) // push arg1 onto fp stack + fprem1 // calculate IEEE remainder + fstp -4(%esp) // pop return value off fp stack + movss -4(%esp), %xmm0 // place into %xmm0 + ret + DEFINE_FUNCTION art_l2d_from_code pushl %eax // alignment padding pushl %ecx // pass arg2 pushl %eax // pass arg1 - call SYMBOL(art_l2d) // (jlong a, Thread*, SP) - fstpl (%esp) // get return value + call SYMBOL(art_l2d) // (jlong a, Thread*, SP) + fstpl (%esp) // pop return value off fp stack movsd (%esp), %xmm0 // place into %xmm0 addl LITERAL(12), %esp // pop arguments ret @@ -409,8 +431,8 @@ DEFINE_FUNCTION art_l2f_from_code pushl %eax // alignment padding pushl %ecx // pass arg2 pushl %eax // pass arg1 - call SYMBOL(art_l2f) // (jlong a, Thread*, SP) - fstp (%esp) // get return value + call SYMBOL(art_l2f) // (jlong a, Thread*, SP) + fstp (%esp) // pop return value off fp stack movss (%esp), %xmm0 // place into %xmm0 addl LITERAL(12), %esp // pop arguments ret @@ -419,14 +441,14 @@ DEFINE_FUNCTION art_d2l_from_code pushl %eax // alignment padding pushl %ecx // pass arg2 pushl %eax // pass arg1 - call SYMBOL(art_d2l) // (jdouble a, Thread*, SP) + call SYMBOL(art_d2l) // (jdouble a, Thread*, SP) addl LITERAL(12), %esp // pop arguments ret DEFINE_FUNCTION art_f2l_from_code subl LITERAL(8), %esp // alignment padding pushl %eax // pass arg1 - call SYMBOL(art_f2l) // (jfloat a, Thread*, SP) + call SYMBOL(art_f2l) // (jfloat a, Thread*, SP) addl LITERAL(12), %esp // pop arguments ret |