diff options
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/dex/quick/arm64/fp_arm64.cc | 2 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/assemble_x86.cc | 15 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 1 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/fp_x86.cc | 127 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 7 |
5 files changed, 125 insertions, 27 deletions
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc index 265e8d2020..9814cb4a7a 100644 --- a/compiler/dex/quick/arm64/fp_arm64.cc +++ b/compiler/dex/quick/arm64/fp_arm64.cc @@ -45,7 +45,6 @@ void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest case Instruction::REM_FLOAT_2ADDR: case Instruction::REM_FLOAT: FlushAllRegs(); // Send everything to home location - // TODO: Fix xSELF. CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2, false); rl_result = GetReturn(kFPReg); @@ -89,7 +88,6 @@ void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode, case Instruction::REM_DOUBLE_2ADDR: case Instruction::REM_DOUBLE: FlushAllRegs(); // Send everything to home location - // TODO: Fix xSELF. { ThreadOffset<8> helper_offset = QUICK_ENTRYPOINT_OFFSET(8, pFmod); RegStorage r_tgt = CallHelperSetup(helper_offset);
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index c7e289d704..3f54798b7e 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -407,10 +407,17 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86PslldRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 6, 0, 1, false }, "PslldRI", "!0r,!1d" }, { kX86PsllqRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x73, 0, 6, 0, 1, false }, "PsllqRI", "!0r,!1d" }, - { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M", "[!0r,!1d]" }, - { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M", "[!0r,!1d]" }, - { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0, false }, "FstpsM", "[!0r,!1d]" }, - { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0, false }, "FstpdM", "[!0r,!1d]" }, + { kX86Fild32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDB, 0x00, 0, 0, 0, 0, false }, "Fild32M", "[!0r,!1d]" }, + { kX86Fild64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDF, 0x00, 0, 5, 0, 0, false }, "Fild64M", "[!0r,!1d]" }, + { kX86Fld32M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 0, 0, 0, false }, "Fld32M", "[!0r,!1d]" }, + { kX86Fld64M, kMem, IS_LOAD | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 0, 0, 0, false }, "Fld64M", "[!0r,!1d]" }, + { kX86Fstp32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xD9, 0x00, 0, 3, 0, 0, false }, "Fstps32M", "[!0r,!1d]" }, + { kX86Fstp64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 3, 0, 0, false }, "Fstpd64M", "[!0r,!1d]" }, + { kX86Fst32M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xD9, 0x00, 0, 2, 0, 0, false }, "Fsts32M", "[!0r,!1d]" }, + { kX86Fst64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0, { 0x0, 0, 0xDD, 0x00, 0, 2, 0, 0, false }, "Fstd64M", "[!0r,!1d]" }, + { kX86Fprem, kNullary, NO_OPERAND | USE_FP_STACK, { 0xD9, 0, 0xF8, 0, 0, 0, 0, 0, false }, "Fprem64", "" }, + { kX86Fucompp, kNullary, NO_OPERAND | USE_FP_STACK, { 0xDA, 0, 0xE9, 0, 0, 0, 0, 0, false }, "Fucompp", "" }, + { kX86Fstsw16R, kNullary, NO_OPERAND, { 0x9B, 0xDF, 0xE0, 0, 0, 0, 0, 0, false }, "Fstsw16R", "ax" }, EXT_0F_ENCODING_MAP(Mova128, 0x66, 0x6F, REG_DEF0), { kX86Mova128MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128MR", "[!0r+!1d],!2r" },
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 3540843705..d874aaa829 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -148,6 +148,7 @@ class X86Mir2Lir : public Mir2Lir { RegLocation rl_src2); void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); + void GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double); void GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src);
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index 61623d0051..458f9c6691 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -48,16 +48,7 @@ void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode, break; case Instruction::REM_FLOAT_2ADDR: case Instruction::REM_FLOAT: - FlushAllRegs(); // Send everything to home location - if (cu_->target64) { - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2, - false); - } else { - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2, - false); - } - rl_result = GetReturn(kFPReg); - StoreValue(rl_dest, rl_result); + GenRemFP(rl_dest, rl_src1, rl_src2, false /* is_double */); return; case Instruction::NEG_FLOAT: GenNegFloat(rl_dest, rl_src1); @@ -110,16 +101,7 @@ void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode, break; case Instruction::REM_DOUBLE_2ADDR: case Instruction::REM_DOUBLE: - FlushAllRegs(); // Send everything to home location - if (cu_->target64) { - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2, - false); - } else { - CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2, - false); - } - rl_result = GetReturnWide(kFPReg); - StoreValueWide(rl_dest, rl_result); + GenRemFP(rl_dest, rl_src1, rl_src2, true /* is_double */); return; case Instruction::NEG_DOUBLE: GenNegDouble(rl_dest, rl_src1); @@ -356,6 +338,110 @@ void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, } } +void X86Mir2Lir::GenRemFP(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_double) { + // Compute offsets to the source and destination VRs on stack. + int src1_v_reg_offset = SRegOffset(rl_src1.s_reg_low); + int src2_v_reg_offset = SRegOffset(rl_src2.s_reg_low); + int dest_v_reg_offset = SRegOffset(rl_dest.s_reg_low); + + // Update the in-register state of sources. + rl_src1 = is_double ? UpdateLocWide(rl_src1) : UpdateLoc(rl_src1); + rl_src2 = is_double ? UpdateLocWide(rl_src2) : UpdateLoc(rl_src2); + + // All memory accesses below reference dalvik regs. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + + // If the source is in physical register, then put it in its location on stack. + if (rl_src1.location == kLocPhysReg) { + RegisterInfo* reg_info = GetRegInfo(rl_src1.reg); + + if (reg_info != nullptr && reg_info->IsTemp()) { + // Calling FlushSpecificReg because it will only write back VR if it is dirty. + FlushSpecificReg(reg_info); + // ResetDef to prevent NullifyRange from removing stores. + ResetDef(rl_src1.reg); + } else { + // It must have been register promoted if it is not a temp but is still in physical + // register. Since we need it to be in memory to convert, we place it there now. + StoreBaseDisp(TargetReg(kSp), src1_v_reg_offset, rl_src1.reg, is_double ? k64 : k32); + } + } + + if (rl_src2.location == kLocPhysReg) { + RegisterInfo* reg_info = GetRegInfo(rl_src2.reg); + if (reg_info != nullptr && reg_info->IsTemp()) { + FlushSpecificReg(reg_info); + ResetDef(rl_src2.reg); + } else { + StoreBaseDisp(TargetReg(kSp), src2_v_reg_offset, rl_src2.reg, is_double ? k64 : k32); + } + } + + int fld_opcode = is_double ? kX86Fld64M : kX86Fld32M; + + // Push the source virtual registers onto the x87 stack. + LIR *fld_2 = NewLIR2NoDest(fld_opcode, TargetReg(kSp).GetReg(), + src2_v_reg_offset + LOWORD_OFFSET); + AnnotateDalvikRegAccess(fld_2, (src2_v_reg_offset + LOWORD_OFFSET) >> 2, + true /* is_load */, is_double /* is64bit */); + + LIR *fld_1 = NewLIR2NoDest(fld_opcode, TargetReg(kSp).GetReg(), + src1_v_reg_offset + LOWORD_OFFSET); + AnnotateDalvikRegAccess(fld_1, (src1_v_reg_offset + LOWORD_OFFSET) >> 2, + true /* is_load */, is_double /* is64bit */); + + FlushReg(rs_rAX); + Clobber(rs_rAX); + LockTemp(rs_rAX); + + LIR* retry = NewLIR0(kPseudoTargetLabel); + + // Divide ST(0) by ST(1) and place result to ST(0). + NewLIR0(kX86Fprem); + + // Move FPU status word to AX. + NewLIR0(kX86Fstsw16R); + + // Check if reduction is complete. + OpRegImm(kOpAnd, rs_rAX, 0x400); + + // If no then continue to compute remainder. + LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); + branch->target = retry; + + FreeTemp(rs_rAX); + + // Now store result in the destination VR's stack location. + int displacement = dest_v_reg_offset + LOWORD_OFFSET; + int opcode = is_double ? kX86Fst64M : kX86Fst32M; + LIR *fst = NewLIR2NoDest(opcode, TargetReg(kSp).GetReg(), displacement); + AnnotateDalvikRegAccess(fst, displacement >> 2, false /* is_load */, is_double /* is64bit */); + + // Pop ST(1) and ST(0). + NewLIR0(kX86Fucompp); + + /* + * The result is in a physical register if it was in a temp or was register + * promoted. For that reason it is enough to check if it is in physical + * register. If it is, then we must do all of the bookkeeping necessary to + * invalidate temp (if needed) and load in promoted register (if needed). + * If the result's location is in memory, then we do not need to do anything + * more since the fstp has already placed the correct value in memory. + */ + RegLocation rl_result = is_double ? UpdateLocWideTyped(rl_dest, kFPReg) : + UpdateLocTyped(rl_dest, kFPReg); + if (rl_result.location == kLocPhysReg) { + rl_result = EvalLoc(rl_dest, kFPReg, true); + if (is_double) { + LoadBaseDisp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg, k64); + StoreFinalValueWide(rl_dest, rl_result); + } else { + Load32Disp(TargetReg(kSp), dest_v_reg_offset, rl_result.reg); + StoreFinalValue(rl_dest, rl_result); + } + } +} + void X86Mir2Lir::GenCmpFP(Instruction::Code code, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { bool single = (code == Instruction::CMPL_FLOAT) || (code == Instruction::CMPG_FLOAT); @@ -502,5 +588,4 @@ bool X86Mir2Lir::GenInlinedSqrt(CallInfo* info) { } - } // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index f1b5811a33..28b9dca193 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -572,8 +572,15 @@ enum X86OpCode { kX86PsllqRI, // left shift of floating point registers 64 bits x 2 kX86Fild32M, // push 32-bit integer on x87 stack kX86Fild64M, // push 64-bit integer on x87 stack + kX86Fld32M, // push float on x87 stack + kX86Fld64M, // push double on x87 stack kX86Fstp32M, // pop top x87 fp stack and do 32-bit store kX86Fstp64M, // pop top x87 fp stack and do 64-bit store + kX86Fst32M, // do 32-bit store + kX86Fst64M, // do 64-bit store + kX86Fprem, // remainder from dividing of two floating point values + kX86Fucompp, // compare floating point values and pop x87 fp stack twice + kX86Fstsw16R, // store FPU status word Binary0fOpCode(kX86Mova128), // move 128 bits aligned kX86Mova128MR, kX86Mova128AR, // store 128 bit aligned from xmm1 to m128 Binary0fOpCode(kX86Movups), // load unaligned packed single FP values from xmm2/m128 to xmm1 |