Inline codegen for long-to-double on ARM.
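
Replaces the pL2d runtime call with an inline VFP sequence: the high
word of the long is converted to double as a signed 32-bit value, the
low word as an unsigned 32-bit value, and the halves are recombined
with a multiply-accumulate by 2^32 (0x41f0000000000000 is 2^32 encoded
as an IEEE-754 double). A minimal C++ sketch of the value the emitted
code computes (the function name is illustrative, not part of the patch):

    #include <cstdint>

    double LongToDouble(int64_t v) {
      // Arithmetic right shift assumed for the signed high word.
      double hi = static_cast<double>(static_cast<int32_t>(v >> 32));  // vcvt.f64.s32
      double lo = static_cast<double>(static_cast<uint32_t>(v));       // vcvt.f64.u32
      return lo + hi * 0x1p32;  // vmla.f64 with the 2^32 constant
    }

Both conversions and the scale by 2^32 are exact in double, so the
final add is the only rounding step in the sequence.
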
Change-Id: I4fc443c1b942a2231d680fc2c7a1530c86104584
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index 395c788..b06ebcf 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -241,7 +241,7 @@
kArmFirst = 0,
kArm16BitData = kArmFirst, // DATA [0] rd[15..0].
kThumbAdcRR, // adc [0100000101] rm[5..3] rd[2..0].
- kThumbAddRRI3, // add(1) [0001110] imm_3[8..6] rn[5..3] rd[2..0]*/
+ kThumbAddRRI3, // add(1) [0001110] imm_3[8..6] rn[5..3] rd[2..0].
kThumbAddRI8, // add(2) [00110] rd[10..8] imm_8[7..0].
kThumbAddRRR, // add(3) [0001100] rm[8..6] rn[5..3] rd[2..0].
kThumbAddRRLH, // add(4) [01000100] H12[01] rm[5..3] rd[2..0].
@@ -326,20 +326,23 @@
kThumb2Vaddd, // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110000] rm[3..0].
kThumb2Vdivs, // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10100000] rm[3..0].
kThumb2Vdivd, // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10110000] rm[3..0].
+ kThumb2VmlaF64, // vmla.F64 vd, vn, vm [111011100000] vn[19..16] vd[15..12] [10110000] vm[3..0].
kThumb2VcvtIF, // vcvt.F32 vd, vm [1110111010111000] vd[15..12] [10101100] vm[3..0].
kThumb2VcvtID, // vcvt.F64 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0].
kThumb2VcvtFI, // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10101100] vm[3..0].
- kThumb2VcvtDI, // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10111100] vm[3..0].
+ kThumb2VcvtDI, // vcvt.S32.F64 vd, vm [1110111010111101] vd[15..12] [10111100] vm[3..0].
kThumb2VcvtFd, // vcvt.F64.F32 vd, vm [1110111010110111] vd[15..12] [10101100] vm[3..0].
kThumb2VcvtDF, // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0].
+ kThumb2VcvtF64S32, // vcvt.F64.S32 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0].
+ kThumb2VcvtF64U32, // vcvt.F64.U32 vd, vm [1110111010111000] vd[15..12] [10110100] vm[3..0].
kThumb2Vsqrts, // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0].
kThumb2Vsqrtd, // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0].
kThumb2MovI8M, // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
kThumb2MovImm16, // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8.
kThumb2StrRRI12, // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
- kThumb2LdrRRI12, // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
+ kThumb2LdrRRI12, // ldr(Imm,T3) rd,[rn,#imm12] [111110001101] rn[19..16] rt[15..12] imm12[11..0].
- kThumb2StrRRI8Predec, // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0]*/
- kThumb2LdrRRI8Predec, // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0]*/
+ kThumb2StrRRI8Predec, // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0].
+ kThumb2LdrRRI8Predec, // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0].
kThumb2Cbnz, // cbnz rd,<label> [101110] i [1] imm5[7..3] rn[2..0].
- kThumb2Cbz, // cbn rd,<label> [101100] i [1] imm5[7..3] rn[2..0].
+ kThumb2Cbz, // cbz rn,<label> [101100] i [1] imm5[7..3] rn[2..0].
kThumb2AddRRI12, // add rd, rn, #imm12 [11110] i [100000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
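
As a quick sanity check (not part of the patch), the fixed-bit fields in
the comments above match the base opcodes added to assemble_arm.cc below,
with vd, vn, and vm OR'd into bits [15..12], [19..16], and [3..0]:

    // High half from the leading bit field, low half from the [1011..] field:
    uint32_t vcvt_f64_s32 = (0xEEB8u << 16) | (0xBCu << 4);  // 0xEEB80BC0
    uint32_t vcvt_f64_u32 = (0xEEB8u << 16) | (0xB4u << 4);  // 0xEEB80B40
    uint32_t vmla_f64     = (0xEE00u << 16) | (0xB0u << 4);  // 0xEE000B00
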
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 820b3aa..00939ec 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -457,6 +457,10 @@
kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
"vdivd", "!0S, !1S, !2S", 4, kFixupNone),
+ ENCODING_MAP(kThumb2VmlaF64, 0xee000b00,
+ kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE012,
+ "vmla", "!0S, !1S, !2S", 4, kFixupNone),
ENCODING_MAP(kThumb2VcvtIF, 0xeeb80ac0,
kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
@@ -481,6 +485,14 @@
kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
"vcvt.f32.f64 ", "!0s, !1S", 4, kFixupNone),
+ ENCODING_MAP(kThumb2VcvtF64S32, 0xeeb80bc0,
+ kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "vcvt.f64.s32 ", "!0S, !1s", 4, kFixupNone),
+ ENCODING_MAP(kThumb2VcvtF64U32, 0xeeb80b40,
+ kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "vcvt.f64.u32 ", "!0S, !1s", 4, kFixupNone),
ENCODING_MAP(kThumb2Vsqrts, 0xeeb10ac0,
kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
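
Unlike the vadd/vdiv entries above, which use REG_DEF0_USE12, the vmla
entry is flagged REG_DEF0 | REG_USE012: vmla accumulates into its
destination, so !0S is an input as well as an output and must be treated
as live into the instruction. Roughly, ignoring any compiler FP
contraction:

    // VMLA is an unfused multiply-add: the product is rounded, then added
    // to the destination, which is read as well as written.
    double vmla_f64(double vd, double vn, double vm) { return vd + vn * vm; }
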
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 8af9cdd..1a9d9c5 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -141,9 +141,27 @@
case Instruction::DOUBLE_TO_INT:
op = kThumb2VcvtDI;
break;
- case Instruction::LONG_TO_DOUBLE:
- GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pL2d), rl_dest, rl_src);
+    case Instruction::LONG_TO_DOUBLE: {
+      rl_src = LoadValueWide(rl_src, kFPReg);
+      src_reg = S2d(rl_src.low_reg, rl_src.high_reg);
+      rl_result = EvalLoc(rl_dest, kFPReg, true);
+      // TODO: clean up AllocTempDouble so that its result has the double bits set.
+      int tmp1 = AllocTempDouble();
+      int tmp2 = AllocTempDouble();
+
+      // Convert the signed high word and the unsigned low word separately,
+      // then combine them as result = low + high * 2^32.
+      NewLIR2(kThumb2VcvtF64S32, tmp1 | ARM_FP_DOUBLE, (src_reg & ~ARM_FP_DOUBLE) + 1);
+      NewLIR2(kThumb2VcvtF64U32, S2d(rl_result.low_reg, rl_result.high_reg),
+              (src_reg & ~ARM_FP_DOUBLE));
+      LoadConstantWide(tmp2, tmp2 + 1, 0x41f0000000000000LL);  // 2^32 as a double.
+      NewLIR3(kThumb2VmlaF64, S2d(rl_result.low_reg, rl_result.high_reg), tmp1 | ARM_FP_DOUBLE,
+              tmp2 | ARM_FP_DOUBLE);
+      FreeTemp(tmp1);
+      FreeTemp(tmp2);
+      StoreValueWide(rl_dest, rl_result);
       return;
+    }
case Instruction::FLOAT_TO_LONG:
GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src);
return;
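
A note on the operand arithmetic in this hunk: a VFP double register dK
overlays the singles s2K and s2K+1, and S2d() forms a double operand by
setting ARM_FP_DOUBLE on the low single's index. Hypothetical helpers
mirroring the expressions above (illustrative only; ARM_FP_DOUBLE is the
flag from arm_lir.h):

    int LowWordS(int d_reg)  { return d_reg & ~ARM_FP_DOUBLE; }        // s-reg, bits [31..0]
    int HighWordS(int d_reg) { return (d_reg & ~ARM_FP_DOUBLE) + 1; }  // s-reg, bits [63..32]
    int AsDouble(int s_reg)  { return s_reg | ARM_FP_DOUBLE; }         // overlaying d-reg

Since each 32-bit half converts to double exactly and the scale by 2^32
is exact, the inline sequence should lose no precision relative to a
direct 64-bit conversion.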