Inline codegen for long-to-double on ARM.
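
Replace the pL2d runtime call with an inline VFP sequence: split the
64-bit value into 32-bit halves, convert the high word as signed and
the low word as unsigned, then combine them as hi * 2^32 + lo with a
vmla against the constant 0x41f0000000000000 (the IEEE-754 double
encoding of 2^32). Also teach the disassembler to print the value of
PC-relative vldr literals, and add long-to-double checks to test
107-int-math2.
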
Change-Id: I4fc443c1b942a2231d680fc2c7a1530c86104584
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index 395c788..b06ebcf 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -241,7 +241,7 @@
kArmFirst = 0,
kArm16BitData = kArmFirst, // DATA [0] rd[15..0].
kThumbAdcRR, // adc [0100000101] rm[5..3] rd[2..0].
- kThumbAddRRI3, // add(1) [0001110] imm_3[8..6] rn[5..3] rd[2..0]*/
+ kThumbAddRRI3, // add(1) [0001110] imm_3[8..6] rn[5..3] rd[2..0].
kThumbAddRI8, // add(2) [00110] rd[10..8] imm_8[7..0].
kThumbAddRRR, // add(3) [0001100] rm[8..6] rn[5..3] rd[2..0].
kThumbAddRRLH, // add(4) [01000100] H12[01] rm[5..3] rd[2..0].
@@ -326,20 +326,23 @@
kThumb2Vaddd, // vadd vd, vn, vm [111011100011] rn[19..16] rd[15-12] [10110000] rm[3..0].
kThumb2Vdivs, // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10100000] rm[3..0].
kThumb2Vdivd, // vdiv vd, vn, vm [111011101000] rn[19..16] rd[15-12] [10110000] rm[3..0].
+ kThumb2VmlaF64, // vmla.F64 vd, vn, vm [111011100000] vn[19..16] vd[15..12] [10110000] vm[3..0].
kThumb2VcvtIF, // vcvt.F32 vd, vm [1110111010111000] vd[15..12] [10101100] vm[3..0].
kThumb2VcvtID, // vcvt.F64 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0].
kThumb2VcvtFI, // vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] [10101100] vm[3..0].
kThumb2VcvtDI, // vcvt.S32.F64 vd, vm [1110111010111101] vd[15..12] [10111100] vm[3..0].
kThumb2VcvtFd, // vcvt.F64.F32 vd, vm [1110111010110111] vd[15..12] [10101100] vm[3..0].
kThumb2VcvtDF, // vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] [10111100] vm[3..0].
+ kThumb2VcvtF64S32, // vcvt.F64.S32 vd, vm [1110111010111000] vd[15..12] [10111100] vm[3..0].
+ kThumb2VcvtF64U32, // vcvt.F64.U32 vd, vm [1110111010111000] vd[15..12] [10110100] vm[3..0].
kThumb2Vsqrts, // vsqrt.f32 vd, vm [1110111010110001] vd[15..12] [10101100] vm[3..0].
kThumb2Vsqrtd, // vsqrt.f64 vd, vm [1110111010110001] vd[15..12] [10111100] vm[3..0].
kThumb2MovI8M, // mov(T2) rd, #<const> [11110] i [00001001111] imm3 rd[11..8] imm8.
kThumb2MovImm16, // mov(T3) rd, #<const> [11110] i [0010100] imm4 [0] imm3 rd[11..8] imm8.
kThumb2StrRRI12, // str(Imm,T3) rd,[rn,#imm12] [111110001100] rn[19..16] rt[15..12] imm12[11..0].
kThumb2LdrRRI12, // ldr(Imm,T3) rd,[rn,#imm12] [111110001101] rn[19..16] rt[15..12] imm12[11..0].
- kThumb2StrRRI8Predec, // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0]*/
- kThumb2LdrRRI8Predec, // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0]*/
+ kThumb2StrRRI8Predec, // str(Imm,T4) rd,[rn,#-imm8] [111110000100] rn[19..16] rt[15..12] [1100] imm[7..0].
+ kThumb2LdrRRI8Predec, // ldr(Imm,T4) rd,[rn,#-imm8] [111110000101] rn[19..16] rt[15..12] [1100] imm[7..0].
kThumb2Cbnz, // cbnz rd,<label> [101110] i [1] imm5[7..3] rn[2..0].
kThumb2Cbz, // cbz rd,<label> [101100] i [1] imm5[7..3] rn[2..0].
kThumb2AddRRI12, // add rd, rn, #imm12 [11110] i [100000] rn[19..16] [0] imm3[14..12] rd[11..8] imm8[7..0].
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index 820b3aa..00939ec 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -457,6 +457,10 @@
kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
"vdivd", "!0S, !1S, !2S", 4, kFixupNone),
+ ENCODING_MAP(kThumb2VmlaF64, 0xee000b00,
+ kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
+ kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE012,
+ "vmla", "!0S, !1S, !2S", 4, kFixupNone),
ENCODING_MAP(kThumb2VcvtIF, 0xeeb80ac0,
kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
@@ -481,6 +485,14 @@
kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
"vcvt.f32.f64 ", "!0s, !1S", 4, kFixupNone),
+ ENCODING_MAP(kThumb2VcvtF64S32, 0xeeb80bc0,
+ kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "vcvt.f64.s32 ", "!0S, !1s", 4, kFixupNone),
+ ENCODING_MAP(kThumb2VcvtF64U32, 0xeeb80b40,
+ kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+ kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+ "vcvt.f64.u32 ", "!0S, !1s", 4, kFixupNone),
ENCODING_MAP(kThumb2Vsqrts, 0xeeb10ac0,
kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
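
For readers unfamiliar with these tables: in each ENCODING_MAP entry, the
number pair after a field format gives bit positions in the instruction word;
for the kFmtDfp fields above, the first number (22/7/5) is where the
register's top bit lands and the second (12/16/0) is where its low four bits
start. A minimal standalone sketch of how the new kThumb2VmlaF64 entry folds
D-register numbers into the 0xee000b00 base opcode (illustrative C++, not ART
code; the function name is hypothetical):

    #include <cstdint>

    // Assemble vmla.f64 dd, dn, dm from plain 0..31 register numbers.
    uint32_t EncodeVmlaF64(uint32_t vd, uint32_t vn, uint32_t vm) {
      uint32_t insn = 0xee000b00;                        // vmla.f64 base (Thumb2)
      insn |= ((vd & 0x10) << 18) | ((vd & 0xf) << 12);  // D at bit 22, Vd at 15..12
      insn |= ((vn & 0x10) << 3)  | ((vn & 0xf) << 16);  // N at bit 7,  Vn at 19..16
      insn |= ((vm & 0x10) << 1)  | ((vm & 0xf) << 0);   // M at bit 5,  Vm at 3..0
      return insn;  // EncodeVmlaF64(0, 1, 2) == 0xee010b02, i.e. vmla.f64 d0, d1, d2
    }
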
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 8af9cdd..1a9d9c5 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -141,9 +141,26 @@
case Instruction::DOUBLE_TO_INT:
op = kThumb2VcvtDI;
break;
- case Instruction::LONG_TO_DOUBLE:
- GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pL2d), rl_dest, rl_src);
+ case Instruction::LONG_TO_DOUBLE: {
+ rl_src = LoadValueWide(rl_src, kFPReg);
+ src_reg = S2d(rl_src.low_reg, rl_src.high_reg);
+ rl_result = EvalLoc(rl_dest, kFPReg, true);
+ // TODO: clean up AllocTempDouble so that its result has the double bits set.
+ int tmp1 = AllocTempDouble();
+ int tmp2 = AllocTempDouble();
+
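+ // Convert the high word as signed and the low word as unsigned, then
+ // form hi * 2^32 + lo; 0x41f0000000000000 is the double encoding of 2^32.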
+ NewLIR2(kThumb2VcvtF64S32, tmp1 | ARM_FP_DOUBLE, (src_reg & ~ARM_FP_DOUBLE) + 1);
+ NewLIR2(kThumb2VcvtF64U32, S2d(rl_result.low_reg, rl_result.high_reg), (src_reg & ~ARM_FP_DOUBLE));
+ LoadConstantWide(tmp2, tmp2 + 1, 0x41f0000000000000LL);
+ NewLIR3(kThumb2VmlaF64, S2d(rl_result.low_reg, rl_result.high_reg), tmp1 | ARM_FP_DOUBLE,
+ tmp2 | ARM_FP_DOUBLE);
+ FreeTemp(tmp1);
+ FreeTemp(tmp2);
+ StoreValueWide(rl_dest, rl_result);
return;
+ }
case Instruction::FLOAT_TO_LONG:
GenConversionCall(QUICK_ENTRYPOINT_OFFSET(pF2l), rl_dest, rl_src);
return;
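
The sequence emitted above implements a simple decomposition; a standalone
sketch of the same arithmetic (plain C++ for illustration, not ART internals):

    #include <cstdint>

    double LongToDouble(int64_t l) {
      double hi = static_cast<double>(static_cast<int32_t>(l >> 32));  // kThumb2VcvtF64S32
      double lo = static_cast<double>(static_cast<uint32_t>(l));       // kThumb2VcvtF64U32
      return hi * 4294967296.0 + lo;  // kThumb2VmlaF64; 2^32 == 0x41f0000000000000
    }

Since hi * 2^32 is exact in double precision, the only rounding happens in
the final add, so the result matches a correctly rounded long-to-double
conversion.
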
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 71f70c4..68626f6 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -683,7 +683,7 @@
uint32_t coproc = (instr >> 8) & 0xF;
uint32_t op4 = (instr >> 4) & 0x1;
- if (coproc == 10 || coproc == 11) { // 101x
+ if (coproc == 0xA || coproc == 0xB) { // 101x
if (op3 < 0x20 && (op3 & ~5) != 0) { // 0xxxxx and not 000x0x
// Extension register load/store instructions
// |1111|110|00000|0000|1111|110|0|00000000|
@@ -708,6 +708,11 @@
opcode << (L == 1 ? "vldr" : "vstr");
args << d << ", [" << Rn << ", #" << ((U == 1) ? "" : "-")
<< (imm8 << 2) << "]";
+ if (Rn.r == 15 && U == 1) {
+ intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr);
+ lit_adr = RoundDown(lit_adr, 4) + 4 + (imm8 << 2);
+ args << StringPrintf(" ; 0x%llx", *reinterpret_cast<int64_t*>(lit_adr));
+ }
} else if (Rn.r == 13 && W == 1 && U == L) { // VPUSH/VPOP
opcode << (L == 1 ? "vpop" : "vpush");
args << FpRegisterRange(instr);
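
The new branch above annotates a PC-relative vldr with the 64-bit literal it
loads. In Thumb, PC reads as the instruction address plus 4, aligned down to
a word boundary, which is what the RoundDown arithmetic computes; a hedged
sketch of the same address calculation (illustrative, not disassembler code):

    #include <cstdint>

    uintptr_t VldrLiteralAddress(uintptr_t instr_addr, uint32_t imm8) {
      uintptr_t pc = (instr_addr & ~static_cast<uintptr_t>(3)) + 4;  // RoundDown(addr, 4) + 4
      return pc + (imm8 << 2);                                       // U == 1: add scaled offset
    }
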
diff --git a/test/107-int-math2/src/Main.java b/test/107-int-math2/src/Main.java
index d737ff5..1ce4a04 100644
--- a/test/107-int-math2/src/Main.java
+++ b/test/107-int-math2/src/Main.java
@@ -297,6 +297,24 @@
l = -5678956789L;
i = (int) l;
if (i != -1383989493) { return 4; }
+
+ /* long --> double */
+ l = 0x7FFFFFFFL;
+ d = (double) l;
+ if (Double.doubleToRawLongBits(d) != 0x41dfffffffc00000L) { return 5; }
+
+ l = 0xFFFFFFFFL;
+ d = (double) l;
+ if (Double.doubleToRawLongBits(d) != 0x41efffffffe00000L) { return 6; }
+
+ l = 0x7FFFFFFFFFFFFFFFL;
+ d = (double) l;
+ if (Double.doubleToRawLongBits(d) != 0x43e0000000000000L) { return 7; }
+
+ l = 0xFFFFFFFFFFFFFFFFL;
+ d = (double) l;
+ if (Double.doubleToRawLongBits(d) != 0xbff0000000000000L) { return 8; }
+
return 0;
}
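
For reference, the expected constants can be derived by hand: 0x7FFFFFFF =
2^31 - 1 fits in the 53-bit significand, so the conversion is exact; the
biased exponent is 30 + 1023 = 0x41D and the remaining 30 one-bits land at
the top of the 52-bit fraction, giving 0x41dfffffffc00000. A hedged
standalone C++ check of all four patterns (mirroring the Java assertions
above; not part of the test itself):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Same role as Double.doubleToRawLongBits.
    static uint64_t Bits(double d) {
      uint64_t b;
      std::memcpy(&b, &d, sizeof(b));
      return b;
    }

    int main() {
      assert(Bits(static_cast<double>(INT64_C(0x7FFFFFFF))) == UINT64_C(0x41dfffffffc00000));
      assert(Bits(static_cast<double>(INT64_C(0xFFFFFFFF))) == UINT64_C(0x41efffffffe00000));
      assert(Bits(static_cast<double>(INT64_MAX)) == UINT64_C(0x43e0000000000000));  // 2^63-1 rounds up to 2^63
      assert(Bits(static_cast<double>(INT64_C(-1))) == UINT64_C(0xbff0000000000000)); // -1.0
      return 0;
    }
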