Add support for long-to-double in the optimizing compiler.
- Add support for the long-to-double Dex instruction in the
optimizing compiler.
- Enable requests of temporary FPU (double) registers during
code generation.
- Fix art::x86::X86Assembler::LoadLongConstant and extend
it to int64_t values.
- Have art::x86_64::X86_64Assembler::cvtsi2sd work with
64-bit operands.
- Generate x86, x86-64 and ARM (but not ARM64) code for
long-to-double HTypeConversion nodes.
- Add related tests to test/422-type-conversion.
Change-Id: Ie73d9e5e25bd2e15f585c371e8fc2dcb83438ccd
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index b261460..35360fe 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -1034,6 +1034,11 @@
break;
}
+ case Instruction::LONG_TO_DOUBLE: {
+ Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble);
+ break;
+ }
+
case Instruction::INT_TO_BYTE: {
Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte);
break;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 0b59327..6cacd4f 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -228,7 +228,8 @@
DCHECK(!blocked_fpu_registers_[loc.reg()]);
blocked_fpu_registers_[loc.reg()] = true;
} else {
- DCHECK_EQ(loc.GetPolicy(), Location::kRequiresRegister);
+ DCHECK(loc.GetPolicy() == Location::kRequiresRegister
+ || loc.GetPolicy() == Location::kRequiresFpuRegister);
}
}
@@ -259,10 +260,21 @@
for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
Location loc = locations->GetTemp(i);
if (loc.IsUnallocated()) {
- DCHECK_EQ(loc.GetPolicy(), Location::kRequiresRegister);
- // TODO: Adjust handling of temps. We currently consider temps to use
- // core registers. They may also use floating point registers at some point.
- loc = AllocateFreeRegister(Primitive::kPrimInt);
+ switch (loc.GetPolicy()) {
+ case Location::kRequiresRegister:
+ // Allocate a core register (large enough to fit a 32-bit integer).
+ loc = AllocateFreeRegister(Primitive::kPrimInt);
+ break;
+
+ case Location::kRequiresFpuRegister:
+ // Allocate a floating-point register (large enough to fit a 64-bit double).
+ loc = AllocateFreeRegister(Primitive::kPrimDouble);
+ break;
+
+ default:
+ LOG(FATAL) << "Unexpected policy for temporary location "
+ << loc.GetPolicy();
+ }
locations->SetTempAt(i, loc);
}
}
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 890cfdd..22a8590 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1481,6 +1481,14 @@
break;
case Primitive::kPrimLong:
+ // Processing a Dex `long-to-double' instruction.
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+
case Primitive::kPrimFloat:
LOG(FATAL) << "Type conversion from " << input_type
<< " to " << result_type << " not yet implemented";
@@ -1645,7 +1653,41 @@
break;
}
- case Primitive::kPrimLong:
+ case Primitive::kPrimLong: {
+ // Processing a Dex `long-to-double' instruction.
+ Register low = in.AsRegisterPairLow<Register>();
+ Register high = in.AsRegisterPairHigh<Register>();
+ SRegister out_s = out.AsFpuRegisterPairLow<SRegister>();
+ DRegister out_d = FromLowSToD(out_s);
+ Register constant_low = locations->GetTemp(0).As<Register>();
+ Register constant_high = locations->GetTemp(1).As<Register>();
+ SRegister temp_s = locations->GetTemp(2).AsFpuRegisterPairLow<SRegister>();
+ DRegister temp_d = FromLowSToD(temp_s);
+
+ // Binary encoding of 2^32 for type double.
+ const uint64_t c = UINT64_C(0x41F0000000000000);
+
+ // out_d = int-to-double(high)
+ __ vmovsr(out_s, high);
+ __ vcvtdi(out_d, out_s);
+ // Using vmovd to load the `c` constant as an immediate
+ // value into `temp_d` does not work, as this instruction
+ // only transfers 8 significant bits of its immediate
+ // operand. Instead, use two 32-bit core registers to
+ // load `c` into `temp_d`.
+ __ LoadImmediate(constant_low, Low32Bits(c));
+ __ LoadImmediate(constant_high, High32Bits(c));
+ __ vmovdrr(temp_d, constant_low, constant_high);
+ // out_d = out_d * 2^32
+ __ vmuld(out_d, out_d, temp_d);
+ // temp_d = unsigned-to-double(low)
+ __ vmovsr(temp_s, low);
+ __ vcvtdu(temp_d, temp_s);
+ // out_d = out_d + temp_d
+ __ vaddd(out_d, out_d, temp_d);
+ break;
+ }
+
case Primitive::kPrimFloat:
LOG(FATAL) << "Type conversion from " << input_type
<< " to " << result_type << " not yet implemented";
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3689452..c482885 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1429,6 +1429,13 @@
break;
case Primitive::kPrimLong:
+ // Processing a Dex `long-to-double' instruction.
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+
case Primitive::kPrimFloat:
LOG(FATAL) << "Type conversion from " << input_type
<< " to " << result_type << " not yet implemented";
@@ -1608,7 +1615,37 @@
__ cvtsi2sd(out.As<XmmRegister>(), in.As<Register>());
break;
- case Primitive::kPrimLong:
+ case Primitive::kPrimLong: {
+ // Processing a Dex `long-to-double' instruction.
+ Register low = in.AsRegisterPairLow<Register>();
+ Register high = in.AsRegisterPairHigh<Register>();
+ XmmRegister result = out.As<XmmRegister>();
+ XmmRegister temp = locations->GetTemp(0).As<XmmRegister>();
+ XmmRegister constant = locations->GetTemp(1).As<XmmRegister>();
+
+ // Binary encoding of 2^32 for type double.
+ const int64_t c1 = INT64_C(0x41F0000000000000);
+ // Binary encoding of 2^31 for type double.
+ const int64_t c2 = INT64_C(0x41E0000000000000);
+
+ // low = low - 2^31 (to prevent bit 31 of `low` from being
+ // interpreted as a sign bit)
+ __ subl(low, Immediate(0x80000000));
+ // temp = int-to-double(high)
+ __ cvtsi2sd(temp, high);
+ // temp = temp * 2^32
+ __ LoadLongConstant(constant, c1);
+ __ mulsd(temp, constant);
+ // result = int-to-double(low)
+ __ cvtsi2sd(result, low);
+ // result = result + 2^31 (restore the original value of `low`)
+ __ LoadLongConstant(constant, c2);
+ __ addsd(result, constant);
+ // result = result + temp
+ __ addsd(result, temp);
+ break;
+ }
+
case Primitive::kPrimFloat:
LOG(FATAL) << "Type conversion from " << input_type
<< " to " << result_type << " not yet implemented";
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 34fa1e7..63938f3 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1430,6 +1430,11 @@
break;
case Primitive::kPrimLong:
+ // Processing a Dex `long-to-double' instruction.
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+
case Primitive::kPrimFloat:
LOG(FATAL) << "Type conversion from " << input_type
<< " to " << result_type << " not yet implemented";
@@ -1609,10 +1614,14 @@
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimChar:
- __ cvtsi2sd(out.As<XmmRegister>(), in.As<CpuRegister>());
+ __ cvtsi2sd(out.As<XmmRegister>(), in.As<CpuRegister>(), false);
break;
case Primitive::kPrimLong:
+ // Processing a Dex `long-to-double' instruction.
+ __ cvtsi2sd(out.As<XmmRegister>(), in.As<CpuRegister>(), true);
+ break;
+
case Primitive::kPrimFloat:
LOG(FATAL) << "Type conversion from " << input_type
<< " to " << result_type << " not yet implemented";
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index a297ea3..dbad386 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1318,13 +1318,19 @@
}
+void X86Assembler::LoadLongConstant(XmmRegister dst, int64_t value) {
+ // TODO: Need to have a code constants table.
+ pushl(Immediate(High32Bits(value)));
+ pushl(Immediate(Low32Bits(value)));
+ movsd(dst, Address(ESP, 0));
+ addl(ESP, Immediate(2 * sizeof(int32_t)));
+}
+
+
void X86Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
// TODO: Need to have a code constants table.
int64_t constant = bit_cast<int64_t, double>(value);
- pushl(Immediate(High32Bits(constant)));
- pushl(Immediate(Low32Bits(constant)));
- movsd(dst, Address(ESP, 0));
- addl(ESP, Immediate(2 * sizeof(intptr_t)));
+ LoadLongConstant(dst, constant);
}
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 6ea66a5..0d82487 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -42,8 +42,6 @@
private:
const int32_t value_;
-
- DISALLOW_COPY_AND_ASSIGN(Immediate);
};
@@ -441,6 +439,7 @@
void AddImmediate(Register reg, const Immediate& imm);
+ void LoadLongConstant(XmmRegister dst, int64_t value);
void LoadDoubleConstant(XmmRegister dst, double value);
void DoubleNegate(XmmRegister d);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 5d8a3b1..d901673 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -16,7 +16,8 @@
#include "assembler_x86.h"
-#include "gtest/gtest.h"
+#include "base/stl_util.h"
+#include "utils/assembler_test.h"
namespace art {
@@ -29,4 +30,89 @@
ASSERT_EQ(static_cast<size_t>(5), buffer.Size());
}
+class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, x86::Register,
+ x86::XmmRegister, x86::Immediate> {
+ protected:
+ std::string GetArchitectureString() OVERRIDE {
+ return "x86";
+ }
+
+ std::string GetAssemblerParameters() OVERRIDE {
+ return " --32";
+ }
+
+ std::string GetDisassembleParameters() OVERRIDE {
+ return " -D -bbinary -mi386 --no-show-raw-insn";
+ }
+
+ void SetUpHelpers() OVERRIDE {
+ if (registers_.size() == 0) {
+ registers_.insert(end(registers_),
+ { // NOLINT(whitespace/braces)
+ new x86::Register(x86::EAX),
+ new x86::Register(x86::EBX),
+ new x86::Register(x86::ECX),
+ new x86::Register(x86::EDX),
+ new x86::Register(x86::EBP),
+ new x86::Register(x86::ESP),
+ new x86::Register(x86::ESI),
+ new x86::Register(x86::EDI)
+ });
+ }
+
+ if (fp_registers_.size() == 0) {
+ fp_registers_.insert(end(fp_registers_),
+ { // NOLINT(whitespace/braces)
+ new x86::XmmRegister(x86::XMM0),
+ new x86::XmmRegister(x86::XMM1),
+ new x86::XmmRegister(x86::XMM2),
+ new x86::XmmRegister(x86::XMM3),
+ new x86::XmmRegister(x86::XMM4),
+ new x86::XmmRegister(x86::XMM5),
+ new x86::XmmRegister(x86::XMM6),
+ new x86::XmmRegister(x86::XMM7)
+ });
+ }
+ }
+
+ void TearDown() OVERRIDE {
+ AssemblerTest::TearDown();
+ STLDeleteElements(&registers_);
+ STLDeleteElements(&fp_registers_);
+ }
+
+ std::vector<x86::Register*> GetRegisters() OVERRIDE {
+ return registers_;
+ }
+
+ std::vector<x86::XmmRegister*> GetFPRegisters() OVERRIDE {
+ return fp_registers_;
+ }
+
+ x86::Immediate CreateImmediate(int64_t imm_value) OVERRIDE {
+ return x86::Immediate(imm_value);
+ }
+
+ private:
+ std::vector<x86::Register*> registers_;
+ std::vector<x86::XmmRegister*> fp_registers_;
+};
+
+
+TEST_F(AssemblerX86Test, Movl) {
+ GetAssembler()->movl(x86::EAX, x86::EBX);
+ const char* expected = "mov %ebx, %eax\n";
+ DriverStr(expected, "movl");
+}
+
+TEST_F(AssemblerX86Test, LoadLongConstant) {
+ GetAssembler()->LoadLongConstant(x86::XMM0, 51);
+ const char* expected =
+ "push $0x0\n"
+ "push $0x33\n"
+ "movsd 0(%esp), %xmm0\n"
+ "add $8, %esp\n";
+ DriverStr(expected, "LoadLongConstant");
+}
+
} // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index dff3849..381ee33 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -603,9 +603,19 @@
void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
+ cvtsi2sd(dst, src, false);
+}
+
+
+void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF2);
- EmitOptionalRex32(dst, src);
+ if (is64bit) {
+ // Emit a REX.W prefix if the operand size is 64 bits.
+ EmitRex64(dst, src);
+ } else {
+ EmitOptionalRex32(dst, src);
+ }
EmitUint8(0x0F);
EmitUint8(0x2A);
EmitOperand(dst.LowBits(), Operand(src));
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index ab1bc9e..9f31818 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -330,6 +330,7 @@
void cvtsi2ss(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version.
void cvtsi2sd(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version.
+ void cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit);
void cvtss2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
void cvtss2sd(XmmRegister dst, XmmRegister src);