SIMD type conversion for x86
Rationale:
Break-out CL of ART Vectorizer: 1 OF many
Needed to demonstrate that same-length
type conversions easily fit the model.
Bug: 34083438
Test: assembler_x86[_64]_test
Change-Id: Ib9959a7e18485ab1629978c61032fcda16792951
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 6eab302..6a57f45 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -958,6 +958,14 @@
}
+void X86Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x5B);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 2999599..e3c123c 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -457,6 +457,7 @@
void cvttss2si(Register dst, XmmRegister src);
void cvttsd2si(Register dst, XmmRegister src);
+ void cvtdq2ps(XmmRegister dst, XmmRegister src);
void cvtdq2pd(XmmRegister dst, XmmRegister src);
void comiss(XmmRegister a, XmmRegister b);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index a74bea2..110d0dc 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -322,6 +322,14 @@
DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
}
+TEST_F(AssemblerX86Test, Cvtdq2ps) {
+ DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps");
+}
+
+TEST_F(AssemblerX86Test, Cvtdq2pd) {
+ DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd");
+}
+
TEST_F(AssemblerX86Test, ComissAddr) {
GetAssembler()->comiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
const char* expected = "comiss 0(%EAX), %xmm0\n";
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 458204a..688fdcc 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1153,6 +1153,15 @@
}
+void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x5B);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 0dc11d8..480e711 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -486,6 +486,7 @@
void cvttsd2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);
+ void cvtdq2ps(XmmRegister dst, XmmRegister src);
void cvtdq2pd(XmmRegister dst, XmmRegister src);
void comiss(XmmRegister a, XmmRegister b);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index fe94497..ba011c9 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1205,6 +1205,10 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtsd2ss, "cvtsd2ss %{reg2}, %{reg1}"), "cvtsd2ss");
}
+TEST_F(AssemblerX86_64Test, Cvtdq2ps) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps");
+}
+
TEST_F(AssemblerX86_64Test, Cvtdq2pd) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd");
}