Introduce a number of SIMD extensions for x86/x86_64 (SSE).
Rationale:
As a first step exploring how useful an ART vectorizer may be,
introducing a number of floating-point SIMD instructions.
Test: assembler_x86[_64]_test
Bug: 34083438
Change-Id: I0285dd9fca51f31875a6bbe728f873c48089940d
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index cd30872..d3b15ac 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -350,6 +350,38 @@
}
+void X86Assembler::movaps(XmmRegister dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x28);
+ EmitOperand(dst, src);
+}
+
+
+void X86Assembler::movups(XmmRegister dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x10);
+ EmitOperand(dst, src);
+}
+
+
+void X86Assembler::movaps(const Address& dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x29);
+ EmitOperand(src, dst);
+}
+
+
+void X86Assembler::movups(const Address& dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x11);
+ EmitOperand(src, dst);
+}
+
+
void X86Assembler::movss(XmmRegister dst, const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
@@ -467,6 +499,83 @@
}
+void X86Assembler::addps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x58);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::subps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x5C);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::mulps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x59);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::divps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x5E);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::movapd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x28);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::movapd(XmmRegister dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x28);
+ EmitOperand(dst, src);
+}
+
+
+void X86Assembler::movupd(XmmRegister dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x10);
+ EmitOperand(dst, src);
+}
+
+
+void X86Assembler::movapd(const Address& dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x29);
+ EmitOperand(src, dst);
+}
+
+
+void X86Assembler::movupd(const Address& dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x11);
+ EmitOperand(src, dst);
+}
+
+
void X86Assembler::flds(const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xD9);
@@ -638,6 +747,42 @@
}
+void X86Assembler::addpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x58);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::subpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x5C);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::mulpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x59);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::divpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x5E);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::cvtsi2ss(XmmRegister dst, Register src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 114986b..a93616c 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -371,7 +371,12 @@
void setb(Condition condition, Register dst);
- void movaps(XmmRegister dst, XmmRegister src);
+ void movaps(XmmRegister dst, XmmRegister src); // move
+ void movaps(XmmRegister dst, const Address& src); // load aligned
+ void movups(XmmRegister dst, const Address& src); // load unaligned
+ void movaps(const Address& dst, XmmRegister src); // store aligned
+ void movups(const Address& dst, XmmRegister src); // store unaligned
+
void movss(XmmRegister dst, const Address& src);
void movss(const Address& dst, XmmRegister src);
void movss(XmmRegister dst, XmmRegister src);
@@ -388,6 +393,17 @@
void divss(XmmRegister dst, XmmRegister src);
void divss(XmmRegister dst, const Address& src);
+ void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void subps(XmmRegister dst, XmmRegister src);
+ void mulps(XmmRegister dst, XmmRegister src);
+ void divps(XmmRegister dst, XmmRegister src);
+
+ void movapd(XmmRegister dst, XmmRegister src); // move
+ void movapd(XmmRegister dst, const Address& src); // load aligned
+ void movupd(XmmRegister dst, const Address& src); // load unaligned
+ void movapd(const Address& dst, XmmRegister src); // store aligned
+ void movupd(const Address& dst, XmmRegister src); // store unaligned
+
void movsd(XmmRegister dst, const Address& src);
void movsd(const Address& dst, XmmRegister src);
void movsd(XmmRegister dst, XmmRegister src);
@@ -409,6 +425,11 @@
void divsd(XmmRegister dst, XmmRegister src);
void divsd(XmmRegister dst, const Address& src);
+ void addpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void subpd(XmmRegister dst, XmmRegister src);
+ void mulpd(XmmRegister dst, XmmRegister src);
+ void divpd(XmmRegister dst, XmmRegister src);
+
void cvtsi2ss(XmmRegister dst, Register src);
void cvtsi2sd(XmmRegister dst, Register src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 9bae6c2..4d60a12 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -423,6 +423,98 @@
DriverStr(expected, "TestlAddressImmediate");
}
+TEST_F(AssemblerX86Test, Movaps) {
+ DriverStr(RepeatFF(&x86::X86Assembler::movaps, "movaps %{reg2}, %{reg1}"), "movaps");
+}
+
+TEST_F(AssemblerX86Test, MovapsAddr) {
+ GetAssembler()->movaps(x86::XmmRegister(x86::XMM0), x86::Address(x86::Register(x86::ESP), 4));
+ GetAssembler()->movaps(x86::Address(x86::Register(x86::ESP), 2), x86::XmmRegister(x86::XMM1));
+ const char* expected =
+ "movaps 0x4(%ESP), %xmm0\n"
+ "movaps %xmm1, 0x2(%ESP)\n";
+ DriverStr(expected, "movaps_address");
+}
+
+TEST_F(AssemblerX86Test, MovupsAddr) {
+ GetAssembler()->movups(x86::XmmRegister(x86::XMM0), x86::Address(x86::Register(x86::ESP), 4));
+ GetAssembler()->movups(x86::Address(x86::Register(x86::ESP), 2), x86::XmmRegister(x86::XMM1));
+ const char* expected =
+ "movups 0x4(%ESP), %xmm0\n"
+ "movups %xmm1, 0x2(%ESP)\n";
+ DriverStr(expected, "movups_address");
+}
+
+TEST_F(AssemblerX86Test, Movapd) {
+ DriverStr(RepeatFF(&x86::X86Assembler::movapd, "movapd %{reg2}, %{reg1}"), "movapd");
+}
+
+TEST_F(AssemblerX86Test, MovapdAddr) {
+ GetAssembler()->movapd(x86::XmmRegister(x86::XMM0), x86::Address(x86::Register(x86::ESP), 4));
+ GetAssembler()->movapd(x86::Address(x86::Register(x86::ESP), 2), x86::XmmRegister(x86::XMM1));
+ const char* expected =
+ "movapd 0x4(%ESP), %xmm0\n"
+ "movapd %xmm1, 0x2(%ESP)\n";
+ DriverStr(expected, "movapd_address");
+}
+
+TEST_F(AssemblerX86Test, MovupdAddr) {
+ GetAssembler()->movupd(x86::XmmRegister(x86::XMM0), x86::Address(x86::Register(x86::ESP), 4));
+ GetAssembler()->movupd(x86::Address(x86::Register(x86::ESP), 2), x86::XmmRegister(x86::XMM1));
+ const char* expected =
+ "movupd 0x4(%ESP), %xmm0\n"
+ "movupd %xmm1, 0x2(%ESP)\n";
+ DriverStr(expected, "movupd_address");
+}
+
+TEST_F(AssemblerX86Test, AddPS) {
+ GetAssembler()->addps(x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1));
+ const char* expected = "addps %xmm1, %xmm0\n";
+ DriverStr(expected, "addps");
+}
+
+TEST_F(AssemblerX86Test, AddPD) {
+ GetAssembler()->addpd(x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1));
+ const char* expected = "addpd %xmm1, %xmm0\n";
+ DriverStr(expected, "addpd");
+}
+
+TEST_F(AssemblerX86Test, SubPS) {
+ GetAssembler()->subps(x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1));
+ const char* expected = "subps %xmm1, %xmm0\n";
+ DriverStr(expected, "subps");
+}
+
+TEST_F(AssemblerX86Test, SubPD) {
+ GetAssembler()->subpd(x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1));
+ const char* expected = "subpd %xmm1, %xmm0\n";
+ DriverStr(expected, "subpd");
+}
+
+TEST_F(AssemblerX86Test, MulPS) {
+ GetAssembler()->mulps(x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1));
+ const char* expected = "mulps %xmm1, %xmm0\n";
+ DriverStr(expected, "mulps");
+}
+
+TEST_F(AssemblerX86Test, MulPD) {
+ GetAssembler()->mulpd(x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1));
+ const char* expected = "mulpd %xmm1, %xmm0\n";
+ DriverStr(expected, "mulpd");
+}
+
+TEST_F(AssemblerX86Test, DivPS) {
+ GetAssembler()->divps(x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1));
+ const char* expected = "divps %xmm1, %xmm0\n";
+ DriverStr(expected, "divps");
+}
+
+TEST_F(AssemblerX86Test, DivPD) {
+ GetAssembler()->divpd(x86::XmmRegister(x86::XMM0), x86::XmmRegister(x86::XMM1));
+ const char* expected = "divpd %xmm1, %xmm0\n";
+ DriverStr(expected, "divpd");
+}
+
/////////////////
// Near labels //
/////////////////