Added a few integral SIMD extensions for x86/x86_64 (SSE).
Rationale:
ART vectorizer needs SIMD for integer operations too.
Test: assembler_x86[_64]_test
Bug: 34083438
Change-Id: Id6fec558c617d38cb643839eafcd10e59dcd6e0a
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index a24d49e..5a466e1 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -783,6 +783,79 @@
}
+void X86Assembler::movdqa(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x6F);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::movdqa(XmmRegister dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x6F);
+ EmitOperand(dst, src);
+}
+
+
+void X86Assembler::movdqu(XmmRegister dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitUint8(0x0F);
+ EmitUint8(0x6F);
+ EmitOperand(dst, src);
+}
+
+
+void X86Assembler::movdqa(const Address& dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x7F);
+ EmitOperand(src, dst);
+}
+
+
+void X86Assembler::movdqu(const Address& dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitUint8(0x0F);
+ EmitUint8(0x7F);
+ EmitOperand(src, dst);
+}
+
+
+void X86Assembler::paddd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xFE);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::psubd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xFA);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pmulld(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x40);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::cvtsi2ss(XmmRegister dst, Register src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
@@ -990,10 +1063,27 @@
}
-void X86Assembler::andps(XmmRegister dst, XmmRegister src) {
+void X86Assembler::xorps(XmmRegister dst, const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
- EmitUint8(0x54);
+ EmitUint8(0x57);
+ EmitOperand(dst, src);
+}
+
+
+void X86Assembler::xorps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x57);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pxor(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xEF);
EmitXmmRegisterOperand(dst, src);
}
@@ -1007,35 +1097,19 @@
}
-void X86Assembler::orpd(XmmRegister dst, XmmRegister src) {
+void X86Assembler::andpd(XmmRegister dst, const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
EmitUint8(0x0F);
- EmitUint8(0x56);
- EmitXmmRegisterOperand(dst, src);
-}
-
-
-void X86Assembler::xorps(XmmRegister dst, const Address& src) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x0F);
- EmitUint8(0x57);
+ EmitUint8(0x54);
EmitOperand(dst, src);
}
-void X86Assembler::orps(XmmRegister dst, XmmRegister src) {
+void X86Assembler::andps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
- EmitUint8(0x56);
- EmitXmmRegisterOperand(dst, src);
-}
-
-
-void X86Assembler::xorps(XmmRegister dst, XmmRegister src) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- EmitUint8(0x0F);
- EmitUint8(0x57);
+ EmitUint8(0x54);
EmitXmmRegisterOperand(dst, src);
}
@@ -1048,12 +1122,38 @@
}
-void X86Assembler::andpd(XmmRegister dst, const Address& src) {
+void X86Assembler::pand(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
EmitUint8(0x0F);
- EmitUint8(0x54);
- EmitOperand(dst, src);
+ EmitUint8(0xDB);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::orpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x56);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::orps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x56);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::por(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xEB);
+ EmitXmmRegisterOperand(dst, src);
}
@@ -1076,6 +1176,16 @@
}
+void X86Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x70);
+ EmitXmmRegisterOperand(dst, src);
+ EmitUint8(imm.value());
+}
+
+
void X86Assembler::fldl(const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xDD);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 4056ca6..4343e2e 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -430,6 +430,16 @@
void mulpd(XmmRegister dst, XmmRegister src);
void divpd(XmmRegister dst, XmmRegister src);
+ void movdqa(XmmRegister dst, XmmRegister src); // move
+ void movdqa(XmmRegister dst, const Address& src); // load aligned
+ void movdqu(XmmRegister dst, const Address& src); // load unaligned
+ void movdqa(const Address& dst, XmmRegister src); // store aligned
+ void movdqu(const Address& dst, XmmRegister src); // store unaligned
+
+ void paddd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void psubd(XmmRegister dst, XmmRegister src);
+ void pmulld(XmmRegister dst, XmmRegister src);
+
void cvtsi2ss(XmmRegister dst, Register src);
void cvtsi2sd(XmmRegister dst, Register src);
@@ -463,17 +473,21 @@
void xorpd(XmmRegister dst, XmmRegister src);
void xorps(XmmRegister dst, const Address& src);
void xorps(XmmRegister dst, XmmRegister src);
+ void pxor(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void andpd(XmmRegister dst, XmmRegister src);
void andpd(XmmRegister dst, const Address& src);
void andps(XmmRegister dst, XmmRegister src);
void andps(XmmRegister dst, const Address& src);
+ void pand(XmmRegister dst, XmmRegister src); // no addr variant (for now)
- void orpd(XmmRegister dst, XmmRegister src);
+ void orpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void orps(XmmRegister dst, XmmRegister src);
+ void por(XmmRegister dst, XmmRegister src);
void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
+ void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
void flds(const Address& src);
void fstps(const Address& dst);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 1768d8b..c6ab893 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -467,6 +467,28 @@
DriverStr(expected, "movupd_address");
}
+TEST_F(AssemblerX86Test, Movdqa) {
+ DriverStr(RepeatFF(&x86::X86Assembler::movdqa, "movdqa %{reg2}, %{reg1}"), "movdqa");
+}
+
+TEST_F(AssemblerX86Test, MovdqaAddr) {
+ GetAssembler()->movdqa(x86::XmmRegister(x86::XMM0), x86::Address(x86::Register(x86::ESP), 4));
+ GetAssembler()->movdqa(x86::Address(x86::Register(x86::ESP), 2), x86::XmmRegister(x86::XMM1));
+ const char* expected =
+ "movdqa 0x4(%ESP), %xmm0\n"
+ "movdqa %xmm1, 0x2(%ESP)\n";
+ DriverStr(expected, "movdqa_address");
+}
+
+TEST_F(AssemblerX86Test, MovdquAddr) {
+ GetAssembler()->movdqu(x86::XmmRegister(x86::XMM0), x86::Address(x86::Register(x86::ESP), 4));
+ GetAssembler()->movdqu(x86::Address(x86::Register(x86::ESP), 2), x86::XmmRegister(x86::XMM1));
+ const char* expected =
+ "movdqu 0x4(%ESP), %xmm0\n"
+ "movdqu %xmm1, 0x2(%ESP)\n";
+ DriverStr(expected, "movdqu_address");
+}
+
TEST_F(AssemblerX86Test, AddPS) {
DriverStr(RepeatFF(&x86::X86Assembler::addps, "addps %{reg2}, %{reg1}"), "addps");
}
@@ -499,6 +521,54 @@
DriverStr(RepeatFF(&x86::X86Assembler::divpd, "divpd %{reg2}, %{reg1}"), "divpd");
}
+TEST_F(AssemblerX86Test, PAddD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::paddd, "paddd %{reg2}, %{reg1}"), "paddd");
+}
+
+TEST_F(AssemblerX86Test, PSubD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::psubd, "psubd %{reg2}, %{reg1}"), "psubd");
+}
+
+TEST_F(AssemblerX86Test, PMullD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pmulld, "pmulld %{reg2}, %{reg1}"), "pmulld");
+}
+
+TEST_F(AssemblerX86Test, XorPD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd");
+}
+
+TEST_F(AssemblerX86Test, XorPS) {
+ DriverStr(RepeatFF(&x86::X86Assembler::xorps, "xorps %{reg2}, %{reg1}"), "xorps");
+}
+
+TEST_F(AssemblerX86Test, PXor) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pxor, "pxor %{reg2}, %{reg1}"), "pxor");
+}
+
+TEST_F(AssemblerX86Test, AndPD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::andpd, "andpd %{reg2}, %{reg1}"), "andpd");
+}
+
+TEST_F(AssemblerX86Test, AndPS) {
+ DriverStr(RepeatFF(&x86::X86Assembler::andps, "andps %{reg2}, %{reg1}"), "andps");
+}
+
+TEST_F(AssemblerX86Test, PAnd) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pand, "pand %{reg2}, %{reg1}"), "pand");
+}
+
+TEST_F(AssemblerX86Test, OrPD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd");
+}
+
+TEST_F(AssemblerX86Test, OrPS) {
+ DriverStr(RepeatFF(&x86::X86Assembler::orps, "orps %{reg2}, %{reg1}"), "orps");
+}
+
+TEST_F(AssemblerX86Test, POr) {
+ DriverStr(RepeatFF(&x86::X86Assembler::por, "por %{reg2}, %{reg1}"), "por");
+}
+
TEST_F(AssemblerX86Test, ShufPS) {
DriverStr(RepeatFFI(&x86::X86Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps");
}
@@ -507,6 +577,10 @@
DriverStr(RepeatFFI(&x86::X86Assembler::shufpd, 1, "shufpd ${imm}, %{reg2}, %{reg1}"), "shufpd");
}
+TEST_F(AssemblerX86Test, PShufD) {
+ DriverStr(RepeatFFI(&x86::X86Assembler::pshufd, 1, "pshufd ${imm}, %{reg2}, %{reg1}"), "pshufd");
+}
+
/////////////////
// Near labels //
/////////////////