Added a few idiomatic x86 SSE instructions.
Test: test-art-host-gtest-assembler_x86[_64]_test
Change-Id: I4f98cb6c9be82f1cb62276ee9331734b86111b5c
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index bef32f8..b50f1af 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1238,6 +1238,101 @@
EmitXmmRegisterOperand(dst, src);
}
+
+void X86Assembler::psadbw(XmmRegister dst, XmmRegister src) {  // PSADBW xmm, xmm: 66 0F F6 /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix of the XMM form
+ EmitUint8(0x0F);  // two-byte opcode escape
+ EmitUint8(0xF6);  // PSADBW: sums absolute differences of packed unsigned bytes
+ EmitXmmRegisterOperand(dst, src);  // register-register operand for (dst, src)
+}
+
+
+void X86Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {  // PMADDWD xmm, xmm: 66 0F F5 /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix of the XMM form
+ EmitUint8(0x0F);  // two-byte opcode escape
+ EmitUint8(0xF5);  // PMADDWD: multiplies packed signed words, adds adjacent 32-bit products
+ EmitXmmRegisterOperand(dst, src);  // register-register operand for (dst, src)
+}
+
+
+void X86Assembler::phaddw(XmmRegister dst, XmmRegister src) {  // PHADDW xmm, xmm: 66 0F 38 01 /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix of the XMM form
+ EmitUint8(0x0F);  // opcode escape, first byte
+ EmitUint8(0x38);  // opcode escape, second byte (0F 38 opcode map)
+ EmitUint8(0x01);  // PHADDW: horizontal add of adjacent packed 16-bit integers
+ EmitXmmRegisterOperand(dst, src);  // register-register operand for (dst, src)
+}
+
+
+void X86Assembler::phaddd(XmmRegister dst, XmmRegister src) {  // PHADDD xmm, xmm: 66 0F 38 02 /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix of the XMM form
+ EmitUint8(0x0F);  // opcode escape, first byte
+ EmitUint8(0x38);  // opcode escape, second byte (0F 38 opcode map)
+ EmitUint8(0x02);  // PHADDD: horizontal add of adjacent packed 32-bit integers
+ EmitXmmRegisterOperand(dst, src);  // register-register operand for (dst, src)
+}
+
+
+void X86Assembler::haddps(XmmRegister dst, XmmRegister src) {  // HADDPS xmm, xmm: F2 0F 7C /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF2);  // mandatory prefix selecting the packed-single form
+ EmitUint8(0x0F);  // two-byte opcode escape
+ EmitUint8(0x7C);  // HADDPS: horizontal add of packed single-precision floats
+ EmitXmmRegisterOperand(dst, src);  // register-register operand for (dst, src)
+}
+
+
+void X86Assembler::haddpd(XmmRegister dst, XmmRegister src) {  // HADDPD xmm, xmm: 66 0F 7C /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix selecting the packed-double form
+ EmitUint8(0x0F);  // two-byte opcode escape
+ EmitUint8(0x7C);  // HADDPD: horizontal add of packed double-precision floats
+ EmitXmmRegisterOperand(dst, src);  // register-register operand for (dst, src)
+}
+
+
+void X86Assembler::phsubw(XmmRegister dst, XmmRegister src) {  // PHSUBW xmm, xmm: 66 0F 38 05 /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix of the XMM form
+ EmitUint8(0x0F);  // opcode escape, first byte
+ EmitUint8(0x38);  // opcode escape, second byte (0F 38 opcode map)
+ EmitUint8(0x05);  // PHSUBW: horizontal subtract of adjacent packed 16-bit integers
+ EmitXmmRegisterOperand(dst, src);  // register-register operand for (dst, src)
+}
+
+
+void X86Assembler::phsubd(XmmRegister dst, XmmRegister src) {  // PHSUBD xmm, xmm: 66 0F 38 06 /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix of the XMM form
+ EmitUint8(0x0F);  // opcode escape, first byte
+ EmitUint8(0x38);  // opcode escape, second byte (0F 38 opcode map)
+ EmitUint8(0x06);  // PHSUBD: horizontal subtract of adjacent packed 32-bit integers
+ EmitXmmRegisterOperand(dst, src);  // register-register operand for (dst, src)
+}
+
+
+void X86Assembler::hsubps(XmmRegister dst, XmmRegister src) {  // HSUBPS xmm, xmm: F2 0F 7D /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF2);  // mandatory prefix selecting the packed-single form
+ EmitUint8(0x0F);  // two-byte opcode escape
+ EmitUint8(0x7D);  // HSUBPS: horizontal subtract of packed single-precision floats
+ EmitXmmRegisterOperand(dst, src);  // register-register operand for (dst, src)
+}
+
+
+void X86Assembler::hsubpd(XmmRegister dst, XmmRegister src) {  // HSUBPD xmm, xmm: 66 0F 7D /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix selecting the packed-double form
+ EmitUint8(0x0F);  // two-byte opcode escape
+ EmitUint8(0x7D);  // HSUBPD: horizontal subtract of packed double-precision floats
+ EmitXmmRegisterOperand(dst, src);  // register-register operand for (dst, src)
+}
+
+
void X86Assembler::pminsb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index c4bb9ee..8578340 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -497,6 +497,16 @@
void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void pavgw(XmmRegister dst, XmmRegister src);
+ void psadbw(XmmRegister dst, XmmRegister src);  // SSE2; this group is register-register only
+ void pmaddwd(XmmRegister dst, XmmRegister src);  // SSE2
+ void phaddw(XmmRegister dst, XmmRegister src);  // SSSE3
+ void phaddd(XmmRegister dst, XmmRegister src);  // SSSE3
+ void haddps(XmmRegister dst, XmmRegister src);  // SSE3
+ void haddpd(XmmRegister dst, XmmRegister src);  // SSE3
+ void phsubw(XmmRegister dst, XmmRegister src);  // SSSE3
+ void phsubd(XmmRegister dst, XmmRegister src);  // SSSE3
+ void hsubps(XmmRegister dst, XmmRegister src);  // SSE3
+ void hsubpd(XmmRegister dst, XmmRegister src);  // SSE3
void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void pmaxsb(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 34f2a47..3e1244e 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -613,6 +613,46 @@
DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
}
+TEST_F(AssemblerX86Test, PSadBW) {  // register-register psadbw, driven through RepeatFF
+ DriverStr(RepeatFF(&x86::X86Assembler::psadbw, "psadbw %{reg2}, %{reg1}"), "psadbw");
+}
+
+TEST_F(AssemblerX86Test, PMAddWD) {  // register-register pmaddwd, driven through RepeatFF
+ DriverStr(RepeatFF(&x86::X86Assembler::pmaddwd, "pmaddwd %{reg2}, %{reg1}"), "pmaddwd");
+}
+
+TEST_F(AssemblerX86Test, PHAddW) {  // register-register phaddw, driven through RepeatFF
+ DriverStr(RepeatFF(&x86::X86Assembler::phaddw, "phaddw %{reg2}, %{reg1}"), "phaddw");
+}
+
+TEST_F(AssemblerX86Test, PHAddD) {  // register-register phaddd, driven through RepeatFF
+ DriverStr(RepeatFF(&x86::X86Assembler::phaddd, "phaddd %{reg2}, %{reg1}"), "phaddd");
+}
+
+TEST_F(AssemblerX86Test, HAddPS) {  // register-register haddps, driven through RepeatFF
+ DriverStr(RepeatFF(&x86::X86Assembler::haddps, "haddps %{reg2}, %{reg1}"), "haddps");
+}
+
+TEST_F(AssemblerX86Test, HAddPD) {  // register-register haddpd, driven through RepeatFF
+ DriverStr(RepeatFF(&x86::X86Assembler::haddpd, "haddpd %{reg2}, %{reg1}"), "haddpd");
+}
+
+TEST_F(AssemblerX86Test, PHSubW) {  // register-register phsubw, driven through RepeatFF
+ DriverStr(RepeatFF(&x86::X86Assembler::phsubw, "phsubw %{reg2}, %{reg1}"), "phsubw");
+}
+
+TEST_F(AssemblerX86Test, PHSubD) {  // register-register phsubd, driven through RepeatFF
+ DriverStr(RepeatFF(&x86::X86Assembler::phsubd, "phsubd %{reg2}, %{reg1}"), "phsubd");
+}
+
+TEST_F(AssemblerX86Test, HSubPS) {  // register-register hsubps, driven through RepeatFF
+ DriverStr(RepeatFF(&x86::X86Assembler::hsubps, "hsubps %{reg2}, %{reg1}"), "hsubps");
+}
+
+TEST_F(AssemblerX86Test, HSubPD) {  // register-register hsubpd, driven through RepeatFF
+ DriverStr(RepeatFF(&x86::X86Assembler::hsubpd, "hsubpd %{reg2}, %{reg1}"), "hsubpd");
+}
+
TEST_F(AssemblerX86Test, PMinSB) {
DriverStr(RepeatFF(&x86::X86Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb");
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 82d1174..ea69a1c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1445,6 +1445,100 @@
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {  // PSADBW: 66 [REX] 0F F6 /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix; emitted ahead of any REX byte
+ EmitOptionalRex32(dst, src);  // presumably emits REX only when dst/src require it — see helper
+ EmitUint8(0x0F);  // two-byte opcode escape
+ EmitUint8(0xF6);  // PSADBW: sums absolute differences of packed unsigned bytes
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // operand built from dst.LowBits() and src
+}
+
+void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {  // PMADDWD: 66 [REX] 0F F5 /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix; emitted ahead of any REX byte
+ EmitOptionalRex32(dst, src);  // presumably emits REX only when dst/src require it — see helper
+ EmitUint8(0x0F);  // two-byte opcode escape
+ EmitUint8(0xF5);  // PMADDWD: multiplies packed signed words, adds adjacent 32-bit products
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // operand built from dst.LowBits() and src
+}
+
+void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {  // PHADDW: 66 [REX] 0F 38 01 /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix; emitted ahead of any REX byte
+ EmitOptionalRex32(dst, src);  // presumably emits REX only when dst/src require it — see helper
+ EmitUint8(0x0F);  // opcode escape, first byte
+ EmitUint8(0x38);  // opcode escape, second byte (0F 38 opcode map)
+ EmitUint8(0x01);  // PHADDW: horizontal add of adjacent packed 16-bit integers
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // operand built from dst.LowBits() and src
+}
+
+void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {  // PHADDD: 66 [REX] 0F 38 02 /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix; emitted ahead of any REX byte
+ EmitOptionalRex32(dst, src);  // presumably emits REX only when dst/src require it — see helper
+ EmitUint8(0x0F);  // opcode escape, first byte
+ EmitUint8(0x38);  // opcode escape, second byte (0F 38 opcode map)
+ EmitUint8(0x02);  // PHADDD: horizontal add of adjacent packed 32-bit integers
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // operand built from dst.LowBits() and src
+}
+
+void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {  // HADDPS: F2 [REX] 0F 7C /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF2);  // mandatory prefix (packed-single form); emitted ahead of any REX byte
+ EmitOptionalRex32(dst, src);  // presumably emits REX only when dst/src require it — see helper
+ EmitUint8(0x0F);  // two-byte opcode escape
+ EmitUint8(0x7C);  // HADDPS: horizontal add of packed single-precision floats
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // operand built from dst.LowBits() and src
+}
+
+void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {  // HADDPD: 66 [REX] 0F 7C /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix (packed-double form); emitted ahead of any REX byte
+ EmitOptionalRex32(dst, src);  // presumably emits REX only when dst/src require it — see helper
+ EmitUint8(0x0F);  // two-byte opcode escape
+ EmitUint8(0x7C);  // HADDPD: horizontal add of packed double-precision floats
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // operand built from dst.LowBits() and src
+}
+
+void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {  // PHSUBW: 66 [REX] 0F 38 05 /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix; emitted ahead of any REX byte
+ EmitOptionalRex32(dst, src);  // presumably emits REX only when dst/src require it — see helper
+ EmitUint8(0x0F);  // opcode escape, first byte
+ EmitUint8(0x38);  // opcode escape, second byte (0F 38 opcode map)
+ EmitUint8(0x05);  // PHSUBW: horizontal subtract of adjacent packed 16-bit integers
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // operand built from dst.LowBits() and src
+}
+
+void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {  // PHSUBD: 66 [REX] 0F 38 06 /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix; emitted ahead of any REX byte
+ EmitOptionalRex32(dst, src);  // presumably emits REX only when dst/src require it — see helper
+ EmitUint8(0x0F);  // opcode escape, first byte
+ EmitUint8(0x38);  // opcode escape, second byte (0F 38 opcode map)
+ EmitUint8(0x06);  // PHSUBD: horizontal subtract of adjacent packed 32-bit integers
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // operand built from dst.LowBits() and src
+}
+
+void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {  // HSUBPS: F2 [REX] 0F 7D /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF2);  // mandatory prefix (packed-single form); emitted ahead of any REX byte
+ EmitOptionalRex32(dst, src);  // presumably emits REX only when dst/src require it — see helper
+ EmitUint8(0x0F);  // two-byte opcode escape
+ EmitUint8(0x7D);  // HSUBPS: horizontal subtract of packed single-precision floats
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // operand built from dst.LowBits() and src
+}
+
+void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {  // HSUBPD: 66 [REX] 0F 7D /r
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // mandatory prefix (packed-double form); emitted ahead of any REX byte
+ EmitOptionalRex32(dst, src);  // presumably emits REX only when dst/src require it — see helper
+ EmitUint8(0x0F);  // two-byte opcode escape
+ EmitUint8(0x7D);  // HSUBPD: horizontal subtract of packed double-precision floats
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // operand built from dst.LowBits() and src
+}
+
void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 6e584fe..41450bf 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -525,6 +525,16 @@
void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void pavgw(XmmRegister dst, XmmRegister src);
+ void psadbw(XmmRegister dst, XmmRegister src);  // SSE2; this group is register-register only
+ void pmaddwd(XmmRegister dst, XmmRegister src);  // SSE2
+ void phaddw(XmmRegister dst, XmmRegister src);  // SSSE3
+ void phaddd(XmmRegister dst, XmmRegister src);  // SSSE3
+ void haddps(XmmRegister dst, XmmRegister src);  // SSE3
+ void haddpd(XmmRegister dst, XmmRegister src);  // SSE3
+ void phsubw(XmmRegister dst, XmmRegister src);  // SSSE3
+ void phsubd(XmmRegister dst, XmmRegister src);  // SSSE3
+ void hsubps(XmmRegister dst, XmmRegister src);  // SSE3
+ void hsubpd(XmmRegister dst, XmmRegister src);  // SSE3
void pminsb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void pmaxsb(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index b574003..ec14e7a 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1301,6 +1301,46 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
}
+TEST_F(AssemblerX86_64Test, Psadbw) {  // register-register psadbw, driven through RepeatFF
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::psadbw, "psadbw %{reg2}, %{reg1}"), "psadbw");
+}
+
+TEST_F(AssemblerX86_64Test, Pmaddwd) {  // register-register pmaddwd, driven through RepeatFF
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmaddwd, "pmaddwd %{reg2}, %{reg1}"), "pmaddwd");
+}
+
+TEST_F(AssemblerX86_64Test, Phaddw) {  // register-register phaddw, driven through RepeatFF
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::phaddw, "phaddw %{reg2}, %{reg1}"), "phaddw");
+}
+
+TEST_F(AssemblerX86_64Test, Phaddd) {  // register-register phaddd, driven through RepeatFF
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::phaddd, "phaddd %{reg2}, %{reg1}"), "phaddd");
+}
+
+TEST_F(AssemblerX86_64Test, Haddps) {  // register-register haddps, driven through RepeatFF
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::haddps, "haddps %{reg2}, %{reg1}"), "haddps");
+}
+
+TEST_F(AssemblerX86_64Test, Haddpd) {  // register-register haddpd, driven through RepeatFF
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::haddpd, "haddpd %{reg2}, %{reg1}"), "haddpd");
+}
+
+TEST_F(AssemblerX86_64Test, Phsubw) {  // register-register phsubw, driven through RepeatFF
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::phsubw, "phsubw %{reg2}, %{reg1}"), "phsubw");
+}
+
+TEST_F(AssemblerX86_64Test, Phsubd) {  // register-register phsubd, driven through RepeatFF
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::phsubd, "phsubd %{reg2}, %{reg1}"), "phsubd");
+}
+
+TEST_F(AssemblerX86_64Test, Hsubps) {  // register-register hsubps, driven through RepeatFF
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::hsubps, "hsubps %{reg2}, %{reg1}"), "hsubps");
+}
+
+TEST_F(AssemblerX86_64Test, Hsubpd) {  // register-register hsubpd, driven through RepeatFF
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::hsubpd, "hsubpd %{reg2}, %{reg1}"), "hsubpd");
+}
+
TEST_F(AssemblerX86_64Test, Pminsb) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pminsb, "pminsb %{reg2}, %{reg1}"), "pminsb");
}