SIMD and-not for x86/x86_64
Rationale:
Break-out CL of ART Vectorizer.
Enables and-not optimization.
Bug: 34083438
Test: assembler_x86[_64]_test
Change-Id: I8fa61d88f9f014973b0d9707d39be56a7f995db8
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 6a57f45..0a6ceef 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1169,6 +1169,32 @@
}
+void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x55);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::andnps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x55);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pandn(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xDF);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::orpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index e3c123c..9d7ca77 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -487,6 +487,10 @@
void andps(XmmRegister dst, const Address& src);
void pand(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void andnps(XmmRegister dst, XmmRegister src);
+ void pandn(XmmRegister dst, XmmRegister src);
+
void orpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void orps(XmmRegister dst, XmmRegister src);
void por(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 110d0dc..52c7507 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -581,6 +581,18 @@
DriverStr(RepeatFF(&x86::X86Assembler::pand, "pand %{reg2}, %{reg1}"), "pand");
}
+TEST_F(AssemblerX86Test, AndnPD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd");
+}
+
+TEST_F(AssemblerX86Test, AndnPS) {
+ DriverStr(RepeatFF(&x86::X86Assembler::andnps, "andnps %{reg2}, %{reg1}"), "andnps");
+}
+
+TEST_F(AssemblerX86Test, PAndn) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pandn, "pandn %{reg2}, %{reg1}"), "pandn");
+}
+
TEST_F(AssemblerX86Test, OrPD) {
DriverStr(RepeatFF(&x86::X86Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd");
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 688fdcc..d7fed5b 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1375,6 +1375,32 @@
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x55);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x55);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xDF);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 480e711..93c24b8 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -515,6 +515,10 @@
void andps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void pand(XmmRegister dst, XmmRegister src);
+ void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void andnps(XmmRegister dst, XmmRegister src);
+ void pandn(XmmRegister dst, XmmRegister src);
+
void orpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void orps(XmmRegister dst, XmmRegister src);
void por(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index ba011c9..9d62fd1 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1269,6 +1269,18 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pand, "pand %{reg2}, %{reg1}"), "pand");
}
+TEST_F(AssemblerX86_64Test, andnpd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd");
+}
+
+TEST_F(AssemblerX86_64Test, andnps) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnps, "andnps %{reg2}, %{reg1}"), "andnps");
+}
+
+TEST_F(AssemblerX86_64Test, Pandn) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pandn, "pandn %{reg2}, %{reg1}"), "pandn");
+}
+
TEST_F(AssemblerX86_64Test, Orps) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::orps, "orps %{reg2}, %{reg1}"), "orps");
}