SIMD cmpeq for x86/x86_64
Rationale:
Break-out CL of the ART Vectorizer.
Enables a fast all-ones optimization (comparing a register with itself sets every bit).
Bug: 34083438
Test: assembler_x86[_64]_test
Change-Id: I70bd71305f2ecc322ccada5471c197a578c0526e
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 0a6ceef..5307dc0 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1221,6 +1221,43 @@
}
+void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {  // Emits 66 0F 74, then the operand.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);  // 0F 74: expected to match PCMPEQB xmm1, xmm2 in the Intel SDM.
+ EmitUint8(0x74);
+ EmitXmmRegisterOperand(dst, src);  // Operand encoding for the (dst, src) pair.
+}
+
+
+void X86Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {  // Emits 66 0F 75, then the operand.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);  // 0F 75: expected to match PCMPEQW xmm1, xmm2 in the Intel SDM.
+ EmitUint8(0x75);
+ EmitXmmRegisterOperand(dst, src);  // Operand encoding for the (dst, src) pair.
+}
+
+
+void X86Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {  // Emits 66 0F 76, then the operand.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);  // 0F 76: expected to match PCMPEQD xmm1, xmm2 in the Intel SDM.
+ EmitUint8(0x76);
+ EmitXmmRegisterOperand(dst, src);  // Operand encoding for the (dst, src) pair.
+}
+
+
+void X86Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {  // Emits 66 0F 38 29, then the operand.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);  // Unlike the b/w/d variants, this one emits three opcode bytes: 0F 38 29.
+ EmitUint8(0x38);  // NOTE(review): 0F 38 29 should be PCMPEQQ, an SSE4.1 encoding per the Intel
+ EmitUint8(0x29);  // SDM -- confirm callers only use it where SSE4.1 is available.
+ EmitXmmRegisterOperand(dst, src);  // Operand encoding for the (dst, src) pair.
+}
+
+
void X86Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 9d7ca77..f52cf16 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -495,6 +495,11 @@
void orps(XmmRegister dst, XmmRegister src);
void por(XmmRegister dst, XmmRegister src);
+ void pcmpeqb(XmmRegister dst, XmmRegister src);  // SIMD cmpeq, 'b' form (presumably byte lanes).
+ void pcmpeqw(XmmRegister dst, XmmRegister src);  // SIMD cmpeq, 'w' form (presumably 16-bit lanes).
+ void pcmpeqd(XmmRegister dst, XmmRegister src);  // SIMD cmpeq, 'd' form (presumably 32-bit lanes).
+ void pcmpeqq(XmmRegister dst, XmmRegister src);  // SIMD cmpeq, 'q' form (presumably 64-bit lanes).
+
void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 52c7507..2304907 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -605,6 +605,22 @@
DriverStr(RepeatFF(&x86::X86Assembler::por, "por %{reg2}, %{reg1}"), "por");
}
+TEST_F(AssemblerX86Test, PCmpeqB) {  // Runs pcmpeqb through RepeatFF and DriverStr.
+ DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqW) {  // Runs pcmpeqw through RepeatFF and DriverStr.
+ DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqw, "pcmpeqw %{reg2}, %{reg1}"), "pcmpeqw");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqD) {  // Runs pcmpeqd through RepeatFF and DriverStr.
+ DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqd, "pcmpeqd %{reg2}, %{reg1}"), "pcmpeqd");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqQ) {  // Runs pcmpeqq through RepeatFF and DriverStr.
+ DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "pcmpeqq");
+}
+
TEST_F(AssemblerX86Test, ShufPS) {
DriverStr(RepeatFFI(&x86::X86Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps");
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index d7fed5b..d20a696 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1427,6 +1427,43 @@
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {  // Emits 66 [REX] 0F 74 + operand.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);  // Placed after the 0x66 byte and before the opcode bytes.
+ EmitUint8(0x0F);  // 0F 74: expected to match PCMPEQB xmm1, xmm2 in the Intel SDM.
+ EmitUint8(0x74);
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // Only dst.LowBits() here; full dst went to the Rex call.
+}
+
+void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {  // Emits 66 [REX] 0F 75 + operand.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);  // Placed after the 0x66 byte and before the opcode bytes.
+ EmitUint8(0x0F);  // 0F 75: expected to match PCMPEQW xmm1, xmm2 in the Intel SDM.
+ EmitUint8(0x75);
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // Only dst.LowBits() here; full dst went to the Rex call.
+}
+
+void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {  // Emits 66 [REX] 0F 76 + operand.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);  // Placed after the 0x66 byte and before the opcode bytes.
+ EmitUint8(0x0F);  // 0F 76: expected to match PCMPEQD xmm1, xmm2 in the Intel SDM.
+ EmitUint8(0x76);
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // Only dst.LowBits() here; full dst went to the Rex call.
+}
+
+void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {  // Emits 66 [REX] 0F 38 29 + operand.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);  // Placed after the 0x66 byte and before the opcode bytes.
+ EmitUint8(0x0F);  // Unlike the b/w/d variants, this one emits three opcode bytes: 0F 38 29.
+ EmitUint8(0x38);  // NOTE(review): 0F 38 29 should be PCMPEQQ, an SSE4.1 encoding per the Intel
+ EmitUint8(0x29);  // SDM -- confirm callers only use it where SSE4.1 is available.
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // Only dst.LowBits() here; full dst went to the Rex call.
+}
+
void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 93c24b8..08e17e8 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -523,6 +523,11 @@
void orps(XmmRegister dst, XmmRegister src);
void por(XmmRegister dst, XmmRegister src);
+ void pcmpeqb(XmmRegister dst, XmmRegister src);  // SIMD cmpeq, 'b' form (presumably byte lanes).
+ void pcmpeqw(XmmRegister dst, XmmRegister src);  // SIMD cmpeq, 'w' form (presumably 16-bit lanes).
+ void pcmpeqd(XmmRegister dst, XmmRegister src);  // SIMD cmpeq, 'd' form (presumably 32-bit lanes).
+ void pcmpeqq(XmmRegister dst, XmmRegister src);  // SIMD cmpeq, 'q' form (presumably 64-bit lanes).
+
void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 9d62fd1..20062fd 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1293,6 +1293,22 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::por, "por %{reg2}, %{reg1}"), "por");
}
+TEST_F(AssemblerX86_64Test, PCmpeqb) {  // Runs pcmpeqb through RepeatFF and DriverStr.
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpeqw) {  // Runs pcmpeqw through RepeatFF and DriverStr.
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqw, "pcmpeqw %{reg2}, %{reg1}"), "pcmpeqw");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpeqd) {  // Runs pcmpeqd through RepeatFF and DriverStr.
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqd, "pcmpeqd %{reg2}, %{reg1}"), "pcmpeqd");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpeqq) {  // Runs pcmpeqq through RepeatFF and DriverStr.
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "pcmpeqq");
+}
+
TEST_F(AssemblerX86_64Test, Shufps) {
DriverStr(RepeatFFI(&x86_64::X86_64Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps");
}