summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Aart Bik <ajcbik@google.com> 2017-04-03 14:09:01 -0700
committer Aart Bik <ajcbik@google.com> 2017-04-04 09:15:45 -0700
commit8939c6474a34eb6d642db8fecb8b3a5c3194e464 (patch)
treea0a2db32a5147416d796907a3977dd29e04e9328
parent08ae45625d059891754e3c3ad63a5e6cae80b96b (diff)
SIMD pcmpgtb,w,d,q for x86/x86_64
Rationale: Enables fast compare gt. Test: assembler_x86[_64]_test Change-Id: I0a069649480529f3fec2c2b100e2aaaa2cd79820
-rw-r--r--compiler/utils/x86/assembler_x86.cc37
-rw-r--r--compiler/utils/x86/assembler_x86.h5
-rw-r--r--compiler/utils/x86/assembler_x86_test.cc16
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.cc37
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.h5
-rw-r--r--compiler/utils/x86_64/assembler_x86_64_test.cc16
-rw-r--r--disassembler/disassembler_x86.cc32
7 files changed, 148 insertions, 0 deletions
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 9c934b7f39..1736618363 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1276,6 +1276,43 @@ void X86Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
}
+void X86Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x64);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x65);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x66);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x37);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index b87522a017..a747cda7bd 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -503,6 +503,11 @@ class X86Assembler FINAL : public Assembler {
void pcmpeqd(XmmRegister dst, XmmRegister src);
void pcmpeqq(XmmRegister dst, XmmRegister src);
+ void pcmpgtb(XmmRegister dst, XmmRegister src);
+ void pcmpgtw(XmmRegister dst, XmmRegister src);
+ void pcmpgtd(XmmRegister dst, XmmRegister src);
+ void pcmpgtq(XmmRegister dst, XmmRegister src); // SSE4.2
+
void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index a01eb6dc23..f75f972265 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -629,6 +629,22 @@ TEST_F(AssemblerX86Test, PCmpeqQ) {
DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "cmpeqq");
}
+TEST_F(AssemblerX86Test, PCmpgtB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtb, "pcmpgtb %{reg2}, %{reg1}"), "cmpgtb");
+}
+
+TEST_F(AssemblerX86Test, PCmpgtW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtw, "pcmpgtw %{reg2}, %{reg1}"), "cmpgtw");
+}
+
+TEST_F(AssemblerX86Test, PCmpgtD) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtd, "pcmpgtd %{reg2}, %{reg1}"), "cmpgtd");
+}
+
+TEST_F(AssemblerX86Test, PCmpgtQ) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtq, "pcmpgtq %{reg2}, %{reg1}"), "cmpgtq");
+}
+
TEST_F(AssemblerX86Test, ShufPS) {
DriverStr(RepeatFFI(&x86::X86Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps");
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 488c75de41..1b7a4850db 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1482,6 +1482,43 @@ void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x64);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x65);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x66);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x38);
+ EmitUint8(0x37);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index fc2b117f71..0ddc46ca44 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -531,6 +531,11 @@ class X86_64Assembler FINAL : public Assembler {
void pcmpeqd(XmmRegister dst, XmmRegister src);
void pcmpeqq(XmmRegister dst, XmmRegister src);
+ void pcmpgtb(XmmRegister dst, XmmRegister src);
+ void pcmpgtw(XmmRegister dst, XmmRegister src);
+ void pcmpgtd(XmmRegister dst, XmmRegister src);
+ void pcmpgtq(XmmRegister dst, XmmRegister src); // SSE4.2
+
void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 4adf210e47..e7d8401e29 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1317,6 +1317,22 @@ TEST_F(AssemblerX86_64Test, PCmpeqq) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "pcmpeqq");
}
+TEST_F(AssemblerX86_64Test, PCmpgtb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtb, "pcmpgtb %{reg2}, %{reg1}"), "pcmpgtb");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpgtw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtw, "pcmpgtw %{reg2}, %{reg1}"), "pcmpgtw");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpgtd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtd, "pcmpgtd %{reg2}, %{reg1}"), "pcmpgtd");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpgtq) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtq, "pcmpgtq %{reg2}, %{reg1}"), "pcmpgtq");
+}
+
TEST_F(AssemblerX86_64Test, Shufps) {
DriverStr(RepeatFFI(&x86_64::X86_64Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps");
}
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index f5c3ad20cc..e12bcec776 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -574,6 +574,20 @@ DISASSEMBLER_ENTRY(cmp,
load = true;
src_reg_file = dst_reg_file = SSE;
break;
+ case 0x29:
+ opcode1 = "pcmpeqq";
+ prefix[2] = 0;
+ has_modrm = true;
+ load = true;
+ src_reg_file = dst_reg_file = SSE;
+ break;
+ case 0x39:
+ opcode1 = "pcmpgtq";
+ prefix[2] = 0;
+ has_modrm = true;
+ load = true;
+ src_reg_file = dst_reg_file = SSE;
+ break;
case 0x40:
opcode1 = "pmulld";
prefix[2] = 0;
@@ -737,6 +751,24 @@ DISASSEMBLER_ENTRY(cmp,
load = true;
has_modrm = true;
break;
+ case 0x64:
+ case 0x65:
+ case 0x66:
+ if (prefix[2] == 0x66) {
+ src_reg_file = dst_reg_file = SSE;
+ prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode
+ } else {
+ src_reg_file = dst_reg_file = MMX;
+ }
+ switch (*instr) {
+ case 0x64: opcode1 = "pcmpgtb"; break;
+ case 0x65: opcode1 = "pcmpgtw"; break;
+ case 0x66: opcode1 = "pcmpgtd"; break;
+ }
+ prefix[2] = 0;
+ has_modrm = true;
+ load = true;
+ break;
case 0x6E:
if (prefix[2] == 0x66) {
dst_reg_file = SSE;