summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Aart Bik <ajcbik@google.com> 2017-03-31 15:11:53 -0700
committer Aart Bik <ajcbik@google.com> 2017-03-31 15:11:53 -0700
commit67d3fd77d1572e46f537dea2fd4ded3ecfd7c202 (patch)
tree168e7ddf85cbe0710266dc501dac6d7717f25cf8
parent5b92c48f99391ae764e1699a22881f9d5cbce721 (diff)
SIMD pavgb,w for x86/x86_64
Rationale: Break-out CL of ART Vectorizer. Enables fast halving add with rounding. Bug: 34083438. Test: assembler_x86[_64]_test. Change-Id: I09173376b803d671a6b05a33e630f45f778cea52
-rw-r--r--compiler/utils/x86/assembler_x86.cc18
-rw-r--r--compiler/utils/x86/assembler_x86.h3
-rw-r--r--compiler/utils/x86/assembler_x86_test.cc8
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.cc18
-rw-r--r--compiler/utils/x86_64/assembler_x86_64.h3
-rw-r--r--compiler/utils/x86_64/assembler_x86_64_test.cc8
-rw-r--r--disassembler/disassembler_x86.cc16
7 files changed, 74 insertions, 0 deletions
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 5307dc09d9..9c934b7f39 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1221,6 +1221,24 @@ void X86Assembler::por(XmmRegister dst, XmmRegister src) {
}
+void X86Assembler::pavgb(XmmRegister dst, XmmRegister src) {  // emits the bytes 66 0F E0 + operand byte: register-register pavgb
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);  // scoped guard on buffer_; presumably reserves room for the bytes emitted below - confirm
+ EmitUint8(0x66);  // prefix; the disassembler treats 0x66 + this opcode as the SSE (XMM) form
+ EmitUint8(0x0F);  // first opcode byte
+ EmitUint8(0xE0);  // second opcode byte; decoded as "pavgb" by the disassembler
+ EmitXmmRegisterOperand(dst, src);  // trailing operand byte naming dst and src (presumably ModRM; the disassembler sets has_modrm for this opcode)
+}
+
+
+void X86Assembler::pavgw(XmmRegister dst, XmmRegister src) {  // emits the bytes 66 0F E3 + operand byte: register-register pavgw
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);  // scoped guard on buffer_; presumably reserves room for the bytes emitted below - confirm
+ EmitUint8(0x66);  // prefix; the disassembler treats 0x66 + this opcode as the SSE (XMM) form
+ EmitUint8(0x0F);  // first opcode byte
+ EmitUint8(0xE3);  // second opcode byte; decoded as "pavgw" by the disassembler
+ EmitXmmRegisterOperand(dst, src);  // trailing operand byte naming dst and src (presumably ModRM; the disassembler sets has_modrm for this opcode)
+}
+
+
void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index f52cf16c8b..b87522a017 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -495,6 +495,9 @@ class X86Assembler FINAL : public Assembler {
void orps(XmmRegister dst, XmmRegister src);
void por(XmmRegister dst, XmmRegister src);
+ void pavgb(XmmRegister dst, XmmRegister src); // SIMD halving add with rounding (pavgb); no addr variant (for now)
+ void pavgw(XmmRegister dst, XmmRegister src); // SIMD halving add with rounding (pavgw); register-register only, like pavgb
+
void pcmpeqb(XmmRegister dst, XmmRegister src);
void pcmpeqw(XmmRegister dst, XmmRegister src);
void pcmpeqd(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 23049079e0..a01eb6dc23 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -605,6 +605,14 @@ TEST_F(AssemblerX86Test, POr) {
DriverStr(RepeatFF(&x86::X86Assembler::por, "por %{reg2}, %{reg1}"), "por");
}
+TEST_F(AssemblerX86Test, PAvgB) {  // checks the register-register pavgb encoding
+ DriverStr(RepeatFF(&x86::X86Assembler::pavgb, "pavgb %{reg2}, %{reg1}"), "pavgb");  // expected text lists %{reg2} before %{reg1}; presumably source-first syntax - confirm
+}
+
+TEST_F(AssemblerX86Test, PAvgW) {  // checks the register-register pavgw encoding
+ DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");  // expected text lists %{reg2} before %{reg1}; presumably source-first syntax - confirm
+}
+
TEST_F(AssemblerX86Test, PCmpeqB) {  // checks the register-register pcmpeqb encoding
DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb");  // NOTE(review): test name "cmpeqb" lacks the leading 'p' of the mnemonic - confirm whether intentional
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index d20a6965c3..488c75de41 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1427,6 +1427,24 @@ void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {  // emits 66 [optional REX] 0F E0 + operand byte: register-register pavgb
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);  // scoped guard on buffer_; presumably reserves room for the bytes emitted below - confirm
+ EmitUint8(0x66);  // prefix; the disassembler treats 0x66 + this opcode as the SSE (XMM) form
+ EmitOptionalRex32(dst, src);  // emitted after 0x66 and before the opcode bytes; presumably a REX prefix only when dst/src need one - confirm
+ EmitUint8(0x0F);  // first opcode byte
+ EmitUint8(0xE0);  // second opcode byte; decoded as "pavgb" by the disassembler
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // operand byte takes only dst's low bits; the rest is presumably carried by the optional REX above
+}
+
+void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {  // emits 66 [optional REX] 0F E3 + operand byte: register-register pavgw
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);  // scoped guard on buffer_; presumably reserves room for the bytes emitted below - confirm
+ EmitUint8(0x66);  // prefix; the disassembler treats 0x66 + this opcode as the SSE (XMM) form
+ EmitOptionalRex32(dst, src);  // emitted after 0x66 and before the opcode bytes; presumably a REX prefix only when dst/src need one - confirm
+ EmitUint8(0x0F);  // first opcode byte
+ EmitUint8(0xE3);  // second opcode byte; decoded as "pavgw" by the disassembler
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // operand byte takes only dst's low bits; the rest is presumably carried by the optional REX above
+}
+
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 08e17e81e5..fc2b117f71 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -523,6 +523,9 @@ class X86_64Assembler FINAL : public Assembler {
void orps(XmmRegister dst, XmmRegister src);
void por(XmmRegister dst, XmmRegister src);
+ void pavgb(XmmRegister dst, XmmRegister src); // SIMD halving add with rounding (pavgb); no addr variant (for now)
+ void pavgw(XmmRegister dst, XmmRegister src); // SIMD halving add with rounding (pavgw); register-register only, like pavgb
+
void pcmpeqb(XmmRegister dst, XmmRegister src);
void pcmpeqw(XmmRegister dst, XmmRegister src);
void pcmpeqd(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 20062fdb07..4adf210e47 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1293,6 +1293,14 @@ TEST_F(AssemblerX86_64Test, Por) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::por, "por %{reg2}, %{reg1}"), "por");
}
+TEST_F(AssemblerX86_64Test, Pavgb) {  // checks the register-register pavgb encoding
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgb, "pavgb %{reg2}, %{reg1}"), "pavgb");  // expected text lists %{reg2} before %{reg1}; presumably source-first syntax - confirm
+}
+
+TEST_F(AssemblerX86_64Test, Pavgw) {  // checks the register-register pavgw encoding
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");  // expected text lists %{reg2} before %{reg1}; presumably source-first syntax - confirm
+}
+
TEST_F(AssemblerX86_64Test, PCmpeqb) {  // checks the register-register pcmpeqb encoding
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb");  // expected text lists %{reg2} before %{reg1}; presumably source-first syntax - confirm
}
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index 77ed3c6a22..f5c3ad20cc 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -1101,6 +1101,22 @@ DISASSEMBLER_ENTRY(cmp,
opcode1 = opcode_tmp.c_str();
}
break;
+ case 0xE0:  // opcode bytes the assemblers emit for pavgb (E0) and pavgw (E3)
+ case 0xE3:
+ if (prefix[2] == 0x66) {  // 0x66 prefix present: operands come from the SSE register file
+ src_reg_file = dst_reg_file = SSE;
+ prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode
+ } else {  // no 0x66 prefix: operands come from the MMX register file
+ src_reg_file = dst_reg_file = MMX;
+ }
+ switch (*instr) {  // choose the mnemonic from the opcode byte being decoded
+ case 0xE0: opcode1 = "pavgb"; break;
+ case 0xE3: opcode1 = "pavgw"; break;
+ }
+ prefix[2] = 0;  // unconditional clear; this makes the clear inside the 0x66 branch above redundant
+ has_modrm = true;  // presumably tells the decoder an operand (ModRM) byte follows - confirm
+ load = true;  // NOTE(review): presumably marks the r/m operand as the source being read - confirm
+ break;
case 0xEB:
if (prefix[2] == 0x66) {
src_reg_file = dst_reg_file = SSE;