Add a bunch of SIMD instructions for x86 and x86_64
Rationale:
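Adds a few assembler instructions needed to implement SIMD reductions: punpckhbw, punpckhwd, punpckhdq and punpckhqdq for x86 and x86_64, plus psrldq for x86_64.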
Test: assembler_x86_[64_]test
Bug: 64091002
Change-Id: I785acfc6c8c4ad4f290ddeab32da9b767f944e24
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index b50f1af..b89af10 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1606,6 +1606,42 @@
}
+void X86Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {  // Register-register form only.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // Fixed bytes emitted in order: 66 0F 68.
+ EmitUint8(0x0F);
+ EmitUint8(0x68);  // NOTE(review): presumably the PUNPCKHBW opcode byte; confirm in the SDM.
+ EmitXmmRegisterOperand(dst, src);  // Register operand for (dst, src) follows the fixed bytes.
+}
+
+
+void X86Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {  // Register-register form only.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // Fixed bytes emitted in order: 66 0F 69.
+ EmitUint8(0x0F);
+ EmitUint8(0x69);  // NOTE(review): presumably the PUNPCKHWD opcode byte; confirm in the SDM.
+ EmitXmmRegisterOperand(dst, src);  // Register operand for (dst, src) follows the fixed bytes.
+}
+
+
+void X86Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {  // Register-register form only.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // Fixed bytes emitted in order: 66 0F 6A.
+ EmitUint8(0x0F);
+ EmitUint8(0x6A);  // NOTE(review): presumably the PUNPCKHDQ opcode byte; confirm in the SDM.
+ EmitXmmRegisterOperand(dst, src);  // Register operand for (dst, src) follows the fixed bytes.
+}
+
+
+void X86Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {  // Register-register form only.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // Fixed bytes emitted in order: 66 0F 6D.
+ EmitUint8(0x0F);
+ EmitUint8(0x6D);  // NOTE(review): presumably the PUNPCKHQDQ opcode byte; confirm in the SDM.
+ EmitXmmRegisterOperand(dst, src);  // Register operand for (dst, src) follows the fixed bytes.
+}
+
+
void X86Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
DCHECK(shift_count.is_uint8());
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 8578340..511eeb9 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -546,6 +546,11 @@
void punpckldq(XmmRegister dst, XmmRegister src);
void punpcklqdq(XmmRegister dst, XmmRegister src);
+ void punpckhbw(XmmRegister dst, XmmRegister src);
+ void punpckhwd(XmmRegister dst, XmmRegister src);
+ void punpckhdq(XmmRegister dst, XmmRegister src);
+ void punpckhqdq(XmmRegister dst, XmmRegister src);
+
void psllw(XmmRegister reg, const Immediate& shift_count);
void pslld(XmmRegister reg, const Immediate& shift_count);
void psllq(XmmRegister reg, const Immediate& shift_count);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 3e1244e..d2122db 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -777,6 +777,22 @@
DriverStr(RepeatFF(&x86::X86Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
}
+TEST_F(AssemblerX86Test, Punpckhbw) {  // Expected text puts %{reg2} before %{reg1}.
+ DriverStr(RepeatFF(&x86::X86Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw");
+}
+
+TEST_F(AssemblerX86Test, Punpckhwd) {  // Expected text puts %{reg2} before %{reg1}.
+ DriverStr(RepeatFF(&x86::X86Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd");
+}
+
+TEST_F(AssemblerX86Test, Punpckhdq) {  // Expected text puts %{reg2} before %{reg1}.
+ DriverStr(RepeatFF(&x86::X86Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq");
+}
+
+TEST_F(AssemblerX86Test, Punpckhqdq) {  // Expected text puts %{reg2} before %{reg1}.
+ DriverStr(RepeatFF(&x86::X86Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq");
+}
+
TEST_F(AssemblerX86Test, psllw) {
GetAssembler()->psllw(x86::XMM0, CreateImmediate(16));
DriverStr("psllw $0x10, %xmm0\n", "psllwi");
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index ea69a1c..3bff67d 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1835,6 +1835,46 @@
}
+void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {  // Register-register form only.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // Emitted ahead of the optional REX byte.
+ EmitOptionalRex32(dst, src);  // Presumably a REX prefix only if dst/src need one; TODO confirm.
+ EmitUint8(0x0F);
+ EmitUint8(0x68);  // NOTE(review): 0F 68 is presumably the PUNPCKHBW opcode; confirm in the SDM.
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // Only dst's low bits go to the operand encoder.
+}
+
+
+void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {  // Register-register form only.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // Emitted ahead of the optional REX byte.
+ EmitOptionalRex32(dst, src);  // Presumably a REX prefix only if dst/src need one; TODO confirm.
+ EmitUint8(0x0F);
+ EmitUint8(0x69);  // NOTE(review): 0F 69 is presumably the PUNPCKHWD opcode; confirm in the SDM.
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // Only dst's low bits go to the operand encoder.
+}
+
+
+void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {  // Register-register form only.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // Emitted ahead of the optional REX byte.
+ EmitOptionalRex32(dst, src);  // Presumably a REX prefix only if dst/src need one; TODO confirm.
+ EmitUint8(0x0F);
+ EmitUint8(0x6A);  // NOTE(review): 0F 6A is presumably the PUNPCKHDQ opcode; confirm in the SDM.
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // Only dst's low bits go to the operand encoder.
+}
+
+
+void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {  // Register-register form only.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // Emitted ahead of the optional REX byte.
+ EmitOptionalRex32(dst, src);  // Presumably a REX prefix only if dst/src need one; TODO confirm.
+ EmitUint8(0x0F);
+ EmitUint8(0x6D);  // NOTE(review): 0F 6D is presumably the PUNPCKHQDQ opcode; confirm in the SDM.
+ EmitXmmRegisterOperand(dst.LowBits(), src);  // Only dst's low bits go to the operand encoder.
+}
+
+
void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
DCHECK(shift_count.is_uint8());
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1931,6 +1971,18 @@
}
+void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {  // Immediate form.
+ DCHECK(shift_count.is_uint8());  // The count is emitted as a single byte at the end.
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);  // Emitted ahead of the optional REX byte.
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());  // Only the last flag depends on reg.
+ EmitUint8(0x0F);
+ EmitUint8(0x73);
+ EmitXmmRegisterOperand(3, reg);  // Constant 3, not a register; presumably the /3 extension (confirm).
+ EmitUint8(shift_count.value());  // Immediate byte comes last.
+}
+
+
void X86_64Assembler::fldl(const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xDD);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 41450bf..3dab235 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -574,6 +574,11 @@
void punpckldq(XmmRegister dst, XmmRegister src);
void punpcklqdq(XmmRegister dst, XmmRegister src);
+ void punpckhbw(XmmRegister dst, XmmRegister src);
+ void punpckhwd(XmmRegister dst, XmmRegister src);
+ void punpckhdq(XmmRegister dst, XmmRegister src);
+ void punpckhqdq(XmmRegister dst, XmmRegister src);
+
void psllw(XmmRegister reg, const Immediate& shift_count);
void pslld(XmmRegister reg, const Immediate& shift_count);
void psllq(XmmRegister reg, const Immediate& shift_count);
@@ -585,6 +590,7 @@
void psrlw(XmmRegister reg, const Immediate& shift_count);
void psrld(XmmRegister reg, const Immediate& shift_count);
void psrlq(XmmRegister reg, const Immediate& shift_count);
+ void psrldq(XmmRegister reg, const Immediate& shift_count);
void flds(const Address& src);
void fstps(const Address& dst);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index ec14e7a..651d326 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1465,6 +1465,22 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
}
+TEST_F(AssemblerX86_64Test, Punpckhbw) {  // Expected text puts %{reg2} before %{reg1}.
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckhwd) {  // Expected text puts %{reg2} before %{reg1}.
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckhdq) {  // Expected text puts %{reg2} before %{reg1}.
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckhqdq) {  // Expected text puts %{reg2} before %{reg1}.
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq");
+}
+
TEST_F(AssemblerX86_64Test, Psllw) {
GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
@@ -1521,6 +1537,13 @@
"psrlq $2, %xmm15\n", "pslrqi");
}
+TEST_F(AssemblerX86_64Test, Psrldq) {  // Checks psrldq text for XMM0 and for XMM15.
+ GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psrldq $1, %xmm0\n"
+ "psrldq $2, %xmm15\n", "psrldqi");  // Test name follows the instruction mnemonic.
+}
+
TEST_F(AssemblerX86_64Test, UcomissAddress) {
GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(
x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));