diff options
author | 2017-08-11 15:10:30 -0700 | |
---|---|---|
committer | 2017-08-11 15:10:30 -0700 | |
commit | 3332db8345de39eb5067d99987fcae140184672b (patch) | |
tree | 411e0be297cb288b18511bef5f4cb11c52fde546 | |
parent | 73de4a8f0936bfb8b74db0465f277a2b68d16905 (diff) |
Bunch of SIMD for x86 and x86_64
Rationale:
Adds a few instructions needed to implement SIMD reductions.
Test: assembler_x86_[64_]test
Bug: 64091002
Change-Id: I785acfc6c8c4ad4f290ddeab32da9b767f944e24
-rw-r--r-- | compiler/utils/x86/assembler_x86.cc | 36 |
-rw-r--r-- | compiler/utils/x86/assembler_x86.h | 5 |
-rw-r--r-- | compiler/utils/x86/assembler_x86_test.cc | 16 |
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc | 52 |
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.h | 6 |
-rw-r--r-- | compiler/utils/x86_64/assembler_x86_64_test.cc | 23 |
-rw-r--r-- | disassembler/disassembler_x86.cc | 5 |
7 files changed, 143 insertions, 0 deletions
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index b50f1af8f9..b89af10749 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1606,6 +1606,42 @@ void X86Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) { } +void X86Assembler::punpckhbw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x68); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::punpckhwd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x69); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::punpckhdq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x6A); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x6D); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::psllw(XmmRegister reg, const Immediate& shift_count) { DCHECK(shift_count.is_uint8()); AssemblerBuffer::EnsureCapacity ensured(&buffer_); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 8578340ea7..511eeb9973 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -546,6 +546,11 @@ class X86Assembler FINAL : public Assembler { void punpckldq(XmmRegister dst, XmmRegister src); void punpcklqdq(XmmRegister dst, XmmRegister src); + void punpckhbw(XmmRegister dst, XmmRegister src); + void punpckhwd(XmmRegister dst, XmmRegister src); + void punpckhdq(XmmRegister dst, XmmRegister src); + void punpckhqdq(XmmRegister dst, XmmRegister src); + void psllw(XmmRegister reg, const Immediate& shift_count); void pslld(XmmRegister 
reg, const Immediate& shift_count); void psllq(XmmRegister reg, const Immediate& shift_count); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 3e1244ed5d..d2122db3fa 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -777,6 +777,22 @@ TEST_F(AssemblerX86Test, Punpcklqdq) { DriverStr(RepeatFF(&x86::X86Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq"); } +TEST_F(AssemblerX86Test, Punpckhbw) { + DriverStr(RepeatFF(&x86::X86Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw"); +} + +TEST_F(AssemblerX86Test, Punpckhwd) { + DriverStr(RepeatFF(&x86::X86Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd"); +} + +TEST_F(AssemblerX86Test, Punpckhdq) { + DriverStr(RepeatFF(&x86::X86Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq"); +} + +TEST_F(AssemblerX86Test, Punpckhqdq) { + DriverStr(RepeatFF(&x86::X86Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq"); +} + TEST_F(AssemblerX86Test, psllw) { GetAssembler()->psllw(x86::XMM0, CreateImmediate(16)); DriverStr("psllw $0x10, %xmm0\n", "psllwi"); diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index ea69a1c9be..3bff67d2f2 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1835,6 +1835,46 @@ void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) { } +void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x68); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x69); + 
EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x6A); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x6D); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) { DCHECK(shift_count.is_uint8()); AssemblerBuffer::EnsureCapacity ensured(&buffer_); @@ -1931,6 +1971,18 @@ void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { } +void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(3, reg); + EmitUint8(shift_count.value()); +} + + void X86_64Assembler::fldl(const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xDD); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 41450bff4f..3dab235d1c 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -574,6 +574,11 @@ class X86_64Assembler FINAL : public Assembler { void punpckldq(XmmRegister dst, XmmRegister src); void punpcklqdq(XmmRegister dst, XmmRegister src); + void punpckhbw(XmmRegister dst, XmmRegister src); + void punpckhwd(XmmRegister dst, XmmRegister src); + void punpckhdq(XmmRegister dst, XmmRegister src); + void punpckhqdq(XmmRegister dst, XmmRegister src); + void psllw(XmmRegister reg, const Immediate& shift_count); void 
pslld(XmmRegister reg, const Immediate& shift_count); void psllq(XmmRegister reg, const Immediate& shift_count); @@ -585,6 +590,7 @@ class X86_64Assembler FINAL : public Assembler { void psrlw(XmmRegister reg, const Immediate& shift_count); void psrld(XmmRegister reg, const Immediate& shift_count); void psrlq(XmmRegister reg, const Immediate& shift_count); + void psrldq(XmmRegister reg, const Immediate& shift_count); void flds(const Address& src); void fstps(const Address& dst); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index ec14e7a825..651d326f03 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1465,6 +1465,22 @@ TEST_F(AssemblerX86_64Test, Punpcklqdq) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq"); } +TEST_F(AssemblerX86_64Test, Punpckhbw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw"); +} + +TEST_F(AssemblerX86_64Test, Punpckhwd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd"); +} + +TEST_F(AssemblerX86_64Test, Punpckhdq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq"); +} + +TEST_F(AssemblerX86_64Test, Punpckhqdq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq"); +} + TEST_F(AssemblerX86_64Test, Psllw) { GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); @@ -1521,6 +1537,13 @@ TEST_F(AssemblerX86_64Test, Psrlq) { "psrlq $2, %xmm15\n", "pslrqi"); } +TEST_F(AssemblerX86_64Test, Psrldq) { + GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM15), 
x86_64::Immediate(2)); + DriverStr("psrldq $1, %xmm0\n" + "psrldq $2, %xmm15\n", "pslrdqi"); +} + TEST_F(AssemblerX86_64Test, UcomissAddress) { GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address( x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index 4824f70a28..bbc8e370ea 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -792,6 +792,7 @@ DISASSEMBLER_ENTRY(cmp, src_reg_file = dst_reg_file = SSE; break; case 0x60: case 0x61: case 0x62: case 0x6C: + case 0x68: case 0x69: case 0x6A: case 0x6D: if (prefix[2] == 0x66) { src_reg_file = dst_reg_file = SSE; prefix[2] = 0; // Clear prefix now. It has served its purpose as part of the opcode. @@ -803,6 +804,10 @@ DISASSEMBLER_ENTRY(cmp, case 0x61: opcode1 = "punpcklwd"; break; case 0x62: opcode1 = "punpckldq"; break; case 0x6c: opcode1 = "punpcklqdq"; break; + case 0x68: opcode1 = "punpckhbw"; break; + case 0x69: opcode1 = "punpckhwd"; break; + case 0x6A: opcode1 = "punpckhdq"; break; + case 0x6D: opcode1 = "punpckhqdq"; break; } load = true; has_modrm = true; |