author | 2019-03-08 15:08:17 +0530
---|---
committer | 2019-04-25 09:05:09 +0000
commit | 20d1c942c0e841920eac92f68c6d3e7f2a2135b4 (patch)
tree | 931aed7f4639a4b0a6ab8e3cd7765295a1883be3 /compiler/utils/x86/assembler_x86.cc
parent | d32f8aadd37aab5b89ffccc86f7d8d07447a213a (diff)
Patch supports Intel(R) AVX/AVX2 MOV Instruction
This patch enhances the existing ART compiler to generate
Intel(R) AVX/AVX2 MOV instructions for SIMD operations on
Intel(R) Architecture CPUs. It also provides the framework
for AVX/AVX2 instruction encoding and disassembly.
BUG: 127881558
Test: run-test gtest
Change-Id: I9386aecc134941a2d907f9ec6b2d5522ec5ff8b5
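
The core of the change is a set of new EmitVexPrefixByteZero/One/Two helpers that assemble the VEX prefix bytes defined by the Intel SDM. As a minimal, standalone sketch of the two-byte VEX form these helpers emit for the new vmovaps/vmovups paths (the function name and the -1 "no register" convention below are invented for illustration and are not the assembler's API):

```cpp
#include <cstdint>
#include <cstdio>

// Sketch of the two-byte VEX prefix layout (escape byte 0xC5 followed by
// one payload byte). Not part of the patch; names are illustrative.
uint8_t VexTwoByteOne(bool r, int vvvv /* -1 = unused */, bool l_256, uint8_t pp) {
  uint8_t b = 0;
  if (!r) b |= 0x80;                               // VEX.R, stored inverted.
  int inverted = (vvvv < 0) ? 0x0F : (15 - vvvv);  // VEX.vvvv, stored inverted; 1111b = unused.
  b |= static_cast<uint8_t>((inverted & 0x0F) << 3);
  if (l_256) b |= 0x04;                            // VEX.L: 0 = 128-bit, 1 = 256-bit.
  b |= (pp & 0x03);                                // VEX.pp: 00 = none, 01 = 66, 10 = F3, 11 = F2.
  return b;
}

int main() {
  // vmovaps xmm1, xmm2 (VEX.128.0F.WIG 28 /r): two-byte form, vvvv unused,
  // L = 128, pp = none, ModRM = 0xCA (mod = 11, reg = xmm1, rm = xmm2).
  uint8_t byte0 = 0xC5;  // Two-byte VEX escape.
  uint8_t byte1 = VexTwoByteOne(/*r=*/false, /*vvvv=*/-1, /*l_256=*/false, /*pp=*/0);
  printf("%02X %02X 28 CA\n", byte0, byte1);  // Prints: C5 F8 28 CA
  return 0;
}
```

The printed bytes C5 F8 28 CA encode vmovaps xmm1, xmm2, matching the VEX.128.0F.WIG 28 /r references in the diff below.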
Diffstat (limited to 'compiler/utils/x86/assembler_x86.cc')
-rw-r--r-- | compiler/utils/x86/assembler_x86.cc | 659
1 file changed, 525 insertions(+), 134 deletions(-)
```diff
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 4b073bde0b..01eb160fa7 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -59,96 +59,11 @@ std::ostream& operator<<(std::ostream& os, const Address& addr) {
   }
 }
 
-uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) {
-  uint8_t vex_zero = 0xC0;
-  if (!is_two_byte) {
-    vex_zero |= 0xC4;
-  } else {
-    vex_zero |= 0xC5;
+bool X86Assembler::CpuHasAVXorAVX2FeatureFlag() {
+  if (has_AVX_ || has_AVX2_) {
+    return true;
   }
-  return vex_zero;
-}
-
-uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) {
-  // VEX Byte 1
-  uint8_t vex_prefix = 0;
-  if (!r) {
-    vex_prefix |= 0x80;  // VEX.R
-  }
-  if (!x) {
-    vex_prefix |= 0x40;  // VEX.X
-  }
-  if (!b) {
-    vex_prefix |= 0x20;  // VEX.B
-  }
-
-  // VEX.mmmmm
-  switch (mmmmm) {
-    case 1:
-      // implied 0F leading opcode byte
-      vex_prefix |= 0x01;
-      break;
-    case 2:
-      // implied leading 0F 38 opcode byte
-      vex_prefix |= 0x02;
-      break;
-    case 3:
-      // implied leading OF 3A opcode byte
-      vex_prefix |= 0x03;
-      break;
-    default:
-      LOG(FATAL) << "unknown opcode bytes";
-  }
-  return vex_prefix;
-}
-
-uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) {
-  uint8_t vex_prefix = 0;
-  // VEX Byte 2
-  if (w) {
-    vex_prefix |= 0x80;
-  }
-  // VEX.vvvv
-  if (operand.IsXmmRegister()) {
-    XmmRegister vvvv = operand.AsXmmRegister();
-    int inverted_reg = 15 - static_cast<int>(vvvv);
-    uint8_t reg = static_cast<uint8_t>(inverted_reg);
-    vex_prefix |= ((reg & 0x0F) << 3);
-  } else if (operand.IsCpuRegister()) {
-    Register vvvv = operand.AsCpuRegister();
-    int inverted_reg = 15 - static_cast<int>(vvvv);
-    uint8_t reg = static_cast<uint8_t>(inverted_reg);
-    vex_prefix |= ((reg & 0x0F) << 3);
-  }
-
-  // VEX.L
-  if (l == 256) {
-    vex_prefix |= 0x04;
-  }
-
-  // VEX.pp
-  switch (pp) {
-    case 0:
-      // SIMD Pefix - None
-      vex_prefix |= 0x00;
-      break;
-    case 1:
-      // SIMD Prefix - 66
-      vex_prefix |= 0x01;
-      break;
-    case 2:
-      // SIMD Prefix - F3
-      vex_prefix |= 0x02;
-      break;
-    case 3:
-      // SIMD Prefix - F2
-      vex_prefix |= 0x03;
-      break;
-    default:
-      LOG(FATAL) << "unknown SIMD Prefix";
-  }
-
-  return vex_prefix;
+  return false;
 }
 
 void X86Assembler::call(Register reg) {
@@ -273,15 +188,11 @@ void X86Assembler::movntl(const Address& dst, Register src) {
 
 void X86Assembler::blsi(Register dst, Register src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
-  uint8_t byte_one = EmitVexByte1(/*r=*/ false,
-                                  /*x=*/ false,
-                                  /*b=*/ false,
-                                  /*mmmmm=*/ 2);
-  uint8_t byte_two = EmitVexByte2(/*w=*/ false,
-                                  /*l=*/ 128,
-                                  X86ManagedRegister::FromCpuRegister(dst),
-                                  /*pp=*/ 0);
+  uint8_t byte_zero = EmitVexPrefixByteZero(false /*is_two_byte*/);
+  uint8_t byte_one = EmitVexPrefixByteOne(false, false, false, SET_VEX_M_0F_38);
+  uint8_t byte_two = EmitVexPrefixByteTwo(false,
+                                          X86ManagedRegister::FromCpuRegister(dst),
+                                          SET_VEX_L_128, SET_VEX_PP_NONE);
   EmitUint8(byte_zero);
   EmitUint8(byte_one);
   EmitUint8(byte_two);
@@ -291,15 +202,11 @@ void X86Assembler::blsi(Register dst, Register src) {
 
 void X86Assembler::blsmsk(Register dst, Register src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
-  uint8_t byte_one = EmitVexByte1(/*r=*/ false,
-                                  /*x=*/ false,
-                                  /*b=*/ false,
-                                  /*mmmmm=*/ 2);
-  uint8_t byte_two = EmitVexByte2(/*w=*/ false,
-                                  /*l=*/ 128,
-                                  X86ManagedRegister::FromCpuRegister(dst),
-                                  /*pp=*/ 0);
+  uint8_t byte_zero = EmitVexPrefixByteZero(false /*is_two_byte*/);
+  uint8_t byte_one = EmitVexPrefixByteOne(false, false, false, SET_VEX_M_0F_38);
+  uint8_t byte_two = EmitVexPrefixByteTwo(false,
+                                          X86ManagedRegister::FromCpuRegister(dst),
+                                          SET_VEX_L_128, SET_VEX_PP_NONE);
   EmitUint8(byte_zero);
   EmitUint8(byte_one);
   EmitUint8(byte_two);
@@ -309,15 +216,11 @@ void X86Assembler::blsmsk(Register dst, Register src) {
 
 void X86Assembler::blsr(Register dst, Register src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
-  uint8_t byte_one = EmitVexByte1(/*r=*/ false,
-                                  /*x=*/ false,
-                                  /*b=*/ false,
-                                  /*mmmmm=*/ 2);
-  uint8_t byte_two = EmitVexByte2(/*w=*/ false,
-                                  /*l=*/ 128,
-                                  X86ManagedRegister::FromCpuRegister(dst),
-                                  /*pp=*/ 0);
+  uint8_t byte_zero = EmitVexPrefixByteZero(false /*is_two_byte*/);
+  uint8_t byte_one = EmitVexPrefixByteOne(false, false, false, SET_VEX_M_0F_38);
+  uint8_t byte_two = EmitVexPrefixByteTwo(false,
+                                          X86ManagedRegister::FromCpuRegister(dst),
+                                          SET_VEX_L_128, SET_VEX_PP_NONE);
   EmitUint8(byte_zero);
   EmitUint8(byte_one);
   EmitUint8(byte_two);
@@ -516,44 +419,165 @@ void X86Assembler::setb(Condition condition, Register dst) {
 
 void X86Assembler::movaps(XmmRegister dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovaps(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0x28);
   EmitXmmRegisterOperand(dst, src);
 }
 
+/**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2*/
+void X86Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t byte_zero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+     extended registers or uses a 64-bit operand.*/
+  uint8_t byte_one = EmitVexPrefixByteOne(/**R = */false,
+                                          vvvv_reg,
+                                          SET_VEX_L_128,
+                                          SET_VEX_PP_NONE);
+  EmitUint8(byte_zero);
+  EmitUint8(byte_one);
+  /**Instruction Opcode*/
+  EmitUint8(0x28);
+  /**Instruction Operands*/
+  EmitXmmRegisterOperand(dst, src);
+}
 
 void X86Assembler::movaps(XmmRegister dst, const Address& src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovaps(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0x28);
   EmitOperand(dst, src);
 }
 
+/**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128*/
+void X86Assembler::vmovaps(XmmRegister dst, const Address& src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+     extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_NONE);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  /**Instruction Opcode*/
+  EmitUint8(0x28);
+  /**Instruction Operands*/
+  EmitOperand(dst, src);
+}
 
 void X86Assembler::movups(XmmRegister dst, const Address& src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovups(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0x10);
   EmitOperand(dst, src);
 }
 
+/**VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128*/
+void X86Assembler::vmovups(XmmRegister dst, const Address& src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+     extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_NONE);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  /*Instruction Opcode*/
+  EmitUint8(0x10);
+  /*Instruction Operands*/
+  EmitOperand(dst, src);
+}
 
 void X86Assembler::movaps(const Address& dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovaps(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0x29);
   EmitOperand(src, dst);
 }
 
+/**VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1*/
+void X86Assembler::vmovaps(const Address& dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+     extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_NONE);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  /**Instruction Opcode*/
+  EmitUint8(0x29);
+  /**Instruction Operands*/
+  EmitOperand(src, dst);
+}
 
 void X86Assembler::movups(const Address& dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovups(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0x11);
   EmitOperand(src, dst);
 }
 
+/**VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1*/
+void X86Assembler::vmovups(const Address& dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+     extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_NONE);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x11);
+  // Instruction Operands
+  EmitOperand(src, dst);
+}
+
 
 void X86Assembler::movss(XmmRegister dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -705,6 +729,10 @@ void X86Assembler::divps(XmmRegister dst, XmmRegister src) {
 
 void X86Assembler::movapd(XmmRegister dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovapd(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -712,8 +740,32 @@ void X86Assembler::movapd(XmmRegister dst, XmmRegister src) {
   EmitXmmRegisterOperand(dst, src);
 }
 
+/**VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2*/
+void X86Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+     extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x28);
+  // Instruction Operands
+  EmitXmmRegisterOperand(dst, src);
+}
 
 void X86Assembler::movapd(XmmRegister dst, const Address& src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovapd(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -721,8 +773,32 @@ void X86Assembler::movapd(XmmRegister dst, const Address& src) {
   EmitOperand(dst, src);
 }
 
+/**VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128*/
+void X86Assembler::vmovapd(XmmRegister dst, const Address& src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+     extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x28);
+  // Instruction Operands
+  EmitOperand(dst, src);
+}
 
 void X86Assembler::movupd(XmmRegister dst, const Address& src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovupd(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -730,8 +806,33 @@ void X86Assembler::movupd(XmmRegister dst, const Address& src) {
   EmitOperand(dst, src);
 }
 
+/**VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128*/
+void X86Assembler::vmovupd(XmmRegister dst, const Address& src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+     extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x10);
+  // Instruction Operands
+  EmitOperand(dst, src);
+}
+
 
 void X86Assembler::movapd(const Address& dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovapd(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -739,8 +840,32 @@ void X86Assembler::movapd(const Address& dst, XmmRegister src) {
   EmitOperand(src, dst);
 }
 
+/**VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
+void X86Assembler::vmovapd(const Address& dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix */
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+     extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x29);
+  // Instruction Operands
+  EmitOperand(src, dst);
+}
 
 void X86Assembler::movupd(const Address& dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovupd(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -748,6 +873,26 @@ void X86Assembler::movupd(const Address& dst, XmmRegister src) {
   EmitOperand(src, dst);
 }
 
+/**VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
+void X86Assembler::vmovupd(const Address& dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix */
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+     extended registers or uses a 64-bit operand.**/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x11);
+  // Instruction Operands
+  EmitOperand(src, dst);
+}
 
 void X86Assembler::flds(const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -924,6 +1069,10 @@ void X86Assembler::divpd(XmmRegister dst, XmmRegister src) {
 
 void X86Assembler::movdqa(XmmRegister dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovdqa(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -931,8 +1080,30 @@ void X86Assembler::movdqa(XmmRegister dst, XmmRegister src) {
   EmitXmmRegisterOperand(dst, src);
 }
 
+/**VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
+void X86Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix */
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x6F);
+  // Instruction Operands
+  EmitXmmRegisterOperand(dst, src);
+}
 
 void X86Assembler::movdqa(XmmRegister dst, const Address& src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovdqa(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -940,8 +1111,30 @@ void X86Assembler::movdqa(XmmRegister dst, const Address& src) {
   EmitOperand(dst, src);
 }
 
+/**VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
+void X86Assembler::vmovdqa(XmmRegister dst, const Address& src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix */
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x6F);
+  // Instruction Operands
+  EmitOperand(dst, src);
+}
 
 void X86Assembler::movdqu(XmmRegister dst, const Address& src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovdqu(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
   EmitUint8(0x0F);
@@ -949,8 +1142,30 @@ void X86Assembler::movdqu(XmmRegister dst, const Address& src) {
   EmitOperand(dst, src);
 }
 
+/**VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128 */
+void X86Assembler::vmovdqu(XmmRegister dst, const Address& src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix */
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_F3);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x6F);
+  // Instruction Operands
+  EmitOperand(dst, src);
+}
 
 void X86Assembler::movdqa(const Address& dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovdqa(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -958,8 +1173,31 @@ void X86Assembler::movdqa(const Address& dst, XmmRegister src) {
   EmitOperand(src, dst);
 }
 
+/**VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
+void X86Assembler::vmovdqa(const Address& dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix */
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x7F);
+  // Instruction Operands
+  EmitOperand(src, dst);
+}
+
 
 void X86Assembler::movdqu(const Address& dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovdqu(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
   EmitUint8(0x0F);
@@ -967,6 +1205,24 @@ void X86Assembler::movdqu(const Address& dst, XmmRegister src) {
   EmitOperand(src, dst);
 }
 
+/**VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
+void X86Assembler::vmovdqu(const Address& dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // Instruction VEX Prefix
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_F3);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x7F);
+  // Instruction Operands
+  EmitOperand(src, dst);
+}
 
 void X86Assembler::paddb(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1413,24 +1669,7 @@ void X86Assembler::pand(XmmRegister dst, XmmRegister src) {
   EmitXmmRegisterOperand(dst, src);
 }
 
-void X86Assembler::andn(Register dst, Register src1, Register src2) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
-  uint8_t byte_one = EmitVexByte1(/*r=*/ false,
-                                  /*x=*/ false,
-                                  /*b=*/ false,
-                                  /*mmmmm=*/ 2);
-  uint8_t byte_two = EmitVexByte2(/*w=*/ false,
-                                  /*l=*/ 128,
-                                  X86ManagedRegister::FromCpuRegister(src1),
-                                  /*pp=*/ 0);
-  EmitUint8(byte_zero);
-  EmitUint8(byte_one);
-  EmitUint8(byte_two);
-  // Opcode field
-  EmitUint8(0xF2);
-  EmitRegisterOperand(dst, src2);
-}
+
 
 void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) {
@@ -1475,6 +1714,24 @@ void X86Assembler::orps(XmmRegister dst, XmmRegister src) {
   EmitXmmRegisterOperand(dst, src);
 }
 
+void X86Assembler::andn(Register dst, Register src1, Register src2) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t byte_zero = EmitVexPrefixByteZero(/**is_twobyte_form= */false);
+  uint8_t byte_one = EmitVexPrefixByteOne(/**R = */false,
+                                          /**X = */false,
+                                          /**B = */false,
+                                          SET_VEX_M_0F_38);
+  uint8_t byte_two = EmitVexPrefixByteTwo(/**W= */false,
+                                          X86ManagedRegister::FromCpuRegister(src1),
+                                          SET_VEX_L_128,
+                                          SET_VEX_PP_NONE);
+  EmitUint8(byte_zero);
+  EmitUint8(byte_one);
+  EmitUint8(byte_two);
+  // Opcode field
+  EmitUint8(0xF2);
+  EmitRegisterOperand(dst, src2);
+}
 
 void X86Assembler::por(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -3143,5 +3400,139 @@ size_t ConstantArea::AddFloat(float v) {
   return AddInt32(bit_cast<int32_t, float>(v));
 }
 
+uint8_t X86Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
+  /**Vex Byte 0,
+     Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
+     Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex */
+  uint8_t vex_prefix = 0xC0;
+  if (is_twobyte_form) {
+    // 2-Byte Vex
+    vex_prefix |= TWO_BYTE_VEX;
+  } else {
+    // 3-Byte Vex
+    vex_prefix |= THREE_BYTE_VEX;
+  }
+  return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexPrefixByteOne(bool R,
+                                           bool X,
+                                           bool B,
+                                           int SET_VEX_M) {
+  /**Vex Byte 1, */
+  uint8_t vex_prefix = VEX_INIT;
+  /** Bit[7] This bit needs to be set to '1'
+      otherwise the instruction is LES or LDS */
+  if (!R) {
+    // R .
+    vex_prefix |= SET_VEX_R;
+  }
+  /** Bit[6] This bit needs to be set to '1'
+      otherwise the instruction is LES or LDS */
+  if (!X) {
+    // X .
+    vex_prefix |= SET_VEX_X;
+  }
+  /** Bit[5] This bit needs to be set to '1' */
+  if (!B) {
+    // B .
+    vex_prefix |= SET_VEX_B;
+  }
+  /** Bits[4:0], */
+  vex_prefix |= SET_VEX_M;
+  return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexPrefixByteOne(bool R,
+                                           X86ManagedRegister operand,
+                                           int SET_VEX_L,
+                                           int SET_VEX_PP) {
+  /**Vex Byte 1, */
+  uint8_t vex_prefix = VEX_INIT;
+  /** Bit[7] This bit needs to be set to '1'
+      otherwise the instruction is LES or LDS */
+  if (!R) {
+    // R .
+    vex_prefix |= SET_VEX_R;
+  }
+  /**Bits[6:3] - 'vvvv' the source or dest register specifier */
+  if (operand.IsNoRegister()) {
+    vex_prefix |= 0x78;
+  } else if (operand.IsXmmRegister()) {
+    XmmRegister vvvv = operand.AsXmmRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+    uint8_t reg = static_cast<uint8_t>(inverted_reg);
+    vex_prefix |= ((reg & 0x0F) << 3);
+  } else if (operand.IsCpuRegister()) {
+    Register vvvv = operand.AsCpuRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+    uint8_t reg = static_cast<uint8_t>(inverted_reg);
+    vex_prefix |= ((reg & 0x0F) << 3);
+  }
+  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
+      VEX.L = 0 indicates 128 bit vector operation */
+  vex_prefix |= SET_VEX_L;
+  /** Bits[1:0] - "pp" */
+  vex_prefix |= SET_VEX_PP;
+  return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexPrefixByteTwo(bool W,
+                                           X86ManagedRegister operand,
+                                           int SET_VEX_L,
+                                           int SET_VEX_PP) {
+  /** Vex Byte 2, */
+  uint8_t vex_prefix = VEX_INIT;
+  /** Bit[7] This bit needs to be set to '1' with default value.
+      When using C4H form of VEX prefix, W value is ignored */
+  if (W) {
+    vex_prefix |= SET_VEX_W;
+  }
+  /** Bits[6:3] - 'vvvv' the source or dest register specifier */
+  if (operand.IsXmmRegister()) {
+    XmmRegister vvvv = operand.AsXmmRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+    uint8_t reg = static_cast<uint8_t>(inverted_reg);
+    vex_prefix |= ((reg & 0x0F) << 3);
+  } else if (operand.IsCpuRegister()) {
+    Register vvvv = operand.AsCpuRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+    uint8_t reg = static_cast<uint8_t>(inverted_reg);
+    vex_prefix |= ((reg & 0x0F) << 3);
+  }
+  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
+      VEX.L = 0 indicates 128 bit vector operation */
+  vex_prefix |= SET_VEX_L;
+  // Bits[1:0] - "pp"
+  vex_prefix |= SET_VEX_PP;
+  return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexPrefixByteTwo(bool W,
+                                           int SET_VEX_L,
+                                           int SET_VEX_PP) {
+  /**Vex Byte 2, */
+  uint8_t vex_prefix = VEX_INIT;
+
+  /** Bit[7] This bit needs to be set to '1' with default value.
+      When using C4H form of VEX prefix, W value is ignored */
+  if (W) {
+    vex_prefix |= SET_VEX_W;
+  }
+  /** Bits[6:3] - 'vvvv' the source or dest register specifier,
+      if unused set 1111 */
+  vex_prefix |= (0x0F << 3);
+
+  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
+      VEX.L = 0 indicates 128 bit vector operation */
+  vex_prefix |= SET_VEX_L;
+
+  /** Bits[1:0] - "pp" */
+  if (SET_VEX_PP != SET_VEX_PP_NONE) {
+    vex_prefix |= SET_VEX_PP;
+  }
+  return vex_prefix;
+}
+
 }  // namespace x86
 }  // namespace art
```
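
The BMI paths above (blsi, blsmsk, blsr, andn) use the three-byte VEX form instead, since they live in the 0F 38 opcode map and andn also carries a second source in VEX.vvvv. A matching standalone sketch (again with illustrative names and a -1 "unused" sentinel, not the assembler's API):

```cpp
#include <cstdint>
#include <cstdio>

// Sketch of the three-byte VEX prefix layout (escape byte 0xC4 followed by
// two payload bytes). Not part of the patch; names are illustrative.
uint8_t VexThreeByteOne(bool r, bool x, bool b, uint8_t mmmmm) {
  uint8_t v = 0;
  if (!r) v |= 0x80;    // VEX.R, stored inverted.
  if (!x) v |= 0x40;    // VEX.X, stored inverted.
  if (!b) v |= 0x20;    // VEX.B, stored inverted.
  v |= (mmmmm & 0x1F);  // VEX.mmmmm: 1 = 0F, 2 = 0F 38, 3 = 0F 3A opcode map.
  return v;
}

uint8_t VexThreeByteTwo(bool w, int vvvv /* -1 = unused */, bool l_256, uint8_t pp) {
  uint8_t v = 0;
  if (w) v |= 0x80;                                // VEX.W.
  int inverted = (vvvv < 0) ? 0x0F : (15 - vvvv);  // VEX.vvvv, stored inverted.
  v |= static_cast<uint8_t>((inverted & 0x0F) << 3);
  if (l_256) v |= 0x04;                            // VEX.L.
  v |= (pp & 0x03);                                // VEX.pp.
  return v;
}

int main() {
  // andn eax, ecx, edx (VEX.LZ.0F38.W0 F2 /r): vvvv holds src1 = ecx (1),
  // ModRM = 0xC2 (mod = 11, reg = eax, rm = edx).
  uint8_t byte0 = 0xC4;  // Three-byte VEX escape.
  uint8_t byte1 = VexThreeByteOne(/*r=*/false, /*x=*/false, /*b=*/false, /*mmmmm=*/2);
  uint8_t byte2 = VexThreeByteTwo(/*w=*/false, /*vvvv=*/1, /*l_256=*/false, /*pp=*/0);
  printf("%02X %02X %02X F2 C2\n", byte0, byte1, byte2);  // Prints: C4 E2 70 F2 C2
  return 0;
}
```

VEX.vvvv holding the extra source register is why the patch threads an X86ManagedRegister through EmitVexPrefixByteTwo rather than hard-coding the 1111b "unused" pattern.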