author | 2018-06-29 13:06:35 +0530
committer | 2018-07-02 15:37:38 +0530
commit | 61908880e6565acfadbafe93fa64de000014f1a6 (patch)
tree | 40b535db9175f3d959364d5bc30eaab4e2c4b4c4 /compiler/utils/x86/assembler_x86.cc
parent | b5271dd44a30f498689e503340d3c8d01bf31f07 (diff)
Emit vector multiply and accumulate instructions for x86.
This patch adds a new CPU variant named kabylake and performs
instruction simplification to generate VectorMultiplyAccumulate.
Test: ./test.py --host --64
Change-Id: Ie6cc882dadf1322dd4d3ae49bfdb600b0c447765
Signed-off-by: Gupta Kumar, Sanjiv <sanjiv.kumar.gupta@intel.com>
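
[Editorial note] The "instruction simplification" the message refers to is the usual mul-plus-add fusion at the vector IR level. The sketch below is illustrative only; ART's real pass matches its own IR nodes (HVecMul/HVecAdd), and the types and names here are hypothetical stand-ins:

// Hypothetical stand-ins for ART's vector IR nodes (not the real API).
struct VecNode {
  enum Kind { kVecMul, kVecAdd, kVecMulAcc } kind;
  VecNode* inputs[2];
  int use_count;
};

// acc + (x * y)  ==>  MulAcc(acc, x, y), but only when the multiply has no
// other users; otherwise fusing would force the multiply to be computed twice.
VecNode* TryFuseMulAdd(VecNode* add) {
  if (add->kind != VecNode::kVecAdd) return nullptr;
  for (int i = 0; i < 2; ++i) {
    VecNode* mul = add->inputs[i];
    if (mul->kind == VecNode::kVecMul && mul->use_count == 1) {
      VecNode* acc = add->inputs[1 - i];
      // A real pass would build a node taking (acc, x, y) directly; here the
      // multiply's operands stay reachable through `mul` for brevity. A backend
      // with FMA support (e.g. the new kabylake variant) can then lower this
      // node to vfmadd231ps/vfmadd231pd.
      return new VecNode{VecNode::kVecMulAcc, {acc, mul}, 0};
    }
  }
  return nullptr;
}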
Diffstat (limited to 'compiler/utils/x86/assembler_x86.cc')
-rw-r--r-- | compiler/utils/x86/assembler_x86.cc | 145
1 file changed, 145 insertions, 0 deletions
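
[Editorial note] The four emitters added below encode the packed FMA "231" forms, where "231" is Intel's operand-order tag: operands 2 and 3 are multiplied and the result is accumulated into operand 1 (the destination). Per lane, that is acc = mul_left * mul_right ± acc. A one-lane scalar model (note that a real FMA rounds once, which this two-operation model does not capture):

// One-lane scalar model of the new instructions (ps = float lanes, pd = double lanes).
float  fmadd231(float acc, float left, float right)    { return left * right + acc; }  // vfmadd231ps
float  fmsub231(float acc, float left, float right)    { return left * right - acc; }  // vfmsub231ps
double fmadd231(double acc, double left, double right) { return left * right + acc; }  // vfmadd231pd
double fmsub231(double acc, double left, double right) { return left * right - acc; }  // vfmsub231pd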
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 86f9010ea3..c2ce03b1f2 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -525,6 +525,58 @@ void X86Assembler::divss(XmmRegister dst, const Address& src) {
   EmitOperand(dst, src);
 }
 
+void X86Assembler::vfmadd231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+  uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+  uint8_t byte_two = EmitVexByte2(false, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+  EmitUint8(byte_zero);
+  EmitUint8(byte_one);
+  EmitUint8(byte_two);
+  // Opcode field.
+  EmitUint8(0xB8);
+  EmitXmmRegisterOperand(acc, mul_right);
+}
+
+void X86Assembler::vfmsub231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+  uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+  uint8_t byte_two = EmitVexByte2(false, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+  EmitUint8(byte_zero);
+  EmitUint8(byte_one);
+  EmitUint8(byte_two);
+  // Opcode field.
+  EmitUint8(0xBA);
+  EmitXmmRegisterOperand(acc, mul_right);
+}
+
+void X86Assembler::vfmadd231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+  uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+  uint8_t byte_two = EmitVexByte2(true, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+  EmitUint8(byte_zero);
+  EmitUint8(byte_one);
+  EmitUint8(byte_two);
+  // Opcode field.
+  EmitUint8(0xB8);
+  EmitXmmRegisterOperand(acc, mul_right);
+}
+
+void X86Assembler::vfmsub231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+  uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+  uint8_t byte_two = EmitVexByte2(true, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+  EmitUint8(byte_zero);
+  EmitUint8(byte_one);
+  EmitUint8(byte_two);
+  // Opcode field.
+  EmitUint8(0xBA);
+  EmitXmmRegisterOperand(acc, mul_right);
+}
+
 void X86Assembler::addps(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -2898,6 +2950,99 @@ void X86Assembler::EmitLabelLink(NearLabel* label) {
 }
 
+uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) {
+  uint8_t vex_zero = 0xC0;
+  if (!is_two_byte) {
+    vex_zero |= 0xC4;
+  } else {
+    vex_zero |= 0xC5;
+  }
+  return vex_zero;
+}
+
+uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) {
+  // VEX Byte 1.
+  uint8_t vex_prefix = 0;
+  if (!r) {
+    vex_prefix |= 0x80;  // VEX.R .
+  }
+  if (!x) {
+    vex_prefix |= 0x40;  // VEX.X .
+  }
+  if (!b) {
+    vex_prefix |= 0x20;  // VEX.B .
+  }
+
+  // VEX.mmmmm.
+  switch (mmmmm) {
+    case 1:
+      // Implied 0F leading opcode byte.
+      vex_prefix |= 0x01;
+      break;
+    case 2:
+      // Implied leading 0F 38 opcode byte.
+      vex_prefix |= 0x02;
+      break;
+    case 3:
+      // Implied leading 0F 3A opcode byte.
+      vex_prefix |= 0x03;
+      break;
+    default:
+      LOG(FATAL) << "unknown opcode bytes";
+  }
+  return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) {
+  // VEX Byte 2.
+  uint8_t vex_prefix = 0;
+  if (w) {
+    vex_prefix |= 0x80;
+  }
+
+  // VEX.vvvv.
+  if (operand.IsXmmRegister()) {
+    XmmRegister vvvv = operand.AsXmmRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+    uint8_t reg = static_cast<uint8_t>(inverted_reg);
+    vex_prefix |= ((reg & 0x0F) << 3);
+  } else if (operand.IsCpuRegister()) {
+    Register vvvv = operand.AsCpuRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+    uint8_t reg = static_cast<uint8_t>(inverted_reg);
+    vex_prefix |= ((reg & 0x0F) << 3);
+  }
+
+  // VEX.L.
+  if (l == 256) {
+    vex_prefix |= 0x04;
+  }
+
+  // VEX.pp.
+  switch (pp) {
+    case 0:
+      // SIMD Prefix - None.
+      vex_prefix |= 0x00;
+      break;
+    case 1:
+      // SIMD Prefix - 66.
+      vex_prefix |= 0x01;
+      break;
+    case 2:
+      // SIMD Prefix - F3.
+      vex_prefix |= 0x02;
+      break;
+    case 3:
+      // SIMD Prefix - F2.
+      vex_prefix |= 0x03;
+      break;
+    default:
+      LOG(FATAL) << "unknown SIMD Prefix";
+  }
+
+  return vex_prefix;
+}
+
 void X86Assembler::EmitGenericShift(int reg_or_opcode,
                                     const Operand& operand,
                                     const Immediate& imm) {
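
[Editorial note] As a sanity check on the three helper functions above, the prefix they produce can be re-derived standalone from the VEX encoding rules in the Intel SDM. A minimal sketch for vfmadd231ps xmm1, xmm2, xmm3 (VEX.128.66.0F38.W0 B8 /r), with register numbers hard-coded:

#include <cstdint>
#include <cstdio>

// Standalone re-derivation of the bytes the patch emits for
//   vfmadd231ps xmm1, xmm2, xmm3
// acc = xmm1, mul_left = xmm2, mul_right = xmm3.
int main() {
  const int acc = 1, mul_left = 2, mul_right = 3;

  uint8_t byte0 = 0xC4;                       // three-byte VEX escape
  // Byte 1: inverted R/X/B (all registers < 8 here, so all three bits set),
  // then mmmmm = 2 for the implied 0F 38 leading opcode bytes.
  uint8_t byte1 = 0x80 | 0x40 | 0x20 | 0x02;  // = 0xE2
  // Byte 2: W = 0, vvvv = one's complement of mul_left in bits 6:3,
  // L = 0 (128-bit), pp = 1 (implied 66 prefix).
  uint8_t byte2 = static_cast<uint8_t>(((15 - mul_left) & 0x0F) << 3) | 0x01;  // = 0x69
  uint8_t opcode = 0xB8;
  // ModRM: mod = 11 (register direct), reg = acc, rm = mul_right.
  uint8_t modrm = static_cast<uint8_t>(0xC0 | (acc << 3) | mul_right);  // = 0xCB

  // Should print "c4 e2 69 b8 cb", matching what a disassembler shows
  // for vfmadd231ps xmm1, xmm2, xmm3.
  printf("%02x %02x %02x %02x %02x\n", byte0, byte1, byte2, opcode, modrm);
  return 0;
}

The (15 - reg) computation mirrors the `15 - static_cast<int>(vvvv)` inversion in EmitVexByte2, and passing l == 128 leaves VEX.L clear, which is how the new emitters select 128-bit operation.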