author Gupta Kumar, Sanjiv <sanjiv.kumar.gupta@intel.com> 2018-06-29 13:06:35 +0530
committer Gupta Kumar, Sanjiv <sanjiv.kumar.gupta@intel.com> 2018-07-02 15:37:38 +0530
commit  61908880e6565acfadbafe93fa64de000014f1a6 (patch)
tree    40b535db9175f3d959364d5bc30eaab4e2c4b4c4 /compiler/utils/x86/assembler_x86.cc
parent  b5271dd44a30f498689e503340d3c8d01bf31f07 (diff)
Emit vector multiply and accumulate instructions for x86.
This patch adds a new CPU variant named kabylake and performs instruction simplification to generate VectorMultiplyAccumulate.

Test: ./test.py --host --64
Change-Id: Ie6cc882dadf1322dd4d3ae49bfdb600b0c447765
Signed-off-by: Gupta Kumar, Sanjiv <sanjiv.kumar.gupta@intel.com>
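For reference, a minimal scalar sketch of what the generated VFMADD231PS computes per 128-bit lane; the function name and signature are illustrative only, and a real fused multiply-add rounds once, which this two-step version does not model:

// acc = acc + (mul_left * mul_right), element-wise over 4 packed floats.
void fmadd231ps_ref(float acc[4], const float left[4], const float right[4]) {
  for (int i = 0; i < 4; ++i) {
    acc[i] += left[i] * right[i];
  }
}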
Diffstat (limited to 'compiler/utils/x86/assembler_x86.cc')
-rw-r--r--  compiler/utils/x86/assembler_x86.cc  145
1 file changed, 145 insertions, 0 deletions
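The new assembler helpers below compose a three-byte VEX prefix field by field. As an illustration (variable names here are hypothetical; the bit layout follows the Intel SDM), the bytes for vfmadd231ps xmm1, xmm2, xmm3 can be assembled the same way:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t byte0 = 0xC4;                       // Escape byte of a three-byte VEX prefix.
  // Byte 1: inverted R/X/B in bits 7-5 (all set: no extended registers),
  // mmmmm = 2 selects the implied 0F 38 opcode map.
  uint8_t byte1 = 0x80 | 0x40 | 0x20 | 0x02;  // 0xE2
  // Byte 2: W = 0, vvvv = 15 - 2 (inverted xmm2) in bits 6-3,
  // L = 0 for 128-bit, pp = 1 for the implied 66 prefix.
  uint8_t byte2 = ((15 - 2) << 3) | 0x01;     // 0x69
  uint8_t opcode = 0xB8;                      // VFMADD231PS.
  uint8_t modrm = 0xC0 | (1 << 3) | 3;        // mod = 11, reg = xmm1, rm = xmm3.
  printf("%02X %02X %02X %02X %02X\n", byte0, byte1, byte2, opcode, modrm);
  return 0;
}

This prints "C4 E2 69 B8 CB", which matches what the new vfmadd231ps(XMM1, XMM2, XMM3) emits.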
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 86f9010ea3..c2ce03b1f2 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -525,6 +525,58 @@ void X86Assembler::divss(XmmRegister dst, const Address& src) {
EmitOperand(dst, src);
}
+// acc = acc + (mul_left * mul_right), packed single-precision.
+void X86Assembler::vfmadd231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+ uint8_t byte_two = EmitVexByte2(false, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xB8);
+ EmitXmmRegisterOperand(acc, mul_right);
+}
+
+// acc = (mul_left * mul_right) - acc, packed single-precision.
+void X86Assembler::vfmsub231ps(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+ uint8_t byte_two = EmitVexByte2(false, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xBA);
+ EmitXmmRegisterOperand(acc, mul_right);
+}
+
+// acc = acc + (mul_left * mul_right), packed double-precision.
+void X86Assembler::vfmadd231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+ uint8_t byte_two = EmitVexByte2(true, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xB8);
+ EmitXmmRegisterOperand(acc, mul_right);
+}
+
+// acc = (mul_left * mul_right) - acc, packed double-precision.
+void X86Assembler::vfmsub231pd(XmmRegister acc, XmmRegister mul_left, XmmRegister mul_right) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(false /*is_two_byte*/);
+ uint8_t byte_one = EmitVexByte1(false, false, false, 2);
+ uint8_t byte_two = EmitVexByte2(true, 128, X86ManagedRegister::FromXmmRegister(mul_left), 1);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field.
+ EmitUint8(0xBA);
+ EmitXmmRegisterOperand(acc, mul_right);
+}
+
void X86Assembler::addps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -2898,6 +2950,99 @@ void X86Assembler::EmitLabelLink(NearLabel* label) {
}
+uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) {
+  // 0xC5 introduces a two-byte VEX prefix; 0xC4 introduces a three-byte one.
+  return is_two_byte ? 0xC5 : 0xC4;
+}
+
+uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) {
+  // VEX Byte 1: R, X and B are stored inverted, so a clear input bit sets
+  // the corresponding prefix bit.
+  uint8_t vex_prefix = 0;
+  if (!r) {
+    vex_prefix |= 0x80;  // VEX.R.
+  }
+  if (!x) {
+    vex_prefix |= 0x40;  // VEX.X.
+  }
+  if (!b) {
+    vex_prefix |= 0x20;  // VEX.B.
+  }
+
+ // VEX.mmmmm.
+ switch (mmmmm) {
+ case 1:
+ // Implied 0F leading opcode byte.
+ vex_prefix |= 0x01;
+ break;
+ case 2:
+ // Implied leading 0F 38 opcode byte.
+ vex_prefix |= 0x02;
+ break;
+ case 3:
+      // Implied leading 0F 3A opcode byte.
+ vex_prefix |= 0x03;
+ break;
+ default:
+ LOG(FATAL) << "unknown opcode bytes";
+ }
+ return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) {
+ uint8_t vex_prefix = 0;
+ // VEX Byte 2.
+ if (w) {
+ vex_prefix |= 0x80;
+ }
+
+  // VEX.vvvv: the extra operand register, encoded inverted (15 - reg).
+ if (operand.IsXmmRegister()) {
+ XmmRegister vvvv = operand.AsXmmRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ } else if (operand.IsCpuRegister()) {
+ Register vvvv = operand.AsCpuRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv);
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ }
+
+  // VEX.L: set for a 256-bit vector length, clear for 128-bit.
+  if (l == 256) {
+    vex_prefix |= 0x04;
+  }
+
+ // VEX.pp.
+ switch (pp) {
+ case 0:
+      // SIMD Prefix - None.
+ vex_prefix |= 0x00;
+ break;
+ case 1:
+ // SIMD Prefix - 66.
+ vex_prefix |= 0x01;
+ break;
+ case 2:
+ // SIMD Prefix - F3.
+ vex_prefix |= 0x02;
+ break;
+ case 3:
+ // SIMD Prefix - F2.
+ vex_prefix |= 0x03;
+ break;
+ default:
+ LOG(FATAL) << "unknown SIMD Prefix";
+ }
+
+ return vex_prefix;
+}
+
void X86Assembler::EmitGenericShift(int reg_or_opcode,
const Operand& operand,
const Immediate& imm) {