Add support for VEX coding scheme in x86 assembler

This patch adds support for emitting the VEX prefix, which is needed
to encode the BMI1 instructions andn, blsmsk, blsr and blsi on CPUs
with AVX2 support.
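
For example, with this scheme andn eax, ecx, edx is emitted as the
five bytes

  C4 E2 70 F2 C2

where C4 selects the three-byte VEX form, E2 carries the inverted
R/X/B bits plus the 0F 38 opcode map, 70 packs W = 0, the inverted
encoding of ecx in vvvv, L = 0 and no SIMD prefix, F2 is the opcode
and C2 is the ModRM byte for eax and edx.
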
Test: ./test.py --host --64, test-art-host-gtest
Change-Id: I6b4902caf8560e4406c5053b142686ed28ba5404
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 86f9010..2d1e451 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -59,6 +59,98 @@
}
}
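+// A VEX prefix consists of an escape byte (0xC4 for the three-byte form,
+// 0xC5 for the two-byte form) followed by one or two payload bytes. In
+// the three-byte form, byte 1 carries the inverted REX.R/X/B bits and the
+// implied-opcode map (mmmmm), and byte 2 carries W, the inverted extra
+// register specifier (vvvv), the vector length (L) and the implied SIMD
+// prefix (pp).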
+uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) {
+  // 0xC4 introduces the three-byte VEX form, 0xC5 the two-byte form.
+  uint8_t vex_zero = 0xC4;
+  if (is_two_byte) {
+    vex_zero = 0xC5;
+  }
+  return vex_zero;
+}
+
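+// The R, X, B and vvvv fields are stored inverted: in 32-bit mode the
+// 0xC4/0xC5 escape bytes overlap the LES/LDS opcodes, and the inverted
+// bits force the byte that follows the escape into a range (mod == 11)
+// that is invalid for LES/LDS, letting the processor tell the two
+// encodings apart.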
+uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) {
+  // VEX Byte 1. R, X and B are stored inverted, so the bit is set when
+  // the corresponding REX bit is not needed.
+  uint8_t vex_prefix = 0;
+  if (!r) {
+    vex_prefix |= 0x80;  // VEX.R
+  }
+  if (!x) {
+    vex_prefix |= 0x40;  // VEX.X
+  }
+  if (!b) {
+    vex_prefix |= 0x20;  // VEX.B
+  }
+
+  // VEX.mmmmm selects the implied leading opcode bytes.
+  switch (mmmmm) {
+    case 1:
+      // Implied leading 0F opcode byte.
+      vex_prefix |= 0x01;
+      break;
+    case 2:
+      // Implied leading 0F 38 opcode bytes.
+      vex_prefix |= 0x02;
+      break;
+    case 3:
+      // Implied leading 0F 3A opcode bytes.
+      vex_prefix |= 0x03;
+      break;
+    default:
+      LOG(FATAL) << "unknown opcode bytes";
+  }
+  return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) {
+  // VEX Byte 2.
+  uint8_t vex_prefix = 0;
+  if (w) {
+    vex_prefix |= 0x80;  // VEX.W
+  }
+
+  // VEX.vvvv holds the extra source register in inverted (one's
+  // complement) form.
+  if (operand.IsXmmRegister()) {
+    XmmRegister vvvv = operand.AsXmmRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+    uint8_t reg = static_cast<uint8_t>(inverted_reg);
+    vex_prefix |= ((reg & 0x0F) << 3);
+  } else if (operand.IsCpuRegister()) {
+    Register vvvv = operand.AsCpuRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+    uint8_t reg = static_cast<uint8_t>(inverted_reg);
+    vex_prefix |= ((reg & 0x0F) << 3);
+  }
+
+  // VEX.L: cleared for a 128-bit vector length, set for 256-bit.
+  if (l == 256) {
+    vex_prefix |= 0x04;
+  }
+
+  // VEX.pp encodes the implied SIMD prefix.
+  switch (pp) {
+    case 0:
+      // No SIMD prefix.
+      vex_prefix |= 0x00;
+      break;
+    case 1:
+      // SIMD prefix 66.
+      vex_prefix |= 0x01;
+      break;
+    case 2:
+      // SIMD prefix F3.
+      vex_prefix |= 0x02;
+      break;
+    case 3:
+      // SIMD prefix F2.
+      vex_prefix |= 0x03;
+      break;
+    default:
+      LOG(FATAL) << "unknown SIMD prefix";
+  }
+
+  return vex_prefix;
+}
+
void X86Assembler::call(Register reg) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xFF);
@@ -179,6 +271,60 @@
EmitOperand(src, dst);
}
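+// blsi, blsmsk and blsr are BMI1 instructions that share the encoding
+// VEX.NDD.LZ.0F38.W0 F3 /r; they differ only in the reg field of the
+// ModRM byte (/3, /2 and /1 respectively). The destination register is
+// carried in VEX.vvvv and the source in ModRM.rm.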
+void X86Assembler::blsi(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
+  uint8_t byte_one = EmitVexByte1(/*r=*/ false,
+                                  /*x=*/ false,
+                                  /*b=*/ false,
+                                  /*mmmmm=*/ 2);
+  uint8_t byte_two = EmitVexByte2(/*w=*/ false,
+                                  /*l=*/ 128,
+                                  X86ManagedRegister::FromCpuRegister(dst),
+                                  /*pp=*/ 0);
+  EmitUint8(byte_zero);
+  EmitUint8(byte_one);
+  EmitUint8(byte_two);
+  EmitUint8(0xF3);  // Opcode byte.
+  EmitRegisterOperand(3, src);  // BLSI is /3.
+}
+
+void X86Assembler::blsmsk(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
+  uint8_t byte_one = EmitVexByte1(/*r=*/ false,
+                                  /*x=*/ false,
+                                  /*b=*/ false,
+                                  /*mmmmm=*/ 2);
+  uint8_t byte_two = EmitVexByte2(/*w=*/ false,
+                                  /*l=*/ 128,
+                                  X86ManagedRegister::FromCpuRegister(dst),
+                                  /*pp=*/ 0);
+  EmitUint8(byte_zero);
+  EmitUint8(byte_one);
+  EmitUint8(byte_two);
+  EmitUint8(0xF3);  // Opcode byte.
+  EmitRegisterOperand(2, src);  // BLSMSK is /2.
+}
+
+void X86Assembler::blsr(Register dst, Register src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
+  uint8_t byte_one = EmitVexByte1(/*r=*/ false,
+                                  /*x=*/ false,
+                                  /*b=*/ false,
+                                  /*mmmmm=*/ 2);
+  uint8_t byte_two = EmitVexByte2(/*w=*/ false,
+                                  /*l=*/ 128,
+                                  X86ManagedRegister::FromCpuRegister(dst),
+                                  /*pp=*/ 0);
+  EmitUint8(byte_zero);
+  EmitUint8(byte_one);
+  EmitUint8(byte_two);
+  EmitUint8(0xF3);  // Opcode byte.
+  EmitRegisterOperand(1, src);  // BLSR is /1.
+}
+
void X86Assembler::bswapl(Register dst) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
@@ -1267,6 +1413,25 @@
EmitXmmRegisterOperand(dst, src);
}
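+// andn is a BMI1 instruction encoded as VEX.NDS.LZ.0F38.W0 F2 /r: dst is
+// carried in ModRM.reg, src1 in VEX.vvvv and src2 in ModRM.rm.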
+void X86Assembler::andn(Register dst, Register src1, Register src2) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
+  uint8_t byte_one = EmitVexByte1(/*r=*/ false,
+                                  /*x=*/ false,
+                                  /*b=*/ false,
+                                  /*mmmmm=*/ 2);
+  uint8_t byte_two = EmitVexByte2(/*w=*/ false,
+                                  /*l=*/ 128,
+                                  X86ManagedRegister::FromCpuRegister(src1),
+                                  /*pp=*/ 0);
+  EmitUint8(byte_zero);
+  EmitUint8(byte_one);
+  EmitUint8(byte_two);
+  EmitUint8(0xF2);  // Opcode byte.
+  EmitRegisterOperand(dst, src2);
+}
+
void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);