Add support for VEX coding scheme in x86 assembler

This patch adds support for emitting the VEX prefix, which is needed
to encode the instructions andn, blsmsk, blsr and blsi on a CPU that
supports AVX2.
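
For reference, the three-byte VEX prefix built here consists of an
escape byte (0xC4), a byte packing the inverted R/X/B bits with the
opcode-map selector mmmmm, and a byte packing W, the inverted extra
register vvvv, the vector length L and the SIMD-prefix selector pp.
As a worked example (derived by hand from these rules, not taken from
generated output), andn %edx, %ecx, %eax encodes as:

  C4     three-byte VEX escape
  E2     ~R=1, ~X=1, ~B=1, mmmmm=00010 (implied 0F 38 map)
  70     W=0, ~vvvv=1110 (ECX), L=0, pp=00 (no SIMD prefix)
  F2 C2  opcode, then ModRM (mod=11, reg=EAX, rm=EDX)
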
Test: ./test.py --host --64, test-art-host-gtest
Change-Id: I6b4902caf8560e4406c5053b142686ed28ba5404
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 86f9010..2d1e451 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -59,6 +59,105 @@
}
}
+uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) {
+ uint8_t vex_zero;
+ if (!is_two_byte) {
+ vex_zero = 0xC4; // Escape byte of a three-byte VEX prefix.
+ } else {
+ vex_zero = 0xC5; // Escape byte of a two-byte VEX prefix.
+ }
+ return vex_zero;
+}
+
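+// VEX byte 1 of the three-byte form packs the inverted R, X and B bits
+// (bits 7..5) with the mmmmm opcode-map selector (bits 4..0). For example,
+// r = x = b = false with mmmmm = 2 yields 0xE2, selecting the 0F 38 map.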
+uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) {
+ // VEX Byte 1
+ uint8_t vex_prefix = 0;
+ if (!r) {
+ vex_prefix |= 0x80; // VEX.R (stored inverted)
+ }
+ if (!x) {
+ vex_prefix |= 0x40; // VEX.X (stored inverted)
+ }
+ if (!b) {
+ vex_prefix |= 0x20; // VEX.B (stored inverted)
+ }
+
+ // VEX.mmmmm
+ switch (mmmmm) {
+ case 1:
+ // implied leading 0F opcode byte
+ vex_prefix |= 0x01;
+ break;
+ case 2:
+ // implied leading 0F 38 opcode byte
+ vex_prefix |= 0x02;
+ break;
+ case 3:
+ // implied leading 0F 3A opcode byte
+ vex_prefix |= 0x03;
+ break;
+ default:
+ LOG(FATAL) << "unknown opcode bytes";
+ }
+ return vex_prefix;
+}
+
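+// VEX byte 2 packs W (bit 7), the inverted register number vvvv (bits 6..3),
+// the vector length L (bit 2) and the SIMD-prefix selector pp (bits 1..0).
+// For example, ECX as vvvv with w = false, l = 128 and pp = 0 gives
+// ((15 - 1) & 0x0F) << 3 = 0x70.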
+uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) {
+ uint8_t vex_prefix = 0;
+ // VEX Byte 2
+ if (w) {
+ vex_prefix |= 0x80; // VEX.W
+ }
+ // VEX.vvvv holds the extra operand register, stored inverted.
+ if (operand.IsXmmRegister()) {
+ XmmRegister vvvv = operand.AsXmmRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv);
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ } else if (operand.IsCpuRegister()) {
+ Register vvvv = operand.AsCpuRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv);
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ }
+
+ // VEX.L: set for a 256-bit vector length.
+ if (l == 256) {
+ vex_prefix |= 0x04;
+ }
+
+ // VEX.pp
+ switch (pp) {
+ case 0:
+ // SIMD Prefix - None
+ vex_prefix |= 0x00;
+ break;
+ case 1:
+ // SIMD Prefix - 66
+ vex_prefix |= 0x01;
+ break;
+ case 2:
+ // SIMD Prefix - F3
+ vex_prefix |= 0x02;
+ break;
+ case 3:
+ // SIMD Prefix - F2
+ vex_prefix |= 0x03;
+ break;
+ default:
+ LOG(FATAL) << "unknown SIMD Prefix";
+ }
+
+ return vex_prefix;
+}
+
void X86Assembler::call(Register reg) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xFF);
@@ -179,6 +278,64 @@
EmitOperand(src, dst);
}
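+// The BMI1 instructions blsi, blsmsk and blsr below share opcode 0xF3 in the
+// implied 0F 38 map; the ModRM reg field selects the operation (/3 = blsi,
+// /2 = blsmsk, /1 = blsr). They use the VEX.NDD form, so vvvv encodes the
+// destination register.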
+void X86Assembler::blsi(Register dst, Register src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
+ uint8_t byte_one = EmitVexByte1(/*r=*/ false,
+ /*x=*/ false,
+ /*b=*/ false,
+ /*mmmmm=*/ 2);
+ uint8_t byte_two = EmitVexByte2(/*w=*/ false,
+ /*l=*/ 128,
+ X86ManagedRegister::FromCpuRegister(dst),
+ /*pp=*/ 0);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ EmitUint8(0xF3);
+ EmitRegisterOperand(3, src);
+}
+
+void X86Assembler::blsmsk(Register dst, Register src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
+ uint8_t byte_one = EmitVexByte1(/*r=*/ false,
+ /*x=*/ false,
+ /*b=*/ false,
+ /*mmmmm=*/ 2);
+ uint8_t byte_two = EmitVexByte2(/*w=*/ false,
+ /*l=*/ 128,
+ X86ManagedRegister::FromCpuRegister(dst),
+ /*pp=*/ 0);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ EmitUint8(0xF3);
+ EmitRegisterOperand(2, src);
+}
+
+void X86Assembler::blsr(Register dst, Register src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
+ uint8_t byte_one = EmitVexByte1(/*r=*/ false,
+ /*x=*/ false,
+ /*b=*/ false,
+ /*mmmmm=*/ 2);
+ uint8_t byte_two = EmitVexByte2(/*w=*/ false,
+ /*l=*/ 128,
+ X86ManagedRegister::FromCpuRegister(dst),
+ /*pp=*/ 0);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ EmitUint8(0xF3);
+ EmitRegisterOperand(1, src);
+}
+
void X86Assembler::bswapl(Register dst) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
@@ -1267,6 +1424,28 @@
EmitXmmRegisterOperand(dst, src);
}
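+// ANDN (BMI1) computes dst = ~src1 & src2. Its encoding is
+// VEX.NDS.LZ.0F38.W0 F2 /r, so vvvv holds src1 and the ModRM reg field
+// holds dst.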
+void X86Assembler::andn(Register dst, Register src1, Register src2) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
+ uint8_t byte_one = EmitVexByte1(/*r=*/ false,
+ /*x=*/ false,
+ /*b=*/ false,
+ /*mmmmm=*/ 2);
+ uint8_t byte_two = EmitVexByte2(/*w=*/ false,
+ /*l=*/ 128,
+ X86ManagedRegister::FromCpuRegister(src1),
+ /*pp=*/ 0);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field
+ EmitUint8(0xF2);
+ EmitRegisterOperand(dst, src2);
+}
+
void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 5ac9236..275e5c1 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -337,6 +337,10 @@
void movntl(const Address& dst, Register src);
+ void blsi(Register dst, Register src); // no addr variant (for now)
+ void blsmsk(Register dst, Register src); // no addr variant (for now)
+ void blsr(Register dst, Register src); // no addr variant (for now)
+
void bswapl(Register dst);
void bsfl(Register dst, Register src);
@@ -500,6 +504,7 @@
void andps(XmmRegister dst, const Address& src);
void pand(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void andn(Register dst, Register src1, Register src2); // no addr variant (for now)
void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void andnps(XmmRegister dst, XmmRegister src);
void pandn(XmmRegister dst, XmmRegister src);
@@ -837,6 +842,11 @@
void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
void EmitGenericShift(int rm, const Operand& operand, Register shifter);
+ // Helpers that build the individual bytes of a two- or three-byte VEX prefix.
+ uint8_t EmitVexByteZero(bool is_two_byte);
+ uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm);
+ uint8_t EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp);
+
ConstantArea constant_area_;
DISALLOW_COPY_AND_ASSIGN(X86Assembler);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index ad75174..1d8bfe7 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -349,6 +349,18 @@
DriverStr(expected, "rep_movsw");
}
+TEST_F(AssemblerX86Test, Blsmsk) {
+ DriverStr(RepeatRR(&x86::X86Assembler::blsmsk, "blsmsk %{reg2}, %{reg1}"), "blsmsk");
+}
+
+TEST_F(AssemblerX86Test, Blsi) {
+ DriverStr(RepeatRR(&x86::X86Assembler::blsi, "blsi %{reg2}, %{reg1}"), "blsi");
+}
+
+TEST_F(AssemblerX86Test, Blsr) {
+ DriverStr(RepeatRR(&x86::X86Assembler::blsr, "blsr %{reg2}, %{reg1}"), "blsr");
+}
+
TEST_F(AssemblerX86Test, Bsfl) {
DriverStr(RepeatRR(&x86::X86Assembler::bsfl, "bsfl %{reg2}, %{reg1}"), "bsfl");
}
@@ -657,6 +669,10 @@
DriverStr(RepeatFF(&x86::X86Assembler::pand, "pand %{reg2}, %{reg1}"), "pand");
}
+TEST_F(AssemblerX86Test, Andn) {
+ DriverStr(RepeatRRR(&x86::X86Assembler::andn, "andn %{reg3}, %{reg2}, %{reg1}"), "andn");
+}
+
TEST_F(AssemblerX86Test, AndnPD) {
DriverStr(RepeatFF(&x86::X86Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd");
}