Patch supports Intel(R) AVX/AVX2 MOV instructions

This patch enhances the existing ART compiler to generate
Intel(R) AVX/AVX2 MOV instructions for SIMD operations on
Intel(R) Architecture CPUs. It also provides the framework
for AVX/AVX2 instruction encoding and disassembly.
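
For reference, a minimal sketch of the two-byte VEX encoding these
helpers emit for VMOVAPS xmm1, xmm2 (derived from the code below):

  C5   two-byte VEX prefix marker
  F8   inverted R = 1, vvvv = 1111 (unused), L = 0 (128-bit), pp = 00 (none)
  28   VMOVAPS opcode
  CA   ModRM: mod = 11, reg = xmm1 (dst), rm = xmm2 (src)
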
BUG: 127881558
Test: run-test gtest
Change-Id: I9386aecc134941a2d907f9ec6b2d5522ec5ff8b5
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 4b073bd..01eb160 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -59,96 +59,11 @@
}
}
-uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) {
- uint8_t vex_zero = 0xC0;
- if (!is_two_byte) {
- vex_zero |= 0xC4;
- } else {
- vex_zero |= 0xC5;
+bool X86Assembler::CpuHasAVXorAVX2FeatureFlag() {
+ if (has_AVX_ || has_AVX2_) {
+ return true;
}
- return vex_zero;
-}
-
-uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm ) {
- // VEX Byte 1
- uint8_t vex_prefix = 0;
- if (!r) {
- vex_prefix |= 0x80; // VEX.R
- }
- if (!x) {
- vex_prefix |= 0x40; // VEX.X
- }
- if (!b) {
- vex_prefix |= 0x20; // VEX.B
- }
-
- // VEX.mmmmm
- switch (mmmmm) {
- case 1:
- // implied 0F leading opcode byte
- vex_prefix |= 0x01;
- break;
- case 2:
- // implied leading 0F 38 opcode byte
- vex_prefix |= 0x02;
- break;
- case 3:
- // implied leading OF 3A opcode byte
- vex_prefix |= 0x03;
- break;
- default:
- LOG(FATAL) << "unknown opcode bytes";
- }
- return vex_prefix;
-}
-
-uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) {
- uint8_t vex_prefix = 0;
- // VEX Byte 2
- if (w) {
- vex_prefix |= 0x80;
- }
- // VEX.vvvv
- if (operand.IsXmmRegister()) {
- XmmRegister vvvv = operand.AsXmmRegister();
- int inverted_reg = 15-static_cast<int>(vvvv);
- uint8_t reg = static_cast<uint8_t>(inverted_reg);
- vex_prefix |= ((reg & 0x0F) << 3);
- } else if (operand.IsCpuRegister()) {
- Register vvvv = operand.AsCpuRegister();
- int inverted_reg = 15 - static_cast<int>(vvvv);
- uint8_t reg = static_cast<uint8_t>(inverted_reg);
- vex_prefix |= ((reg & 0x0F) << 3);
- }
-
- // VEX.L
- if (l == 256) {
- vex_prefix |= 0x04;
- }
-
- // VEX.pp
- switch (pp) {
- case 0:
- // SIMD Pefix - None
- vex_prefix |= 0x00;
- break;
- case 1:
- // SIMD Prefix - 66
- vex_prefix |= 0x01;
- break;
- case 2:
- // SIMD Prefix - F3
- vex_prefix |= 0x02;
- break;
- case 3:
- // SIMD Prefix - F2
- vex_prefix |= 0x03;
- break;
- default:
- LOG(FATAL) << "unknown SIMD Prefix";
- }
-
- return vex_prefix;
+ return false;
}
void X86Assembler::call(Register reg) {
@@ -273,15 +188,11 @@
void X86Assembler::blsi(Register dst, Register src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
- uint8_t byte_one = EmitVexByte1(/*r=*/ false,
- /*x=*/ false,
- /*b=*/ false,
- /*mmmmm=*/ 2);
- uint8_t byte_two = EmitVexByte2(/*w=*/ false,
- /*l=*/ 128,
- X86ManagedRegister::FromCpuRegister(dst),
- /*pp=*/ 0);
+ uint8_t byte_zero = EmitVexPrefixByteZero(false /*is_twobyte_form*/);
+ uint8_t byte_one = EmitVexPrefixByteOne(false, false, false, SET_VEX_M_0F_38);
+ uint8_t byte_two = EmitVexPrefixByteTwo(false,
+ X86ManagedRegister::FromCpuRegister(dst),
+ SET_VEX_L_128, SET_VEX_PP_NONE);
EmitUint8(byte_zero);
EmitUint8(byte_one);
EmitUint8(byte_two);
@@ -291,15 +202,11 @@
void X86Assembler::blsmsk(Register dst, Register src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
- uint8_t byte_one = EmitVexByte1(/*r=*/ false,
- /*x=*/ false,
- /*b=*/ false,
- /*mmmmm=*/ 2);
- uint8_t byte_two = EmitVexByte2(/*w=*/ false,
- /*l=*/ 128,
- X86ManagedRegister::FromCpuRegister(dst),
- /*pp=*/ 0);
+ uint8_t byte_zero = EmitVexPrefixByteZero(false /*is_twobyte_form*/);
+ uint8_t byte_one = EmitVexPrefixByteOne(false, false, false, SET_VEX_M_0F_38);
+ uint8_t byte_two = EmitVexPrefixByteTwo(false,
+ X86ManagedRegister::FromCpuRegister(dst),
+ SET_VEX_L_128, SET_VEX_PP_NONE);
EmitUint8(byte_zero);
EmitUint8(byte_one);
EmitUint8(byte_two);
@@ -309,15 +216,11 @@
void X86Assembler::blsr(Register dst, Register src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
- uint8_t byte_one = EmitVexByte1(/*r=*/ false,
- /*x=*/ false,
- /*b=*/ false,
- /*mmmmm=*/ 2);
- uint8_t byte_two = EmitVexByte2(/*w=*/ false,
- /*l=*/ 128,
- X86ManagedRegister::FromCpuRegister(dst),
- /*pp=*/ 0);
+ uint8_t byte_zero = EmitVexPrefixByteZero(false /*is_twobyte_form*/);
+ uint8_t byte_one = EmitVexPrefixByteOne(false, false, false, SET_VEX_M_0F_38);
+ uint8_t byte_two = EmitVexPrefixByteTwo(false,
+ X86ManagedRegister::FromCpuRegister(dst),
+ SET_VEX_L_128, SET_VEX_PP_NONE);
EmitUint8(byte_zero);
EmitUint8(byte_one);
EmitUint8(byte_two);
@@ -516,44 +419,165 @@
void X86Assembler::movaps(XmmRegister dst, XmmRegister src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovaps(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
EmitUint8(0x28);
EmitXmmRegisterOperand(dst, src);
}
+/**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2*/
+void X86Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix*/
+ uint8_t byte_zero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ /**a REX prefix is necessary only if an instruction references one of the
+ extended registers or uses a 64-bit operand.*/
+ uint8_t byte_one = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_NONE);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ /**Instruction Opcode*/
+ EmitUint8(0x28);
+ /**Instruction Operands*/
+ EmitXmmRegisterOperand(dst, src);
+}
void X86Assembler::movaps(XmmRegister dst, const Address& src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovaps(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
EmitUint8(0x28);
EmitOperand(dst, src);
}
+/**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128*/
+void X86Assembler::vmovaps(XmmRegister dst, const Address& src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix*/
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ /**a REX prefix is necessary only if an instruction references one of the
+ extended registers or uses a 64-bit operand.*/
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_NONE);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ /**Instruction Opcode*/
+ EmitUint8(0x28);
+ /**Instruction Operands*/
+ EmitOperand(dst, src);
+}
void X86Assembler::movups(XmmRegister dst, const Address& src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovups(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
EmitUint8(0x10);
EmitOperand(dst, src);
}
+/**VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128*/
+void X86Assembler::vmovups(XmmRegister dst, const Address& src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix*/
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ /**a REX prefix is necessary only if an instruction references one of the
+ extended registers or uses a 64-bit operand.*/
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_NONE);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ /*Instruction Opcode*/
+ EmitUint8(0x10);
+ /*Instruction Operands*/
+ EmitOperand(dst, src);
+}
void X86Assembler::movaps(const Address& dst, XmmRegister src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovaps(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
EmitUint8(0x29);
EmitOperand(src, dst);
}
+/**VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1*/
+void X86Assembler::vmovaps(const Address& dst, XmmRegister src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix*/
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ /**a REX prefix is necessary only if an instruction references one of the
+ extended registers or uses a 64-bit operand.*/
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_NONE);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ /**Instruction Opcode*/
+ EmitUint8(0x29);
+ /**Instruction Operands*/
+ EmitOperand(src, dst);
+}
void X86Assembler::movups(const Address& dst, XmmRegister src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovups(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
EmitUint8(0x11);
EmitOperand(src, dst);
}
+/**VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1*/
+void X86Assembler::vmovups(const Address& dst, XmmRegister src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix*/
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ /**a REX prefix is necessary only if an instruction references one of the
+ extended registers or uses a 64-bit operand.*/
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_NONE);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Instruction Opcode
+ EmitUint8(0x11);
+ // Instruction Operands
+ EmitOperand(src, dst);
+}
+
void X86Assembler::movss(XmmRegister dst, const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -705,6 +729,10 @@
void X86Assembler::movapd(XmmRegister dst, XmmRegister src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovapd(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
EmitUint8(0x0F);
@@ -712,8 +740,32 @@
EmitXmmRegisterOperand(dst, src);
}
+/**VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2*/
+void X86Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix*/
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ /**a REX prefix is necessary only if an instruction references one of the
+ extended registers or uses a 64-bit operand.*/
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Instruction Opcode
+ EmitUint8(0x28);
+ // Instruction Operands
+ EmitXmmRegisterOperand(dst, src);
+}
void X86Assembler::movapd(XmmRegister dst, const Address& src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovapd(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
EmitUint8(0x0F);
@@ -721,8 +773,32 @@
EmitOperand(dst, src);
}
+/**VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128*/
+void X86Assembler::vmovapd(XmmRegister dst, const Address& src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix*/
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ /**a REX prefix is necessary only if an instruction references one of the
+ extended registers or uses a 64-bit operand.*/
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Instruction Opcode
+ EmitUint8(0x28);
+ // Instruction Operands
+ EmitOperand(dst, src);
+}
void X86Assembler::movupd(XmmRegister dst, const Address& src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovupd(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
EmitUint8(0x0F);
@@ -730,8 +806,33 @@
EmitOperand(dst, src);
}
+/**VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128*/
+void X86Assembler::vmovupd(XmmRegister dst, const Address& src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix*/
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ /**a REX prefix is necessary only if an instruction references one of the
+ extended registers or uses a 64-bit operand.*/
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Instruction Opcode
+ EmitUint8(0x10);
+ // Instruction Operands
+ EmitOperand(dst, src);
+}
+
void X86Assembler::movapd(const Address& dst, XmmRegister src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovapd(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
EmitUint8(0x0F);
@@ -739,8 +840,32 @@
EmitOperand(src, dst);
}
+/**VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
+void X86Assembler::vmovapd(const Address& dst, XmmRegister src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix */
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ /**a REX prefix is necessary only if an instruction references one of the
+ extended registers or uses a 64-bit operand.*/
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Instruction Opcode
+ EmitUint8(0x29);
+ // Instruction Operands
+ EmitOperand(src, dst);
+}
void X86Assembler::movupd(const Address& dst, XmmRegister src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovupd(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
EmitUint8(0x0F);
@@ -748,6 +873,26 @@
EmitOperand(src, dst);
}
+/**VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
+void X86Assembler::vmovupd(const Address& dst, XmmRegister src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix */
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ /**a REX prefix is necessary only if an instruction references one of the
+ extended registers or uses a 64-bit operand.*/
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Instruction Opcode
+ EmitUint8(0x11);
+ // Instruction Operands
+ EmitOperand(src, dst);
+}
void X86Assembler::flds(const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -924,6 +1069,10 @@
void X86Assembler::movdqa(XmmRegister dst, XmmRegister src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovdqa(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
EmitUint8(0x0F);
@@ -931,8 +1080,30 @@
EmitXmmRegisterOperand(dst, src);
}
+/**VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
+void X86Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix */
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Instruction Opcode
+ EmitUint8(0x6F);
+ // Instruction Operands
+ EmitXmmRegisterOperand(dst, src);
+}
void X86Assembler::movdqa(XmmRegister dst, const Address& src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovdqa(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
EmitUint8(0x0F);
@@ -940,8 +1111,30 @@
EmitOperand(dst, src);
}
+/**VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
+void X86Assembler::vmovdqa(XmmRegister dst, const Address& src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix */
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Instruction Opcode
+ EmitUint8(0x6F);
+ // Instruction Operands
+ EmitOperand(dst, src);
+}
void X86Assembler::movdqu(XmmRegister dst, const Address& src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovdqu(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
EmitUint8(0x0F);
@@ -949,8 +1142,30 @@
EmitOperand(dst, src);
}
+/**VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128 */
+void X86Assembler::vmovdqu(XmmRegister dst, const Address& src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix */
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_F3);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Instruction Opcode
+ EmitUint8(0x6F);
+ // Instruction Operands
+ EmitOperand(dst, src);
+}
void X86Assembler::movdqa(const Address& dst, XmmRegister src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovdqa(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
EmitUint8(0x0F);
@@ -958,8 +1173,31 @@
EmitOperand(src, dst);
}
+/**VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
+void X86Assembler::vmovdqa(const Address& dst, XmmRegister src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ /**Instruction VEX Prefix */
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Instruction Opcode
+ EmitUint8(0x7F);
+ // Instruction Operands
+ EmitOperand(src, dst);
+}
+
void X86Assembler::movdqu(const Address& dst, XmmRegister src) {
+ if (CpuHasAVXorAVX2FeatureFlag()) {
+ vmovdqu(dst, src);
+ return;
+ }
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
EmitUint8(0x0F);
@@ -967,6 +1205,24 @@
EmitOperand(src, dst);
}
+/**VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
+void X86Assembler::vmovdqu(const Address& dst, XmmRegister src) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ // Instruction VEX Prefix
+ uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+ X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+ uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+ vvvv_reg,
+ SET_VEX_L_128,
+ SET_VEX_PP_F3);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Instruction Opcode
+ EmitUint8(0x7F);
+ // Instruction Operands
+ EmitOperand(src, dst);
+}
void X86Assembler::paddb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1413,24 +1669,7 @@
EmitXmmRegisterOperand(dst, src);
}
-void X86Assembler::andn(Register dst, Register src1, Register src2) {
- AssemblerBuffer::EnsureCapacity ensured(&buffer_);
- uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
- uint8_t byte_one = EmitVexByte1(/*r=*/ false,
- /*x=*/ false,
- /*b=*/ false,
- /*mmmmm=*/ 2);
- uint8_t byte_two = EmitVexByte2(/*w=*/ false,
- /*l=*/ 128,
- X86ManagedRegister::FromCpuRegister(src1),
- /*pp=*/ 0);
- EmitUint8(byte_zero);
- EmitUint8(byte_one);
- EmitUint8(byte_two);
- // Opcode field
- EmitUint8(0xF2);
- EmitRegisterOperand(dst, src2);
-}
+
void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) {
@@ -1475,6 +1714,24 @@
EmitXmmRegisterOperand(dst, src);
}
+void X86Assembler::andn(Register dst, Register src1, Register src2) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t byte_zero = EmitVexPrefixByteZero(/**is_twobyte_form= */false);
+ uint8_t byte_one = EmitVexPrefixByteOne(/**R = */false,
+ /**X = */false,
+ /**B = */false,
+ SET_VEX_M_0F_38);
+ uint8_t byte_two = EmitVexPrefixByteTwo(/**W= */false,
+ X86ManagedRegister::FromCpuRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_NONE);
+ EmitUint8(byte_zero);
+ EmitUint8(byte_one);
+ EmitUint8(byte_two);
+ // Opcode field
+ EmitUint8(0xF2);
+ EmitRegisterOperand(dst, src2);
+}
void X86Assembler::por(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -3143,5 +3400,139 @@
return AddInt32(bit_cast<int32_t, float>(v));
}
+uint8_t X86Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
+ /**Vex Byte 0,
+ Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
+ Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex */
+ uint8_t vex_prefix = 0xC0;
+ if (is_twobyte_form) {
+ // 2-Byte Vex
+ vex_prefix |= TWO_BYTE_VEX;
+ } else {
+ // 3-Byte Vex
+ vex_prefix |= THREE_BYTE_VEX;
+ }
+ return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexPrefixByteOne(bool R,
+ bool X,
+ bool B,
+ int SET_VEX_M) {
+ /**Vex Byte 1, */
+ uint8_t vex_prefix = VEX_INIT;
+ /** Bit[7] This bit needs to be set to '1',
+ otherwise the instruction is LES or LDS */
+ if (!R) {
+ // R .
+ vex_prefix |= SET_VEX_R;
+ }
+ /** Bit[6] This bit needs to be set to '1',
+ otherwise the instruction is LES or LDS */
+ if (!X) {
+ // X .
+ vex_prefix |= SET_VEX_X;
+ }
+ /** Bit[5] This bit needs to be set to '1' */
+ if (!B) {
+ // B .
+ vex_prefix |= SET_VEX_B;
+ }
+ /** Bits[4:0], */
+ vex_prefix |= SET_VEX_M;
+ return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexPrefixByteOne(bool R,
+ X86ManagedRegister operand,
+ int SET_VEX_L,
+ int SET_VEX_PP) {
+ /**Vex Byte 1, */
+ uint8_t vex_prefix = VEX_INIT;
+ /** Bit[7] This bit needs to be set to '1',
+ otherwise the instruction is LES or LDS */
+ if (!R) {
+ // R .
+ vex_prefix |= SET_VEX_R;
+ }
+ /**Bits[6:3] - 'vvvv' the source or dest register specifier,
+ stored in inverted (one's complement) form */
+ if (operand.IsNoRegister()) {
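+ // No register: vvvv = b1111 (inverted "no reg"), i.e. 0x0F << 3 = 0x78.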
+ vex_prefix |= 0x78;
+ } else if (operand.IsXmmRegister()) {
+ XmmRegister vvvv = operand.AsXmmRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv);
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ } else if (operand.IsCpuRegister()) {
+ Register vvvv = operand.AsCpuRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv);
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ }
+ /** Bit[2] - "L". VEX.L = 1 indicates a 256-bit vector operation;
+ VEX.L = 0 indicates a 128-bit vector operation */
+ vex_prefix |= SET_VEX_L;
+ /** Bits[1:0] - "pp" */
+ vex_prefix |= SET_VEX_PP;
+ return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexPrefixByteTwo(bool W,
+ X86ManagedRegister operand,
+ int SET_VEX_L,
+ int SET_VEX_PP) {
+ /** Vex Byte 2, */
+ uint8_t vex_prefix = VEX_INIT;
+ /** Bit[7] This bit needs to be set to '1' by default.
+ When using the C4H form of the VEX prefix, the W value is ignored */
+ if (W) {
+ vex_prefix |= SET_VEX_W;
+ }
+ /** Bits[6:3] - 'vvvv' the source or dest register specifier,
+ stored in inverted (one's complement) form */
+ if (operand.IsXmmRegister()) {
+ XmmRegister vvvv = operand.AsXmmRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv);
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ } else if (operand.IsCpuRegister()) {
+ Register vvvv = operand.AsCpuRegister();
+ int inverted_reg = 15 - static_cast<int>(vvvv);
+ uint8_t reg = static_cast<uint8_t>(inverted_reg);
+ vex_prefix |= ((reg & 0x0F) << 3);
+ }
+ /** Bit[2] - "L". VEX.L = 1 indicates a 256-bit vector operation;
+ VEX.L = 0 indicates a 128-bit vector operation */
+ vex_prefix |= SET_VEX_L;
+ // Bits[1:0] - "pp"
+ vex_prefix |= SET_VEX_PP;
+ return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexPrefixByteTwo(bool W,
+ int SET_VEX_L,
+ int SET_VEX_PP) {
+ /**Vex Byte 2, */
+ uint8_t vex_prefix = VEX_INIT;
+
+ /** Bit[7] This bit needs to be set to '1' by default.
+ When using the C4H form of the VEX prefix, the W value is ignored */
+ if (W) {
+ vex_prefix |= SET_VEX_W;
+ }
+ /** Bits[6:3] - 'vvvv' the source or dest register specifier,
+ if unused set 1111 */
+ vex_prefix |= (0x0F << 3);
+
+ /** Bit[2] - "L". VEX.L = 1 indicates a 256-bit vector operation;
+ VEX.L = 0 indicates a 128-bit vector operation */
+ vex_prefix |= SET_VEX_L;
+
+ /** Bits[1:0] - "pp" */
+ if (SET_VEX_PP != SET_VEX_PP_NONE) {
+ vex_prefix |= SET_VEX_PP;
+ }
+ return vex_prefix;
+}
+
} // namespace x86
} // namespace art
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 275e5c1..e84294a 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -19,6 +19,7 @@
#include <vector>
+#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_containers.h"
#include "base/array_ref.h"
#include "base/bit_utils.h"
@@ -308,8 +309,12 @@
class X86Assembler final : public Assembler {
public:
- explicit X86Assembler(ArenaAllocator* allocator)
- : Assembler(allocator), constant_area_(allocator) {}
+ explicit X86Assembler(ArenaAllocator* allocator,
+ const X86InstructionSetFeatures* instruction_set_features = nullptr)
+ : Assembler(allocator),
+ constant_area_(allocator),
+ has_AVX_(instruction_set_features != nullptr ? instruction_set_features->HasAVX() : false),
+ has_AVX2_(instruction_set_features != nullptr ? instruction_set_features->HasAVX2() : false) {}
virtual ~X86Assembler() {}
/*
@@ -385,6 +390,12 @@
void movaps(const Address& dst, XmmRegister src); // store aligned
void movups(const Address& dst, XmmRegister src); // store unaligned
+ void vmovaps(XmmRegister dst, XmmRegister src); // move
+ void vmovaps(XmmRegister dst, const Address& src); // load aligned
+ void vmovups(XmmRegister dst, const Address& src); // load unaligned
+ void vmovaps(const Address& dst, XmmRegister src); // store aligned
+ void vmovups(const Address& dst, XmmRegister src); // store unaligned
+
void movss(XmmRegister dst, const Address& src);
void movss(const Address& dst, XmmRegister src);
void movss(XmmRegister dst, XmmRegister src);
@@ -412,6 +423,12 @@
void movapd(const Address& dst, XmmRegister src); // store aligned
void movupd(const Address& dst, XmmRegister src); // store unaligned
+ void vmovapd(XmmRegister dst, XmmRegister src); // move
+ void vmovapd(XmmRegister dst, const Address& src); // load aligned
+ void vmovupd(XmmRegister dst, const Address& src); // load unaligned
+ void vmovapd(const Address& dst, XmmRegister src); // store aligned
+ void vmovupd(const Address& dst, XmmRegister src); // store unaligned
+
void movsd(XmmRegister dst, const Address& src);
void movsd(const Address& dst, XmmRegister src);
void movsd(XmmRegister dst, XmmRegister src);
@@ -439,6 +456,12 @@
void movdqa(const Address& dst, XmmRegister src); // store aligned
void movdqu(const Address& dst, XmmRegister src); // store unaligned
+ void vmovdqa(XmmRegister dst, XmmRegister src); // move
+ void vmovdqa(XmmRegister dst, const Address& src); // load aligned
+ void vmovdqu(XmmRegister dst, const Address& src); // load unaligned
+ void vmovdqa(const Address& dst, XmmRegister src); // store aligned
+ void vmovdqu(const Address& dst, XmmRegister src); // store unaligned
+
void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void psubb(XmmRegister dst, XmmRegister src);
@@ -823,6 +846,8 @@
// Return the current size of the constant area.
size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
+ bool CpuHasAVXorAVX2FeatureFlag();
+
private:
inline void EmitUint8(uint8_t value);
inline void EmitInt32(int32_t value);
@@ -842,12 +867,22 @@
void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
void EmitGenericShift(int rm, const Operand& operand, Register shifter);
- // Emit a 3 byte VEX Prefix
- uint8_t EmitVexByteZero(bool is_two_byte);
- uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm);
- uint8_t EmitVexByte2(bool w , int l , X86ManagedRegister operand, int pp);
-
+ uint8_t EmitVexPrefixByteZero(bool is_twobyte_form);
+ uint8_t EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M);
+ uint8_t EmitVexPrefixByteOne(bool R,
+ X86ManagedRegister operand,
+ int SET_VEX_L,
+ int SET_VEX_PP);
+ uint8_t EmitVexPrefixByteTwo(bool W,
+ X86ManagedRegister operand,
+ int SET_VEX_L,
+ int SET_VEX_PP);
+ uint8_t EmitVexPrefixByteTwo(bool W,
+ int SET_VEX_L,
+ int SET_VEX_PP);
ConstantArea constant_area_;
+ bool has_AVX_; // x86 256bit SIMD AVX.
+ bool has_AVX2_; // x86 256bit SIMD AVX 2.0.
DISALLOW_COPY_AND_ASSIGN(X86Assembler);
};
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 1d8bfe7..5197156 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -148,6 +148,18 @@
std::vector<x86::XmmRegister*> fp_registers_;
};
+class AssemblerX86AVXTest : public AssemblerX86Test {
+ public:
+ AssemblerX86AVXTest()
+ : instruction_set_features_(X86InstructionSetFeatures::FromVariant("kabylake", nullptr)) {}
+ protected:
+ x86::X86Assembler* CreateAssembler(ArenaAllocator* allocator) override {
+ return new (allocator) x86::X86Assembler(allocator, instruction_set_features_.get());
+ }
+ private:
+ std::unique_ptr<const X86InstructionSetFeatures> instruction_set_features_;
+};
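+// Note: the "kabylake" variant is assumed here to report the AVX and AVX2
+// feature bits, so these tests exercise the VEX encodings added in this patch.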
+
//
// Test some repeat drivers used in the tests.
//
@@ -485,62 +497,122 @@
DriverStr(RepeatFF(&x86::X86Assembler::movaps, "movaps %{reg2}, %{reg1}"), "movaps");
}
+TEST_F(AssemblerX86AVXTest, VMovaps) {
+ DriverStr(RepeatFF(&x86::X86Assembler::vmovaps, "vmovaps %{reg2}, %{reg1}"), "vmovaps");
+}
+
TEST_F(AssemblerX86Test, MovapsLoad) {
DriverStr(RepeatFA(&x86::X86Assembler::movaps, "movaps {mem}, %{reg}"), "movaps_load");
}
+TEST_F(AssemblerX86AVXTest, VMovapsLoad) {
+ DriverStr(RepeatFA(&x86::X86Assembler::vmovaps, "vmovaps {mem}, %{reg}"), "vmovaps_load");
+}
+
TEST_F(AssemblerX86Test, MovapsStore) {
DriverStr(RepeatAF(&x86::X86Assembler::movaps, "movaps %{reg}, {mem}"), "movaps_store");
}
+TEST_F(AssemblerX86AVXTest, VMovapsStore) {
+ DriverStr(RepeatAF(&x86::X86Assembler::vmovaps, "vmovaps %{reg}, {mem}"), "vmovaps_store");
+}
+
TEST_F(AssemblerX86Test, MovupsLoad) {
DriverStr(RepeatFA(&x86::X86Assembler::movups, "movups {mem}, %{reg}"), "movups_load");
}
+TEST_F(AssemblerX86AVXTest, VMovupsLoad) {
+ DriverStr(RepeatFA(&x86::X86Assembler::vmovups, "vmovups {mem}, %{reg}"), "vmovups_load");
+}
+
TEST_F(AssemblerX86Test, MovupsStore) {
DriverStr(RepeatAF(&x86::X86Assembler::movups, "movups %{reg}, {mem}"), "movups_store");
}
+TEST_F(AssemblerX86AVXTest, VMovupsStore) {
+ DriverStr(RepeatAF(&x86::X86Assembler::vmovups, "vmovups %{reg}, {mem}"), "vmovups_store");
+}
+
TEST_F(AssemblerX86Test, Movapd) {
DriverStr(RepeatFF(&x86::X86Assembler::movapd, "movapd %{reg2}, %{reg1}"), "movapd");
}
+TEST_F(AssemblerX86AVXTest, VMovapd) {
+ DriverStr(RepeatFF(&x86::X86Assembler::vmovapd, "vmovapd %{reg2}, %{reg1}"), "vmovapd");
+}
+
TEST_F(AssemblerX86Test, MovapdLoad) {
DriverStr(RepeatFA(&x86::X86Assembler::movapd, "movapd {mem}, %{reg}"), "movapd_load");
}
+TEST_F(AssemblerX86AVXTest, VMovapdLoad) {
+ DriverStr(RepeatFA(&x86::X86Assembler::vmovapd, "vmovapd {mem}, %{reg}"), "vmovapd_load");
+}
+
TEST_F(AssemblerX86Test, MovapdStore) {
DriverStr(RepeatAF(&x86::X86Assembler::movapd, "movapd %{reg}, {mem}"), "movapd_store");
}
+TEST_F(AssemblerX86AVXTest, VMovapdStore) {
+ DriverStr(RepeatAF(&x86::X86Assembler::vmovapd, "vmovapd %{reg}, {mem}"), "vmovapd_store");
+}
+
TEST_F(AssemblerX86Test, MovupdLoad) {
DriverStr(RepeatFA(&x86::X86Assembler::movupd, "movupd {mem}, %{reg}"), "movupd_load");
}
+TEST_F(AssemblerX86AVXTest, VMovupdLoad) {
+ DriverStr(RepeatFA(&x86::X86Assembler::vmovupd, "vmovupd {mem}, %{reg}"), "vmovupd_load");
+}
+
TEST_F(AssemblerX86Test, MovupdStore) {
DriverStr(RepeatAF(&x86::X86Assembler::movupd, "movupd %{reg}, {mem}"), "movupd_store");
}
+TEST_F(AssemblerX86AVXTest, VMovupdStore) {
+ DriverStr(RepeatAF(&x86::X86Assembler::vmovupd, "vmovupd %{reg}, {mem}"), "vmovupd_store");
+}
+
TEST_F(AssemblerX86Test, Movdqa) {
DriverStr(RepeatFF(&x86::X86Assembler::movdqa, "movdqa %{reg2}, %{reg1}"), "movdqa");
}
+TEST_F(AssemblerX86AVXTest, VMovdqa) {
+ DriverStr(RepeatFF(&x86::X86Assembler::vmovdqa, "vmovdqa %{reg2}, %{reg1}"), "vmovdqa");
+}
+
TEST_F(AssemblerX86Test, MovdqaLoad) {
DriverStr(RepeatFA(&x86::X86Assembler::movdqa, "movdqa {mem}, %{reg}"), "movdqa_load");
}
+TEST_F(AssemblerX86AVXTest, VMovdqaLoad) {
+ DriverStr(RepeatFA(&x86::X86Assembler::vmovdqa, "vmovdqa {mem}, %{reg}"), "vmovdqa_load");
+}
+
TEST_F(AssemblerX86Test, MovdqaStore) {
DriverStr(RepeatAF(&x86::X86Assembler::movdqa, "movdqa %{reg}, {mem}"), "movdqa_store");
}
+TEST_F(AssemblerX86AVXTest, VMovdqaStore) {
+ DriverStr(RepeatAF(&x86::X86Assembler::vmovdqa, "vmovdqa %{reg}, {mem}"), "vmovdqa_store");
+}
+
TEST_F(AssemblerX86Test, MovdquLoad) {
DriverStr(RepeatFA(&x86::X86Assembler::movdqu, "movdqu {mem}, %{reg}"), "movdqu_load");
}
+TEST_F(AssemblerX86AVXTest, VMovdquLoad) {
+ DriverStr(RepeatFA(&x86::X86Assembler::vmovdqu, "vmovdqu {mem}, %{reg}"), "vmovdqu_load");
+}
+
TEST_F(AssemblerX86Test, MovdquStore) {
DriverStr(RepeatAF(&x86::X86Assembler::movdqu, "movdqu %{reg}, {mem}"), "movdqu_store");
}
+TEST_F(AssemblerX86AVXTest, VMovdquStore) {
+ DriverStr(RepeatAF(&x86::X86Assembler::vmovdqu, "vmovdqu %{reg}, {mem}"), "vmovdqu_store");
+}
+
TEST_F(AssemblerX86Test, AddPS) {
DriverStr(RepeatFF(&x86::X86Assembler::addps, "addps %{reg2}, %{reg1}"), "addps");
}