Patch supports Intel(R) AVX/AVX2 MOV Instruction

This patch enhances the existing ART-Compiler
to generate Intel(R) AVX/AVX2 MOV Instructions for
doing SIMD Operations on Intel(R) Architecture CPUs.
It also provides the framework for AVX/AVX2 Instruction
encoding and disassembly.

BUG: 127881558
Test: run-test gtest
Change-Id: I9386aecc134941a2d907f9ec6b2d5522ec5ff8b5
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 4b073bd..01eb160 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -59,96 +59,11 @@
   }
 }
 
-uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) {
-  uint8_t vex_zero = 0xC0;
-  if (!is_two_byte) {
-    vex_zero |= 0xC4;
-  } else {
-    vex_zero |= 0xC5;
+bool X86Assembler::CpuHasAVXorAVX2FeatureFlag() {
+  if (has_AVX_ || has_AVX2_) {
+    return true;
   }
-  return vex_zero;
-}
-
-uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm ) {
-  // VEX Byte 1
-  uint8_t vex_prefix = 0;
-  if (!r) {
-    vex_prefix |= 0x80;  // VEX.R
-  }
-  if (!x) {
-    vex_prefix |= 0x40;  // VEX.X
-  }
-  if (!b) {
-    vex_prefix |= 0x20;  // VEX.B
-  }
-
-  // VEX.mmmmm
-  switch (mmmmm) {
-  case 1:
-    // implied 0F leading opcode byte
-    vex_prefix |= 0x01;
-    break;
-  case 2:
-    // implied leading 0F 38 opcode byte
-    vex_prefix |= 0x02;
-    break;
-  case 3:
-    // implied leading OF 3A opcode byte
-    vex_prefix |= 0x03;
-    break;
-  default:
-    LOG(FATAL) << "unknown opcode bytes";
-  }
-  return vex_prefix;
-}
-
-uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) {
-  uint8_t vex_prefix = 0;
-  // VEX Byte 2
-  if (w) {
-    vex_prefix |= 0x80;
-  }
-  // VEX.vvvv
-  if (operand.IsXmmRegister()) {
-    XmmRegister vvvv = operand.AsXmmRegister();
-    int inverted_reg = 15-static_cast<int>(vvvv);
-    uint8_t reg = static_cast<uint8_t>(inverted_reg);
-    vex_prefix |= ((reg & 0x0F) << 3);
-  } else if (operand.IsCpuRegister()) {
-    Register vvvv = operand.AsCpuRegister();
-    int inverted_reg = 15 - static_cast<int>(vvvv);
-    uint8_t reg = static_cast<uint8_t>(inverted_reg);
-    vex_prefix |= ((reg & 0x0F) << 3);
-  }
-
-  // VEX.L
-  if (l == 256) {
-    vex_prefix |= 0x04;
-  }
-
-  // VEX.pp
-  switch (pp) {
-  case 0:
-    // SIMD Pefix - None
-    vex_prefix |= 0x00;
-    break;
-  case 1:
-    // SIMD Prefix - 66
-    vex_prefix |= 0x01;
-    break;
-  case 2:
-    // SIMD Prefix - F3
-    vex_prefix |= 0x02;
-    break;
-  case 3:
-    // SIMD Prefix - F2
-    vex_prefix |= 0x03;
-    break;
-  default:
-    LOG(FATAL) << "unknown SIMD Prefix";
-  }
-
-  return vex_prefix;
+  return false;
 }
 
 void X86Assembler::call(Register reg) {
@@ -273,15 +188,11 @@
 
 void X86Assembler::blsi(Register dst, Register src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
-  uint8_t byte_one = EmitVexByte1(/*r=*/ false,
-                                  /*x=*/ false,
-                                  /*b=*/ false,
-                                  /*mmmmm=*/ 2);
-  uint8_t byte_two = EmitVexByte2(/*w=*/ false,
-                                  /*l=*/ 128,
-                                  X86ManagedRegister::FromCpuRegister(dst),
-                                  /*pp=*/ 0);
+  uint8_t byte_zero = EmitVexPrefixByteZero(false /*is_two_byte*/);
+  uint8_t byte_one  = EmitVexPrefixByteOne(false, false, false, SET_VEX_M_0F_38);
+  uint8_t byte_two  = EmitVexPrefixByteTwo(false,
+                                           X86ManagedRegister::FromCpuRegister(dst),
+                                            SET_VEX_L_128, SET_VEX_PP_NONE);
   EmitUint8(byte_zero);
   EmitUint8(byte_one);
   EmitUint8(byte_two);
@@ -291,15 +202,11 @@
 
 void X86Assembler::blsmsk(Register dst, Register src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
-  uint8_t byte_one = EmitVexByte1(/*r=*/ false,
-                                  /*x=*/ false,
-                                  /*b=*/ false,
-                                  /*mmmmm=*/ 2);
-  uint8_t byte_two = EmitVexByte2(/*w=*/ false,
-                                  /*l=*/ 128,
-                                  X86ManagedRegister::FromCpuRegister(dst),
-                                  /*pp=*/ 0);
+  uint8_t byte_zero = EmitVexPrefixByteZero(false /*is_two_byte*/);
+  uint8_t byte_one = EmitVexPrefixByteOne(false, false, false, SET_VEX_M_0F_38);
+  uint8_t byte_two = EmitVexPrefixByteTwo(false,
+                                         X86ManagedRegister::FromCpuRegister(dst),
+                                         SET_VEX_L_128, SET_VEX_PP_NONE);
   EmitUint8(byte_zero);
   EmitUint8(byte_one);
   EmitUint8(byte_two);
@@ -309,15 +216,11 @@
 
 void X86Assembler::blsr(Register dst, Register src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
-  uint8_t byte_one = EmitVexByte1(/*r=*/ false,
-                                  /*x=*/ false,
-                                  /*b=*/ false,
-                                  /*mmmmm=*/ 2);
-  uint8_t byte_two = EmitVexByte2(/*w=*/ false,
-                                  /*l=*/ 128,
-                                  X86ManagedRegister::FromCpuRegister(dst),
-                                  /*pp=*/ 0);
+  uint8_t byte_zero = EmitVexPrefixByteZero(false /*is_two_byte*/);
+  uint8_t byte_one = EmitVexPrefixByteOne(false, false, false,  SET_VEX_M_0F_38);
+  uint8_t byte_two = EmitVexPrefixByteTwo(false,
+                                          X86ManagedRegister::FromCpuRegister(dst),
+                                          SET_VEX_L_128, SET_VEX_PP_NONE);
   EmitUint8(byte_zero);
   EmitUint8(byte_one);
   EmitUint8(byte_two);
@@ -516,44 +419,165 @@
 
 
 void X86Assembler::movaps(XmmRegister dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovaps(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0x28);
   EmitXmmRegisterOperand(dst, src);
 }
 
+/**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2*/
+void X86Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t byte_zero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+  extended registers or uses a 64-bit operand.*/
+  uint8_t byte_one = EmitVexPrefixByteOne(/**R = */false,
+                                          vvvv_reg,
+                                          SET_VEX_L_128,
+                                          SET_VEX_PP_NONE);
+  EmitUint8(byte_zero);
+  EmitUint8(byte_one);
+  /**Instruction Opcode*/
+  EmitUint8(0x28);
+  /**Instruction Operands*/
+  EmitXmmRegisterOperand(dst, src);
+}
 
 void X86Assembler::movaps(XmmRegister dst, const Address& src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovaps(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0x28);
   EmitOperand(dst, src);
 }
 
+/**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128*/
+void X86Assembler::vmovaps(XmmRegister dst, const Address& src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+  extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_NONE);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  /**Instruction Opcode*/
+  EmitUint8(0x28);
+  /**Instruction Operands*/
+  EmitOperand(dst, src);
+}
 
 void X86Assembler::movups(XmmRegister dst, const Address& src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovups(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0x10);
   EmitOperand(dst, src);
 }
 
+/**VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128*/
+void X86Assembler::vmovups(XmmRegister dst, const Address& src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+  extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_NONE);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  /*Instruction Opcode*/
+  EmitUint8(0x10);
+  /*Instruction Operands*/
+  EmitOperand(dst, src);
+}
 
 void X86Assembler::movaps(const Address& dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovaps(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0x29);
   EmitOperand(src, dst);
 }
 
+/**VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1*/
+void X86Assembler::vmovaps(const Address& dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+  extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_NONE);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  /**Instruction Opcode*/
+  EmitUint8(0x29);
+  /**Instruction Operands*/
+  EmitOperand(src, dst);
+}
 
 void X86Assembler::movups(const Address& dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovups(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
   EmitUint8(0x11);
   EmitOperand(src, dst);
 }
 
+/**VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1*/
+void X86Assembler::vmovups(const Address& dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+  extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_NONE);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x11);
+  // Instruction Operands
+  EmitOperand(src, dst);
+}
+
 
 void X86Assembler::movss(XmmRegister dst, const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -705,6 +729,10 @@
 
 
 void X86Assembler::movapd(XmmRegister dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovapd(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -712,8 +740,32 @@
   EmitXmmRegisterOperand(dst, src);
 }
 
+/**VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2*/
+void X86Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+  extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg ,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x28);
+  // Instruction Operands
+  EmitXmmRegisterOperand(dst, src);
+}
 
 void X86Assembler::movapd(XmmRegister dst, const Address& src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovapd(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -721,8 +773,32 @@
   EmitOperand(dst, src);
 }
 
+/**VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128*/
+void X86Assembler::vmovapd(XmmRegister dst, const Address& src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+  extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x28);
+  // Instruction Operands
+  EmitOperand(dst, src);
+}
 
 void X86Assembler::movupd(XmmRegister dst, const Address& src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovupd(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -730,8 +806,33 @@
   EmitOperand(dst, src);
 }
 
+/**VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128*/
+void X86Assembler::vmovupd(XmmRegister dst, const Address& src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix*/
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+  extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x10);
+  // Instruction Operands
+  EmitOperand(dst, src);
+}
+
 
 void X86Assembler::movapd(const Address& dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovapd(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -739,8 +840,32 @@
   EmitOperand(src, dst);
 }
 
+/**VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
+void X86Assembler::vmovapd(const Address& dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix */
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+  extended registers or uses a 64-bit operand.*/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x29);
+  // Instruction Operands
+  EmitOperand(src, dst);
+}
 
 void X86Assembler::movupd(const Address& dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovupd(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -748,6 +873,26 @@
   EmitOperand(src, dst);
 }
 
+/**VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
+void X86Assembler::vmovupd(const Address& dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix */
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  /**a REX prefix is necessary only if an instruction references one of the
+  extended registers or uses a 64-bit operand.**/
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x11);
+  // Instruction Operands
+  EmitOperand(src, dst);
+}
 
 void X86Assembler::flds(const Address& src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -924,6 +1069,10 @@
 
 
 void X86Assembler::movdqa(XmmRegister dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovdqa(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -931,8 +1080,30 @@
   EmitXmmRegisterOperand(dst, src);
 }
 
+/**VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
+void X86Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix */
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x6F);
+  // Instruction Operands
+  EmitXmmRegisterOperand(dst, src);
+}
 
 void X86Assembler::movdqa(XmmRegister dst, const Address& src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovdqa(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -940,8 +1111,30 @@
   EmitOperand(dst, src);
 }
 
+/**VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
+void X86Assembler::vmovdqa(XmmRegister dst, const Address& src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix */
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x6F);
+  // Instruction Operands
+  EmitOperand(dst, src);
+}
 
 void X86Assembler::movdqu(XmmRegister dst, const Address& src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovdqu(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
   EmitUint8(0x0F);
@@ -949,8 +1142,30 @@
   EmitOperand(dst, src);
 }
 
+/**VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128 */
+void X86Assembler::vmovdqu(XmmRegister dst, const Address& src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix */
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_F3);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x6F);
+  // Instruction Operands
+  EmitOperand(dst, src);
+}
 
 void X86Assembler::movdqa(const Address& dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovdqa(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
   EmitUint8(0x0F);
@@ -958,8 +1173,31 @@
   EmitOperand(src, dst);
 }
 
+/**VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
+void X86Assembler::vmovdqa(const Address& dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  /**Instruction VEX Prefix */
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_66);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x7F);
+  // Instruction Operands
+  EmitOperand(src, dst);
+}
+
 
 void X86Assembler::movdqu(const Address& dst, XmmRegister src) {
+  if (CpuHasAVXorAVX2FeatureFlag()) {
+    vmovdqu(dst, src);
+    return;
+  }
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
   EmitUint8(0x0F);
@@ -967,6 +1205,24 @@
   EmitOperand(src, dst);
 }
 
+/**VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
+void X86Assembler::vmovdqu(const Address& dst, XmmRegister src) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  // Instruction VEX Prefix
+  uint8_t ByteZero = EmitVexPrefixByteZero(/**is_twobyte_form= */true);
+  X86ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86();
+  uint8_t ByteOne = EmitVexPrefixByteOne(/**R = */false,
+                                         vvvv_reg,
+                                         SET_VEX_L_128,
+                                         SET_VEX_PP_F3);
+  EmitUint8(ByteZero);
+  EmitUint8(ByteOne);
+  // Instruction Opcode
+  EmitUint8(0x7F);
+  // Instruction Operands
+  EmitOperand(src, dst);
+}
 
 void X86Assembler::paddb(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1413,24 +1669,7 @@
   EmitXmmRegisterOperand(dst, src);
 }
 
-void X86Assembler::andn(Register dst, Register src1, Register src2) {
-  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
-  uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false);
-  uint8_t byte_one = EmitVexByte1(/*r=*/ false,
-                                  /*x=*/ false,
-                                  /*b=*/ false,
-                                  /*mmmmm=*/ 2);
-  uint8_t byte_two = EmitVexByte2(/*w=*/ false,
-                                  /*l=*/ 128,
-                                  X86ManagedRegister::FromCpuRegister(src1),
-                                  /*pp=*/ 0);
-  EmitUint8(byte_zero);
-  EmitUint8(byte_one);
-  EmitUint8(byte_two);
-  // Opcode field
-  EmitUint8(0xF2);
-  EmitRegisterOperand(dst, src2);
-}
+
 
 
 void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) {
@@ -1475,6 +1714,24 @@
   EmitXmmRegisterOperand(dst, src);
 }
 
+void X86Assembler::andn(Register dst, Register src1, Register src2) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  uint8_t byte_zero = EmitVexPrefixByteZero(/**is_twobyte_form= */false);
+  uint8_t byte_one = EmitVexPrefixByteOne(/**R = */false,
+                                          /**X = */false,
+                                          /**B = */false,
+                                          SET_VEX_M_0F_38);
+  uint8_t byte_two = EmitVexPrefixByteTwo(/**W= */false,
+                                          X86ManagedRegister::FromCpuRegister(src1),
+                                          SET_VEX_L_128,
+                                          SET_VEX_PP_NONE);
+  EmitUint8(byte_zero);
+  EmitUint8(byte_one);
+  EmitUint8(byte_two);
+  // Opcode field
+  EmitUint8(0xF2);
+  EmitRegisterOperand(dst, src2);
+}
 
 void X86Assembler::por(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -3143,5 +3400,139 @@
   return AddInt32(bit_cast<int32_t, float>(v));
 }
 
+uint8_t X86Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
+  /**Vex Byte 0,
+  Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
+  Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex */
+  uint8_t vex_prefix = 0xC0;
+  if (is_twobyte_form) {
+    // 2-Byte Vex
+    vex_prefix |= TWO_BYTE_VEX;
+  } else {
+    // 3-Byte Vex
+    vex_prefix |= THREE_BYTE_VEX;
+  }
+  return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexPrefixByteOne(bool R,
+                                           bool X,
+                                           bool B,
+                                           int SET_VEX_M) {
+  /**Vex Byte 1, */
+  uint8_t vex_prefix = VEX_INIT;
+  /** Bit[7] This bit needs to be set to '1'
+  otherwise the instruction is LES or LDS */
+  if (!R) {
+    // R .
+    vex_prefix |= SET_VEX_R;
+  }
+  /** Bit[6] This bit needs to be set to '1'
+  otherwise the instruction is LES or LDS */
+  if (!X) {
+    // X .
+    vex_prefix |= SET_VEX_X;
+  }
+  /** Bit[5] This bit needs to be set to '1' */
+  if (!B) {
+    // B .
+    vex_prefix |= SET_VEX_B;
+  }
+  /** Bits[4:0], */
+  vex_prefix |= SET_VEX_M;
+  return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexPrefixByteOne(bool R,
+                                           X86ManagedRegister operand,
+                                           int SET_VEX_L,
+                                           int SET_VEX_PP) {
+  /**Vex Byte 1, */
+  uint8_t vex_prefix = VEX_INIT;
+  /** Bit[7] This bit needs to be set to '1'
+  otherwise the instruction is LES or LDS */
+  if (!R) {
+    // R .
+    vex_prefix |= SET_VEX_R;
+  }
+  /**Bits[6:3] - 'vvvv' the source or dest register specifier */
+  if (operand.IsNoRegister()) {
+    vex_prefix |= 0x78;
+  } else if (operand.IsXmmRegister()) {
+    XmmRegister vvvv = operand.AsXmmRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+    uint8_t reg = static_cast<uint8_t>(inverted_reg);
+    vex_prefix |= ((reg & 0x0F) << 3);
+  } else if (operand.IsCpuRegister()) {
+    Register vvvv = operand.AsCpuRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+    uint8_t reg = static_cast<uint8_t>(inverted_reg);
+    vex_prefix |= ((reg & 0x0F) << 3);
+  }
+  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation ,
+  VEX.L = 0 indicates 128 bit vector operation */
+  vex_prefix |= SET_VEX_L;
+  /** Bits[1:0] -  "pp" */
+  vex_prefix |= SET_VEX_PP;
+  return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexPrefixByteTwo(bool W,
+                                           X86ManagedRegister operand,
+                                           int SET_VEX_L,
+                                           int SET_VEX_PP) {
+  /** Vex Byte 2, */
+  uint8_t vex_prefix = VEX_INIT;
+  /** Bit[7] This bit needs to be set to '1' with default value.
+  When using C4H form of VEX prefix, W value is ignored */
+  if (W) {
+    vex_prefix |= SET_VEX_W;
+  }
+  /** Bits[6:3] - 'vvvv' the source or dest register specifier */
+  if (operand.IsXmmRegister()) {
+    XmmRegister vvvv = operand.AsXmmRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+    uint8_t reg = static_cast<uint8_t>(inverted_reg);
+    vex_prefix |= ((reg & 0x0F) << 3);
+  } else if (operand.IsCpuRegister()) {
+    Register vvvv = operand.AsCpuRegister();
+    int inverted_reg = 15 - static_cast<int>(vvvv);
+    uint8_t reg = static_cast<uint8_t>(inverted_reg);
+    vex_prefix |= ((reg & 0x0F) << 3);
+  }
+  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation ,
+  VEX.L = 0 indicates 128 bit vector operation */
+  vex_prefix |= SET_VEX_L;
+  // Bits[1:0] -  "pp"
+  vex_prefix |= SET_VEX_PP;
+  return vex_prefix;
+}
+
+uint8_t X86Assembler::EmitVexPrefixByteTwo(bool W,
+                                           int SET_VEX_L,
+                                           int SET_VEX_PP) {
+  /**Vex Byte 2, */
+  uint8_t vex_prefix = VEX_INIT;
+
+  /** Bit[7] This bit needs to be set to '1' with default value.
+  When using C4H form of VEX prefix, W value is ignored */
+  if (W) {
+    vex_prefix |= SET_VEX_W;
+  }
+  /** Bits[6:3] - 'vvvv' the source or dest register specifier,
+  if unused set 1111 */
+  vex_prefix |= (0x0F << 3);
+
+  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation ,
+  VEX.L = 0 indicates 128 bit vector operation */
+  vex_prefix |= SET_VEX_L;
+
+  /** Bits[1:0] -  "pp" */
+  if (SET_VEX_PP != SET_VEX_PP_NONE) {
+    vex_prefix |= SET_VEX_PP;
+  }
+  return vex_prefix;
+}
+
 }  // namespace x86
 }  // namespace art