AVX support for bitwise instructions (Xor, Or, And, Andn)
Test: ./test.py --host --64, test-art-host-gtest
Change-Id: Ia8302d12d3ebb8447d73db576fb5b945485c11e1
Signed-off-by: Neeraj Solanki <neeraj.solanki@intel.com>
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 84a8564..166aec8 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1872,6 +1872,68 @@
EmitXmmRegisterOperand(dst, src);
}
+/* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 -- register-only form (dst, src1, src2). */
+void X86Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00;
+ /* Two-byte VEX prefix: both bytes are computed here and written out below. */
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+ /* R is passed as false; src1 is the register handed to the prefix byte (presumably encoded
+ as VEX.vvvv -- confirm in EmitVexPrefixByteOne). L_128 and PP_66 match VEX.128.66 above. */
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ X86ManagedRegister::FromXmmRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Opcode byte (EF in the encoding above).
+ EmitUint8(0xEF);
+ // Register operand encoding for dst and src2; src1 was already passed to the prefix.
+ EmitXmmRegisterOperand(dst, src2);
+}
+
+/* VEX.128.0F.WIG 57 /r VXORPS xmm1, xmm2, xmm3/m128 -- register-only form (dst, src1, src2). */
+void X86Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00;
+ /* Two-byte VEX prefix: both bytes are computed here and written out below. */
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+ /* R is passed as false; src1 is the register handed to the prefix byte (presumably encoded
+ as VEX.vvvv -- confirm in EmitVexPrefixByteOne). PP_NONE: the encoding above has no 66. */
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ X86ManagedRegister::FromXmmRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_NONE);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Opcode byte (57 in the encoding above).
+ EmitUint8(0x57);
+ // Register operand encoding for dst and src2; src1 was already passed to the prefix.
+ EmitXmmRegisterOperand(dst, src2);
+}
+
+/* VEX.128.66.0F.WIG 57 /r VXORPD xmm1, xmm2, xmm3/m128 -- register-only form (dst, src1, src2). */
+void X86Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00;
+ /* Two-byte VEX prefix: both bytes are computed here and written out below. */
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+ /* R is passed as false; src1 is the register handed to the prefix byte (presumably encoded
+ as VEX.vvvv -- confirm in EmitVexPrefixByteOne). L_128 and PP_66 match VEX.128.66 above. */
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ X86ManagedRegister::FromXmmRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Opcode byte (57 in the encoding above; shared with vxorps, which passes PP_NONE).
+ EmitUint8(0x57);
+ // Register operand encoding for dst and src2; src1 was already passed to the prefix.
+ EmitXmmRegisterOperand(dst, src2);
+}
void X86Assembler::andpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1915,8 +1977,66 @@
EmitXmmRegisterOperand(dst, src);
}
+/* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 -- register-only form (dst, src1, src2). */
+void X86Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00;
+ /* Two-byte VEX prefix: both bytes are computed here and written out below. */
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+ /* R is passed as false; src1 is the register handed to the prefix byte (presumably encoded
+ as VEX.vvvv -- confirm in EmitVexPrefixByteOne). L_128 and PP_66 match VEX.128.66 above. */
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ X86ManagedRegister::FromXmmRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Opcode byte (DB in the encoding above).
+ EmitUint8(0xDB);
+ // Register operand encoding for dst and src2; src1 was already passed to the prefix.
+ EmitXmmRegisterOperand(dst, src2);
+}
+/* VEX.128.0F 54 /r VANDPS xmm1, xmm2, xmm3/m128 -- register-only form (dst, src1, src2). */
+void X86Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00;
+ /* Two-byte VEX prefix; src1 is handed to the second byte (presumably VEX.vvvv -- confirm). */
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ X86ManagedRegister::FromXmmRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_NONE);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Opcode byte (54 in the encoding above).
+ EmitUint8(0x54);
+ // Register operand encoding for dst and src2; src1 was already passed to the prefix.
+ EmitXmmRegisterOperand(dst, src2);
+}
+/* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 -- register-only form (dst, src1, src2). */
+void X86Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00;
+ /* Two-byte VEX prefix: both bytes are computed here and written out below. */
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+ /* R is passed as false; src1 is the register handed to the prefix byte (presumably encoded
+ as VEX.vvvv -- confirm in EmitVexPrefixByteOne). L_128 and PP_66 match VEX.128.66 above. */
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ X86ManagedRegister::FromXmmRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Opcode byte (54 in the encoding above; shared with vandps, which passes PP_NONE).
+ EmitUint8(0x54);
+ // Register operand encoding for dst and src2; src1 was already passed to the prefix.
+ EmitXmmRegisterOperand(dst, src2);
+}
void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1943,6 +2063,68 @@
EmitXmmRegisterOperand(dst, src);
}
+/* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 -- register-only form (dst, src1, src2). */
+void X86Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00;
+ /* Two-byte VEX prefix: both bytes are computed here and written out below. */
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+ /* R is passed as false; src1 is the register handed to the prefix byte (presumably encoded
+ as VEX.vvvv -- confirm in EmitVexPrefixByteOne). L_128 and PP_66 match VEX.128.66 above. */
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ X86ManagedRegister::FromXmmRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Opcode byte (DF in the encoding above).
+ EmitUint8(0xDF);
+ // Register operand encoding for dst and src2; src1 was already passed to the prefix.
+ EmitXmmRegisterOperand(dst, src2);
+}
+
+/* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 -- register-only form (dst, src1, src2). */
+void X86Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00;
+ /* Two-byte VEX prefix: both bytes are computed here and written out below. */
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+ /* R is passed as false; src1 is the register handed to the prefix byte (presumably encoded
+ as VEX.vvvv -- confirm in EmitVexPrefixByteOne). PP_NONE: the encoding above has no 66. */
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ X86ManagedRegister::FromXmmRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_NONE);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Opcode byte (55 in the encoding above).
+ EmitUint8(0x55);
+ // Register operand encoding for dst and src2; src1 was already passed to the prefix.
+ EmitXmmRegisterOperand(dst, src2);
+}
+
+/* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 -- register-only form (dst, src1, src2). */
+void X86Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00;
+ /* Two-byte VEX prefix: both bytes are computed here and written out below. */
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+ /* R is passed as false; src1 is the register handed to the prefix byte (presumably encoded
+ as VEX.vvvv -- confirm in EmitVexPrefixByteOne). L_128 and PP_66 match VEX.128.66 above. */
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ X86ManagedRegister::FromXmmRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Opcode byte (55 in the encoding above; shared with vandnps, which passes PP_NONE).
+ EmitUint8(0x55);
+ // Register operand encoding for dst and src2; src1 was already passed to the prefix.
+ EmitXmmRegisterOperand(dst, src2);
+}
void X86Assembler::orpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1987,6 +2169,68 @@
EmitXmmRegisterOperand(dst, src);
}
+/* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 -- register-only form (dst, src1, src2). */
+void X86Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00;
+ /* Two-byte VEX prefix: both bytes are computed here and written out below. */
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+ /* R is passed as false; src1 is the register handed to the prefix byte (presumably encoded
+ as VEX.vvvv -- confirm in EmitVexPrefixByteOne). L_128 and PP_66 match VEX.128.66 above. */
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ X86ManagedRegister::FromXmmRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Opcode byte (EB in the encoding above).
+ EmitUint8(0xEB);
+ // Register operand encoding for dst and src2; src1 was already passed to the prefix.
+ EmitXmmRegisterOperand(dst, src2);
+}
+
+/* VEX.128.0F 56 /r VORPS xmm1, xmm2, xmm3/m128 -- register-only form (dst, src1, src2). */
+void X86Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00;
+ /* Two-byte VEX prefix: both bytes are computed here and written out below. */
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+ /* R is passed as false; src1 is the register handed to the prefix byte (presumably encoded
+ as VEX.vvvv -- confirm in EmitVexPrefixByteOne). PP_NONE: the encoding above has no 66. */
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ X86ManagedRegister::FromXmmRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_NONE);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Opcode byte (56 in the encoding above).
+ EmitUint8(0x56);
+ // Register operand encoding for dst and src2; src1 was already passed to the prefix.
+ EmitXmmRegisterOperand(dst, src2);
+}
+
+/* VEX.128.66.0F 56 /r VORPD xmm1, xmm2, xmm3/m128 -- register-only form (dst, src1, src2). */
+void X86Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+ DCHECK(CpuHasAVXorAVX2FeatureFlag());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ uint8_t ByteZero = 0x00, ByteOne = 0x00;
+ /* Two-byte VEX prefix: both bytes are computed here and written out below. */
+ ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+ /* R is passed as false; src1 is the register handed to the prefix byte (presumably encoded
+ as VEX.vvvv -- confirm in EmitVexPrefixByteOne). L_128 and PP_66 match VEX.128.66 above. */
+ ByteOne = EmitVexPrefixByteOne(/*R=*/ false,
+ X86ManagedRegister::FromXmmRegister(src1),
+ SET_VEX_L_128,
+ SET_VEX_PP_66);
+ EmitUint8(ByteZero);
+ EmitUint8(ByteOne);
+ // Opcode byte (56 in the encoding above; shared with vorps, which passes PP_NONE).
+ EmitUint8(0x56);
+ // Register operand encoding for dst and src2; src1 was already passed to the prefix.
+ EmitXmmRegisterOperand(dst, src2);
+}
void X86Assembler::pavgb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index dce546e..1b6941c 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -545,21 +545,33 @@
void xorps(XmmRegister dst, const Address& src);
void xorps(XmmRegister dst, XmmRegister src);
void pxor(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX; no addr variant
+ void vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX; no addr variant
+ void vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX; no addr variant
void andpd(XmmRegister dst, XmmRegister src);
void andpd(XmmRegister dst, const Address& src);
void andps(XmmRegister dst, XmmRegister src);
void andps(XmmRegister dst, const Address& src);
void pand(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX; no addr variant
+ void vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX; no addr variant
+ void vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX; no addr variant
void andn(Register dst, Register src1, Register src2); // no addr variant (for now)
void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void andnps(XmmRegister dst, XmmRegister src);
void pandn(XmmRegister dst, XmmRegister src);
+ void vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX; no addr variant
+ void vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX; no addr variant
+ void vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX; no addr variant
void orpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void orps(XmmRegister dst, XmmRegister src);
void por(XmmRegister dst, XmmRegister src);
+ void vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX; no addr variant
+ void vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX; no addr variant
+ void vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX; no addr variant
void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void pavgw(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index bce0346..12d9646 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -861,6 +861,18 @@
DriverStr(RepeatFF(&x86::X86Assembler::pxor, "pxor %{reg2}, %{reg1}"), "pxor");
}
+TEST_F(AssemblerX86AVXTest, VPXor) {  // AVX fixture; drives vpxor through RepeatFFF.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vpxor, "vpxor %{reg3}, %{reg2}, %{reg1}"), "vpxor");
+}
+
+TEST_F(AssemblerX86AVXTest, VXorPS) {  // AVX fixture; drives vxorps through RepeatFFF.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vxorps, "vxorps %{reg3}, %{reg2}, %{reg1}"), "vxorps");
+}
+
+TEST_F(AssemblerX86AVXTest, VXorPD) {  // AVX fixture; drives vxorpd through RepeatFFF.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vxorpd, "vxorpd %{reg3}, %{reg2}, %{reg1}"), "vxorpd");
+}
+
TEST_F(AssemblerX86Test, AndPD) {
DriverStr(RepeatFF(&x86::X86Assembler::andpd, "andpd %{reg2}, %{reg1}"), "andpd");
}
@@ -873,6 +885,18 @@
DriverStr(RepeatFF(&x86::X86Assembler::pand, "pand %{reg2}, %{reg1}"), "pand");
}
+TEST_F(AssemblerX86AVXTest, VPAnd) {  // AVX fixture; drives vpand through RepeatFFF.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vpand, "vpand %{reg3}, %{reg2}, %{reg1}"), "vpand");
+}
+
+TEST_F(AssemblerX86AVXTest, VAndPS) {  // AVX fixture; drives vandps through RepeatFFF.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vandps, "vandps %{reg3}, %{reg2}, %{reg1}"), "vandps");
+}
+
+TEST_F(AssemblerX86AVXTest, VAndPD) {  // AVX fixture; drives vandpd through RepeatFFF.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vandpd, "vandpd %{reg3}, %{reg2}, %{reg1}"), "vandpd");
+}
+
TEST_F(AssemblerX86Test, Andn) {
DriverStr(RepeatRRR(&x86::X86Assembler::andn, "andn %{reg3}, %{reg2}, %{reg1}"), "andn");
}
@@ -889,6 +913,18 @@
DriverStr(RepeatFF(&x86::X86Assembler::pandn, "pandn %{reg2}, %{reg1}"), "pandn");
}
+TEST_F(AssemblerX86AVXTest, VPAndn) {  // AVX fixture; drives vpandn through RepeatFFF.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vpandn, "vpandn %{reg3}, %{reg2}, %{reg1}"), "vpandn");
+}
+
+TEST_F(AssemblerX86AVXTest, VAndnPS) {  // AVX fixture; drives vandnps through RepeatFFF.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vandnps, "vandnps %{reg3}, %{reg2}, %{reg1}"), "vandnps");
+}
+
+TEST_F(AssemblerX86AVXTest, VAndnPD) {  // AVX fixture; drives vandnpd through RepeatFFF.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vandnpd, "vandnpd %{reg3}, %{reg2}, %{reg1}"), "vandnpd");
+}
+
TEST_F(AssemblerX86Test, OrPD) {
DriverStr(RepeatFF(&x86::X86Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd");
}
@@ -901,6 +937,18 @@
DriverStr(RepeatFF(&x86::X86Assembler::por, "por %{reg2}, %{reg1}"), "por");
}
+TEST_F(AssemblerX86AVXTest, VPor) {  // AVX fixture; drives vpor through RepeatFFF.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vpor, "vpor %{reg3}, %{reg2}, %{reg1}"), "vpor");
+}
+
+TEST_F(AssemblerX86AVXTest, VorPS) {  // AVX fixture; drives vorps through RepeatFFF.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vorps, "vorps %{reg3}, %{reg2}, %{reg1}"), "vorps");
+}
+
+TEST_F(AssemblerX86AVXTest, VorPD) {  // AVX fixture; drives vorpd through RepeatFFF.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vorpd, "vorpd %{reg3}, %{reg2}, %{reg1}"), "vorpd");
+}
+
TEST_F(AssemblerX86Test, PAvgB) {
DriverStr(RepeatFF(&x86::X86Assembler::pavgb, "pavgb %{reg2}, %{reg1}"), "pavgb");
}