Add AVX support for packed add/sub instructions on x86 and x86_64
Test: ./test.py --host, test-art-host-gtest
Change-Id: I48d05e6f6befd54657d962119a543b27a8a51d71
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index bcc197b..3eaf93a 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -703,6 +703,20 @@
EmitXmmRegisterOperand(dst, src);
}
+// Emits VEX.128 ADDPS (opcode 0x58): add_left is carried in VEX.vvvv and add_right is the
+// register operand paired with dst. The two-byte VEX prefix is always used here.
+void X86Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(add_left);
+  const uint8_t vex_byte_one =
+      EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  EmitUint8(0x58);
+  EmitXmmRegisterOperand(dst, add_right);
+}
void X86Assembler::subps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -711,6 +725,18 @@
EmitXmmRegisterOperand(dst, src);
}
+void X86Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(src1);  // Goes in VEX.vvvv.
+  const uint8_t vex_byte_one =
+      EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  EmitUint8(0x5C);  // SUBPS opcode byte.
+  EmitXmmRegisterOperand(dst, src2);
+}
void X86Assembler::mulps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1041,6 +1067,21 @@
}
+// Emits VEX.128 ADDPD (opcode 0x58 with pp=66): add_left goes in VEX.vvvv, add_right with dst.
+void X86Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // Same AVX guard as the other VEX emitters.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(add_left);
+  const uint8_t vex_byte_one =
+      EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  EmitUint8(0x58);
+  EmitXmmRegisterOperand(dst, add_right);
+}
+
+
void X86Assembler::subpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -1050,6 +1091,20 @@
}
+// Emits VEX.128 SUBPD (opcode 0x5C with pp=66): src1 goes in VEX.vvvv, src2 with dst.
+void X86Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // Same AVX guard as the other VEX emitters.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(src1);
+  const uint8_t vex_byte_one =
+      EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  EmitUint8(0x5C);
+  EmitXmmRegisterOperand(dst, src2);
+}
+
void X86Assembler::mulpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -1232,6 +1287,18 @@
EmitXmmRegisterOperand(dst, src);
}
+void X86Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(add_left);
+  const uint8_t vex_byte_one =
+      EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  EmitUint8(0xFC);  // PADDB opcode byte; the 0x66 prefix is expressed via VEX.pp.
+  EmitXmmRegisterOperand(dst, add_right);
+}
void X86Assembler::psubb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1241,6 +1308,18 @@
EmitXmmRegisterOperand(dst, src);
}
+void X86Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(add_left);  // Minuend.
+  const uint8_t vex_byte_one =
+      EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  EmitUint8(0xF8);  // PSUBB opcode byte; the 0x66 prefix is expressed via VEX.pp.
+  EmitXmmRegisterOperand(dst, add_right);
+}
void X86Assembler::paddw(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1250,6 +1329,18 @@
EmitXmmRegisterOperand(dst, src);
}
+void X86Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(add_left);
+  const uint8_t vex_byte_one =
+      EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  EmitUint8(0xFD);  // PADDW opcode byte; the 0x66 prefix is expressed via VEX.pp.
+  EmitXmmRegisterOperand(dst, add_right);
+}
void X86Assembler::psubw(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1259,6 +1350,18 @@
EmitXmmRegisterOperand(dst, src);
}
+void X86Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(add_left);  // Minuend.
+  const uint8_t vex_byte_one =
+      EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  EmitUint8(0xF9);  // PSUBW opcode byte; the 0x66 prefix is expressed via VEX.pp.
+  EmitXmmRegisterOperand(dst, add_right);
+}
void X86Assembler::pmullw(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1277,6 +1380,18 @@
EmitXmmRegisterOperand(dst, src);
}
+void X86Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(add_left);
+  const uint8_t vex_byte_one =
+      EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  EmitUint8(0xFE);  // PADDD opcode byte; the 0x66 prefix is expressed via VEX.pp.
+  EmitXmmRegisterOperand(dst, add_right);
+}
void X86Assembler::psubd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1287,6 +1402,20 @@
}
+void X86Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(add_left);  // Minuend.
+  const uint8_t vex_byte_one =
+      EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  EmitUint8(0xFA);  // PSUBD opcode byte; the 0x66 prefix is expressed via VEX.pp.
+  EmitXmmRegisterOperand(dst, add_right);
+}
+
+
void X86Assembler::pmulld(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -1305,6 +1434,19 @@
EmitXmmRegisterOperand(dst, src);
}
+void X86Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(add_left);
+  const uint8_t vex_byte_one =
+      EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  EmitUint8(0xD4);  // PADDQ opcode byte; the 0x66 prefix is expressed via VEX.pp.
+  EmitXmmRegisterOperand(dst, add_right);
+}
+
void X86Assembler::psubq(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1314,6 +1456,18 @@
EmitXmmRegisterOperand(dst, src);
}
+void X86Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  X86ManagedRegister vvvv_reg = X86ManagedRegister::FromXmmRegister(add_left);  // Minuend.
+  const uint8_t vex_byte_one =
+      EmitVexPrefixByteOne(/*R=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  EmitUint8(0xFB);  // PSUBQ opcode byte; the 0x66 prefix is expressed via VEX.pp.
+  EmitXmmRegisterOperand(dst, add_right);
+}
void X86Assembler::paddusb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index e84294a..17039f0 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -417,6 +417,11 @@
void mulps(XmmRegister dst, XmmRegister src);
void divps(XmmRegister dst, XmmRegister src);
+ void vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // VEX-encoded addps
+ void vsubps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // left - right
+ void vsubpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // left - right
+ void vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // VEX-encoded addpd
+
void movapd(XmmRegister dst, XmmRegister src); // move
void movapd(XmmRegister dst, const Address& src); // load aligned
void movupd(XmmRegister dst, const Address& src); // load unaligned
@@ -465,17 +470,29 @@
void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void psubb(XmmRegister dst, XmmRegister src);
+ void vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // VEX-encoded paddb
+ void vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // VEX-encoded paddw
+
void paddw(XmmRegister dst, XmmRegister src);
void psubw(XmmRegister dst, XmmRegister src);
void pmullw(XmmRegister dst, XmmRegister src);
+ void vpsubb(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // dst = src1 - src2
+ void vpsubw(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // dst = src1 - src2
+ void vpsubd(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // dst = src1 - src2
+
void paddd(XmmRegister dst, XmmRegister src);
void psubd(XmmRegister dst, XmmRegister src);
void pmulld(XmmRegister dst, XmmRegister src);
+ void vpaddd(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // dst = src1 + src2
+
void paddq(XmmRegister dst, XmmRegister src);
void psubq(XmmRegister dst, XmmRegister src);
+ void vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // VEX-encoded paddq
+ void vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // left - right
+
void paddusb(XmmRegister dst, XmmRegister src);
void paddsb(XmmRegister dst, XmmRegister src);
void paddusw(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index ee29482..42ee383 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -677,18 +677,36 @@
DriverStr(RepeatFF(&x86::X86Assembler::addps, "addps %{reg2}, %{reg1}"), "addps");
}
+TEST_F(AssemblerX86AVXTest, VAddps) {  // Covers vaddps with three XMM register operands.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vaddps, "vaddps %{reg3}, %{reg2}, %{reg1}"), "vaddps");
+}
+
TEST_F(AssemblerX86Test, AddPD) {
DriverStr(RepeatFF(&x86::X86Assembler::addpd, "addpd %{reg2}, %{reg1}"), "addpd");
}
+TEST_F(AssemblerX86AVXTest, VAddpd) {  // Covers vaddpd with three XMM register operands.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vaddpd, "vaddpd %{reg3}, %{reg2}, %{reg1}"), "vaddpd");
+}
+
TEST_F(AssemblerX86Test, SubPS) {
DriverStr(RepeatFF(&x86::X86Assembler::subps, "subps %{reg2}, %{reg1}"), "subps");
}
+TEST_F(AssemblerX86AVXTest, VSubps) {  // Covers vsubps with three XMM register operands.
+ // NOTE(review): the format string lacks a space after the first comma, unlike sibling tests.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vsubps, "vsubps %{reg3},%{reg2}, %{reg1}"), "vsubps");
+}
+
+
TEST_F(AssemblerX86Test, SubPD) {
DriverStr(RepeatFF(&x86::X86Assembler::subpd, "subpd %{reg2}, %{reg1}"), "subpd");
}
+TEST_F(AssemblerX86AVXTest, VSubpd) {  // Covers vsubpd with three XMM register operands.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vsubpd, "vsubpd %{reg3}, %{reg2}, %{reg1}"), "vsubpd");
+}
+
+
TEST_F(AssemblerX86Test, MulPS) {
DriverStr(RepeatFF(&x86::X86Assembler::mulps, "mulps %{reg2}, %{reg1}"), "mulps");
}
@@ -709,18 +727,34 @@
DriverStr(RepeatFF(&x86::X86Assembler::paddb, "paddb %{reg2}, %{reg1}"), "paddb");
}
+TEST_F(AssemblerX86AVXTest, VPaddb) {  // Covers vpaddb with three XMM register operands.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vpaddb, "vpaddb %{reg3}, %{reg2}, %{reg1}"), "vpaddb");
+}
+
TEST_F(AssemblerX86Test, PSubB) {
DriverStr(RepeatFF(&x86::X86Assembler::psubb, "psubb %{reg2}, %{reg1}"), "psubb");
}
+TEST_F(AssemblerX86AVXTest, VPsubb) {  // Covers vpsubb with three XMM register operands.
+ // NOTE(review): the format string lacks a space after the first comma, unlike sibling tests.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vpsubb, "vpsubb %{reg3},%{reg2}, %{reg1}"), "vpsubb");
+}
+
TEST_F(AssemblerX86Test, PAddW) {
DriverStr(RepeatFF(&x86::X86Assembler::paddw, "paddw %{reg2}, %{reg1}"), "paddw");
}
+TEST_F(AssemblerX86AVXTest, VPaddw) {  // Covers vpaddw with three XMM register operands.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vpaddw, "vpaddw %{reg3}, %{reg2}, %{reg1}"), "vpaddw");
+}
+
TEST_F(AssemblerX86Test, PSubW) {
DriverStr(RepeatFF(&x86::X86Assembler::psubw, "psubw %{reg2}, %{reg1}"), "psubw");
}
+TEST_F(AssemblerX86AVXTest, VPsubw) {  // Covers vpsubw with three XMM register operands.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vpsubw, "vpsubw %{reg3}, %{reg2}, %{reg1}"), "vpsubw");
+}
+
TEST_F(AssemblerX86Test, PMullW) {
DriverStr(RepeatFF(&x86::X86Assembler::pmullw, "pmullw %{reg2}, %{reg1}"), "pmullw");
}
@@ -729,10 +763,18 @@
DriverStr(RepeatFF(&x86::X86Assembler::paddd, "paddd %{reg2}, %{reg1}"), "paddd");
}
+TEST_F(AssemblerX86AVXTest, VPaddd) {  // Covers vpaddd with three XMM register operands.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vpaddd, "vpaddd %{reg3}, %{reg2}, %{reg1}"), "vpaddd");
+}
+
TEST_F(AssemblerX86Test, PSubD) {
DriverStr(RepeatFF(&x86::X86Assembler::psubd, "psubd %{reg2}, %{reg1}"), "psubd");
}
+TEST_F(AssemblerX86AVXTest, VPsubd) {  // Covers vpsubd with three XMM register operands.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vpsubd, "vpsubd %{reg3}, %{reg2}, %{reg1}"), "vpsubd");
+}
+
TEST_F(AssemblerX86Test, PMullD) {
DriverStr(RepeatFF(&x86::X86Assembler::pmulld, "pmulld %{reg2}, %{reg1}"), "pmulld");
}
@@ -741,10 +783,18 @@
DriverStr(RepeatFF(&x86::X86Assembler::paddq, "paddq %{reg2}, %{reg1}"), "paddq");
}
+TEST_F(AssemblerX86AVXTest, VPaddq) {  // Covers vpaddq with three XMM register operands.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vpaddq, "vpaddq %{reg3}, %{reg2}, %{reg1}"), "vpaddq");
+}
+
TEST_F(AssemblerX86Test, PSubQ) {
DriverStr(RepeatFF(&x86::X86Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq");
}
+TEST_F(AssemblerX86AVXTest, VPsubq) {  // Covers vpsubq with three XMM register operands.
+ DriverStr(RepeatFFF(&x86::X86Assembler::vpsubq, "vpsubq %{reg3}, %{reg2}, %{reg1}"), "vpsubq");
+}
+
TEST_F(AssemblerX86Test, PAddUSB) {
DriverStr(RepeatFF(&x86::X86Assembler::paddusb, "paddusb %{reg2}, %{reg1}"), "paddusb");
}
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 336ecbf..72b7ae0 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -614,8 +614,8 @@
SET_VEX_PP_NONE);
} else {
ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
- Rex_x ,
- Rex_b ,
+ Rex_x,
+ Rex_b,
SET_VEX_M_0F);
ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
SET_VEX_L_128,
@@ -856,6 +856,60 @@
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+// Emits the VEX.128 form of addps (opcode 0x58) with add_left encoded in VEX.vvvv.
+// The two-byte VEX prefix is used unless add_right needs a REX bit, in which case the
+// three-byte prefix is emitted instead.
+void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const bool is_twobyte_form = !add_right.NeedsRex();
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+  uint8_t vex_byte_one = 0x00;
+  uint8_t vex_byte_two = 0x00;  // Only written and emitted for the three-byte form.
+  if (is_twobyte_form) {
+    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
+  } else {
+    vex_byte_one =
+        EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, add_right.NeedsRex(), SET_VEX_M_0F);
+    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
+  }
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  if (!is_twobyte_form) {
+    EmitUint8(vex_byte_two);
+  }
+  EmitUint8(0x58);
+  // The prefix already received dst.NeedsRex(); only dst.LowBits() is passed on here.
+  EmitXmmRegisterOperand(dst.LowBits(), add_right);
+}
+
+// Emits the VEX.128 form of subps (opcode 0x5C): src1 goes in VEX.vvvv, src2 with dst.
+void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const bool is_twobyte_form = !src2.NeedsRex();
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+  X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
+  uint8_t vex_byte_one = 0x00;
+  uint8_t vex_byte_two = 0x00;  // Only written and emitted for the three-byte form.
+  if (is_twobyte_form) {
+    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
+  } else {
+    vex_byte_one =
+        EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
+    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
+  }
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  if (!is_twobyte_form) {
+    EmitUint8(vex_byte_two);
+  }
+  EmitUint8(0x5C);
+  EmitXmmRegisterOperand(dst.LowBits(), src2);
+}
+
void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -931,7 +985,7 @@
SET_VEX_PP_66);
} else {
ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
- /*X=*/ false ,
+ /*X=*/ false,
src.NeedsRex(),
SET_VEX_M_0F);
ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
@@ -1292,6 +1346,35 @@
}
+// Emits the VEX.128 form of addpd (opcode 0x58, with the 0x66 prefix expressed via VEX.pp).
+// add_left is encoded in VEX.vvvv. The two-byte VEX prefix is used unless add_right needs a
+// REX bit, in which case the three-byte prefix is emitted instead.
+void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // Same AVX guard as the other VEX emitters.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const bool is_twobyte_form = !add_right.NeedsRex();
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
+  uint8_t vex_byte_one = 0x00;
+  uint8_t vex_byte_two = 0x00;  // Only written and emitted for the three-byte form.
+  if (is_twobyte_form) {
+    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  } else {
+    vex_byte_one =
+        EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, add_right.NeedsRex(), SET_VEX_M_0F);
+    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  }
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  if (!is_twobyte_form) {
+    EmitUint8(vex_byte_two);
+  }
+  EmitUint8(0x58);
+  EmitXmmRegisterOperand(dst.LowBits(), add_right);
+}
+
+
void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -1302,6 +1385,35 @@
}
+// Emits the VEX.128 form of subpd (opcode 0x5C, with the 0x66 prefix expressed via VEX.pp).
+// src1 is encoded in VEX.vvvv. The two-byte VEX prefix is used unless src2 needs a REX bit,
+// in which case the three-byte prefix is emitted instead.
+void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());  // Same AVX guard as the other VEX emitters.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const bool is_twobyte_form = !src2.NeedsRex();
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
+  uint8_t vex_byte_one = 0x00;
+  uint8_t vex_byte_two = 0x00;  // Only written and emitted for the three-byte form.
+  if (is_twobyte_form) {
+    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  } else {
+    vex_byte_one =
+        EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
+    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  }
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  if (!is_twobyte_form) {
+    EmitUint8(vex_byte_two);
+  }
+  EmitUint8(0x5C);
+  EmitXmmRegisterOperand(dst.LowBits(), src2);
+}
+
+
void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -1609,6 +1721,36 @@
}
+// Emits the VEX.128 form of paddb (opcode 0xFC, with the 0x66 prefix expressed via VEX.pp).
+// add_left is encoded in VEX.vvvv. The two-byte VEX prefix is used unless add_right needs a
+// REX bit, in which case the three-byte prefix is emitted instead.
+void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const bool is_twobyte_form = !add_right.NeedsRex();
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
+  uint8_t vex_byte_one = 0x00;
+  uint8_t vex_byte_two = 0x00;  // Only written and emitted for the three-byte form.
+  if (is_twobyte_form) {
+    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  } else {
+    vex_byte_one =
+        EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, add_right.NeedsRex(), SET_VEX_M_0F);
+    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  }
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  if (!is_twobyte_form) {
+    EmitUint8(vex_byte_two);
+  }
+  EmitUint8(0xFC);
+  // The prefix already received dst.NeedsRex(); only dst.LowBits() is passed on here.
+  EmitXmmRegisterOperand(dst.LowBits(), add_right);
+}
+
+
void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -1619,6 +1761,36 @@
}
+// Emits the VEX.128 form of psubb (opcode 0xF8, with the 0x66 prefix expressed via VEX.pp).
+// add_left (the minuend) is encoded in VEX.vvvv. The two-byte VEX prefix is used unless
+// add_right needs a REX bit, in which case the three-byte prefix is emitted instead.
+void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const bool is_twobyte_form = !add_right.NeedsRex();
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
+  uint8_t vex_byte_one = 0x00;
+  uint8_t vex_byte_two = 0x00;  // Only written and emitted for the three-byte form.
+  if (is_twobyte_form) {
+    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  } else {
+    vex_byte_one =
+        EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, add_right.NeedsRex(), SET_VEX_M_0F);
+    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  }
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  if (!is_twobyte_form) {
+    EmitUint8(vex_byte_two);
+  }
+  EmitUint8(0xF8);
+  // The prefix already received dst.NeedsRex(); only dst.LowBits() is passed on here.
+  EmitXmmRegisterOperand(dst.LowBits(), add_right);
+}
+
+
void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -1628,6 +1800,35 @@
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+// Emits the VEX.128 form of paddw (opcode 0xFD, with the 0x66 prefix expressed via VEX.pp).
+// add_left is encoded in VEX.vvvv. The two-byte VEX prefix is used unless add_right needs a
+// REX bit, in which case the three-byte prefix is emitted instead.
+void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const bool is_twobyte_form = !add_right.NeedsRex();
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
+  uint8_t vex_byte_one = 0x00;
+  uint8_t vex_byte_two = 0x00;  // Only written and emitted for the three-byte form.
+  if (is_twobyte_form) {
+    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  } else {
+    vex_byte_one =
+        EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, add_right.NeedsRex(), SET_VEX_M_0F);
+    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  }
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  if (!is_twobyte_form) {
+    EmitUint8(vex_byte_two);
+  }
+  EmitUint8(0xFD);
+  // The prefix already received dst.NeedsRex(); only dst.LowBits() is passed on here.
+  EmitXmmRegisterOperand(dst.LowBits(), add_right);
+}
+
void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1638,6 +1839,35 @@
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+// Emits the VEX.128 form of psubw (opcode 0xF9, with the 0x66 prefix expressed via VEX.pp).
+// add_left (the minuend) is encoded in VEX.vvvv. The two-byte VEX prefix is used unless
+// add_right needs a REX bit, in which case the three-byte prefix is emitted instead.
+void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const bool is_twobyte_form = !add_right.NeedsRex();
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
+  uint8_t vex_byte_one = 0x00;
+  uint8_t vex_byte_two = 0x00;  // Only written and emitted for the three-byte form.
+  if (is_twobyte_form) {
+    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  } else {
+    vex_byte_one =
+        EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, add_right.NeedsRex(), SET_VEX_M_0F);
+    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  }
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  if (!is_twobyte_form) {
+    EmitUint8(vex_byte_two);
+  }
+  EmitUint8(0xF9);
+  // The prefix already received dst.NeedsRex(); only dst.LowBits() is passed on here.
+  EmitXmmRegisterOperand(dst.LowBits(), add_right);
+}
+
void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1658,6 +1888,34 @@
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+// Emits the VEX.128 form of paddd (opcode 0xFE, with the 0x66 prefix expressed via VEX.pp).
+// add_left is encoded in VEX.vvvv. The two-byte VEX prefix is used unless add_right needs a
+// REX bit, in which case the three-byte prefix is emitted instead.
+void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const bool is_twobyte_form = !add_right.NeedsRex();
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
+  uint8_t vex_byte_one = 0x00;
+  uint8_t vex_byte_two = 0x00;  // Only written and emitted for the three-byte form.
+  if (is_twobyte_form) {
+    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  } else {
+    vex_byte_one =
+        EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, add_right.NeedsRex(), SET_VEX_M_0F);
+    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  }
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  if (!is_twobyte_form) {
+    EmitUint8(vex_byte_two);
+  }
+  EmitUint8(0xFE);
+  // The prefix already received dst.NeedsRex(); only dst.LowBits() is passed on here.
+  EmitXmmRegisterOperand(dst.LowBits(), add_right);
+}
void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1690,6 +1948,36 @@
}
+// Emits the VEX.128 form of paddq (opcode 0xD4, with the 0x66 prefix expressed via VEX.pp).
+// add_left is encoded in VEX.vvvv. The two-byte VEX prefix is used unless add_right needs a
+// REX bit, in which case the three-byte prefix is emitted instead.
+void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const bool is_twobyte_form = !add_right.NeedsRex();
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
+  uint8_t vex_byte_one = 0x00;
+  uint8_t vex_byte_two = 0x00;  // Only written and emitted for the three-byte form.
+  if (is_twobyte_form) {
+    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  } else {
+    vex_byte_one =
+        EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, add_right.NeedsRex(), SET_VEX_M_0F);
+    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  }
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  if (!is_twobyte_form) {
+    EmitUint8(vex_byte_two);
+  }
+  EmitUint8(0xD4);
+  // The prefix already received dst.NeedsRex(); only dst.LowBits() is passed on here.
+  EmitXmmRegisterOperand(dst.LowBits(), add_right);
+}
+
+
void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -1699,6 +1987,35 @@
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+// Emits the VEX.128 form of psubq (opcode 0xFB, with the 0x66 prefix expressed via VEX.pp).
+// add_left (the minuend) is encoded in VEX.vvvv. The two-byte VEX prefix is used unless
+// add_right needs a REX bit, in which case the three-byte prefix is emitted instead.
+void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const bool is_twobyte_form = !add_right.NeedsRex();
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
+  uint8_t vex_byte_one = 0x00;
+  uint8_t vex_byte_two = 0x00;  // Only written and emitted for the three-byte form.
+  if (is_twobyte_form) {
+    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  } else {
+    vex_byte_one =
+        EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, add_right.NeedsRex(), SET_VEX_M_0F);
+    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  }
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  if (!is_twobyte_form) {
+    EmitUint8(vex_byte_two);
+  }
+  EmitUint8(0xFB);
+  // The prefix already received dst.NeedsRex(); only dst.LowBits() is passed on here.
+  EmitXmmRegisterOperand(dst.LowBits(), add_right);
+}
+
void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1760,6 +2077,36 @@
}
+// Emits the VEX.128 form of psubd (opcode 0xFA, with the 0x66 prefix expressed via VEX.pp).
+// add_left (the minuend) is encoded in VEX.vvvv. The two-byte VEX prefix is used unless
+// add_right needs a REX bit, in which case the three-byte prefix is emitted instead.
+void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const bool is_twobyte_form = !add_right.NeedsRex();
+  const uint8_t vex_byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
+  X86_64ManagedRegister vvvv_reg =
+      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
+  uint8_t vex_byte_one = 0x00;
+  uint8_t vex_byte_two = 0x00;  // Only written and emitted for the three-byte form.
+  if (is_twobyte_form) {
+    vex_byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  } else {
+    vex_byte_one =
+        EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, add_right.NeedsRex(), SET_VEX_M_0F);
+    vex_byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
+  }
+  EmitUint8(vex_byte_zero);
+  EmitUint8(vex_byte_one);
+  if (!is_twobyte_form) {
+    EmitUint8(vex_byte_two);
+  }
+  EmitUint8(0xFA);
+  // The prefix already received dst.NeedsRex(); only dst.LowBits() is passed on here.
+  EmitXmmRegisterOperand(dst.LowBits(), add_right);
+}
+
+
void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
@@ -4597,7 +4944,7 @@
uint8_t reg = static_cast<uint8_t>(inverted_reg);
vex_prefix |= ((reg & 0x0F) << 3);
}
- /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation ,
+ /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
VEX.L = 0 indicates 128 bit vector operation */
vex_prefix |= SET_VEX_L;
// Bits[1:0] - "pp"
@@ -4629,7 +4976,7 @@
uint8_t reg = static_cast<uint8_t>(inverted_reg);
vex_prefix |= ((reg & 0x0F) << 3);
}
- /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation ,
+ /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
VEX.L = 0 indicates 128 bit vector operation */
vex_prefix |= SET_VEX_L;
// Bits[1:0] - "pp"
@@ -4650,7 +4997,7 @@
}
/** Bits[6:3] - 'vvvv' the source or dest register specifier */
vex_prefix |= (0x0F << 3);
- /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation ,
+ /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
VEX.L = 0 indicates 128 bit vector operation */
vex_prefix |= SET_VEX_L;
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 471b314..8fc69f6 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -452,6 +452,11 @@
void mulps(XmmRegister dst, XmmRegister src);
void divps(XmmRegister dst, XmmRegister src);
+ void vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // VEX-encoded addps
+ void vsubps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // left - right
+ void vsubpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // left - right
+ void vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // VEX-encoded addpd
+
void movapd(XmmRegister dst, XmmRegister src); // move
void movapd(XmmRegister dst, const Address& src); // load aligned
void movupd(XmmRegister dst, const Address& src); // load unaligned
@@ -497,17 +502,29 @@
void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
void psubb(XmmRegister dst, XmmRegister src);
+ void vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // VEX-encoded paddb
+ void vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // VEX-encoded paddw
+
void paddw(XmmRegister dst, XmmRegister src);
void psubw(XmmRegister dst, XmmRegister src);
void pmullw(XmmRegister dst, XmmRegister src);
+ void vpsubb(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // dst = src1 - src2
+ void vpsubw(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // dst = src1 - src2
+ void vpsubd(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // dst = src1 - src2
+
void paddd(XmmRegister dst, XmmRegister src);
void psubd(XmmRegister dst, XmmRegister src);
void pmulld(XmmRegister dst, XmmRegister src);
+ void vpaddd(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // dst = src1 + src2
+
void paddq(XmmRegister dst, XmmRegister src);
void psubq(XmmRegister dst, XmmRegister src);
+ void vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);  // AVX 3-operand
+ void vpsubq(XmmRegister dst, XmmRegister src1, XmmRegister src2);  // AVX 3-operand
+
void paddusb(XmmRegister dst, XmmRegister src);
void paddsb(XmmRegister dst, XmmRegister src);
void paddusw(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 411c30b..24a6c3c 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1323,10 +1323,20 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::addps, "addps %{reg2}, %{reg1}"), "addps");
}
+TEST_F(AssemblerX86_64AVXTest, VAddps) {
+  constexpr const char* kFormat = "vaddps %{reg3}, %{reg2}, %{reg1}";
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vaddps, kFormat), "vaddps");
+}
+
TEST_F(AssemblerX86_64Test, Addpd) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::addpd, "addpd %{reg2}, %{reg1}"), "addpd");
}
+TEST_F(AssemblerX86_64AVXTest, VAddpd) {
+  constexpr const char* kFormat = "vaddpd %{reg3}, %{reg2}, %{reg1}";
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vaddpd, kFormat), "vaddpd");
+}
+
TEST_F(AssemblerX86_64Test, Subss) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::subss, "subss %{reg2}, %{reg1}"), "subss");
}
@@ -1339,10 +1349,20 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::subps, "subps %{reg2}, %{reg1}"), "subps");
}
+TEST_F(AssemblerX86_64AVXTest, VSubps) {
+  constexpr const char* kFormat = "vsubps %{reg3},%{reg2}, %{reg1}";
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vsubps, kFormat), "vsubps");
+}
+
TEST_F(AssemblerX86_64Test, Subpd) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::subpd, "subpd %{reg2}, %{reg1}"), "subpd");
}
+TEST_F(AssemblerX86_64AVXTest, VSubpd) {
+  constexpr const char* kFormat = "vsubpd %{reg3}, %{reg2}, %{reg1}";
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vsubpd, kFormat), "vsubpd");
+}
+
TEST_F(AssemblerX86_64Test, Mulss) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::mulss, "mulss %{reg2}, %{reg1}"), "mulss");
}
@@ -1379,14 +1399,35 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddb, "paddb %{reg2}, %{reg1}"), "paddb");
}
+TEST_F(AssemblerX86_64AVXTest, VPaddb) {
+  constexpr const char* kFormat = "vpaddb %{reg3}, %{reg2}, %{reg1}";
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vpaddb, kFormat), "vpaddb");
+}
+
TEST_F(AssemblerX86_64Test, Psubb) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubb, "psubb %{reg2}, %{reg1}"), "psubb");
}
+TEST_F(AssemblerX86_64AVXTest, VPsubb) {
+  constexpr const char* kFormat = "vpsubb %{reg3},%{reg2}, %{reg1}";
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vpsubb, kFormat), "vpsubb");
+}
+
TEST_F(AssemblerX86_64Test, Paddw) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddw, "paddw %{reg2}, %{reg1}"), "paddw");
}
+TEST_F(AssemblerX86_64AVXTest, VPsubw) {
+  constexpr const char* kFormat = "vpsubw %{reg3}, %{reg2}, %{reg1}";
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vpsubw, kFormat), "vpsubw");
+}
+
+TEST_F(AssemblerX86_64AVXTest, VPaddw) {
+  constexpr const char* kFormat = "vpaddw %{reg3}, %{reg2}, %{reg1}";
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vpaddw, kFormat), "vpaddw");
+}
+
+
TEST_F(AssemblerX86_64Test, Psubw) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubw, "psubw %{reg2}, %{reg1}"), "psubw");
}
@@ -1399,10 +1440,20 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddd, "paddd %{reg2}, %{reg1}"), "paddd");
}
+TEST_F(AssemblerX86_64AVXTest, VPaddd) {
+  constexpr const char* kFormat = "vpaddd %{reg3}, %{reg2}, %{reg1}";
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vpaddd, kFormat), "vpaddd");
+}
+
TEST_F(AssemblerX86_64Test, Psubd) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubd, "psubd %{reg2}, %{reg1}"), "psubd");
}
+TEST_F(AssemblerX86_64AVXTest, VPsubd) {
+  constexpr const char* kFormat = "vpsubd %{reg3}, %{reg2}, %{reg1}";
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vpsubd, kFormat), "vpsubd");
+}
+
TEST_F(AssemblerX86_64Test, Pmulld) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmulld, "pmulld %{reg2}, %{reg1}"), "pmulld");
}
@@ -1411,10 +1462,20 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddq, "paddq %{reg2}, %{reg1}"), "paddq");
}
+TEST_F(AssemblerX86_64AVXTest, VPaddq) {
+  constexpr const char* kFormat = "vpaddq %{reg3}, %{reg2}, %{reg1}";
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vpaddq, kFormat), "vpaddq");
+}
+
TEST_F(AssemblerX86_64Test, Psubq) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq");
}
+TEST_F(AssemblerX86_64AVXTest, VPsubq) {
+  constexpr const char* kFormat = "vpsubq %{reg3}, %{reg2}, %{reg1}";
+  DriverStr(RepeatFFF(&x86_64::X86_64Assembler::vpsubq, kFormat), "vpsubq");
+}
+
TEST_F(AssemblerX86_64Test, Paddusb) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddusb, "paddusb %{reg2}, %{reg1}"), "paddusb");
}