Add AVX support for packed mul/div instructions.
This is a follow-up to the patch below:
https://android-review.googlesource.com/c/platform/build/+/830841
Test: ./test.py --host --64, test-art-host-gtest
Change-Id: Id2aa473035556ee230e66addeb69707df8530e75
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 3eaf93a..84a8564 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -745,6 +745,20 @@
EmitXmmRegisterOperand(dst, src);
}
+// AVX form of mulps (packed multiply). Emits a two-byte VEX prefix (128-bit, no SIMD prefix,
+// src1 handed to the prefix helper), opcode 0x59, then the register operand byte for dst/src2.
+// DCHECKs that the AVX/AVX2 feature flag is set.
+void X86Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte0 = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  const uint8_t vex_byte1 = EmitVexPrefixByteOne(
+      /*R=*/ false, X86ManagedRegister::FromXmmRegister(src1), SET_VEX_L_128, SET_VEX_PP_NONE);
+  EmitUint8(vex_byte0);
+  EmitUint8(vex_byte1);
+  EmitUint8(0x59);
+  EmitXmmRegisterOperand(dst, src2);
+}
void X86Assembler::divps(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -754,6 +768,22 @@
}
+// AVX form of divps (packed divide). Emits a two-byte VEX prefix (128-bit, no SIMD prefix,
+// src1 handed to the prefix helper), opcode 0x5E, then the register operand byte for dst/src2.
+// DCHECKs that the AVX/AVX2 feature flag is set.
+void X86Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte0 = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  const uint8_t vex_byte1 = EmitVexPrefixByteOne(
+      /*R=*/ false, X86ManagedRegister::FromXmmRegister(src1), SET_VEX_L_128, SET_VEX_PP_NONE);
+  EmitUint8(vex_byte0);
+  EmitUint8(vex_byte1);
+  EmitUint8(0x5E);
+  EmitXmmRegisterOperand(dst, src2);
+}
+
+
void X86Assembler::movapd(XmmRegister dst, XmmRegister src) {
if (CpuHasAVXorAVX2FeatureFlag()) {
vmovapd(dst, src);
@@ -1113,6 +1143,20 @@
EmitXmmRegisterOperand(dst, src);
}
+// AVX form of mulpd (packed multiply). Emits a two-byte VEX prefix (128-bit, 66 SIMD prefix,
+// src1 handed to the prefix helper), opcode 0x59, then the register operand byte for dst/src2.
+// DCHECKs that the AVX/AVX2 feature flag is set.
+void X86Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte0 = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  const uint8_t vex_byte1 = EmitVexPrefixByteOne(
+      /*R=*/ false, X86ManagedRegister::FromXmmRegister(src1), SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte0);
+  EmitUint8(vex_byte1);
+  EmitUint8(0x59);
+  EmitXmmRegisterOperand(dst, src2);
+}
void X86Assembler::divpd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1122,6 +1166,20 @@
EmitXmmRegisterOperand(dst, src);
}
+// AVX form of divpd (packed divide). Emits a two-byte VEX prefix (128-bit, 66 SIMD prefix,
+// src1 handed to the prefix helper), opcode 0x5E, then the register operand byte for dst/src2.
+// DCHECKs that the AVX/AVX2 feature flag is set.
+void X86Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte0 = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  const uint8_t vex_byte1 = EmitVexPrefixByteOne(
+      /*R=*/ false, X86ManagedRegister::FromXmmRegister(src1), SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte0);
+  EmitUint8(vex_byte1);
+  EmitUint8(0x5E);
+  EmitXmmRegisterOperand(dst, src2);
+}
void X86Assembler::movdqa(XmmRegister dst, XmmRegister src) {
if (CpuHasAVXorAVX2FeatureFlag()) {
@@ -1425,6 +1483,40 @@
EmitXmmRegisterOperand(dst, src);
}
+// AVX form of pmulld (packed multiply). Emits a three-byte VEX prefix (opcode map selected by
+// SET_VEX_M_0F_38, 128-bit, 66 SIMD prefix, src1 handed to the prefix helper), opcode 0x40,
+// then the register operand byte for dst/src2. DCHECKs that the AVX/AVX2 feature flag is set.
+// The operand byte goes through EmitXmmRegisterOperand, like the other XmmRegister emitters
+// in this file (vmulps, vdivps, vmulpd, vdivpd), instead of EmitRegisterOperand.
+void X86Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte0 = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
+  const uint8_t vex_byte1 = EmitVexPrefixByteOne(
+      /*R=*/ false, /*X=*/ false, /*B=*/ false, SET_VEX_M_0F_38);
+  const uint8_t vex_byte2 = EmitVexPrefixByteTwo(
+      /*W=*/ false, X86ManagedRegister::FromXmmRegister(src1), SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte0);
+  EmitUint8(vex_byte1);
+  EmitUint8(vex_byte2);
+  EmitUint8(0x40);
+  EmitXmmRegisterOperand(dst, src2);
+}
+
+// AVX form of pmullw (packed multiply). Emits a two-byte VEX prefix (128-bit, 66 SIMD prefix,
+// src1 handed to the prefix helper), opcode 0xD5, then the register operand byte for dst/src2
+// via EmitXmmRegisterOperand, as the other XmmRegister emitters in this file do.
+void X86Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
+  DCHECK(CpuHasAVXorAVX2FeatureFlag());
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  const uint8_t vex_byte0 = EmitVexPrefixByteZero(/*is_twobyte_form=*/ true);
+  const uint8_t vex_byte1 = EmitVexPrefixByteOne(
+      /*R=*/ false, X86ManagedRegister::FromXmmRegister(src1), SET_VEX_L_128, SET_VEX_PP_66);
+  EmitUint8(vex_byte0);
+  EmitUint8(vex_byte1);
+  EmitUint8(0xD5);
+  EmitXmmRegisterOperand(dst, src2);
+}
void X86Assembler::paddq(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);