Introduce a number of SIMD extensions for x86/x86_64 (SSE).
Rationale:
As a first step in exploring how useful an ART vectorizer may be,
this change introduces a number of floating-point SIMD instructions.
Test: assembler_x86[_64]_test
Bug: 34083438
Change-Id: I0285dd9fca51f31875a6bbe728f873c48089940d
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index acad86d..5923a41 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -390,7 +390,11 @@
void leaq(CpuRegister dst, const Address& src);
void leal(CpuRegister dst, const Address& src);
- void movaps(XmmRegister dst, XmmRegister src);
+ void movaps(XmmRegister dst, XmmRegister src); // move
+ void movaps(XmmRegister dst, const Address& src); // load aligned
+ void movups(XmmRegister dst, const Address& src); // load unaligned
+ void movaps(const Address& dst, XmmRegister src); // store aligned
+ void movups(const Address& dst, XmmRegister src); // store unaligned
void movss(XmmRegister dst, const Address& src);
void movss(const Address& dst, XmmRegister src);
@@ -413,6 +417,17 @@
void divss(XmmRegister dst, XmmRegister src);
void divss(XmmRegister dst, const Address& src);
+ void addps(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void subps(XmmRegister dst, XmmRegister src);
+ void mulps(XmmRegister dst, XmmRegister src);
+ void divps(XmmRegister dst, XmmRegister src);
+
+ void movapd(XmmRegister dst, XmmRegister src); // move
+ void movapd(XmmRegister dst, const Address& src); // load aligned
+ void movupd(XmmRegister dst, const Address& src); // load unaligned
+ void movapd(const Address& dst, XmmRegister src); // store aligned
+ void movupd(const Address& dst, XmmRegister src); // store unaligned
+
void movsd(XmmRegister dst, const Address& src);
void movsd(const Address& dst, XmmRegister src);
void movsd(XmmRegister dst, XmmRegister src);
@@ -426,6 +441,11 @@
void divsd(XmmRegister dst, XmmRegister src);
void divsd(XmmRegister dst, const Address& src);
+ void addpd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void subpd(XmmRegister dst, XmmRegister src);
+ void mulpd(XmmRegister dst, XmmRegister src);
+ void divpd(XmmRegister dst, XmmRegister src);
+
void cvtsi2ss(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version.
void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);