Added a few more integral SIMD extensions for x86/x86_64 (SSE).
Rationale:
The ART vectorizer needs a couple of extra SIMD operations, which are
added here before sending out the larger general CL.
Test: assembler_x86[_64]_test
Bug: 34083438
Change-Id: Id156283424ad311e6109b360efcd409c671cd5b7
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 4343e2e..2999599 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -408,14 +408,9 @@
void movsd(const Address& dst, XmmRegister src);
void movsd(XmmRegister dst, XmmRegister src);
- void psrlq(XmmRegister reg, const Immediate& shift_count);
- void punpckldq(XmmRegister dst, XmmRegister src);
-
void movhpd(XmmRegister dst, const Address& src);
void movhpd(const Address& dst, XmmRegister src);
- void psrldq(XmmRegister reg, const Immediate& shift_count);
-
void addsd(XmmRegister dst, XmmRegister src);
void addsd(XmmRegister dst, const Address& src);
void subsd(XmmRegister dst, XmmRegister src);
@@ -436,10 +431,20 @@
void movdqa(const Address& dst, XmmRegister src); // store aligned
void movdqu(const Address& dst, XmmRegister src); // store unaligned
- void paddd(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void psubb(XmmRegister dst, XmmRegister src);
+
+ void paddw(XmmRegister dst, XmmRegister src);
+ void psubw(XmmRegister dst, XmmRegister src);
+ void pmullw(XmmRegister dst, XmmRegister src);
+
+ void paddd(XmmRegister dst, XmmRegister src);
void psubd(XmmRegister dst, XmmRegister src);
void pmulld(XmmRegister dst, XmmRegister src);
+ void paddq(XmmRegister dst, XmmRegister src);
+ void psubq(XmmRegister dst, XmmRegister src);
+
void cvtsi2ss(XmmRegister dst, Register src);
void cvtsi2sd(XmmRegister dst, Register src);
@@ -489,6 +494,24 @@
void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
+ void punpcklbw(XmmRegister dst, XmmRegister src);
+ void punpcklwd(XmmRegister dst, XmmRegister src);
+ void punpckldq(XmmRegister dst, XmmRegister src);
+ void punpcklqdq(XmmRegister dst, XmmRegister src);
+
+ void psllw(XmmRegister reg, const Immediate& shift_count);
+ void pslld(XmmRegister reg, const Immediate& shift_count);
+ void psllq(XmmRegister reg, const Immediate& shift_count);
+
+ void psraw(XmmRegister reg, const Immediate& shift_count);
+ void psrad(XmmRegister reg, const Immediate& shift_count);
+ // no psraq
+
+ void psrlw(XmmRegister reg, const Immediate& shift_count);
+ void psrld(XmmRegister reg, const Immediate& shift_count);
+ void psrlq(XmmRegister reg, const Immediate& shift_count);
+ void psrldq(XmmRegister reg, const Immediate& shift_count);
+
void flds(const Address& src);
void fstps(const Address& dst);
void fsts(const Address& dst);