MIPS: Introduce a few MSA instructions

These instructions are needed for SIMD reduction.
Also added assembler tests for each instruction.

Test: mma test-art-host-gtest

Change-Id: I0f02618a14b4cbcc3b81ce51dd2586fa4cdbfd18
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 606d4c3..d8a4531 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1874,6 +1874,72 @@
   EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19);
 }
 
+void Mips64Assembler::Copy_sB(GpuRegister rd, VectorRegister ws, int n4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(n4)) << n4;
+  EmitMsaELM(0x2, n4 | kMsaDfNByteMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::Copy_sH(GpuRegister rd, VectorRegister ws, int n3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(n3)) << n3;
+  EmitMsaELM(0x2, n3 | kMsaDfNHalfwordMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::Copy_sW(GpuRegister rd, VectorRegister ws, int n2) {
+  CHECK(HasMsa());
+  CHECK(IsUint<2>(n2)) << n2;
+  EmitMsaELM(0x2, n2 | kMsaDfNWordMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::Copy_sD(GpuRegister rd, VectorRegister ws, int n1) {
+  CHECK(HasMsa());
+  CHECK(IsUint<1>(n1)) << n1;
+  EmitMsaELM(0x2, n1 | kMsaDfNDoublewordMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::Copy_uB(GpuRegister rd, VectorRegister ws, int n4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(n4)) << n4;
+  EmitMsaELM(0x3, n4 | kMsaDfNByteMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::Copy_uH(GpuRegister rd, VectorRegister ws, int n3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(n3)) << n3;
+  EmitMsaELM(0x3, n3 | kMsaDfNHalfwordMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::Copy_uW(GpuRegister rd, VectorRegister ws, int n2) {
+  CHECK(HasMsa());
+  CHECK(IsUint<2>(n2)) << n2;
+  EmitMsaELM(0x3, n2 | kMsaDfNWordMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::InsertB(VectorRegister wd, GpuRegister rs, int n4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(n4)) << n4;
+  EmitMsaELM(0x4, n4 | kMsaDfNByteMask, static_cast<VectorRegister>(rs), wd, 0x19);
+}
+
+void Mips64Assembler::InsertH(VectorRegister wd, GpuRegister rs, int n3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(n3)) << n3;
+  EmitMsaELM(0x4, n3 | kMsaDfNHalfwordMask, static_cast<VectorRegister>(rs), wd, 0x19);
+}
+
+void Mips64Assembler::InsertW(VectorRegister wd, GpuRegister rs, int n2) {
+  CHECK(HasMsa());
+  CHECK(IsUint<2>(n2)) << n2;
+  EmitMsaELM(0x4, n2 | kMsaDfNWordMask, static_cast<VectorRegister>(rs), wd, 0x19);
+}
+
+void Mips64Assembler::InsertD(VectorRegister wd, GpuRegister rs, int n1) {
+  CHECK(HasMsa());
+  CHECK(IsUint<1>(n1)) << n1;
+  EmitMsaELM(0x4, n1 | kMsaDfNDoublewordMask, static_cast<VectorRegister>(rs), wd, 0x19);
+}
+
 void Mips64Assembler::FillB(VectorRegister wd, GpuRegister rs) {
   CHECK(HasMsa());
   EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e);
@@ -1972,6 +2038,26 @@
   EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3);
 }
 
+void Mips64Assembler::IlvlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x14);
+}
+
 void Mips64Assembler::IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
   EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x14);
@@ -1992,6 +2078,46 @@
   EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x14);
 }
 
+void Mips64Assembler::IlvevB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvevH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvevW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvevD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvodB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvodH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvodW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvodD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x14);
+}
+
 void Mips64Assembler::MaddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
   EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x12);
@@ -2052,6 +2178,36 @@
   EmitMsa3R(0x2, 0x3, wt, ws, wd, 0x1b);
 }
 
+void Mips64Assembler::Hadd_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x15);
+}
+
+void Mips64Assembler::Hadd_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x15);
+}
+
+void Mips64Assembler::Hadd_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x15);
+}
+
+void Mips64Assembler::Hadd_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x15);
+}
+
+void Mips64Assembler::Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x15);
+}
+
+void Mips64Assembler::Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15);
+}
+
 void Mips64Assembler::ReplicateFPToVectorRegister(VectorRegister dst,
                                                   FpuRegister src,
                                                   bool is_double) {
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index a3787ac..d67fb00 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -785,6 +785,17 @@
   void SplatiH(VectorRegister wd, VectorRegister ws, int n3);
   void SplatiW(VectorRegister wd, VectorRegister ws, int n2);
   void SplatiD(VectorRegister wd, VectorRegister ws, int n1);
+  void Copy_sB(GpuRegister rd, VectorRegister ws, int n4);
+  void Copy_sH(GpuRegister rd, VectorRegister ws, int n3);
+  void Copy_sW(GpuRegister rd, VectorRegister ws, int n2);
+  void Copy_sD(GpuRegister rd, VectorRegister ws, int n1);
+  void Copy_uB(GpuRegister rd, VectorRegister ws, int n4);
+  void Copy_uH(GpuRegister rd, VectorRegister ws, int n3);
+  void Copy_uW(GpuRegister rd, VectorRegister ws, int n2);
+  void InsertB(VectorRegister wd, GpuRegister rs, int n4);
+  void InsertH(VectorRegister wd, GpuRegister rs, int n3);
+  void InsertW(VectorRegister wd, GpuRegister rs, int n2);
+  void InsertD(VectorRegister wd, GpuRegister rs, int n1);
   void FillB(VectorRegister wd, GpuRegister rs);
   void FillH(VectorRegister wd, GpuRegister rs);
   void FillW(VectorRegister wd, GpuRegister rs);
@@ -803,10 +814,22 @@
   void StW(VectorRegister wd, GpuRegister rs, int offset);
   void StD(VectorRegister wd, GpuRegister rs, int offset);
 
+  void IlvlB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvlH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvlW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvlD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvevB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvevH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvevW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvevD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvodB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvodH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvodW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvodD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
   void MaddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void MaddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
@@ -821,6 +844,13 @@
   void FmsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void FmsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
+  void Hadd_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
   // Helper for replicating floating point value in all destination elements.
   void ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double);
 
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index bf0326d..164af78 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -3399,6 +3399,61 @@
             "splati.d");
 }
 
+TEST_F(AssemblerMIPS64Test, Copy_sB) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_sB, 4, "copy_s.b ${reg1}, ${reg2}[{imm}]"),
+            "copy_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Copy_sH) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_sH, 3, "copy_s.h ${reg1}, ${reg2}[{imm}]"),
+            "copy_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Copy_sW) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_sW, 2, "copy_s.w ${reg1}, ${reg2}[{imm}]"),
+            "copy_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Copy_sD) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_sD, 1, "copy_s.d ${reg1}, ${reg2}[{imm}]"),
+            "copy_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Copy_uB) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_uB, 4, "copy_u.b ${reg1}, ${reg2}[{imm}]"),
+            "copy_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Copy_uH) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_uH, 3, "copy_u.h ${reg1}, ${reg2}[{imm}]"),
+            "copy_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Copy_uW) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_uW, 2, "copy_u.w ${reg1}, ${reg2}[{imm}]"),
+            "copy_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, InsertB) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::InsertB, 4, "insert.b ${reg1}[{imm}], ${reg2}"),
+            "insert.b");
+}
+
+TEST_F(AssemblerMIPS64Test, InsertH) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::InsertH, 3, "insert.h ${reg1}[{imm}], ${reg2}"),
+            "insert.h");
+}
+
+TEST_F(AssemblerMIPS64Test, InsertW) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::InsertW, 2, "insert.w ${reg1}[{imm}], ${reg2}"),
+            "insert.w");
+}
+
+TEST_F(AssemblerMIPS64Test, InsertD) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::InsertD, 1, "insert.d ${reg1}[{imm}], ${reg2}"),
+            "insert.d");
+}
+
 TEST_F(AssemblerMIPS64Test, FillB) {
   DriverStr(RepeatVR(&mips64::Mips64Assembler::FillB, "fill.b ${reg1}, ${reg2}"), "fill.b");
 }
@@ -3469,6 +3524,26 @@
             "st.d");
 }
 
+TEST_F(AssemblerMIPS64Test, IlvlB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvlB, "ilvl.b ${reg1}, ${reg2}, ${reg3}"),
+            "ilvl.b");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvlH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvlH, "ilvl.h ${reg1}, ${reg2}, ${reg3}"),
+            "ilvl.h");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvlW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvlW, "ilvl.w ${reg1}, ${reg2}, ${reg3}"),
+            "ilvl.w");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvlD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvlD, "ilvl.d ${reg1}, ${reg2}, ${reg3}"),
+            "ilvl.d");
+}
+
 TEST_F(AssemblerMIPS64Test, IlvrB) {
   DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrB, "ilvr.b ${reg1}, ${reg2}, ${reg3}"),
             "ilvr.b");
@@ -3489,6 +3564,46 @@
             "ilvr.d");
 }
 
+TEST_F(AssemblerMIPS64Test, IlvevB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvevB, "ilvev.b ${reg1}, ${reg2}, ${reg3}"),
+            "ilvev.b");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvevH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvevH, "ilvev.h ${reg1}, ${reg2}, ${reg3}"),
+            "ilvev.h");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvevW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvevW, "ilvev.w ${reg1}, ${reg2}, ${reg3}"),
+            "ilvev.w");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvevD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvevD, "ilvev.d ${reg1}, ${reg2}, ${reg3}"),
+            "ilvev.d");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvodB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvodB, "ilvod.b ${reg1}, ${reg2}, ${reg3}"),
+            "ilvod.b");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvodH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvodH, "ilvod.h ${reg1}, ${reg2}, ${reg3}"),
+            "ilvod.h");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvodW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvodW, "ilvod.w ${reg1}, ${reg2}, ${reg3}"),
+            "ilvod.w");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvodD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvodD, "ilvod.d ${reg1}, ${reg2}, ${reg3}"),
+            "ilvod.d");
+}
+
 TEST_F(AssemblerMIPS64Test, MaddvB) {
   DriverStr(RepeatVVV(&mips64::Mips64Assembler::MaddvB, "maddv.b ${reg1}, ${reg2}, ${reg3}"),
             "maddv.b");
@@ -3509,6 +3624,36 @@
             "maddv.d");
 }
 
+TEST_F(AssemblerMIPS64Test, Hadd_sH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Hadd_sH, "hadd_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Hadd_sW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Hadd_sW, "hadd_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Hadd_sD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Hadd_sD, "hadd_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Hadd_uH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Hadd_uH, "hadd_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Hadd_uW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Hadd_uW, "hadd_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Hadd_uD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Hadd_uD, "hadd_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_u.d");
+}
+
 TEST_F(AssemblerMIPS64Test, MsubvB) {
   DriverStr(RepeatVVV(&mips64::Mips64Assembler::MsubvB, "msubv.b ${reg1}, ${reg2}, ${reg3}"),
             "msubv.b");