MIPS: Introduce a few MSA instructions

These instructions are needed for SIMD reduction.
Also added assembler tests for each instruction.

Test: mma test-art-host-gtest

Change-Id: I0f02618a14b4cbcc3b81ce51dd2586fa4cdbfd18
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index ae7636b..ad84412 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -650,6 +650,24 @@
   }
 
   template <typename ImmType>
+  std::string RepeatRVIb(void (Ass::*f)(Reg, VecReg, ImmType),
+                         int imm_bits,
+                         const std::string& fmt,
+                         int bias = 0,
+                         int multiplier = 1) {
+    return RepeatTemplatedRegistersImmBits<Reg, VecReg, ImmType>(
+        f,
+        imm_bits,
+        GetRegisters(),
+        GetVectorRegisters(),
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        &AssemblerTest::GetVecRegName,
+        fmt,
+        bias,
+        multiplier);
+  }
+
+  template <typename ImmType>
   std::string RepeatVVIb(void (Ass::*f)(VecReg, VecReg, ImmType),
                          int imm_bits,
                          const std::string& fmt,
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index e85645b..cbb2c0e 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -2800,6 +2800,74 @@
                 static_cast<FRegister>(ws));
 }
 
+void MipsAssembler::Copy_sB(Register rd, VectorRegister ws, int n4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(n4)) << n4;
+  DsFsmInstrRf(EmitMsaELM(0x2, n4 | kMsaDfNByteMask, ws, static_cast<VectorRegister>(rd), 0x19),
+               rd,
+               static_cast<FRegister>(ws));
+}
+
+void MipsAssembler::Copy_sH(Register rd, VectorRegister ws, int n3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(n3)) << n3;
+  DsFsmInstrRf(EmitMsaELM(0x2, n3 | kMsaDfNHalfwordMask, ws, static_cast<VectorRegister>(rd), 0x19),
+               rd,
+               static_cast<FRegister>(ws));
+}
+
+void MipsAssembler::Copy_sW(Register rd, VectorRegister ws, int n2) {
+  CHECK(HasMsa());
+  CHECK(IsUint<2>(n2)) << n2;
+  DsFsmInstrRf(EmitMsaELM(0x2, n2 | kMsaDfNWordMask, ws, static_cast<VectorRegister>(rd), 0x19),
+               rd,
+               static_cast<FRegister>(ws));
+}
+
+void MipsAssembler::Copy_uB(Register rd, VectorRegister ws, int n4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(n4)) << n4;
+  DsFsmInstrRf(EmitMsaELM(0x3, n4 | kMsaDfNByteMask, ws, static_cast<VectorRegister>(rd), 0x19),
+               rd,
+               static_cast<FRegister>(ws));
+}
+
+void MipsAssembler::Copy_uH(Register rd, VectorRegister ws, int n3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(n3)) << n3;
+  DsFsmInstrRf(EmitMsaELM(0x3, n3 | kMsaDfNHalfwordMask, ws, static_cast<VectorRegister>(rd), 0x19),
+               rd,
+               static_cast<FRegister>(ws));
+}
+
+void MipsAssembler::InsertB(VectorRegister wd, Register rs, int n4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(n4)) << n4;
+  DsFsmInstrFffr(EmitMsaELM(0x4, n4 | kMsaDfNByteMask, static_cast<VectorRegister>(rs), wd, 0x19),
+                 static_cast<FRegister>(wd),
+                 static_cast<FRegister>(wd),
+                 rs);
+}
+
+void MipsAssembler::InsertH(VectorRegister wd, Register rs, int n3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(n3)) << n3;
+  DsFsmInstrFffr(
+      EmitMsaELM(0x4, n3 | kMsaDfNHalfwordMask, static_cast<VectorRegister>(rs), wd, 0x19),
+      static_cast<FRegister>(wd),
+      static_cast<FRegister>(wd),
+      rs);
+}
+
+void MipsAssembler::InsertW(VectorRegister wd, Register rs, int n2) {
+  CHECK(HasMsa());
+  CHECK(IsUint<2>(n2)) << n2;
+  DsFsmInstrFffr(EmitMsaELM(0x4, n2 | kMsaDfNWordMask, static_cast<VectorRegister>(rs), wd, 0x19),
+                 static_cast<FRegister>(wd),
+                 static_cast<FRegister>(wd),
+                 rs);
+}
+
 void MipsAssembler::FillB(VectorRegister wd, Register rs) {
   CHECK(HasMsa());
   DsFsmInstrFr(EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e),
@@ -2921,6 +2989,38 @@
                rs);
 }
 
+void MipsAssembler::IlvlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x14),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x14),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x14),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x14),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
 void MipsAssembler::IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
   DsFsmInstrFff(EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x14),
@@ -2953,6 +3053,70 @@
                 static_cast<FRegister>(wt));
 }
 
+void MipsAssembler::IlvevB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x14),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvevH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x14),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvevW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x14),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvevD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x14),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvodB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x14),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvodH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x14),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvodW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x14),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvodD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x14),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
 void MipsAssembler::MaddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
   DsFsmInstrFff(EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x12),
@@ -3049,6 +3213,54 @@
                 static_cast<FRegister>(wt));
 }
 
+void MipsAssembler::Hadd_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x15),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Hadd_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x15),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Hadd_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x15),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Hadd_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x15),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x15),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  DsFsmInstrFff(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15),
+                static_cast<FRegister>(wd),
+                static_cast<FRegister>(ws),
+                static_cast<FRegister>(wt));
+}
+
 void MipsAssembler::ReplicateFPToVectorRegister(VectorRegister dst,
                                                 FRegister src,
                                                 bool is_double) {
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 1c5b442..c0ea29f 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -601,6 +601,14 @@
   void SplatiH(VectorRegister wd, VectorRegister ws, int n3);
   void SplatiW(VectorRegister wd, VectorRegister ws, int n2);
   void SplatiD(VectorRegister wd, VectorRegister ws, int n1);
+  void Copy_sB(Register rd, VectorRegister ws, int n4);
+  void Copy_sH(Register rd, VectorRegister ws, int n3);
+  void Copy_sW(Register rd, VectorRegister ws, int n2);
+  void Copy_uB(Register rd, VectorRegister ws, int n4);
+  void Copy_uH(Register rd, VectorRegister ws, int n3);
+  void InsertB(VectorRegister wd, Register rs, int n4);
+  void InsertH(VectorRegister wd, Register rs, int n3);
+  void InsertW(VectorRegister wd, Register rs, int n2);
   void FillB(VectorRegister wd, Register rs);
   void FillH(VectorRegister wd, Register rs);
   void FillW(VectorRegister wd, Register rs);
@@ -618,10 +626,22 @@
   void StW(VectorRegister wd, Register rs, int offset);
   void StD(VectorRegister wd, Register rs, int offset);
 
+  void IlvlB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvlH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvlW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvlD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvevB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvevH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvevW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvevD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvodB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvodH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvodW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvodD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
   void MaddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void MaddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
@@ -636,6 +656,13 @@
   void FmsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void FmsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
+  void Hadd_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
   // Helper for replicating floating point value in all destination elements.
   void ReplicateFPToVectorRegister(VectorRegister dst, FRegister src, bool is_double);
 
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index b12b6b6..c76a568 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -2185,6 +2185,46 @@
             "splati.d");
 }
 
+TEST_F(AssemblerMIPS32r6Test, Copy_sB) {
+  DriverStr(RepeatRVIb(&mips::MipsAssembler::Copy_sB, 4, "copy_s.b ${reg1}, ${reg2}[{imm}]"),
+            "copy_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Copy_sH) {
+  DriverStr(RepeatRVIb(&mips::MipsAssembler::Copy_sH, 3, "copy_s.h ${reg1}, ${reg2}[{imm}]"),
+            "copy_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Copy_sW) {
+  DriverStr(RepeatRVIb(&mips::MipsAssembler::Copy_sW, 2, "copy_s.w ${reg1}, ${reg2}[{imm}]"),
+            "copy_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Copy_uB) {
+  DriverStr(RepeatRVIb(&mips::MipsAssembler::Copy_uB, 4, "copy_u.b ${reg1}, ${reg2}[{imm}]"),
+            "copy_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Copy_uH) {
+  DriverStr(RepeatRVIb(&mips::MipsAssembler::Copy_uH, 3, "copy_u.h ${reg1}, ${reg2}[{imm}]"),
+            "copy_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, InsertB) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::InsertB, 4, "insert.b ${reg1}[{imm}], ${reg2}"),
+            "insert.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, InsertH) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::InsertH, 3, "insert.h ${reg1}[{imm}], ${reg2}"),
+            "insert.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, InsertW) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::InsertW, 2, "insert.w ${reg1}[{imm}], ${reg2}"),
+            "insert.w");
+}
+
 TEST_F(AssemblerMIPS32r6Test, FillB) {
   DriverStr(RepeatVR(&mips::MipsAssembler::FillB, "fill.b ${reg1}, ${reg2}"), "fill.b");
 }
@@ -2251,6 +2291,22 @@
             "st.d");
 }
 
+TEST_F(AssemblerMIPS32r6Test, IlvlB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvlB, "ilvl.b ${reg1}, ${reg2}, ${reg3}"), "ilvl.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvlH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvlH, "ilvl.h ${reg1}, ${reg2}, ${reg3}"), "ilvl.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvlW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvlW, "ilvl.w ${reg1}, ${reg2}, ${reg3}"), "ilvl.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvlD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvlD, "ilvl.d ${reg1}, ${reg2}, ${reg3}"), "ilvl.d");
+}
+
 TEST_F(AssemblerMIPS32r6Test, IlvrB) {
   DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrB, "ilvr.b ${reg1}, ${reg2}, ${reg3}"), "ilvr.b");
 }
@@ -2267,6 +2323,46 @@
   DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrD, "ilvr.d ${reg1}, ${reg2}, ${reg3}"), "ilvr.d");
 }
 
+TEST_F(AssemblerMIPS32r6Test, IlvevB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvevB, "ilvev.b ${reg1}, ${reg2}, ${reg3}"),
+            "ilvev.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvevH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvevH, "ilvev.h ${reg1}, ${reg2}, ${reg3}"),
+            "ilvev.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvevW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvevW, "ilvev.w ${reg1}, ${reg2}, ${reg3}"),
+            "ilvev.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvevD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvevD, "ilvev.d ${reg1}, ${reg2}, ${reg3}"),
+            "ilvev.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvodB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvodB, "ilvod.b ${reg1}, ${reg2}, ${reg3}"),
+            "ilvod.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvodH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvodH, "ilvod.h ${reg1}, ${reg2}, ${reg3}"),
+            "ilvod.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvodW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvodW, "ilvod.w ${reg1}, ${reg2}, ${reg3}"),
+            "ilvod.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvodD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvodD, "ilvod.d ${reg1}, ${reg2}, ${reg3}"),
+            "ilvod.d");
+}
+
 TEST_F(AssemblerMIPS32r6Test, MaddvB) {
   DriverStr(RepeatVVV(&mips::MipsAssembler::MaddvB, "maddv.b ${reg1}, ${reg2}, ${reg3}"),
             "maddv.b");
@@ -2287,6 +2383,36 @@
             "maddv.d");
 }
 
+TEST_F(AssemblerMIPS32r6Test, Hadd_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Hadd_sH, "hadd_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Hadd_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Hadd_sW, "hadd_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Hadd_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Hadd_sD, "hadd_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Hadd_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Hadd_uH, "hadd_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Hadd_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Hadd_uW, "hadd_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Hadd_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Hadd_uD, "hadd_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_u.d");
+}
+
 TEST_F(AssemblerMIPS32r6Test, MsubvB) {
   DriverStr(RepeatVVV(&mips::MipsAssembler::MsubvB, "msubv.b ${reg1}, ${reg2}, ${reg3}"),
             "msubv.b");
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 606d4c3..d8a4531 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1874,6 +1874,72 @@
   EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19);
 }
 
+void Mips64Assembler::Copy_sB(GpuRegister rd, VectorRegister ws, int n4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(n4)) << n4;
+  EmitMsaELM(0x2, n4 | kMsaDfNByteMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::Copy_sH(GpuRegister rd, VectorRegister ws, int n3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(n3)) << n3;
+  EmitMsaELM(0x2, n3 | kMsaDfNHalfwordMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::Copy_sW(GpuRegister rd, VectorRegister ws, int n2) {
+  CHECK(HasMsa());
+  CHECK(IsUint<2>(n2)) << n2;
+  EmitMsaELM(0x2, n2 | kMsaDfNWordMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::Copy_sD(GpuRegister rd, VectorRegister ws, int n1) {
+  CHECK(HasMsa());
+  CHECK(IsUint<1>(n1)) << n1;
+  EmitMsaELM(0x2, n1 | kMsaDfNDoublewordMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::Copy_uB(GpuRegister rd, VectorRegister ws, int n4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(n4)) << n4;
+  EmitMsaELM(0x3, n4 | kMsaDfNByteMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::Copy_uH(GpuRegister rd, VectorRegister ws, int n3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(n3)) << n3;
+  EmitMsaELM(0x3, n3 | kMsaDfNHalfwordMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::Copy_uW(GpuRegister rd, VectorRegister ws, int n2) {
+  CHECK(HasMsa());
+  CHECK(IsUint<2>(n2)) << n2;
+  EmitMsaELM(0x3, n2 | kMsaDfNWordMask, ws, static_cast<VectorRegister>(rd), 0x19);
+}
+
+void Mips64Assembler::InsertB(VectorRegister wd, GpuRegister rs, int n4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(n4)) << n4;
+  EmitMsaELM(0x4, n4 | kMsaDfNByteMask, static_cast<VectorRegister>(rs), wd, 0x19);
+}
+
+void Mips64Assembler::InsertH(VectorRegister wd, GpuRegister rs, int n3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(n3)) << n3;
+  EmitMsaELM(0x4, n3 | kMsaDfNHalfwordMask, static_cast<VectorRegister>(rs), wd, 0x19);
+}
+
+void Mips64Assembler::InsertW(VectorRegister wd, GpuRegister rs, int n2) {
+  CHECK(HasMsa());
+  CHECK(IsUint<2>(n2)) << n2;
+  EmitMsaELM(0x4, n2 | kMsaDfNWordMask, static_cast<VectorRegister>(rs), wd, 0x19);
+}
+
+void Mips64Assembler::InsertD(VectorRegister wd, GpuRegister rs, int n1) {
+  CHECK(HasMsa());
+  CHECK(IsUint<1>(n1)) << n1;
+  EmitMsaELM(0x4, n1 | kMsaDfNDoublewordMask, static_cast<VectorRegister>(rs), wd, 0x19);
+}
+
 void Mips64Assembler::FillB(VectorRegister wd, GpuRegister rs) {
   CHECK(HasMsa());
   EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e);
@@ -1972,6 +2038,26 @@
   EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3);
 }
 
+void Mips64Assembler::IlvlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x14);
+}
+
 void Mips64Assembler::IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
   EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x14);
@@ -1992,6 +2078,46 @@
   EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x14);
 }
 
+void Mips64Assembler::IlvevB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvevH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvevW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvevD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvodB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvodH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvodW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x14);
+}
+
+void Mips64Assembler::IlvodD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x14);
+}
+
 void Mips64Assembler::MaddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
   EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x12);
@@ -2052,6 +2178,36 @@
   EmitMsa3R(0x2, 0x3, wt, ws, wd, 0x1b);
 }
 
+void Mips64Assembler::Hadd_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x15);
+}
+
+void Mips64Assembler::Hadd_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x15);
+}
+
+void Mips64Assembler::Hadd_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x15);
+}
+
+void Mips64Assembler::Hadd_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x15);
+}
+
+void Mips64Assembler::Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x15);
+}
+
+void Mips64Assembler::Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15);
+}
+
 void Mips64Assembler::ReplicateFPToVectorRegister(VectorRegister dst,
                                                   FpuRegister src,
                                                   bool is_double) {
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index a3787ac..d67fb00 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -785,6 +785,17 @@
   void SplatiH(VectorRegister wd, VectorRegister ws, int n3);
   void SplatiW(VectorRegister wd, VectorRegister ws, int n2);
   void SplatiD(VectorRegister wd, VectorRegister ws, int n1);
+  void Copy_sB(GpuRegister rd, VectorRegister ws, int n4);
+  void Copy_sH(GpuRegister rd, VectorRegister ws, int n3);
+  void Copy_sW(GpuRegister rd, VectorRegister ws, int n2);
+  void Copy_sD(GpuRegister rd, VectorRegister ws, int n1);
+  void Copy_uB(GpuRegister rd, VectorRegister ws, int n4);
+  void Copy_uH(GpuRegister rd, VectorRegister ws, int n3);
+  void Copy_uW(GpuRegister rd, VectorRegister ws, int n2);
+  void InsertB(VectorRegister wd, GpuRegister rs, int n4);
+  void InsertH(VectorRegister wd, GpuRegister rs, int n3);
+  void InsertW(VectorRegister wd, GpuRegister rs, int n2);
+  void InsertD(VectorRegister wd, GpuRegister rs, int n1);
   void FillB(VectorRegister wd, GpuRegister rs);
   void FillH(VectorRegister wd, GpuRegister rs);
   void FillW(VectorRegister wd, GpuRegister rs);
@@ -803,10 +814,22 @@
   void StW(VectorRegister wd, GpuRegister rs, int offset);
   void StD(VectorRegister wd, GpuRegister rs, int offset);
 
+  void IlvlB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvlH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvlW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvlD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvevB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvevH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvevW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvevD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvodB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvodH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvodW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvodD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
   void MaddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void MaddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
@@ -821,6 +844,13 @@
   void FmsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void FmsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
+  void Hadd_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
   // Helper for replicating floating point value in all destination elements.
   void ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double);
 
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index bf0326d..164af78 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -3399,6 +3399,61 @@
             "splati.d");
 }
 
+TEST_F(AssemblerMIPS64Test, Copy_sB) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_sB, 4, "copy_s.b ${reg1}, ${reg2}[{imm}]"),
+            "copy_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Copy_sH) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_sH, 3, "copy_s.h ${reg1}, ${reg2}[{imm}]"),
+            "copy_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Copy_sW) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_sW, 2, "copy_s.w ${reg1}, ${reg2}[{imm}]"),
+            "copy_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Copy_sD) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_sD, 1, "copy_s.d ${reg1}, ${reg2}[{imm}]"),
+            "copy_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Copy_uB) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_uB, 4, "copy_u.b ${reg1}, ${reg2}[{imm}]"),
+            "copy_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Copy_uH) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_uH, 3, "copy_u.h ${reg1}, ${reg2}[{imm}]"),
+            "copy_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Copy_uW) {
+  DriverStr(RepeatRVIb(&mips64::Mips64Assembler::Copy_uW, 2, "copy_u.w ${reg1}, ${reg2}[{imm}]"),
+            "copy_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, InsertB) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::InsertB, 4, "insert.b ${reg1}[{imm}], ${reg2}"),
+            "insert.b");
+}
+
+TEST_F(AssemblerMIPS64Test, InsertH) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::InsertH, 3, "insert.h ${reg1}[{imm}], ${reg2}"),
+            "insert.h");
+}
+
+TEST_F(AssemblerMIPS64Test, InsertW) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::InsertW, 2, "insert.w ${reg1}[{imm}], ${reg2}"),
+            "insert.w");
+}
+
+TEST_F(AssemblerMIPS64Test, InsertD) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::InsertD, 1, "insert.d ${reg1}[{imm}], ${reg2}"),
+            "insert.d");
+}
+
 TEST_F(AssemblerMIPS64Test, FillB) {
   DriverStr(RepeatVR(&mips64::Mips64Assembler::FillB, "fill.b ${reg1}, ${reg2}"), "fill.b");
 }
@@ -3469,6 +3524,26 @@
             "st.d");
 }
 
+TEST_F(AssemblerMIPS64Test, IlvlB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvlB, "ilvl.b ${reg1}, ${reg2}, ${reg3}"),
+            "ilvl.b");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvlH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvlH, "ilvl.h ${reg1}, ${reg2}, ${reg3}"),
+            "ilvl.h");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvlW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvlW, "ilvl.w ${reg1}, ${reg2}, ${reg3}"),
+            "ilvl.w");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvlD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvlD, "ilvl.d ${reg1}, ${reg2}, ${reg3}"),
+            "ilvl.d");
+}
+
 TEST_F(AssemblerMIPS64Test, IlvrB) {
   DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvrB, "ilvr.b ${reg1}, ${reg2}, ${reg3}"),
             "ilvr.b");
@@ -3489,6 +3564,46 @@
             "ilvr.d");
 }
 
+TEST_F(AssemblerMIPS64Test, IlvevB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvevB, "ilvev.b ${reg1}, ${reg2}, ${reg3}"),
+            "ilvev.b");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvevH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvevH, "ilvev.h ${reg1}, ${reg2}, ${reg3}"),
+            "ilvev.h");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvevW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvevW, "ilvev.w ${reg1}, ${reg2}, ${reg3}"),
+            "ilvev.w");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvevD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvevD, "ilvev.d ${reg1}, ${reg2}, ${reg3}"),
+            "ilvev.d");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvodB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvodB, "ilvod.b ${reg1}, ${reg2}, ${reg3}"),
+            "ilvod.b");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvodH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvodH, "ilvod.h ${reg1}, ${reg2}, ${reg3}"),
+            "ilvod.h");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvodW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvodW, "ilvod.w ${reg1}, ${reg2}, ${reg3}"),
+            "ilvod.w");
+}
+
+TEST_F(AssemblerMIPS64Test, IlvodD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::IlvodD, "ilvod.d ${reg1}, ${reg2}, ${reg3}"),
+            "ilvod.d");
+}
+
 TEST_F(AssemblerMIPS64Test, MaddvB) {
   DriverStr(RepeatVVV(&mips64::Mips64Assembler::MaddvB, "maddv.b ${reg1}, ${reg2}, ${reg3}"),
             "maddv.b");
@@ -3509,6 +3624,36 @@
             "maddv.d");
 }
 
+TEST_F(AssemblerMIPS64Test, Hadd_sH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Hadd_sH, "hadd_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Hadd_sW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Hadd_sW, "hadd_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Hadd_sD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Hadd_sD, "hadd_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Hadd_uH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Hadd_uH, "hadd_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Hadd_uW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Hadd_uW, "hadd_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Hadd_uD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Hadd_uD, "hadd_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "hadd_u.d");
+}
+
 TEST_F(AssemblerMIPS64Test, MsubvB) {
   DriverStr(RepeatVVV(&mips64::Mips64Assembler::MsubvB, "msubv.b ${reg1}, ${reg2}, ${reg3}"),
             "msubv.b");