MIPS64: Improve non-trivial 64-bit constant loading.

- Add special case where the upper 32 bits of the constant are equal to
  the lower 32 bits of the constant. Use the "dinsu" instruction to
  replicate the bottom 32 bits into the top 32 bits.

- Test output of LoadConst32()/LoadConst64() against various test
  constants.
  o Make the implementation of LoadConst64() into a template.
  o Using the template code for LoadConst64(), implement test
    cases which simulate the generated code sequence and check
    that it computes the value it is supposed to load.
  o Add conditionally compiled code to verify that all of the
    various data paths through the LoadConst64() function
    actually get exercised.

Test: Booted MIPS64 in QEMU, and ran assembler tests.

Change-Id: Ie1ec050e65c666168a9ea582bae9fda97e4cf36a
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index a2621cb..1a21df9 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1038,129 +1038,15 @@
 }
 
 void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) {
-  if (IsUint<16>(value)) {
-    // Use OR with (unsigned) immediate to encode 16b unsigned int.
-    Ori(rd, ZERO, value);
-  } else if (IsInt<16>(value)) {
-    // Use ADD with (signed) immediate to encode 16b signed int.
-    Addiu(rd, ZERO, value);
-  } else {
-    Lui(rd, value >> 16);
-    if (value & 0xFFFF)
-      Ori(rd, rd, value);
-  }
+  TemplateLoadConst32(this, rd, value);
+}
+
+// Testing hook invoked by TemplateLoadConst64(); deliberately a no-op in the real assembler.
+void Mips64Assembler::RecordLoadConst64Path(int value ATTRIBUTE_UNUSED) {
 }
 
 void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) {
-  int bit31 = (value & UINT64_C(0x80000000)) != 0;
-
-  // Loads with 1 instruction.
-  if (IsUint<16>(value)) {
-    Ori(rd, ZERO, value);
-  } else if (IsInt<16>(value)) {
-    Daddiu(rd, ZERO, value);
-  } else if ((value & 0xFFFF) == 0 && IsInt<16>(value >> 16)) {
-    Lui(rd, value >> 16);
-  } else if (IsInt<32>(value)) {
-    // Loads with 2 instructions.
-    Lui(rd, value >> 16);
-    Ori(rd, rd, value);
-  } else if ((value & 0xFFFF0000) == 0 && IsInt<16>(value >> 32)) {
-    Ori(rd, ZERO, value);
-    Dahi(rd, value >> 32);
-  } else if ((value & UINT64_C(0xFFFFFFFF0000)) == 0) {
-    Ori(rd, ZERO, value);
-    Dati(rd, value >> 48);
-  } else if ((value & 0xFFFF) == 0 &&
-             (-32768 - bit31) <= (value >> 32) && (value >> 32) <= (32767 - bit31)) {
-    Lui(rd, value >> 16);
-    Dahi(rd, (value >> 32) + bit31);
-  } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
-    Lui(rd, value >> 16);
-    Dati(rd, (value >> 48) + bit31);
-  } else if (IsPowerOfTwo(value + UINT64_C(1))) {
-    int shift_cnt = 64 - CTZ(value + UINT64_C(1));
-    Daddiu(rd, ZERO, -1);
-    if (shift_cnt < 32) {
-      Dsrl(rd, rd, shift_cnt);
-    } else {
-      Dsrl32(rd, rd, shift_cnt & 31);
-    }
-  } else {
-    int shift_cnt = CTZ(value);
-    int64_t tmp = value >> shift_cnt;
-    if (IsUint<16>(tmp)) {
-      Ori(rd, ZERO, tmp);
-      if (shift_cnt < 32) {
-        Dsll(rd, rd, shift_cnt);
-      } else {
-        Dsll32(rd, rd, shift_cnt & 31);
-      }
-    } else if (IsInt<16>(tmp)) {
-      Daddiu(rd, ZERO, tmp);
-      if (shift_cnt < 32) {
-        Dsll(rd, rd, shift_cnt);
-      } else {
-        Dsll32(rd, rd, shift_cnt & 31);
-      }
-    } else if (IsInt<32>(tmp)) {
-      // Loads with 3 instructions.
-      Lui(rd, tmp >> 16);
-      Ori(rd, rd, tmp);
-      if (shift_cnt < 32) {
-        Dsll(rd, rd, shift_cnt);
-      } else {
-        Dsll32(rd, rd, shift_cnt & 31);
-      }
-    } else {
-      shift_cnt = 16 + CTZ(value >> 16);
-      tmp = value >> shift_cnt;
-      if (IsUint<16>(tmp)) {
-        Ori(rd, ZERO, tmp);
-        if (shift_cnt < 32) {
-          Dsll(rd, rd, shift_cnt);
-        } else {
-          Dsll32(rd, rd, shift_cnt & 31);
-        }
-        Ori(rd, rd, value);
-      } else if (IsInt<16>(tmp)) {
-        Daddiu(rd, ZERO, tmp);
-        if (shift_cnt < 32) {
-          Dsll(rd, rd, shift_cnt);
-        } else {
-          Dsll32(rd, rd, shift_cnt & 31);
-        }
-        Ori(rd, rd, value);
-      } else {
-        // Loads with 3-4 instructions.
-        uint64_t tmp2 = value;
-        bool used_lui = false;
-        if (((tmp2 >> 16) & 0xFFFF) != 0 || (tmp2 & 0xFFFFFFFF) == 0) {
-          Lui(rd, tmp2 >> 16);
-          used_lui = true;
-        }
-        if ((tmp2 & 0xFFFF) != 0) {
-          if (used_lui) {
-            Ori(rd, rd, tmp2);
-          } else {
-            Ori(rd, ZERO, tmp2);
-          }
-        }
-        if (bit31) {
-          tmp2 += UINT64_C(0x100000000);
-        }
-        if (((tmp2 >> 32) & 0xFFFF) != 0) {
-          Dahi(rd, tmp2 >> 32);
-        }
-        if (tmp2 & UINT64_C(0x800000000000)) {
-          tmp2 += UINT64_C(0x1000000000000);
-        }
-        if ((tmp2 >> 48) != 0) {
-          Dati(rd, tmp2 >> 48);
-        }
-      }
-    }
-  }
+  TemplateLoadConst64(this, rd, value);
 }
 
 void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) {
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index a7d350c..6277b5d 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -33,6 +33,237 @@
 namespace art {
 namespace mips64 {
 
+enum LoadConst64Path {  // One bit per instruction sequence TemplateLoadConst64() can choose.
+  kLoadConst64PathZero           = 0x0,      // No path recorded.
+  kLoadConst64PathOri            = 0x1,
+  kLoadConst64PathDaddiu         = 0x2,
+  kLoadConst64PathLui            = 0x4,
+  kLoadConst64PathLuiOri         = 0x8,
+  kLoadConst64PathOriDahi        = 0x10,
+  kLoadConst64PathOriDati        = 0x20,
+  kLoadConst64PathLuiDahi        = 0x40,
+  kLoadConst64PathLuiDati        = 0x80,
+  kLoadConst64PathDaddiuDsrlX    = 0x100,
+  kLoadConst64PathOriDsllX       = 0x200,
+  kLoadConst64PathDaddiuDsllX    = 0x400,
+  kLoadConst64PathLuiOriDsllX    = 0x800,
+  kLoadConst64PathOriDsllXOri    = 0x1000,
+  kLoadConst64PathDaddiuDsllXOri = 0x2000,
+  kLoadConst64PathDaddiuDahi     = 0x4000,
+  kLoadConst64PathDaddiuDati     = 0x8000,
+  kLoadConst64PathDinsu1         = 0x10000,  // LoadConst32() + dinsu, taken when rep32_count < 3.
+  kLoadConst64PathDinsu2         = 0x20000,  // LoadConst32() + dinsu, taken when rep32_count < 4.
+  kLoadConst64PathCatchAll       = 0x40000,  // LoadConst32() plus optional dahi/dati.
+  kLoadConst64PathAllPaths       = 0x7ffff,  // Bitwise OR of every flag above.
+};
+
+// Loads `value` into `rd` with one or two instructions emitted through `a`.
+template <typename Asm>
+void TemplateLoadConst32(Asm* a, GpuRegister rd, int32_t value) {
+  if (IsUint<16>(value)) {
+    // Use OR with (unsigned) immediate to encode 16b unsigned int.
+    a->Ori(rd, ZERO, value);
+  } else if (IsInt<16>(value)) {
+    // Use ADD with (signed) immediate to encode 16b signed int.
+    a->Addiu(rd, ZERO, value);
+  } else {
+    // Set 16 most significant bits of value. The "lui" instruction
+    // also clears the 16 least significant bits to zero.
+    a->Lui(rd, value >> 16);
+    if (value & 0xFFFF) {
+      // If the 16 least significant bits are non-zero, set them here.
+      a->Ori(rd, rd, value);
+    }
+  }
+}
+
+// Instruction count for loading `value` as LoadConst32() + "dinsu" when both 32-bit halves are
+// equal (INT_MAX otherwise); LoadConst32() is one instruction for ori/addiu/lui-only values.
+static inline int InstrCountForLoadReplicatedConst32(int64_t value) {
+  int32_t x = Low32Bits(value);
+  int32_t y = High32Bits(value);
+  if (x != y) {
+    return INT_MAX;
+  }
+  return (IsUint<16>(x) || IsInt<16>(x) || (x & 0xFFFF) == 0) ? 2 : 3;
+}
+
+// Emits, through `a`, an instruction sequence that loads the 64-bit constant `value` into `rd`,
+// and reports the sequence chosen via a->RecordLoadConst64Path() (a testing hook).
+template <typename Asm, typename Rtype, typename Vtype>
+void TemplateLoadConst64(Asm* a, Rtype rd, Vtype value) {
+  int bit31 = (value & UINT64_C(0x80000000)) != 0;
+  int rep32_count = InstrCountForLoadReplicatedConst32(value);
+
+  // Loads with 1 instruction.
+  if (IsUint<16>(value)) {
+    // 64-bit value can be loaded as an unsigned 16-bit number.
+    a->RecordLoadConst64Path(kLoadConst64PathOri);
+    a->Ori(rd, ZERO, value);
+  } else if (IsInt<16>(value)) {
+    // 64-bit value can be loaded as a signed 16-bit number.
+    a->RecordLoadConst64Path(kLoadConst64PathDaddiu);
+    a->Daddiu(rd, ZERO, value);
+  } else if ((value & 0xFFFF) == 0 && IsInt<16>(value >> 16)) {
+    // 64-bit value can be loaded as a signed 32-bit number which has all
+    // of its 16 least significant bits set to zero.
+    a->RecordLoadConst64Path(kLoadConst64PathLui);
+    a->Lui(rd, value >> 16);
+  } else if (IsInt<32>(value)) {
+    // Loads with 2 instructions.
+    // 64-bit value can be loaded as a signed 32-bit number which has some
+    // or all of its 16 least significant bits set to one.
+    a->RecordLoadConst64Path(kLoadConst64PathLuiOri);
+    a->Lui(rd, value >> 16);
+    a->Ori(rd, rd, value);
+  } else if ((value & 0xFFFF0000) == 0 && IsInt<16>(value >> 32)) {
+    // 64-bit value which consists of an unsigned 16-bit value in its
+    // least significant 32-bits, and a signed 16-bit value in its
+    // most significant 32-bits.
+    a->RecordLoadConst64Path(kLoadConst64PathOriDahi);
+    a->Ori(rd, ZERO, value);
+    a->Dahi(rd, value >> 32);
+  } else if ((value & UINT64_C(0xFFFFFFFF0000)) == 0) {
+    // 64-bit value which consists of an unsigned 16-bit value in its
+    // least significant 48-bits, and a signed 16-bit value in its
+    // most significant 16-bits.
+    a->RecordLoadConst64Path(kLoadConst64PathOriDati);
+    a->Ori(rd, ZERO, value);
+    a->Dati(rd, value >> 48);
+  } else if ((value & 0xFFFF) == 0 &&
+             (-32768 - bit31) <= (value >> 32) && (value >> 32) <= (32767 - bit31)) {
+    // 16 LSBs (Least Significant Bits) all set to zero.
+    // 48 MSBs (Most Significant Bits) hold a signed 32-bit value.
+    a->RecordLoadConst64Path(kLoadConst64PathLuiDahi);
+    a->Lui(rd, value >> 16);
+    a->Dahi(rd, (value >> 32) + bit31);
+  } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
+    // 16 LSBs all set to zero.
+    // 48 MSBs hold a signed value which can't be represented by signed
+    // 32-bit number, and the middle 16 bits are all zero, or all one.
+    a->RecordLoadConst64Path(kLoadConst64PathLuiDati);
+    a->Lui(rd, value >> 16);
+    a->Dati(rd, (value >> 48) + bit31);
+  } else if (IsInt<16>(static_cast<int32_t>(value)) &&
+             (-32768 - bit31) <= (value >> 32) && (value >> 32) <= (32767 - bit31)) {
+    // 32 LSBs contain a sign-extended 16-bit number.
+    // 32 MSBs, once adjusted by bit31 for "dahi", fit in a signed 16-bit number.
+    a->RecordLoadConst64Path(kLoadConst64PathDaddiuDahi);
+    a->Daddiu(rd, ZERO, value);
+    a->Dahi(rd, (value >> 32) + bit31);
+  } else if (IsInt<16>(static_cast<int32_t>(value)) &&
+             ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
+    // 32 LSBs contain a sign-extended 16-bit number, and bits 32..47 repeat bit 31.
+    // 16 MSBs contain a 16-bit number which is added with "dati".
+    a->RecordLoadConst64Path(kLoadConst64PathDaddiuDati);
+    a->Daddiu(rd, ZERO, value);
+    a->Dati(rd, (value >> 48) + bit31);
+  } else if (IsPowerOfTwo(value + UINT64_C(1))) {
+    // 64-bit values which have their "n" LSBs set to one, and their
+    // "64-n" MSBs set to zero. "n" must meet the restrictions 0 < n < 64.
+    int shift_cnt = 64 - CTZ(value + UINT64_C(1));
+    a->RecordLoadConst64Path(kLoadConst64PathDaddiuDsrlX);
+    a->Daddiu(rd, ZERO, -1);
+    if (shift_cnt < 32) {
+      a->Dsrl(rd, rd, shift_cnt);
+    } else {
+      a->Dsrl32(rd, rd, shift_cnt & 31);
+    }
+  } else {
+    int shift_cnt = CTZ(value);
+    int64_t tmp = value >> shift_cnt;
+    if (IsUint<16>(tmp)) {
+      // Value can be computed by loading a 16-bit unsigned value, and
+      // then shifting left.
+      a->RecordLoadConst64Path(kLoadConst64PathOriDsllX);
+      a->Ori(rd, ZERO, tmp);
+      if (shift_cnt < 32) {
+        a->Dsll(rd, rd, shift_cnt);
+      } else {
+        a->Dsll32(rd, rd, shift_cnt & 31);
+      }
+    } else if (IsInt<16>(tmp)) {
+      // Value can be computed by loading a 16-bit signed value, and
+      // then shifting left.
+      a->RecordLoadConst64Path(kLoadConst64PathDaddiuDsllX);
+      a->Daddiu(rd, ZERO, tmp);
+      if (shift_cnt < 32) {
+        a->Dsll(rd, rd, shift_cnt);
+      } else {
+        a->Dsll32(rd, rd, shift_cnt & 31);
+      }
+    } else if (rep32_count < 3) {
+      // 32 LSBs equal the 32 MSBs and can be loaded with a single MIPS
+      // instruction; "dinsu" then copies them into the 32 MSBs.
+      a->RecordLoadConst64Path(kLoadConst64PathDinsu1);
+      a->LoadConst32(rd, value);
+      a->Dinsu(rd, rd, 32, 32);
+    } else if (IsInt<32>(tmp)) {
+      // Loads with 3 instructions.
+      // Value can be computed by loading a 32-bit signed value, and
+      // then shifting left.
+      a->RecordLoadConst64Path(kLoadConst64PathLuiOriDsllX);
+      a->Lui(rd, tmp >> 16);
+      a->Ori(rd, rd, tmp);
+      if (shift_cnt < 32) {
+        a->Dsll(rd, rd, shift_cnt);
+      } else {
+        a->Dsll32(rd, rd, shift_cnt & 31);
+      }
+    } else {
+      shift_cnt = 16 + CTZ(value >> 16);
+      tmp = value >> shift_cnt;
+      if (IsUint<16>(tmp)) {
+        // Value can be computed by loading a 16-bit unsigned value,
+        // shifting left, and "or"ing in another 16-bit unsigned value.
+        a->RecordLoadConst64Path(kLoadConst64PathOriDsllXOri);
+        a->Ori(rd, ZERO, tmp);
+        if (shift_cnt < 32) {
+          a->Dsll(rd, rd, shift_cnt);
+        } else {
+          a->Dsll32(rd, rd, shift_cnt & 31);
+        }
+        a->Ori(rd, rd, value);
+      } else if (IsInt<16>(tmp)) {
+        // Value can be computed by loading a 16-bit signed value,
+        // shifting left, and "or"ing in a 16-bit unsigned value.
+        a->RecordLoadConst64Path(kLoadConst64PathDaddiuDsllXOri);
+        a->Daddiu(rd, ZERO, tmp);
+        if (shift_cnt < 32) {
+          a->Dsll(rd, rd, shift_cnt);
+        } else {
+          a->Dsll32(rd, rd, shift_cnt & 31);
+        }
+        a->Ori(rd, rd, value);
+      } else if (rep32_count < 4) {
+        // Value being loaded has 32 LSBs equal to the 32 MSBs, and the
+        // value in the 32 LSBs requires 2 MIPS instructions to load.
+        a->RecordLoadConst64Path(kLoadConst64PathDinsu2);
+        a->LoadConst32(rd, value);
+        a->Dinsu(rd, rd, 32, 32);
+      } else {
+        // Loads with 3-4 instructions.
+        // Catch-all case for any other 64-bit value not handled above.
+        uint64_t tmp2 = value;
+        a->RecordLoadConst64Path(kLoadConst64PathCatchAll);
+        a->LoadConst32(rd, value);
+        if (bit31) {
+          tmp2 += UINT64_C(0x100000000);
+        }
+        if (((tmp2 >> 32) & 0xFFFF) != 0) {
+          a->Dahi(rd, tmp2 >> 32);
+        }
+        if (tmp2 & UINT64_C(0x800000000000)) {
+          tmp2 += UINT64_C(0x1000000000000);
+        }
+        if ((tmp2 >> 48) != 0) {
+          a->Dati(rd, tmp2 >> 48);
+        }
+      }
+    }
+  }
+}
+
 static constexpr size_t kMips64WordSize = 4;
 static constexpr size_t kMips64DoublewordSize = 8;
 
@@ -326,9 +557,13 @@
   void Not(GpuRegister rd, GpuRegister rs);
 
   // Higher level composite instructions.
+  int InstrCountForLoadReplicatedConst32(int64_t);  // NOTE(review): defined anywhere? Confirm.
+  void LoadConst32(GpuRegister rd, int32_t value);  // Forwards to TemplateLoadConst32().
+  void LoadConst64(GpuRegister rd, int64_t value);  // MIPS64; forwards to TemplateLoadConst64().
 
+  // Testing hook: receives the LoadConst64Path flag of the sequence TemplateLoadConst64() chose.
+  void RecordLoadConst64Path(int value);
+
   void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64
 
   void Bind(Label* label) OVERRIDE {
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index b758d64..1fdef96 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -1636,6 +1636,177 @@
   DriverStr(expected, "StoreFpuToOffset");
 }
 
+///////////////////////
+// Loading Constants //
+///////////////////////
+
+TEST_F(AssemblerMIPS64Test, LoadConst32) {
+  // IsUint<16>(value)
+  __ LoadConst32(mips64::V0, 0);
+  __ LoadConst32(mips64::V0, 65535);
+  // IsInt<16>(value)
+  __ LoadConst32(mips64::V0, -1);
+  __ LoadConst32(mips64::V0, -32768);
+  // Everything else: lui, followed by ori when the low 16 bits are non-zero.
+  __ LoadConst32(mips64::V0, 65536);
+  __ LoadConst32(mips64::V0, 65537);
+  __ LoadConst32(mips64::V0, 2147483647);
+  __ LoadConst32(mips64::V0, -32769);
+  __ LoadConst32(mips64::V0, -65536);
+  __ LoadConst32(mips64::V0, -65537);
+  __ LoadConst32(mips64::V0, -2147483647);
+  __ LoadConst32(mips64::V0, -2147483648);
+
+  const char* expected =
+      // IsUint<16>(value)
+      "ori $v0, $zero, 0\n"         // __ LoadConst32(mips64::V0, 0);
+      "ori $v0, $zero, 65535\n"     // __ LoadConst32(mips64::V0, 65535);
+      // IsInt<16>(value)
+      "addiu $v0, $zero, -1\n"      // __ LoadConst32(mips64::V0, -1);
+      "addiu $v0, $zero, -32768\n"  // __ LoadConst32(mips64::V0, -32768);
+      // Everything else: lui, followed by ori when the low 16 bits are non-zero.
+      "lui $v0, 1\n"                // __ LoadConst32(mips64::V0, 65536);
+      "lui $v0, 1\n"                // __ LoadConst32(mips64::V0, 65537);
+      "ori $v0, 1\n"                //   (second instruction of the load above)
+      "lui $v0, 32767\n"            // __ LoadConst32(mips64::V0, 2147483647);
+      "ori $v0, 65535\n"            //   (second instruction of the load above)
+      "lui $v0, 65535\n"            // __ LoadConst32(mips64::V0, -32769);
+      "ori $v0, 32767\n"            //   (second instruction of the load above)
+      "lui $v0, 65535\n"            // __ LoadConst32(mips64::V0, -65536);
+      "lui $v0, 65534\n"            // __ LoadConst32(mips64::V0, -65537);
+      "ori $v0, 65535\n"            //   (second instruction of the load above)
+      "lui $v0, 32768\n"            // __ LoadConst32(mips64::V0, -2147483647);
+      "ori $v0, 1\n"                //   (second instruction of the load above)
+      "lui $v0, 32768\n";           // __ LoadConst32(mips64::V0, -2147483648);
+  DriverStr(expected, "LoadConst32");
+}
+
+static uint64_t SignExtend16To64(uint16_t n) {
+  return static_cast<int16_t>(n);  // Converting the int16_t to uint64_t sign-extends it.
+}
+
+// The art::mips64::Mips64Assembler::LoadConst64() method uses a template
+// to minimize the number of instructions needed to load a 64-bit constant
+// value into a register. The template calls various methods which emit
+// MIPS machine instructions. This struct (class) uses the same template
+// but supplies its own definitions of those methods, which simulate the
+// operation of the corresponding MIPS instructions on an array of
+// registers. After invoking LoadConst64() the target register should
+// contain the same 64-bit value as was input to LoadConst64(). If the
+// simulated register doesn't contain the correct value then there is probably
+// an error in the template function.
+struct LoadConst64Tester {
+  LoadConst64Tester() {
+    // Initialize all of the registers for simulation to zero.
+    for (int r = 0; r < 32; r++) {
+      regs_[r] = 0;
+    }
+    // Clear all of the path flags.
+    loadconst64_paths_ = art::mips64::kLoadConst64PathZero;
+  }
+  void Addiu(mips64::GpuRegister rd, mips64::GpuRegister rs, uint16_t c) {
+    regs_[rd] = static_cast<int32_t>(regs_[rs] + SignExtend16To64(c));  // Sign-extended 32b sum.
+  }
+  void Daddiu(mips64::GpuRegister rd, mips64::GpuRegister rs, uint16_t c) {
+    regs_[rd] = regs_[rs] + SignExtend16To64(c);
+  }
+  void Dahi(mips64::GpuRegister rd, uint16_t c) {
+    regs_[rd] += SignExtend16To64(c) << 32;
+  }
+  void Dati(mips64::GpuRegister rd, uint16_t c) {
+    regs_[rd] += SignExtend16To64(c) << 48;
+  }
+  void Dinsu(mips64::GpuRegister rt, mips64::GpuRegister rs, int pos, int size) {
+    CHECK(IsUint<5>(pos - 32)) << pos;
+    CHECK(IsUint<5>(size - 1)) << size;
+    CHECK(IsUint<5>(pos + size - 33)) << pos << " + " << size;
+    uint64_t src_mask = (UINT64_C(1) << size) - 1;  // Low `size` bits of the source.
+    uint64_t dsk_mask = ~(src_mask << pos);  // Everything except the destination field.
+
+    regs_[rt] = (regs_[rt] & dsk_mask) | ((regs_[rs] & src_mask) << pos);
+  }
+  void Dsll(mips64::GpuRegister rd, mips64::GpuRegister rt, int shamt) {
+    regs_[rd] = regs_[rt] << (shamt & 0x1f);  // Shifts left by 0..31.
+  }
+  void Dsll32(mips64::GpuRegister rd, mips64::GpuRegister rt, int shamt) {
+    regs_[rd] = regs_[rt] << (32 + (shamt & 0x1f));  // Shifts left by 32..63.
+  }
+  void Dsrl(mips64::GpuRegister rd, mips64::GpuRegister rt, int shamt) {
+    regs_[rd] = regs_[rt] >> (shamt & 0x1f);  // Shifts right by 0..31.
+  }
+  void Dsrl32(mips64::GpuRegister rd, mips64::GpuRegister rt, int shamt) {
+    regs_[rd] = regs_[rt] >> (32 + (shamt & 0x1f));  // Shifts right by 32..63.
+  }
+  void Lui(mips64::GpuRegister rd, uint16_t c) {
+    regs_[rd] = SignExtend16To64(c) << 16;
+  }
+  void Ori(mips64::GpuRegister rd, mips64::GpuRegister rs, uint16_t c) {
+    regs_[rd] = regs_[rs] | c;
+  }
+  void LoadConst32(mips64::GpuRegister rd, int32_t c) {
+    CHECK_NE(rd, 0);
+    mips64::TemplateLoadConst32<LoadConst64Tester>(this, rd, c);
+    CHECK_EQ(regs_[rd], static_cast<uint64_t>(c));
+  }
+  void LoadConst64(mips64::GpuRegister rd, int64_t c) {
+    CHECK_NE(rd, 0);
+    mips64::TemplateLoadConst64<LoadConst64Tester>(this, rd, c);
+    CHECK_EQ(regs_[rd], static_cast<uint64_t>(c));
+  }
+  uint64_t regs_[32];  // Simulated registers, indexed by GpuRegister.
+
+  // Returns the bitmask of path flags recorded so far.
+  int GetPathsCovered() {
+    return loadconst64_paths_;
+  }
+
+  void RecordLoadConst64Path(int value) {
+    loadconst64_paths_ |= value;
+  }
+
+ private:
+  // This variable holds a bitmask to tell us which paths were taken
+  // through the template function which loads 64-bit values.
+  int loadconst64_paths_;
+};
+
+TEST_F(AssemblerMIPS64Test, LoadConst64) {
+  // 16-bit patterns; every combination of four of them forms one 64-bit test constant.
+  const uint16_t imms[] = {
+      0, 1, 2, 3, 4, 0x33, 0x66, 0x55, 0x99, 0xaa, 0xcc, 0xff, 0x5500, 0x5555,
+      0x7ffc, 0x7ffd, 0x7ffe, 0x7fff, 0x8000, 0x8001, 0x8002, 0x8003, 0x8004,
+      0xaaaa, 0xfffc, 0xfffd, 0xfffe, 0xffff
+  };
+  LoadConst64Tester tester;
+
+  // Build each constant with shifts rather than by writing uint16_t members of a
+  // union and reading its int64_t member: reading a union member other than the
+  // one last written is undefined behavior in C++, and the halfword order would
+  // depend on the endianness of the host running the test.
+  for (uint16_t h3 : imms) {
+    const uint64_t bits3 = static_cast<uint64_t>(h3) << 48;
+
+    for (uint16_t h2 : imms) {
+      const uint64_t bits2 = bits3 | (static_cast<uint64_t>(h2) << 32);
+
+      for (uint16_t h1 : imms) {
+        const uint64_t bits1 = bits2 | (static_cast<uint64_t>(h1) << 16);
+
+        for (uint16_t h0 : imms) {
+          const uint64_t bits0 = bits1 | h0;
+
+          // LoadConst64() CHECKs that the simulated register ends up holding the constant.
+          tester.LoadConst64(mips64::V0, static_cast<int64_t>(bits0));
+        }
+      }
+    }
+  }
+
+  // Verify that we tested all paths through the "load 64-bit value"
+  // function template.
+  EXPECT_EQ(tester.GetPathsCovered(), art::mips64::kLoadConst64PathAllPaths);
+}
+
 #undef __
 
 }  // namespace art