MIPS64: Improve method invocation.

Improvements include:
- support for all kinds of method loads and static/direct calls
- PC-relative 32-bit and 64-bit literals, used by the above and available for
  future work (a usage sketch of the new assembler entry points follows below)
- shorter instruction sequences for recursive static/direct calls

Also:
- add the MIPS64 dinsu instruction (missed earlier) to the disassembler, along
  with minor clean-up there
- properly prefix constant names with 'k' in the relative patcher tests
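
For reference, a minimal sketch of how the new assembler entry points fit
together. Illustration only: the register choices, literal value and include
path are assumptions, and it presumes the raw NewLiteral(size, data) overload
is callable from this context; only methods added or kept by this change are
used.

  #include <cstdint>
  #include "utils/mips64/assembler_mips64.h"

  using namespace art::mips64;

  // Emits a 64-bit PC-relative literal load, a PC-relative label-address load
  // and a direct call. Near/far expansion (ldpc vs. auipc+ld, addiupc vs.
  // auipc+addiu, balc vs. auipc+jialc) is decided later, when the branches are
  // promoted and the literal pool is emitted during code finalization.
  void Sketch(Mips64Assembler* assembler) {
    uint64_t value = UINT64_C(0x1234567887654321);
    Literal* literal =
        assembler->NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value));
    assembler->LoadLiteral(V0, kLoadDoubleword, literal);  // ldpc or auipc+ld

    Mips64Label target;
    assembler->LoadLabelAddress(A0, &target);  // addiupc or auipc+addiu
    assembler->Balc(&target);                  // balc or auipc+jialc
    assembler->Bind(&target);
  }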

Test: test-art-host-gtest
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: "make -j1 ART_TEST_DEFAULT_COMPILER=false ART_TEST_OPTIMIZING=true
       ART_TEST_INTERPRETER=false ART_TEST_JIT=false
       ART_TEST_PIC_TEST=true test-art-target-run-test64"

Change-Id: I19876fa5316b68531af7dfddfce90d2068433116
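
Note on the new kLiteralLong offset in Branch::GetOffset(): ldpc applies its
scaled 18-bit offset to PC rounded down to a multiple of 8, so the branch
location is rounded down before the subtraction. A minimal sketch of that math
with hypothetical byte addresses (pc_org is zero for this type and the 21-bit
offset mask is omitted for brevity):

  #include <cstdint>

  // Returns the 18-bit doubleword offset that ldpc would encode.
  static uint32_t EncodeLdpcOffset(uint32_t ldpc_location, uint32_t literal_location) {
    uint32_t base = ldpc_location & ~UINT32_C(7);  // RoundDown(location, sizeof(uint64_t))
    return (literal_location - base) >> 3;         // offset_shift == 3 for kLiteralLong
  }

  // E.g. an ldpc at 0x104 loading a literal at 0x200 encodes
  // (0x200 - 0x100) >> 3 == 0x20; branch promotion has already 8-byte-aligned
  // the literal, so nothing is lost in the shift.
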
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 1a21df9..84280b9 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -35,6 +35,7 @@
   for (auto& exception_block : exception_blocks_) {
     EmitExceptionPoll(&exception_block);
   }
+  EmitLiterals();
   PromoteBranches();
 }
 
@@ -450,6 +451,21 @@
   EmitI(0x27, rs, rt, imm16);
 }
 
+void Mips64Assembler::Lwpc(GpuRegister rs, uint32_t imm19) {
+  CHECK(IsUint<19>(imm19)) << imm19;
+  EmitI21(0x3B, rs, (0x01 << 19) | imm19);
+}
+
+void Mips64Assembler::Lwupc(GpuRegister rs, uint32_t imm19) {
+  CHECK(IsUint<19>(imm19)) << imm19;
+  EmitI21(0x3B, rs, (0x02 << 19) | imm19);
+}
+
+void Mips64Assembler::Ldpc(GpuRegister rs, uint32_t imm18) {
+  CHECK(IsUint<18>(imm18)) << imm18;
+  EmitI21(0x3B, rs, (0x06 << 18) | imm18);
+}
+
 void Mips64Assembler::Lui(GpuRegister rt, uint16_t imm16) {
   EmitI(0xf, static_cast<GpuRegister>(0), rt, imm16);
 }
@@ -548,6 +564,10 @@
   EmitI26(0x32, imm26);
 }
 
+void Mips64Assembler::Balc(uint32_t imm26) {
+  EmitI26(0x3A, imm26);
+}
+
 void Mips64Assembler::Jic(GpuRegister rt, uint16_t imm16) {
   EmitI(0x36, static_cast<GpuRegister>(0), rt, imm16);
 }
@@ -1064,19 +1084,37 @@
   type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;
 }
 
-void Mips64Assembler::Branch::InitializeType(bool is_call) {
+void Mips64Assembler::Branch::InitializeType(Type initial_type) {
   OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
-  if (is_call) {
-    InitShortOrLong(offset_size, kCall, kLongCall);
-  } else if (condition_ == kUncond) {
-    InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
-  } else {
-    if (condition_ == kCondEQZ || condition_ == kCondNEZ) {
-      // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
-      type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch;
-    } else {
-      InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
-    }
+  switch (initial_type) {
+    case kLabel:
+    case kLiteral:
+    case kLiteralUnsigned:
+    case kLiteralLong:
+      CHECK(!IsResolved());
+      type_ = initial_type;
+      break;
+    case kCall:
+      InitShortOrLong(offset_size, kCall, kLongCall);
+      break;
+    case kCondBranch:
+      switch (condition_) {
+        case kUncond:
+          InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+          break;
+        case kCondEQZ:
+        case kCondNEZ:
+          // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
+          type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch;
+          break;
+        default:
+          InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+          break;
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected branch type " << initial_type;
+      UNREACHABLE();
   }
   old_type_ = type_;
 }
@@ -1109,14 +1147,14 @@
   }
 }
 
-Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target)
+Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, bool is_call)
     : old_location_(location),
       location_(location),
       target_(target),
       lhs_reg_(ZERO),
       rhs_reg_(ZERO),
       condition_(kUncond) {
-  InitializeType(false);
+  InitializeType(is_call ? kCall : kCondBranch);
 }
 
 Mips64Assembler::Branch::Branch(uint32_t location,
@@ -1164,19 +1202,18 @@
     // Branch condition is always true, make the branch unconditional.
     condition_ = kUncond;
   }
-  InitializeType(false);
+  InitializeType(kCondBranch);
 }
 
-Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, GpuRegister indirect_reg)
+Mips64Assembler::Branch::Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type)
     : old_location_(location),
       location_(location),
-      target_(target),
-      lhs_reg_(indirect_reg),
+      target_(kUnresolved),
+      lhs_reg_(dest_reg),
       rhs_reg_(ZERO),
       condition_(kUncond) {
-  CHECK_NE(indirect_reg, ZERO);
-  CHECK_NE(indirect_reg, AT);
-  InitializeType(true);
+  CHECK_NE(dest_reg, ZERO);
+  InitializeType(label_or_literal_type);
 }
 
 Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition(
@@ -1278,11 +1315,23 @@
     case kUncondBranch:
     case kCondBranch:
     case kCall:
+    // Near label.
+    case kLabel:
+    // Near literals.
+    case kLiteral:
+    case kLiteralUnsigned:
+    case kLiteralLong:
       return false;
     // Long branches.
     case kLongUncondBranch:
     case kLongCondBranch:
     case kLongCall:
+    // Far label.
+    case kFarLabel:
+    // Far literals.
+    case kFarLiteral:
+    case kFarLiteralUnsigned:
+    case kFarLiteralLong:
       return true;
   }
   UNREACHABLE();
@@ -1351,6 +1400,20 @@
     case kCall:
       type_ = kLongCall;
       break;
+    // Near label.
+    case kLabel:
+      type_ = kFarLabel;
+      break;
+    // Near literals.
+    case kLiteral:
+      type_ = kFarLiteral;
+      break;
+    case kLiteralUnsigned:
+      type_ = kFarLiteralUnsigned;
+      break;
+    case kLiteralLong:
+      type_ = kFarLiteralLong;
+      break;
     default:
       // Note: 'type_' is already long.
       break;
@@ -1397,7 +1460,15 @@
   uint32_t ofs_mask = 0xFFFFFFFF >> (32 - GetOffsetSize());
   // Calculate the byte distance between instructions and also account for
   // different PC-relative origins.
-  uint32_t offset = target_ - GetOffsetLocation() - branch_info_[type_].pc_org * sizeof(uint32_t);
+  uint32_t offset_location = GetOffsetLocation();
+  if (type_ == kLiteralLong) {
+    // Special case for the ldpc instruction, whose address (PC) is rounded down to
+    // a multiple of 8 before adding the offset.
+    // Note, branch promotion has already taken care of aligning `target_` to an
+    // address that's a multiple of 8.
+    offset_location = RoundDown(offset_location, sizeof(uint64_t));
+  }
+  uint32_t offset = target_ - offset_location - branch_info_[type_].pc_org * sizeof(uint32_t);
   // Prepare the offset for encoding into the instruction(s).
   offset = (offset & ofs_mask) >> branch_info_[type_].offset_shift;
   return offset;
@@ -1444,7 +1515,7 @@
   label->BindTo(bound_pc);
 }
 
-uint32_t Mips64Assembler::GetLabelLocation(Mips64Label* label) const {
+uint32_t Mips64Assembler::GetLabelLocation(const Mips64Label* label) const {
   CHECK(label->IsBound());
   uint32_t target = label->Position();
   if (label->prev_branch_id_plus_one_) {
@@ -1500,7 +1571,7 @@
 
 void Mips64Assembler::Buncond(Mips64Label* label) {
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(buffer_.Size(), target);
+  branches_.emplace_back(buffer_.Size(), target, /* is_call */ false);
   FinalizeLabeledBranch(label);
 }
 
@@ -1517,12 +1588,87 @@
   FinalizeLabeledBranch(label);
 }
 
-void Mips64Assembler::Call(Mips64Label* label, GpuRegister indirect_reg) {
+void Mips64Assembler::Call(Mips64Label* label) {
   uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
-  branches_.emplace_back(buffer_.Size(), target, indirect_reg);
+  branches_.emplace_back(buffer_.Size(), target, /* is_call */ true);
   FinalizeLabeledBranch(label);
 }
 
+void Mips64Assembler::LoadLabelAddress(GpuRegister dest_reg, Mips64Label* label) {
+  // Label address loads are treated as pseudo branches since they require very similar handling.
+  DCHECK(!label->IsBound());
+  branches_.emplace_back(buffer_.Size(), dest_reg, Branch::kLabel);
+  FinalizeLabeledBranch(label);
+}
+
+Literal* Mips64Assembler::NewLiteral(size_t size, const uint8_t* data) {
+  // We don't support byte and half-word literals.
+  if (size == 4u) {
+    literals_.emplace_back(size, data);
+    return &literals_.back();
+  } else {
+    DCHECK_EQ(size, 8u);
+    long_literals_.emplace_back(size, data);
+    return &long_literals_.back();
+  }
+}
+
+void Mips64Assembler::LoadLiteral(GpuRegister dest_reg,
+                                  LoadOperandType load_type,
+                                  Literal* literal) {
+  // Literal loads are treated as pseudo branches since they require very similar handling.
+  Branch::Type literal_type;
+  switch (load_type) {
+    case kLoadWord:
+      DCHECK_EQ(literal->GetSize(), 4u);
+      literal_type = Branch::kLiteral;
+      break;
+    case kLoadUnsignedWord:
+      DCHECK_EQ(literal->GetSize(), 4u);
+      literal_type = Branch::kLiteralUnsigned;
+      break;
+    case kLoadDoubleword:
+      DCHECK_EQ(literal->GetSize(), 8u);
+      literal_type = Branch::kLiteralLong;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected literal load type " << load_type;
+      UNREACHABLE();
+  }
+  Mips64Label* label = literal->GetLabel();
+  DCHECK(!label->IsBound());
+  branches_.emplace_back(buffer_.Size(), dest_reg, literal_type);
+  FinalizeLabeledBranch(label);
+}
+
+void Mips64Assembler::EmitLiterals() {
+  if (!literals_.empty()) {
+    for (Literal& literal : literals_) {
+      Mips64Label* label = literal.GetLabel();
+      Bind(label);
+      AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+      DCHECK_EQ(literal.GetSize(), 4u);
+      for (size_t i = 0, size = literal.GetSize(); i != size; ++i) {
+        buffer_.Emit<uint8_t>(literal.GetData()[i]);
+      }
+    }
+  }
+  if (!long_literals_.empty()) {
+    // Reserve 4 bytes for potential alignment. If after the branch promotion the 64-bit
+    // literals don't end up 8-byte-aligned, they will be moved down 4 bytes.
+    Emit(0);  // NOP.
+    for (Literal& literal : long_literals_) {
+      Mips64Label* label = literal.GetLabel();
+      Bind(label);
+      AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+      DCHECK_EQ(literal.GetSize(), 8u);
+      for (size_t i = 0, size = literal.GetSize(); i != size; ++i) {
+        buffer_.Emit<uint8_t>(literal.GetData()[i]);
+      }
+    }
+  }
+}
+
 void Mips64Assembler::PromoteBranches() {
   // Promote short branches to long as necessary.
   bool changed;
@@ -1561,6 +1707,35 @@
       end = branch.GetOldLocation();
     }
   }
+
+  // Align 64-bit literals by moving them down by 4 bytes if needed.
+  // This will reduce the PC-relative distance, which should be safe for both near and far literals.
+  if (!long_literals_.empty()) {
+    uint32_t first_literal_location = GetLabelLocation(long_literals_.front().GetLabel());
+    size_t lit_size = long_literals_.size() * sizeof(uint64_t);
+    size_t buf_size = buffer_.Size();
+    // 64-bit literals must be at the very end of the buffer.
+    CHECK_EQ(first_literal_location + lit_size, buf_size);
+    if (!IsAligned<sizeof(uint64_t)>(first_literal_location)) {
+      buffer_.Move(first_literal_location - sizeof(uint32_t), first_literal_location, lit_size);
+      // The 4 reserved bytes proved useless, reduce the buffer size.
+      buffer_.Resize(buf_size - sizeof(uint32_t));
+      // Reduce target addresses in literal and address loads by 4 bytes in order for correct
+      // offsets from PC to be generated.
+      for (auto& branch : branches_) {
+        uint32_t target = branch.GetTarget();
+        if (target >= first_literal_location) {
+          branch.Resolve(target - sizeof(uint32_t));
+        }
+      }
+      // If after this we ever call GetLabelLocation() to get the location of a 64-bit literal,
+      // we need to adjust the location of the literal's label as well.
+      for (Literal& literal : long_literals_) {
+        // Bound label's position is negative, hence incrementing it instead of decrementing.
+        literal.GetLabel()->position_ += sizeof(uint32_t);
+      }
+    }
+  }
 }
 
 // Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
@@ -1569,11 +1744,23 @@
   {  1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 },  // kUncondBranch
   {  2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 },  // kCondBranch
                                                         // Exception: kOffset23 for beqzc/bnezc
-  {  2, 0, 0, Mips64Assembler::Branch::kOffset21, 2 },  // kCall
+  {  1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 },  // kCall
+  // Near label.
+  {  1, 0, 0, Mips64Assembler::Branch::kOffset21, 2 },  // kLabel
+  // Near literals.
+  {  1, 0, 0, Mips64Assembler::Branch::kOffset21, 2 },  // kLiteral
+  {  1, 0, 0, Mips64Assembler::Branch::kOffset21, 2 },  // kLiteralUnsigned
+  {  1, 0, 0, Mips64Assembler::Branch::kOffset21, 3 },  // kLiteralLong
   // Long branches.
   {  2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongUncondBranch
   {  3, 1, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongCondBranch
-  {  3, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongCall
+  {  2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kLongCall
+  // Far label.
+  {  2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kFarLabel
+  // Far literals.
+  {  2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kFarLiteral
+  {  2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kFarLiteralUnsigned
+  {  2, 0, 0, Mips64Assembler::Branch::kOffset32, 0 },  // kFarLiteralLong
 };
 
 // Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
@@ -1597,8 +1784,26 @@
       break;
     case Branch::kCall:
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Balc(offset);
+      break;
+
+    // Near label.
+    case Branch::kLabel:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
       Addiupc(lhs, offset);
-      Jialc(lhs, 0);
+      break;
+    // Near literals.
+    case Branch::kLiteral:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lwpc(lhs, offset);
+      break;
+    case Branch::kLiteralUnsigned:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Lwupc(lhs, offset);
+      break;
+    case Branch::kLiteralLong:
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Ldpc(lhs, offset);
       break;
 
     // Long branches.
@@ -1616,11 +1821,37 @@
       Jic(AT, Low16Bits(offset));
       break;
     case Branch::kLongCall:
-      offset += (offset & 0x8000) << 1;  // Account for sign extension in daddiu.
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in jialc.
       CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
-      Auipc(lhs, High16Bits(offset));
-      Daddiu(lhs, lhs, Low16Bits(offset));
-      Jialc(lhs, 0);
+      Auipc(AT, High16Bits(offset));
+      Jialc(AT, Low16Bits(offset));
+      break;
+
+    // Far label.
+    case Branch::kFarLabel:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in addiu.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Addiu(lhs, AT, Low16Bits(offset));
+      break;
+    // Far literals.
+    case Branch::kFarLiteral:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in lw.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Lw(lhs, AT, Low16Bits(offset));
+      break;
+    case Branch::kFarLiteralUnsigned:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in lwu.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Lwu(lhs, AT, Low16Bits(offset));
+      break;
+    case Branch::kFarLiteralLong:
+      offset += (offset & 0x8000) << 1;  // Account for sign extension in ld.
+      CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+      Auipc(AT, High16Bits(offset));
+      Ld(lhs, AT, Low16Bits(offset));
       break;
   }
   CHECK_EQ(overwrite_location_, branch->GetEndLocation());
@@ -1631,8 +1862,8 @@
   Buncond(label);
 }
 
-void Mips64Assembler::Jialc(Mips64Label* label, GpuRegister indirect_reg) {
-  Call(label, indirect_reg);
+void Mips64Assembler::Balc(Mips64Label* label) {
+  Call(label);
 }
 
 void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {