MIPS: Eliminate hard-coded offsets in branches

The bulk of the change is in the assemblers and their tests.

The main goal is to introduce "bare" branches to labels
(as opposed to the existing bare branches with relative
offsets, whose direct use we want to eliminate).
These branches' delay/forbidden slots are filled
manually and these branches do not promote to long (the
branch target must be within reach of the individual
branch instruction).

The secondary goal is to add more branch tests (mainly
for bare vs non-bare branches and a few extra) and
refactor and reorganize the branch test code a bit.

The third goal is to improve idiom recognition in the
disassembler, including branch idioms and a few others.

Further details:
- introduce bare branches (R2 and R6) to labels, making
  R2 branches available for use on R6
- make use of the above in the code generators
- align beqz/bnez with their GNU assembler encoding to
  simplify and shorten the test code
- update the CFI test because of the above
- add trivial tests for bare and non-bare branches
  (addressing existing debt as well)
- add MIPS32R6 tests for long beqc/beqzc/bc (debt)
- add MIPS64R6 long beqzc test (debt)
- group branch tests together
- group constant/literal/address-loading tests together
- make the disassembler recognize:
  - b/beqz/bnez (beq/bne with $zero reg)
  - nal (bltzal with $zero reg)
  - bal/bgezal (bal = bgezal with $zero reg)
  - move (or with $zero reg)
  - li (ori/addiu with $zero reg)
  - dli (daddiu with $zero reg)
- disassemble 16-bit immediate operands (in andi, ori,
  xori, li, dli) as signed or unsigned as appropriate
- drop unused instructions (bltzl, bltzall, addi) from
  the disassembler as there are no plans to use them

Test: test-art-host-gtest
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: test-art-target-gtest
Test: testrunner.py --target --optimizing
Test: same tests as above on CI20
Test: booted MIPS32R2 in QEMU

Change-Id: I62b74a6c00ce0651528114806ba24a59ba564a73
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index b6eb5c1..2e78af5 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -6573,7 +6573,8 @@
           __ AddUpper(base, obj, offset_high);
-        __ Beqz(T9, (isR6 ? 2 : 4));  // Skip jialc / addiu+jalr+nop.
+        MipsLabel skip_call;
+        __ Beqz(T9, &skip_call, /* is_bare */ true);
         if (label_low != nullptr) {
           __ Bind(label_low);
@@ -6588,6 +6589,7 @@
           __ Jalr(T9);
           __ Nop();
+        __ Bind(&skip_call);
         __ SetReorder(reordering);
       } else {
         // Note that we do not actually check the value of `GetIsGcMarking()`
@@ -6724,27 +6726,31 @@
     __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
     Register ref_reg = ref.AsRegister<Register>();
     Register base = short_offset ? obj : TMP;
+    MipsLabel skip_call;
     if (short_offset) {
       if (isR6) {
-        __ Beqzc(T9, 2);  // Skip jialc.
+        __ Beqzc(T9, &skip_call, /* is_bare */ true);
         __ Nop();  // In forbidden slot.
         __ Jialc(T9, thunk_disp);
       } else {
-        __ Beqz(T9, 3);  // Skip jalr+nop.
+        __ Beqz(T9, &skip_call, /* is_bare */ true);
         __ Addiu(T9, T9, thunk_disp);  // In delay slot.
         __ Jalr(T9);
         __ Nop();  // In delay slot.
+      __ Bind(&skip_call);
     } else {
       if (isR6) {
-        __ Beqz(T9, 2);  // Skip jialc.
+        __ Beqz(T9, &skip_call, /* is_bare */ true);
         __ Aui(base, obj, offset_high);  // In delay slot.
         __ Jialc(T9, thunk_disp);
+        __ Bind(&skip_call);
       } else {
         __ Lui(base, offset_high);
-        __ Beqz(T9, 2);  // Skip jalr.
+        __ Beqz(T9, &skip_call, /* is_bare */ true);
         __ Addiu(T9, T9, thunk_disp);  // In delay slot.
         __ Jalr(T9);
+        __ Bind(&skip_call);
         __ Addu(base, base, obj);  // In delay slot.
@@ -6826,15 +6832,18 @@
     Register index_reg = index.IsRegisterPair()
         ? index.AsRegisterPairLow<Register>()
         : index.AsRegister<Register>();
+    MipsLabel skip_call;
     if (GetInstructionSetFeatures().IsR6()) {
-      __ Beqz(T9, 2);  // Skip jialc.
+      __ Beqz(T9, &skip_call, /* is_bare */ true);
       __ Lsa(TMP, index_reg, obj, scale_factor);  // In delay slot.
       __ Jialc(T9, thunk_disp);
+      __ Bind(&skip_call);
     } else {
       __ Sll(TMP, index_reg, scale_factor);
-      __ Beqz(T9, 2);  // Skip jalr.
+      __ Beqz(T9, &skip_call, /* is_bare */ true);
       __ Addiu(T9, T9, thunk_disp);  // In delay slot.
       __ Jalr(T9);
+      __ Bind(&skip_call);
       __ Addu(TMP, TMP, obj);  // In delay slot.
     // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor))