MIPS32: Allow some patched instructions in delay slots

Test: test-art-host-gtest
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: test-art-target-gtest32
Test: testrunner.py --target --optimizing --32
Test: same tests as above on CI20
Test: booted MIPS32R2 in QEMU

Change-Id: I7e1ba59993008014d0115ae20c56e0a71fef0fb0
diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc
index 3bec30f..6c974c3 100644
--- a/compiler/linker/mips/relative_patcher_mips.cc
+++ b/compiler/linker/mips/relative_patcher_mips.cc
@@ -61,10 +61,6 @@
       // lui reg, offset_high
       DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00);
       DCHECK_EQ((*code)[literal_offset + 3], 0x3C);
-      // addu reg, reg, reg2
-      DCHECK_EQ((*code)[literal_offset + 4], 0x21);
-      DCHECK_EQ(((*code)[literal_offset + 5] & 0x07), 0x00);
-      DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x00);
     }
   } else {
     // instr reg(s), offset_low
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 2e78af5..51f5b96 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -267,13 +267,10 @@
       DCHECK(bss_info_high_);
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index, bss_info_high_);
-      bool reordering = __ SetReorder(false);
-      __ Bind(&info_low->label);
-      __ StoreToOffset(kStoreWord,
-                       calling_convention.GetRegisterAt(0),
-                       entry_address,
-                       /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+      __ Sw(calling_convention.GetRegisterAt(0),
+            entry_address,
+            /* placeholder */ 0x5678,
+            &info_low->label);
     }
 
     // Move the class to the desired location.
@@ -296,10 +293,8 @@
           mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index, info_high);
-      bool reordering = __ SetReorder(false);
-      mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base, info_low);
-      __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+      mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base);
+      __ Sw(out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678, &info_low->label);
     }
     __ B(GetExitLabel());
   }
@@ -366,13 +361,10 @@
       DCHECK(bss_info_high_);
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, bss_info_high_);
-      bool reordering = __ SetReorder(false);
-      __ Bind(&info_low->label);
-      __ StoreToOffset(kStoreWord,
-                       calling_convention.GetRegisterAt(0),
-                       entry_address,
-                       /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+      __ Sw(calling_convention.GetRegisterAt(0),
+            entry_address,
+            /* placeholder */ 0x5678,
+            &info_low->label);
     }
 
     Primitive::Type type = instruction_->GetType();
@@ -391,10 +383,8 @@
           mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, info_high);
-      bool reordering = __ SetReorder(false);
-      mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base, info_low);
-      __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+      mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base);
+      __ Sw(out, TMP, /* placeholder */ 0x5678, &info_low->label);
     }
     __ B(GetExitLabel());
   }
@@ -1743,16 +1733,17 @@
 
 void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high,
                                                              Register out,
-                                                             Register base,
-                                                             PcRelativePatchInfo* info_low) {
+                                                             Register base) {
   DCHECK(!info_high->patch_info_high);
   DCHECK_NE(out, base);
+  bool reordering = __ SetReorder(false);
   if (GetInstructionSetFeatures().IsR6()) {
     DCHECK_EQ(base, ZERO);
     __ Bind(&info_high->label);
     __ Bind(&info_high->pc_rel_label);
     // Add the high half of a 32-bit offset to PC.
     __ Auipc(out, /* placeholder */ 0x1234);
+    __ SetReorder(reordering);
   } else {
     // If base is ZERO, emit NAL to obtain the actual base.
     if (base == ZERO) {
@@ -1766,15 +1757,12 @@
     if (base == ZERO) {
       __ Bind(&info_high->pc_rel_label);
     }
+    __ SetReorder(reordering);
     // Add the high half of a 32-bit offset to PC.
     __ Addu(out, out, (base == ZERO) ? RA : base);
   }
   // A following instruction will add the sign-extended low half of the 32-bit
   // offset to `out` (e.g. lw, jialc, addiu).
-  if (info_low != nullptr) {
-    DCHECK_EQ(info_low->patch_info_high, info_high);
-    __ Bind(&info_low->label);
-  }
 }
 
 CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch(
@@ -7515,11 +7503,9 @@
       PcRelativePatchInfo* info_high = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
       PcRelativePatchInfo* info_low =
           NewPcRelativeMethodPatch(invoke->GetTargetMethod(), info_high);
-      bool reordering = __ SetReorder(false);
       Register temp_reg = temp.AsRegister<Register>();
-      EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg, info_low);
-      __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+      EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg);
+      __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label);
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
@@ -7531,10 +7517,8 @@
       PcRelativePatchInfo* info_low = NewMethodBssEntryPatch(
           MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high);
       Register temp_reg = temp.AsRegister<Register>();
-      bool reordering = __ SetReorder(false);
-      EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg, info_low);
-      __ Lw(temp_reg, TMP, /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+      EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg);
+      __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label);
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
@@ -7729,13 +7713,10 @@
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high);
-      bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
                                                      out,
-                                                     base_or_current_method_reg,
-                                                     info_low);
-      __ Addiu(out, out, /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+                                                     base_or_current_method_reg);
+      __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label);
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
@@ -7754,11 +7735,9 @@
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex(), bss_info_high);
       constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
       Register temp = non_baker_read_barrier ? out : locations->GetTemp(0).AsRegister<Register>();
-      bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high,
                                                      temp,
                                                      base_or_current_method_reg);
-      __ SetReorder(reordering);
       GenerateGcRootFieldLoad(cls,
                               out_loc,
                               temp,
@@ -7899,13 +7878,10 @@
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
-      bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
                                                      out,
-                                                     base_or_current_method_reg,
-                                                     info_low);
-      __ Addiu(out, out, /* placeholder */ 0x5678);
-      __ SetReorder(reordering);
+                                                     base_or_current_method_reg);
+      __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label);
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageAddress: {
@@ -7925,11 +7901,9 @@
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
       constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
       Register temp = non_baker_read_barrier ? out : locations->GetTemp(0).AsRegister<Register>();
-      bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
                                                      temp,
                                                      base_or_current_method_reg);
-      __ SetReorder(reordering);
       GenerateGcRootFieldLoad(load,
                               out_loc,
                               temp,
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 7195b9d..c0e1ec0 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -637,8 +637,7 @@
 
   void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high,
                                             Register out,
-                                            Register base,
-                                            PcRelativePatchInfo* info_low = nullptr);
+                                            Register base);
 
   // The JitPatchInfo is used for JIT string and class loads.
   struct JitPatchInfo {
diff --git a/compiler/utils/label.h b/compiler/utils/label.h
index 85710d0..d835c63 100644
--- a/compiler/utils/label.h
+++ b/compiler/utils/label.h
@@ -31,9 +31,11 @@
 }  // namespace arm64
 namespace mips {
   class MipsAssembler;
+  class MipsLabel;
 }  // namespace mips
 namespace mips64 {
   class Mips64Assembler;
+  class Mips64Label;
 }  // namespace mips64
 namespace x86 {
   class X86Assembler;
@@ -114,7 +116,9 @@
 
   friend class arm64::Arm64Assembler;
   friend class mips::MipsAssembler;
+  friend class mips::MipsLabel;
   friend class mips64::Mips64Assembler;
+  friend class mips64::Mips64Label;
   friend class x86::X86Assembler;
   friend class x86::NearLabel;
   friend class x86_64::X86_64Assembler;
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 18099d8..b300cc5 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -47,7 +47,8 @@
       fpr_outs_mask_(0),
       fpr_ins_mask_(0),
       cc_outs_mask_(0),
-      cc_ins_mask_(0) {}
+      cc_ins_mask_(0),
+      patcher_label_(nullptr) {}
 
 void MipsAssembler::DsFsmInstr(uint32_t instruction,
                                uint32_t gpr_outs_mask,
@@ -55,7 +56,8 @@
                                uint32_t fpr_outs_mask,
                                uint32_t fpr_ins_mask,
                                uint32_t cc_outs_mask,
-                               uint32_t cc_ins_mask) {
+                               uint32_t cc_ins_mask,
+                               MipsLabel* patcher_label) {
   if (!reordering_) {
     CHECK_EQ(ds_fsm_state_, kExpectingLabel);
     CHECK_EQ(delay_slot_.instruction_, 0u);
@@ -96,6 +98,7 @@
   delay_slot_.fpr_ins_mask_ = fpr_ins_mask;
   delay_slot_.cc_outs_mask_ = cc_outs_mask;
   delay_slot_.cc_ins_mask_ = cc_ins_mask;
+  delay_slot_.patcher_label_ = patcher_label;
 }
 
 void MipsAssembler::DsFsmLabel() {
@@ -167,8 +170,12 @@
   DsFsmInstr(0, 0, 0, 0, 0, 0, 0);
 }
 
-void MipsAssembler::DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2) {
-  DsFsmInstr(instruction, (1u << out), (1u << in1) | (1u << in2), 0, 0, 0, 0);
+void MipsAssembler::DsFsmInstrRrr(uint32_t instruction,
+                                  Register out,
+                                  Register in1,
+                                  Register in2,
+                                  MipsLabel* patcher_label) {
+  DsFsmInstr(instruction, (1u << out), (1u << in1) | (1u << in2), 0, 0, 0, 0, patcher_label);
 }
 
 void MipsAssembler::DsFsmInstrRrrr(uint32_t instruction,
@@ -310,8 +317,8 @@
   // Switch from appending instructions at the end of the buffer to overwriting
   // existing instructions (branch placeholders) in the buffer.
   overwriting_ = true;
-  for (auto& branch : branches_) {
-    EmitBranch(&branch);
+  for (size_t id = 0; id < branches_.size(); id++) {
+    EmitBranch(id);
   }
   overwriting_ = false;
 }
@@ -531,8 +538,15 @@
   DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x21), rd, rs, rt);
 }
 
+void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) {
+  if (patcher_label != nullptr) {
+    Bind(patcher_label);
+  }
+  DsFsmInstrRrr(EmitI(0x9, rs, rt, imm16), rt, rs, rs, patcher_label);
+}
+
 void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0x9, rs, rt, imm16), rt, rs, rs);
+  Addiu(rt, rs, imm16, /* patcher_label */ nullptr);
 }
 
 void MipsAssembler::Subu(Register rd, Register rs, Register rt) {
@@ -791,8 +805,15 @@
   DsFsmInstrRrr(EmitI(0x21, rs, rt, imm16), rt, rs, rs);
 }
 
+void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) {
+  if (patcher_label != nullptr) {
+    Bind(patcher_label);
+  }
+  DsFsmInstrRrr(EmitI(0x23, rs, rt, imm16), rt, rs, rs, patcher_label);
+}
+
 void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0x23, rs, rt, imm16), rt, rs, rs);
+  Lw(rt, rs, imm16, /* patcher_label */ nullptr);
 }
 
 void MipsAssembler::Lwl(Register rt, Register rs, uint16_t imm16) {
@@ -866,8 +887,15 @@
   DsFsmInstrRrr(EmitI(0x29, rs, rt, imm16), ZERO, rt, rs);
 }
 
+void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) {
+  if (patcher_label != nullptr) {
+    Bind(patcher_label);
+  }
+  DsFsmInstrRrr(EmitI(0x2b, rs, rt, imm16), ZERO, rt, rs, patcher_label);
+}
+
 void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16) {
-  DsFsmInstrRrr(EmitI(0x2b, rs, rt, imm16), ZERO, rt, rs);
+  Sw(rt, rs, imm16, /* patcher_label */ nullptr);
 }
 
 void MipsAssembler::Swl(Register rt, Register rs, uint16_t imm16) {
@@ -991,6 +1019,7 @@
 
 void MipsAssembler::Jalr(Register rd, Register rs) {
   uint32_t last_instruction = delay_slot_.instruction_;
+  MipsLabel* patcher_label = delay_slot_.patcher_label_;
   bool exchange = (last_instruction != 0 &&
       (delay_slot_.gpr_outs_mask_ & (1u << rs)) == 0 &&
       ((delay_slot_.gpr_ins_mask_ | delay_slot_.gpr_outs_mask_) & (1u << rd)) == 0);
@@ -1011,6 +1040,10 @@
     CHECK_EQ(instr1, last_instruction);
     buffer_.Store<uint32_t>(pos1, instr2);
     buffer_.Store<uint32_t>(pos2, instr1);
+    // Move the patcher label along with the patched instruction.
+    if (patcher_label != nullptr) {
+      patcher_label->AdjustBoundPosition(sizeof(uint32_t));
+    }
   } else if (reordering_) {
     Nop();
   }
@@ -3237,7 +3270,8 @@
       lhs_reg_(0),
       rhs_reg_(0),
       condition_(kUncond),
-      delayed_instruction_(kUnfilledDelaySlot) {
+      delayed_instruction_(kUnfilledDelaySlot),
+      patcher_label_(nullptr) {
   InitializeType(
       (is_call ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareCondBranch : kCondBranch)),
       is_r6);
@@ -3256,7 +3290,8 @@
       lhs_reg_(lhs_reg),
       rhs_reg_(rhs_reg),
       condition_(condition),
-      delayed_instruction_(kUnfilledDelaySlot) {
+      delayed_instruction_(kUnfilledDelaySlot),
+      patcher_label_(nullptr) {
   CHECK_NE(condition, kUncond);
   switch (condition) {
     case kCondLT:
@@ -3313,7 +3348,8 @@
       lhs_reg_(dest_reg),
       rhs_reg_(base_reg),
       condition_(kUncond),
-      delayed_instruction_(kUnfilledDelaySlot) {
+      delayed_instruction_(kUnfilledDelaySlot),
+      patcher_label_(nullptr) {
   CHECK_NE(dest_reg, ZERO);
   if (is_r6) {
     CHECK_EQ(base_reg, ZERO);
@@ -3690,6 +3726,17 @@
   return &branches_[branch_id];
 }
 
+void MipsAssembler::BindRelativeToPrecedingBranch(MipsLabel* label,
+                                                  uint32_t prev_branch_id_plus_one,
+                                                  uint32_t position) {
+  if (prev_branch_id_plus_one != 0) {
+    const Branch* branch = GetBranch(prev_branch_id_plus_one - 1);
+    position -= branch->GetEndLocation();
+  }
+  label->prev_branch_id_plus_one_ = prev_branch_id_plus_one;
+  label->BindTo(position);
+}
+
 void MipsAssembler::Bind(MipsLabel* label) {
   CHECK(!label->IsBound());
   uint32_t bound_pc = buffer_.Size();
@@ -3715,22 +3762,15 @@
 
   // Now make the label object contain its own location (relative to the end of the preceding
   // branch, if any; it will be used by the branches referring to and following this label).
-  label->prev_branch_id_plus_one_ = branches_.size();
-  if (label->prev_branch_id_plus_one_) {
-    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
-    const Branch* branch = GetBranch(branch_id);
-    bound_pc -= branch->GetEndLocation();
-  }
-  label->BindTo(bound_pc);
+  BindRelativeToPrecedingBranch(label, branches_.size(), bound_pc);
 }
 
 uint32_t MipsAssembler::GetLabelLocation(const MipsLabel* label) const {
   CHECK(label->IsBound());
   uint32_t target = label->Position();
-  if (label->prev_branch_id_plus_one_) {
+  if (label->prev_branch_id_plus_one_ != 0) {
     // Get label location based on the branch preceding it.
-    uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
-    const Branch* branch = GetBranch(branch_id);
+    const Branch* branch = GetBranch(label->prev_branch_id_plus_one_ - 1);
     target += branch->GetEndLocation();
   }
   return target;
@@ -3872,10 +3912,15 @@
   return delayed_instruction_;
 }
 
-void MipsAssembler::Branch::SetDelayedInstruction(uint32_t instruction) {
+MipsLabel* MipsAssembler::Branch::GetPatcherLabel() const {
+  return patcher_label_;
+}
+
+void MipsAssembler::Branch::SetDelayedInstruction(uint32_t instruction, MipsLabel* patcher_label) {
   CHECK_NE(instruction, kUnfilledDelaySlot);
   CHECK_EQ(delayed_instruction_, kUnfilledDelaySlot);
   delayed_instruction_ = instruction;
+  patcher_label_ = patcher_label;
 }
 
 void MipsAssembler::Branch::DecrementLocations() {
@@ -3916,7 +3961,7 @@
     buffer_.Resize(size);
     // Attach it to the branch and adjust the branch locations.
     branch.DecrementLocations();
-    branch.SetDelayedInstruction(delay_slot_.instruction_);
+    branch.SetDelayedInstruction(delay_slot_.instruction_, delay_slot_.patcher_label_);
   } else if (!reordering_ && branch.GetType() == Branch::kUncondBranch) {
     // If reordefing is disabled, prevent absorption of the target instruction.
     branch.SetDelayedInstruction(Branch::kUnfillableDelaySlot);
@@ -4140,15 +4185,49 @@
   {  2, 0, 0, MipsAssembler::Branch::kOffset32, 0 },  // kR6FarLiteral
 };
 
+static inline bool IsAbsorbableInstruction(uint32_t instruction) {
+  // The relative patcher patches addiu, lw and sw with an immediate operand of 0x5678.
+  // Make sure that these instructions do not get absorbed into delay slots of
+  // unconditional branches on R2. Absorption copies the branch target instruction
+  // into the delay slot, and that copy would be left unpatched.
+  if ((instruction & 0xFFFF) != 0x5678) {
+    return true;
+  }
+  switch (instruction >> kOpcodeShift) {
+    case 0x09:  // Addiu.
+    case 0x23:  // Lw.
+    case 0x2B:  // Sw.
+      return false;
+    default:
+      return true;
+  }
+}
+
 // Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
-void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
+void MipsAssembler::EmitBranch(uint32_t branch_id) {
   CHECK_EQ(overwriting_, true);
+  Branch* branch = GetBranch(branch_id);
   overwrite_location_ = branch->GetLocation();
   uint32_t offset = branch->GetOffset(GetBranchOrPcRelBaseForEncoding(branch));
   BranchCondition condition = branch->GetCondition();
   Register lhs = branch->GetLeftRegister();
   Register rhs = branch->GetRightRegister();
   uint32_t delayed_instruction = branch->GetDelayedInstruction();
+  MipsLabel* patcher_label = branch->GetPatcherLabel();
+  if (patcher_label != nullptr) {
+    // Update the patcher label location to account for branch promotion and
+    // delay slot filling.
+    CHECK(patcher_label->IsBound());
+    uint32_t bound_pc = branch->GetLocation();
+    if (!branch->IsLong()) {
+      // Short branches precede delay slots: the patched instruction is one word further.
+      // Long branches follow "delay slots": the patched instruction is at the branch location.
+      bound_pc += sizeof(uint32_t);
+    }
+    // Rebind the label.
+    patcher_label->Reinitialize();
+    BindRelativeToPrecedingBranch(patcher_label, branch_id, bound_pc);
+  }
   switch (branch->GetType()) {
     // R2 short branches.
     case Branch::kUncondBranch:
@@ -4164,8 +4243,11 @@
         if (offset != 0x7FFF) {
           uint32_t target = branch->GetTarget();
           if (std::binary_search(ds_fsm_target_pcs_.begin(), ds_fsm_target_pcs_.end(), target)) {
-            delayed_instruction = buffer_.Load<uint32_t>(target);
-            offset++;
+            uint32_t target_instruction = buffer_.Load<uint32_t>(target);
+            if (IsAbsorbableInstruction(target_instruction)) {
+              delayed_instruction = target_instruction;
+              offset++;
+            }
           }
         }
       }
@@ -4406,6 +4488,11 @@
   }
   CHECK_EQ(overwrite_location_, branch->GetEndLocation());
   CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize));
+  if (patcher_label != nullptr) {
+    // Sanity check: the instruction now at the patcher label must be a patchable one.
+    uint32_t patched_instruction = buffer_.Load<uint32_t>(GetLabelLocation(patcher_label));
+    CHECK(!IsAbsorbableInstruction(patched_instruction));
+  }
 }
 
 void MipsAssembler::B(MipsLabel* label, bool is_bare) {
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 7f9d576..0f163ac 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -80,6 +80,12 @@
   MipsLabel(MipsLabel&& src)
       : Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {}
 
+  void AdjustBoundPosition(int delta) {
+    CHECK(IsBound());
+    // A bound label's position is negative, so moving it forward by `delta` decrements it.
+    position_ -= delta;
+  }
+
  private:
   uint32_t prev_branch_id_plus_one_;  // To get distance from preceding branch, if any.
 
@@ -215,6 +221,7 @@
 
   // Emit Machine Instructions.
   void Addu(Register rd, Register rs, Register rt);
+  void Addiu(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label);
   void Addiu(Register rt, Register rs, uint16_t imm16);
   void Subu(Register rd, Register rs, Register rt);
 
@@ -272,6 +279,7 @@
 
   void Lb(Register rt, Register rs, uint16_t imm16);
   void Lh(Register rt, Register rs, uint16_t imm16);
+  void Lw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label);
   void Lw(Register rt, Register rs, uint16_t imm16);
   void Lwl(Register rt, Register rs, uint16_t imm16);
   void Lwr(Register rt, Register rs, uint16_t imm16);
@@ -287,6 +295,7 @@
 
   void Sb(Register rt, Register rs, uint16_t imm16);
   void Sh(Register rt, Register rs, uint16_t imm16);
+  void Sw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label);
   void Sw(Register rt, Register rs, uint16_t imm16);
   void Swl(Register rt, Register rs, uint16_t imm16);
   void Swr(Register rt, Register rs, uint16_t imm16);
@@ -1288,6 +1297,9 @@
     uint32_t cc_ins_mask_;
     // Branches never operate on the LO and HI registers, hence there's
     // no mask for LO and HI.
+
+    // Label for patchable instructions to allow moving them into delay slots.
+    MipsLabel* patcher_label_;
   };
 
   // Delay slot finite state machine's (DS FSM's) state. The FSM state is updated
@@ -1440,8 +1452,9 @@
 
     // Various helpers for branch delay slot management.
     bool CanHaveDelayedInstruction(const DelaySlot& delay_slot) const;
-    void SetDelayedInstruction(uint32_t instruction);
+    void SetDelayedInstruction(uint32_t instruction, MipsLabel* patcher_label = nullptr);
     uint32_t GetDelayedInstruction() const;
+    MipsLabel* GetPatcherLabel() const;
     void DecrementLocations();
 
     // Returns the bit size of the signed offset that the branch instruction can handle.
@@ -1526,6 +1539,8 @@
                                     // kUnfillableDelaySlot if none and unfillable
                                     // (the latter is only used for unconditional R2
                                     // branches).
+
+    MipsLabel* patcher_label_;      // Patcher label for the instruction in the delay slot.
   };
   friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
   friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
@@ -1580,9 +1595,14 @@
                   uint32_t fpr_outs_mask,
                   uint32_t fpr_ins_mask,
                   uint32_t cc_outs_mask,
-                  uint32_t cc_ins_mask);
+                  uint32_t cc_ins_mask,
+                  MipsLabel* patcher_label = nullptr);
   void DsFsmInstrNop(uint32_t instruction);
-  void DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2);
+  void DsFsmInstrRrr(uint32_t instruction,
+                     Register out,
+                     Register in1,
+                     Register in2,
+                     MipsLabel* patcher_label = nullptr);
   void DsFsmInstrRrrr(uint32_t instruction, Register in1_out, Register in2, Register in3);
   void DsFsmInstrFff(uint32_t instruction, FRegister out, FRegister in1, FRegister in2);
   void DsFsmInstrFfff(uint32_t instruction, FRegister in1_out, FRegister in2, FRegister in3);
@@ -1605,12 +1625,15 @@
   const Branch* GetBranch(uint32_t branch_id) const;
   uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const;
   uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const;
+  void BindRelativeToPrecedingBranch(MipsLabel* label,
+                                     uint32_t prev_branch_id_plus_one,
+                                     uint32_t position);
 
   void EmitLiterals();
   void ReserveJumpTableSpace();
   void EmitJumpTables();
   void PromoteBranches();
-  void EmitBranch(Branch* branch);
+  void EmitBranch(uint32_t branch_id);
   void EmitBranches();
   void PatchCFI(size_t number_of_delayed_adjust_pcs);
 
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index 6e52b17..a5cd5a7 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -1529,10 +1529,62 @@
   DriverStr(expected, "SetReorder");
 }
 
-TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) {
-  mips::MipsLabel label;
+TEST_F(AssemblerMIPS32r6Test, ReorderPatchedInstruction) {
   __ SetReorder(true);
-  __ Subu(mips::T0, mips::T1, mips::T2);
+  mips::MipsLabel label1, label2;
+  mips::MipsLabel patcher_label1, patcher_label2, patcher_label3, patcher_label4, patcher_label5;
+  __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label1);
+  __ Bc1eqz(mips::F0, &label1);
+  constexpr uint32_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label1);
+  __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label2);
+  __ Bc1nez(mips::F2, &label2);
+  constexpr uint32_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label2);
+  __ Addiu(mips::V0, mips::A0, 0x5678, &patcher_label3);
+  __ Bc1eqz(mips::F4, &label1);
+  __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label4);
+  __ Jalr(mips::T9);
+  __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label5);
+  __ Bltc(mips::V0, mips::V1, &label2);
+  __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+  std::string expected =
+      ".set noreorder\n"
+      "bc1eqz $f0, 1f\n"
+      "lw $v0, 0x5678($a0)\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      "bc1nez $f2, 2f\n"
+      "sw $v0, 0x5678($a0)\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      "bc1eqz $f4, 1b\n"
+      "addiu $v0, $a0, 0x5678\n"
+      "jalr $t9\n"
+      "lw $v0, 0x5678($a0)\n"
+      "sw $v0, 0x5678($a0)\n"
+      "bltc $v0, $v1, 2b\n"
+      "nop\n"
+      "addu $zero, $zero, $zero\n";
+  DriverStr(expected, "ReorderPatchedInstruction");
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 1 * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + 3) * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label3), (kAdduCount1 + kAdduCount2 + 5) * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label4), (kAdduCount1 + kAdduCount2 + 7) * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label5), (kAdduCount1 + kAdduCount2 + 8) * 4u);
+}
+
+TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) {
+  mips::MipsLabel label, patcher_label1, patcher_label2;
+  __ SetReorder(true);
+  __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label1);
   __ Bc1nez(mips::F0, &label);
   constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
   for (uint32_t i = 0; i != kAdduCount1; ++i) {
@@ -1543,7 +1595,7 @@
   for (uint32_t i = 0; i != kAdduCount2; ++i) {
     __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
   }
-  __ Subu(mips::T0, mips::T1, mips::T2);
+  __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label2);
   __ Bc1eqz(mips::F0, &label);
 
   uint32_t offset_forward = 2 + kAdduCount1;  // 2: account for auipc and jic.
@@ -1557,7 +1609,7 @@
   std::ostringstream oss;
   oss <<
       ".set noreorder\n"
-      "subu $t0, $t1, $t2\n"
+      "addiu $t0, $t1, 0x5678\n"
       "bc1eqz $f0, 1f\n"
       "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
       "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
@@ -1565,13 +1617,15 @@
       RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
       "2:\n" <<
       RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
-      "subu $t0, $t1, $t2\n"
+      "addiu $t0, $t1, 0x5678\n"
       "bc1nez $f0, 3f\n"
       "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
       "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
       "3:\n";
   std::string expected = oss.str();
   DriverStr(expected, "LongBranchReorder");
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 0 * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + kAdduCount2 + 4) * 4u);
 }
 
 ///////////////////////
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index d9bf0b8..680c347 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -2506,6 +2506,7 @@
 
 TEST_F(AssemblerMIPSTest, AbsorbTargetInstruction) {
   mips::MipsLabel label1, label2, label3, label4, label5, label6;
+  mips::MipsLabel label7, label8, label9, label10, label11, label12, label13;
   __ SetReorder(true);
 
   __ B(&label1);
@@ -2529,6 +2530,41 @@
   __ Bind(&label6);
   __ CodePosition();  // Even across Bind(), CodePosition() prevents absorbing the ADDU above.
 
+  __ Nop();
+  __ B(&label7);
+  __ Bind(&label7);
+  __ Lw(mips::V0, mips::A0, 0x5678);  // Possibly patchable instruction, not absorbed.
+
+  __ Nop();
+  __ B(&label8);
+  __ Bind(&label8);
+  __ Sw(mips::V0, mips::A0, 0x5678);  // Possibly patchable instruction, not absorbed.
+
+  __ Nop();
+  __ B(&label9);
+  __ Bind(&label9);
+  __ Addiu(mips::V0, mips::A0, 0x5678);  // Possibly patchable instruction, not absorbed.
+
+  __ Nop();
+  __ B(&label10);
+  __ Bind(&label10);
+  __ Lw(mips::V0, mips::A0, 0x5680);  // Immediate isn't 0x5678, absorbed.
+
+  __ Nop();
+  __ B(&label11);
+  __ Bind(&label11);
+  __ Sw(mips::V0, mips::A0, 0x5680);  // Immediate isn't 0x5678, absorbed.
+
+  __ Nop();
+  __ B(&label12);
+  __ Bind(&label12);
+  __ Addiu(mips::V0, mips::A0, 0x5680);  // Immediate isn't 0x5678, absorbed.
+
+  __ Nop();
+  __ B(&label13);
+  __ Bind(&label13);
+  __ Andi(mips::V0, mips::A0, 0x5678);  // Not one of the patchable instructions, absorbed.
+
   std::string expected =
       ".set noreorder\n"
       "b 1f\n"
@@ -2550,7 +2586,49 @@
       "b 5f\n"
       "nop\n"
       "5:\n"
-      "addu $t0, $t1, $t2\n";
+      "addu $t0, $t1, $t2\n"
+
+      "nop\n"
+      "b 7f\n"
+      "nop\n"
+      "7:\n"
+      "lw $v0, 0x5678($a0)\n"
+
+      "nop\n"
+      "b 8f\n"
+      "nop\n"
+      "8:\n"
+      "sw $v0, 0x5678($a0)\n"
+
+      "nop\n"
+      "b 9f\n"
+      "nop\n"
+      "9:\n"
+      "addiu $v0, $a0, 0x5678\n"
+
+      "nop\n"
+      "b 10f\n"
+      "lw $v0, 0x5680($a0)\n"
+      "lw $v0, 0x5680($a0)\n"
+      "10:\n"
+
+      "nop\n"
+      "b 11f\n"
+      "sw $v0, 0x5680($a0)\n"
+      "sw $v0, 0x5680($a0)\n"
+      "11:\n"
+
+      "nop\n"
+      "b 12f\n"
+      "addiu $v0, $a0, 0x5680\n"
+      "addiu $v0, $a0, 0x5680\n"
+      "12:\n"
+
+      "nop\n"
+      "b 13f\n"
+      "andi $v0, $a0, 0x5678\n"
+      "andi $v0, $a0, 0x5678\n"
+      "13:\n";
   DriverStr(expected, "AbsorbTargetInstruction");
 }
 
@@ -2637,10 +2715,62 @@
   DriverStr(expected, "SetReorder");
 }
 
-TEST_F(AssemblerMIPSTest, LongBranchReorder) {
-  mips::MipsLabel label;
+TEST_F(AssemblerMIPSTest, ReorderPatchedInstruction) {
   __ SetReorder(true);
-  __ Subu(mips::T0, mips::T1, mips::T2);
+  mips::MipsLabel label1, label2;
+  mips::MipsLabel patcher_label1, patcher_label2, patcher_label3, patcher_label4, patcher_label5;
+  __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label1);
+  __ Beq(mips::A0, mips::A1, &label1);
+  constexpr uint32_t kAdduCount1 = 63;
+  for (size_t i = 0; i != kAdduCount1; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label1);
+  __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label2);
+  __ Bltz(mips::V1, &label2);
+  constexpr uint32_t kAdduCount2 = 64;
+  for (size_t i = 0; i != kAdduCount2; ++i) {
+    __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+  }
+  __ Bind(&label2);
+  __ Addiu(mips::V0, mips::A0, 0x5678, &patcher_label3);
+  __ B(&label1);
+  __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label4);
+  __ Jalr(mips::T9);
+  __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label5);
+  __ Blt(mips::V0, mips::V1, &label2);
+  __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+  std::string expected =
+      ".set noreorder\n"
+      "beq $a0, $a1, 1f\n"
+      "lw $v0, 0x5678($a0)\n" +
+      RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+      "1:\n"
+      "bltz $v1, 2f\n"
+      "sw $v0, 0x5678($a0)\n" +
+      RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+      "2:\n"
+      "b 1b\n"
+      "addiu $v0, $a0, 0x5678\n"
+      "jalr $t9\n"
+      "lw $v0, 0x5678($a0)\n"
+      "slt $at, $v0, $v1\n"
+      "bnez $at, 2b\n"
+      "sw $v0, 0x5678($a0)\n"
+      "addu $zero, $zero, $zero\n";
+  DriverStr(expected, "ReorderPatchedInstruction");
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 1 * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + 3) * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label3), (kAdduCount1 + kAdduCount2 + 5) * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label4), (kAdduCount1 + kAdduCount2 + 7) * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label5), (kAdduCount1 + kAdduCount2 + 10) * 4u);
+}
+
+TEST_F(AssemblerMIPSTest, LongBranchReorder) {
+  mips::MipsLabel label, patcher_label1, patcher_label2;
+  __ SetReorder(true);
+  __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label1);
   __ B(&label);
   constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
   for (size_t i = 0; i != kAdduCount1; ++i) {
@@ -2651,7 +2781,7 @@
   for (size_t i = 0; i != kAdduCount2; ++i) {
     __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
   }
-  __ Subu(mips::T0, mips::T1, mips::T2);
+  __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label2);
   __ B(&label);
 
   // Account for 5 extra instructions: ori, addu, lw, jalr, addiu.
@@ -2662,7 +2792,7 @@
   std::ostringstream oss;
   oss <<
       ".set noreorder\n"
-      "subu $t0, $t1, $t2\n"
+      "addiu $t0, $t1, 0x5678\n"
       "addiu $sp, $sp, -4\n"
       "sw $ra, 0($sp)\n"
       "bltzal $zero, .+4\n"
@@ -2674,7 +2804,7 @@
       "addiu $sp, $sp, 4\n" <<
       RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
       RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
-      "subu $t0, $t1, $t2\n"
+      "addiu $t0, $t1, 0x5678\n"
       "addiu $sp, $sp, -4\n"
       "sw $ra, 0($sp)\n"
       "bltzal $zero, .+4\n"
@@ -2686,6 +2816,8 @@
       "addiu $sp, $sp, 4\n";
   std::string expected = oss.str();
   DriverStr(expected, "LongBranchReorder");
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 0 * 4u);
+  EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + kAdduCount2 + 10) * 4u);
 }
 
 ///////////////////////