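MIPS32: Fill branch delay slots

In the frame exit code, emit the stack pointer adjustment in the delay
slot of Jr(RA) when IsInt<16>(frame_size) holds. Replace the
unconditional Nop() after Jalr(T9) at the invoke call sites with
NopIfNoReordering(). Bracket the entry-point call through T9 and the
PC-relative type, string and dex cache array patch sequences with
SetReorder(false) / SetReorder(reordering).
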
Test: booted MIPS32 in QEMU
Test: test-art-host-gtest
Test: test-art-target-gtest
Test: test-art-target-run-test-optimizing on CI20

Change-Id: I727e80753395ab99fff004cb5d2e0a06409150d7
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8a2f90d..e0de03b 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -792,12 +792,24 @@
       // TODO: __ cfi().Restore(DWARFReg(reg));
     }
 
-    __ DecreaseFrameSize(GetFrameSize());
+    size_t frame_size = GetFrameSize();
+    // Adjust the stack pointer in the delay slot if doing so doesn't break CFI.
+    bool exchange = IsInt<16>(static_cast<int32_t>(frame_size));
+    bool reordering = __ SetReorder(false);
+    if (exchange) {
+      __ Jr(RA);
+      __ DecreaseFrameSize(frame_size);  // Single instruction in delay slot.
+    } else {
+      __ DecreaseFrameSize(frame_size);
+      __ Jr(RA);
+      __ Nop();  // In delay slot.
+    }
+    __ SetReorder(reordering);
+  } else {
+    __ Jr(RA);
+    __ NopIfNoReordering();
   }
 
-  __ Jr(RA);
-  __ Nop();
-
   __ cfi().RestoreState();
   __ cfi().DefCFAOffset(GetFrameSize());
 }
@@ -1251,6 +1263,7 @@
                                       uint32_t dex_pc,
                                       SlowPathCode* slow_path,
                                       bool is_direct_entrypoint) {
+  bool reordering = __ SetReorder(false);
   __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
   __ Jalr(T9);
   if (is_direct_entrypoint) {
@@ -1262,6 +1275,7 @@
   } else {
     __ Nop();  // In delay slot.
   }
+  __ SetReorder(reordering);
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
 
@@ -3953,7 +3967,7 @@
   __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value());
   // T9();
   __ Jalr(T9);
-  __ Nop();
+  __ NopIfNoReordering();
   DCHECK(!codegen_->IsLeafMethod());
   codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
@@ -4254,7 +4268,7 @@
       // T9 prepared above for better instruction scheduling.
       // T9()
       __ Jalr(T9);
-      __ Nop();
+      __ NopIfNoReordering();
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
       // TODO: Implement this type.
@@ -4270,7 +4284,7 @@
                             kMipsPointerSize).Int32Value());
       // T9()
       __ Jalr(T9);
-      __ Nop();
+      __ NopIfNoReordering();
       break;
   }
   DCHECK(!IsLeafMethod());
@@ -4312,7 +4326,7 @@
   __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value());
   // T9();
   __ Jalr(T9);
-  __ Nop();
+  __ NopIfNoReordering();
 }
 
 void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -4421,6 +4435,7 @@
       DCHECK(!kEmitCompilerReadBarrier);
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+      bool reordering = __ SetReorder(false);
       if (isR6) {
         __ Bind(&info->high_label);
         __ Bind(&info->pc_rel_label);
@@ -4436,6 +4451,7 @@
         // Add a 32-bit offset to PC.
         __ Addu(out, out, base_or_current_method_reg);
       }
+      __ SetReorder(reordering);
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
@@ -4579,6 +4595,7 @@
       DCHECK(!kEmitCompilerReadBarrier);
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+      bool reordering = __ SetReorder(false);
       if (isR6) {
         __ Bind(&info->high_label);
         __ Bind(&info->pc_rel_label);
@@ -4594,6 +4611,7 @@
         // Add a 32-bit offset to PC.
         __ Addu(out, out, base_or_current_method_reg);
       }
+      __ SetReorder(reordering);
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageAddress: {
@@ -4851,7 +4869,7 @@
     __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
     __ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value());
     __ Jalr(T9);
-    __ Nop();
+    __ NopIfNoReordering();
     codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
   } else {
     codegen_->InvokeRuntime(
@@ -5751,7 +5769,7 @@
   Register reg = base->GetLocations()->Out().AsRegister<Register>();
   CodeGeneratorMIPS::PcRelativePatchInfo* info =
       codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
-
+  bool reordering = __ SetReorder(false);
   if (codegen_->GetInstructionSetFeatures().IsR6()) {
     __ Bind(&info->high_label);
     __ Bind(&info->pc_rel_label);
@@ -5769,6 +5787,7 @@
     __ Addu(reg, reg, RA);
     // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()?
   }
+  __ SetReorder(reordering);
 }
 
 void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {