MIPS32R6: Improve PC-relative string/class loads and invokes.

Use PC-relative addressing on MIPS32R6 instead of
HMipsDexCacheArraysBase and allow such PC-relative
addressing in the presence of irreducible loops.

Also save a couple of instructions when handling
string and class loads from .bss by folding the low half
of the PC-relative offset into the load/store that follows.

Test: test-art-host-gtest
Test: booted MIPS32R2 in QEMU
Test: "make -j1 ART_TEST_DEFAULT_COMPILER=false ART_TEST_OPTIMIZING=true
       ART_TEST_INTERPRETER=false ART_TEST_JIT=false
       ART_TEST_PIC_TEST=true test-art-target-run-test"
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: "make -j1 ART_TEST_DEFAULT_COMPILER=false ART_TEST_OPTIMIZING=true
       ART_TEST_INTERPRETER=false ART_TEST_JIT=false
       ART_TEST_PIC_TEST=true test-art-target-run-test32"

Change-Id: I5d0fcbf271541294a3d4479987d52e2aaff084d9
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 76be74e..a095970 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -258,8 +258,10 @@
       DCHECK_NE(out.AsRegister<Register>(), AT);
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
-      mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base);
-      __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, 0);
+      bool reordering = __ SetReorder(false);
+      mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
+      __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
     }
     __ B(GetExitLabel());
   }
@@ -313,8 +315,10 @@
     DCHECK_NE(out, AT);
     CodeGeneratorMIPS::PcRelativePatchInfo* info =
         mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
-    mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base);
-    __ StoreToOffset(kStoreWord, out, TMP, 0);
+    bool reordering = __ SetReorder(false);
+    mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
+    __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
+    __ SetReorder(reordering);
 
     __ B(GetExitLabel());
   }
@@ -1127,16 +1131,15 @@
   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
 }
 
-void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholder(
-    PcRelativePatchInfo* info, Register out, Register base) {
-  bool reordering = __ SetReorder(false);
+void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
+                                                             Register out,
+                                                             Register base) {
   if (GetInstructionSetFeatures().IsR6()) {
     DCHECK_EQ(base, ZERO);
     __ Bind(&info->high_label);
     __ Bind(&info->pc_rel_label);
-    // Add a 32-bit offset to PC.
+    // Add the high half of a 32-bit offset to PC.
     __ Auipc(out, /* placeholder */ 0x1234);
-    __ Addiu(out, out, /* placeholder */ 0x5678);
   } else {
     // If base is ZERO, emit NAL to obtain the actual base.
     if (base == ZERO) {
@@ -1150,11 +1153,11 @@
     if (base == ZERO) {
       __ Bind(&info->pc_rel_label);
     }
-    __ Ori(out, out, /* placeholder */ 0x5678);
-    // Add a 32-bit offset to PC.
+    // Add the high half of a 32-bit offset to PC.
     __ Addu(out, out, (base == ZERO) ? RA : base);
   }
-  __ SetReorder(reordering);
+  // The immediately following instruction will add the sign-extended low half of the 32-bit
+  // offset to `out` (e.g. lw, jialc, addiu).
 }
 
 void CodeGeneratorMIPS::MarkGCCard(Register object,
@@ -5159,7 +5162,8 @@
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
-  bool has_extra_input = invoke->HasPcRelativeDexCache();
+  bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6();
+  bool has_extra_input = invoke->HasPcRelativeDexCache() && !is_r6;
 
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -5200,12 +5204,13 @@
   if (kEmitCompilerReadBarrier) {
     UNIMPLEMENTED(FATAL) << "for read barrier";
   }
-  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
   // with irreducible loops.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
-  bool fallback_load = has_irreducible_loops;
+  bool is_r6 = GetInstructionSetFeatures().IsR6();
+  bool fallback_load = has_irreducible_loops && !is_r6;
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
       DCHECK(!GetCompilerOptions().GetCompilePic());
@@ -5238,10 +5243,11 @@
   if (kEmitCompilerReadBarrier) {
     UNIMPLEMENTED(FATAL) << "for read barrier";
   }
-  // We disable pc-relative load when there is an irreducible loop, as the optimization
+  // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
-  bool fallback_load = has_irreducible_loops;
+  bool is_r6 = GetInstructionSetFeatures().IsR6();
+  bool fallback_load = has_irreducible_loops && !is_r6;
   switch (desired_class_load_kind) {
     case HLoadClass::LoadKind::kReferrersClass:
       fallback_load = false;
@@ -5259,6 +5265,7 @@
       break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
+      // TODO: implement.
       fallback_load = true;
       break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
@@ -5273,6 +5280,7 @@
 
 Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
                                                                   Register temp) {
+  CHECK(!GetInstructionSetFeatures().IsR6());
   CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
   Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
   if (!invoke->GetLocations()->Intrinsified()) {
@@ -5301,13 +5309,13 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
   HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
-  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
-  bool fallback_load = true;
+  bool is_r6 = GetInstructionSetFeatures().IsR6();
+  bool fallback_load = has_irreducible_loops && !is_r6;
   switch (dispatch_info.method_load_kind) {
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      fallback_load = has_irreducible_loops;
       break;
     default:
       fallback_load = false;
@@ -5325,7 +5333,8 @@
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
   HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
-  Register base_reg = invoke->HasPcRelativeDexCache()
+  bool is_r6 = GetInstructionSetFeatures().IsR6();
+  Register base_reg = (invoke->HasPcRelativeDexCache() && !is_r6)
       ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>())
       : ZERO;
 
@@ -5346,14 +5355,23 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
-      HMipsDexCacheArraysBase* base =
-          invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
-      int32_t offset =
-          invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
-      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      if (is_r6) {
+        uint32_t offset = invoke->GetDexCacheArrayOffset();
+        CodeGeneratorMIPS::PcRelativePatchInfo* info =
+            NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset);
+        bool reordering = __ SetReorder(false);
+        EmitPcRelativeAddressPlaceholderHigh(info, TMP, ZERO);
+        __ Lw(temp.AsRegister<Register>(), TMP, /* placeholder */ 0x5678);
+        __ SetReorder(reordering);
+      } else {
+        HMipsDexCacheArraysBase* base =
+            invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
+        int32_t offset =
+            invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+        __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      }
       break;
-    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = temp.AsRegister<Register>();
@@ -5546,7 +5564,10 @@
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
-      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
+      bool reordering = __ SetReorder(false);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+      __ Addiu(out, out, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
@@ -5562,8 +5583,10 @@
     case HLoadClass::LoadKind::kBssEntry: {
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
-      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
-      __ LoadFromOffset(kLoadWord, out, out, 0);
+      bool reordering = __ SetReorder(false);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+      __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       generate_null_check = true;
       break;
     }
@@ -5678,7 +5701,10 @@
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
-      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
+      bool reordering = __ SetReorder(false);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+      __ Addiu(out, out, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageAddress: {
@@ -5694,8 +5720,10 @@
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
-      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
-      __ LoadFromOffset(kLoadWord, out, out, 0);
+      bool reordering = __ SetReorder(false);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+      __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
       codegen_->AddSlowPath(slow_path);
       __ Beqz(out, slow_path->GetEntryLabel());
@@ -6894,8 +6922,12 @@
   Register reg = base->GetLocations()->Out().AsRegister<Register>();
   CodeGeneratorMIPS::PcRelativePatchInfo* info =
       codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
+  CHECK(!codegen_->GetInstructionSetFeatures().IsR6());
+  bool reordering = __ SetReorder(false);
   // TODO: Reuse MipsComputeBaseMethodAddress on R2 instead of passing ZERO to force emitting NAL.
-  codegen_->EmitPcRelativeAddressPlaceholder(info, reg, ZERO);
+  codegen_->EmitPcRelativeAddressPlaceholderHigh(info, reg, ZERO);
+  __ Addiu(reg, reg, /* placeholder */ 0x5678);
+  __ SetReorder(reordering);
 }
 
 void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {