MIPS32R6: Improve PC-relative string/class loads and invokes.

Use PC-relative addressing on MIPS32R6 instead of
HMipsDexCacheArraysBase and allow such PC-relative
addressing in the presence of irreducible loops.

Also save a couple of instructions when handling
string and class loads from .bss entries by folding the
low half of the offset into the following lw/sw.

Test: test-art-host-gtest
Test: booted MIPS32R2 in QEMU
Test: "make -j1 ART_TEST_DEFAULT_COMPILER=false ART_TEST_OPTIMIZING=true
       ART_TEST_INTERPRETER=false ART_TEST_JIT=false
       ART_TEST_PIC_TEST=true test-art-target-run-test"
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: "make -j1 ART_TEST_DEFAULT_COMPILER=false ART_TEST_OPTIMIZING=true
       ART_TEST_INTERPRETER=false ART_TEST_JIT=false
       ART_TEST_PIC_TEST=true test-art-target-run-test32"

Change-Id: I5d0fcbf271541294a3d4479987d52e2aaff084d9
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 76be74e..a095970 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -258,8 +258,10 @@
       DCHECK_NE(out.AsRegister<Register>(), AT);
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
-      mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base);
-      __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, 0);
+      bool reordering = __ SetReorder(false);
+      mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
+      __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
     }
     __ B(GetExitLabel());
   }
@@ -313,8 +315,10 @@
     DCHECK_NE(out, AT);
     CodeGeneratorMIPS::PcRelativePatchInfo* info =
         mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
-    mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base);
-    __ StoreToOffset(kStoreWord, out, TMP, 0);
+    bool reordering = __ SetReorder(false);
+    mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
+    __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
+    __ SetReorder(reordering);
 
     __ B(GetExitLabel());
   }
@@ -1127,16 +1131,15 @@
   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
 }
 
-void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholder(
-    PcRelativePatchInfo* info, Register out, Register base) {
-  bool reordering = __ SetReorder(false);
+void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
+                                                             Register out,
+                                                             Register base) {
   if (GetInstructionSetFeatures().IsR6()) {
     DCHECK_EQ(base, ZERO);
     __ Bind(&info->high_label);
     __ Bind(&info->pc_rel_label);
-    // Add a 32-bit offset to PC.
+    // Add the high half of a 32-bit offset to PC.
     __ Auipc(out, /* placeholder */ 0x1234);
-    __ Addiu(out, out, /* placeholder */ 0x5678);
   } else {
     // If base is ZERO, emit NAL to obtain the actual base.
     if (base == ZERO) {
@@ -1150,11 +1153,11 @@
     if (base == ZERO) {
       __ Bind(&info->pc_rel_label);
     }
-    __ Ori(out, out, /* placeholder */ 0x5678);
-    // Add a 32-bit offset to PC.
+    // Add the high half of a 32-bit offset to PC.
     __ Addu(out, out, (base == ZERO) ? RA : base);
   }
-  __ SetReorder(reordering);
+  // The immediately following instruction will add the sign-extended low half of the 32-bit
+  // offset to `out` (e.g. lw, jialc, addiu).
 }
 
 void CodeGeneratorMIPS::MarkGCCard(Register object,
@@ -5159,7 +5162,8 @@
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
-  bool has_extra_input = invoke->HasPcRelativeDexCache();
+  bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6();
+  bool has_extra_input = invoke->HasPcRelativeDexCache() && !is_r6;
 
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -5200,12 +5204,13 @@
   if (kEmitCompilerReadBarrier) {
     UNIMPLEMENTED(FATAL) << "for read barrier";
   }
-  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
   // with irreducible loops.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
-  bool fallback_load = has_irreducible_loops;
+  bool is_r6 = GetInstructionSetFeatures().IsR6();
+  bool fallback_load = has_irreducible_loops && !is_r6;
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
       DCHECK(!GetCompilerOptions().GetCompilePic());
@@ -5238,10 +5243,11 @@
   if (kEmitCompilerReadBarrier) {
     UNIMPLEMENTED(FATAL) << "for read barrier";
   }
-  // We disable pc-relative load when there is an irreducible loop, as the optimization
+  // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
-  bool fallback_load = has_irreducible_loops;
+  bool is_r6 = GetInstructionSetFeatures().IsR6();
+  bool fallback_load = has_irreducible_loops && !is_r6;
   switch (desired_class_load_kind) {
     case HLoadClass::LoadKind::kReferrersClass:
       fallback_load = false;
@@ -5259,6 +5265,7 @@
       break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
+      // TODO: implement.
       fallback_load = true;
       break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
@@ -5273,6 +5280,7 @@
 
 Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
                                                                   Register temp) {
+  CHECK(!GetInstructionSetFeatures().IsR6());
   CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
   Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
   if (!invoke->GetLocations()->Intrinsified()) {
@@ -5301,13 +5309,13 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
   HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
-  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
-  bool fallback_load = true;
+  bool is_r6 = GetInstructionSetFeatures().IsR6();
+  bool fallback_load = has_irreducible_loops && !is_r6;
   switch (dispatch_info.method_load_kind) {
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      fallback_load = has_irreducible_loops;
       break;
     default:
       fallback_load = false;
@@ -5325,7 +5333,8 @@
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
   HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
-  Register base_reg = invoke->HasPcRelativeDexCache()
+  bool is_r6 = GetInstructionSetFeatures().IsR6();
+  Register base_reg = (invoke->HasPcRelativeDexCache() && !is_r6)
       ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>())
       : ZERO;
 
@@ -5346,14 +5355,23 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
-      HMipsDexCacheArraysBase* base =
-          invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
-      int32_t offset =
-          invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
-      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      if (is_r6) {
+        uint32_t offset = invoke->GetDexCacheArrayOffset();
+        CodeGeneratorMIPS::PcRelativePatchInfo* info =
+            NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset);
+        bool reordering = __ SetReorder(false);
+        EmitPcRelativeAddressPlaceholderHigh(info, TMP, ZERO);
+        __ Lw(temp.AsRegister<Register>(), TMP, /* placeholder */ 0x5678);
+        __ SetReorder(reordering);
+      } else {
+        HMipsDexCacheArraysBase* base =
+            invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
+        int32_t offset =
+            invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+        __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      }
       break;
-    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = temp.AsRegister<Register>();
@@ -5546,7 +5564,10 @@
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
-      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
+      bool reordering = __ SetReorder(false);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+      __ Addiu(out, out, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
@@ -5562,8 +5583,10 @@
     case HLoadClass::LoadKind::kBssEntry: {
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
-      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
-      __ LoadFromOffset(kLoadWord, out, out, 0);
+      bool reordering = __ SetReorder(false);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+      __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       generate_null_check = true;
       break;
     }
@@ -5678,7 +5701,10 @@
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
-      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
+      bool reordering = __ SetReorder(false);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+      __ Addiu(out, out, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageAddress: {
@@ -5694,8 +5720,10 @@
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
-      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
-      __ LoadFromOffset(kLoadWord, out, out, 0);
+      bool reordering = __ SetReorder(false);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+      __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
       codegen_->AddSlowPath(slow_path);
       __ Beqz(out, slow_path->GetEntryLabel());
@@ -6894,8 +6922,12 @@
   Register reg = base->GetLocations()->Out().AsRegister<Register>();
   CodeGeneratorMIPS::PcRelativePatchInfo* info =
       codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
+  CHECK(!codegen_->GetInstructionSetFeatures().IsR6());
+  bool reordering = __ SetReorder(false);
   // TODO: Reuse MipsComputeBaseMethodAddress on R2 instead of passing ZERO to force emitting NAL.
-  codegen_->EmitPcRelativeAddressPlaceholder(info, reg, ZERO);
+  codegen_->EmitPcRelativeAddressPlaceholderHigh(info, reg, ZERO);
+  __ Addiu(reg, reg, /* placeholder */ 0x5678);
+  __ SetReorder(reordering);
 }
 
 void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c8fd325..e92eeef 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -463,7 +463,7 @@
   Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
 
-  void EmitPcRelativeAddressPlaceholder(PcRelativePatchInfo* info, Register out, Register base);
+  void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, Register out, Register base);
 
  private:
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 192b4a5..e96e3d7 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3117,14 +3117,6 @@
     Location root,
     GpuRegister obj,
     uint32_t offset) {
-  // When handling PC-relative loads, the caller calls
-  // EmitPcRelativeAddressPlaceholderHigh() and then GenerateGcRootFieldLoad().
-  // The relative patcher expects the two methods to emit the following patchable
-  // sequence of instructions in this case:
-  //   auipc reg1, 0x1234  // 0x1234 is a placeholder for offset_high.
-  //   lwu   reg2, 0x5678(reg1)  // 0x5678 is a placeholder for offset_low.
-  // TODO: Adjust GenerateGcRootFieldLoad() and its caller when this method is
-  // extended (e.g. for read barriers) so as not to break the relative patcher.
   GpuRegister root_reg = root.AsRegister<GpuRegister>();
   if (kEmitCompilerReadBarrier) {
     UNIMPLEMENTED(FATAL) << "for read barrier";
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
index 04a4294..7734f91 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -47,7 +47,7 @@
     // Computing the dex cache base for PC-relative accesses will clobber RA with
     // the NAL instruction on R2. Take a note of this before generating the method
     // entry.
-    if (!dex_cache_array_bases_.empty() && !codegen_->GetInstructionSetFeatures().IsR6()) {
+    if (!dex_cache_array_bases_.empty()) {
       codegen_->ClobberRA();
     }
   }
@@ -92,6 +92,11 @@
 };
 
 void DexCacheArrayFixups::Run() {
+  CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_);
+  if (mips_codegen->GetInstructionSetFeatures().IsR6()) {
+    // Do nothing for R6 because it has PC-relative addressing.
+    return;
+  }
   if (graph_->HasIrreducibleLoops()) {
     // Do not run this optimization, as irreducible loops do not work with an instruction
     // that can be live-in at the irreducible loop header.