Change pResolveString entrypoint to kSaveEverything.

Test: Run ART test suite including gcstress on host and Nexus 9.
Test: Run ART test suite including gcstress with baker CC on host and Nexus 9.
Bug: 20323084
Change-Id: I63c21a7d3be8ff7a5765b5003c85b5317635efe6
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 3b77880..4a9de7f 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -214,7 +214,7 @@
         DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
                patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType();
       } else {
-        // With the read barrier (non-baker) enabled, it could be kDexCacheArray in the
+        // With the read barrier (non-Baker) enabled, it could be kDexCacheArray in the
         // HLoadString::LoadKind::kDexCachePcRelative case of VisitLoadString().
         DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
                patch.GetType() == LinkerPatch::Type::kTypeRelative ||
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 9870876..80b4907 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -429,34 +429,50 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    HLoadString* load = instruction_->AsLoadString();
+    const uint32_t string_index = load->GetStringIndex();
+    Register out = locations->Out().AsRegister<Register>();
+    Register temp = locations->GetTemp(0).AsRegister<Register>();
+    constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
 
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    HLoadString* load = instruction_->AsLoadString();
-    const uint32_t string_index = load->GetStringIndex();
+    // In the unlucky case that the `temp` is R0, we preserve the address in `out` across
+    // the kSaveEverything call (or use `out` for the address after non-kSaveEverything call).
+    bool temp_is_r0 = (temp == calling_convention.GetRegisterAt(0));
+    Register entry_address = temp_is_r0 ? out : temp;
+    DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
+    if (call_saves_everything_except_r0 && temp_is_r0) {
+      __ mov(entry_address, ShifterOperand(temp));
+    }
+
     __ LoadImmediate(calling_convention.GetRegisterAt(0), string_index);
     arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+
+    // Store the resolved String to the .bss entry.
+    if (call_saves_everything_except_r0) {
+      // The string entry address was preserved in `entry_address` thanks to kSaveEverything.
+      __ str(R0, Address(entry_address));
+    } else {
+      // For non-Baker read barrier, we need to re-calculate the address of the string entry.
+      CodeGeneratorARM::PcRelativePatchInfo* labels =
+          arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(entry_address, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(entry_address, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(entry_address, entry_address, ShifterOperand(PC));
+      __ str(R0, Address(entry_address));
+    }
+
     arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
-
     RestoreLiveRegisters(codegen, locations);
 
-    // Store the resolved String to the BSS entry.
-    // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the
-    // .bss entry address in the fast path, so that we can avoid another calculation here.
-    CodeGeneratorARM::PcRelativePatchInfo* labels =
-        arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
-    __ BindTrackedLabel(&labels->movw_label);
-    __ movw(IP, /* placeholder */ 0u);
-    __ BindTrackedLabel(&labels->movt_label);
-    __ movt(IP, /* placeholder */ 0u);
-    __ BindTrackedLabel(&labels->add_pc_label);
-    __ add(IP, IP, ShifterOperand(PC));
-    __ str(locations->Out().AsRegister<Register>(), Address(IP));
-
     __ b(GetExitLabel());
   }
 
@@ -5704,10 +5720,25 @@
 
   HLoadString::LoadKind load_kind = load->GetLoadKind();
   if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
-    locations->SetInAt(0, Location::RequiresRegister());
     locations->SetOut(Location::RegisterLocation(R0));
   } else {
     locations->SetOut(Location::RequiresRegister());
+    if (load_kind == HLoadString::LoadKind::kBssEntry) {
+      if (!kUseReadBarrier || kUseBakerReadBarrier) {
+        // Rely on the pResolveString and/or marking to save everything, including temps.
+        // Note that IP may theoretically be clobbered by saving/restoring the live register
+        // (only one thanks to the custom calling convention), so we request a different temp.
+        locations->AddTemp(Location::RequiresRegister());
+        RegisterSet caller_saves = RegisterSet::Empty();
+        InvokeRuntimeCallingConvention calling_convention;
+        caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+        // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
+        // that the the kPrimNot result register is the same as the first argument register.
+        locations->SetCustomSlowPathCallerSaves(caller_saves);
+      } else {
+        // For non-Baker read barrier we have a temp-clobbering call.
+      }
+    }
   }
 }
 
@@ -5743,15 +5774,16 @@
     }
     case HLoadString::LoadKind::kBssEntry: {
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+      Register temp = locations->GetTemp(0).AsRegister<Register>();
       CodeGeneratorARM::PcRelativePatchInfo* labels =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       __ BindTrackedLabel(&labels->movw_label);
-      __ movw(out, /* placeholder */ 0u);
+      __ movw(temp, /* placeholder */ 0u);
       __ BindTrackedLabel(&labels->movt_label);
-      __ movt(out, /* placeholder */ 0u);
+      __ movt(temp, /* placeholder */ 0u);
       __ BindTrackedLabel(&labels->add_pc_label);
-      __ add(out, out, ShifterOperand(PC));
-      GenerateGcRootFieldLoad(load, out_loc, out, 0);
+      __ add(temp, temp, ShifterOperand(PC));
+      GenerateGcRootFieldLoad(load, out_loc, temp, 0);
       SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
       codegen_->AddSlowPath(slow_path);
       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
@@ -5765,6 +5797,7 @@
   // TODO: Consider re-adding the compiler code to do string dex cache lookup again.
   DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
   InvokeRuntimeCallingConvention calling_convention;
+  DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex());
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 969d653..8197787 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -331,13 +331,20 @@
 
 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {}
+  LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label)
+      : SlowPathCodeARM64(instruction),
+        temp_(temp),
+        adrp_label_(adrp_label) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
 
+    // temp_ is a scratch register. Make sure it's not used for saving/restoring registers.
+    UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
+    temps.Exclude(temp_);
+
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
@@ -352,21 +359,21 @@
     RestoreLiveRegisters(codegen, locations);
 
     // Store the resolved String to the BSS entry.
-    UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
-    Register temp = temps.AcquireX();
     const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile();
-    // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary
-    // for the ADRP in the fast path, so that we can avoid the ADRP here.
-    vixl::aarch64::Label* adrp_label =
-        arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
-    arm64_codegen->EmitAdrpPlaceholder(adrp_label, temp);
+    if (!kUseReadBarrier || kUseBakerReadBarrier) {
+      // The string entry page address was preserved in temp_ thanks to kSaveEverything.
+    } else {
+      // For non-Baker read barrier, we need to re-calculate the address of the string entry page.
+      adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
+      arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_);
+    }
     vixl::aarch64::Label* strp_label =
-        arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
+        arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_);
     {
       SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
       __ Bind(strp_label);
       __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
-             MemOperand(temp, /* offset placeholder */ 0));
+             MemOperand(temp_, /* offset placeholder */ 0));
     }
 
     __ B(GetExitLabel());
@@ -375,6 +382,9 @@
   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }
 
  private:
+  const Register temp_;
+  vixl::aarch64::Label* adrp_label_;
+
   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
 };
 
@@ -4238,11 +4248,24 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
-    locations->SetInAt(0, Location::RequiresRegister());
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
   } else {
     locations->SetOut(Location::RequiresRegister());
+    if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
+      if (!kUseReadBarrier || kUseBakerReadBarrier) {
+        // Rely on the pResolveString and/or marking to save everything, including temps.
+        RegisterSet caller_saves = RegisterSet::Empty();
+        InvokeRuntimeCallingConvention calling_convention;
+        caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+        DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
+                  RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
+                               Primitive::kPrimNot).GetCode());
+        locations->SetCustomSlowPathCallerSaves(caller_saves);
+      } else {
+        // For non-Baker read barrier we have a temp-clobbering call.
+      }
+    }
   }
 }
 
@@ -4277,18 +4300,21 @@
       const DexFile& dex_file = load->GetDexFile();
       uint32_t string_index = load->GetStringIndex();
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+      UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+      Register temp = temps.AcquireX();
       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
-      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
+      codegen_->EmitAdrpPlaceholder(adrp_label, temp);
       // Add LDR with its PC-relative String patch.
       vixl::aarch64::Label* ldr_label =
           codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
       GenerateGcRootFieldLoad(load,
                               load->GetLocations()->Out(),
-                              out.X(),
+                              temp,
                               /* placeholder */ 0u,
                               ldr_label);
-      SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
+      SlowPathCodeARM64* slow_path =
+          new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label);
       codegen_->AddSlowPath(slow_path);
       __ Cbz(out.X(), slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
@@ -4300,6 +4326,7 @@
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   InvokeRuntimeCallingConvention calling_convention;
+  DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex());
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0b23599..ab60671 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6059,8 +6059,7 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   HLoadString::LoadKind load_kind = load->GetLoadKind();
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod ||
-      load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
+  if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
       load_kind == HLoadString::LoadKind::kBssEntry) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
@@ -6068,6 +6067,17 @@
     locations->SetOut(Location::RegisterLocation(EAX));
   } else {
     locations->SetOut(Location::RequiresRegister());
+    if (load_kind == HLoadString::LoadKind::kBssEntry) {
+      if (!kUseReadBarrier || kUseBakerReadBarrier) {
+        // Rely on the pResolveString and/or marking to save everything.
+        RegisterSet caller_saves = RegisterSet::Empty();
+        InvokeRuntimeCallingConvention calling_convention;
+        caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+        locations->SetCustomSlowPathCallerSaves(caller_saves);
+      } else {
+        // For non-Baker read barrier we have a temp-clobbering call.
+      }
+    }
   }
 }
 
@@ -6114,6 +6124,7 @@
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   InvokeRuntimeCallingConvention calling_convention;
+  DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex()));
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 28638d7..6518c32 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -299,9 +299,9 @@
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
-    InvokeRuntimeCallingConvention calling_convention;
     const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
-    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
+    // Custom calling convention: RAX serves as both input and output.
+    __ movl(CpuRegister(RAX), Immediate(string_index));
     x86_64_codegen->InvokeRuntime(kQuickResolveString,
                                   instruction_,
                                   instruction_->GetDexPc(),
@@ -5450,10 +5450,20 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
-    locations->SetInAt(0, Location::RequiresRegister());
     locations->SetOut(Location::RegisterLocation(RAX));
   } else {
     locations->SetOut(Location::RequiresRegister());
+    if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
+      if (!kUseReadBarrier || kUseBakerReadBarrier) {
+        // Rely on the pResolveString and/or marking to save everything.
+        // Custom calling convention: RAX serves as both input and output.
+        RegisterSet caller_saves = RegisterSet::Empty();
+        caller_saves.Add(Location::RegisterLocation(RAX));
+        locations->SetCustomSlowPathCallerSaves(caller_saves);
+      } else {
+        // For non-Baker read barrier we have a temp-clobbering call.
+      }
+    }
   }
 }
 
@@ -5493,9 +5503,8 @@
   }
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
-  InvokeRuntimeCallingConvention calling_convention;
-  __ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
-          Immediate(load->GetStringIndex()));
+  // Custom calling convention: RAX serves as both input and output.
+  __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex()));
   codegen_->InvokeRuntime(kQuickResolveString,
                           load,
                           load->GetDexPc());