Inline IRT frame push/pop into JNI stubs.

Golem results for art-opt-cc (higher is better):
linux-ia32                       before after
NativeDowncallStaticNormal       25.704 26.839 (+4.414%)
NativeDowncallStaticNormal6      23.857 25.086 (+5.152%)
NativeDowncallStaticNormalRefs6  23.704 25.248 (+6.513%)
NativeDowncallVirtualNormal      25.578 27.000 (+5.560%)
NativeDowncallVirtualNormal6     23.704 24.925 (+5.153%)
NativeDowncallVirtualNormalRefs6 23.704 25.074 (+5.870%)
NativeDowncallStaticFast         100.65 149.13 (+48.17%)
NativeDowncallStaticFast6        78.304 107.39 (+37.71%)
NativeDowncallStaticFastRefs6    76.962 104.45 (+35.71%)
NativeDowncallVirtualFast        100.40 147.28 (+46.69%)
NativeDowncallVirtualFast6       79.302 106.34 (+34.10%)
NativeDowncallVirtualFastRefs6   76.617 103.29 (+34.82%)
linux-x64                        before after
NativeDowncallStaticNormal       26.083 26.987 (+3.465%)
NativeDowncallStaticNormal6      24.606 25.411 (+3.271%)
NativeDowncallStaticNormalRefs6  24.150 25.086 (+3.877%)
NativeDowncallVirtualNormal      25.743 26.812 (+4.156%)
NativeDowncallVirtualNormal6     24.294 25.248 (+3.927%)
NativeDowncallVirtualNormalRefs6 23.857 25.086 (+5.152%)
NativeDowncallStaticFast         109.95 133.10 (+21.06%)
NativeDowncallStaticFast6        90.274 109.12 (+20.87%)
NativeDowncallStaticFastRefs6    87.282 105.29 (+20.63%)
NativeDowncallVirtualFast        104.00 127.55 (+22.65%)
NativeDowncallVirtualFast6       88.191 106.73 (+21.02%)
NativeDowncallVirtualFastRefs6   85.530 102.09 (+19.36%)
linux-armv7                      before after
NativeDowncallStaticNormal       6.1148 6.3694 (+4.316%)
NativeDowncallStaticNormal6      5.6845 5.9026 (+3.837%)
NativeDowncallStaticNormalRefs6  5.4054 5.6022 (+3.641%)
NativeDowncallVirtualNormal      5.4726 5.7088 (+4.316%)
NativeDowncallVirtualNormal6     5.1789 5.3685 (+3.660%)
NativeDowncallVirtualNormalRefs6 4.9140 5.0902 (+3.586%)
NativeDowncallStaticFast         16.683 18.058 (+8.239%)
NativeDowncallStaticFast6        13.951 14.896 (+6.770%)
NativeDowncallStaticFastRefs6    12.279 13.006 (+5.919%)
NativeDowncallVirtualFast        16.161 17.848 (+10.44%)
NativeDowncallVirtualFast6       14.085 15.196 (+7.892%)
NativeDowncallVirtualFastRefs6   12.089 12.897 (+6.683%)
linux-armv8                      before after
NativeDowncallStaticNormal       6.0663 6.4229 (+5.879%)
NativeDowncallStaticNormal6      5.7252 6.0437 (+5.563%)
NativeDowncallStaticNormalRefs6  5.3114 5.5814 (+5.082%)
NativeDowncallVirtualNormal      5.8795 6.2651 (+6.558%)
NativeDowncallVirtualNormal6     5.6232 5.9494 (+5.801%)
NativeDowncallVirtualNormalRefs6 5.1862 5.4429 (+4.948%)
NativeDowncallStaticFast         17.638 19.183 (+8.760%)
NativeDowncallStaticFast6        14.903 16.161 (+8.438%)
NativeDowncallStaticFastRefs6    12.475 13.235 (+6.094%)
NativeDowncallVirtualFast        15.826 17.848 (+12.78%)
NativeDowncallVirtualFast6       14.064 15.504 (+10.24%)
NativeDowncallVirtualFastRefs6   11.628 12.475 (+7.285%)

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: I5ecfa7a661f08ab63dd2a75d666e1c1b9121935f
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 70a1939..c23d682 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -37,7 +37,7 @@
 #define ___   asm_.GetVIXLAssembler()->
 #endif
 
-// The AAPCS requires 8-byte alignement. This is not as strict as the Managed ABI stack alignment.
+// The AAPCS requires 8-byte alignment. This is not as strict as the Managed ABI stack alignment.
 static constexpr size_t kAapcsStackAlignment = 8u;
 static_assert(kAapcsStackAlignment < kStackAlignment);
 
@@ -267,7 +267,21 @@
   }
 }
 
+ManagedRegister ArmVIXLJNIMacroAssembler::CoreRegisterWithSize(ManagedRegister src, size_t size) {
+  DCHECK(src.AsArm().IsCoreRegister());
+  DCHECK_EQ(size, 4u);
+  return src;
+}
+
 void ArmVIXLJNIMacroAssembler::Store(FrameOffset dest, ManagedRegister m_src, size_t size) {
+  Store(ArmManagedRegister::FromCoreRegister(SP), MemberOffset(dest.Int32Value()), m_src, size);
+}
+
+void ArmVIXLJNIMacroAssembler::Store(ManagedRegister m_base,
+                                     MemberOffset offs,
+                                     ManagedRegister m_src,
+                                     size_t size) {
+  ArmManagedRegister base = m_base.AsArm();
   ArmManagedRegister src = m_src.AsArm();
   if (src.IsNoRegister()) {
     CHECK_EQ(0u, size);
@@ -275,19 +289,19 @@
     CHECK_EQ(4u, size);
     UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
     temps.Exclude(AsVIXLRegister(src));
-    asm_.StoreToOffset(kStoreWord, AsVIXLRegister(src), sp, dest.Int32Value());
+    asm_.StoreToOffset(kStoreWord, AsVIXLRegister(src), AsVIXLRegister(base), offs.Int32Value());
   } else if (src.IsRegisterPair()) {
     CHECK_EQ(8u, size);
     ___ Strd(AsVIXLRegisterPairLow(src),
              AsVIXLRegisterPairHigh(src),
-             MemOperand(sp, dest.Int32Value()));
+             MemOperand(AsVIXLRegister(base), offs.Int32Value()));
   } else if (src.IsSRegister()) {
     CHECK_EQ(4u, size);
-    asm_.StoreSToOffset(AsVIXLSRegister(src), sp, dest.Int32Value());
+    asm_.StoreSToOffset(AsVIXLSRegister(src), AsVIXLRegister(base), offs.Int32Value());
   } else {
     CHECK_EQ(8u, size);
     CHECK(src.IsDRegister()) << src;
-    asm_.StoreDToOffset(AsVIXLDRegister(src), sp, dest.Int32Value());
+    asm_.StoreDToOffset(AsVIXLDRegister(src), AsVIXLRegister(base), offs.Int32Value());
   }
 }
 
@@ -373,6 +387,13 @@
   return Load(m_dst.AsArm(), sp, src.Int32Value(), size);
 }
 
+void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst,
+                                    ManagedRegister m_base,
+                                    MemberOffset offs,
+                                    size_t size) {
+  return Load(m_dst.AsArm(), AsVIXLRegister(m_base.AsArm()), offs.Int32Value(), size);
+}
+
 void ArmVIXLJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst,
                                               ThreadOffset32 src,
                                               size_t size) {
@@ -1050,8 +1071,7 @@
   UNIMPLEMENTED(FATAL);
 }
 
-void ArmVIXLJNIMacroAssembler::Load(ArmManagedRegister
-                                    dest,
+void ArmVIXLJNIMacroAssembler::Load(ArmManagedRegister dest,
                                     vixl32::Register base,
                                     int32_t offset,
                                     size_t size) {
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index 248fc67..d98f688 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -61,8 +61,11 @@
   void IncreaseFrameSize(size_t adjust) override;
   void DecreaseFrameSize(size_t adjust) override;
 
+  ManagedRegister CoreRegisterWithSize(ManagedRegister src, size_t size) override;
+
   // Store routines.
   void Store(FrameOffset offs, ManagedRegister src, size_t size) override;
+  void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override;
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
 
@@ -76,6 +79,7 @@
 
   // Load routines.
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
+  void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override;
 
   void LoadFromThread(ManagedRegister dest,
                       ThreadOffset32 src,
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index c4dbd3f..33fff55 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -37,7 +37,7 @@
 #define reg_d(D) Arm64Assembler::reg_d(D)
 #define reg_s(S) Arm64Assembler::reg_s(S)
 
-// The AAPCS64 requires 16-byte alignement. This is the same as the Managed ABI stack alignment.
+// The AAPCS64 requires 16-byte alignment. This is the same as the Managed ABI stack alignment.
 static constexpr size_t kAapcs64StackAlignment = 16u;
 static_assert(kAapcs64StackAlignment == kStackAlignment);
 
@@ -77,6 +77,30 @@
   }
 }
 
+ManagedRegister Arm64JNIMacroAssembler::CoreRegisterWithSize(ManagedRegister m_src, size_t size) {
+  DCHECK(size == 4u || size == 8u) << size;
+  Arm64ManagedRegister src = m_src.AsArm64();
+  // Switch between X and W registers using the `XRegister` and `WRegister` enumerations.
+  static_assert(W0 == static_cast<std::underlying_type_t<XRegister>>(X0));
+  static_assert(W30 == static_cast<std::underlying_type_t<XRegister>>(X30));
+  static_assert(WSP == static_cast<std::underlying_type_t<XRegister>>(SP));
+  static_assert(WZR == static_cast<std::underlying_type_t<XRegister>>(XZR));
+  if (src.IsXRegister()) {
+    if (size == 8u) {
+      return m_src;
+    }
+    auto id = static_cast<std::underlying_type_t<XRegister>>(src.AsXRegister());
+    return Arm64ManagedRegister::FromWRegister(enum_cast<WRegister>(id));
+  } else {
+    CHECK(src.IsWRegister());
+    if (size == 4u) {
+      return m_src;
+    }
+    auto id = static_cast<std::underlying_type_t<WRegister>>(src.AsWRegister());
+    return Arm64ManagedRegister::FromXRegister(enum_cast<XRegister>(id));
+  }
+}
+
 void Arm64JNIMacroAssembler::AddConstant(XRegister rd, int32_t value, Condition cond) {
   AddConstant(rd, rd, value, cond);
 }
@@ -132,20 +156,28 @@
 }
 
 void Arm64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) {
+  Store(Arm64ManagedRegister::FromXRegister(SP), MemberOffset(offs.Int32Value()), m_src, size);
+}
+
+void Arm64JNIMacroAssembler::Store(ManagedRegister m_base,
+                                   MemberOffset offs,
+                                   ManagedRegister m_src,
+                                   size_t size) {
+  Arm64ManagedRegister base = m_base.AsArm64();
   Arm64ManagedRegister src = m_src.AsArm64();
   if (src.IsNoRegister()) {
     CHECK_EQ(0u, size);
   } else if (src.IsWRegister()) {
     CHECK_EQ(4u, size);
-    StoreWToOffset(kStoreWord, src.AsWRegister(), SP, offs.Int32Value());
+    StoreWToOffset(kStoreWord, src.AsWRegister(), base.AsXRegister(), offs.Int32Value());
   } else if (src.IsXRegister()) {
     CHECK_EQ(8u, size);
-    StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
+    StoreToOffset(src.AsXRegister(), base.AsXRegister(), offs.Int32Value());
   } else if (src.IsSRegister()) {
-    StoreSToOffset(src.AsSRegister(), SP, offs.Int32Value());
+    StoreSToOffset(src.AsSRegister(), base.AsXRegister(), offs.Int32Value());
   } else {
     CHECK(src.IsDRegister()) << src;
-    StoreDToOffset(src.AsDRegister(), SP, offs.Int32Value());
+    StoreDToOffset(src.AsDRegister(), base.AsXRegister(), offs.Int32Value());
   }
 }
 
@@ -280,6 +312,13 @@
   return Load(m_dst.AsArm64(), SP, src.Int32Value(), size);
 }
 
+void Arm64JNIMacroAssembler::Load(ManagedRegister m_dst,
+                                  ManagedRegister m_base,
+                                  MemberOffset offs,
+                                  size_t size) {
+  return Load(m_dst.AsArm64(), m_base.AsArm64().AsXRegister(), offs.Int32Value(), size);
+}
+
 void Arm64JNIMacroAssembler::LoadFromThread(ManagedRegister m_dst,
                                             ThreadOffset64 src,
                                             size_t size) {
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index ad027d3..2c4b252 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -64,8 +64,11 @@
   void IncreaseFrameSize(size_t adjust) override;
   void DecreaseFrameSize(size_t adjust) override;
 
+  ManagedRegister CoreRegisterWithSize(ManagedRegister src, size_t size) override;
+
   // Store routines.
   void Store(FrameOffset offs, ManagedRegister src, size_t size) override;
+  void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override;
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
   void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
@@ -75,6 +78,7 @@
 
   // Load routines.
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
+  void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override;
   void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) override;
   void LoadRef(ManagedRegister dest, FrameOffset src) override;
   void LoadRef(ManagedRegister dest,
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index d621122..a9d9f54 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -111,8 +111,13 @@
   virtual void IncreaseFrameSize(size_t adjust) = 0;
   virtual void DecreaseFrameSize(size_t adjust) = 0;
 
+  // Return the same core register but with correct size if the architecture-specific
+  // ManagedRegister has different representation for different sizes.
+  virtual ManagedRegister CoreRegisterWithSize(ManagedRegister src, size_t size) = 0;
+
   // Store routines
   virtual void Store(FrameOffset offs, ManagedRegister src, size_t size) = 0;
+  virtual void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) = 0;
   virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0;
   virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0;
 
@@ -129,6 +134,7 @@
 
   // Load routines
   virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
+  virtual void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) = 0;
 
   virtual void LoadFromThread(ManagedRegister dest,
                               ThreadOffset<kPointerSize> src,
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 2710eb1..3c88447 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -127,33 +127,48 @@
   }
 }
 
+ManagedRegister X86JNIMacroAssembler::CoreRegisterWithSize(ManagedRegister src, size_t size) {
+  DCHECK(src.AsX86().IsCpuRegister());
+  DCHECK_EQ(size, 4u);
+  return src;
+}
+
 void X86JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
   DecreaseFrameSizeImpl(&asm_, adjust);
 }
 
 void X86JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
+  Store(X86ManagedRegister::FromCpuRegister(ESP), MemberOffset(offs.Int32Value()), msrc, size);
+}
+
+void X86JNIMacroAssembler::Store(ManagedRegister mbase,
+                                 MemberOffset offs,
+                                 ManagedRegister msrc,
+                                 size_t size) {
+  X86ManagedRegister base = mbase.AsX86();
   X86ManagedRegister src = msrc.AsX86();
   if (src.IsNoRegister()) {
     CHECK_EQ(0u, size);
   } else if (src.IsCpuRegister()) {
     CHECK_EQ(4u, size);
-    __ movl(Address(ESP, offs), src.AsCpuRegister());
+    __ movl(Address(base.AsCpuRegister(), offs), src.AsCpuRegister());
   } else if (src.IsRegisterPair()) {
     CHECK_EQ(8u, size);
-    __ movl(Address(ESP, offs), src.AsRegisterPairLow());
-    __ movl(Address(ESP, FrameOffset(offs.Int32Value()+4)), src.AsRegisterPairHigh());
+    __ movl(Address(base.AsCpuRegister(), offs), src.AsRegisterPairLow());
+    __ movl(Address(base.AsCpuRegister(), MemberOffset(offs.Int32Value()+4)),
+            src.AsRegisterPairHigh());
   } else if (src.IsX87Register()) {
     if (size == 4) {
-      __ fstps(Address(ESP, offs));
+      __ fstps(Address(base.AsCpuRegister(), offs));
     } else {
-      __ fstpl(Address(ESP, offs));
+      __ fstpl(Address(base.AsCpuRegister(), offs));
     }
   } else {
     CHECK(src.IsXmmRegister());
     if (size == 4) {
-      __ movss(Address(ESP, offs), src.AsXmmRegister());
+      __ movss(Address(base.AsCpuRegister(), offs), src.AsXmmRegister());
     } else {
-      __ movsd(Address(ESP, offs), src.AsXmmRegister());
+      __ movsd(Address(base.AsCpuRegister(), offs), src.AsXmmRegister());
     }
   }
 }
@@ -191,28 +206,37 @@
 }
 
 void X86JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
+  Load(mdest, X86ManagedRegister::FromCpuRegister(ESP), MemberOffset(src.Int32Value()), size);
+}
+
+void X86JNIMacroAssembler::Load(ManagedRegister mdest,
+                                ManagedRegister mbase,
+                                MemberOffset offs,
+                                size_t size) {
   X86ManagedRegister dest = mdest.AsX86();
+  X86ManagedRegister base = mbase.AsX86();
   if (dest.IsNoRegister()) {
     CHECK_EQ(0u, size);
   } else if (dest.IsCpuRegister()) {
     CHECK_EQ(4u, size);
-    __ movl(dest.AsCpuRegister(), Address(ESP, src));
+    __ movl(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs));
   } else if (dest.IsRegisterPair()) {
     CHECK_EQ(8u, size);
-    __ movl(dest.AsRegisterPairLow(), Address(ESP, src));
-    __ movl(dest.AsRegisterPairHigh(), Address(ESP, FrameOffset(src.Int32Value()+4)));
+    __ movl(dest.AsRegisterPairLow(), Address(base.AsCpuRegister(), offs));
+    __ movl(dest.AsRegisterPairHigh(),
+            Address(base.AsCpuRegister(), MemberOffset(offs.Int32Value()+4)));
   } else if (dest.IsX87Register()) {
     if (size == 4) {
-      __ flds(Address(ESP, src));
+      __ flds(Address(base.AsCpuRegister(), offs));
     } else {
-      __ fldl(Address(ESP, src));
+      __ fldl(Address(base.AsCpuRegister(), offs));
     }
   } else {
     CHECK(dest.IsXmmRegister());
     if (size == 4) {
-      __ movss(dest.AsXmmRegister(), Address(ESP, src));
+      __ movss(dest.AsXmmRegister(), Address(base.AsCpuRegister(), offs));
     } else {
-      __ movsd(dest.AsXmmRegister(), Address(ESP, src));
+      __ movsd(dest.AsXmmRegister(), Address(base.AsCpuRegister(), offs));
     }
   }
 }
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index 448a7f4..1f9355a 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -54,8 +54,11 @@
   void IncreaseFrameSize(size_t adjust) override;
   void DecreaseFrameSize(size_t adjust) override;
 
+  ManagedRegister CoreRegisterWithSize(ManagedRegister src, size_t size) override;
+
   // Store routines
   void Store(FrameOffset offs, ManagedRegister src, size_t size) override;
+  void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override;
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
 
@@ -69,6 +72,7 @@
 
   // Load routines
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
+  void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override;
 
   void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) override;
 
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index b5e17d1..d9f05df 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -151,35 +151,44 @@
   DecreaseFrameSizeImpl(adjust, &asm_);
 }
 
+ManagedRegister X86_64JNIMacroAssembler::CoreRegisterWithSize(ManagedRegister src, size_t size) {
+  DCHECK(src.AsX86_64().IsCpuRegister());
+  DCHECK(size == 4u || size == 8u) << size;
+  return src;
+}
+
 void X86_64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size) {
+  Store(X86_64ManagedRegister::FromCpuRegister(RSP), MemberOffset(offs.Int32Value()), msrc, size);
+}
+
+void X86_64JNIMacroAssembler::Store(ManagedRegister mbase,
+                                    MemberOffset offs,
+                                    ManagedRegister msrc,
+                                    size_t size) {
+  X86_64ManagedRegister base = mbase.AsX86_64();
   X86_64ManagedRegister src = msrc.AsX86_64();
   if (src.IsNoRegister()) {
     CHECK_EQ(0u, size);
   } else if (src.IsCpuRegister()) {
     if (size == 4) {
       CHECK_EQ(4u, size);
-      __ movl(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
+      __ movl(Address(base.AsCpuRegister(), offs), src.AsCpuRegister());
     } else {
       CHECK_EQ(8u, size);
-      __ movq(Address(CpuRegister(RSP), offs), src.AsCpuRegister());
+      __ movq(Address(base.AsCpuRegister(), offs), src.AsCpuRegister());
     }
-  } else if (src.IsRegisterPair()) {
-    CHECK_EQ(0u, size);
-    __ movq(Address(CpuRegister(RSP), offs), src.AsRegisterPairLow());
-    __ movq(Address(CpuRegister(RSP), FrameOffset(offs.Int32Value()+4)),
-            src.AsRegisterPairHigh());
   } else if (src.IsX87Register()) {
     if (size == 4) {
-      __ fstps(Address(CpuRegister(RSP), offs));
+      __ fstps(Address(base.AsCpuRegister(), offs));
     } else {
-      __ fstpl(Address(CpuRegister(RSP), offs));
+      __ fstpl(Address(base.AsCpuRegister(), offs));
     }
   } else {
     CHECK(src.IsXmmRegister());
     if (size == 4) {
-      __ movss(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
+      __ movss(Address(base.AsCpuRegister(), offs), src.AsXmmRegister());
     } else {
-      __ movsd(Address(CpuRegister(RSP), offs), src.AsXmmRegister());
+      __ movsd(Address(base.AsCpuRegister(), offs), src.AsXmmRegister());
     }
   }
 }
@@ -218,33 +227,37 @@
 }
 
 void X86_64JNIMacroAssembler::Load(ManagedRegister mdest, FrameOffset src, size_t size) {
+  Load(mdest, X86_64ManagedRegister::FromCpuRegister(RSP), MemberOffset(src.Int32Value()), size);
+}
+
+void X86_64JNIMacroAssembler::Load(ManagedRegister mdest,
+                                   ManagedRegister mbase,
+                                   MemberOffset offs,
+                                   size_t size) {
   X86_64ManagedRegister dest = mdest.AsX86_64();
+  X86_64ManagedRegister base = mbase.AsX86_64();
   if (dest.IsNoRegister()) {
     CHECK_EQ(0u, size);
   } else if (dest.IsCpuRegister()) {
     if (size == 4) {
       CHECK_EQ(4u, size);
-      __ movl(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
+      __ movl(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs));
     } else {
       CHECK_EQ(8u, size);
-      __ movq(dest.AsCpuRegister(), Address(CpuRegister(RSP), src));
+      __ movq(dest.AsCpuRegister(), Address(base.AsCpuRegister(), offs));
     }
-  } else if (dest.IsRegisterPair()) {
-    CHECK_EQ(0u, size);
-    __ movq(dest.AsRegisterPairLow(), Address(CpuRegister(RSP), src));
-    __ movq(dest.AsRegisterPairHigh(), Address(CpuRegister(RSP), FrameOffset(src.Int32Value()+4)));
   } else if (dest.IsX87Register()) {
     if (size == 4) {
-      __ flds(Address(CpuRegister(RSP), src));
+      __ flds(Address(base.AsCpuRegister(), offs));
     } else {
-      __ fldl(Address(CpuRegister(RSP), src));
+      __ fldl(Address(base.AsCpuRegister(), offs));
     }
   } else {
     CHECK(dest.IsXmmRegister());
     if (size == 4) {
-      __ movss(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
+      __ movss(dest.AsXmmRegister(), Address(base.AsCpuRegister(), offs));
     } else {
-      __ movsd(dest.AsXmmRegister(), Address(CpuRegister(RSP), src));
+      __ movsd(dest.AsXmmRegister(), Address(base.AsCpuRegister(), offs));
     }
   }
 }
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index a5f7bbb..f1ec74f 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -55,8 +55,11 @@
   void IncreaseFrameSize(size_t adjust) override;
   void DecreaseFrameSize(size_t adjust) override;
 
+  ManagedRegister CoreRegisterWithSize(ManagedRegister src, size_t size) override;
+
   // Store routines
   void Store(FrameOffset offs, ManagedRegister src, size_t size) override;
+  void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override;
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
 
@@ -70,6 +73,7 @@
 
   // Load routines
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
+  void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override;
 
   void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) override;