Move entry spill determination to the JNI compiler.

The calling convention no longer describes entry spills, as
spilling is now the JNI compiler's responsibility. This allows
future improvements, such as spilling registers directly to the
HandleScope or to outgoing stack args.
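
For illustration, the spill loop now emitted by the JNI compiler
is roughly the following (simplified from jni_compiler.cc below):

    // Spill all register arguments to their stack slots.
    mr_conv->ResetIterator(FrameOffset(current_frame_size));
    for (; mr_conv->HasNext(); mr_conv->Next()) {
      if (mr_conv->IsCurrentParamInRegister()) {
        size_t size = mr_conv->IsCurrentParamALongOrDouble() ? 8u : 4u;
        __ Store(mr_conv->CurrentParamStackOffset(),
                 mr_conv->CurrentParamRegister(),
                 size);
      }
    }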

Remove the notion of an interprocedural scratch register from the
calling conventions and let the assemblers handle all scratch
register uses. The remaining JNI assembler APIs that take scratch
registers are now unused and can be removed.
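
Assemblers now acquire scratch registers internally; for example,
the ARM assembler uses VIXL's UseScratchRegisterScope (simplified
from jni_macro_assembler_arm_vixl.cc below):

    void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest,
                                           FrameOffset src) {
      UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
      vixl32::Register scratch = temps.Acquire();  // No caller-provided scratch.
      asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
      asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
    }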

Also fix a bug in the disassembly comparison for tests: the
contents of two files were considered identical even if the second
file contained additional trailing data.
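
The fixed comparison must check that both streams reach EOF
together. A minimal self-contained sketch (the actual test helper
is not part of this diff; names here are illustrative):

    #include <fstream>
    #include <iterator>

    bool FileContentsEqual(const char* path1, const char* path2) {
      std::ifstream f1(path1, std::ios::binary);
      std::ifstream f2(path2, std::ios::binary);
      std::istreambuf_iterator<char> it1(f1), it2(f2), end;
      while (it1 != end && it2 != end) {
        if (*it1 != *it2) return false;
        ++it1;
        ++it2;
      }
      // Old bug: stopping when the first file ended ignored any
      // trailing data in the second. Require both to be at EOF.
      return it1 == end && it2 == end;
    }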

This change fully preserves the generated code and adds TODO
comments where doing so results in awkward or suboptimal code.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: Prebuilt boot image is unchanged.
Test: aosp_taimen-userdebug boots.
Bug: 12189621
Change-Id: Ic26a670276920313cd907a6eda8d982cf0abfd81
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index cec94c9..7980e18 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -82,8 +82,15 @@
     std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm(
         JNIMacroAssembler<kPointerSize>::Create(&allocator, isa));
     jni_asm->cfi().SetEnabled(true);
-    jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(),
-                        callee_save_regs, mr_conv->EntrySpills());
+    jni_asm->BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs);
+    // Spill arguments.
+    mr_conv->ResetIterator(FrameOffset(frame_size));
+    for (; mr_conv->HasNext(); mr_conv->Next()) {
+      if (mr_conv->IsCurrentParamInRegister()) {
+        size_t size = mr_conv->IsCurrentParamALongOrDouble() ? 8u : 4u;
+        jni_asm->Store(mr_conv->CurrentParamStackOffset(), mr_conv->CurrentParamRegister(), size);
+      }
+    }
     jni_asm->IncreaseFrameSize(32);
     jni_asm->DecreaseFrameSize(32);
     jni_asm->RemoveFrame(frame_size, callee_save_regs, /* may_suspend= */ true);
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index e06c914..d07ab98 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -27,8 +27,6 @@
 namespace art {
 namespace arm {
 
-static_assert(kArmPointerSize == PointerSize::k32, "Unexpected ARM pointer size");
-
 //
 // JNI calling convention constants.
 //
@@ -49,18 +47,21 @@
 static const Register kHFCoreArgumentRegisters[] = {
   R0, R1, R2, R3
 };
+static constexpr size_t kHFCoreArgumentRegistersCount = arraysize(kHFCoreArgumentRegisters);
 
 // (VFP single-precision registers.)
 static const SRegister kHFSArgumentRegisters[] = {
   S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15
 };
+static constexpr size_t kHFSArgumentRegistersCount = arraysize(kHFSArgumentRegisters);
 
 // (VFP double-precision registers.)
 static const DRegister kHFDArgumentRegisters[] = {
   D0, D1, D2, D3, D4, D5, D6, D7
 };
+static constexpr size_t kHFDArgumentRegistersCount = arraysize(kHFDArgumentRegisters);
 
-static_assert(arraysize(kHFDArgumentRegisters) * 2 == arraysize(kHFSArgumentRegisters),
+static_assert(kHFDArgumentRegistersCount * 2 == kHFSArgumentRegistersCount,
     "ks d argument registers mismatch");
 
 //
@@ -159,14 +160,6 @@
 
 // Calling convention
 
-ManagedRegister ArmManagedRuntimeCallingConvention::InterproceduralScratchRegister() const {
-  return ArmManagedRegister::FromCoreRegister(IP);  // R12
-}
-
-ManagedRegister ArmJniCallingConvention::InterproceduralScratchRegister() const {
-  return ArmManagedRegister::FromCoreRegister(IP);  // R12
-}
-
 ManagedRegister ArmManagedRuntimeCallingConvention::ReturnRegister() {
   switch (GetShorty()[0]) {
     case 'V':
@@ -204,93 +197,93 @@
   return ArmManagedRegister::FromCoreRegister(R0);
 }
 
+void ArmManagedRuntimeCallingConvention::ResetIterator(FrameOffset displacement) {
+  ManagedRuntimeCallingConvention::ResetIterator(displacement);
+  gpr_index_ = 1u;  // Skip r0 for ArtMethod*
+  float_index_ = 0u;
+  double_index_ = 0u;
+}
+
+void ArmManagedRuntimeCallingConvention::Next() {
+  if (IsCurrentParamAFloatOrDouble()) {
+    if (float_index_ % 2 == 0) {
+      // The register for the current float is the same as the first register for double.
+      DCHECK_EQ(float_index_, double_index_ * 2u);
+    } else {
+      // There is a space for an extra float before space for a double.
+      DCHECK_LT(float_index_, double_index_ * 2u);
+    }
+    if (IsCurrentParamADouble()) {
+      double_index_ += 1u;
+      if (float_index_ % 2 == 0) {
+        float_index_ = double_index_ * 2u;
+      }
+    } else {
+      if (float_index_ % 2 == 0) {
+        float_index_ += 1u;
+        double_index_ += 1u;  // Leaves space for one more float before the next double.
+      } else {
+        float_index_ = double_index_ * 2u;
+      }
+    }
+  } else {  // Not a float/double.
+    if (IsCurrentParamALong()) {
+      // Note that the alignment to even register is done lazily.
+      gpr_index_ = RoundUp(gpr_index_, 2u) + 2u;
+    } else {
+      gpr_index_ += 1u;
+    }
+  }
+  ManagedRuntimeCallingConvention::Next();
+}
+
 bool ArmManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
-  return false;  // Everything moved to stack on entry.
+  if (IsCurrentParamAFloatOrDouble()) {
+    if (IsCurrentParamADouble()) {
+      return double_index_ < kHFDArgumentRegistersCount;
+    } else {
+      return float_index_ < kHFSArgumentRegistersCount;
+    }
+  } else {
+    if (IsCurrentParamALong()) {
+      // Round up to even register and do not split a long between the last register and the stack.
+      return RoundUp(gpr_index_, 2u) + 1u < kHFCoreArgumentRegistersCount;
+    } else {
+      return gpr_index_ < kHFCoreArgumentRegistersCount;
+    }
+  }
 }
 
 bool ArmManagedRuntimeCallingConvention::IsCurrentParamOnStack() {
-  return true;
+  return !IsCurrentParamInRegister();
 }
 
 ManagedRegister ArmManagedRuntimeCallingConvention::CurrentParamRegister() {
-  LOG(FATAL) << "Should not reach here";
-  UNREACHABLE();
+  DCHECK(IsCurrentParamInRegister());
+  if (IsCurrentParamAFloatOrDouble()) {
+    if (IsCurrentParamADouble()) {
+      return ArmManagedRegister::FromDRegister(kHFDArgumentRegisters[double_index_]);
+    } else {
+      return ArmManagedRegister::FromSRegister(kHFSArgumentRegisters[float_index_]);
+    }
+  } else {
+    if (IsCurrentParamALong()) {
+      // Currently the only register pair for a long parameter is r2-r3.
+      // Note that the alignment to even register is done lazily.
+      CHECK_EQ(RoundUp(gpr_index_, 2u), 2u);
+      return ArmManagedRegister::FromRegisterPair(R2_R3);
+    } else {
+      return ArmManagedRegister::FromCoreRegister(kHFCoreArgumentRegisters[gpr_index_]);
+    }
+  }
 }
 
 FrameOffset ArmManagedRuntimeCallingConvention::CurrentParamStackOffset() {
-  CHECK(IsCurrentParamOnStack());
   return FrameOffset(displacement_.Int32Value() +        // displacement
                      kFramePointerSize +                 // Method*
                      (itr_slots_ * kFramePointerSize));  // offset into in args
 }
 
-const ManagedRegisterEntrySpills& ArmManagedRuntimeCallingConvention::EntrySpills() {
-  // We spill the argument registers on ARM to free them up for scratch use, we then assume
-  // all arguments are on the stack.
-  if ((entry_spills_.size() == 0) && (NumArgs() > 0)) {
-    uint32_t gpr_index = 1;  // R0 ~ R3. Reserve r0 for ArtMethod*.
-    uint32_t fpr_index = 0;  // S0 ~ S15.
-    uint32_t fpr_double_index = 0;  // D0 ~ D7.
-
-    ResetIterator(FrameOffset(0));
-    while (HasNext()) {
-      if (IsCurrentParamAFloatOrDouble()) {
-        if (IsCurrentParamADouble()) {  // Double.
-          // Double should not overlap with float.
-          fpr_double_index = (std::max(fpr_double_index * 2, RoundUp(fpr_index, 2))) / 2;
-          if (fpr_double_index < arraysize(kHFDArgumentRegisters)) {
-            entry_spills_.push_back(
-                ArmManagedRegister::FromDRegister(kHFDArgumentRegisters[fpr_double_index++]));
-          } else {
-            entry_spills_.push_back(ManagedRegister::NoRegister(), 8);
-          }
-        } else {  // Float.
-          // Float should not overlap with double.
-          if (fpr_index % 2 == 0) {
-            fpr_index = std::max(fpr_double_index * 2, fpr_index);
-          }
-          if (fpr_index < arraysize(kHFSArgumentRegisters)) {
-            entry_spills_.push_back(
-                ArmManagedRegister::FromSRegister(kHFSArgumentRegisters[fpr_index++]));
-          } else {
-            entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
-          }
-        }
-      } else {
-        // FIXME: Pointer this returns as both reference and long.
-        if (IsCurrentParamALong() && !IsCurrentParamAReference()) {  // Long.
-          if (gpr_index < arraysize(kHFCoreArgumentRegisters) - 1) {
-            // Skip R1, and use R2_R3 if the long is the first parameter.
-            if (gpr_index == 1) {
-              gpr_index++;
-            }
-          }
-
-          // If it spans register and memory, we must use the value in memory.
-          if (gpr_index < arraysize(kHFCoreArgumentRegisters) - 1) {
-            entry_spills_.push_back(
-                ArmManagedRegister::FromCoreRegister(kHFCoreArgumentRegisters[gpr_index++]));
-          } else if (gpr_index == arraysize(kHFCoreArgumentRegisters) - 1) {
-            gpr_index++;
-            entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
-          } else {
-            entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
-          }
-        }
-        // High part of long or 32-bit argument.
-        if (gpr_index < arraysize(kHFCoreArgumentRegisters)) {
-          entry_spills_.push_back(
-              ArmManagedRegister::FromCoreRegister(kHFCoreArgumentRegisters[gpr_index++]));
-        } else {
-          entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
-        }
-      }
-      Next();
-    }
-  }
-  return entry_spills_;
-}
-
 // JNI calling convention
 
 ArmJniCallingConvention::ArmJniCallingConvention(bool is_static,
@@ -538,7 +531,6 @@
   DCHECK(std::none_of(kJniArgumentRegisters,
                       kJniArgumentRegisters + std::size(kJniArgumentRegisters),
                       [](Register reg) { return reg == R4; }));
-  DCHECK(!InterproceduralScratchRegister().Equals(ArmManagedRegister::FromCoreRegister(R4)));
   return ArmManagedRegister::FromCoreRegister(R4);
 }
 
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index e4b86fa..7896d64 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -29,22 +29,26 @@
       : ManagedRuntimeCallingConvention(is_static,
                                         is_synchronized,
                                         shorty,
-                                        PointerSize::k32) {}
+                                        PointerSize::k32),
+        gpr_index_(1u),  // Skip r0 for ArtMethod*
+        float_index_(0u),
+        double_index_(0u) {}
   ~ArmManagedRuntimeCallingConvention() override {}
   // Calling convention
   ManagedRegister ReturnRegister() override;
-  ManagedRegister InterproceduralScratchRegister() const override;
+  void ResetIterator(FrameOffset displacement) override;
   // Managed runtime calling convention
   ManagedRegister MethodRegister() override;
+  void Next() override;
   bool IsCurrentParamInRegister() override;
   bool IsCurrentParamOnStack() override;
   ManagedRegister CurrentParamRegister() override;
   FrameOffset CurrentParamStackOffset() override;
-  const ManagedRegisterEntrySpills& EntrySpills() override;
 
  private:
-  ManagedRegisterEntrySpills entry_spills_;
-
+  size_t gpr_index_;
+  size_t float_index_;
+  size_t double_index_;
   DISALLOW_COPY_AND_ASSIGN(ArmManagedRuntimeCallingConvention);
 };
 
@@ -58,7 +62,6 @@
   // Calling convention
   ManagedRegister ReturnRegister() override;
   ManagedRegister IntReturnRegister() override;
-  ManagedRegister InterproceduralScratchRegister() const override;
   // JNI calling convention
   void Next() override;  // Override default behavior for AAPCS
   size_t FrameSize() const override;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 231e140..32da141 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -26,8 +26,6 @@
 namespace art {
 namespace arm64 {
 
-static_assert(kArm64PointerSize == PointerSize::k64, "Unexpected ARM64 pointer size");
-
 static const XRegister kXArgumentRegisters[] = {
   X0, X1, X2, X3, X4, X5, X6, X7
 };
@@ -143,14 +141,6 @@
     CalculateFpCalleeSpillMask(kAapcs64CalleeSaveRegisters);
 
 // Calling convention
-ManagedRegister Arm64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() const {
-  return Arm64ManagedRegister::FromXRegister(IP0);  // X16
-}
-
-ManagedRegister Arm64JniCallingConvention::InterproceduralScratchRegister() const {
-  return Arm64ManagedRegister::FromXRegister(IP0);  // X16
-}
-
 static ManagedRegister ReturnRegisterForShorty(const char* shorty) {
   if (shorty[0] == 'F') {
     return Arm64ManagedRegister::FromSRegister(S0);
@@ -184,73 +174,44 @@
 }
 
 bool Arm64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
-  return false;  // Everything moved to stack on entry.
+  if (IsCurrentParamAFloatOrDouble()) {
+    return itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments;
+  } else {
+    size_t non_fp_arg_number = itr_args_ - itr_float_and_doubles_;
+    return /* method */ 1u + non_fp_arg_number < kMaxIntLikeRegisterArguments;
+  }
 }
 
 bool Arm64ManagedRuntimeCallingConvention::IsCurrentParamOnStack() {
-  return true;
+  return !IsCurrentParamInRegister();
 }
 
 ManagedRegister Arm64ManagedRuntimeCallingConvention::CurrentParamRegister() {
-  LOG(FATAL) << "Should not reach here";
-  UNREACHABLE();
+  DCHECK(IsCurrentParamInRegister());
+  if (IsCurrentParamAFloatOrDouble()) {
+    if (IsCurrentParamADouble()) {
+      return Arm64ManagedRegister::FromDRegister(kDArgumentRegisters[itr_float_and_doubles_]);
+    } else {
+      return Arm64ManagedRegister::FromSRegister(kSArgumentRegisters[itr_float_and_doubles_]);
+    }
+  } else {
+    size_t non_fp_arg_number = itr_args_ - itr_float_and_doubles_;
+    if (IsCurrentParamALong()) {
+      XRegister x_reg = kXArgumentRegisters[/* method */ 1u + non_fp_arg_number];
+      return Arm64ManagedRegister::FromXRegister(x_reg);
+    } else {
+      WRegister w_reg = kWArgumentRegisters[/* method */ 1u + non_fp_arg_number];
+      return Arm64ManagedRegister::FromWRegister(w_reg);
+    }
+  }
 }
 
 FrameOffset Arm64ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
-  CHECK(IsCurrentParamOnStack());
   return FrameOffset(displacement_.Int32Value() +  // displacement
                      kFramePointerSize +  // Method ref
                      (itr_slots_ * sizeof(uint32_t)));  // offset into in args
 }
 
-const ManagedRegisterEntrySpills& Arm64ManagedRuntimeCallingConvention::EntrySpills() {
-  // We spill the argument registers on ARM64 to free them up for scratch use, we then assume
-  // all arguments are on the stack.
-  if ((entry_spills_.size() == 0) && (NumArgs() > 0)) {
-    int gp_reg_index = 1;   // we start from X1/W1, X0 holds ArtMethod*.
-    int fp_reg_index = 0;   // D0/S0.
-
-    // We need to choose the correct register (D/S or X/W) since the managed
-    // stack uses 32bit stack slots.
-    ResetIterator(FrameOffset(0));
-    while (HasNext()) {
-      if (IsCurrentParamAFloatOrDouble()) {  // FP regs.
-          if (fp_reg_index < 8) {
-            if (!IsCurrentParamADouble()) {
-              entry_spills_.push_back(Arm64ManagedRegister::FromSRegister(kSArgumentRegisters[fp_reg_index]));
-            } else {
-              entry_spills_.push_back(Arm64ManagedRegister::FromDRegister(kDArgumentRegisters[fp_reg_index]));
-            }
-            fp_reg_index++;
-          } else {  // just increase the stack offset.
-            if (!IsCurrentParamADouble()) {
-              entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
-            } else {
-              entry_spills_.push_back(ManagedRegister::NoRegister(), 8);
-            }
-          }
-      } else {  // GP regs.
-        if (gp_reg_index < 8) {
-          if (IsCurrentParamALong() && (!IsCurrentParamAReference())) {
-            entry_spills_.push_back(Arm64ManagedRegister::FromXRegister(kXArgumentRegisters[gp_reg_index]));
-          } else {
-            entry_spills_.push_back(Arm64ManagedRegister::FromWRegister(kWArgumentRegisters[gp_reg_index]));
-          }
-          gp_reg_index++;
-        } else {  // just increase the stack offset.
-          if (IsCurrentParamALong() && (!IsCurrentParamAReference())) {
-              entry_spills_.push_back(ManagedRegister::NoRegister(), 8);
-          } else {
-              entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
-          }
-        }
-      }
-      Next();
-    }
-  }
-  return entry_spills_;
-}
-
 // JNI calling convention
 
 Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static,
@@ -411,7 +372,6 @@
   DCHECK(std::none_of(kXArgumentRegisters,
                       kXArgumentRegisters + std::size(kXArgumentRegisters),
                       [](XRegister reg) { return reg == X15; }));
-  DCHECK(!InterproceduralScratchRegister().Equals(Arm64ManagedRegister::FromXRegister(X15)));
   return Arm64ManagedRegister::FromXRegister(X15);
 }
 
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 64b29f1..7beca08 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -33,18 +33,14 @@
   ~Arm64ManagedRuntimeCallingConvention() override {}
   // Calling convention
   ManagedRegister ReturnRegister() override;
-  ManagedRegister InterproceduralScratchRegister() const override;
   // Managed runtime calling convention
   ManagedRegister MethodRegister() override;
   bool IsCurrentParamInRegister() override;
   bool IsCurrentParamOnStack() override;
   ManagedRegister CurrentParamRegister() override;
   FrameOffset CurrentParamStackOffset() override;
-  const ManagedRegisterEntrySpills& EntrySpills() override;
 
  private:
-  ManagedRegisterEntrySpills entry_spills_;
-
   DISALLOW_COPY_AND_ASSIGN(Arm64ManagedRuntimeCallingConvention);
 };
 
@@ -58,7 +54,6 @@
   // Calling convention
   ManagedRegister ReturnRegister() override;
   ManagedRegister IntReturnRegister() override;
-  ManagedRegister InterproceduralScratchRegister() const override;
   // JNI calling convention
   size_t FrameSize() const override;
   size_t OutArgSize() const override;
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 3d4cefe..b4396f0 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -48,15 +48,13 @@
 
   // Register that holds result of this method invocation.
   virtual ManagedRegister ReturnRegister() = 0;
-  // Register reserved for scratch usage during procedure calls.
-  virtual ManagedRegister InterproceduralScratchRegister() const = 0;
 
   // Iterator interface
 
   // Place iterator at start of arguments. The displacement is applied to
   // frame offset methods to account for frames which may be on the stack
   // below the one being iterated over.
-  void ResetIterator(FrameOffset displacement) {
+  virtual void ResetIterator(FrameOffset displacement) {
     displacement_ = displacement;
     itr_slots_ = 0;
     itr_args_ = 0;
@@ -252,11 +250,14 @@
 
   // Iterator interface
   bool HasNext();
-  void Next();
+  virtual void Next();
   bool IsCurrentParamAReference();
   bool IsCurrentParamAFloatOrDouble();
   bool IsCurrentParamADouble();
   bool IsCurrentParamALong();
+  bool IsCurrentParamALongOrDouble() {
+    return IsCurrentParamALong() || IsCurrentParamADouble();
+  }
   bool IsCurrentArgExplicit();  // i.e. a non-implicit argument such as this
   bool IsCurrentArgPossiblyNull();
   size_t CurrentParamSize();
@@ -267,9 +268,6 @@
 
   virtual ~ManagedRuntimeCallingConvention() {}
 
-  // Registers to spill to caller's out registers on entry.
-  virtual const ManagedRegisterEntrySpills& EntrySpills() = 0;
-
  protected:
   ManagedRuntimeCallingConvention(bool is_static,
                                   bool is_synchronized,
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index c2db73a..ac060cc 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -224,9 +224,25 @@
   ManagedRegister method_register =
       is_critical_native ? ManagedRegister::NoRegister() : mr_conv->MethodRegister();
   ArrayRef<const ManagedRegister> callee_save_regs = main_jni_conv->CalleeSaveRegisters();
-  __ BuildFrame(current_frame_size, method_register, callee_save_regs, mr_conv->EntrySpills());
+  __ BuildFrame(current_frame_size, method_register, callee_save_regs);
   DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size));
 
+  {
+    // Spill all register arguments.
+    // TODO: Spill reference args directly to the HandleScope.
+    // TODO: Spill native stack args straight to their stack locations (adjust SP earlier).
+    // TODO: Move args in registers without spilling for @CriticalNative.
+    // TODO: Provide assembler API for batched moves to allow moving multiple arguments
+    // with single instruction (arm: LDRD/STRD/LDMIA/STMIA, arm64: LDP/STP).
+    mr_conv->ResetIterator(FrameOffset(current_frame_size));
+    for (; mr_conv->HasNext(); mr_conv->Next()) {
+      if (mr_conv->IsCurrentParamInRegister()) {
+        size_t size = mr_conv->IsCurrentParamALongOrDouble() ? 8u : 4u;
+        __ Store(mr_conv->CurrentParamStackOffset(), mr_conv->CurrentParamRegister(), size);
+      }
+    }
+  }
+
   if (LIKELY(!is_critical_native)) {
     // NOTE: @CriticalNative methods don't have a HandleScope
     //       because they can't have any reference parameters or return values.
@@ -235,15 +251,12 @@
     mr_conv->ResetIterator(FrameOffset(current_frame_size));
     main_jni_conv->ResetIterator(FrameOffset(0));
     __ StoreImmediateToFrame(main_jni_conv->HandleScopeNumRefsOffset(),
-                             main_jni_conv->ReferenceCount(),
-                             mr_conv->InterproceduralScratchRegister());
+                             main_jni_conv->ReferenceCount());
 
     __ CopyRawPtrFromThread(main_jni_conv->HandleScopeLinkOffset(),
-                            Thread::TopHandleScopeOffset<kPointerSize>(),
-                            mr_conv->InterproceduralScratchRegister());
+                            Thread::TopHandleScopeOffset<kPointerSize>());
     __ StoreStackOffsetToThread(Thread::TopHandleScopeOffset<kPointerSize>(),
-                                main_jni_conv->HandleScopeOffset(),
-                                mr_conv->InterproceduralScratchRegister());
+                                main_jni_conv->HandleScopeOffset());
 
     // 3. Place incoming reference arguments into handle scope
     main_jni_conv->Next();  // Skip JNIEnv*
@@ -252,15 +265,12 @@
       FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
       // Check handle scope offset is within frame
       CHECK_LT(handle_scope_offset.Uint32Value(), current_frame_size);
-      // Note this LoadRef() doesn't need heap unpoisoning since it's from the ArtMethod.
-      // Note this LoadRef() does not include read barrier. It will be handled below.
-      //
-      // scratchRegister = *method[DeclaringClassOffset()];
-      __ LoadRef(main_jni_conv->InterproceduralScratchRegister(),
-                 mr_conv->MethodRegister(), ArtMethod::DeclaringClassOffset(), false);
-      __ VerifyObject(main_jni_conv->InterproceduralScratchRegister(), false);
-      // *handleScopeOffset = scratchRegister
-      __ StoreRef(handle_scope_offset, main_jni_conv->InterproceduralScratchRegister());
+      // Note: This CopyRef() doesn't need heap unpoisoning since it's from the ArtMethod.
+      // Note: This CopyRef() does not include read barrier. It will be handled below.
+      __ CopyRef(handle_scope_offset,
+                 mr_conv->MethodRegister(),
+                 ArtMethod::DeclaringClassOffset(),
+                 /* unpoison_reference= */ false);
       main_jni_conv->Next();  // in handle scope so move to next argument
     }
     // Place every reference into the handle scope (ignore other parameters).
@@ -277,8 +287,10 @@
         CHECK_LT(handle_scope_offset.Uint32Value(), current_frame_size);
         CHECK_NE(handle_scope_offset.Uint32Value(),
                  main_jni_conv->SavedLocalReferenceCookieOffset().Uint32Value());
-        bool input_in_reg = mr_conv->IsCurrentParamInRegister();
-        bool input_on_stack = mr_conv->IsCurrentParamOnStack();
+        // We spilled all registers above, so use stack locations.
+        // TODO: Spill refs straight to the HandleScope.
+        bool input_in_reg = false;  // mr_conv->IsCurrentParamInRegister();
+        bool input_on_stack = true;  // mr_conv->IsCurrentParamOnStack();
         CHECK(input_in_reg || input_on_stack);
 
         if (input_in_reg) {
@@ -288,8 +300,7 @@
         } else if (input_on_stack) {
           FrameOffset in_off  = mr_conv->CurrentParamStackOffset();
           __ VerifyObject(in_off, mr_conv->IsCurrentArgPossiblyNull());
-          __ CopyRef(handle_scope_offset, in_off,
-                     mr_conv->InterproceduralScratchRegister());
+          __ CopyRef(handle_scope_offset, in_off);
         }
       }
       mr_conv->Next();
@@ -330,13 +341,8 @@
       //
       // Check if gc_is_marking is set -- if it's not, we don't need
       // a read barrier so skip it.
-      __ LoadFromThread(main_jni_conv->InterproceduralScratchRegister(),
-                        Thread::IsGcMarkingOffset<kPointerSize>(),
-                        Thread::IsGcMarkingSize());
       // Jump over the slow path if gc is marking is false.
-      __ Jump(skip_cold_path_label.get(),
-              JNIMacroUnaryCondition::kZero,
-              main_jni_conv->InterproceduralScratchRegister());
+      __ TestGcMarking(skip_cold_path_label.get(), JNIMacroUnaryCondition::kZero);
     }
 
     // Construct slow path for read barrier:
@@ -353,25 +359,22 @@
     // Pass the handle for the class as the first argument.
     if (main_jni_conv->IsCurrentParamOnStack()) {
       FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
-      __ CreateHandleScopeEntry(out_off, class_handle_scope_offset,
-                         mr_conv->InterproceduralScratchRegister(),
-                         false);
+      __ CreateHandleScopeEntry(out_off, class_handle_scope_offset, /*null_allowed=*/ false);
     } else {
       ManagedRegister out_reg = main_jni_conv->CurrentParamRegister();
-      __ CreateHandleScopeEntry(out_reg, class_handle_scope_offset,
-                         ManagedRegister::NoRegister(), false);
+      __ CreateHandleScopeEntry(out_reg,
+                                class_handle_scope_offset,
+                                ManagedRegister::NoRegister(),
+                                /*null_allowed=*/ false);
     }
     main_jni_conv->Next();
     // Pass the current thread as the second argument and call.
     if (main_jni_conv->IsCurrentParamInRegister()) {
       __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-      __ Call(main_jni_conv->CurrentParamRegister(),
-              Offset(read_barrier),
-              main_jni_conv->InterproceduralScratchRegister());
+      __ Call(main_jni_conv->CurrentParamRegister(), Offset(read_barrier));
     } else {
-      __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
-                          main_jni_conv->InterproceduralScratchRegister());
-      __ CallFromThread(read_barrier, main_jni_conv->InterproceduralScratchRegister());
+      __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset());
+      __ CallFromThread(read_barrier);
     }
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));  // Reset.
 
@@ -403,27 +406,27 @@
       main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
       if (main_jni_conv->IsCurrentParamOnStack()) {
         FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
-        __ CreateHandleScopeEntry(out_off, locked_object_handle_scope_offset,
-                                  mr_conv->InterproceduralScratchRegister(), false);
+        __ CreateHandleScopeEntry(out_off,
+                                  locked_object_handle_scope_offset,
+                                  /*null_allowed=*/ false);
       } else {
         ManagedRegister out_reg = main_jni_conv->CurrentParamRegister();
-        __ CreateHandleScopeEntry(out_reg, locked_object_handle_scope_offset,
-                                  ManagedRegister::NoRegister(), false);
+        __ CreateHandleScopeEntry(out_reg,
+                                  locked_object_handle_scope_offset,
+                                  ManagedRegister::NoRegister(),
+                                  /*null_allowed=*/ false);
       }
       main_jni_conv->Next();
     }
     if (main_jni_conv->IsCurrentParamInRegister()) {
       __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-      __ Call(main_jni_conv->CurrentParamRegister(),
-              Offset(jni_start),
-              main_jni_conv->InterproceduralScratchRegister());
+      __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start));
     } else {
-      __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
-                          main_jni_conv->InterproceduralScratchRegister());
-      __ CallFromThread(jni_start, main_jni_conv->InterproceduralScratchRegister());
+      __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset());
+      __ CallFromThread(jni_start);
     }
     if (is_synchronized) {  // Check for exceptions from monitor enter.
-      __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), main_out_arg_size);
+      __ ExceptionPoll(main_out_arg_size);
     }
 
     // Store into stack_frame[saved_cookie_offset] the return value of JniMethodStart.
@@ -477,13 +480,13 @@
     FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
     if (main_jni_conv->IsCurrentParamOnStack()) {
       FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
-      __ CreateHandleScopeEntry(out_off, handle_scope_offset,
-                         mr_conv->InterproceduralScratchRegister(),
-                         false);
+      __ CreateHandleScopeEntry(out_off, handle_scope_offset, /*null_allowed=*/ false);
     } else {
       ManagedRegister out_reg = main_jni_conv->CurrentParamRegister();
-      __ CreateHandleScopeEntry(out_reg, handle_scope_offset,
-                         ManagedRegister::NoRegister(), false);
+      __ CreateHandleScopeEntry(out_reg,
+                                handle_scope_offset,
+                                ManagedRegister::NoRegister(),
+                                /*null_allowed=*/ false);
     }
   }
 
@@ -494,13 +497,10 @@
     // Register that will hold local indirect reference table
     if (main_jni_conv->IsCurrentParamInRegister()) {
       ManagedRegister jni_env = main_jni_conv->CurrentParamRegister();
-      DCHECK(!jni_env.Equals(main_jni_conv->InterproceduralScratchRegister()));
       __ LoadRawPtrFromThread(jni_env, Thread::JniEnvOffset<kPointerSize>());
     } else {
       FrameOffset jni_env = main_jni_conv->CurrentParamStackOffset();
-      __ CopyRawPtrFromThread(jni_env,
-                              Thread::JniEnvOffset<kPointerSize>(),
-                              main_jni_conv->InterproceduralScratchRegister());
+      __ CopyRawPtrFromThread(jni_env, Thread::JniEnvOffset<kPointerSize>());
     }
   }
 
@@ -509,18 +509,13 @@
       ArtMethod::EntryPointFromJniOffset(InstructionSetPointerSize(instruction_set));
   if (UNLIKELY(is_critical_native)) {
     if (main_jni_conv->UseTailCall()) {
-      __ Jump(main_jni_conv->HiddenArgumentRegister(),
-              jni_entrypoint_offset,
-              main_jni_conv->InterproceduralScratchRegister());
+      __ Jump(main_jni_conv->HiddenArgumentRegister(), jni_entrypoint_offset);
     } else {
-      __ Call(main_jni_conv->HiddenArgumentRegister(),
-              jni_entrypoint_offset,
-              main_jni_conv->InterproceduralScratchRegister());
+      __ Call(main_jni_conv->HiddenArgumentRegister(), jni_entrypoint_offset);
     }
   } else {
     __ Call(FrameOffset(main_out_arg_size + mr_conv->MethodStackOffset().SizeValue()),
-            jni_entrypoint_offset,
-            main_jni_conv->InterproceduralScratchRegister());
+            jni_entrypoint_offset);
   }
 
   // 10. Fix differences in result widths.
@@ -601,7 +596,7 @@
     // Pass saved local reference state.
     if (end_jni_conv->IsCurrentParamOnStack()) {
       FrameOffset out_off = end_jni_conv->CurrentParamStackOffset();
-      __ Copy(out_off, saved_cookie_offset, end_jni_conv->InterproceduralScratchRegister(), 4);
+      __ Copy(out_off, saved_cookie_offset, 4);
     } else {
       ManagedRegister out_reg = end_jni_conv->CurrentParamRegister();
       __ Load(out_reg, saved_cookie_offset, 4);
@@ -611,25 +606,24 @@
       // Pass object for unlocking.
       if (end_jni_conv->IsCurrentParamOnStack()) {
         FrameOffset out_off = end_jni_conv->CurrentParamStackOffset();
-        __ CreateHandleScopeEntry(out_off, locked_object_handle_scope_offset,
-                           end_jni_conv->InterproceduralScratchRegister(),
-                           false);
+        __ CreateHandleScopeEntry(out_off,
+                                  locked_object_handle_scope_offset,
+                                  /*null_allowed=*/ false);
       } else {
         ManagedRegister out_reg = end_jni_conv->CurrentParamRegister();
-        __ CreateHandleScopeEntry(out_reg, locked_object_handle_scope_offset,
-                           ManagedRegister::NoRegister(), false);
+        __ CreateHandleScopeEntry(out_reg,
+                                  locked_object_handle_scope_offset,
+                                  ManagedRegister::NoRegister(),
+                                  /*null_allowed=*/ false);
       }
       end_jni_conv->Next();
     }
     if (end_jni_conv->IsCurrentParamInRegister()) {
       __ GetCurrentThread(end_jni_conv->CurrentParamRegister());
-      __ Call(end_jni_conv->CurrentParamRegister(),
-              Offset(jni_end),
-              end_jni_conv->InterproceduralScratchRegister());
+      __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end));
     } else {
-      __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset(),
-                          end_jni_conv->InterproceduralScratchRegister());
-      __ CallFromThread(jni_end, end_jni_conv->InterproceduralScratchRegister());
+      __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset());
+      __ CallFromThread(jni_end);
     }
 
     // 13. Reload return value
@@ -651,7 +645,7 @@
   // 15. Process pending exceptions from JNI call or monitor exit.
   //     @CriticalNative methods do not need exception poll in the stub.
   if (LIKELY(!is_critical_native)) {
-    __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), 0 /* stack_adjust= */);
+    __ ExceptionPoll(/* stack_adjust= */ 0);
   }
 
   // 16. Remove activation - need to restore callee save registers since the GC may have changed
@@ -685,14 +679,14 @@
 static void CopyParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                           ManagedRuntimeCallingConvention* mr_conv,
                           JniCallingConvention* jni_conv) {
-  bool input_in_reg = mr_conv->IsCurrentParamInRegister();
+  // We spilled all registers, so use stack locations.
+  // TODO: Move args in registers for @CriticalNative.
+  bool input_in_reg = false;  // mr_conv->IsCurrentParamInRegister();
   bool output_in_reg = jni_conv->IsCurrentParamInRegister();
   FrameOffset handle_scope_offset(0);
   bool null_allowed = false;
   bool ref_param = jni_conv->IsCurrentParamAReference();
   CHECK(!ref_param || mr_conv->IsCurrentParamAReference());
-  // input may be in register, on stack or both - but not none!
-  CHECK(input_in_reg || mr_conv->IsCurrentParamOnStack());
   if (output_in_reg) {  // output shouldn't straddle registers and stack
     CHECK(!jni_conv->IsCurrentParamOnStack());
   } else {
@@ -724,13 +718,12 @@
   } else if (!input_in_reg && !output_in_reg) {
     FrameOffset out_off = jni_conv->CurrentParamStackOffset();
     if (ref_param) {
-      __ CreateHandleScopeEntry(out_off, handle_scope_offset, mr_conv->InterproceduralScratchRegister(),
-                         null_allowed);
+      __ CreateHandleScopeEntry(out_off, handle_scope_offset, null_allowed);
     } else {
       FrameOffset in_off = mr_conv->CurrentParamStackOffset();
       size_t param_size = mr_conv->CurrentParamSize();
       CHECK_EQ(param_size, jni_conv->CurrentParamSize());
-      __ Copy(out_off, in_off, mr_conv->InterproceduralScratchRegister(), param_size);
+      __ Copy(out_off, in_off, param_size);
     }
   } else if (!input_in_reg && output_in_reg) {
     FrameOffset in_off = mr_conv->CurrentParamStackOffset();
@@ -738,7 +731,10 @@
     // Check that incoming stack arguments are above the current stack frame.
     CHECK_GT(in_off.Uint32Value(), mr_conv->GetDisplacement().Uint32Value());
     if (ref_param) {
-      __ CreateHandleScopeEntry(out_reg, handle_scope_offset, ManagedRegister::NoRegister(), null_allowed);
+      __ CreateHandleScopeEntry(out_reg,
+                                handle_scope_offset,
+                                ManagedRegister::NoRegister(),
+                                null_allowed);
     } else {
       size_t param_size = mr_conv->CurrentParamSize();
       CHECK_EQ(param_size, jni_conv->CurrentParamSize());
@@ -752,8 +748,7 @@
     CHECK_LT(out_off.Uint32Value(), jni_conv->GetDisplacement().Uint32Value());
     if (ref_param) {
       // TODO: recycle value in in_reg rather than reload from handle scope
-      __ CreateHandleScopeEntry(out_off, handle_scope_offset, mr_conv->InterproceduralScratchRegister(),
-                         null_allowed);
+      __ CreateHandleScopeEntry(out_off, handle_scope_offset, null_allowed);
     } else {
       size_t param_size = mr_conv->CurrentParamSize();
       CHECK_EQ(param_size, jni_conv->CurrentParamSize());
@@ -764,7 +759,7 @@
         // store where input straddles registers and stack
         CHECK_EQ(param_size, 8u);
         FrameOffset in_off = mr_conv->CurrentParamStackOffset();
-        __ StoreSpanning(out_off, in_reg, in_off, mr_conv->InterproceduralScratchRegister());
+        __ StoreSpanning(out_off, in_reg, in_off);
       }
     }
   }
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 4e643ba..6776f12 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -26,7 +26,12 @@
 namespace art {
 namespace x86 {
 
-static_assert(kX86PointerSize == PointerSize::k32, "Unexpected x86 pointer size");
+static constexpr Register kManagedCoreArgumentRegisters[] = {
+    EAX, ECX, EDX, EBX
+};
+static constexpr size_t kManagedCoreArgumentRegistersCount =
+    arraysize(kManagedCoreArgumentRegisters);
+static constexpr size_t kManagedFpArgumentRegistersCount = 4u;
 
 static constexpr ManagedRegister kCalleeSaveRegisters[] = {
     // Core registers.
@@ -67,14 +72,6 @@
 
 // Calling convention
 
-ManagedRegister X86ManagedRuntimeCallingConvention::InterproceduralScratchRegister() const {
-  return X86ManagedRegister::FromCpuRegister(ECX);
-}
-
-ManagedRegister X86JniCallingConvention::InterproceduralScratchRegister() const {
-  return X86ManagedRegister::FromCpuRegister(ECX);
-}
-
 ManagedRegister X86JniCallingConvention::ReturnScratchRegister() const {
   return ManagedRegister::NoRegister();  // No free regs, so assembler uses push/pop
 }
@@ -113,49 +110,58 @@
   return X86ManagedRegister::FromCpuRegister(EAX);
 }
 
+void X86ManagedRuntimeCallingConvention::ResetIterator(FrameOffset displacement) {
+  ManagedRuntimeCallingConvention::ResetIterator(displacement);
+  gpr_arg_count_ = 1u;  // Skip EAX for ArtMethod*
+}
+
+void X86ManagedRuntimeCallingConvention::Next() {
+  if (!IsCurrentParamAFloatOrDouble()) {
+    gpr_arg_count_ += IsCurrentParamALong() ? 2u : 1u;
+  }
+  ManagedRuntimeCallingConvention::Next();
+}
+
 bool X86ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
-  return false;  // Everything is passed by stack
+  if (IsCurrentParamAFloatOrDouble()) {
+    return itr_float_and_doubles_ < kManagedFpArgumentRegistersCount;
+  } else {
+    // Don't split a long between the last register and the stack.
+    size_t extra_regs = IsCurrentParamALong() ? 1u : 0u;
+    return gpr_arg_count_ + extra_regs < kManagedCoreArgumentRegistersCount;
+  }
 }
 
 bool X86ManagedRuntimeCallingConvention::IsCurrentParamOnStack() {
-  // We assume all parameters are on stack, args coming via registers are spilled as entry_spills.
-  return true;
+  return !IsCurrentParamInRegister();
 }
 
 ManagedRegister X86ManagedRuntimeCallingConvention::CurrentParamRegister() {
-  ManagedRegister res = ManagedRegister::NoRegister();
-  if (!IsCurrentParamAFloatOrDouble()) {
-    switch (gpr_arg_count_) {
-      case 0:
-        res = X86ManagedRegister::FromCpuRegister(ECX);
-        break;
-      case 1:
-        res = X86ManagedRegister::FromCpuRegister(EDX);
-        break;
-      case 2:
-        // Don't split a long between the last register and the stack.
-        if (IsCurrentParamALong()) {
-          return ManagedRegister::NoRegister();
-        }
-        res = X86ManagedRegister::FromCpuRegister(EBX);
-        break;
-    }
-  } else if (itr_float_and_doubles_ < 4) {
+  DCHECK(IsCurrentParamInRegister());
+  if (IsCurrentParamAFloatOrDouble()) {
     // First four float parameters are passed via XMM0..XMM3
-    res = X86ManagedRegister::FromXmmRegister(
-                                 static_cast<XmmRegister>(XMM0 + itr_float_and_doubles_));
+    XmmRegister reg = static_cast<XmmRegister>(XMM0 + itr_float_and_doubles_);
+    return X86ManagedRegister::FromXmmRegister(reg);
+  } else {
+    if (IsCurrentParamALong()) {
+      switch (gpr_arg_count_) {
+        case 1:
+          static_assert(kManagedCoreArgumentRegisters[1] == ECX);
+          static_assert(kManagedCoreArgumentRegisters[2] == EDX);
+          return X86ManagedRegister::FromRegisterPair(ECX_EDX);
+        case 2:
+          static_assert(kManagedCoreArgumentRegisters[2] == EDX);
+          static_assert(kManagedCoreArgumentRegisters[3] == EBX);
+          return X86ManagedRegister::FromRegisterPair(EDX_EBX);
+        default:
+          LOG(FATAL) << "UNREACHABLE";
+          UNREACHABLE();
+      }
+    } else {
+      Register core_reg = kManagedCoreArgumentRegisters[gpr_arg_count_];
+      return X86ManagedRegister::FromCpuRegister(core_reg);
+    }
   }
-  return res;
-}
-
-ManagedRegister X86ManagedRuntimeCallingConvention::CurrentParamHighLongRegister() {
-  ManagedRegister res = ManagedRegister::NoRegister();
-  DCHECK(IsCurrentParamALong());
-  switch (gpr_arg_count_) {
-    case 0: res = X86ManagedRegister::FromCpuRegister(EDX); break;
-    case 1: res = X86ManagedRegister::FromCpuRegister(EBX); break;
-  }
-  return res;
 }
 
 FrameOffset X86ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
@@ -164,48 +170,6 @@
                      (itr_slots_ * kFramePointerSize));  // offset into in args
 }
 
-const ManagedRegisterEntrySpills& X86ManagedRuntimeCallingConvention::EntrySpills() {
-  // We spill the argument registers on X86 to free them up for scratch use, we then assume
-  // all arguments are on the stack.
-  if (entry_spills_.size() == 0) {
-    ResetIterator(FrameOffset(0));
-    while (HasNext()) {
-      ManagedRegister in_reg = CurrentParamRegister();
-      bool is_long = IsCurrentParamALong();
-      if (!in_reg.IsNoRegister()) {
-        int32_t size = IsParamADouble(itr_args_) ? 8 : 4;
-        int32_t spill_offset = CurrentParamStackOffset().Uint32Value();
-        ManagedRegisterSpill spill(in_reg, size, spill_offset);
-        entry_spills_.push_back(spill);
-        if (is_long) {
-          // special case, as we need a second register here.
-          in_reg = CurrentParamHighLongRegister();
-          DCHECK(!in_reg.IsNoRegister());
-          // We have to spill the second half of the long.
-          ManagedRegisterSpill spill2(in_reg, size, spill_offset + 4);
-          entry_spills_.push_back(spill2);
-        }
-
-        // Keep track of the number of GPRs allocated.
-        if (!IsCurrentParamAFloatOrDouble()) {
-          if (is_long) {
-            // Long was allocated in 2 registers.
-            gpr_arg_count_ += 2;
-          } else {
-            gpr_arg_count_++;
-          }
-        }
-      } else if (is_long) {
-        // We need to skip the unused last register, which is empty.
-        // If we are already out of registers, this is harmless.
-        gpr_arg_count_ += 2;
-      }
-      Next();
-    }
-  }
-  return entry_spills_;
-}
-
 // JNI calling convention
 
 X86JniCallingConvention::X86JniCallingConvention(bool is_static,
@@ -326,7 +290,6 @@
                       [](ManagedRegister callee_save) constexpr {
                         return callee_save.Equals(X86ManagedRegister::FromCpuRegister(EAX));
                       }));
-  DCHECK(!InterproceduralScratchRegister().Equals(X86ManagedRegister::FromCpuRegister(EAX)));
   return X86ManagedRegister::FromCpuRegister(EAX);
 }
 
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index 1273e8d..6f22c2b 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -30,23 +30,21 @@
                                         is_synchronized,
                                         shorty,
                                         PointerSize::k32),
-        gpr_arg_count_(0) {}
+        gpr_arg_count_(1u) {}  // Skip EAX for ArtMethod*
   ~X86ManagedRuntimeCallingConvention() override {}
   // Calling convention
   ManagedRegister ReturnRegister() override;
-  ManagedRegister InterproceduralScratchRegister() const override;
+  void ResetIterator(FrameOffset displacement) override;
   // Managed runtime calling convention
   ManagedRegister MethodRegister() override;
+  void Next() override;
   bool IsCurrentParamInRegister() override;
   bool IsCurrentParamOnStack() override;
   ManagedRegister CurrentParamRegister() override;
   FrameOffset CurrentParamStackOffset() override;
-  const ManagedRegisterEntrySpills& EntrySpills() override;
 
  private:
   int gpr_arg_count_;
-  ManagedRegister CurrentParamHighLongRegister();
-  ManagedRegisterEntrySpills entry_spills_;
   DISALLOW_COPY_AND_ASSIGN(X86ManagedRuntimeCallingConvention);
 };
 
@@ -61,7 +59,6 @@
   // Calling convention
   ManagedRegister ReturnRegister() override;
   ManagedRegister IntReturnRegister() override;
-  ManagedRegister InterproceduralScratchRegister() const override;
   // JNI calling convention
   size_t FrameSize() const override;
   size_t OutArgSize() const override;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 9013b02..579347f 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -27,6 +27,11 @@
 namespace art {
 namespace x86_64 {
 
+static constexpr Register kCoreArgumentRegisters[] = {
+    RDI, RSI, RDX, RCX, R8, R9
+};
+static_assert(kMaxIntLikeRegisterArguments == arraysize(kCoreArgumentRegisters));
+
 static constexpr ManagedRegister kCalleeSaveRegisters[] = {
     // Core registers.
     X86_64ManagedRegister::FromCpuRegister(RBX),
@@ -87,14 +92,6 @@
 
 // Calling convention
 
-ManagedRegister X86_64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() const {
-  return X86_64ManagedRegister::FromCpuRegister(RAX);
-}
-
-ManagedRegister X86_64JniCallingConvention::InterproceduralScratchRegister() const {
-  return X86_64ManagedRegister::FromCpuRegister(RAX);
-}
-
 ManagedRegister X86_64JniCallingConvention::ReturnScratchRegister() const {
   return ManagedRegister::NoRegister();  // No free regs, so assembler uses push/pop
 }
@@ -130,58 +127,37 @@
 }
 
 bool X86_64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
-  return !IsCurrentParamOnStack();
+  if (IsCurrentParamAFloatOrDouble()) {
+    return itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments;
+  } else {
+    size_t non_fp_arg_number = itr_args_ - itr_float_and_doubles_;
+    return /* method */ 1u + non_fp_arg_number < kMaxIntLikeRegisterArguments;
+  }
 }
 
 bool X86_64ManagedRuntimeCallingConvention::IsCurrentParamOnStack() {
-  // We assume all parameters are on stack, args coming via registers are spilled as entry_spills
-  return true;
+  return !IsCurrentParamInRegister();
 }
 
 ManagedRegister X86_64ManagedRuntimeCallingConvention::CurrentParamRegister() {
-  ManagedRegister res = ManagedRegister::NoRegister();
-  if (!IsCurrentParamAFloatOrDouble()) {
-    switch (itr_args_ - itr_float_and_doubles_) {
-    case 0: res = X86_64ManagedRegister::FromCpuRegister(RSI); break;
-    case 1: res = X86_64ManagedRegister::FromCpuRegister(RDX); break;
-    case 2: res = X86_64ManagedRegister::FromCpuRegister(RCX); break;
-    case 3: res = X86_64ManagedRegister::FromCpuRegister(R8); break;
-    case 4: res = X86_64ManagedRegister::FromCpuRegister(R9); break;
-    }
-  } else if (itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments) {
+  DCHECK(IsCurrentParamInRegister());
+  if (IsCurrentParamAFloatOrDouble()) {
     // First eight float parameters are passed via XMM0..XMM7
-    res = X86_64ManagedRegister::FromXmmRegister(
-                                 static_cast<FloatRegister>(XMM0 + itr_float_and_doubles_));
+    FloatRegister fp_reg = static_cast<FloatRegister>(XMM0 + itr_float_and_doubles_);
+    return X86_64ManagedRegister::FromXmmRegister(fp_reg);
+  } else {
+    size_t non_fp_arg_number = itr_args_ - itr_float_and_doubles_;
+    Register core_reg = kCoreArgumentRegisters[/* method */ 1u + non_fp_arg_number];
+    return X86_64ManagedRegister::FromCpuRegister(core_reg);
   }
-  return res;
 }
 
 FrameOffset X86_64ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
-  CHECK(IsCurrentParamOnStack());
   return FrameOffset(displacement_.Int32Value() +  // displacement
                      static_cast<size_t>(kX86_64PointerSize) +  // Method ref
                      itr_slots_ * sizeof(uint32_t));  // offset into in args
 }
 
-const ManagedRegisterEntrySpills& X86_64ManagedRuntimeCallingConvention::EntrySpills() {
-  // We spill the argument registers on X86 to free them up for scratch use, we then assume
-  // all arguments are on the stack.
-  if (entry_spills_.size() == 0) {
-    ResetIterator(FrameOffset(0));
-    while (HasNext()) {
-      ManagedRegister in_reg = CurrentParamRegister();
-      if (!in_reg.IsNoRegister()) {
-        int32_t size = IsParamALongOrDouble(itr_args_) ? 8 : 4;
-        int32_t spill_offset = CurrentParamStackOffset().Uint32Value();
-        ManagedRegisterSpill spill(in_reg, size, spill_offset);
-        entry_spills_.push_back(spill);
-      }
-      Next();
-    }
-  }
-  return entry_spills_;
-}
-
 // JNI calling convention
 
 X86_64JniCallingConvention::X86_64JniCallingConvention(bool is_static,
@@ -334,7 +310,6 @@
                       [](ManagedRegister callee_save) constexpr {
                         return callee_save.Equals(X86_64ManagedRegister::FromCpuRegister(R11));
                       }));
-  DCHECK(!InterproceduralScratchRegister().Equals(X86_64ManagedRegister::FromCpuRegister(R11)));
   return X86_64ManagedRegister::FromCpuRegister(R11);
 }
 
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index 37b5978..d043a3e 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -33,16 +33,14 @@
   ~X86_64ManagedRuntimeCallingConvention() override {}
   // Calling convention
   ManagedRegister ReturnRegister() override;
-  ManagedRegister InterproceduralScratchRegister() const override;
   // Managed runtime calling convention
   ManagedRegister MethodRegister() override;
   bool IsCurrentParamInRegister() override;
   bool IsCurrentParamOnStack() override;
   ManagedRegister CurrentParamRegister() override;
   FrameOffset CurrentParamStackOffset() override;
-  const ManagedRegisterEntrySpills& EntrySpills() override;
+
  private:
-  ManagedRegisterEntrySpills entry_spills_;
   DISALLOW_COPY_AND_ASSIGN(X86_64ManagedRuntimeCallingConvention);
 };
 
@@ -56,7 +54,6 @@
   // Calling convention
   ManagedRegister ReturnRegister() override;
   ManagedRegister IntReturnRegister() override;
-  ManagedRegister InterproceduralScratchRegister() const override;
   // JNI calling convention
   size_t FrameSize() const override;
   size_t OutArgSize() const override;
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index ffb58ac..4efbe9f 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -76,8 +76,7 @@
 
 void ArmVIXLJNIMacroAssembler::BuildFrame(size_t frame_size,
                                           ManagedRegister method_reg,
-                                          ArrayRef<const ManagedRegister> callee_save_regs,
-                                          const ManagedRegisterEntrySpills& entry_spills) {
+                                          ArrayRef<const ManagedRegister> callee_save_regs) {
   // If we're creating an actual frame with the method, enforce managed stack alignment,
   // otherwise only the native stack alignment.
   if (method_reg.IsNoRegister()) {
@@ -123,25 +122,6 @@
     CHECK(r0.Is(AsVIXLRegister(method_reg.AsArm())));
     asm_.StoreToOffset(kStoreWord, r0, sp, 0);
   }
-
-  // Write out entry spills.
-  int32_t offset = frame_size + kFramePointerSize;
-  for (const ManagedRegisterSpill& spill : entry_spills) {
-    ArmManagedRegister reg = spill.AsArm();
-    if (reg.IsNoRegister()) {
-      // only increment stack offset.
-      offset += spill.getSize();
-    } else if (reg.IsCoreRegister()) {
-      asm_.StoreToOffset(kStoreWord, AsVIXLRegister(reg), sp, offset);
-      offset += 4;
-    } else if (reg.IsSRegister()) {
-      asm_.StoreSToOffset(AsVIXLSRegister(reg), sp, offset);
-      offset += 4;
-    } else if (reg.IsDRegister()) {
-      asm_.StoreDToOffset(AsVIXLDRegister(reg), sp, offset);
-      offset += 8;
-    }
-  }
 }
 
 void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size,
@@ -271,27 +251,35 @@
 
 void ArmVIXLJNIMacroAssembler::StoreSpanning(FrameOffset dest,
                                              ManagedRegister msrc,
-                                             FrameOffset in_off,
-                                             ManagedRegister mscratch) {
+                                             FrameOffset in_off) {
   vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm());
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
   asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.LoadFromOffset(kLoadWord, scratch, sp, in_off.Int32Value());
   asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value() + 4);
 }
 
-void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest,
-                                       FrameOffset src,
-                                       ManagedRegister mscratch) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
+void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
   asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
 }
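
Note: the recurring rewrite in this file replaces a caller-supplied scratch
register (previously excluded from VIXL's temp pool) with one acquired from the
pool on demand. The pattern, as a minimal sketch (asm_ as in this class):

    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
    vixl32::Register scratch = temps.Acquire();  // Typically ip (r12) on AArch32.
    // ... use scratch ...
    // The register returns to the pool when `temps` goes out of scope.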
 
+void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest,
+                                       ManagedRegister base,
+                                       MemberOffset offs,
+                                       bool unpoison_reference) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  vixl32::Register scratch = temps.Acquire();
+  asm_.LoadFromOffset(kLoadWord, scratch, AsVIXLRegister(base.AsArm()), offs.Int32Value());
+  if (unpoison_reference) {
+    asm_.MaybeUnpoisonHeapReference(scratch);
+  }
+  asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
+}
+
 void ArmVIXLJNIMacroAssembler::LoadRef(ManagedRegister mdest,
                                        ManagedRegister mbase,
                                        MemberOffset offs,
@@ -318,12 +306,9 @@
   UNIMPLEMENTED(FATAL);
 }
 
-void ArmVIXLJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest,
-                                                     uint32_t imm,
-                                                     ManagedRegister mscratch) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
+void ArmVIXLJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.LoadImmediate(scratch, imm);
   asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
 }
@@ -345,12 +330,9 @@
   asm_.LoadFromOffset(kLoadWord, dest, tr, offs.Int32Value());
 }
 
-void ArmVIXLJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
-                                                    ThreadOffset32 thr_offs,
-                                                    ManagedRegister mscratch) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
+void ArmVIXLJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.LoadFromOffset(kLoadWord, scratch, tr, thr_offs.Int32Value());
   asm_.StoreToOffset(kStoreWord, scratch, sp, fr_offs.Int32Value());
 }
@@ -362,11 +344,9 @@
 }
 
 void ArmVIXLJNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
-                                                        FrameOffset fr_offs,
-                                                        ManagedRegister mscratch) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
+                                                        FrameOffset fr_offs) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.AddConstant(scratch, sp, fr_offs.Int32Value());
   asm_.StoreToOffset(kStoreWord, scratch, tr, thr_offs.Int32Value());
 }
@@ -389,12 +369,25 @@
                                     ManagedRegister msrc,
                                     size_t size  ATTRIBUTE_UNUSED) {
   ArmManagedRegister dst = mdst.AsArm();
+  if (kIsDebugBuild) {
+    // Check that the destination is not a scratch register.
+    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+    if (dst.IsCoreRegister()) {
+      CHECK(!temps.IsAvailable(AsVIXLRegister(dst)));
+    } else if (dst.IsDRegister()) {
+      CHECK(!temps.IsAvailable(AsVIXLDRegister(dst)));
+    } else if (dst.IsSRegister()) {
+      CHECK(!temps.IsAvailable(AsVIXLSRegister(dst)));
+    } else {
+      CHECK(dst.IsRegisterPair()) << dst;
+      CHECK(!temps.IsAvailable(AsVIXLRegisterPairLow(dst)));
+      CHECK(!temps.IsAvailable(AsVIXLRegisterPairHigh(dst)));
+    }
+  }
   ArmManagedRegister src = msrc.AsArm();
   if (!dst.Equals(src)) {
     if (dst.IsCoreRegister()) {
       CHECK(src.IsCoreRegister()) << src;
-      UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-      temps.Exclude(AsVIXLRegister(dst));
       ___ Mov(AsVIXLRegister(dst), AsVIXLRegister(src));
     } else if (dst.IsDRegister()) {
       if (src.IsDRegister()) {
@@ -427,14 +420,10 @@
   }
 }
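
Note: the debug-build check added to Move() above guards against a destination
that is still in VIXL's scratch pool, which a later Acquire() could silently
clobber. The Thumb2 test below hits exactly this case: its Move() into the IP
scratch register is replaced by a Move() into the hidden argument register R4.
Illustration, using the test's register names (the first call is hypothetical):

    __ Move(scratch_register, method_register, 4);     // Would now fail the CHECK in debug builds.
    __ Move(hidden_arg_register, method_register, 4);  // The updated test uses R4 instead.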
 
-void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest,
-                                    FrameOffset src,
-                                    ManagedRegister mscratch,
-                                    size_t size) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
-  CHECK(size == 4 || size == 8) << size;
+void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
+  DCHECK(size == 4 || size == 8) << size;
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   if (size == 4) {
     asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
     asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
@@ -535,11 +524,9 @@
 
 void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
                                                       FrameOffset handle_scope_offset,
-                                                      ManagedRegister mscratch,
                                                       bool null_allowed) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   if (null_allowed) {
     asm_.LoadFromOffset(kLoadWord, scratch, sp, handle_scope_offset.Int32Value());
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
@@ -579,33 +566,26 @@
   // TODO: not validating references.
 }
 
-void ArmVIXLJNIMacroAssembler::Jump(ManagedRegister mbase,
-                                    Offset offset,
-                                    ManagedRegister mscratch) {
+void ArmVIXLJNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset) {
   vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.LoadFromOffset(kLoadWord, scratch, base, offset.Int32Value());
   ___ Bx(scratch);
 }
 
-void ArmVIXLJNIMacroAssembler::Call(ManagedRegister mbase,
-                                    Offset offset,
-                                    ManagedRegister mscratch) {
+void ArmVIXLJNIMacroAssembler::Call(ManagedRegister mbase, Offset offset) {
   vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.LoadFromOffset(kLoadWord, scratch, base, offset.Int32Value());
   ___ Blx(scratch);
   // TODO: place reference map on call.
 }
 
-void ArmVIXLJNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
+void ArmVIXLJNIMacroAssembler::Call(FrameOffset base, Offset offset) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   // Call *(*(SP + base) + offset)
   asm_.LoadFromOffset(kLoadWord, scratch, sp, base.Int32Value());
   asm_.LoadFromOffset(kLoadWord, scratch, scratch, offset.Int32Value());
@@ -613,29 +593,26 @@
   // TODO: place reference map on call
 }
 
-void ArmVIXLJNIMacroAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED,
-                                              ManagedRegister scratch ATTRIBUTE_UNUSED) {
+void ArmVIXLJNIMacroAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL);
 }
 
-void ArmVIXLJNIMacroAssembler::GetCurrentThread(ManagedRegister mtr) {
+void ArmVIXLJNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(AsVIXLRegister(mtr.AsArm()));
-  ___ Mov(AsVIXLRegister(mtr.AsArm()), tr);
+  temps.Exclude(AsVIXLRegister(dest.AsArm()));
+  ___ Mov(AsVIXLRegister(dest.AsArm()), tr);
 }
 
-void ArmVIXLJNIMacroAssembler::GetCurrentThread(FrameOffset dest_offset,
-                                                ManagedRegister scratch ATTRIBUTE_UNUSED) {
+void ArmVIXLJNIMacroAssembler::GetCurrentThread(FrameOffset dest_offset) {
   asm_.StoreToOffset(kStoreWord, tr, sp, dest_offset.Int32Value());
 }
 
-void ArmVIXLJNIMacroAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
+void ArmVIXLJNIMacroAssembler::ExceptionPoll(size_t stack_adjust) {
   CHECK_ALIGNED(stack_adjust, kAapcsStackAlignment);
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   exception_blocks_.emplace_back(
-      new ArmVIXLJNIMacroAssembler::ArmException(mscratch.AsArm(), stack_adjust));
+      new ArmVIXLJNIMacroAssembler::ArmException(scratch, stack_adjust));
   asm_.LoadFromOffset(kLoadWord,
                       scratch,
                       tr,
@@ -656,23 +633,22 @@
   ___ B(ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
 }
 
-void ArmVIXLJNIMacroAssembler::Jump(JNIMacroLabel* label,
-                                    JNIMacroUnaryCondition condition,
-                                    ManagedRegister mtest) {
+void ArmVIXLJNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
   CHECK(label != nullptr);
 
-  vixl::aarch32::Register test = AsVIXLRegister(mtest.AsArm());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(test);
-  switch (condition) {
+  vixl32::Register scratch = temps.Acquire();
+  DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
+  ___ Ldr(scratch, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
+  switch (cond) {
     case JNIMacroUnaryCondition::kZero:
-      ___ CompareAndBranchIfZero(test, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+      ___ CompareAndBranchIfZero(scratch, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
       break;
     case JNIMacroUnaryCondition::kNotZero:
-      ___ CompareAndBranchIfNonZero(test, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+      ___ CompareAndBranchIfNonZero(scratch, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
       break;
     default:
-      LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(condition);
+      LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond);
       UNREACHABLE();
   }
 }
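
Note: TestGcMarking() folds the load of the thread-local GC-marking flag into
the assembler, so callers no longer load the flag themselves and pass it to
Jump(). A hedged usage sketch, using only the JNIMacroAssembler interface below
(jni_asm is an assumed pointer to one):

    std::unique_ptr<JNIMacroLabel> marking = jni_asm->CreateLabel();
    jni_asm->TestGcMarking(marking.get(), JNIMacroUnaryCondition::kNotZero);
    // ... fast path for the non-marking case ...
    jni_asm->Bind(marking.get());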
@@ -689,7 +665,7 @@
     DecreaseFrameSize(exception->stack_adjust_);
   }
 
-  vixl::aarch32::Register scratch = AsVIXLRegister(exception->scratch_);
+  vixl32::Register scratch = exception->scratch_;
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
   temps.Exclude(scratch);
   // Pass exception object as argument.
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index 1724671..2bd571e 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -51,8 +51,7 @@
   // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size,
                   ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) override;
+                  ArrayRef<const ManagedRegister> callee_save_regs) override;
 
   // Emit code that will remove an activation from the stack.
   void RemoveFrame(size_t frame_size,
@@ -67,18 +66,13 @@
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
 
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override;
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
 
-  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
-                                FrameOffset fr_offs,
-                                ManagedRegister scratch) override;
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) override;
 
   void StoreStackPointerToThread(ThreadOffset32 thr_offs) override;
 
-  void StoreSpanning(FrameOffset dest,
-                     ManagedRegister src,
-                     FrameOffset in_off,
-                     ManagedRegister scratch) override;
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
   // Load routines.
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
@@ -101,17 +95,19 @@
   // Copying routines.
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
 
-  void CopyRawPtrFromThread(FrameOffset fr_offs,
-                            ThreadOffset32 thr_offs,
-                            ManagedRegister scratch) override;
+  void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override;
 
   void CopyRawPtrToThread(ThreadOffset32 thr_offs,
                           FrameOffset fr_offs,
                           ManagedRegister scratch) override;
 
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override;
+  void CopyRef(FrameOffset dest, FrameOffset src) override;
+  void CopyRef(FrameOffset dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) override;
 
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override;
+  void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
 
   void Copy(FrameOffset dest,
             ManagedRegister src_base,
@@ -152,9 +148,8 @@
   void ZeroExtend(ManagedRegister mreg, size_t size) override;
 
   // Exploit fast access in managed code to Thread::Current().
-  void GetCurrentThread(ManagedRegister mtr) override;
-  void GetCurrentThread(FrameOffset dest_offset,
-                        ManagedRegister scratch) override;
+  void GetCurrentThread(ManagedRegister dest) override;
+  void GetCurrentThread(FrameOffset dest_offset) override;
 
   // Set up out_reg to hold an Object** into the handle scope, or to be null if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
@@ -169,7 +164,6 @@
   // value is null and null_allowed.
   void CreateHandleScopeEntry(FrameOffset out_off,
                               FrameOffset handlescope_offset,
-                              ManagedRegister scratch,
                               bool null_allowed) override;
 
   // src holds a handle scope entry (Object**); load this into dst.
@@ -182,23 +176,23 @@
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
   // Jump to address held at [base+offset] (used for tail calls).
-  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+  void Jump(ManagedRegister base, Offset offset) override;
 
   // Call to address held at [base+offset].
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;
-  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) override;
+  void Call(ManagedRegister base, Offset offset) override;
+  void Call(FrameOffset base, Offset offset) override;
+  void CallFromThread(ThreadOffset32 offset) override;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to an ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override;
+  void ExceptionPoll(size_t stack_adjust) override;
 
   // Create a new label that can be used with Jump/Bind calls.
   std::unique_ptr<JNIMacroLabel> CreateLabel() override;
   // Emit an unconditional jump to the label.
   void Jump(JNIMacroLabel* label) override;
-  // Emit a conditional jump to the label by applying a unary condition test to the register.
-  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override;
+  // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag.
+  void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;
 
@@ -210,13 +204,13 @@
  private:
   class ArmException {
    private:
-    ArmException(ArmManagedRegister scratch, size_t stack_adjust)
+    ArmException(vixl32::Register scratch, size_t stack_adjust)
         : scratch_(scratch), stack_adjust_(stack_adjust) {}
 
     vixl32::Label* Entry() { return &exception_entry_; }
 
     // Register used for passing Thread::Current()->exception_.
-    const ArmManagedRegister scratch_;
+    const vixl32::Register scratch_;
 
     // Stack adjust for ExceptionPool.
     const size_t stack_adjust_;
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 5b46971..c2aef83 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -51,11 +51,11 @@
   ___ FinalizeCode();
 }
 
-void Arm64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
-  ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR));
+void Arm64JNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
+  ___ Mov(reg_x(dest.AsArm64().AsXRegister()), reg_x(TR));
 }
 
-void Arm64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
+void Arm64JNIMacroAssembler::GetCurrentThread(FrameOffset offset) {
   StoreToOffset(TR, SP, offset.Int32Value());
 }
 
@@ -162,41 +162,31 @@
   StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
 }
 
-void Arm64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset offs,
-                                                   uint32_t imm,
-                                                   ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadImmediate(scratch.AsXRegister(), imm);
-  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP,
-                 offs.Int32Value());
+void Arm64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset offs, uint32_t imm) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireW();
+  ___ Mov(scratch.X(), imm);  // TODO: Use W register.
+  ___ Str(scratch, MEM_OP(reg_x(SP), offs.Int32Value()));
 }
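
Note: the "TODO: Use W register" above exists because a 32-bit immediate only
needs a W-sized move; the X-sized Mov merely reproduces the instruction
previously emitted via LoadImmediate(). The TODO'd form would presumably be:

    ___ Mov(scratch, imm);  // W register suffices for a uint32_t.
    ___ Str(scratch, MEM_OP(reg_x(SP), offs.Int32Value()));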
 
-void Arm64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs,
-                                                      FrameOffset fr_offs,
-                                                      ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
+void Arm64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs, FrameOffset fr_offs) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
+  ___ Add(scratch, reg_x(SP), fr_offs.Int32Value());
+  ___ Str(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
 }
 
 void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  Register temp = temps.AcquireX();
-  ___ Mov(temp, reg_x(SP));
-  ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
+  Register scratch = temps.AcquireX();
+  ___ Mov(scratch, reg_x(SP));
+  ___ Str(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
 }
 
-void Arm64JNIMacroAssembler::StoreSpanning(FrameOffset dest_off,
-                                           ManagedRegister m_source,
-                                           FrameOffset in_off,
-                                           ManagedRegister m_scratch) {
-  Arm64ManagedRegister source = m_source.AsArm64();
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  StoreToOffset(source.AsXRegister(), SP, dest_off.Int32Value());
-  LoadFromOffset(scratch.AsXRegister(), SP, in_off.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), SP, dest_off.Int32Value() + 8);
+void Arm64JNIMacroAssembler::StoreSpanning(FrameOffset dest_off ATTRIBUTE_UNUSED,
+                                           ManagedRegister m_source ATTRIBUTE_UNUSED,
+                                           FrameOffset in_off ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL);  // This case is not applicable to ARM64.
 }
 
 // Load routines.
@@ -338,6 +328,19 @@
 // Copying routines.
 void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t size) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
+  if (kIsDebugBuild) {
+    // Check that the destination is not a scratch register.
+    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+    if (dst.IsXRegister()) {
+      CHECK(!temps.IsAvailable(reg_x(dst.AsXRegister())));
+    } else if (dst.IsWRegister()) {
+      CHECK(!temps.IsAvailable(reg_w(dst.AsWRegister())));
+    } else if (dst.IsSRegister()) {
+      CHECK(!temps.IsAvailable(reg_s(dst.AsSRegister())));
+    } else {
+      CHECK(!temps.IsAvailable(reg_d(dst.AsDRegister())));
+    }
+  }
   Arm64ManagedRegister src = m_src.AsArm64();
   if (!dst.Equals(src)) {
     if (dst.IsXRegister()) {
@@ -365,13 +368,11 @@
   }
 }
 
-void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
-                                                  ThreadOffset64 tr_offs,
-                                                  ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
+void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 tr_offs) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
+  ___ Ldr(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
+  ___ Str(scratch, MEM_OP(sp, fr_offs.Int32Value()));
 }
 
 void Arm64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 tr_offs,
@@ -383,31 +384,38 @@
   StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
 }
 
-void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(),
-                  SP, src.Int32Value());
-  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(),
-                 SP, dest.Int32Value());
+void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireW();
+  ___ Ldr(scratch, MEM_OP(reg_x(SP), src.Int32Value()));
+  ___ Str(scratch, MEM_OP(reg_x(SP), dest.Int32Value()));
 }
 
-void Arm64JNIMacroAssembler::Copy(FrameOffset dest,
-                                  FrameOffset src,
-                                  ManagedRegister m_scratch,
-                                  size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, src.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest,
+                                     ManagedRegister base,
+                                     MemberOffset offs,
+                                     bool unpoison_reference) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireW();
+  ___ Ldr(scratch, MEM_OP(reg_x(base.AsArm64().AsXRegister()), offs.Int32Value()));
+  if (unpoison_reference) {
+    asm_.MaybeUnpoisonHeapReference(scratch);
   }
+  ___ Str(scratch, MEM_OP(reg_x(SP), dest.Int32Value()));
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
+  DCHECK(size == 4 || size == 8) << size;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = (size == 8) ? temps.AcquireX() : temps.AcquireW();
+  if (size < 8u || IsAligned<8u>(src.Int32Value()) || src.Int32Value() < 0x100) {
+    ___ Ldr(scratch, MEM_OP(reg_x(SP), src.Int32Value()));
+  } else {
+    // TODO: Let the macro assembler deal with this case as well (uses another scratch register).
+    ___ Mov(scratch.X(), src.Int32Value());
+    ___ Ldr(scratch, MEM_OP(reg_x(SP), scratch.X()));
+  }
+  ___ Str(scratch, MEM_OP(reg_x(SP), dest.Int32Value()));
 }
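
Note: the guard above matches the A64 addressing limits for SP-relative loads
(assumed ISA ranges; word-sized frame offsets are taken to be 4-aligned):

    // LDR Wt, [SP, #imm]  : imm in [0, 16380], multiple of 4.
    // LDR Xt, [SP, #imm]  : imm in [0, 32760], multiple of 8.
    // LDUR Xt, [SP, #imm] : imm in [-256, 255], no alignment requirement.
    // An unaligned 64-bit offset >= 0x100 fits none of these, so the offset is
    // materialized in the scratch register and the register-offset form is used.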
 
 void Arm64JNIMacroAssembler::Copy(FrameOffset dest,
@@ -539,35 +547,34 @@
   // TODO: not validating references.
 }
 
-void Arm64JNIMacroAssembler::Jump(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
+void Arm64JNIMacroAssembler::Jump(ManagedRegister m_base, Offset offs) {
   Arm64ManagedRegister base = m_base.AsArm64();
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value());
-  ___ Br(reg_x(scratch.AsXRegister()));
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
+  ___ Ldr(scratch, MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
+  ___ Br(scratch);
 }
 
-void Arm64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
+void Arm64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs) {
   Arm64ManagedRegister base = m_base.AsArm64();
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value());
-  ___ Blr(reg_x(scratch.AsXRegister()));
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
+  ___ Ldr(scratch, MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
+  ___ Blr(scratch);
 }
 
-void Arm64JNIMacroAssembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
+void Arm64JNIMacroAssembler::Call(FrameOffset base, Offset offs) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
   // Call *(*(SP + base) + offset)
-  LoadFromOffset(scratch.AsXRegister(), SP, base.Int32Value());
-  LoadFromOffset(scratch.AsXRegister(), scratch.AsXRegister(), offs.Int32Value());
-  ___ Blr(reg_x(scratch.AsXRegister()));
+  ___ Ldr(scratch, MEM_OP(reg_x(SP), base.Int32Value()));
+  ___ Ldr(scratch, MEM_OP(scratch, offs.Int32Value()));
+  ___ Blr(scratch);
 }
 
-void Arm64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED,
-                                            ManagedRegister scratch ATTRIBUTE_UNUSED) {
+void Arm64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant";
 }
 
@@ -601,23 +608,23 @@
 
 void Arm64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
                                                     FrameOffset handle_scope_offset,
-                                                    ManagedRegister m_scratch,
                                                     bool null_allowed) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
   if (null_allowed) {
-    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP,
-                    handle_scope_offset.Int32Value());
+    // TODO: Clean this up; load to temp2 (W register), use xzr for CSEL, reorder ADD earlier.
+    Register scratch2 = temps.AcquireW();
+    ___ Ldr(scratch.W(), MEM_OP(reg_x(SP), handle_scope_offset.Int32Value()));
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
     // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
-    ___ Cmp(reg_w(scratch.AsOverlappingWRegister()), 0);
-    // Move this logic in add constants with flags.
-    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), ne);
+    ___ Cmp(scratch.W(), 0);
+    ___ Add(scratch2.X(), reg_x(SP), handle_scope_offset.Int32Value());
+    ___ Csel(scratch, scratch2.X(), scratch, ne);
   } else {
-    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), al);
+    ___ Add(scratch, reg_x(SP), handle_scope_offset.Int32Value());
   }
-  StoreToOffset(scratch.AsXRegister(), SP, out_off.Int32Value());
+  ___ Str(scratch, MEM_OP(reg_x(SP), out_off.Int32Value()));
 }
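
Note: the null_allowed path computes scratch = (ref != 0) ? SP + handle_scope_offset : 0.
Spelling out the TODO'd cleanup under its stated assumptions (ADD first, load
into the W scratch, select against xzr):

    ___ Add(scratch, reg_x(SP), handle_scope_offset.Int32Value());
    ___ Ldr(scratch2, MEM_OP(reg_x(SP), handle_scope_offset.Int32Value()));
    ___ Cmp(scratch2, 0);
    ___ Csel(scratch, scratch, xzr, ne);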
 
 void Arm64JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg,
@@ -636,14 +643,13 @@
   ___ Bind(&exit);
 }
 
-void Arm64JNIMacroAssembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) {
+void Arm64JNIMacroAssembler::ExceptionPoll(size_t stack_adjust) {
   CHECK_ALIGNED(stack_adjust, kStackAlignment);
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
   exception_blocks_.emplace_back(new Arm64Exception(scratch, stack_adjust));
-  LoadFromOffset(scratch.AsXRegister(),
-                 TR,
-                 Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
-  ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry());
+  ___ Ldr(scratch, MEM_OP(reg_x(TR), Thread::ExceptionOffset<kArm64PointerSize>().Int32Value()));
+  ___ Cbnz(scratch, exception_blocks_.back()->Entry());
 }
 
 std::unique_ptr<JNIMacroLabel> Arm64JNIMacroAssembler::CreateLabel() {
@@ -655,20 +661,23 @@
   ___ B(Arm64JNIMacroLabel::Cast(label)->AsArm64());
 }
 
-void Arm64JNIMacroAssembler::Jump(JNIMacroLabel* label,
-                                  JNIMacroUnaryCondition condition,
-                                  ManagedRegister test) {
+void Arm64JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
   CHECK(label != nullptr);
 
-  switch (condition) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
+  Register scratch = temps.AcquireW();
+  ___ Ldr(scratch, MEM_OP(reg_x(TR), Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
+  switch (cond) {
+    // TODO: Use `scratch` instead of `scratch.X()`.
     case JNIMacroUnaryCondition::kZero:
-      ___ Cbz(reg_x(test.AsArm64().AsXRegister()), Arm64JNIMacroLabel::Cast(label)->AsArm64());
+      ___ Cbz(scratch.X(), Arm64JNIMacroLabel::Cast(label)->AsArm64());
       break;
     case JNIMacroUnaryCondition::kNotZero:
-      ___ Cbnz(reg_x(test.AsArm64().AsXRegister()), Arm64JNIMacroLabel::Cast(label)->AsArm64());
+      ___ Cbnz(scratch.X(), Arm64JNIMacroLabel::Cast(label)->AsArm64());
       break;
     default:
-      LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(condition);
+      LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond);
       UNREACHABLE();
   }
 }
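
Note: the "TODO: Use `scratch` instead of `scratch.X()`" above is there because
the flag is only 32 bits wide, so branching on the W view would suffice; the X
view reproduces the previously emitted instruction. The TODO'd form, as a sketch:

    ___ Cbz(scratch, Arm64JNIMacroLabel::Cast(label)->AsArm64());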
@@ -680,8 +689,8 @@
 
 void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception* exception) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
-  Register temp = temps.AcquireX();
+  temps.Exclude(exception->scratch_);
+  Register scratch = temps.AcquireX();
 
   // Bind exception poll entry.
   ___ Bind(exception->Entry());
@@ -690,20 +699,19 @@
   }
   // Pass exception object as argument.
   // Don't care about preserving X0 as this won't return.
-  ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister()));
-  ___ Ldr(temp,
+  ___ Mov(reg_x(X0), exception->scratch_);
+  ___ Ldr(scratch,
           MEM_OP(reg_x(TR),
                  QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pDeliverException).Int32Value()));
 
-  ___ Blr(temp);
+  ___ Blr(scratch);
   // Call should never return.
   ___ Brk();
 }
 
 void Arm64JNIMacroAssembler::BuildFrame(size_t frame_size,
                                         ManagedRegister method_reg,
-                                        ArrayRef<const ManagedRegister> callee_save_regs,
-                                        const ManagedRegisterEntrySpills& entry_spills) {
+                                        ArrayRef<const ManagedRegister> callee_save_regs) {
   // Setup VIXL CPURegList for callee-saves.
   CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
   CPURegList fp_reg_list(CPURegister::kVRegister, kDRegSize, 0);
@@ -735,28 +743,6 @@
     DCHECK(X0 == method_reg.AsArm64().AsXRegister());
     StoreToOffset(X0, SP, 0);
   }
-
-  // Write out entry spills
-  int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize);
-  for (const ManagedRegisterSpill& spill : entry_spills) {
-    Arm64ManagedRegister reg = spill.AsArm64();
-    if (reg.IsNoRegister()) {
-      // only increment stack offset.
-      offset += spill.getSize();
-    } else if (reg.IsXRegister()) {
-      StoreToOffset(reg.AsXRegister(), SP, offset);
-      offset += 8;
-    } else if (reg.IsWRegister()) {
-      StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsDRegister()) {
-      StoreDToOffset(reg.AsDRegister(), SP, offset);
-      offset += 8;
-    } else if (reg.IsSRegister()) {
-      StoreSToOffset(reg.AsSRegister(), SP, offset);
-      offset += 4;
-    }
-  }
 }
 
 void Arm64JNIMacroAssembler::RemoveFrame(size_t frame_size,
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index 54592a3..64b5595 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -54,8 +54,7 @@
   // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size,
                   ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) override;
+                  ArrayRef<const ManagedRegister> callee_save_regs) override;
 
   // Emit code that will remove an activation from the stack.
   void RemoveFrame(size_t frame_size,
@@ -69,15 +68,10 @@
   void Store(FrameOffset offs, ManagedRegister src, size_t size) override;
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override;
-  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
-                                FrameOffset fr_offs,
-                                ManagedRegister scratch) override;
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs) override;
   void StoreStackPointerToThread(ThreadOffset64 thr_offs) override;
-  void StoreSpanning(FrameOffset dest,
-                     ManagedRegister src,
-                     FrameOffset in_off,
-                     ManagedRegister scratch) override;
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
   // Load routines.
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
@@ -92,13 +86,15 @@
 
   // Copying routines.
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
-  void CopyRawPtrFromThread(FrameOffset fr_offs,
-                            ThreadOffset64 thr_offs,
-                            ManagedRegister scratch) override;
+  void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override;
   void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       override;
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override;
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override;
+  void CopyRef(FrameOffset dest, FrameOffset src) override;
+  void CopyRef(FrameOffset dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) override;
+  void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
   void Copy(FrameOffset dest,
             ManagedRegister src_base,
             Offset src_offset,
@@ -135,8 +131,8 @@
   void ZeroExtend(ManagedRegister mreg, size_t size) override;
 
   // Exploit fast access in managed code to Thread::Current().
-  void GetCurrentThread(ManagedRegister tr) override;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) override;
+  void GetCurrentThread(ManagedRegister dest) override;
+  void GetCurrentThread(FrameOffset dest_offset) override;
 
   // Set up out_reg to hold an Object** into the handle scope, or to be null if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
@@ -151,7 +147,6 @@
   // value is null and null_allowed.
   void CreateHandleScopeEntry(FrameOffset out_off,
                               FrameOffset handlescope_offset,
-                              ManagedRegister scratch,
                               bool null_allowed) override;
 
   // src holds a handle scope entry (Object**); load this into dst.
@@ -163,36 +158,36 @@
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
   // Jump to address held at [base+offset] (used for tail calls).
-  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+  void Jump(ManagedRegister base, Offset offset) override;
 
   // Call to address held at [base+offset].
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;
-  void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) override;
+  void Call(ManagedRegister base, Offset offset) override;
+  void Call(FrameOffset base, Offset offset) override;
+  void CallFromThread(ThreadOffset64 offset) override;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to an ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override;
+  void ExceptionPoll(size_t stack_adjust) override;
 
   // Create a new label that can be used with Jump/Bind calls.
   std::unique_ptr<JNIMacroLabel> CreateLabel() override;
   // Emit an unconditional jump to the label.
   void Jump(JNIMacroLabel* label) override;
-  // Emit a conditional jump to the label by applying a unary condition test to the register.
-  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override;
+  // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag.
+  void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;
 
  private:
   class Arm64Exception {
    public:
-    Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust)
+    Arm64Exception(vixl::aarch64::Register scratch, size_t stack_adjust)
         : scratch_(scratch), stack_adjust_(stack_adjust) {}
 
     vixl::aarch64::Label* Entry() { return &exception_entry_; }
 
     // Register used for passing Thread::Current()->exception_.
-    const Arm64ManagedRegister scratch_;
+    const vixl::aarch64::Register scratch_;
 
     // Stack adjust for ExceptionPool.
     const size_t stack_adjust_;
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
index 5fa0b3c..0a7cf11 100644
--- a/compiler/utils/assembler_test_base.h
+++ b/compiler/utils/assembler_test_base.h
@@ -364,14 +364,13 @@
     std::ifstream f1_in(f1);
     std::ifstream f2_in(f2);
 
-    bool result = std::equal(std::istreambuf_iterator<char>(f1_in),
-                             std::istreambuf_iterator<char>(),
-                             std::istreambuf_iterator<char>(f2_in));
-
-    f1_in.close();
-    f2_in.close();
-
-    return result;
+    bool read1_ok, read2_ok;
+    char c1, c2;
+    do {
+      read1_ok = static_cast<bool>(f1_in >> c1);
+      read2_ok = static_cast<bool>(f2_in >> c2);
+    } while (read1_ok && read2_ok && c1 == c2);
+    return !read1_ok && !read2_ok;  // Did we reach the end of both streams?
   }
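
Note: the old std::equal() call walked f1 to its end but never verified that f2
was exhausted too, so a second file with extra trailing data still compared
equal; the loop above requires both streams to reach EOF together. One caveat:
formatted extraction (>>) skips whitespace, so the comparison is
whitespace-insensitive. If byte-exact comparison were wanted, a sketch in the
spirit of the old code would be:

    std::istreambuf_iterator<char> it1(f1_in), it2(f2_in), end;
    while (it1 != end && it2 != end && *it1 == *it2) {
      ++it1;
      ++it2;
    }
    return it1 == end && it2 == end;  // Equal prefix and both streams exhausted.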
 
   // Compile the given assembly code and extract the binary, if possible. Put result into res.
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index c9ece1d..4e1518b 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -224,9 +224,19 @@
   ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters();
 
   const ManagedRegister method_register = ArmManagedRegister::FromCoreRegister(R0);
+  const ManagedRegister hidden_arg_register = ArmManagedRegister::FromCoreRegister(R4);
   const ManagedRegister scratch_register = ArmManagedRegister::FromCoreRegister(R12);
 
-  __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
+  __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs);
+
+  // Spill arguments.
+  mr_conv->ResetIterator(FrameOffset(frame_size));
+  for (; mr_conv->HasNext(); mr_conv->Next()) {
+    if (mr_conv->IsCurrentParamInRegister()) {
+      size_t size = mr_conv->IsCurrentParamALongOrDouble() ? 8u : 4u;
+      __ Store(mr_conv->CurrentParamStackOffset(), mr_conv->CurrentParamRegister(), size);
+    }
+  }
   __ IncreaseFrameSize(32);
 
   // Loads
@@ -249,33 +259,33 @@
   __ Store(FrameOffset(1024), method_register, 4);
   __ Store(FrameOffset(4092), scratch_register, 4);
   __ Store(FrameOffset(4096), scratch_register, 4);
-  __ StoreImmediateToFrame(FrameOffset(48), 0xFF, scratch_register);
-  __ StoreImmediateToFrame(FrameOffset(48), 0xFFFFFF, scratch_register);
+  __ StoreImmediateToFrame(FrameOffset(48), 0xFF);
+  __ StoreImmediateToFrame(FrameOffset(48), 0xFFFFFF);
   __ StoreRawPtr(FrameOffset(48), scratch_register);
   __ StoreRef(FrameOffset(48), scratch_register);
-  __ StoreSpanning(FrameOffset(48), method_register, FrameOffset(48), scratch_register);
-  __ StoreStackOffsetToThread(ThreadOffset32(512), FrameOffset(4096), scratch_register);
+  __ StoreSpanning(FrameOffset(48), method_register, FrameOffset(48));
+  __ StoreStackOffsetToThread(ThreadOffset32(512), FrameOffset(4096));
   __ StoreStackPointerToThread(ThreadOffset32(512));
 
   // Other
-  __ Call(method_register, FrameOffset(48), scratch_register);
-  __ Copy(FrameOffset(48), FrameOffset(44), scratch_register, 4);
-  __ CopyRawPtrFromThread(FrameOffset(44), ThreadOffset32(512), scratch_register);
-  __ CopyRef(FrameOffset(48), FrameOffset(44), scratch_register);
+  __ Call(method_register, FrameOffset(48));
+  __ Copy(FrameOffset(48), FrameOffset(44), 4);
+  __ CopyRawPtrFromThread(FrameOffset(44), ThreadOffset32(512));
+  __ CopyRef(FrameOffset(48), FrameOffset(44));
   __ GetCurrentThread(method_register);
-  __ GetCurrentThread(FrameOffset(48), scratch_register);
-  __ Move(scratch_register, method_register, 4);
+  __ GetCurrentThread(FrameOffset(48));
+  __ Move(hidden_arg_register, method_register, 4);
   __ VerifyObject(scratch_register, false);
 
   __ CreateHandleScopeEntry(scratch_register, FrameOffset(48), scratch_register, true);
   __ CreateHandleScopeEntry(scratch_register, FrameOffset(48), scratch_register, false);
   __ CreateHandleScopeEntry(method_register, FrameOffset(48), scratch_register, true);
-  __ CreateHandleScopeEntry(FrameOffset(48), FrameOffset(64), scratch_register, true);
+  __ CreateHandleScopeEntry(FrameOffset(48), FrameOffset(64), true);
   __ CreateHandleScopeEntry(method_register, FrameOffset(0), scratch_register, true);
   __ CreateHandleScopeEntry(method_register, FrameOffset(1025), scratch_register, true);
   __ CreateHandleScopeEntry(scratch_register, FrameOffset(1025), scratch_register, true);
 
-  __ ExceptionPoll(scratch_register, 0);
+  __ ExceptionPoll(0);
 
   // Push the target out of range of the branch emitted by ExceptionPoll.
   for (int i = 0; i < 64; i++) {
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 6475607..d6b1c50 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -51,7 +51,7 @@
   "  a4:	f8cd c030 	str.w	ip, [sp, #48]	; 0x30\n",
   "  a8:	4648      	mov	r0, r9\n",
   "  aa:	f8cd 9030 	str.w	r9, [sp, #48]	; 0x30\n",
-  "  ae:	4684      	mov	ip, r0\n",
+  "  ae:	4604      	mov	r4, r0\n",
   "  b0:	f1bc 0f00 	cmp.w	ip, #0\n",
   "  b4:	bf18      	it	ne\n",
   "  b6:	f10d 0c30 	addne.w	ip, sp, #48	; 0x30\n",
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index bbe0f73..48b3f01 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -63,8 +63,7 @@
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size,
                           ManagedRegister method_reg,
-                          ArrayRef<const ManagedRegister> callee_save_regs,
-                          const ManagedRegisterEntrySpills& entry_spills) = 0;
+                          ArrayRef<const ManagedRegister> callee_save_regs) = 0;
 
   // Emit code that will remove an activation from the stack
   //
@@ -83,18 +82,16 @@
   virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0;
   virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0;
 
-  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) = 0;
+  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) = 0;
 
   virtual void StoreStackOffsetToThread(ThreadOffset<kPointerSize> thr_offs,
-                                        FrameOffset fr_offs,
-                                        ManagedRegister scratch) = 0;
+                                        FrameOffset fr_offs) = 0;
 
   virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs) = 0;
 
   virtual void StoreSpanning(FrameOffset dest,
                              ManagedRegister src,
-                             FrameOffset in_off,
-                             ManagedRegister scratch) = 0;
+                             FrameOffset in_off) = 0;
 
   // Load routines
   virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
@@ -117,17 +114,19 @@
   // Copying routines
   virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
 
-  virtual void CopyRawPtrFromThread(FrameOffset fr_offs,
-                                    ThreadOffset<kPointerSize> thr_offs,
-                                    ManagedRegister scratch) = 0;
+  virtual void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset<kPointerSize> thr_offs) = 0;
 
   virtual void CopyRawPtrToThread(ThreadOffset<kPointerSize> thr_offs,
                                   FrameOffset fr_offs,
                                   ManagedRegister scratch) = 0;
 
-  virtual void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) = 0;
+  virtual void CopyRef(FrameOffset dest, FrameOffset src) = 0;
+  virtual void CopyRef(FrameOffset dest,
+                       ManagedRegister base,
+                       MemberOffset offs,
+                       bool unpoison_reference) = 0;
 
-  virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) = 0;
+  virtual void Copy(FrameOffset dest, FrameOffset src, size_t size) = 0;
 
   virtual void Copy(FrameOffset dest,
                     ManagedRegister src_base,
@@ -170,8 +169,8 @@
   virtual void ZeroExtend(ManagedRegister mreg, size_t size) = 0;
 
   // Exploit fast access in managed code to Thread::Current()
-  virtual void GetCurrentThread(ManagedRegister tr) = 0;
-  virtual void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) = 0;
+  virtual void GetCurrentThread(ManagedRegister dest) = 0;
+  virtual void GetCurrentThread(FrameOffset dest_offset) = 0;
 
   // Set up out_reg to hold an Object** into the handle scope, or to be null if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
@@ -186,7 +185,6 @@
   // value is null and null_allowed.
   virtual void CreateHandleScopeEntry(FrameOffset out_off,
                                       FrameOffset handlescope_offset,
-                                      ManagedRegister scratch,
                                       bool null_allowed) = 0;
 
   // src holds a handle scope entry (Object**); load this into dst
@@ -198,23 +196,23 @@
   virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0;
 
   // Jump to address held at [base+offset] (used for tail calls).
-  virtual void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0;
+  virtual void Jump(ManagedRegister base, Offset offset) = 0;
 
   // Call to address held at [base+offset]
-  virtual void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0;
-  virtual void Call(FrameOffset base, Offset offset, ManagedRegister scratch) = 0;
-  virtual void CallFromThread(ThreadOffset<kPointerSize> offset, ManagedRegister scratch) = 0;
+  virtual void Call(ManagedRegister base, Offset offset) = 0;
+  virtual void Call(FrameOffset base, Offset offset) = 0;
+  virtual void CallFromThread(ThreadOffset<kPointerSize> offset) = 0;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to an ExceptionSlowPath if it is.
-  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
+  virtual void ExceptionPoll(size_t stack_adjust) = 0;
 
   // Create a new label that can be used with Jump/Bind calls.
   virtual std::unique_ptr<JNIMacroLabel> CreateLabel() = 0;
   // Emit an unconditional jump to the label.
   virtual void Jump(JNIMacroLabel* label) = 0;
-  // Emit a conditional jump to the label by applying a unary condition test to the register.
-  virtual void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) = 0;
+  // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag.
+  virtual void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) = 0;
   // Code at this offset will serve as the target for the Jump call.
   virtual void Bind(JNIMacroLabel* label) = 0;
 
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index f20750b..2afdc04 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -84,52 +84,6 @@
 static_assert(std::is_trivially_copyable<ManagedRegister>::value,
               "ManagedRegister should be trivially copyable");
 
-class ManagedRegisterSpill : public ManagedRegister {
- public:
-  // ManagedRegisterSpill contains information about data type size and location in caller frame
-  // These additional attributes could be defined by calling convention (EntrySpills)
-  ManagedRegisterSpill(const ManagedRegister& other, uint32_t size, uint32_t spill_offset)
-      : ManagedRegister(other), size_(size), spill_offset_(spill_offset)  { }
-
-  explicit ManagedRegisterSpill(const ManagedRegister& other)
-      : ManagedRegister(other), size_(-1), spill_offset_(-1) { }
-
-  ManagedRegisterSpill(const ManagedRegister& other, int32_t size)
-      : ManagedRegister(other), size_(size), spill_offset_(-1) { }
-
-  int32_t getSpillOffset() const {
-    return spill_offset_;
-  }
-
-  int32_t getSize() const {
-    return size_;
-  }
-
- private:
-  int32_t size_;
-  int32_t spill_offset_;
-};
-
-class ManagedRegisterEntrySpills : public std::vector<ManagedRegisterSpill> {
- public:
-  // The ManagedRegister does not have information about size and offset.
-  // In this case it's size and offset determined by BuildFrame (assembler)
-  void push_back(ManagedRegister x) {
-    ManagedRegisterSpill spill(x);
-    std::vector<ManagedRegisterSpill>::push_back(spill);
-  }
-
-  void push_back(ManagedRegister x, int32_t size) {
-    ManagedRegisterSpill spill(x, size);
-    std::vector<ManagedRegisterSpill>::push_back(spill);
-  }
-
-  void push_back(ManagedRegisterSpill x) {
-    std::vector<ManagedRegisterSpill>::push_back(x);
-  }
- private:
-};
-
 }  // namespace art
 
 #endif  // ART_COMPILER_UTILS_MANAGED_REGISTER_H_
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index f4ea004..e4ce338 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -24,6 +24,12 @@
 namespace art {
 namespace x86 {
 
+static Register GetScratchRegister() {
+  // ECX is an argument register on entry and gets spilled in BuildFrame().
+  // After that, we can use it as a scratch register.
+  return ECX;
+}
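
Note: x86 has no assembler-managed temp pool, so a fixed scratch register is
used instead. ECX is safe only because BuildFrame() has already spilled the
incoming arguments; the resulting obligation is that ECX must never be a live
destination, which the new check in Move() below enforces:

    DCHECK(!mdest.Equals(X86ManagedRegister::FromCpuRegister(GetScratchRegister())));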
+
 // Slowpath entered when Thread::Current()->exception_ is non-null
 class X86ExceptionSlowPath final : public SlowPath {
  public:
@@ -46,8 +52,7 @@
 
 void X86JNIMacroAssembler::BuildFrame(size_t frame_size,
                                       ManagedRegister method_reg,
-                                      ArrayRef<const ManagedRegister> spill_regs,
-                                      const ManagedRegisterEntrySpills& entry_spills) {
+                                      ArrayRef<const ManagedRegister> spill_regs) {
   DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
   cfi().SetCurrentCFAOffset(4);  // Return address on stack.
   if (frame_size == kFramePointerSize) {
@@ -81,21 +86,6 @@
     cfi().AdjustCFAOffset(kFramePointerSize);
   }
   DCHECK_EQ(static_cast<size_t>(cfi().GetCurrentCFAOffset()), frame_size);
-
-  for (const ManagedRegisterSpill& spill : entry_spills) {
-    if (spill.AsX86().IsCpuRegister()) {
-      int offset = frame_size + spill.getSpillOffset();
-      __ movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
-    } else {
-      DCHECK(spill.AsX86().IsXmmRegister());
-      if (spill.getSize() == 8) {
-        __ movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        __ movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      }
-    }
-  }
 }
 
 void X86JNIMacroAssembler::RemoveFrame(size_t frame_size,
@@ -180,17 +170,14 @@
   __ movl(Address(ESP, dest), src.AsCpuRegister());
 }
 
-void X86JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister) {
+void X86JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) {
   __ movl(Address(ESP, dest), Immediate(imm));
 }
 
-void X86JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
-                                                    FrameOffset fr_offs,
-                                                    ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  __ leal(scratch.AsCpuRegister(), Address(ESP, fr_offs));
-  __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
+void X86JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) {
+  Register scratch = GetScratchRegister();
+  __ leal(scratch, Address(ESP, fr_offs));
+  __ fs()->movl(Address::Absolute(thr_offs), scratch);
 }
 
 void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
@@ -199,8 +186,7 @@
 
 void X86JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
                                          ManagedRegister /*src*/,
-                                         FrameOffset /*in_off*/,
-                                         ManagedRegister /*scratch*/) {
+                                         FrameOffset /*in_off*/) {
   UNIMPLEMENTED(FATAL);  // this case currently only exists for ARM
 }
 
@@ -315,6 +301,7 @@
 }
 
 void X86JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  DCHECK(!mdest.Equals(X86ManagedRegister::FromCpuRegister(GetScratchRegister())));
   X86ManagedRegister dest = mdest.AsX86();
   X86ManagedRegister src = msrc.AsX86();
   if (!dest.Equals(src)) {
@@ -340,20 +327,28 @@
   }
 }
 
-void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  __ movl(scratch.AsCpuRegister(), Address(ESP, src));
-  __ movl(Address(ESP, dest), scratch.AsCpuRegister());
+void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
+  Register scratch = GetScratchRegister();
+  __ movl(scratch, Address(ESP, src));
+  __ movl(Address(ESP, dest), scratch);
 }
 
-void X86JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
-                                                ThreadOffset32 thr_offs,
-                                                ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs));
-  Store(fr_offs, scratch, 4);
+void X86JNIMacroAssembler::CopyRef(FrameOffset dest,
+                                   ManagedRegister base,
+                                   MemberOffset offs,
+                                   bool unpoison_reference) {
+  Register scratch = GetScratchRegister();
+  __ movl(scratch, Address(base.AsX86().AsCpuRegister(), offs));
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(scratch);
+  }
+  __ movl(Address(ESP, dest), scratch);
+}
+
+void X86JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) {
+  Register scratch = GetScratchRegister();
+  __ fs()->movl(scratch, Address::Absolute(thr_offs));
+  __ movl(Address(ESP, fr_offs), scratch);
 }
 
 void X86JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
@@ -365,18 +360,14 @@
   __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
 }
 
-void X86JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src,
-                        ManagedRegister mscratch,
-                        size_t size) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  if (scratch.IsCpuRegister() && size == 8) {
-    Load(scratch, src, 4);
-    Store(dest, scratch, 4);
-    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
-    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
-  } else {
-    Load(scratch, src, size);
-    Store(dest, scratch, size);
+void X86JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
+  DCHECK(size == 4 || size == 8) << size;
+  Register scratch = GetScratchRegister();
+  __ movl(scratch, Address(ESP, src));
+  __ movl(Address(ESP, dest), scratch);
+  if (size == 8) {
+    __ movl(scratch, Address(ESP, FrameOffset(src.Int32Value() + 4)));
+    __ movl(Address(ESP, FrameOffset(dest.Int32Value() + 4)), scratch);
   }
 }
 
@@ -466,21 +457,19 @@
 
 void X86JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
                                                   FrameOffset handle_scope_offset,
-                                                  ManagedRegister mscratch,
                                                   bool null_allowed) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
+  Register scratch = GetScratchRegister();
   if (null_allowed) {
     Label null_arg;
-    __ movl(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
-    __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
+    __ movl(scratch, Address(ESP, handle_scope_offset));
+    __ testl(scratch, scratch);
     __ j(kZero, &null_arg);
-    __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ leal(scratch, Address(ESP, handle_scope_offset));
     __ Bind(&null_arg);
   } else {
-    __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ leal(scratch, Address(ESP, handle_scope_offset));
   }
-  Store(out_off, scratch, 4);
+  __ movl(Address(ESP, out_off), scratch);
 }
 
 // Given a handle scope entry, load the associated reference.
@@ -508,42 +497,41 @@
   // TODO: not validating references
 }
 
-void X86JNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset, ManagedRegister) {
+void X86JNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset) {
   X86ManagedRegister base = mbase.AsX86();
   CHECK(base.IsCpuRegister());
   __ jmp(Address(base.AsCpuRegister(), offset.Int32Value()));
 }
 
-void X86JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
+void X86JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset) {
   X86ManagedRegister base = mbase.AsX86();
   CHECK(base.IsCpuRegister());
   __ call(Address(base.AsCpuRegister(), offset.Int32Value()));
   // TODO: place reference map on call
 }
 
-void X86JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
+void X86JNIMacroAssembler::Call(FrameOffset base, Offset offset) {
+  Register scratch = GetScratchRegister();
   __ movl(scratch, Address(ESP, base));
   __ call(Address(scratch, offset));
 }
 
-void X86JNIMacroAssembler::CallFromThread(ThreadOffset32 offset, ManagedRegister /*mscratch*/) {
+void X86JNIMacroAssembler::CallFromThread(ThreadOffset32 offset) {
   __ fs()->call(Address::Absolute(offset));
 }
 
-void X86JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
-  __ fs()->movl(tr.AsX86().AsCpuRegister(),
+void X86JNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
+  __ fs()->movl(dest.AsX86().AsCpuRegister(),
                 Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
 }
 
-void X86JNIMacroAssembler::GetCurrentThread(FrameOffset offset,
-                                    ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
-  __ movl(Address(ESP, offset), scratch.AsCpuRegister());
+void X86JNIMacroAssembler::GetCurrentThread(FrameOffset offset) {
+  Register scratch = GetScratchRegister();
+  __ fs()->movl(scratch, Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
+  __ movl(Address(ESP, offset), scratch);
 }
 
-void X86JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+void X86JNIMacroAssembler::ExceptionPoll(size_t stack_adjust) {
   X86ExceptionSlowPath* slow = new (__ GetAllocator()) X86ExceptionSlowPath(stack_adjust);
   __ GetBuffer()->EnqueueSlowPath(slow);
   __ fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()), Immediate(0));
@@ -559,13 +547,11 @@
   __ jmp(X86JNIMacroLabel::Cast(label)->AsX86());
 }
 
-void X86JNIMacroAssembler::Jump(JNIMacroLabel* label,
-                                JNIMacroUnaryCondition condition,
-                                ManagedRegister test) {
+void X86JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
   CHECK(label != nullptr);
 
   art::x86::Condition x86_cond;
-  switch (condition) {
+  switch (cond) {
     case JNIMacroUnaryCondition::kZero:
       x86_cond = art::x86::kZero;
       break;
@@ -573,18 +559,19 @@
       x86_cond = art::x86::kNotZero;
       break;
     default:
-      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(condition);
+      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(cond);
       UNREACHABLE();
   }
 
+  // TODO: Compare the memory location with immediate 0.
+  Register scratch = GetScratchRegister();
+  DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
+  __ fs()->movl(scratch, Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()));
+
   // TEST reg, reg
   // Jcc <Offset>
-  __ testl(test.AsX86().AsCpuRegister(), test.AsX86().AsCpuRegister());
+  __ testl(scratch, scratch);
   __ j(x86_cond, X86JNIMacroLabel::Cast(label)->AsX86());
-
-
-  // X86 also has JCZX, JECZX, however it's not worth it to implement
-  // because we aren't likely to codegen with ECX+kZero check.
 }
 
 void X86JNIMacroAssembler::Bind(JNIMacroLabel* label) {
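Every x86 macro op that used to take a ManagedRegister scratch now calls GetScratchRegister() internally, so ECX is clobbered implicitly. A hedged usage sketch (frame offsets invented; the trailing comments show approximately what is emitted):

  // 32-bit reference copy between stack slots, through ECX.
  assembler->CopyRef(FrameOffset(16), FrameOffset(32));
  //   movl ECX, [ESP + 32]
  //   movl [ESP + 16], ECX
  // 64-bit frame-to-frame copy, currently emitted as two 32-bit moves.
  assembler->Copy(FrameOffset(8), FrameOffset(40), /* size= */ 8u);

The only obligation left on callers is the rule the new DCHECK in Move() hints at: never hold a live value in the scratch register across these macro instructions.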
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index 7bf2f98..1223471 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -44,8 +44,7 @@
   // Emit code that will create an activation on the stack
   void BuildFrame(size_t frame_size,
                   ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) override;
+                  ArrayRef<const ManagedRegister> callee_save_regs) override;
 
   // Emit code that will remove an activation from the stack
   void RemoveFrame(size_t frame_size,
@@ -60,16 +59,13 @@
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
 
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override;
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
 
-  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
-                                FrameOffset fr_offs,
-                                ManagedRegister scratch) override;
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) override;
 
   void StoreStackPointerToThread(ThreadOffset32 thr_offs) override;
 
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) override;
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
   // Load routines
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
@@ -88,16 +84,18 @@
   // Copying routines
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
 
-  void CopyRawPtrFromThread(FrameOffset fr_offs,
-                            ThreadOffset32 thr_offs,
-                            ManagedRegister scratch) override;
+  void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override;
 
   void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       override;
 
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override;
+  void CopyRef(FrameOffset dest, FrameOffset src) override;
+  void CopyRef(FrameOffset dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) override;
 
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override;
+  void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
 
   void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
             size_t size) override;
@@ -123,20 +121,23 @@
   void ZeroExtend(ManagedRegister mreg, size_t size) override;
 
   // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) override;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) override;
+  void GetCurrentThread(ManagedRegister dest) override;
+  void GetCurrentThread(FrameOffset dest_offset) override;
 
   // Set up out_reg to hold an Object** into the handle scope, or to be null if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
   // that can be used to avoid loading the handle scope entry to see if the value is
   // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) override;
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) override;
 
   // Set up out_off to hold an Object** into the handle scope, or to be null if the
   // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister scratch, bool null_allowed) override;
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              bool null_allowed) override;
 
   // src holds a handle scope entry (Object**); load this into dst.
   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) override;
@@ -147,23 +148,23 @@
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
   // Jump to address held at [base+offset] (used for tail calls).
-  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+  void Jump(ManagedRegister base, Offset offset) override;
 
   // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;
-  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) override;
+  void Call(ManagedRegister base, Offset offset) override;
+  void Call(FrameOffset base, Offset offset) override;
+  void CallFromThread(ThreadOffset32 offset) override;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to an ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override;
+  void ExceptionPoll(size_t stack_adjust) override;
 
   // Create a new label that can be used with Jump/Bind calls.
   std::unique_ptr<JNIMacroLabel> CreateLabel() override;
   // Emit an unconditional jump to the label.
   void Jump(JNIMacroLabel* label) override;
-  // Emit a conditional jump to the label by applying a unary condition test to the register.
-  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override;
+  // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag.
+  void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;
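TestGcMarking() replaces the register-based conditional Jump(): the assembler loads the Thread::IsGcMarkingOffset<> field into its internal scratch register rather than having the JNI compiler materialize the flag. A hypothetical dispatch with the new API (label name invented):

  std::unique_ptr<JNIMacroLabel> marking = assembler->CreateLabel();
  // Branch if the GC is currently marking; the scratch load is internal.
  assembler->TestGcMarking(marking.get(), JNIMacroUnaryCondition::kNotZero);
  // ... non-marking fast path ...
  assembler->Bind(marking.get());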
 
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 993cf95..411d64b 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -2334,19 +2334,15 @@
   };
   ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
 
-  // Three random entry spills.
-  ManagedRegisterEntrySpills entry_spills;
-  ManagedRegisterSpill spill(ManagedFromCpu(x86_64::RAX), 8, 0);
-  entry_spills.push_back(spill);
-  ManagedRegisterSpill spill2(ManagedFromCpu(x86_64::RBX), 8, 8);
-  entry_spills.push_back(spill2);
-  ManagedRegisterSpill spill3(ManagedFromFpu(x86_64::XMM1), 8, 16);
-  entry_spills.push_back(spill3);
-
   x86_64::X86_64ManagedRegister method_reg = ManagedFromCpu(x86_64::RDI);
 
   size_t frame_size = 10 * kStackAlignment;
-  assembler->BuildFrame(frame_size, method_reg, spill_regs, entry_spills);
+  assembler->BuildFrame(frame_size, method_reg, spill_regs);
+
+  // Three random entry spills.
+  assembler->Store(FrameOffset(frame_size + 0u), ManagedFromCpu(x86_64::RAX), /* size= */ 8u);
+  assembler->Store(FrameOffset(frame_size + 8u), ManagedFromCpu(x86_64::RBX), /* size= */ 8u);
+  assembler->Store(FrameOffset(frame_size + 16u), ManagedFromFpu(x86_64::XMM1), /* size= */ 8u);
 
   // Construct assembly text counterpart.
   std::ostringstream str;
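The expected encodings for the three explicit spills above follow the same size and register-kind dispatch the deleted BuildFrame() loop used (see the removed movq/movsd branches below); roughly:

  // Store(FrameOffset, reg, size) picks the instruction from the register kind:
  //   movq  [RSP + frame_size + 0],  RAX    // 8-byte GP spill
  //   movq  [RSP + frame_size + 8],  RBX    // 8-byte GP spill
  //   movsd [RSP + frame_size + 16], XMM1   // 8-byte FP spill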
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index ffe9020..ee4ae2e 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -36,12 +36,16 @@
 static constexpr size_t kNativeStackAlignment = 16;
 static_assert(kNativeStackAlignment == kStackAlignment);
 
+static inline CpuRegister GetScratchRegister() {
+  // TODO: Use R11 in line with Optimizing.
+  return CpuRegister(RAX);
+}
+
 #define __ asm_.
 
 void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size,
                                          ManagedRegister method_reg,
-                                         ArrayRef<const ManagedRegister> spill_regs,
-                                         const ManagedRegisterEntrySpills& entry_spills) {
+                                         ArrayRef<const ManagedRegister> spill_regs) {
   DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
   cfi().SetCurrentCFAOffset(8);  // Return address on stack.
   // Note: @CriticalNative tail call is not used (would have frame_size == kFramePointerSize).
@@ -86,28 +90,6 @@
   if (method_reg.IsRegister()) {
     __ movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
   }
-
-  for (const ManagedRegisterSpill& spill : entry_spills) {
-    if (spill.AsX86_64().IsCpuRegister()) {
-      if (spill.getSize() == 8) {
-        __ movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
-                spill.AsX86_64().AsCpuRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        __ movl(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
-                spill.AsX86_64().AsCpuRegister());
-      }
-    } else {
-      if (spill.getSize() == 8) {
-        __ movsd(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
-                 spill.AsX86_64().AsXmmRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        __ movss(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
-                 spill.AsX86_64().AsXmmRegister());
-      }
-    }
-  }
 }
 
 void X86_64JNIMacroAssembler::RemoveFrame(size_t frame_size,
@@ -215,19 +197,15 @@
   __ movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
 }
 
-void X86_64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest,
-                                                    uint32_t imm,
-                                                    ManagedRegister) {
+void X86_64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) {
   __ movl(Address(CpuRegister(RSP), dest), Immediate(imm));  // TODO(64) movq?
 }
 
 void X86_64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs,
-                                                       FrameOffset fr_offs,
-                                                       ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), fr_offs));
-  __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
+                                                       FrameOffset fr_offs) {
+  CpuRegister scratch = GetScratchRegister();
+  __ leaq(scratch, Address(CpuRegister(RSP), fr_offs));
+  __ gs()->movq(Address::Absolute(thr_offs, true), scratch);
 }
 
 void X86_64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) {
@@ -236,8 +214,7 @@
 
 void X86_64JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
                                             ManagedRegister /*src*/,
-                                            FrameOffset /*in_off*/,
-                                            ManagedRegister /*scratch*/) {
+                                            FrameOffset /*in_off*/) {
   UNIMPLEMENTED(FATAL);  // this case currently only exists for ARM
 }
 
@@ -363,6 +340,7 @@
 }
 
 void X86_64JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  DCHECK(!mdest.Equals(X86_64ManagedRegister::FromCpuRegister(GetScratchRegister().AsRegister())));
   X86_64ManagedRegister dest = mdest.AsX86_64();
   X86_64ManagedRegister src = msrc.AsX86_64();
   if (!dest.Equals(src)) {
@@ -388,20 +366,28 @@
   }
 }
 
-void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  __ movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), src));
-  __ movl(Address(CpuRegister(RSP), dest), scratch.AsCpuRegister());
+void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
+  CpuRegister scratch = GetScratchRegister();
+  __ movl(scratch, Address(CpuRegister(RSP), src));
+  __ movl(Address(CpuRegister(RSP), dest), scratch);
 }
 
-void X86_64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
-                                                   ThreadOffset64 thr_offs,
-                                                   ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  __ gs()->movq(scratch.AsCpuRegister(), Address::Absolute(thr_offs, true));
-  Store(fr_offs, scratch, 8);
+void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest,
+                                      ManagedRegister base,
+                                      MemberOffset offs,
+                                      bool unpoison_reference) {
+  CpuRegister scratch = GetScratchRegister();
+  __ movl(scratch, Address(base.AsX86_64().AsCpuRegister(), offs));
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(scratch);
+  }
+  __ movl(Address(CpuRegister(RSP), dest), scratch);
+}
+
+void X86_64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) {
+  CpuRegister scratch = GetScratchRegister();
+  __ gs()->movq(scratch, Address::Absolute(thr_offs, true));
+  __ movq(Address(CpuRegister(RSP), fr_offs), scratch);
 }
 
 void X86_64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 thr_offs,
@@ -413,19 +399,18 @@
   __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
 }
 
-void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
-                                   FrameOffset src,
-                                   ManagedRegister mscratch,
-                                   size_t size) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  if (scratch.IsCpuRegister() && size == 8) {
-    Load(scratch, src, 4);
-    Store(dest, scratch, 4);
-    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
-    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
+void X86_64JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
+  DCHECK(size == 4 || size == 8) << size;
+  CpuRegister scratch = GetScratchRegister();
+  if (size == 8) {
+    // TODO: Use MOVQ.
+    __ movl(scratch, Address(CpuRegister(RSP), src));
+    __ movl(Address(CpuRegister(RSP), dest), scratch);
+    __ movl(scratch, Address(CpuRegister(RSP), FrameOffset(src.Int32Value() + 4)));
+    __ movl(Address(CpuRegister(RSP), FrameOffset(dest.Int32Value() + 4)), scratch);
   } else {
-    Load(scratch, src, size);
-    Store(dest, scratch, size);
+    __ movl(scratch, Address(CpuRegister(RSP), src));
+    __ movl(Address(CpuRegister(RSP), dest), scratch);
   }
 }
 
@@ -521,21 +506,19 @@
 
 void X86_64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
                                                      FrameOffset handle_scope_offset,
-                                                     ManagedRegister mscratch,
                                                      bool null_allowed) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
+  CpuRegister scratch = GetScratchRegister();
   if (null_allowed) {
     Label null_arg;
-    __ movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-    __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
+    __ movl(scratch, Address(CpuRegister(RSP), handle_scope_offset));
+    __ testl(scratch, scratch);
     __ j(kZero, &null_arg);
-    __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ leaq(scratch, Address(CpuRegister(RSP), handle_scope_offset));
     __ Bind(&null_arg);
   } else {
-    __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ leaq(scratch, Address(CpuRegister(RSP), handle_scope_offset));
   }
-  Store(out_off, scratch, 8);
+  __ movq(Address(CpuRegister(RSP), out_off), scratch);
 }
 
 // Given a handle scope entry, load the associated reference.
@@ -563,39 +546,38 @@
   // TODO: not validating references
 }
 
-void X86_64JNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset, ManagedRegister) {
+void X86_64JNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset) {
   X86_64ManagedRegister base = mbase.AsX86_64();
   CHECK(base.IsCpuRegister());
   __ jmp(Address(base.AsCpuRegister(), offset.Int32Value()));
 }
 
-void X86_64JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
+void X86_64JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset) {
   X86_64ManagedRegister base = mbase.AsX86_64();
   CHECK(base.IsCpuRegister());
   __ call(Address(base.AsCpuRegister(), offset.Int32Value()));
   // TODO: place reference map on call
 }
 
-void X86_64JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
+void X86_64JNIMacroAssembler::Call(FrameOffset base, Offset offset) {
+  CpuRegister scratch = GetScratchRegister();
   __ movq(scratch, Address(CpuRegister(RSP), base));
   __ call(Address(scratch, offset));
 }
 
-void X86_64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset, ManagedRegister /*mscratch*/) {
+void X86_64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset) {
   __ gs()->call(Address::Absolute(offset, true));
 }
 
-void X86_64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
-  __ gs()->movq(tr.AsX86_64().AsCpuRegister(),
+void X86_64JNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
+  __ gs()->movq(dest.AsX86_64().AsCpuRegister(),
                 Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
 }
 
-void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  __ gs()->movq(scratch.AsCpuRegister(),
-                Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
-  __ movq(Address(CpuRegister(RSP), offset), scratch.AsCpuRegister());
+void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset) {
+  CpuRegister scratch = GetScratchRegister();
+  __ gs()->movq(scratch, Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
+  __ movq(Address(CpuRegister(RSP), offset), scratch);
 }
 
 // Slowpath entered when Thread::Current()->_exception is non-null
@@ -607,7 +589,7 @@
   const size_t stack_adjust_;
 };
 
-void X86_64JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+void X86_64JNIMacroAssembler::ExceptionPoll(size_t stack_adjust) {
   X86_64ExceptionSlowPath* slow = new (__ GetAllocator()) X86_64ExceptionSlowPath(stack_adjust);
   __ GetBuffer()->EnqueueSlowPath(slow);
   __ gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true),
@@ -624,13 +606,11 @@
   __ jmp(X86_64JNIMacroLabel::Cast(label)->AsX86_64());
 }
 
-void X86_64JNIMacroAssembler::Jump(JNIMacroLabel* label,
-                                   JNIMacroUnaryCondition condition,
-                                   ManagedRegister test) {
+void X86_64JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
   CHECK(label != nullptr);
 
   art::x86_64::Condition x86_64_cond;
-  switch (condition) {
+  switch (cond) {
     case JNIMacroUnaryCondition::kZero:
       x86_64_cond = art::x86_64::kZero;
       break;
@@ -638,13 +618,18 @@
       x86_64_cond = art::x86_64::kNotZero;
       break;
     default:
-      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(condition);
+      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(cond);
       UNREACHABLE();
   }
 
+  // TODO: Compare the memory location with immediate 0.
+  CpuRegister scratch = GetScratchRegister();
+  DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
+  __ gs()->movl(scratch, Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), true));
+
   // TEST reg, reg
   // Jcc <Offset>
-  __ testq(test.AsX86_64().AsCpuRegister(), test.AsX86_64().AsCpuRegister());
+  __ testq(scratch, scratch);
   __ j(x86_64_cond, X86_64JNIMacroLabel::Cast(label)->AsX86_64());
 }
 
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index d3f1fce..4592eba 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -45,8 +45,7 @@
   // Emit code that will create an activation on the stack
   void BuildFrame(size_t frame_size,
                   ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) override;
+                  ArrayRef<const ManagedRegister> callee_save_regs) override;
 
   // Emit code that will remove an activation from the stack
   void RemoveFrame(size_t frame_size,
@@ -61,18 +60,13 @@
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
 
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override;
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
 
-  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
-                                FrameOffset fr_offs,
-                                ManagedRegister scratch) override;
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs) override;
 
   void StoreStackPointerToThread(ThreadOffset64 thr_offs) override;
 
-  void StoreSpanning(FrameOffset dest,
-                     ManagedRegister src,
-                     FrameOffset in_off,
-                     ManagedRegister scratch) override;
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
   // Load routines
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
@@ -93,16 +87,18 @@
   // Copying routines
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
 
-  void CopyRawPtrFromThread(FrameOffset fr_offs,
-                            ThreadOffset64 thr_offs,
-                            ManagedRegister scratch) override;
+  void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override;
 
   void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       override;
 
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override;
+  void CopyRef(FrameOffset dest, FrameOffset src) override;
+  void CopyRef(FrameOffset dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) override;
 
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override;
+  void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
 
   void Copy(FrameOffset dest,
             ManagedRegister src_base,
@@ -145,8 +141,8 @@
   void ZeroExtend(ManagedRegister mreg, size_t size) override;
 
   // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) override;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) override;
+  void GetCurrentThread(ManagedRegister dest) override;
+  void GetCurrentThread(FrameOffset dest_offset) override;
 
   // Set up out_reg to hold an Object** into the handle scope, or to be null if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
@@ -161,7 +157,6 @@
   // value is null and null_allowed.
   void CreateHandleScopeEntry(FrameOffset out_off,
                               FrameOffset handlescope_offset,
-                              ManagedRegister scratch,
                               bool null_allowed) override;
 
   // src holds a handle scope entry (Object**); load this into dst.
@@ -173,23 +168,23 @@
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
   // Jump to address held at [base+offset] (used for tail calls).
-  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+  void Jump(ManagedRegister base, Offset offset) override;
 
   // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;
-  void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) override;
+  void Call(ManagedRegister base, Offset offset) override;
+  void Call(FrameOffset base, Offset offset) override;
+  void CallFromThread(ThreadOffset64 offset) override;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to an ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override;
+  void ExceptionPoll(size_t stack_adjust) override;
 
   // Create a new label that can be used with Jump/Bind calls.
   std::unique_ptr<JNIMacroLabel> CreateLabel() override;
   // Emit an unconditional jump to the label.
   void Jump(JNIMacroLabel* label) override;
-  // Emit a conditional jump to the label by applying a unary condition test to the register.
-  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override;
+  // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag.
+  void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;
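For completeness, the new CopyRef() overload added on both architectures takes the source as a base register plus member offset and can unpoison the reference on the way out (via MaybeUnpoisonHeapReference). A hypothetical call site (obj_reg, dest_slot_offset, and field_offset are invented names):

  // Copy a possibly-poisoned reference field out of the object held in
  // obj_reg to a stack slot, unpoisoning it if heap poisoning is enabled.
  assembler->CopyRef(FrameOffset(dest_slot_offset),
                     obj_reg,                       // ManagedRegister holding the object
                     MemberOffset(field_offset),
                     /* unpoison_reference= */ true);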