Move entry spills determination to JNI compiler.

The calling convention no longer describes entry spills, as
spilling is now the JNI compiler's responsibility. This
enables future improvements, such as spilling registers
directly to the HandleScope or to outgoing stack args.
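
For reference, the spill loop now emitted by the JNI compiler
follows this pattern (excerpted from the updated
assembler_thumb_test.cc below):

    __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs);
    // Spill incoming register arguments to their stack offsets.
    mr_conv->ResetIterator(FrameOffset(frame_size));
    for (; mr_conv->HasNext(); mr_conv->Next()) {
      if (mr_conv->IsCurrentParamInRegister()) {
        size_t size = mr_conv->IsCurrentParamALongOrDouble() ? 8u : 4u;
        __ Store(mr_conv->CurrentParamStackOffset(), mr_conv->CurrentParamRegister(), size);
      }
    }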

Remove the notion of an interprocedural scratch register
from the calling conventions and let the assemblers handle
all scratch register uses themselves. The remaining JNI
assembler APIs that still take scratch registers are
currently unused and can be removed.
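
Each routine now acquires its own temporary instead of
receiving one from the caller; e.g. the ARM (VIXL) pattern
used throughout this change:

    // No more `ManagedRegister mscratch` parameter; grab a temp locally.
    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
    vixl32::Register scratch = temps.Acquire();
    asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
    asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());

On x86, which has no such scratch pool, the assembler instead
designates ECX as a fixed scratch register; it is an argument
register that gets spilled in BuildFrame() and is free after
that (see GetScratchRegister() below).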

Also fix a bug in the disassembly comparison for tests: two
files were considered identical if the second one merely
contained additional trailing data.
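
The old code used the three-iterator std::equal() overload,
which stops once the first range is exhausted. A minimal
illustration of the bug (hypothetical file contents, not code
from this change; requires <algorithm> and <string>):

    std::string f1 = "mov r0, r1\n";
    std::string f2 = "mov r0, r1\nTRAILING GARBAGE\n";
    // The overload without a second end iterator compares only
    // the first f1.size() characters of f2.
    bool same = std::equal(f1.begin(), f1.end(), f2.begin());  // true!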

This change fully preserves the generated code and adds TODO
comments where preserving it results in weird or suboptimal
code.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: Prebuilt boot image is unchanged.
Test: aosp_taimen-userdebug boots.
Bug: 12189621
Change-Id: Ic26a670276920313cd907a6eda8d982cf0abfd81
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index ffb58ac..4efbe9f 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -76,8 +76,7 @@
 
 void ArmVIXLJNIMacroAssembler::BuildFrame(size_t frame_size,
                                           ManagedRegister method_reg,
-                                          ArrayRef<const ManagedRegister> callee_save_regs,
-                                          const ManagedRegisterEntrySpills& entry_spills) {
+                                          ArrayRef<const ManagedRegister> callee_save_regs) {
   // If we're creating an actual frame with the method, enforce managed stack alignment,
   // otherwise only the native stack alignment.
   if (method_reg.IsNoRegister()) {
@@ -123,25 +122,6 @@
     CHECK(r0.Is(AsVIXLRegister(method_reg.AsArm())));
     asm_.StoreToOffset(kStoreWord, r0, sp, 0);
   }
-
-  // Write out entry spills.
-  int32_t offset = frame_size + kFramePointerSize;
-  for (const ManagedRegisterSpill& spill : entry_spills) {
-    ArmManagedRegister reg = spill.AsArm();
-    if (reg.IsNoRegister()) {
-      // only increment stack offset.
-      offset += spill.getSize();
-    } else if (reg.IsCoreRegister()) {
-      asm_.StoreToOffset(kStoreWord, AsVIXLRegister(reg), sp, offset);
-      offset += 4;
-    } else if (reg.IsSRegister()) {
-      asm_.StoreSToOffset(AsVIXLSRegister(reg), sp, offset);
-      offset += 4;
-    } else if (reg.IsDRegister()) {
-      asm_.StoreDToOffset(AsVIXLDRegister(reg), sp, offset);
-      offset += 8;
-    }
-  }
 }
 
 void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size,
@@ -271,27 +251,35 @@
 
 void ArmVIXLJNIMacroAssembler::StoreSpanning(FrameOffset dest,
                                              ManagedRegister msrc,
-                                             FrameOffset in_off,
-                                             ManagedRegister mscratch) {
+                                             FrameOffset in_off) {
   vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm());
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
   asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.LoadFromOffset(kLoadWord, scratch, sp, in_off.Int32Value());
   asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value() + 4);
 }
 
-void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest,
-                                       FrameOffset src,
-                                       ManagedRegister mscratch) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
+void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
   asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
 }
 
+void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest,
+                                       ManagedRegister base,
+                                       MemberOffset offs,
+                                       bool unpoison_reference) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  vixl32::Register scratch = temps.Acquire();
+  asm_.LoadFromOffset(kLoadWord, scratch, AsVIXLRegister(base.AsArm()), offs.Int32Value());
+  if (unpoison_reference) {
+    asm_.MaybeUnpoisonHeapReference(scratch);
+  }
+  asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
+}
+
 void ArmVIXLJNIMacroAssembler::LoadRef(ManagedRegister mdest,
                                        ManagedRegister mbase,
                                        MemberOffset offs,
@@ -318,12 +306,9 @@
   UNIMPLEMENTED(FATAL);
 }
 
-void ArmVIXLJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest,
-                                                     uint32_t imm,
-                                                     ManagedRegister mscratch) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
+void ArmVIXLJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.LoadImmediate(scratch, imm);
   asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
 }
@@ -345,12 +330,9 @@
   asm_.LoadFromOffset(kLoadWord, dest, tr, offs.Int32Value());
 }
 
-void ArmVIXLJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
-                                                    ThreadOffset32 thr_offs,
-                                                    ManagedRegister mscratch) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
+void ArmVIXLJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.LoadFromOffset(kLoadWord, scratch, tr, thr_offs.Int32Value());
   asm_.StoreToOffset(kStoreWord, scratch, sp, fr_offs.Int32Value());
 }
@@ -362,11 +344,9 @@
 }
 
 void ArmVIXLJNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
-                                                        FrameOffset fr_offs,
-                                                        ManagedRegister mscratch) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
+                                                        FrameOffset fr_offs) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.AddConstant(scratch, sp, fr_offs.Int32Value());
   asm_.StoreToOffset(kStoreWord, scratch, tr, thr_offs.Int32Value());
 }
@@ -389,12 +369,25 @@
                                     ManagedRegister msrc,
                                     size_t size  ATTRIBUTE_UNUSED) {
   ArmManagedRegister dst = mdst.AsArm();
+  if (kIsDebugBuild) {
+    // Check that the destination is not a scratch register.
+    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+    if (dst.IsCoreRegister()) {
+      CHECK(!temps.IsAvailable(AsVIXLRegister(dst)));
+    } else if (dst.IsDRegister()) {
+      CHECK(!temps.IsAvailable(AsVIXLDRegister(dst)));
+    } else if (dst.IsSRegister()) {
+      CHECK(!temps.IsAvailable(AsVIXLSRegister(dst)));
+    } else {
+      CHECK(dst.IsRegisterPair()) << dst;
+      CHECK(!temps.IsAvailable(AsVIXLRegisterPairLow(dst)));
+      CHECK(!temps.IsAvailable(AsVIXLRegisterPairHigh(dst)));
+    }
+  }
   ArmManagedRegister src = msrc.AsArm();
   if (!dst.Equals(src)) {
     if (dst.IsCoreRegister()) {
       CHECK(src.IsCoreRegister()) << src;
-      UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-      temps.Exclude(AsVIXLRegister(dst));
       ___ Mov(AsVIXLRegister(dst), AsVIXLRegister(src));
     } else if (dst.IsDRegister()) {
       if (src.IsDRegister()) {
@@ -427,14 +420,10 @@
   }
 }
 
-void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest,
-                                    FrameOffset src,
-                                    ManagedRegister mscratch,
-                                    size_t size) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
-  CHECK(size == 4 || size == 8) << size;
+void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
+  DCHECK(size == 4 || size == 8) << size;
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   if (size == 4) {
     asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value());
     asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value());
@@ -535,11 +524,9 @@
 
 void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
                                                       FrameOffset handle_scope_offset,
-                                                      ManagedRegister mscratch,
                                                       bool null_allowed) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   if (null_allowed) {
     asm_.LoadFromOffset(kLoadWord, scratch, sp, handle_scope_offset.Int32Value());
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
@@ -579,33 +566,26 @@
   // TODO: not validating references.
 }
 
-void ArmVIXLJNIMacroAssembler::Jump(ManagedRegister mbase,
-                                    Offset offset,
-                                    ManagedRegister mscratch) {
+void ArmVIXLJNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset) {
   vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.LoadFromOffset(kLoadWord, scratch, base, offset.Int32Value());
   ___ Bx(scratch);
 }
 
-void ArmVIXLJNIMacroAssembler::Call(ManagedRegister mbase,
-                                    Offset offset,
-                                    ManagedRegister mscratch) {
+void ArmVIXLJNIMacroAssembler::Call(ManagedRegister mbase, Offset offset) {
   vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   asm_.LoadFromOffset(kLoadWord, scratch, base, offset.Int32Value());
   ___ Blx(scratch);
   // TODO: place reference map on call.
 }
 
-void ArmVIXLJNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
+void ArmVIXLJNIMacroAssembler::Call(FrameOffset base, Offset offset) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   // Call *(*(SP + base) + offset)
   asm_.LoadFromOffset(kLoadWord, scratch, sp, base.Int32Value());
   asm_.LoadFromOffset(kLoadWord, scratch, scratch, offset.Int32Value());
@@ -613,29 +593,26 @@
   // TODO: place reference map on call
 }
 
-void ArmVIXLJNIMacroAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED,
-                                              ManagedRegister scratch ATTRIBUTE_UNUSED) {
+void ArmVIXLJNIMacroAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL);
 }
 
-void ArmVIXLJNIMacroAssembler::GetCurrentThread(ManagedRegister mtr) {
+void ArmVIXLJNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(AsVIXLRegister(mtr.AsArm()));
-  ___ Mov(AsVIXLRegister(mtr.AsArm()), tr);
+  temps.Exclude(AsVIXLRegister(dest.AsArm()));
+  ___ Mov(AsVIXLRegister(dest.AsArm()), tr);
 }
 
-void ArmVIXLJNIMacroAssembler::GetCurrentThread(FrameOffset dest_offset,
-                                                ManagedRegister scratch ATTRIBUTE_UNUSED) {
+void ArmVIXLJNIMacroAssembler::GetCurrentThread(FrameOffset dest_offset) {
   asm_.StoreToOffset(kStoreWord, tr, sp, dest_offset.Int32Value());
 }
 
-void ArmVIXLJNIMacroAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
+void ArmVIXLJNIMacroAssembler::ExceptionPoll(size_t stack_adjust) {
   CHECK_ALIGNED(stack_adjust, kAapcsStackAlignment);
-  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(scratch);
+  vixl32::Register scratch = temps.Acquire();
   exception_blocks_.emplace_back(
-      new ArmVIXLJNIMacroAssembler::ArmException(mscratch.AsArm(), stack_adjust));
+      new ArmVIXLJNIMacroAssembler::ArmException(scratch, stack_adjust));
   asm_.LoadFromOffset(kLoadWord,
                       scratch,
                       tr,
@@ -656,23 +633,22 @@
   ___ B(ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
 }
 
-void ArmVIXLJNIMacroAssembler::Jump(JNIMacroLabel* label,
-                                    JNIMacroUnaryCondition condition,
-                                    ManagedRegister mtest) {
+void ArmVIXLJNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
   CHECK(label != nullptr);
 
-  vixl::aarch32::Register test = AsVIXLRegister(mtest.AsArm());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(test);
-  switch (condition) {
+  vixl32::Register scratch = temps.Acquire();
+  DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
+  ___ Ldr(scratch, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
+  switch (cond) {
     case JNIMacroUnaryCondition::kZero:
-      ___ CompareAndBranchIfZero(test, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+      ___ CompareAndBranchIfZero(scratch, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
       break;
     case JNIMacroUnaryCondition::kNotZero:
-      ___ CompareAndBranchIfNonZero(test, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+      ___ CompareAndBranchIfNonZero(scratch, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
       break;
     default:
-      LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(condition);
+      LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond);
       UNREACHABLE();
   }
 }
@@ -689,7 +665,7 @@
     DecreaseFrameSize(exception->stack_adjust_);
   }
 
-  vixl::aarch32::Register scratch = AsVIXLRegister(exception->scratch_);
+  vixl32::Register scratch = exception->scratch_;
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
   temps.Exclude(scratch);
   // Pass exception object as argument.
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index 1724671..2bd571e 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -51,8 +51,7 @@
   // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size,
                   ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) override;
+                  ArrayRef<const ManagedRegister> callee_save_regs) override;
 
   // Emit code that will remove an activation from the stack.
   void RemoveFrame(size_t frame_size,
@@ -67,18 +66,13 @@
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
 
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override;
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
 
-  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
-                                FrameOffset fr_offs,
-                                ManagedRegister scratch) override;
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) override;
 
   void StoreStackPointerToThread(ThreadOffset32 thr_offs) override;
 
-  void StoreSpanning(FrameOffset dest,
-                     ManagedRegister src,
-                     FrameOffset in_off,
-                     ManagedRegister scratch) override;
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
   // Load routines.
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
@@ -101,17 +95,19 @@
   // Copying routines.
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
 
-  void CopyRawPtrFromThread(FrameOffset fr_offs,
-                            ThreadOffset32 thr_offs,
-                            ManagedRegister scratch) override;
+  void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override;
 
   void CopyRawPtrToThread(ThreadOffset32 thr_offs,
                           FrameOffset fr_offs,
                           ManagedRegister scratch) override;
 
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override;
+  void CopyRef(FrameOffset dest, FrameOffset src) override;
+  void CopyRef(FrameOffset dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) override;
 
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override;
+  void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
 
   void Copy(FrameOffset dest,
             ManagedRegister src_base,
@@ -152,9 +148,8 @@
   void ZeroExtend(ManagedRegister mreg, size_t size) override;
 
   // Exploit fast access in managed code to Thread::Current().
-  void GetCurrentThread(ManagedRegister mtr) override;
-  void GetCurrentThread(FrameOffset dest_offset,
-                        ManagedRegister scratch) override;
+  void GetCurrentThread(ManagedRegister dest) override;
+  void GetCurrentThread(FrameOffset dest_offset) override;
 
   // Set up out_reg to hold a Object** into the handle scope, or to be null if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
@@ -169,7 +164,6 @@
   // value is null and null_allowed.
   void CreateHandleScopeEntry(FrameOffset out_off,
                               FrameOffset handlescope_offset,
-                              ManagedRegister scratch,
                               bool null_allowed) override;
 
   // src holds a handle scope entry (Object**) load this into dst.
@@ -182,23 +176,23 @@
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
   // Jump to address held at [base+offset] (used for tail calls).
-  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+  void Jump(ManagedRegister base, Offset offset) override;
 
   // Call to address held at [base+offset].
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;
-  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) override;
+  void Call(ManagedRegister base, Offset offset) override;
+  void Call(FrameOffset base, Offset offset) override;
+  void CallFromThread(ThreadOffset32 offset) override;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override;
+  void ExceptionPoll(size_t stack_adjust) override;
 
   // Create a new label that can be used with Jump/Bind calls.
   std::unique_ptr<JNIMacroLabel> CreateLabel() override;
   // Emit an unconditional jump to the label.
   void Jump(JNIMacroLabel* label) override;
-  // Emit a conditional jump to the label by applying a unary condition test to the register.
-  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override;
+  // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag.
+  void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;
 
@@ -210,13 +204,13 @@
  private:
   class ArmException {
    private:
-    ArmException(ArmManagedRegister scratch, size_t stack_adjust)
+    ArmException(vixl32::Register scratch, size_t stack_adjust)
         : scratch_(scratch), stack_adjust_(stack_adjust) {}
 
     vixl32::Label* Entry() { return &exception_entry_; }
 
     // Register used for passing Thread::Current()->exception_ .
-    const ArmManagedRegister scratch_;
+    const vixl32::Register scratch_;
 
     // Stack adjust for ExceptionPool.
     const size_t stack_adjust_;
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 5b46971..c2aef83 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -51,11 +51,11 @@
   ___ FinalizeCode();
 }
 
-void Arm64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
-  ___ Mov(reg_x(tr.AsArm64().AsXRegister()), reg_x(TR));
+void Arm64JNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
+  ___ Mov(reg_x(dest.AsArm64().AsXRegister()), reg_x(TR));
 }
 
-void Arm64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister /* scratch */) {
+void Arm64JNIMacroAssembler::GetCurrentThread(FrameOffset offset) {
   StoreToOffset(TR, SP, offset.Int32Value());
 }
 
@@ -162,41 +162,31 @@
   StoreToOffset(src.AsXRegister(), SP, offs.Int32Value());
 }
 
-void Arm64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset offs,
-                                                   uint32_t imm,
-                                                   ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadImmediate(scratch.AsXRegister(), imm);
-  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP,
-                 offs.Int32Value());
+void Arm64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset offs, uint32_t imm) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireW();
+  ___ Mov(scratch.X(), imm);  // TODO: Use W register.
+  ___ Str(scratch, MEM_OP(reg_x(SP), offs.Int32Value()));
 }
 
-void Arm64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs,
-                                                      FrameOffset fr_offs,
-                                                      ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  AddConstant(scratch.AsXRegister(), SP, fr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
+void Arm64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 tr_offs, FrameOffset fr_offs) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
+  ___ Add(scratch, reg_x(SP), fr_offs.Int32Value());
+  ___ Str(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
 }
 
 void Arm64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 tr_offs) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  Register temp = temps.AcquireX();
-  ___ Mov(temp, reg_x(SP));
-  ___ Str(temp, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
+  Register scratch = temps.AcquireX();
+  ___ Mov(scratch, reg_x(SP));
+  ___ Str(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
 }
 
-void Arm64JNIMacroAssembler::StoreSpanning(FrameOffset dest_off,
-                                           ManagedRegister m_source,
-                                           FrameOffset in_off,
-                                           ManagedRegister m_scratch) {
-  Arm64ManagedRegister source = m_source.AsArm64();
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  StoreToOffset(source.AsXRegister(), SP, dest_off.Int32Value());
-  LoadFromOffset(scratch.AsXRegister(), SP, in_off.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), SP, dest_off.Int32Value() + 8);
+void Arm64JNIMacroAssembler::StoreSpanning(FrameOffset dest_off ATTRIBUTE_UNUSED,
+                                           ManagedRegister m_source ATTRIBUTE_UNUSED,
+                                           FrameOffset in_off ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL);  // This case is not applicable to ARM64.
 }
 
 // Load routines.
@@ -338,6 +328,19 @@
 // Copying routines.
 void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, ManagedRegister m_src, size_t size) {
   Arm64ManagedRegister dst = m_dst.AsArm64();
+  if (kIsDebugBuild) {
+    // Check that the destination is not a scratch register.
+    UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+    if (dst.IsXRegister()) {
+      CHECK(!temps.IsAvailable(reg_x(dst.AsXRegister())));
+    } else if (dst.IsWRegister()) {
+      CHECK(!temps.IsAvailable(reg_w(dst.AsWRegister())));
+    } else if (dst.IsSRegister()) {
+      CHECK(!temps.IsAvailable(reg_s(dst.AsSRegister())));
+    } else {
+      CHECK(!temps.IsAvailable(reg_d(dst.AsDRegister())));
+    }
+  }
   Arm64ManagedRegister src = m_src.AsArm64();
   if (!dst.Equals(src)) {
     if (dst.IsXRegister()) {
@@ -365,13 +368,11 @@
   }
 }
 
-void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
-                                                  ThreadOffset64 tr_offs,
-                                                  ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
-  StoreToOffset(scratch.AsXRegister(), SP, fr_offs.Int32Value());
+void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 tr_offs) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
+  ___ Ldr(scratch, MEM_OP(reg_x(TR), tr_offs.Int32Value()));
+  ___ Str(scratch, MEM_OP(sp, fr_offs.Int32Value()));
 }
 
 void Arm64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 tr_offs,
@@ -383,31 +384,38 @@
   StoreToOffset(scratch.AsXRegister(), TR, tr_offs.Int32Value());
 }
 
-void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(),
-                  SP, src.Int32Value());
-  StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(),
-                 SP, dest.Int32Value());
+void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireW();
+  ___ Ldr(scratch, MEM_OP(reg_x(SP), src.Int32Value()));
+  ___ Str(scratch, MEM_OP(reg_x(SP), dest.Int32Value()));
 }
 
-void Arm64JNIMacroAssembler::Copy(FrameOffset dest,
-                                  FrameOffset src,
-                                  ManagedRegister m_scratch,
-                                  size_t size) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
-  CHECK(size == 4 || size == 8) << size;
-  if (size == 4) {
-    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP, src.Int32Value());
-    StoreWToOffset(kStoreWord, scratch.AsOverlappingWRegister(), SP, dest.Int32Value());
-  } else if (size == 8) {
-    LoadFromOffset(scratch.AsXRegister(), SP, src.Int32Value());
-    StoreToOffset(scratch.AsXRegister(), SP, dest.Int32Value());
-  } else {
-    UNIMPLEMENTED(FATAL) << "We only support Copy() of size 4 and 8";
+void Arm64JNIMacroAssembler::CopyRef(FrameOffset dest,
+                                     ManagedRegister base,
+                                     MemberOffset offs,
+                                     bool unpoison_reference) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireW();
+  ___ Ldr(scratch, MEM_OP(reg_x(base.AsArm64().AsXRegister()), offs.Int32Value()));
+  if (unpoison_reference) {
+    asm_.MaybeUnpoisonHeapReference(scratch);
   }
+  ___ Str(scratch, MEM_OP(reg_x(SP), dest.Int32Value()));
+}
+
+void Arm64JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
+  DCHECK(size == 4 || size == 8) << size;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = (size == 8) ? temps.AcquireX() : temps.AcquireW();
+  if (size < 8u || IsAligned<8u>(src.Int32Value()) || src.Int32Value() < 0x100) {
+    ___ Ldr(scratch, MEM_OP(reg_x(SP), src.Int32Value()));
+  } else {
+    // TODO: Let the macro assembler deal with this case as well (uses another scratch register).
+    ___ Mov(scratch.X(), src.Int32Value());
+    ___ Ldr(scratch, MEM_OP(reg_x(SP), scratch.X()));
+  }
+  ___ Str(scratch, MEM_OP(reg_x(SP), dest.Int32Value()));
 }
 
 void Arm64JNIMacroAssembler::Copy(FrameOffset dest,
@@ -539,35 +547,34 @@
   // TODO: not validating references.
 }
 
-void Arm64JNIMacroAssembler::Jump(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
+void Arm64JNIMacroAssembler::Jump(ManagedRegister m_base, Offset offs) {
   Arm64ManagedRegister base = m_base.AsArm64();
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value());
-  ___ Br(reg_x(scratch.AsXRegister()));
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
+  ___ Ldr(scratch, MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
+  ___ Br(scratch);
 }
 
-void Arm64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
+void Arm64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs) {
   Arm64ManagedRegister base = m_base.AsArm64();
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
   CHECK(base.IsXRegister()) << base;
-  CHECK(scratch.IsXRegister()) << scratch;
-  LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value());
-  ___ Blr(reg_x(scratch.AsXRegister()));
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
+  ___ Ldr(scratch, MEM_OP(reg_x(base.AsXRegister()), offs.Int32Value()));
+  ___ Blr(scratch);
 }
 
-void Arm64JNIMacroAssembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scratch) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
+void Arm64JNIMacroAssembler::Call(FrameOffset base, Offset offs) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
   // Call *(*(SP + base) + offset)
-  LoadFromOffset(scratch.AsXRegister(), SP, base.Int32Value());
-  LoadFromOffset(scratch.AsXRegister(), scratch.AsXRegister(), offs.Int32Value());
-  ___ Blr(reg_x(scratch.AsXRegister()));
+  ___ Ldr(scratch, MEM_OP(reg_x(SP), base.Int32Value()));
+  ___ Ldr(scratch, MEM_OP(scratch, offs.Int32Value()));
+  ___ Blr(scratch);
 }
 
-void Arm64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED,
-                                            ManagedRegister scratch ATTRIBUTE_UNUSED) {
+void Arm64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset ATTRIBUTE_UNUSED) {
   UNIMPLEMENTED(FATAL) << "Unimplemented Call() variant";
 }
 
@@ -601,23 +608,23 @@
 
 void Arm64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
                                                     FrameOffset handle_scope_offset,
-                                                    ManagedRegister m_scratch,
                                                     bool null_allowed) {
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
-  CHECK(scratch.IsXRegister()) << scratch;
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
   if (null_allowed) {
-    LoadWFromOffset(kLoadWord, scratch.AsOverlappingWRegister(), SP,
-                    handle_scope_offset.Int32Value());
+    // TODO: Clean this up; load to temp2 (W register), use xzr for CSEL, reorder ADD earlier.
+    Register scratch2 = temps.AcquireX();
+    ___ Ldr(scratch.W(), MEM_OP(reg_x(SP), handle_scope_offset.Int32Value()));
     // Null values get a handle scope entry value of 0.  Otherwise, the handle scope entry is
     // the address in the handle scope holding the reference.
     // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset)
-    ___ Cmp(reg_w(scratch.AsOverlappingWRegister()), 0);
-    // Move this logic in add constants with flags.
-    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), ne);
+    ___ Cmp(scratch.W(), 0);
+    ___ Add(scratch2, reg_x(SP), handle_scope_offset.Int32Value());
+    ___ Csel(scratch, scratch2, scratch, ne);
   } else {
-    AddConstant(scratch.AsXRegister(), SP, handle_scope_offset.Int32Value(), al);
+    ___ Add(scratch, reg_x(SP), handle_scope_offset.Int32Value());
   }
-  StoreToOffset(scratch.AsXRegister(), SP, out_off.Int32Value());
+  ___ Str(scratch, MEM_OP(reg_x(SP), out_off.Int32Value()));
 }
 
 void Arm64JNIMacroAssembler::LoadReferenceFromHandleScope(ManagedRegister m_out_reg,
@@ -636,14 +643,13 @@
   ___ Bind(&exit);
 }
 
-void Arm64JNIMacroAssembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) {
+void Arm64JNIMacroAssembler::ExceptionPoll(size_t stack_adjust) {
   CHECK_ALIGNED(stack_adjust, kStackAlignment);
-  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  Register scratch = temps.AcquireX();
   exception_blocks_.emplace_back(new Arm64Exception(scratch, stack_adjust));
-  LoadFromOffset(scratch.AsXRegister(),
-                 TR,
-                 Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
-  ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry());
+  ___ Ldr(scratch, MEM_OP(reg_x(TR), Thread::ExceptionOffset<kArm64PointerSize>().Int32Value()));
+  ___ Cbnz(scratch, exception_blocks_.back()->Entry());
 }
 
 std::unique_ptr<JNIMacroLabel> Arm64JNIMacroAssembler::CreateLabel() {
@@ -655,20 +661,23 @@
   ___ B(Arm64JNIMacroLabel::Cast(label)->AsArm64());
 }
 
-void Arm64JNIMacroAssembler::Jump(JNIMacroLabel* label,
-                                  JNIMacroUnaryCondition condition,
-                                  ManagedRegister test) {
+void Arm64JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
   CHECK(label != nullptr);
 
-  switch (condition) {
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
+  Register scratch = temps.AcquireW();
+  ___ Ldr(scratch, MEM_OP(reg_x(TR), Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
+  switch (cond) {
+    // TODO: Use `scratch` instead of `scratch.X()`.
     case JNIMacroUnaryCondition::kZero:
-      ___ Cbz(reg_x(test.AsArm64().AsXRegister()), Arm64JNIMacroLabel::Cast(label)->AsArm64());
+      ___ Cbz(scratch.X(), Arm64JNIMacroLabel::Cast(label)->AsArm64());
       break;
     case JNIMacroUnaryCondition::kNotZero:
-      ___ Cbnz(reg_x(test.AsArm64().AsXRegister()), Arm64JNIMacroLabel::Cast(label)->AsArm64());
+      ___ Cbnz(scratch.X(), Arm64JNIMacroLabel::Cast(label)->AsArm64());
       break;
     default:
-      LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(condition);
+      LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond);
       UNREACHABLE();
   }
 }
@@ -680,8 +689,8 @@
 
 void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception* exception) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
-  temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
-  Register temp = temps.AcquireX();
+  temps.Exclude(exception->scratch_);
+  Register scratch = temps.AcquireX();
 
   // Bind exception poll entry.
   ___ Bind(exception->Entry());
@@ -690,20 +699,19 @@
   }
   // Pass exception object as argument.
   // Don't care about preserving X0 as this won't return.
-  ___ Mov(reg_x(X0), reg_x(exception->scratch_.AsXRegister()));
-  ___ Ldr(temp,
+  ___ Mov(reg_x(X0), exception->scratch_);
+  ___ Ldr(scratch,
           MEM_OP(reg_x(TR),
                  QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pDeliverException).Int32Value()));
 
-  ___ Blr(temp);
+  ___ Blr(scratch);
   // Call should never return.
   ___ Brk();
 }
 
 void Arm64JNIMacroAssembler::BuildFrame(size_t frame_size,
                                         ManagedRegister method_reg,
-                                        ArrayRef<const ManagedRegister> callee_save_regs,
-                                        const ManagedRegisterEntrySpills& entry_spills) {
+                                        ArrayRef<const ManagedRegister> callee_save_regs) {
   // Setup VIXL CPURegList for callee-saves.
   CPURegList core_reg_list(CPURegister::kRegister, kXRegSize, 0);
   CPURegList fp_reg_list(CPURegister::kVRegister, kDRegSize, 0);
@@ -735,28 +743,6 @@
     DCHECK(X0 == method_reg.AsArm64().AsXRegister());
     StoreToOffset(X0, SP, 0);
   }
-
-  // Write out entry spills
-  int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize);
-  for (const ManagedRegisterSpill& spill : entry_spills) {
-    Arm64ManagedRegister reg = spill.AsArm64();
-    if (reg.IsNoRegister()) {
-      // only increment stack offset.
-      offset += spill.getSize();
-    } else if (reg.IsXRegister()) {
-      StoreToOffset(reg.AsXRegister(), SP, offset);
-      offset += 8;
-    } else if (reg.IsWRegister()) {
-      StoreWToOffset(kStoreWord, reg.AsWRegister(), SP, offset);
-      offset += 4;
-    } else if (reg.IsDRegister()) {
-      StoreDToOffset(reg.AsDRegister(), SP, offset);
-      offset += 8;
-    } else if (reg.IsSRegister()) {
-      StoreSToOffset(reg.AsSRegister(), SP, offset);
-      offset += 4;
-    }
-  }
 }
 
 void Arm64JNIMacroAssembler::RemoveFrame(size_t frame_size,
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index 54592a3..64b5595 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -54,8 +54,7 @@
   // Emit code that will create an activation on the stack.
   void BuildFrame(size_t frame_size,
                   ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) override;
+                  ArrayRef<const ManagedRegister> callee_save_regs) override;
 
   // Emit code that will remove an activation from the stack.
   void RemoveFrame(size_t frame_size,
@@ -69,15 +68,10 @@
   void Store(FrameOffset offs, ManagedRegister src, size_t size) override;
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override;
-  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
-                                FrameOffset fr_offs,
-                                ManagedRegister scratch) override;
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs) override;
   void StoreStackPointerToThread(ThreadOffset64 thr_offs) override;
-  void StoreSpanning(FrameOffset dest,
-                     ManagedRegister src,
-                     FrameOffset in_off,
-                     ManagedRegister scratch) override;
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
   // Load routines.
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
@@ -92,13 +86,15 @@
 
   // Copying routines.
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
-  void CopyRawPtrFromThread(FrameOffset fr_offs,
-                            ThreadOffset64 thr_offs,
-                            ManagedRegister scratch) override;
+  void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override;
   void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       override;
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override;
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override;
+  void CopyRef(FrameOffset dest, FrameOffset src) override;
+  void CopyRef(FrameOffset dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) override;
+  void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
   void Copy(FrameOffset dest,
             ManagedRegister src_base,
             Offset src_offset,
@@ -135,8 +131,8 @@
   void ZeroExtend(ManagedRegister mreg, size_t size) override;
 
   // Exploit fast access in managed code to Thread::Current().
-  void GetCurrentThread(ManagedRegister tr) override;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) override;
+  void GetCurrentThread(ManagedRegister dest) override;
+  void GetCurrentThread(FrameOffset dest_offset) override;
 
   // Set up out_reg to hold a Object** into the handle scope, or to be null if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
@@ -151,7 +147,6 @@
   // value is null and null_allowed.
   void CreateHandleScopeEntry(FrameOffset out_off,
                               FrameOffset handlescope_offset,
-                              ManagedRegister scratch,
                               bool null_allowed) override;
 
   // src holds a handle scope entry (Object**) load this into dst.
@@ -163,36 +158,36 @@
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
   // Jump to address held at [base+offset] (used for tail calls).
-  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+  void Jump(ManagedRegister base, Offset offset) override;
 
   // Call to address held at [base+offset].
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;
-  void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) override;
+  void Call(ManagedRegister base, Offset offset) override;
+  void Call(FrameOffset base, Offset offset) override;
+  void CallFromThread(ThreadOffset64 offset) override;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override;
+  void ExceptionPoll(size_t stack_adjust) override;
 
   // Create a new label that can be used with Jump/Bind calls.
   std::unique_ptr<JNIMacroLabel> CreateLabel() override;
   // Emit an unconditional jump to the label.
   void Jump(JNIMacroLabel* label) override;
-  // Emit a conditional jump to the label by applying a unary condition test to the register.
-  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override;
+  // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag.
+  void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;
 
  private:
   class Arm64Exception {
    public:
-    Arm64Exception(Arm64ManagedRegister scratch, size_t stack_adjust)
+    Arm64Exception(vixl::aarch64::Register scratch, size_t stack_adjust)
         : scratch_(scratch), stack_adjust_(stack_adjust) {}
 
     vixl::aarch64::Label* Entry() { return &exception_entry_; }
 
     // Register used for passing Thread::Current()->exception_ .
-    const Arm64ManagedRegister scratch_;
+    const vixl::aarch64::Register scratch_;
 
     // Stack adjust for ExceptionPool.
     const size_t stack_adjust_;
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
index 5fa0b3c..0a7cf11 100644
--- a/compiler/utils/assembler_test_base.h
+++ b/compiler/utils/assembler_test_base.h
@@ -364,14 +364,13 @@
     std::ifstream f1_in(f1);
     std::ifstream f2_in(f2);
 
-    bool result = std::equal(std::istreambuf_iterator<char>(f1_in),
-                             std::istreambuf_iterator<char>(),
-                             std::istreambuf_iterator<char>(f2_in));
-
-    f1_in.close();
-    f2_in.close();
-
-    return result;
+    bool read1_ok, read2_ok;
+    char c1, c2;
+    do {
+      read1_ok = static_cast<bool>(f1_in >> c1);
+      read2_ok = static_cast<bool>(f2_in >> c2);
+    } while (read1_ok && read2_ok && c1 == c2);
+    return !read1_ok && !read2_ok;  // Did we reach the end of both streams?
   }
 
   // Compile the given assembly code and extract the binary, if possible. Put result into res.
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index c9ece1d..4e1518b 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -224,9 +224,19 @@
   ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters();
 
   const ManagedRegister method_register = ArmManagedRegister::FromCoreRegister(R0);
+  const ManagedRegister hidden_arg_register = ArmManagedRegister::FromCoreRegister(R4);
   const ManagedRegister scratch_register = ArmManagedRegister::FromCoreRegister(R12);
 
-  __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
+  __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs);
+
+  // Spill arguments.
+  mr_conv->ResetIterator(FrameOffset(frame_size));
+  for (; mr_conv->HasNext(); mr_conv->Next()) {
+    if (mr_conv->IsCurrentParamInRegister()) {
+      size_t size = mr_conv->IsCurrentParamALongOrDouble() ? 8u : 4u;
+      __ Store(mr_conv->CurrentParamStackOffset(), mr_conv->CurrentParamRegister(), size);
+    }
+  }
   __ IncreaseFrameSize(32);
 
   // Loads
@@ -249,33 +259,33 @@
   __ Store(FrameOffset(1024), method_register, 4);
   __ Store(FrameOffset(4092), scratch_register, 4);
   __ Store(FrameOffset(4096), scratch_register, 4);
-  __ StoreImmediateToFrame(FrameOffset(48), 0xFF, scratch_register);
-  __ StoreImmediateToFrame(FrameOffset(48), 0xFFFFFF, scratch_register);
+  __ StoreImmediateToFrame(FrameOffset(48), 0xFF);
+  __ StoreImmediateToFrame(FrameOffset(48), 0xFFFFFF);
   __ StoreRawPtr(FrameOffset(48), scratch_register);
   __ StoreRef(FrameOffset(48), scratch_register);
-  __ StoreSpanning(FrameOffset(48), method_register, FrameOffset(48), scratch_register);
-  __ StoreStackOffsetToThread(ThreadOffset32(512), FrameOffset(4096), scratch_register);
+  __ StoreSpanning(FrameOffset(48), method_register, FrameOffset(48));
+  __ StoreStackOffsetToThread(ThreadOffset32(512), FrameOffset(4096));
   __ StoreStackPointerToThread(ThreadOffset32(512));
 
   // Other
-  __ Call(method_register, FrameOffset(48), scratch_register);
-  __ Copy(FrameOffset(48), FrameOffset(44), scratch_register, 4);
-  __ CopyRawPtrFromThread(FrameOffset(44), ThreadOffset32(512), scratch_register);
-  __ CopyRef(FrameOffset(48), FrameOffset(44), scratch_register);
+  __ Call(method_register, FrameOffset(48));
+  __ Copy(FrameOffset(48), FrameOffset(44), 4);
+  __ CopyRawPtrFromThread(FrameOffset(44), ThreadOffset32(512));
+  __ CopyRef(FrameOffset(48), FrameOffset(44));
   __ GetCurrentThread(method_register);
-  __ GetCurrentThread(FrameOffset(48), scratch_register);
-  __ Move(scratch_register, method_register, 4);
+  __ GetCurrentThread(FrameOffset(48));
+  __ Move(hidden_arg_register, method_register, 4);
   __ VerifyObject(scratch_register, false);
 
   __ CreateHandleScopeEntry(scratch_register, FrameOffset(48), scratch_register, true);
   __ CreateHandleScopeEntry(scratch_register, FrameOffset(48), scratch_register, false);
   __ CreateHandleScopeEntry(method_register, FrameOffset(48), scratch_register, true);
-  __ CreateHandleScopeEntry(FrameOffset(48), FrameOffset(64), scratch_register, true);
+  __ CreateHandleScopeEntry(FrameOffset(48), FrameOffset(64), true);
   __ CreateHandleScopeEntry(method_register, FrameOffset(0), scratch_register, true);
   __ CreateHandleScopeEntry(method_register, FrameOffset(1025), scratch_register, true);
   __ CreateHandleScopeEntry(scratch_register, FrameOffset(1025), scratch_register, true);
 
-  __ ExceptionPoll(scratch_register, 0);
+  __ ExceptionPoll(0);
 
   // Push the target out of range of branch emitted by ExceptionPoll.
   for (int i = 0; i < 64; i++) {
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 6475607..d6b1c50 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -51,7 +51,7 @@
   "  a4:	f8cd c030 	str.w	ip, [sp, #48]	; 0x30\n",
   "  a8:	4648      	mov	r0, r9\n",
   "  aa:	f8cd 9030 	str.w	r9, [sp, #48]	; 0x30\n",
-  "  ae:	4684      	mov	ip, r0\n",
+  "  ae:	4604      	mov	r4, r0\n",
   "  b0:	f1bc 0f00 	cmp.w	ip, #0\n",
   "  b4:	bf18      	it	ne\n",
   "  b6:	f10d 0c30 	addne.w	ip, sp, #48	; 0x30\n",
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index bbe0f73..48b3f01 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -63,8 +63,7 @@
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size,
                           ManagedRegister method_reg,
-                          ArrayRef<const ManagedRegister> callee_save_regs,
-                          const ManagedRegisterEntrySpills& entry_spills) = 0;
+                          ArrayRef<const ManagedRegister> callee_save_regs) = 0;
 
   // Emit code that will remove an activation from the stack
   //
@@ -83,18 +82,16 @@
   virtual void StoreRef(FrameOffset dest, ManagedRegister src) = 0;
   virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src) = 0;
 
-  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) = 0;
+  virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) = 0;
 
   virtual void StoreStackOffsetToThread(ThreadOffset<kPointerSize> thr_offs,
-                                        FrameOffset fr_offs,
-                                        ManagedRegister scratch) = 0;
+                                        FrameOffset fr_offs) = 0;
 
   virtual void StoreStackPointerToThread(ThreadOffset<kPointerSize> thr_offs) = 0;
 
   virtual void StoreSpanning(FrameOffset dest,
                              ManagedRegister src,
-                             FrameOffset in_off,
-                             ManagedRegister scratch) = 0;
+                             FrameOffset in_off) = 0;
 
   // Load routines
   virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0;
@@ -117,17 +114,19 @@
   // Copying routines
   virtual void Move(ManagedRegister dest, ManagedRegister src, size_t size) = 0;
 
-  virtual void CopyRawPtrFromThread(FrameOffset fr_offs,
-                                    ThreadOffset<kPointerSize> thr_offs,
-                                    ManagedRegister scratch) = 0;
+  virtual void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset<kPointerSize> thr_offs) = 0;
 
   virtual void CopyRawPtrToThread(ThreadOffset<kPointerSize> thr_offs,
                                   FrameOffset fr_offs,
                                   ManagedRegister scratch) = 0;
 
-  virtual void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) = 0;
+  virtual void CopyRef(FrameOffset dest, FrameOffset src) = 0;
+  virtual void CopyRef(FrameOffset dest,
+                       ManagedRegister base,
+                       MemberOffset offs,
+                       bool unpoison_reference) = 0;
 
-  virtual void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) = 0;
+  virtual void Copy(FrameOffset dest, FrameOffset src, size_t size) = 0;
 
   virtual void Copy(FrameOffset dest,
                     ManagedRegister src_base,
@@ -170,8 +169,8 @@
   virtual void ZeroExtend(ManagedRegister mreg, size_t size) = 0;
 
   // Exploit fast access in managed code to Thread::Current()
-  virtual void GetCurrentThread(ManagedRegister tr) = 0;
-  virtual void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) = 0;
+  virtual void GetCurrentThread(ManagedRegister dest) = 0;
+  virtual void GetCurrentThread(FrameOffset dest_offset) = 0;
 
   // Set up out_reg to hold a Object** into the handle scope, or to be null if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
@@ -186,7 +185,6 @@
   // value is null and null_allowed.
   virtual void CreateHandleScopeEntry(FrameOffset out_off,
                                       FrameOffset handlescope_offset,
-                                      ManagedRegister scratch,
                                       bool null_allowed) = 0;
 
   // src holds a handle scope entry (Object**) load this into dst
@@ -198,23 +196,23 @@
   virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0;
 
   // Jump to address held at [base+offset] (used for tail calls).
-  virtual void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0;
+  virtual void Jump(ManagedRegister base, Offset offset) = 0;
 
   // Call to address held at [base+offset]
-  virtual void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0;
-  virtual void Call(FrameOffset base, Offset offset, ManagedRegister scratch) = 0;
-  virtual void CallFromThread(ThreadOffset<kPointerSize> offset, ManagedRegister scratch) = 0;
+  virtual void Call(ManagedRegister base, Offset offset) = 0;
+  virtual void Call(FrameOffset base, Offset offset) = 0;
+  virtual void CallFromThread(ThreadOffset<kPointerSize> offset) = 0;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
-  virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
+  virtual void ExceptionPoll(size_t stack_adjust) = 0;
 
   // Create a new label that can be used with Jump/Bind calls.
   virtual std::unique_ptr<JNIMacroLabel> CreateLabel() = 0;
   // Emit an unconditional jump to the label.
   virtual void Jump(JNIMacroLabel* label) = 0;
-  // Emit a conditional jump to the label by applying a unary condition test to the register.
-  virtual void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) = 0;
+  // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag.
+  virtual void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) = 0;
   // Code at this offset will serve as the target for the Jump call.
   virtual void Bind(JNIMacroLabel* label) = 0;
 
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index f20750b..2afdc04 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -84,52 +84,6 @@
 static_assert(std::is_trivially_copyable<ManagedRegister>::value,
               "ManagedRegister should be trivially copyable");
 
-class ManagedRegisterSpill : public ManagedRegister {
- public:
-  // ManagedRegisterSpill contains information about data type size and location in caller frame
-  // These additional attributes could be defined by calling convention (EntrySpills)
-  ManagedRegisterSpill(const ManagedRegister& other, uint32_t size, uint32_t spill_offset)
-      : ManagedRegister(other), size_(size), spill_offset_(spill_offset)  { }
-
-  explicit ManagedRegisterSpill(const ManagedRegister& other)
-      : ManagedRegister(other), size_(-1), spill_offset_(-1) { }
-
-  ManagedRegisterSpill(const ManagedRegister& other, int32_t size)
-      : ManagedRegister(other), size_(size), spill_offset_(-1) { }
-
-  int32_t getSpillOffset() const {
-    return spill_offset_;
-  }
-
-  int32_t getSize() const {
-    return size_;
-  }
-
- private:
-  int32_t size_;
-  int32_t spill_offset_;
-};
-
-class ManagedRegisterEntrySpills : public std::vector<ManagedRegisterSpill> {
- public:
-  // The ManagedRegister does not have information about size and offset.
-  // In this case it's size and offset determined by BuildFrame (assembler)
-  void push_back(ManagedRegister x) {
-    ManagedRegisterSpill spill(x);
-    std::vector<ManagedRegisterSpill>::push_back(spill);
-  }
-
-  void push_back(ManagedRegister x, int32_t size) {
-    ManagedRegisterSpill spill(x, size);
-    std::vector<ManagedRegisterSpill>::push_back(spill);
-  }
-
-  void push_back(ManagedRegisterSpill x) {
-    std::vector<ManagedRegisterSpill>::push_back(x);
-  }
- private:
-};
-
 }  // namespace art
 
 #endif  // ART_COMPILER_UTILS_MANAGED_REGISTER_H_
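
With ManagedRegisterSpill and ManagedRegisterEntrySpills gone, spilling incoming arguments becomes ordinary stores that the JNI compiler emits right after BuildFrame. A sketch of the caller side, using only the remaining JNIMacroAssembler methods (arg_reg and the offset are illustrative; the updated x86-64 assembler test below does exactly this):

    assembler->BuildFrame(frame_size, method_reg, spill_regs);
    // Spill an incoming argument register into its home slot above the new frame.
    assembler->Store(FrameOffset(frame_size + 0u), arg_reg, /* size= */ 8u);
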
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index f4ea004..e4ce338 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -24,6 +24,12 @@
 namespace art {
 namespace x86 {
 
+static Register GetScratchRegister() {
+  // ECX is an argument register on entry and gets spilled in BuildFrame().
+  // After that, we can use it as a scratch register.
+  return ECX;
+}
+
 // Slowpath entered when Thread::Current()->exception_ is non-null
 class X86ExceptionSlowPath final : public SlowPath {
  public:
@@ -46,8 +52,7 @@
 
 void X86JNIMacroAssembler::BuildFrame(size_t frame_size,
                                       ManagedRegister method_reg,
-                                      ArrayRef<const ManagedRegister> spill_regs,
-                                      const ManagedRegisterEntrySpills& entry_spills) {
+                                      ArrayRef<const ManagedRegister> spill_regs) {
   DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
   cfi().SetCurrentCFAOffset(4);  // Return address on stack.
   if (frame_size == kFramePointerSize) {
@@ -81,21 +86,6 @@
     cfi().AdjustCFAOffset(kFramePointerSize);
   }
   DCHECK_EQ(static_cast<size_t>(cfi().GetCurrentCFAOffset()), frame_size);
-
-  for (const ManagedRegisterSpill& spill : entry_spills) {
-    if (spill.AsX86().IsCpuRegister()) {
-      int offset = frame_size + spill.getSpillOffset();
-      __ movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
-    } else {
-      DCHECK(spill.AsX86().IsXmmRegister());
-      if (spill.getSize() == 8) {
-        __ movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        __ movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      }
-    }
-  }
 }
 
 void X86JNIMacroAssembler::RemoveFrame(size_t frame_size,
@@ -180,17 +170,14 @@
   __ movl(Address(ESP, dest), src.AsCpuRegister());
 }
 
-void X86JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister) {
+void X86JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) {
   __ movl(Address(ESP, dest), Immediate(imm));
 }
 
-void X86JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
-                                                    FrameOffset fr_offs,
-                                                    ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  __ leal(scratch.AsCpuRegister(), Address(ESP, fr_offs));
-  __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
+void X86JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) {
+  Register scratch = GetScratchRegister();
+  __ leal(scratch, Address(ESP, fr_offs));
+  __ fs()->movl(Address::Absolute(thr_offs), scratch);
 }
 
 void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
@@ -199,8 +186,7 @@
 
 void X86JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
                                          ManagedRegister /*src*/,
-                                         FrameOffset /*in_off*/,
-                                         ManagedRegister /*scratch*/) {
+                                         FrameOffset /*in_off*/) {
   UNIMPLEMENTED(FATAL);  // This case currently only exists for ARM.
 }
 
@@ -315,6 +301,7 @@
 }
 
 void X86JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  DCHECK(!mdest.Equals(X86ManagedRegister::FromCpuRegister(GetScratchRegister())));
   X86ManagedRegister dest = mdest.AsX86();
   X86ManagedRegister src = msrc.AsX86();
   if (!dest.Equals(src)) {
@@ -340,20 +327,28 @@
   }
 }
 
-void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  __ movl(scratch.AsCpuRegister(), Address(ESP, src));
-  __ movl(Address(ESP, dest), scratch.AsCpuRegister());
+void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
+  Register scratch = GetScratchRegister();
+  __ movl(scratch, Address(ESP, src));
+  __ movl(Address(ESP, dest), scratch);
 }
 
-void X86JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
-                                                ThreadOffset32 thr_offs,
-                                                ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs));
-  Store(fr_offs, scratch, 4);
+void X86JNIMacroAssembler::CopyRef(FrameOffset dest,
+                                   ManagedRegister base,
+                                   MemberOffset offs,
+                                   bool unpoison_reference) {
+  Register scratch = GetScratchRegister();
+  __ movl(scratch, Address(base.AsX86().AsCpuRegister(), offs));
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(scratch);
+  }
+  __ movl(Address(ESP, dest), scratch);
+}
+
+void X86JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) {
+  Register scratch = GetScratchRegister();
+  __ fs()->movl(scratch, Address::Absolute(thr_offs));
+  __ movl(Address(ESP, fr_offs), scratch);
 }
 
 void X86JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
@@ -365,18 +360,14 @@
   __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
 }
 
-void X86JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src,
-                        ManagedRegister mscratch,
-                        size_t size) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  if (scratch.IsCpuRegister() && size == 8) {
-    Load(scratch, src, 4);
-    Store(dest, scratch, 4);
-    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
-    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
-  } else {
-    Load(scratch, src, size);
-    Store(dest, scratch, size);
+void X86JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
+  DCHECK(size == 4 || size == 8) << size;
+  Register scratch = GetScratchRegister();
+  __ movl(scratch, Address(ESP, src));
+  __ movl(Address(ESP, dest), scratch);
+  if (size == 8) {
+    __ movl(scratch, Address(ESP, FrameOffset(src.Int32Value() + 4)));
+    __ movl(Address(ESP, FrameOffset(dest.Int32Value() + 4)), scratch);
   }
 }
 
@@ -466,21 +457,19 @@
 
 void X86JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
                                                   FrameOffset handle_scope_offset,
-                                                  ManagedRegister mscratch,
                                                   bool null_allowed) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
+  Register scratch = GetScratchRegister();
   if (null_allowed) {
     Label null_arg;
-    __ movl(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
-    __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
+    __ movl(scratch, Address(ESP, handle_scope_offset));
+    __ testl(scratch, scratch);
     __ j(kZero, &null_arg);
-    __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ leal(scratch, Address(ESP, handle_scope_offset));
     __ Bind(&null_arg);
   } else {
-    __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ leal(scratch, Address(ESP, handle_scope_offset));
   }
-  Store(out_off, scratch, 4);
+  __ movl(Address(ESP, out_off), scratch);
 }
 
 // Given a handle scope entry, load the associated reference.
@@ -508,42 +497,41 @@
   // TODO: not validating references
 }
 
-void X86JNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset, ManagedRegister) {
+void X86JNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset) {
   X86ManagedRegister base = mbase.AsX86();
   CHECK(base.IsCpuRegister());
   __ jmp(Address(base.AsCpuRegister(), offset.Int32Value()));
 }
 
-void X86JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
+void X86JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset) {
   X86ManagedRegister base = mbase.AsX86();
   CHECK(base.IsCpuRegister());
   __ call(Address(base.AsCpuRegister(), offset.Int32Value()));
   // TODO: place reference map on call
 }
 
-void X86JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
+void X86JNIMacroAssembler::Call(FrameOffset base, Offset offset) {
+  Register scratch = GetScratchRegister();
   __ movl(scratch, Address(ESP, base));
   __ call(Address(scratch, offset));
 }
 
-void X86JNIMacroAssembler::CallFromThread(ThreadOffset32 offset, ManagedRegister /*mscratch*/) {
+void X86JNIMacroAssembler::CallFromThread(ThreadOffset32 offset) {
   __ fs()->call(Address::Absolute(offset));
 }
 
-void X86JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
-  __ fs()->movl(tr.AsX86().AsCpuRegister(),
+void X86JNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
+  __ fs()->movl(dest.AsX86().AsCpuRegister(),
                 Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
 }
 
-void X86JNIMacroAssembler::GetCurrentThread(FrameOffset offset,
-                                    ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
-  __ movl(Address(ESP, offset), scratch.AsCpuRegister());
+void X86JNIMacroAssembler::GetCurrentThread(FrameOffset offset) {
+  Register scratch = GetScratchRegister();
+  __ fs()->movl(scratch, Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
+  __ movl(Address(ESP, offset), scratch);
 }
 
-void X86JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+void X86JNIMacroAssembler::ExceptionPoll(size_t stack_adjust) {
   X86ExceptionSlowPath* slow = new (__ GetAllocator()) X86ExceptionSlowPath(stack_adjust);
   __ GetBuffer()->EnqueueSlowPath(slow);
   __ fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()), Immediate(0));
@@ -559,13 +547,11 @@
   __ jmp(X86JNIMacroLabel::Cast(label)->AsX86());
 }
 
-void X86JNIMacroAssembler::Jump(JNIMacroLabel* label,
-                                JNIMacroUnaryCondition condition,
-                                ManagedRegister test) {
+void X86JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
   CHECK(label != nullptr);
 
   art::x86::Condition x86_cond;
-  switch (condition) {
+  switch (cond) {
     case JNIMacroUnaryCondition::kZero:
       x86_cond = art::x86::kZero;
       break;
@@ -573,18 +559,19 @@
       x86_cond = art::x86::kNotZero;
       break;
     default:
-      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(condition);
+      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(cond);
       UNREACHABLE();
   }
 
+  // TODO: Compare the memory location with immediate 0.
+  Register scratch = GetScratchRegister();
+  DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
+  __ fs()->movl(scratch, Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()));
+
   // TEST reg, reg
   // Jcc <Offset>
-  __ testl(test.AsX86().AsCpuRegister(), test.AsX86().AsCpuRegister());
+  __ testl(scratch, scratch);
   __ j(x86_cond, X86JNIMacroLabel::Cast(label)->AsX86());
-
-
-  // X86 also has JCZX, JECZX, however it's not worth it to implement
-  // because we aren't likely to codegen with ECX+kZero check.
 }
 
 void X86JNIMacroAssembler::Bind(JNIMacroLabel* label) {
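
The TODO in TestGcMarking above notes that the load-and-test pair could collapse into a single compare of the memory location against zero, mirroring the pattern ExceptionPoll already uses. A sketch of that future form (assumed, not emitted by this change):

    __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()), Immediate(0));
    __ j(x86_cond, X86JNIMacroLabel::Cast(label)->AsX86());
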
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index 7bf2f98..1223471 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -44,8 +44,7 @@
   // Emit code that will create an activation on the stack
   void BuildFrame(size_t frame_size,
                   ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) override;
+                  ArrayRef<const ManagedRegister> callee_save_regs) override;
 
   // Emit code that will remove an activation from the stack
   void RemoveFrame(size_t frame_size,
@@ -60,16 +59,13 @@
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
 
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override;
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
 
-  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
-                                FrameOffset fr_offs,
-                                ManagedRegister scratch) override;
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) override;
 
   void StoreStackPointerToThread(ThreadOffset32 thr_offs) override;
 
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) override;
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
   // Load routines
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
@@ -88,16 +84,18 @@
   // Copying routines
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
 
-  void CopyRawPtrFromThread(FrameOffset fr_offs,
-                            ThreadOffset32 thr_offs,
-                            ManagedRegister scratch) override;
+  void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override;
 
   void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       override;
 
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override;
+  void CopyRef(FrameOffset dest, FrameOffset src) override;
+  void CopyRef(FrameOffset dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) override;
 
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override;
+  void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
 
   void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
             size_t size) override;
@@ -123,20 +121,23 @@
   void ZeroExtend(ManagedRegister mreg, size_t size) override;
 
   // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) override;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) override;
+  void GetCurrentThread(ManagedRegister dest) override;
+  void GetCurrentThread(FrameOffset dest_offset) override;
 
   // Set up out_reg to hold an Object** into the handle scope, or to be null if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
   // that can be used to avoid loading the handle scope entry to see if the value is
   // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) override;
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) override;
 
   // Set up out_off to hold an Object** into the handle scope, or to be null if the
   // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister scratch, bool null_allowed) override;
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              bool null_allowed) override;
 
   // src holds a handle scope entry (Object**); load it into dst
   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) override;
@@ -147,23 +148,23 @@
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
   // Jump to address held at [base+offset] (used for tail calls).
-  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+  void Jump(ManagedRegister base, Offset offset) override;
 
   // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;
-  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) override;
+  void Call(ManagedRegister base, Offset offset) override;
+  void Call(FrameOffset base, Offset offset) override;
+  void CallFromThread(ThreadOffset32 offset) override;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to an ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override;
+  void ExceptionPoll(size_t stack_adjust) override;
 
   // Create a new label that can be used with Jump/Bind calls.
   std::unique_ptr<JNIMacroLabel> CreateLabel() override;
   // Emit an unconditional jump to the label.
   void Jump(JNIMacroLabel* label) override;
-  // Emit a conditional jump to the label by applying a unary condition test to the register.
-  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override;
+  // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag.
+  void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;
 
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 993cf95..411d64b 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -2334,19 +2334,15 @@
   };
   ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
 
-  // Three random entry spills.
-  ManagedRegisterEntrySpills entry_spills;
-  ManagedRegisterSpill spill(ManagedFromCpu(x86_64::RAX), 8, 0);
-  entry_spills.push_back(spill);
-  ManagedRegisterSpill spill2(ManagedFromCpu(x86_64::RBX), 8, 8);
-  entry_spills.push_back(spill2);
-  ManagedRegisterSpill spill3(ManagedFromFpu(x86_64::XMM1), 8, 16);
-  entry_spills.push_back(spill3);
-
   x86_64::X86_64ManagedRegister method_reg = ManagedFromCpu(x86_64::RDI);
 
   size_t frame_size = 10 * kStackAlignment;
-  assembler->BuildFrame(frame_size, method_reg, spill_regs, entry_spills);
+  assembler->BuildFrame(frame_size, method_reg, spill_regs);
+
+  // Three random entry spills.
+  assembler->Store(FrameOffset(frame_size + 0u), ManagedFromCpu(x86_64::RAX), /* size= */ 8u);
+  assembler->Store(FrameOffset(frame_size + 8u), ManagedFromCpu(x86_64::RBX), /* size= */ 8u);
+  assembler->Store(FrameOffset(frame_size + 16u), ManagedFromFpu(x86_64::XMM1), /* size= */ 8u);
 
   // Construct assembly text counterpart.
   std::ostringstream str;
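
Since kNativeStackAlignment is 16 and matches kStackAlignment, frame_size here is 160 bytes, so the three stores should disassemble roughly as follows (a sketch of the expected text; exact formatting depends on the disassembler):

    movq %rax, 0xa0(%rsp)
    movq %rbx, 0xa8(%rsp)
    movsd %xmm1, 0xb0(%rsp)
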
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index ffe9020..ee4ae2e 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -36,12 +36,16 @@
 static constexpr size_t kNativeStackAlignment = 16;
 static_assert(kNativeStackAlignment == kStackAlignment);
 
+static inline CpuRegister GetScratchRegister() {
+  // TODO: Use R11 in line with Optimizing.
+  return CpuRegister(RAX);
+}
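
Resolving the TODO would presumably change only the returned register, R11 being caller-save and unused for argument passing on x86-64 (a sketch under that assumption):

    static inline CpuRegister GetScratchRegister() {
      return CpuRegister(R11);  // Scratch register used by the Optimizing compiler.
    }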
+
 #define __ asm_.
 
 void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size,
                                          ManagedRegister method_reg,
-                                         ArrayRef<const ManagedRegister> spill_regs,
-                                         const ManagedRegisterEntrySpills& entry_spills) {
+                                         ArrayRef<const ManagedRegister> spill_regs) {
   DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
   cfi().SetCurrentCFAOffset(8);  // Return address on stack.
   // Note: @CriticalNative tail call is not used (would have frame_size == kFramePointerSize).
@@ -86,28 +90,6 @@
   if (method_reg.IsRegister()) {
     __ movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
   }
-
-  for (const ManagedRegisterSpill& spill : entry_spills) {
-    if (spill.AsX86_64().IsCpuRegister()) {
-      if (spill.getSize() == 8) {
-        __ movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
-                spill.AsX86_64().AsCpuRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        __ movl(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
-                spill.AsX86_64().AsCpuRegister());
-      }
-    } else {
-      if (spill.getSize() == 8) {
-        __ movsd(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
-                 spill.AsX86_64().AsXmmRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        __ movss(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()),
-                 spill.AsX86_64().AsXmmRegister());
-      }
-    }
-  }
 }
 
 void X86_64JNIMacroAssembler::RemoveFrame(size_t frame_size,
@@ -215,19 +197,15 @@
   __ movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
 }
 
-void X86_64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest,
-                                                    uint32_t imm,
-                                                    ManagedRegister) {
+void X86_64JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) {
   __ movl(Address(CpuRegister(RSP), dest), Immediate(imm));  // TODO(64) movq?
 }
 
 void X86_64JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset64 thr_offs,
-                                                       FrameOffset fr_offs,
-                                                       ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), fr_offs));
-  __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
+                                                       FrameOffset fr_offs) {
+  CpuRegister scratch = GetScratchRegister();
+  __ leaq(scratch, Address(CpuRegister(RSP), fr_offs));
+  __ gs()->movq(Address::Absolute(thr_offs, true), scratch);
 }
 
 void X86_64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 thr_offs) {
@@ -236,8 +214,7 @@
 
 void X86_64JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
                                             ManagedRegister /*src*/,
-                                            FrameOffset /*in_off*/,
-                                            ManagedRegister /*scratch*/) {
+                                            FrameOffset /*in_off*/) {
   UNIMPLEMENTED(FATAL);  // This case currently only exists for ARM.
 }
 
@@ -363,6 +340,7 @@
 }
 
 void X86_64JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  DCHECK(!mdest.Equals(X86_64ManagedRegister::FromCpuRegister(GetScratchRegister().AsRegister())));
   X86_64ManagedRegister dest = mdest.AsX86_64();
   X86_64ManagedRegister src = msrc.AsX86_64();
   if (!dest.Equals(src)) {
@@ -388,20 +366,28 @@
   }
 }
 
-void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  __ movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), src));
-  __ movl(Address(CpuRegister(RSP), dest), scratch.AsCpuRegister());
+void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
+  CpuRegister scratch = GetScratchRegister();
+  __ movl(scratch, Address(CpuRegister(RSP), src));
+  __ movl(Address(CpuRegister(RSP), dest), scratch);
 }
 
-void X86_64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
-                                                   ThreadOffset64 thr_offs,
-                                                   ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
-  __ gs()->movq(scratch.AsCpuRegister(), Address::Absolute(thr_offs, true));
-  Store(fr_offs, scratch, 8);
+void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest,
+                                      ManagedRegister base,
+                                      MemberOffset offs,
+                                      bool unpoison_reference) {
+  CpuRegister scratch = GetScratchRegister();
+  __ movl(scratch, Address(base.AsX86_64().AsCpuRegister(), offs));
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(scratch);
+  }
+  __ movl(Address(CpuRegister(RSP), dest), scratch);
+}
+
+void X86_64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) {
+  CpuRegister scratch = GetScratchRegister();
+  __ gs()->movq(scratch, Address::Absolute(thr_offs, true));
+  __ movq(Address(CpuRegister(RSP), fr_offs), scratch);
 }
 
 void X86_64JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset64 thr_offs,
@@ -413,19 +399,18 @@
   __ gs()->movq(Address::Absolute(thr_offs, true), scratch.AsCpuRegister());
 }
 
-void X86_64JNIMacroAssembler::Copy(FrameOffset dest,
-                                   FrameOffset src,
-                                   ManagedRegister mscratch,
-                                   size_t size) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  if (scratch.IsCpuRegister() && size == 8) {
-    Load(scratch, src, 4);
-    Store(dest, scratch, 4);
-    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
-    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
+void X86_64JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
+  DCHECK(size == 4 || size == 8) << size;
+  CpuRegister scratch = GetScratchRegister();
+  if (size == 8) {
+    // TODO: Use MOVQ.
+    __ movl(scratch, Address(CpuRegister(RSP), src));
+    __ movl(Address(CpuRegister(RSP), dest), scratch);
+    __ movl(scratch, Address(CpuRegister(RSP), FrameOffset(src.Int32Value() + 4)));
+    __ movl(Address(CpuRegister(RSP), FrameOffset(dest.Int32Value() + 4)), scratch);
   } else {
-    Load(scratch, src, size);
-    Store(dest, scratch, size);
+    __ movl(scratch, Address(CpuRegister(RSP), src));
+    __ movl(Address(CpuRegister(RSP), dest), scratch);
   }
 }
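
Per the TODO, the 8-byte case could use one 64-bit move pair instead of two 32-bit pairs; a sketch of that variant (assumed here, since this change deliberately preserves the emitted code):

    __ movq(scratch, Address(CpuRegister(RSP), src));
    __ movq(Address(CpuRegister(RSP), dest), scratch);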
 
@@ -521,21 +506,19 @@
 
 void X86_64JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
                                                      FrameOffset handle_scope_offset,
-                                                     ManagedRegister mscratch,
                                                      bool null_allowed) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  CHECK(scratch.IsCpuRegister());
+  CpuRegister scratch = GetScratchRegister();
   if (null_allowed) {
     Label null_arg;
-    __ movl(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
-    __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
+    __ movl(scratch, Address(CpuRegister(RSP), handle_scope_offset));
+    __ testl(scratch, scratch);
     __ j(kZero, &null_arg);
-    __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ leaq(scratch, Address(CpuRegister(RSP), handle_scope_offset));
     __ Bind(&null_arg);
   } else {
-    __ leaq(scratch.AsCpuRegister(), Address(CpuRegister(RSP), handle_scope_offset));
+    __ leaq(scratch, Address(CpuRegister(RSP), handle_scope_offset));
   }
-  Store(out_off, scratch, 8);
+  __ movq(Address(CpuRegister(RSP), out_off), scratch);
 }
 
 // Given a handle scope entry, load the associated reference.
@@ -563,39 +546,38 @@
   // TODO: not validating references
 }
 
-void X86_64JNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset, ManagedRegister) {
+void X86_64JNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset) {
   X86_64ManagedRegister base = mbase.AsX86_64();
   CHECK(base.IsCpuRegister());
   __ jmp(Address(base.AsCpuRegister(), offset.Int32Value()));
 }
 
-void X86_64JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
+void X86_64JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset) {
   X86_64ManagedRegister base = mbase.AsX86_64();
   CHECK(base.IsCpuRegister());
   __ call(Address(base.AsCpuRegister(), offset.Int32Value()));
   // TODO: place reference map on call
 }
 
-void X86_64JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
+void X86_64JNIMacroAssembler::Call(FrameOffset base, Offset offset) {
+  CpuRegister scratch = GetScratchRegister();
   __ movq(scratch, Address(CpuRegister(RSP), base));
   __ call(Address(scratch, offset));
 }
 
-void X86_64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset, ManagedRegister /*mscratch*/) {
+void X86_64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset) {
   __ gs()->call(Address::Absolute(offset, true));
 }
 
-void X86_64JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
-  __ gs()->movq(tr.AsX86_64().AsCpuRegister(),
+void X86_64JNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
+  __ gs()->movq(dest.AsX86_64().AsCpuRegister(),
                 Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
 }
 
-void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegister mscratch) {
-  X86_64ManagedRegister scratch = mscratch.AsX86_64();
-  __ gs()->movq(scratch.AsCpuRegister(),
-                Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
-  __ movq(Address(CpuRegister(RSP), offset), scratch.AsCpuRegister());
+void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset) {
+  CpuRegister scratch = GetScratchRegister();
+  __ gs()->movq(scratch, Address::Absolute(Thread::SelfOffset<kX86_64PointerSize>(), true));
+  __ movq(Address(CpuRegister(RSP), offset), scratch);
 }
 
 // Slowpath entered when Thread::Current()->exception_ is non-null
@@ -607,7 +589,7 @@
   const size_t stack_adjust_;
 };
 
-void X86_64JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+void X86_64JNIMacroAssembler::ExceptionPoll(size_t stack_adjust) {
   X86_64ExceptionSlowPath* slow = new (__ GetAllocator()) X86_64ExceptionSlowPath(stack_adjust);
   __ GetBuffer()->EnqueueSlowPath(slow);
   __ gs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>(), true),
@@ -624,13 +606,11 @@
   __ jmp(X86_64JNIMacroLabel::Cast(label)->AsX86_64());
 }
 
-void X86_64JNIMacroAssembler::Jump(JNIMacroLabel* label,
-                                   JNIMacroUnaryCondition condition,
-                                   ManagedRegister test) {
+void X86_64JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
   CHECK(label != nullptr);
 
   art::x86_64::Condition x86_64_cond;
-  switch (condition) {
+  switch (cond) {
     case JNIMacroUnaryCondition::kZero:
       x86_64_cond = art::x86_64::kZero;
       break;
@@ -638,13 +618,18 @@
       x86_64_cond = art::x86_64::kNotZero;
       break;
     default:
-      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(condition);
+      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(cond);
       UNREACHABLE();
   }
 
+  // TODO: Compare the memory location with immediate 0.
+  CpuRegister scratch = GetScratchRegister();
+  DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
+  __ gs()->movl(scratch, Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), true));
+
   // TEST reg, reg
   // Jcc <Offset>
-  __ testq(test.AsX86_64().AsCpuRegister(), test.AsX86_64().AsCpuRegister());
+  __ testq(scratch, scratch);
   __ j(x86_64_cond, X86_64JNIMacroLabel::Cast(label)->AsX86_64());
 }
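
As on x86, the TODO suggests folding the load and test into a single compare of the flag against zero, along the lines of (assumed):

    __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), true),
                  Immediate(0));
    __ j(x86_64_cond, X86_64JNIMacroLabel::Cast(label)->AsX86_64());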
 
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index d3f1fce..4592eba 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -45,8 +45,7 @@
   // Emit code that will create an activation on the stack
   void BuildFrame(size_t frame_size,
                   ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) override;
+                  ArrayRef<const ManagedRegister> callee_save_regs) override;
 
   // Emit code that will remove an activation from the stack
   void RemoveFrame(size_t frame_size,
@@ -61,18 +60,13 @@
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
 
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override;
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
 
-  void StoreStackOffsetToThread(ThreadOffset64 thr_offs,
-                                FrameOffset fr_offs,
-                                ManagedRegister scratch) override;
+  void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs) override;
 
   void StoreStackPointerToThread(ThreadOffset64 thr_offs) override;
 
-  void StoreSpanning(FrameOffset dest,
-                     ManagedRegister src,
-                     FrameOffset in_off,
-                     ManagedRegister scratch) override;
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
   // Load routines
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
@@ -93,16 +87,18 @@
   // Copying routines
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
 
-  void CopyRawPtrFromThread(FrameOffset fr_offs,
-                            ThreadOffset64 thr_offs,
-                            ManagedRegister scratch) override;
+  void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override;
 
   void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       override;
 
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override;
+  void CopyRef(FrameOffset dest, FrameOffset src) override;
+  void CopyRef(FrameOffset dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) override;
 
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override;
+  void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
 
   void Copy(FrameOffset dest,
             ManagedRegister src_base,
@@ -145,8 +141,8 @@
   void ZeroExtend(ManagedRegister mreg, size_t size) override;
 
   // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) override;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) override;
+  void GetCurrentThread(ManagedRegister dest) override;
+  void GetCurrentThread(FrameOffset dest_offset) override;
 
   // Set up out_reg to hold an Object** into the handle scope, or to be null if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
@@ -161,7 +157,6 @@
   // value is null and null_allowed.
   void CreateHandleScopeEntry(FrameOffset out_off,
                               FrameOffset handlescope_offset,
-                              ManagedRegister scratch,
                               bool null_allowed) override;
 
   // src holds a handle scope entry (Object**); load it into dst
@@ -173,23 +168,23 @@
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
   // Jump to address held at [base+offset] (used for tail calls).
-  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+  void Jump(ManagedRegister base, Offset offset) override;
 
   // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;
-  void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) override;
+  void Call(ManagedRegister base, Offset offset) override;
+  void Call(FrameOffset base, Offset offset) override;
+  void CallFromThread(ThreadOffset64 offset) override;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to an ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override;
+  void ExceptionPoll(size_t stack_adjust) override;
 
   // Create a new label that can be used with Jump/Bind calls.
   std::unique_ptr<JNIMacroLabel> CreateLabel() override;
   // Emit an unconditional jump to the label.
   void Jump(JNIMacroLabel* label) override;
-  // Emit a conditional jump to the label by applying a unary condition test to the register.
-  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override;
+  // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag.
+  void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;