Move entry spills determination to JNI compiler.

The calling convention no longer describes entry spills, as
spilling is now the JNI compiler's responsibility. This allows
future improvements, such as spilling argument registers directly
to the HandleScope or to outgoing stack args.
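
As a rough sketch of the new division of labor (the argument
iterator below is a stand-in, not the actual ART API), frame setup
and argument spilling become two separate steps driven by the JNI
compiler instead of a single BuildFrame() call:

  // Before: __ BuildFrame(frame_size, method_reg, callee_saves, entry_spills);
  // After: the JNI compiler emits the spills itself and is free to
  // choose other destinations (HandleScope entry, outgoing stack arg)
  // per argument in later changes.
  __ BuildFrame(frame_size, method_reg, callee_save_regs);
  for (const ArgumentSpill& arg : register_args) {  // hypothetical iterator
    __ Store(arg.frame_offset, arg.reg, arg.size);
  }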

Remove the notion of an interprocedural scratch register from the
calling conventions and let the assemblers deal with all scratch
register uses. The JNI assembler APIs that still take explicit
scratch registers are currently unused and can be removed.
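
For example, a call site that previously had to thread a scratch
register from the calling convention through the assembler API now
just calls the assembler, which picks its own scratch internally
(ECX on x86 in this change). A sketch, with the old form shown as a
comment:

  // Old form (sketch): scratch supplied by the calling convention.
  //   __ CopyRef(dest, src, main_jni_conv->InterproceduralScratchRegister());
  // New form: the assembler owns the choice of scratch register.
  __ CopyRef(dest, src);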

Also fix a bug in the disassembly comparison used by tests: two
files were considered identical if the second one merely contained
additional trailing data.
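
A minimal sketch of the intended comparison (illustrative only, not
the actual test helper), where a file that merely extends the other
must compare unequal:

  #include <fstream>
  #include <iterator>
  #include <string>

  // Reads both files fully and compares byte-for-byte, so a length
  // difference (extra trailing data) is reported as a mismatch.
  bool FileContentsEqual(const std::string& path_a, const std::string& path_b) {
    std::ifstream a(path_a, std::ios::binary);
    std::ifstream b(path_b, std::ios::binary);
    std::string data_a((std::istreambuf_iterator<char>(a)),
                       std::istreambuf_iterator<char>());
    std::string data_b((std::istreambuf_iterator<char>(b)),
                       std::istreambuf_iterator<char>());
    return data_a == data_b;
  }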

This change fully preserves the generated code and adds TODO
comments where preserving it results in awkward or suboptimal code.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: Prebuilt boot image is unchanged.
Test: aosp_taimen-userdebug boots.
Bug: 12189621
Change-Id: Ic26a670276920313cd907a6eda8d982cf0abfd81
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index f4ea004..e4ce338 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -24,6 +24,12 @@
 namespace art {
 namespace x86 {
 
+static Register GetScratchRegister() {
+  // ECX is an argument register on entry and gets spilled in BuildFrame().
+  // After that, we can use it as a scratch register.
+  return ECX;
+}
+
 // Slowpath entered when Thread::Current()->_exception is non-null
 class X86ExceptionSlowPath final : public SlowPath {
  public:
@@ -46,8 +52,7 @@
 
 void X86JNIMacroAssembler::BuildFrame(size_t frame_size,
                                       ManagedRegister method_reg,
-                                      ArrayRef<const ManagedRegister> spill_regs,
-                                      const ManagedRegisterEntrySpills& entry_spills) {
+                                      ArrayRef<const ManagedRegister> spill_regs) {
   DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
   cfi().SetCurrentCFAOffset(4);  // Return address on stack.
   if (frame_size == kFramePointerSize) {
@@ -81,21 +86,6 @@
     cfi().AdjustCFAOffset(kFramePointerSize);
   }
   DCHECK_EQ(static_cast<size_t>(cfi().GetCurrentCFAOffset()), frame_size);
-
-  for (const ManagedRegisterSpill& spill : entry_spills) {
-    if (spill.AsX86().IsCpuRegister()) {
-      int offset = frame_size + spill.getSpillOffset();
-      __ movl(Address(ESP, offset), spill.AsX86().AsCpuRegister());
-    } else {
-      DCHECK(spill.AsX86().IsXmmRegister());
-      if (spill.getSize() == 8) {
-        __ movsd(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      } else {
-        CHECK_EQ(spill.getSize(), 4);
-        __ movss(Address(ESP, frame_size + spill.getSpillOffset()), spill.AsX86().AsXmmRegister());
-      }
-    }
-  }
 }
 
 void X86JNIMacroAssembler::RemoveFrame(size_t frame_size,
@@ -180,17 +170,14 @@
   __ movl(Address(ESP, dest), src.AsCpuRegister());
 }
 
-void X86JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister) {
+void X86JNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm) {
   __ movl(Address(ESP, dest), Immediate(imm));
 }
 
-void X86JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs,
-                                                    FrameOffset fr_offs,
-                                                    ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  __ leal(scratch.AsCpuRegister(), Address(ESP, fr_offs));
-  __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
+void X86JNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) {
+  Register scratch = GetScratchRegister();
+  __ leal(scratch, Address(ESP, fr_offs));
+  __ fs()->movl(Address::Absolute(thr_offs), scratch);
 }
 
 void X86JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) {
@@ -199,8 +186,7 @@
 
 void X86JNIMacroAssembler::StoreSpanning(FrameOffset /*dst*/,
                                          ManagedRegister /*src*/,
-                                         FrameOffset /*in_off*/,
-                                         ManagedRegister /*scratch*/) {
+                                         FrameOffset /*in_off*/) {
   UNIMPLEMENTED(FATAL);  // this case only currently exists for ARM
 }
 
@@ -315,6 +301,7 @@
 }
 
 void X86JNIMacroAssembler::Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) {
+  DCHECK(!mdest.Equals(X86ManagedRegister::FromCpuRegister(GetScratchRegister())));
   X86ManagedRegister dest = mdest.AsX86();
   X86ManagedRegister src = msrc.AsX86();
   if (!dest.Equals(src)) {
@@ -340,20 +327,28 @@
   }
 }
 
-void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  __ movl(scratch.AsCpuRegister(), Address(ESP, src));
-  __ movl(Address(ESP, dest), scratch.AsCpuRegister());
+void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
+  Register scratch = GetScratchRegister();
+  __ movl(scratch, Address(ESP, src));
+  __ movl(Address(ESP, dest), scratch);
 }
 
-void X86JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs,
-                                                ThreadOffset32 thr_offs,
-                                                ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
-  __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(thr_offs));
-  Store(fr_offs, scratch, 4);
+void X86JNIMacroAssembler::CopyRef(FrameOffset dest,
+                                   ManagedRegister base,
+                                   MemberOffset offs,
+                                   bool unpoison_reference) {
+  Register scratch = GetScratchRegister();
+  __ movl(scratch, Address(base.AsX86().AsCpuRegister(), offs));
+  if (unpoison_reference) {
+    __ MaybeUnpoisonHeapReference(scratch);
+  }
+  __ movl(Address(ESP, dest), scratch);
+}
+
+void X86JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) {
+  Register scratch = GetScratchRegister();
+  __ fs()->movl(scratch, Address::Absolute(thr_offs));
+  __ movl(Address(ESP, fr_offs), scratch);
 }
 
 void X86JNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs,
@@ -365,18 +360,14 @@
   __ fs()->movl(Address::Absolute(thr_offs), scratch.AsCpuRegister());
 }
 
-void X86JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src,
-                        ManagedRegister mscratch,
-                        size_t size) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  if (scratch.IsCpuRegister() && size == 8) {
-    Load(scratch, src, 4);
-    Store(dest, scratch, 4);
-    Load(scratch, FrameOffset(src.Int32Value() + 4), 4);
-    Store(FrameOffset(dest.Int32Value() + 4), scratch, 4);
-  } else {
-    Load(scratch, src, size);
-    Store(dest, scratch, size);
+void X86JNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
+  DCHECK(size == 4 || size == 8) << size;
+  Register scratch = GetScratchRegister();
+  __ movl(scratch, Address(ESP, src));
+  __ movl(Address(ESP, dest), scratch);
+  if (size == 8) {
+    __ movl(scratch, Address(ESP, FrameOffset(src.Int32Value() + 4)));
+    __ movl(Address(ESP, FrameOffset(dest.Int32Value() + 4)), scratch);
   }
 }
 
@@ -466,21 +457,19 @@
 
 void X86JNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off,
                                                   FrameOffset handle_scope_offset,
-                                                  ManagedRegister mscratch,
                                                   bool null_allowed) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  CHECK(scratch.IsCpuRegister());
+  Register scratch = GetScratchRegister();
   if (null_allowed) {
     Label null_arg;
-    __ movl(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
-    __ testl(scratch.AsCpuRegister(), scratch.AsCpuRegister());
+    __ movl(scratch, Address(ESP, handle_scope_offset));
+    __ testl(scratch, scratch);
     __ j(kZero, &null_arg);
-    __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ leal(scratch, Address(ESP, handle_scope_offset));
     __ Bind(&null_arg);
   } else {
-    __ leal(scratch.AsCpuRegister(), Address(ESP, handle_scope_offset));
+    __ leal(scratch, Address(ESP, handle_scope_offset));
   }
-  Store(out_off, scratch, 4);
+  __ movl(Address(ESP, out_off), scratch);
 }
 
 // Given a handle scope entry, load the associated reference.
@@ -508,42 +497,41 @@
   // TODO: not validating references
 }
 
-void X86JNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset, ManagedRegister) {
+void X86JNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset) {
   X86ManagedRegister base = mbase.AsX86();
   CHECK(base.IsCpuRegister());
   __ jmp(Address(base.AsCpuRegister(), offset.Int32Value()));
 }
 
-void X86JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
+void X86JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset) {
   X86ManagedRegister base = mbase.AsX86();
   CHECK(base.IsCpuRegister());
   __ call(Address(base.AsCpuRegister(), offset.Int32Value()));
   // TODO: place reference map on call
 }
 
-void X86JNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
+void X86JNIMacroAssembler::Call(FrameOffset base, Offset offset) {
+  Register scratch = GetScratchRegister();
   __ movl(scratch, Address(ESP, base));
   __ call(Address(scratch, offset));
 }
 
-void X86JNIMacroAssembler::CallFromThread(ThreadOffset32 offset, ManagedRegister /*mscratch*/) {
+void X86JNIMacroAssembler::CallFromThread(ThreadOffset32 offset) {
   __ fs()->call(Address::Absolute(offset));
 }
 
-void X86JNIMacroAssembler::GetCurrentThread(ManagedRegister tr) {
-  __ fs()->movl(tr.AsX86().AsCpuRegister(),
+void X86JNIMacroAssembler::GetCurrentThread(ManagedRegister dest) {
+  __ fs()->movl(dest.AsX86().AsCpuRegister(),
                 Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
 }
 
-void X86JNIMacroAssembler::GetCurrentThread(FrameOffset offset,
-                                    ManagedRegister mscratch) {
-  X86ManagedRegister scratch = mscratch.AsX86();
-  __ fs()->movl(scratch.AsCpuRegister(), Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
-  __ movl(Address(ESP, offset), scratch.AsCpuRegister());
+void X86JNIMacroAssembler::GetCurrentThread(FrameOffset offset) {
+  Register scratch = GetScratchRegister();
+  __ fs()->movl(scratch, Address::Absolute(Thread::SelfOffset<kX86PointerSize>()));
+  __ movl(Address(ESP, offset), scratch);
 }
 
-void X86JNIMacroAssembler::ExceptionPoll(ManagedRegister /*scratch*/, size_t stack_adjust) {
+void X86JNIMacroAssembler::ExceptionPoll(size_t stack_adjust) {
   X86ExceptionSlowPath* slow = new (__ GetAllocator()) X86ExceptionSlowPath(stack_adjust);
   __ GetBuffer()->EnqueueSlowPath(slow);
   __ fs()->cmpl(Address::Absolute(Thread::ExceptionOffset<kX86PointerSize>()), Immediate(0));
@@ -559,13 +547,11 @@
   __ jmp(X86JNIMacroLabel::Cast(label)->AsX86());
 }
 
-void X86JNIMacroAssembler::Jump(JNIMacroLabel* label,
-                                JNIMacroUnaryCondition condition,
-                                ManagedRegister test) {
+void X86JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) {
   CHECK(label != nullptr);
 
   art::x86::Condition x86_cond;
-  switch (condition) {
+  switch (cond) {
     case JNIMacroUnaryCondition::kZero:
       x86_cond = art::x86::kZero;
       break;
@@ -573,18 +559,19 @@
       x86_cond = art::x86::kNotZero;
       break;
     default:
-      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(condition);
+      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(cond);
       UNREACHABLE();
   }
 
+  // TODO: Compare the memory location with immediate 0.
+  Register scratch = GetScratchRegister();
+  DCHECK_EQ(Thread::IsGcMarkingSize(), 4u);
+  __ fs()->movl(scratch, Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>()));
+
   // TEST reg, reg
   // Jcc <Offset>
-  __ testl(test.AsX86().AsCpuRegister(), test.AsX86().AsCpuRegister());
+  __ testl(scratch, scratch);
   __ j(x86_cond, X86JNIMacroLabel::Cast(label)->AsX86());
-
-
-  // X86 also has JCZX, JECZX, however it's not worth it to implement
-  // because we aren't likely to codegen with ECX+kZero check.
 }
 
 void X86JNIMacroAssembler::Bind(JNIMacroLabel* label) {
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index 7bf2f98..1223471 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -44,8 +44,7 @@
   // Emit code that will create an activation on the stack
   void BuildFrame(size_t frame_size,
                   ManagedRegister method_reg,
-                  ArrayRef<const ManagedRegister> callee_save_regs,
-                  const ManagedRegisterEntrySpills& entry_spills) override;
+                  ArrayRef<const ManagedRegister> callee_save_regs) override;
 
   // Emit code that will remove an activation from the stack
   void RemoveFrame(size_t frame_size,
@@ -60,16 +59,13 @@
   void StoreRef(FrameOffset dest, ManagedRegister src) override;
   void StoreRawPtr(FrameOffset dest, ManagedRegister src) override;
 
-  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override;
+  void StoreImmediateToFrame(FrameOffset dest, uint32_t imm) override;
 
-  void StoreStackOffsetToThread(ThreadOffset32 thr_offs,
-                                FrameOffset fr_offs,
-                                ManagedRegister scratch) override;
+  void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs) override;
 
   void StoreStackPointerToThread(ThreadOffset32 thr_offs) override;
 
-  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off,
-                     ManagedRegister scratch) override;
+  void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off) override;
 
   // Load routines
   void Load(ManagedRegister dest, FrameOffset src, size_t size) override;
@@ -88,16 +84,18 @@
   // Copying routines
   void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
 
-  void CopyRawPtrFromThread(FrameOffset fr_offs,
-                            ThreadOffset32 thr_offs,
-                            ManagedRegister scratch) override;
+  void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override;
 
   void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
       override;
 
-  void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override;
+  void CopyRef(FrameOffset dest, FrameOffset src) override;
+  void CopyRef(FrameOffset dest,
+               ManagedRegister base,
+               MemberOffset offs,
+               bool unpoison_reference) override;
 
-  void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override;
+  void Copy(FrameOffset dest, FrameOffset src, size_t size) override;
 
   void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch,
             size_t size) override;
@@ -123,20 +121,23 @@
   void ZeroExtend(ManagedRegister mreg, size_t size) override;
 
   // Exploit fast access in managed code to Thread::Current()
-  void GetCurrentThread(ManagedRegister tr) override;
-  void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) override;
+  void GetCurrentThread(ManagedRegister dest) override;
+  void GetCurrentThread(FrameOffset dest_offset) override;
 
   // Set up out_reg to hold a Object** into the handle scope, or to be null if the
   // value is null and null_allowed. in_reg holds a possibly stale reference
   // that can be used to avoid loading the handle scope entry to see if the value is
   // null.
-  void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset,
-                              ManagedRegister in_reg, bool null_allowed) override;
+  void CreateHandleScopeEntry(ManagedRegister out_reg,
+                              FrameOffset handlescope_offset,
+                              ManagedRegister in_reg,
+                              bool null_allowed) override;
 
   // Set up out_off to hold a Object** into the handle scope, or to be null if the
   // value is null and null_allowed.
-  void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset,
-                              ManagedRegister scratch, bool null_allowed) override;
+  void CreateHandleScopeEntry(FrameOffset out_off,
+                              FrameOffset handlescope_offset,
+                              bool null_allowed) override;
 
   // src holds a handle scope entry (Object**) load this into dst
   void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) override;
@@ -147,23 +148,23 @@
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
   // Jump to address held at [base+offset] (used for tail calls).
-  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+  void Jump(ManagedRegister base, Offset offset) override;
 
   // Call to address held at [base+offset]
-  void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
-  void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;
-  void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) override;
+  void Call(ManagedRegister base, Offset offset) override;
+  void Call(FrameOffset base, Offset offset) override;
+  void CallFromThread(ThreadOffset32 offset) override;
 
   // Generate code to check if Thread::Current()->exception_ is non-null
   // and branch to a ExceptionSlowPath if it is.
-  void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override;
+  void ExceptionPoll(size_t stack_adjust) override;
 
   // Create a new label that can be used with Jump/Bind calls.
   std::unique_ptr<JNIMacroLabel> CreateLabel() override;
   // Emit an unconditional jump to the label.
   void Jump(JNIMacroLabel* label) override;
-  // Emit a conditional jump to the label by applying a unary condition test to the register.
-  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override;
+  // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag.
+  void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override;
   // Code at this offset will serve as the target for the Jump call.
   void Bind(JNIMacroLabel* label) override;