Numerous fixes to enable PromoteRegs, though it's still broken.

- Fixed ThrowNullPointerFromCode launchpad to load the array length
  directly into the necessary arg reg without clobbering the array
  pointer, since that value may be live afterwards.

- genArrayPut now uses a temporary reg for bytes if the source reg is
  >= 4, since x86 can't encode byte accesses to those registers.

- Fixed the order in which core regs are spilled and unspilled.

- Correctly emit instructions when base == rBP and disp == 0.

- Added checks to the compiler to ensure that byte opcodes aren't used
  on registers that can't be byte accessed.

- Fixed generation of a number of ops which use byte opcodes, including
  floating point comparison, int-to-byte, and and-int/lit16.

- Added rBP, rSI, and rDI to the spill registers for the x86 JNI compiler.

- Various fixes and additions to the x86 disassembler.

Change-Id: I365fe7dec5cc64d181248fd58e90789f100b45e7
diff --git a/src/oat/jni/x86/calling_convention_x86.cc b/src/oat/jni/x86/calling_convention_x86.cc
index 1f66d71..1cd849c 100644
--- a/src/oat/jni/x86/calling_convention_x86.cc
+++ b/src/oat/jni/x86/calling_convention_x86.cc
@@ -106,11 +106,21 @@
 
 // JNI calling convention
 
-std::vector<ManagedRegister> X86JniCallingConvention::callee_save_regs_;
+X86JniCallingConvention::X86JniCallingConvention(bool is_static, bool is_synchronized,
+                                                 const char* shorty)
+    : JniCallingConvention(is_static, is_synchronized, shorty) {
+  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(EBP));
+  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(ESI));
+  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(EDI));
+}
+
+uint32_t X86JniCallingConvention::CoreSpillMask() const {
+  return 1 << EBP | 1 << ESI | 1 << EDI | 1 << kNumberOfCpuRegisters;
+}
 
 size_t X86JniCallingConvention::FrameSize() {
-  // Return address, Method* and local reference segment state
-  size_t frame_data_size = 3 * kPointerSize;
+  // Method*, return address and callee save area size, local reference segment state
+  size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kPointerSize;
   // References plus 2 words for SIRT header
   size_t sirt_size = (ReferenceCount() + 2) * kPointerSize;
   // Plus return value spill area size
diff --git a/src/oat/jni/x86/calling_convention_x86.h b/src/oat/jni/x86/calling_convention_x86.h
index 3bf4f7c..959a37f 100644
--- a/src/oat/jni/x86/calling_convention_x86.h
+++ b/src/oat/jni/x86/calling_convention_x86.h
@@ -45,8 +45,7 @@
 
 class X86JniCallingConvention : public JniCallingConvention {
  public:
-  X86JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : JniCallingConvention(is_static, is_synchronized, shorty) {}
+  explicit X86JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty);
   virtual ~X86JniCallingConvention() {}
   // Calling convention
   virtual ManagedRegister ReturnRegister();
@@ -55,13 +54,10 @@
   virtual size_t FrameSize();
   virtual size_t OutArgSize();
   virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const {
-    DCHECK(callee_save_regs_.empty());
     return callee_save_regs_;
   }
   virtual ManagedRegister ReturnScratchRegister() const;
-  virtual uint32_t CoreSpillMask() const {
-    return 0;
-  }
+  virtual uint32_t CoreSpillMask() const;
   virtual uint32_t FpSpillMask() const {
     return 0;
   }
@@ -75,7 +71,8 @@
   virtual size_t NumberOfOutgoingStackArgs();
 
  private:
-  static std::vector<ManagedRegister> callee_save_regs_;
+  // TODO: these values aren't unique and can be shared amongst instances
+  std::vector<ManagedRegister> callee_save_regs_;
 
   DISALLOW_COPY_AND_ASSIGN(X86JniCallingConvention);
 };
diff --git a/src/oat/utils/x86/assembler_x86.cc b/src/oat/utils/x86/assembler_x86.cc
index 28b17f5..b7f0c1f 100644
--- a/src/oat/utils/x86/assembler_x86.cc
+++ b/src/oat/utils/x86/assembler_x86.cc
@@ -1415,10 +1415,12 @@
                               const std::vector<ManagedRegister>& spill_regs,
                               const std::vector<ManagedRegister>& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  CHECK_EQ(0u, spill_regs.size());  // no spilled regs on x86
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    pushl(spill_regs.at(i).AsX86().AsCpuRegister());
+  }
   // return address then method on stack
-  addl(ESP, Immediate(-frame_size + kPointerSize /*method*/ +
-                      kPointerSize /*return address*/));
+  addl(ESP, Immediate(-frame_size + (spill_regs.size() * kPointerSize) +
+                      kPointerSize /*method*/ + kPointerSize /*return address*/));
   pushl(method_reg.AsX86().AsCpuRegister());
   for (size_t i = 0; i < entry_spills.size(); ++i) {
     movl(Address(ESP, frame_size + kPointerSize + (i * kPointerSize)),
@@ -1429,8 +1431,10 @@
 void X86Assembler::RemoveFrame(size_t frame_size,
                             const std::vector<ManagedRegister>& spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  CHECK_EQ(0u, spill_regs.size());  // no spilled regs on x86
-  addl(ESP, Immediate(frame_size - kPointerSize));
+  addl(ESP, Immediate(frame_size - (spill_regs.size() * kPointerSize) - kPointerSize));
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    popl(spill_regs.at(i).AsX86().AsCpuRegister());
+  }
   ret();
 }