Calling convention support for cross 64/32 compilation.

Add REX support for x86-64 operands.

Change-Id: I093ae26fb8c111d54b8c72166f054984564c04c6
diff --git a/compiler/utils/arm/assembler_arm.cc b/compiler/utils/arm/assembler_arm.cc
index 59eb98e..effc38e 100644
--- a/compiler/utils/arm/assembler_arm.cc
+++ b/compiler/utils/arm/assembler_arm.cc
@@ -1436,6 +1436,8 @@
   mov(rd, ShifterOperand(rm, ROR, 0), cond);
 }
 
+constexpr size_t kFramePointerSize = 4;
+
 void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                               const std::vector<ManagedRegister>& callee_save_regs,
                               const ManagedRegisterEntrySpills& entry_spills) {
@@ -1453,8 +1455,8 @@
   PushList(push_list);
 
   // Increase frame to required size.
-  CHECK_GT(frame_size, pushed_values * kPointerSize);  // Must be at least space to push Method*
-  size_t adjust = frame_size - (pushed_values * kPointerSize);
+  CHECK_GT(frame_size, pushed_values * kFramePointerSize);  // Must at least have space for Method*.
+  size_t adjust = frame_size - (pushed_values * kFramePointerSize);
   IncreaseFrameSize(adjust);
 
   // Write out Method*.
@@ -1463,7 +1465,7 @@
   // Write out entry spills.
   for (size_t i = 0; i < entry_spills.size(); ++i) {
     Register reg = entry_spills.at(i).AsArm().AsCoreRegister();
-    StoreToOffset(kStoreWord, reg, SP, frame_size + kPointerSize + (i * kPointerSize));
+    StoreToOffset(kStoreWord, reg, SP, frame_size + kFramePointerSize + (i * kFramePointerSize));
   }
 }
 
@@ -1480,8 +1482,8 @@
   }
 
   // Decrease frame to start of callee saves
-  CHECK_GT(frame_size, pop_values * kPointerSize);
-  size_t adjust = frame_size - (pop_values * kPointerSize);
+  CHECK_GT(frame_size, pop_values * kFramePointerSize);
+  size_t adjust = frame_size - (pop_values * kFramePointerSize);
   DecreaseFrameSize(adjust);
 
   // Pop callee saves and PC
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 5b2c8ba..a11c2da 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -575,6 +575,8 @@
   ___ Brk();
 }
 
+constexpr size_t kFramePointerSize = 8;
+
 void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                         const std::vector<ManagedRegister>& callee_save_regs,
                         const ManagedRegisterEntrySpills& entry_spills) {
@@ -589,8 +591,8 @@
   ___ PushCalleeSavedRegisters();
 
   // Increate frame to required size - must be at least space to push Method*.
-  CHECK_GT(frame_size, kCalleeSavedRegsSize * kPointerSize);
-  size_t adjust = frame_size - (kCalleeSavedRegsSize * kPointerSize);
+  CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize);
+  size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize);
   IncreaseFrameSize(adjust);
 
   // Write Method*.
@@ -600,7 +602,7 @@
   // TODO: we can implement a %2 STRP variant of StoreToOffset.
   for (size_t i = 0; i < entry_spills.size(); ++i) {
     Register reg = entry_spills.at(i).AsArm64().AsCoreRegister();
-    StoreToOffset(reg, SP, frame_size + kPointerSize + (i * kPointerSize));
+    StoreToOffset(reg, SP, frame_size + kFramePointerSize + (i * kFramePointerSize));
   }
 }
 
@@ -610,10 +612,10 @@
   // For now we only check that the size of the frame is greater than the
   // no of APCS callee saved regs [X19, X30] [D8, D15].
   CHECK_EQ(callee_save_regs.size(), kCalleeSavedRegsSize);
-  CHECK_GT(frame_size, kCalleeSavedRegsSize * kPointerSize);
+  CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize);
 
   // Decrease frame size to start of callee saved regs.
-  size_t adjust = frame_size - (kCalleeSavedRegsSize * kPointerSize);
+  size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize);
   DecreaseFrameSize(adjust);
 
   // Pop callee saved and return to LR.
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 99c29f1..45d3a97 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -536,6 +536,8 @@
   Sdc1(reg, base, offset);
 }
 
+constexpr size_t kFramePointerSize = 4;
+
 void MipsAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                                const std::vector<ManagedRegister>& callee_save_regs,
                                const ManagedRegisterEntrySpills& entry_spills) {
@@ -545,10 +547,10 @@
   IncreaseFrameSize(frame_size);
 
   // Push callee saves and return address
-  int stack_offset = frame_size - kPointerSize;
+  int stack_offset = frame_size - kFramePointerSize;
   StoreToOffset(kStoreWord, RA, SP, stack_offset);
   for (int i = callee_save_regs.size() - 1; i >= 0; --i) {
-    stack_offset -= kPointerSize;
+    stack_offset -= kFramePointerSize;
     Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
     StoreToOffset(kStoreWord, reg, SP, stack_offset);
   }
@@ -559,7 +561,7 @@
   // Write out entry spills.
   for (size_t i = 0; i < entry_spills.size(); ++i) {
     Register reg = entry_spills.at(i).AsMips().AsCoreRegister();
-    StoreToOffset(kStoreWord, reg, SP, frame_size + kPointerSize + (i * kPointerSize));
+    StoreToOffset(kStoreWord, reg, SP, frame_size + kFramePointerSize + (i * kFramePointerSize));
   }
 }
 
@@ -568,11 +570,11 @@
   CHECK_ALIGNED(frame_size, kStackAlignment);
 
   // Pop callee saves and return address
-  int stack_offset = frame_size - (callee_save_regs.size() * kPointerSize) - kPointerSize;
+  int stack_offset = frame_size - (callee_save_regs.size() * kFramePointerSize) - kFramePointerSize;
   for (size_t i = 0; i < callee_save_regs.size(); ++i) {
     Register reg = callee_save_regs.at(i).AsMips().AsCoreRegister();
     LoadFromOffset(kLoadWord, reg, SP, stack_offset);
-    stack_offset += kPointerSize;
+    stack_offset += kFramePointerSize;
   }
   LoadFromOffset(kLoadWord, RA, SP, stack_offset);
 
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index aac8b01..6043c17 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1396,6 +1396,8 @@
   EmitOperand(reg_or_opcode, Operand(operand));
 }
 
+constexpr size_t kFramePointerSize = 4;
+
 void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
                               const std::vector<ManagedRegister>& spill_regs,
                               const ManagedRegisterEntrySpills& entry_spills) {
@@ -1404,11 +1406,11 @@
     pushl(spill_regs.at(i).AsX86().AsCpuRegister());
   }
   // return address then method on stack
-  addl(ESP, Immediate(-frame_size + (spill_regs.size() * kPointerSize) +
-                      kPointerSize /*method*/ + kPointerSize /*return address*/));
+  addl(ESP, Immediate(-frame_size + (spill_regs.size() * kFramePointerSize) +
+                      kFramePointerSize /*method*/ + kFramePointerSize /*return address*/));
   pushl(method_reg.AsX86().AsCpuRegister());
   for (size_t i = 0; i < entry_spills.size(); ++i) {
-    movl(Address(ESP, frame_size + kPointerSize + (i * kPointerSize)),
+    movl(Address(ESP, frame_size + kFramePointerSize + (i * kFramePointerSize)),
          entry_spills.at(i).AsX86().AsCpuRegister());
   }
 }
@@ -1416,7 +1418,7 @@
 void X86Assembler::RemoveFrame(size_t frame_size,
                             const std::vector<ManagedRegister>& spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  addl(ESP, Immediate(frame_size - (spill_regs.size() * kPointerSize) - kPointerSize));
+  addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - kFramePointerSize));
   for (size_t i = 0; i < spill_regs.size(); ++i) {
     popl(spill_regs.at(i).AsX86().AsCpuRegister());
   }
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 52b9382..7d02c7c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1513,15 +1513,30 @@
 }
 
 void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
-  UNIMPLEMENTED(FATAL);
+  uint8_t rex = operand.rex();
+  if (rex != 0) {
+    EmitUint8(rex);
+  }
 }
 
 void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
-  UNIMPLEMENTED(FATAL);
+  uint8_t rex = operand.rex();
+  if (dst.NeedsRex()) {
+    rex |= 0x44;  // REX.0R00: the R bit extends the ModRM reg field to reach R8-R15.
+  }
+  if (rex != 0) {
+    EmitUint8(rex);
+  }
 }
 
 void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
-  UNIMPLEMENTED(FATAL);
+  uint8_t rex = operand.rex();
+  if (dst.NeedsRex()) {
+    rex |= 0x44;  // REX.0R00
+  }
+  if (rex != 0) {
+    EmitUint8(rex);
+  }
 }
 
 void X86_64Assembler::EmitRex64(CpuRegister reg) {
@@ -1532,7 +1547,13 @@
 }
 
 void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
-  UNIMPLEMENTED(FATAL);
+  uint8_t rex = 0x48 | operand.rex();  // REX.W000: the W bit selects 64-bit operand size, so rex is always non-zero here.
+  if (dst.NeedsRex()) {
+    rex |= 0x44;  // REX.0R00
+  }
+  if (rex != 0) {
+    EmitUint8(rex);
+  }
 }
 
 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
@@ -1540,19 +1561,27 @@
 }
 
 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
-  UNIMPLEMENTED(FATAL);
+  uint8_t rex = 0x40 | operand.rex();  // REX.0000: forced so encodings 4-7 mean SPL/BPL/SIL/DIL rather than AH/CH/DH/BH.
+  if (dst.NeedsRex()) {
+    rex |= 0x44;  // REX.0R00
+  }
+  if (rex != 0) {
+    EmitUint8(rex);
+  }
 }
 
+constexpr size_t kFramePointerSize = 8;
+
 void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& spill_regs,
-                              const ManagedRegisterEntrySpills& entry_spills) {
+                                 const std::vector<ManagedRegister>& spill_regs,
+                                 const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
     pushq(spill_regs.at(i).AsX86_64().AsCpuRegister());
   }
   // return address then method on stack
-  addq(CpuRegister(RSP), Immediate(-frame_size + (spill_regs.size() * kPointerSize) +
-                                   kPointerSize /*method*/ + kPointerSize /*return address*/));
+  addq(CpuRegister(RSP), Immediate(-frame_size + (spill_regs.size() * kFramePointerSize) +
+                                   kFramePointerSize /*method*/ + kFramePointerSize /*return address*/));
   pushq(method_reg.AsX86_64().AsCpuRegister());
 
   for (size_t i = 0; i < entry_spills.size(); ++i) {
@@ -1579,7 +1608,7 @@
 void X86_64Assembler::RemoveFrame(size_t frame_size,
                             const std::vector<ManagedRegister>& spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  addq(CpuRegister(RSP), Immediate(frame_size - (spill_regs.size() * kPointerSize) - kPointerSize));
+  addq(CpuRegister(RSP), Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - kFramePointerSize));
   for (size_t i = 0; i < spill_regs.size(); ++i) {
     popq(spill_regs.at(i).AsX86_64().AsCpuRegister());
   }
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 1d42d89..4738dcb 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -68,6 +68,10 @@
     return static_cast<Register>(encoding_at(1) & 7);
   }
 
+  uint8_t rex() const {
+    return rex_;
+  }
+
   int8_t disp8() const {
     CHECK_GE(length_, 2);
     return static_cast<int8_t>(encoding_[length_ - 1]);
@@ -81,27 +85,33 @@
   }
 
   bool IsRegister(CpuRegister reg) const {
-    CHECK(!reg.NeedsRex()) << "TODO: rex support:" << reg;
     return ((encoding_[0] & 0xF8) == 0xC0)  // Addressing mode is register only.
-        && ((encoding_[0] & 0x07) == reg.LowBits());  // Register codes match.
+        && ((encoding_[0] & 0x07) == reg.LowBits())  // Register codes match.
+        && (reg.NeedsRex() == ((rex_ & 1) != 0));  // REX.000B bits match.
   }
 
  protected:
   // Operand can be sub classed (e.g: Address).
-  Operand() : length_(0) { }
+  Operand() : rex_(0), length_(0) { }
 
-  void SetModRM(int mod, CpuRegister rm) {
+  void SetModRM(uint8_t mod, CpuRegister rm) {
     CHECK_EQ(mod & ~3, 0);
-    CHECK(!rm.NeedsRex());
-    encoding_[0] = (mod << 6) | static_cast<uint8_t>(rm.AsRegister());
+    if (rm.NeedsRex()) {
+      rex_ |= 0x41;  // REX.000B: the B bit extends the ModRM r/m field to reach R8-R15.
+    }
+    encoding_[0] = (mod << 6) | rm.LowBits();
     length_ = 1;
   }
 
   void SetSIB(ScaleFactor scale, CpuRegister index, CpuRegister base) {
-    CHECK(!index.NeedsRex()) << "TODO: rex support: " << index;
-    CHECK(!base.NeedsRex()) << "TODO: rex support: " << base;
     CHECK_EQ(length_, 1);
     CHECK_EQ(scale & ~3, 0);
+    if (base.NeedsRex()) {
+      rex_ |= 0x41;  // REX.000B
+    }
+    if (index.NeedsRex()) {
+      rex_ |= 0x42;  // REX.00X0
+    }
     encoding_[1] = (scale << 6) | (static_cast<uint8_t>(index.AsRegister()) << 3) |
         static_cast<uint8_t>(base.AsRegister());
     length_ = 2;
@@ -120,8 +130,9 @@
   }
 
  private:
-  byte length_;
-  byte encoding_[6];
+  uint8_t rex_;
+  uint8_t length_;
+  uint8_t encoding_[6];
 
   explicit Operand(CpuRegister reg) { SetModRM(3, reg); }