Fix host architecture for 64-bit builds.

Also, hack the x86 assembler so it can serve as the x86-64 trampoline compiler's assembler.
Implement the missing x86-64 quick resolution trampoline.
Add x86-64 to the quick ELF writer.

Change-Id: I08216c67014a83492ada12898ab8000218ba7bb4
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index c25925d..6012421 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -40,13 +40,19 @@
 
 TARGET_INSTRUCTION_SET_FEATURES := $(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES)
 
+ifeq ($(BUILD_HOST_64bit),)
+host_arch := x86
+else
+host_arch := x86_64
+endif
+
 $(HOST_CORE_IMG_OUT): $(HOST_CORE_DEX_FILES) $(DEX2OAT_DEPENDENCY)
 	@echo "host dex2oat: $@ ($?)"
 	@mkdir -p $(dir $@)
 	$(hide) $(DEX2OAT) --runtime-arg -Xms16m --runtime-arg -Xmx16m --image-classes=$(PRELOADED_CLASSES) $(addprefix \
 		--dex-file=,$(HOST_CORE_DEX_FILES)) $(addprefix --dex-location=,$(HOST_CORE_DEX_LOCATIONS)) --oat-file=$(HOST_CORE_OAT_OUT) \
 		--oat-location=$(HOST_CORE_OAT) --image=$(HOST_CORE_IMG_OUT) --base=$(LIBART_IMG_HOST_BASE_ADDRESS) \
-		--instruction-set=$(HOST_ARCH) --host --android-root=$(HOST_OUT)
+		--instruction-set=$(host_arch) --host --android-root=$(HOST_OUT)
 
 $(TARGET_CORE_IMG_OUT): $(TARGET_CORE_DEX_FILES) $(DEX2OAT_DEPENDENCY)
 	@echo "target dex2oat: $@ ($?)"
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index d884bc0..17c2e94 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -88,7 +88,8 @@
       return RoundUp(offset, kArmAlignment);
     case kMips:
       return RoundUp(offset, kMipsAlignment);
-    case kX86:
+    case kX86:  // Fall-through.
+    case kX86_64:
       return RoundUp(offset, kX86Alignment);
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 4b823ef..a6daa5d 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -377,6 +377,11 @@
       elf_header.e_flags = 0;
       break;
     }
+    case kX86_64: {
+      elf_header.e_machine = EM_X86_64;
+      elf_header.e_flags = 0;
+      break;
+    }
     case kMips: {
       elf_header.e_machine = EM_MIPS;
       elf_header.e_flags = (EF_MIPS_NOREORDER |
diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc
index 32ae558..3e13e44 100644
--- a/compiler/trampolines/trampoline_compiler.cc
+++ b/compiler/trampolines/trampoline_compiler.cc
@@ -100,6 +100,23 @@
 }
 }  // namespace x86
 
+namespace x86_64 {
+static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset offset) {
+  UniquePtr<x86::X86Assembler> assembler(static_cast<x86::X86Assembler*>(Assembler::Create(kX86_64)));
+
+  // All x86-64 trampolines jump via the Thread* held in gs.
+  __ gs()->jmp(x86::Address::Absolute(offset, true));
+  __ int3();
+
+  size_t cs = assembler->CodeSize();
+  UniquePtr<std::vector<uint8_t> > entry_stub(new std::vector<uint8_t>(cs));
+  MemoryRegion code(&(*entry_stub)[0], entry_stub->size());
+  assembler->FinalizeInstructions(code);
+
+  return entry_stub.release();  // Ownership of the vector passes to the caller.
+}
+}  // namespace x86_64
+
 const std::vector<uint8_t>* CreateTrampoline(InstructionSet isa, EntryPointCallingConvention abi,
                                              ThreadOffset offset) {
   switch (isa) {
@@ -110,6 +127,8 @@
       return mips::CreateTrampoline(abi, offset);
     case kX86:
       return x86::CreateTrampoline(offset);
+    case kX86_64:
+      return x86_64::CreateTrampoline(offset);
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << isa;
       return NULL;
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index 6732476..a7cb278 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -111,7 +111,8 @@
       return new arm64::Arm64Assembler();
     case kMips:
       return new mips::MipsAssembler();
-    case kX86:
+    case kX86:  // Fall-through.
+    case kX86_64:
       return new x86::X86Assembler();
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 136d248..26300e0 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1210,6 +1210,13 @@
   return this;
 }
 
+X86Assembler* X86Assembler::gs() {
+  // TODO: gs is a prefix and not an instruction
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x65);  // GS segment-override prefix byte.
+  return this;
+}
+
 void X86Assembler::AddImmediate(Register reg, const Immediate& imm) {
   int value = imm.value();
   if (value > 0) {
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 0fa8e00..e284d8c 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -192,15 +192,21 @@
     }
   }
 
-  static Address Absolute(uword addr) {
+  static Address Absolute(uword addr, bool has_rip = false) {
     Address result;
-    result.SetModRM(0, EBP);
-    result.SetDisp32(addr);
+    if (has_rip) {  // x86-64: mod=0/rm=EBP means RIP-relative, so use SIB for absolute disp32.
+      result.SetModRM(0, ESP);
+      result.SetSIB(TIMES_1, ESP, EBP);
+      result.SetDisp32(addr);
+    } else {
+      result.SetModRM(0, EBP);
+      result.SetDisp32(addr);
+    }
     return result;
   }
 
-  static Address Absolute(ThreadOffset addr) {
-    return Absolute(addr.Int32Value());
+  static Address Absolute(ThreadOffset addr, bool has_rip = false) {
+    return Absolute(addr.Int32Value(), has_rip);
   }
 
  private:
@@ -210,9 +216,9 @@
 };
 
 
-class X86Assembler : public Assembler {
+class X86Assembler FINAL : public Assembler {
  public:
-  X86Assembler() {}
+  explicit X86Assembler() {}
   virtual ~X86Assembler() {}
 
   /*
@@ -427,6 +433,7 @@
   void mfence();
 
   X86Assembler* fs();
+  X86Assembler* gs();
 
   //
   // Macros for High-level operations.
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index ec2713a..67a9e06 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -139,7 +139,7 @@
   UsageError("      Example: --android-root=out/host/linux-x86");
   UsageError("      Default: $ANDROID_ROOT");
   UsageError("");
-  UsageError("  --instruction-set=(arm|mips|x86): compile for a particular instruction");
+  UsageError("  --instruction-set=(arm|mips|x86|x86_64): compile for a particular instruction");
   UsageError("      set.");
   UsageError("      Example: --instruction-set=x86");
   UsageError("      Default: arm");
@@ -992,7 +992,10 @@
   }
 
   if (compiler_filter_string == NULL) {
-    if (image) {
+    if (instruction_set == kX86_64) {
+      // TODO: currently x86-64 is only interpreted.
+      compiler_filter_string = "interpret-only";
+    } else if (image) {
       compiler_filter_string = "speed";
     } else {
 #if ART_SMALL_MODE
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 4cd7880..863fa31 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -105,7 +105,7 @@
     movq 72(%rsp), %xmm7
     addq LITERAL(80), %rsp
     CFI_ADJUST_CFA_OFFSET(-80)
-    // Save callee and GPR args, mixed together to agree with core spills bitmap.
+    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
     POP rcx
     POP rdx
     POP rbx
@@ -652,9 +652,23 @@
      * Called to resolve an imt conflict.
      */
 UNIMPLEMENTED art_quick_imt_conflict_trampoline
-UNIMPLEMENTED art_quick_resolution_trampoline
 
-/* Proposed Generic JNI setup
+DEFINE_FUNCTION art_quick_resolution_trampoline
+    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    movq %gs:THREAD_SELF_OFFSET, %rdx  // Pass Thread* as the third argument.
+    movq %rsp, %rcx                    // Pass SP as the fourth argument.
+    call PLT_SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
+    movq %rax, %r10               // Remember returned code pointer in R10.
+    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
+    testq %r10, %r10              // If code pointer is NULL goto deliver pending exception.
+    jz 1f
+    jmp *%r10                     // Tail call into method.
+1:
+    // Frame was already popped above; restoring it a second time would corrupt the stack.
+    DELIVER_PENDING_EXCEPTION
+END_FUNCTION art_quick_resolution_trampoline
+
+/* Generic JNI frame layout:
  *
  * #-------------------#
  * |                   |
@@ -712,7 +726,6 @@
      */
 DEFINE_FUNCTION art_quick_generic_jni_trampoline
     // Save callee and GPR args, mixed together to agree with core spills bitmap.
-    // Save callee and GPR args, mixed together to agree with core spills bitmap.
     PUSH r15  // Callee save.
     PUSH r14  // Callee save.
     PUSH r13  // Callee save.
@@ -760,7 +773,7 @@
     //  gs:...   rbp      <= where they are
     movq %gs:THREAD_SELF_OFFSET, %rdi
     movq %rbp, %rsi
-    call PLT_SYMBOL(artQuickGenericJniTrampoline)
+    call PLT_SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
     test %rax, %rax                 // check whether code pointer is NULL, also indicates exception
     jz 1f
     // pop from the register-passing alloca