Add x86_64 code generation support

Utilizes r0..r7 in the register allocator, and implements spill/unspill
of core regs as well as operations on the stack pointer.

Change-Id: I973d5a1acb9aa735f6832df3d440185d9e896c67
Signed-off-by: Dmitry Petrochenko <dmitry.petrochenko@intel.com>
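
Note: the 64-bit paths below all lean on the REX.W prefix (0x48), which
promotes the familiar 32-bit mov/arithmetic encodings to 64-bit operand
size. As an illustrative sketch (not part of the patch), a core-register
spill such as "mov [rsp+0x10], rax" encodes as:

    #include <cstdint>
    #include <vector>

    // Illustrative only: the byte sequence the new kX86Mov64MR entry is
    // meant to produce for a 64-bit spill, mov [rsp+0x10], rax.
    std::vector<uint8_t> Spill64Example() {
      return {0x48,   // REX.W: 64-bit operand size
              0x89,   // mov r/m64, r64
              0x44,   // ModRM: mod=01 (disp8), reg=rax, rm=100 (SIB follows)
              0x24,   // SIB: base=rsp, no index
              0x10};  // disp8 = 16
    }
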
diff --git a/compiler/compilers.cc b/compiler/compilers.cc
index 79a85db..76838d7 100644
--- a/compiler/compilers.cc
+++ b/compiler/compilers.cc
@@ -111,7 +111,7 @@
       mir_to_lir = X86CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
     case kX86_64:
-      mir_to_lir = X86CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
+      mir_to_lir = X86_64CodeGenerator(cu, cu->mir_graph.get(), &cu->arena);
       break;
     default:
       LOG(FATAL) << "Unexpected instruction set: " << cu->instruction_set;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 77e5649..a4052cd 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -174,6 +174,8 @@
                           ArenaAllocator* const arena);
 Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena);
+Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                          ArenaAllocator* const arena);
 
 // Utility macros to traverse the LIR list.
 #define NEXT_LIR(lir) (lir->next)
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index b8481e2..c0c60d7 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -63,17 +63,24 @@
 { kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "16TI8", "fs:[!0d],!1d" }, \
   \
 { kX86 ## opname ## 32MR,  kMemReg,    mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32MR", "[!0r+!1d],!2r" }, \
+{ kX86 ## opname ## 64MR,  kMemReg64,  mem_use | IS_TERTIARY_OP |           REG_USE02  | SETS_CCODES | uses_ccodes, { REX_W,         0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "64MR", "[!0r+!1d],!2r" }, \
 { kX86 ## opname ## 32AR,  kArrayReg,  mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { 0,             0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
+{ kX86 ## opname ## 64AR,  kArrayReg64, mem_use | IS_QUIN_OP     |           REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W,         0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \
 { kX86 ## opname ## 32TR,  kThreadReg, mem_use | IS_BINARY_OP   |           REG_USE1   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0,              0,        0 }, #opname "32TR", "fs:[!0d],!1r" }, \
 { kX86 ## opname ## 32RR,  kRegReg,              IS_BINARY_OP   | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RR", "!0r,!1r" }, \
 { kX86 ## opname ## 32RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## 64RM,  kRegMem,    IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01  | SETS_CCODES | uses_ccodes, { REX_W,         0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RM", "!0r,[!1r+!2d]" }, \
 { kX86 ## opname ## 32RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0,             0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
+{ kX86 ## opname ## 64RA,  kRegArray,  IS_LOAD | IS_QUIN_OP     | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W,         0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \
 { kX86 ## opname ## 32RT,  kRegThread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0,              0,        0 }, #opname "32RT", "!0r,fs:[!1d]" }, \
+{ kX86 ## opname ## 64RT,  kReg64Thread, IS_LOAD | IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0,              0,        0 }, #opname "64RT", "!0r,fs:[!1d]" }, \
 { kX86 ## opname ## 32RI,  kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "32RI", "!0r,!1d" }, \
+{ kX86 ## opname ## 64RI,  kReg64Imm,            IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,         0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "64RI", "!0r,!1d" }, \
 { kX86 ## opname ## 32MI,  kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32MI", "[!0r+!1d],!2d" }, \
 { kX86 ## opname ## 32AI,  kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \
 { kX86 ## opname ## 32TI,  kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0,        4 }, #opname "32TI", "fs:[!0d],!1d" }, \
 { kX86 ## opname ## 32RI8, kRegImm,              IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32RI8", "!0r,!1d" }, \
+{ kX86 ## opname ## 64RI8, kReg64Imm,            IS_BINARY_OP   | reg_def | REG_USE0   | SETS_CCODES | uses_ccodes, { REX_W,         0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "64RI8", "!0r,!1d" }, \
 { kX86 ## opname ## 32MI8, kMemImm,    mem_use | IS_TERTIARY_OP |           REG_USE0   | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32MI8", "[!0r+!1d],!2d" }, \
 { kX86 ## opname ## 32AI8, kArrayImm,  mem_use | IS_QUIN_OP     |           REG_USE01  | SETS_CCODES | uses_ccodes, { 0,             0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \
 { kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP   |                        SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8,  0, 0, rm32_i8_modrm,  0,        1 }, #opname "32TI8", "fs:[!0d],!1d" }
@@ -164,16 +171,22 @@
   { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2 }, "Mov16TI", "fs:[!0d],!1d" },
 
   { kX86Mov32MR, kMemReg,    IS_STORE | IS_TERTIARY_OP | REG_USE02,      { 0,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov32MR", "[!0r+!1d],!2r" },
+  { kX86Mov64MR, kMemReg64,  IS_STORE | IS_TERTIARY_OP | REG_USE02,      { REX_W,         0, 0x89, 0, 0, 0, 0, 0 }, "Mov64MR", "[!0r+!1d],!2r" },
   { kX86Mov32AR, kArrayReg,  IS_STORE | IS_QUIN_OP     | REG_USE014,     { 0,             0, 0x89, 0, 0, 0, 0, 0 }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" },
+  { kX86Mov64AR, kArrayReg64, IS_STORE | IS_QUIN_OP     | REG_USE014,     { REX_W,        0, 0x89, 0, 0, 0, 0, 0 }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" },
   { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP   | REG_USE1,       { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32TR", "fs:[!0d],!1r" },
   { kX86Mov32RR, kRegReg,               IS_BINARY_OP   | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RR", "!0r,!1r" },
   { kX86Mov32RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RM", "!0r,[!1r+!2d]" },
+  { kX86Mov64RM, kRegMem,    IS_LOAD  | IS_TERTIARY_OP | REG_DEF0_USE1,  { REX_W,         0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RM", "!0r,[!1r+!2d]" },
   { kX86Mov32RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { 0,             0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" },
+  { kX86Mov64RA, kRegArray,  IS_LOAD  | IS_QUIN_OP     | REG_DEF0_USE12, { REX_W,         0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" },
   { kX86Mov32RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RT", "!0r,fs:[!1d]" },
+  { kX86Mov64RT, kRegThread, IS_LOAD  | IS_BINARY_OP   | REG_DEF0,       { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RT", "!0r,fs:[!1d]" },
   { kX86Mov32RI, kMovRegImm,            IS_BINARY_OP   | REG_DEF0,       { 0,             0, 0xB8, 0, 0, 0, 0, 4 }, "Mov32RI", "!0r,!1d" },
   { kX86Mov32MI, kMemImm,    IS_STORE | IS_TERTIARY_OP | REG_USE0,       { 0,             0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32MI", "[!0r+!1d],!2d" },
   { kX86Mov32AI, kArrayImm,  IS_STORE | IS_QUIN_OP     | REG_USE01,      { 0,             0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" },
   { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" },
+  { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP,                    { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 4 }, "Mov64TI", "fs:[!0d],!1d" },
 
   { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1,      { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" },
 
@@ -376,7 +389,7 @@
   { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" },
 };
 
-static size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) {
+size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) {
   size_t size = 0;
   if (entry->skeleton.prefix1 > 0) {
     ++size;
@@ -392,8 +405,10 @@
     }
   }
   ++size;  // modrm
-  if (has_sib || RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum()) {
+  if (has_sib || RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum()
+      || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) {
     // SP requires a SIB byte.
+    // GS access also needs a SIB byte for absolute addressing in 64-bit mode.
     ++size;
   }
   if (displacement != 0 || RegStorage::RegNum(base) == rs_rBP.GetRegNum()) {
@@ -421,16 +436,19 @@
       return 3;  // 1 byte of opcode + 2 prefixes
     case kRegOpcode:  // lir operands - 0: reg
       return ComputeSize(entry, 0, 0, false) - 1;  // substract 1 for modrm
+    case kReg64:
     case kReg:  // lir operands - 0: reg
       return ComputeSize(entry, 0, 0, false);
     case kMem:  // lir operands - 0: base, 1: disp
       return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
     case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
       return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
+    case kMemReg64:
     case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
       return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
     case kMemRegImm:  // lir operands - 0: base, 1: disp, 2: reg 3: immediate
       return ComputeSize(entry, lir->operands[0], lir->operands[1], false);
+    case kArrayReg64:
     case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
       return ComputeSize(entry, lir->operands[0], lir->operands[3], true);
     case kThreadReg:  // lir operands - 0: disp, 1: reg
@@ -443,8 +461,10 @@
       return ComputeSize(entry, lir->operands[1], lir->operands[2], false);
     case kRegArray:   // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
       return ComputeSize(entry, lir->operands[1], lir->operands[4], true);
+    case kReg64Thread:  // lir operands - 0: reg, 1: disp
     case kRegThread:  // lir operands - 0: reg, 1: disp
       return ComputeSize(entry, 0, 0x12345678, false);  // displacement size is always 32bit
+    case kReg64Imm:
     case kRegImm: {  // lir operands - 0: reg, 1: immediate
       size_t size = ComputeSize(entry, 0, 0, false);
       if (entry->skeleton.ax_opcode == 0) {
@@ -551,7 +571,12 @@
 
 void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry) {
   if (entry->skeleton.prefix1 != 0) {
-    code_buffer_.push_back(entry->skeleton.prefix1);
+    if (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX) {
+      // 64-bit addressing uses GS, not FS.
+      code_buffer_.push_back(THREAD_PREFIX_GS);
+    } else {
+      code_buffer_.push_back(entry->skeleton.prefix1);
+    }
     if (entry->skeleton.prefix2 != 0) {
       code_buffer_.push_back(entry->skeleton.prefix2);
     }
@@ -605,6 +630,19 @@
   }
 }
 
+void X86Mir2Lir::EmitModrmThread(uint8_t reg_or_opcode) {
+  if (Gen64Bit()) {
+    // Absolute addressing for GS access.
+    uint8_t modrm = (0 << 6) | (reg_or_opcode << 3) | rs_rX86_SP.GetRegNum();
+    code_buffer_.push_back(modrm);
+    uint8_t sib = (0/*TIMES_1*/ << 6) | (rs_rX86_SP.GetRegNum() << 3) | rs_rBP.GetRegNum();
+    code_buffer_.push_back(sib);
+  } else {
+    uint8_t modrm = (0 << 6) | (reg_or_opcode << 3) | rs_rBP.GetRegNum();
+    code_buffer_.push_back(modrm);
+  }
+}
+
 void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp) {
   DCHECK_LT(RegStorage::RegNum(reg_or_opcode), 8);
   DCHECK_LT(RegStorage::RegNum(base), 8);
@@ -754,8 +792,7 @@
         << " in " << PrettyMethod(cu_->method_idx, *cu_->dex_file);
   }
   DCHECK_LT(RegStorage::RegNum(reg), 8);
-  uint8_t modrm = (0 << 6) | (RegStorage::RegNum(reg) << 3) | rs_rBP.GetRegNum();
-  code_buffer_.push_back(modrm);
+  EmitModrmThread(RegStorage::RegNum(reg));
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
   code_buffer_.push_back((disp >> 16) & 0xFF);
@@ -810,14 +847,7 @@
 }
 
 void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) {
-  if (entry->skeleton.prefix1 != 0) {
-    code_buffer_.push_back(entry->skeleton.prefix1);
-    if (entry->skeleton.prefix2 != 0) {
-      code_buffer_.push_back(entry->skeleton.prefix2);
-    }
-  } else {
-    DCHECK_EQ(0, entry->skeleton.prefix2);
-  }
+  EmitPrefix(entry);
   if (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) {
     code_buffer_.push_back(entry->skeleton.ax_opcode);
   } else {
@@ -837,8 +867,7 @@
 
 void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) {
   EmitPrefixAndOpcode(entry);
-  uint8_t modrm = (0 << 6) | (entry->skeleton.modrm_opcode << 3) | rs_rBP.GetRegNum();
-  code_buffer_.push_back(modrm);
+  EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
   code_buffer_.push_back((disp >> 16) & 0xFF);
@@ -931,14 +960,7 @@
 }
 
 void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) {
-  if (entry->skeleton.prefix1 != 0) {
-    code_buffer_.push_back(entry->skeleton.prefix1);
-    if (entry->skeleton.prefix2 != 0) {
-      code_buffer_.push_back(entry->skeleton.prefix2);
-    }
-  } else {
-    DCHECK_EQ(0, entry->skeleton.prefix2);
-  }
+  EmitPrefix(entry);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0x0F, entry->skeleton.opcode);
   code_buffer_.push_back(0x0F);
@@ -1075,8 +1097,7 @@
 void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int disp) {
   DCHECK_NE(entry->skeleton.prefix1, 0);
   EmitPrefixAndOpcode(entry);
-  uint8_t modrm = (0 << 6) | (entry->skeleton.modrm_opcode << 3) | rs_rBP.GetRegNum();
-  code_buffer_.push_back(modrm);
+  EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
   code_buffer_.push_back((disp >> 16) & 0xFF);
@@ -1317,6 +1338,7 @@
       case kRegOpcode:  // lir operands - 0: reg
         EmitOpRegOpcode(entry, lir->operands[0]);
         break;
+      case kReg64:
       case kReg:  // lir operands - 0: reg
         EmitOpReg(entry, lir->operands[0]);
         break;
@@ -1326,6 +1348,7 @@
       case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
         EmitOpArray(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]);
         break;
+      case kMemReg64:
       case kMemReg:  // lir operands - 0: base, 1: disp, 2: reg
         EmitMemReg(entry, lir->operands[0], lir->operands[1], lir->operands[2]);
         break;
@@ -1336,6 +1359,7 @@
         EmitArrayImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                      lir->operands[3], lir->operands[4]);
         break;
+      case kArrayReg64:
       case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
         EmitArrayReg(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                      lir->operands[3], lir->operands[4]);
@@ -1347,6 +1371,7 @@
         EmitRegArray(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                      lir->operands[3], lir->operands[4]);
         break;
+      case kReg64Thread:  // lir operands - 0: reg, 1: disp
       case kRegThread:  // lir operands - 0: reg, 1: disp
         EmitRegThread(entry, lir->operands[0], lir->operands[1]);
         break;
@@ -1370,6 +1395,7 @@
         EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2],
                       lir->operands[3]);
         break;
+      case kReg64Imm:
       case kRegImm:  // lir operands - 0: reg, 1: immediate
         EmitRegImm(entry, lir->operands[0], lir->operands[1]);
         break;
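
Note: EmitModrmThread above is the crux of 64-bit thread-local access. In
64-bit mode the bare mod=00/disp32 ModRM form is RIP-relative, so an
absolute gs:[disp] access needs rm=100 plus a SIB byte with base=101 (no
base, disp32); ComputeSize accounts for that extra byte. A sketch of the
bytes this yields for a hypothetical "mov rax, gs:[0x20]":

    #include <cstdint>
    #include <vector>

    // Illustrative only; 0x20 stands in for a real Thread field offset.
    std::vector<uint8_t> GsLoad64Example() {
      return {0x65,                     // THREAD_PREFIX_GS segment override
              0x48,                     // REX.W (prefix2 of kX86Mov64RT)
              0x8B,                     // mov r64, r/m64
              0x04,                     // ModRM: mod=00, reg=rax, rm=100 (SIB)
              0x25,                     // SIB: no index, base=101 -> disp32
              0x20, 0x00, 0x00, 0x00};  // absolute 32-bit displacement
    }
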
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index cf2b10a..4673cc0 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -156,8 +156,13 @@
   }
   NewLIR2(kX86PcRelAdr, rs_rX86_ARG1.GetReg(), WrapPointer(tab_rec));
   NewLIR2(kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg());
-  CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData), rs_rX86_ARG0,
-                          rs_rX86_ARG1, true);
+  if (Is64BitInstructionSet(cu_->instruction_set)) {
+    CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData), rs_rX86_ARG0,
+                            rs_rX86_ARG1, true);
+  } else {
+    CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pHandleFillArrayData), rs_rX86_ARG0,
+                            rs_rX86_ARG1, true);
+  }
 }
 
 void X86Mir2Lir::GenMoveException(RegLocation rl_dest) {
@@ -180,7 +185,11 @@
   int ct_offset = Is64BitInstructionSet(cu_->instruction_set) ?
       Thread::CardTableOffset<8>().Int32Value() :
       Thread::CardTableOffset<4>().Int32Value();
-  NewLIR2(kX86Mov32RT, reg_card_base.GetReg(), ct_offset);
+  if (Gen64Bit()) {
+    NewLIR2(kX86Mov64RT, reg_card_base.GetReg(), ct_offset);
+  } else {
+    NewLIR2(kX86Mov32RT, reg_card_base.GetReg(), ct_offset);
+  }
   OpRegRegImm(kOpLsr, reg_card_no, tgt_addr_reg, gc::accounting::CardTable::kCardShift);
   StoreBaseIndexed(reg_card_base, reg_card_no, reg_card_base, 0, kUnsignedByte);
   LIR* target = NewLIR0(kPseudoTargetLabel);
@@ -201,8 +210,7 @@
   LockTemp(rs_rX86_ARG2);
 
   /* Build frame, return address already on stack */
-  // TODO: 64 bit.
-  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ - 4);
+  stack_decrement_ = OpRegImm(kOpSub, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
 
   /*
    * We can safely skip the stack overflow check if we're
@@ -233,7 +241,7 @@
                            false /* MarkSafepointPC */, false /* UseLink */);
         } else {
           m2l_->CallHelper(RegStorage::InvalidReg(), QUICK_ENTRYPOINT_OFFSET(4, pThrowStackOverflow),
-                                     false /* MarkSafepointPC */, false /* UseLink */);
+                           false /* MarkSafepointPC */, false /* UseLink */);
         }
       }
 
@@ -248,7 +256,7 @@
     // mov esp, ebp
     // in case a signal comes in that's not using an alternate signal stack and the large frame may
     // have moved us outside of the reserved area at the end of the stack.
-    // cmp rX86_SP, fs:[stack_end_]; jcc throw_slowpath
+    // cmp rs_rX86_SP, fs:[stack_end_]; jcc throw_slowpath
     if (Is64BitInstructionSet(cu_->instruction_set)) {
       OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<8>());
     } else {
@@ -286,7 +294,7 @@
   NewLIR0(kPseudoMethodExit);
   UnSpillCoreRegs();
   /* Remove frame except for return address */
-  stack_increment_ = OpRegImm(kOpAdd, rs_rX86_SP, frame_size_ - 4);
+  stack_increment_ = OpRegImm(kOpAdd, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
   NewLIR0(kX86Ret);
 }
 
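Note: the prologue and epilogue now derive the explicit stack adjustment
from the target pointer size, because the call instruction has already
pushed a pointer-sized return address. A worked check under assumed frame
sizes:

    #include <cstddef>

    // Sketch: only frame_size minus the implicitly-pushed return address
    // is adjusted explicitly.
    constexpr std::size_t FrameAdjustment(std::size_t frame_size,
                                          std::size_t pointer_size) {
      return frame_size - pointer_size;
    }
    static_assert(FrameAdjustment(64, 8) == 56, "x86-64: sub rsp, 56");
    static_assert(FrameAdjustment(64, 4) == 60, "x86:    sub esp, 60");
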
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 11e7ff9..f681813 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -24,7 +24,7 @@
 
 class X86Mir2Lir FINAL : public Mir2Lir {
   public:
-    X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena);
+    X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit);
 
     // Required for target - codegen helpers.
     bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
@@ -325,10 +325,12 @@
     std::vector<uint8_t>* ReturnCallFrameInformation();
 
   private:
+    size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib);
     void EmitPrefix(const X86EncodingMap* entry);
     void EmitOpcode(const X86EncodingMap* entry);
     void EmitPrefixAndOpcode(const X86EncodingMap* entry);
     void EmitDisp(uint8_t base, int disp);
+    void EmitModrmThread(uint8_t reg_or_opcode);
     void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp);
     void EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, int scale, int disp);
     void EmitImm(const X86EncodingMap* entry, int imm);
@@ -578,6 +580,8 @@
      */
     void AnalyzeDoubleUse(RegLocation rl_use);
 
+    bool Gen64Bit() const { return gen64bit_; }
+
     // Information derived from analysis of MIR
 
     // The compiler temporary for the code address of the method.
@@ -606,6 +610,9 @@
 
     // Epilogue increment of stack pointer.
     LIR* stack_increment_;
+
+    // 64-bit mode.
+    bool gen64bit_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 22e554e..aec39ab 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -49,8 +49,13 @@
     case Instruction::REM_FLOAT_2ADDR:
     case Instruction::REM_FLOAT:
       FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
-                                              false);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2,
+                                                false);
+      } else {
+        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2,
+                                                false);
+      }
       rl_result = GetReturn(true);
       StoreValue(rl_dest, rl_result);
       return;
@@ -106,8 +111,13 @@
     case Instruction::REM_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE:
       FlushAllRegs();   // Send everything to home location
-      CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
-                                              false);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2,
+                                                false);
+      } else {
+        CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2,
+                                                false);
+      }
       rl_result = GetReturnWide(true);
       StoreValueWide(rl_dest, rl_result);
       return;
@@ -268,10 +278,18 @@
       GenLongToFP(rl_dest, rl_src, false /* is_double */);
       return;
     case Instruction::FLOAT_TO_LONG:
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pF2l), rl_dest, rl_src);
+      } else {
+        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src);
+      }
       return;
     case Instruction::DOUBLE_TO_LONG:
-      GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pD2l), rl_dest, rl_src);
+      } else {
+        GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src);
+      }
       return;
     default:
       LOG(INFO) << "Unexpected opcode: " << opcode;
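
Note: the repeated 4-vs-8 branches arise because QUICK_ENTRYPOINT_OFFSET(n, x)
yields distinct ThreadOffset<4>/ThreadOffset<8> types, so one call site
cannot accept either. A hypothetical helper (names invented, not in the
patch) shows how the dispatch could be centralized:

    #include <cstddef>
    #include <type_traits>
    #include <utility>

    // Hypothetical sketch: resolve the pointer width once and hand the call
    // site a compile-time tag, collapsing each if/else pair into one functor.
    template <typename Fn>
    void DispatchOnPointerSize(bool is_64bit, Fn&& fn) {
      if (is_64bit) {
        std::forward<Fn>(fn)(std::integral_constant<std::size_t, 8>());
      } else {
        std::forward<Fn>(fn)(std::integral_constant<std::size_t, 4>());
      }
    }
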
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 368234e..fbb1785 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -909,8 +909,13 @@
       }
       // Load array length to kArg1.
       m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1), array_base_, len_offset_);
-      m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                    new_index, m2l_->TargetReg(kArg1), true);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
+                                      new_index, m2l_->TargetReg(kArg1), true);
+      } else {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
+                                      new_index, m2l_->TargetReg(kArg1), true);
+      }
     }
 
    private:
@@ -944,8 +949,13 @@
       // Load array length to kArg1.
       m2l_->OpRegMem(kOpMov, m2l_->TargetReg(kArg1), array_base_, len_offset_);
       m2l_->LoadConstant(m2l_->TargetReg(kArg0), index_);
-      m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
-                                    m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pThrowArrayBounds),
+                                      m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+      } else {
+        m2l_->CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(4, pThrowArrayBounds),
+                                      m2l_->TargetReg(kArg0), m2l_->TargetReg(kArg1), true);
+      }
     }
 
    private:
@@ -1390,12 +1400,22 @@
 void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) {
   DCHECK_EQ(kX86_64, cu_->instruction_set);
   X86OpCode opcode = kX86Bkpt;
-  switch (op) {
-  case kOpCmp: opcode = kX86Cmp32RT;  break;
-  case kOpMov: opcode = kX86Mov32RT;  break;
-  default:
-    LOG(FATAL) << "Bad opcode: " << op;
-    break;
+  if (Gen64Bit() && r_dest.Is64BitSolo()) {
+    switch (op) {
+    case kOpCmp: opcode = kX86Cmp64RT;  break;
+    case kOpMov: opcode = kX86Mov64RT;  break;
+    default:
+      LOG(FATAL) << "Bad opcode (OpRegThreadMem 64): " << op;
+      break;
+    }
+  } else {
+    switch (op) {
+    case kOpCmp: opcode = kX86Cmp32RT;  break;
+    case kOpMov: opcode = kX86Mov32RT;  break;
+    default:
+      LOG(FATAL) << "Bad opcode: " << op;
+      break;
+    }
   }
   NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value());
 }
@@ -1862,8 +1882,8 @@
 
   // If Method* is already in a register, we can save a copy.
   RegLocation rl_method = mir_graph_->GetMethodLoc();
-  int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() +
-    (sizeof(mirror::Class*) * type_idx);
+  int32_t offset_of_type = mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() +
+    (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx);
 
   if (rl_method.location == kLocPhysReg) {
     if (use_declaring_class) {
@@ -1917,8 +1937,13 @@
   if (needs_access_check) {
     // Check we have access to type_idx and if not throw IllegalAccessError,
     // Caller function returns Class* in kArg0.
-    CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
-                         type_idx, true);
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeTypeAndVerifyAccess),
+                           type_idx, true);
+    } else {
+      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess),
+                           type_idx, true);
+    }
     OpRegCopy(class_reg, TargetReg(kRet0));
     LoadValueDirectFixed(rl_src, TargetReg(kArg0));
   } else if (use_declaring_class) {
@@ -1931,14 +1956,18 @@
     LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(),
                  class_reg);
     int32_t offset_of_type =
-        mirror::Array::DataOffset(sizeof(mirror::Class*)).Int32Value() + (sizeof(mirror::Class*)
+        mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + (sizeof(mirror::HeapReference<mirror::Class*>)
         * type_idx);
     LoadRefDisp(class_reg, offset_of_type, class_reg);
     if (!can_assume_type_is_in_dex_cache) {
       // Need to test presence of type in dex cache at runtime.
       LIR* hop_branch = OpCmpImmBranch(kCondNe, class_reg, 0, NULL);
       // Type is not resolved. Call out to helper, which will return resolved type in kRet0/kArg0.
-      CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
+      if (Is64BitInstructionSet(cu_->instruction_set)) {
+        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(8, pInitializeType), type_idx, true);
+      } else {
+        CallRuntimeHelperImm(QUICK_ENTRYPOINT_OFFSET(4, pInitializeType), type_idx, true);
+      }
       OpRegCopy(TargetReg(kArg2), TargetReg(kRet0));  // Align usage with fast path.
       LoadValueDirectFixed(rl_src, TargetReg(kArg0));  /* Reload Ref. */
       // Rejoin code paths
@@ -1972,7 +2001,11 @@
       branchover = OpCmpBranch(kCondEq, TargetReg(kArg1), TargetReg(kArg2), NULL);
     }
     OpRegCopy(TargetReg(kArg0), TargetReg(kArg2));
-    OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
+    if (Is64BitInstructionSet(cu_->instruction_set)) {
+      OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(8, pInstanceofNonTrivial));
+    } else {
+      OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pInstanceofNonTrivial));
+    }
   }
   // TODO: only clobber when type isn't final?
   ClobberCallerSave();
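
Note: switching the element size from sizeof(mirror::Class*) to the
HeapReference form matters on 64-bit targets: compressed heap references
stay 4 bytes while a raw Class* grows to 8, which would mis-index the
dex-cache type array. A worked check, with the 12-byte array data offset
assumed for illustration:

    #include <cstdint>

    // Sketch: element type_idx sits at data_offset + 4 * type_idx regardless
    // of the native pointer width (4 = assumed sizeof(HeapReference)).
    constexpr int32_t TypeCacheOffsetExample(int32_t data_offset,
                                             int32_t type_idx) {
      return data_offset + 4 * type_idx;
    }
    static_assert(TypeCacheOffsetExample(12, 3) == 24,
                  "type_idx 3 -> byte offset 24");
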
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 2db9845..6f93f1b 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -26,53 +26,117 @@
 
 namespace art {
 
-static const RegStorage core_regs_arr[] = {
-    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP, rs_rBP, rs_rSI, rs_rDI
+static const RegStorage core_regs_arr_32[] = {
+    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI,
+};
+static const RegStorage core_regs_arr_64[] = {
+    rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_64, rs_rBP, rs_rSI, rs_rDI,
 #ifdef TARGET_REX_SUPPORT
     rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15
 #endif
 };
-static const RegStorage sp_regs_arr[] = {
+static const RegStorage sp_regs_arr_32[] = {
+    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
+};
+static const RegStorage sp_regs_arr_64[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 #ifdef TARGET_REX_SUPPORT
     rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
 #endif
 };
-static const RegStorage dp_regs_arr[] = {
+static const RegStorage dp_regs_arr_32[] = {
+    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
+};
+static const RegStorage dp_regs_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 #ifdef TARGET_REX_SUPPORT
     rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
 #endif
 };
-static const RegStorage reserved_regs_arr[] = {rs_rX86_SP};
-static const RegStorage core_temps_arr[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
-static const RegStorage sp_temps_arr[] = {
+static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
+static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64};
+static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX};
+static const RegStorage core_temps_arr_64[] = {
+    rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
+#ifdef TARGET_REX_SUPPORT
+    rs_r8, rs_r9, rs_r10, rs_r11
+#endif
+};
+static const RegStorage sp_temps_arr_32[] = {
+    rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
+};
+static const RegStorage sp_temps_arr_64[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
 #ifdef TARGET_REX_SUPPORT
     rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
 #endif
 };
-static const RegStorage dp_temps_arr[] = {
+static const RegStorage dp_temps_arr_32[] = {
+    rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
+};
+static const RegStorage dp_temps_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 #ifdef TARGET_REX_SUPPORT
     rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
 #endif
 };
 
-static const std::vector<RegStorage> core_regs(core_regs_arr,
-    core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0]));
-static const std::vector<RegStorage> sp_regs(sp_regs_arr,
-    sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0]));
-static const std::vector<RegStorage> dp_regs(dp_regs_arr,
-    dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0]));
-static const std::vector<RegStorage> reserved_regs(reserved_regs_arr,
-    reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0]));
-static const std::vector<RegStorage> core_temps(core_temps_arr,
-    core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0]));
-static const std::vector<RegStorage> sp_temps(sp_temps_arr,
-    sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0]));
-static const std::vector<RegStorage> dp_temps(dp_temps_arr,
-    dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0]));
+static const std::vector<RegStorage> core_regs_32(core_regs_arr_32,
+    core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0]));
+static const std::vector<RegStorage> core_regs_64(core_regs_arr_64,
+    core_regs_arr_64 + sizeof(core_regs_arr_64) / sizeof(core_regs_arr_64[0]));
+static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32,
+    sp_regs_arr_32 + sizeof(sp_regs_arr_32) / sizeof(sp_regs_arr_32[0]));
+static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64,
+    sp_regs_arr_64 + sizeof(sp_regs_arr_64) / sizeof(sp_regs_arr_64[0]));
+static const std::vector<RegStorage> dp_regs_32(dp_regs_arr_32,
+    dp_regs_arr_32 + sizeof(dp_regs_arr_32) / sizeof(dp_regs_arr_32[0]));
+static const std::vector<RegStorage> dp_regs_64(dp_regs_arr_64,
+    dp_regs_arr_64 + sizeof(dp_regs_arr_64) / sizeof(dp_regs_arr_64[0]));
+static const std::vector<RegStorage> reserved_regs_32(reserved_regs_arr_32,
+    reserved_regs_arr_32 + sizeof(reserved_regs_arr_32) / sizeof(reserved_regs_arr_32[0]));
+static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64,
+    reserved_regs_arr_64 + sizeof(reserved_regs_arr_64) / sizeof(reserved_regs_arr_64[0]));
+static const std::vector<RegStorage> core_temps_32(core_temps_arr_32,
+    core_temps_arr_32 + sizeof(core_temps_arr_32) / sizeof(core_temps_arr_32[0]));
+static const std::vector<RegStorage> core_temps_64(core_temps_arr_64,
+    core_temps_arr_64 + sizeof(core_temps_arr_64) / sizeof(core_temps_arr_64[0]));
+static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32,
+    sp_temps_arr_32 + sizeof(sp_temps_arr_32) / sizeof(sp_temps_arr_32[0]));
+static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64,
+    sp_temps_arr_64 + sizeof(sp_temps_arr_64) / sizeof(sp_temps_arr_64[0]));
+static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32,
+    dp_temps_arr_32 + sizeof(dp_temps_arr_32) / sizeof(dp_temps_arr_32[0]));
+static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64,
+    dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0]));
+
+RegStorage rs_rX86_SP;
+
+X86NativeRegisterPool rX86_ARG0;
+X86NativeRegisterPool rX86_ARG1;
+X86NativeRegisterPool rX86_ARG2;
+X86NativeRegisterPool rX86_ARG3;
+X86NativeRegisterPool rX86_FARG0;
+X86NativeRegisterPool rX86_FARG1;
+X86NativeRegisterPool rX86_FARG2;
+X86NativeRegisterPool rX86_FARG3;
+X86NativeRegisterPool rX86_RET0;
+X86NativeRegisterPool rX86_RET1;
+X86NativeRegisterPool rX86_INVOKE_TGT;
+X86NativeRegisterPool rX86_COUNT;
+
+RegStorage rs_rX86_ARG0;
+RegStorage rs_rX86_ARG1;
+RegStorage rs_rX86_ARG2;
+RegStorage rs_rX86_ARG3;
+RegStorage rs_rX86_FARG0;
+RegStorage rs_rX86_FARG1;
+RegStorage rs_rX86_FARG2;
+RegStorage rs_rX86_FARG3;
+RegStorage rs_rX86_RET0;
+RegStorage rs_rX86_RET1;
+RegStorage rs_rX86_INVOKE_TGT;
+RegStorage rs_rX86_COUNT;
 
 RegLocation X86Mir2Lir::LocCReturn() {
   return x86_loc_c_return;
@@ -472,8 +536,13 @@
 }
 
 void X86Mir2Lir::CompilerInitializeRegAlloc() {
-  reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs, sp_regs, dp_regs, reserved_regs,
-                                        core_temps, sp_temps, dp_temps);
+  if (Gen64Bit()) {
+    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, sp_regs_64, dp_regs_64, reserved_regs_64,
+                                        core_temps_64, sp_temps_64, dp_temps_64);
+  } else {
+    reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, sp_regs_32, dp_regs_32, reserved_regs_32,
+                                        core_temps_32, sp_temps_32, dp_temps_32);
+  }
 
   // Target-specific adjustments.
 
@@ -517,11 +586,11 @@
   }
   // Spill mask not including fake return address register
   uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
-  int offset = frame_size_ - (4 * num_core_spills_);
+  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
       StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
-      offset += 4;
+      offset += GetInstructionSetPointerSize(cu_->instruction_set);
     }
   }
 }
@@ -532,11 +601,11 @@
   }
   // Spill mask not including fake return address register
   uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
-  int offset = frame_size_ - (4 * num_core_spills_);
+  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
       LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
-      offset += 4;
+      offset += GetInstructionSetPointerSize(cu_->instruction_set);
     }
   }
 }
@@ -560,13 +629,13 @@
   return RegClassBySize(size);
 }
 
-X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena)
+X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit)
     : Mir2Lir(cu, mir_graph, arena),
       base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false),
       method_address_insns_(arena, 100, kGrowableArrayMisc),
       class_type_address_insns_(arena, 100, kGrowableArrayMisc),
       call_method_insns_(arena, 100, kGrowableArrayMisc),
-      stack_decrement_(nullptr), stack_increment_(nullptr) {
+      stack_decrement_(nullptr), stack_increment_(nullptr), gen64bit_(gen64bit) {
   if (kIsDebugBuild) {
     for (int i = 0; i < kX86Last; i++) {
       if (X86Mir2Lir::EncodingMap[i].opcode != i) {
@@ -576,11 +645,56 @@
       }
     }
   }
+  if (Gen64Bit()) {
+    rs_rX86_SP = rs_rX86_SP_64;
+
+    rs_rX86_ARG0 = rs_rDI;
+    rs_rX86_ARG1 = rs_rSI;
+    rs_rX86_ARG2 = rs_rDX;
+    rs_rX86_ARG3 = rs_rCX;
+    rX86_ARG0 = rDI;
+    rX86_ARG1 = rSI;
+    rX86_ARG2 = rDX;
+    rX86_ARG3 = rCX;
+    // TODO: ARG4(r8), ARG5(r9), floating point args.
+  } else {
+    rs_rX86_SP = rs_rX86_SP_32;
+
+    rs_rX86_ARG0 = rs_rAX;
+    rs_rX86_ARG1 = rs_rCX;
+    rs_rX86_ARG2 = rs_rDX;
+    rs_rX86_ARG3 = rs_rBX;
+    rX86_ARG0 = rAX;
+    rX86_ARG1 = rCX;
+    rX86_ARG2 = rDX;
+    rX86_ARG3 = rBX;
+  }
+  rs_rX86_FARG0 = rs_rAX;
+  rs_rX86_FARG1 = rs_rCX;
+  rs_rX86_FARG2 = rs_rDX;
+  rs_rX86_FARG3 = rs_rBX;
+  rs_rX86_RET0 = rs_rAX;
+  rs_rX86_RET1 = rs_rDX;
+  rs_rX86_INVOKE_TGT = rs_rAX;
+  rs_rX86_COUNT = rs_rCX;
+  rX86_FARG0 = rAX;
+  rX86_FARG1 = rCX;
+  rX86_FARG2 = rDX;
+  rX86_FARG3 = rBX;
+  rX86_RET0 = rAX;
+  rX86_RET1 = rDX;
+  rX86_INVOKE_TGT = rAX;
+  rX86_COUNT = rCX;
 }
 
 Mir2Lir* X86CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
                           ArenaAllocator* const arena) {
-  return new X86Mir2Lir(cu, mir_graph, arena);
+  return new X86Mir2Lir(cu, mir_graph, arena, false);
+}
+
+Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_graph,
+                          ArenaAllocator* const arena) {
+  return new X86Mir2Lir(cu, mir_graph, arena, true);
 }
 
 // Not used in x86
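
Note: the constructor above wires the 64-bit argument registers to the
first four System V AMD64 integer-argument registers (RDI, RSI, RDX, RCX),
while the 32-bit path keeps ART's own EAX/ECX/EDX/EBX managed convention;
r8/r9 and floating-point arguments remain a TODO. A small sketch of the
resulting mapping:

    #include <array>
    #include <cstddef>

    // Illustrative only: argument index -> register name, as selected by the
    // X86Mir2Lir constructor for the two modes.
    inline const char* ArgRegName(std::size_t i, bool is_64bit) {
      static const std::array<const char*, 4> k64 = {{"rdi", "rsi", "rdx", "rcx"}};
      static const std::array<const char*, 4> k32 = {{"eax", "ecx", "edx", "ebx"}};
      return is_64bit ? k64.at(i) : k32.at(i);  // i > 3 throws: ARG4+ is TODO
    }
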
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 1da4f17..fb85318 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -130,32 +130,42 @@
   X86OpCode opcode = kX86Bkpt;
   bool byte_imm = IS_SIMM8(value);
   DCHECK(!r_dest_src1.IsFloat());
-  switch (op) {
-    case kOpLsl: opcode = kX86Sal32RI; break;
-    case kOpLsr: opcode = kX86Shr32RI; break;
-    case kOpAsr: opcode = kX86Sar32RI; break;
-    case kOpAdd: opcode = byte_imm ? kX86Add32RI8 : kX86Add32RI; break;
-    case kOpOr:  opcode = byte_imm ? kX86Or32RI8  : kX86Or32RI;  break;
-    case kOpAdc: opcode = byte_imm ? kX86Adc32RI8 : kX86Adc32RI; break;
-    // case kOpSbb: opcode = kX86Sbb32RI; break;
-    case kOpAnd: opcode = byte_imm ? kX86And32RI8 : kX86And32RI; break;
-    case kOpSub: opcode = byte_imm ? kX86Sub32RI8 : kX86Sub32RI; break;
-    case kOpXor: opcode = byte_imm ? kX86Xor32RI8 : kX86Xor32RI; break;
-    case kOpCmp: opcode = byte_imm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
-    case kOpMov:
-      /*
-       * Moving the constant zero into register can be specialized as an xor of the register.
-       * However, that sets eflags while the move does not. For that reason here, always do
-       * the move and if caller is flexible, they should be calling LoadConstantNoClobber instead.
-       */
-      opcode = kX86Mov32RI;
-      break;
-    case kOpMul:
-      opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI;
-      return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), value);
-    default:
-      LOG(FATAL) << "Bad case in OpRegImm " << op;
+  if (r_dest_src1.Is64Bit()) {
+    switch (op) {
+      case kOpAdd: opcode = byte_imm ? kX86Add64RI8 : kX86Add64RI; break;
+      case kOpSub: opcode = byte_imm ? kX86Sub64RI8 : kX86Sub64RI; break;
+      default:
+        LOG(FATAL) << "Bad case in OpRegImm (64-bit) " << op;
+    }
+  } else {
+    switch (op) {
+      case kOpLsl: opcode = kX86Sal32RI; break;
+      case kOpLsr: opcode = kX86Shr32RI; break;
+      case kOpAsr: opcode = kX86Sar32RI; break;
+      case kOpAdd: opcode = byte_imm ? kX86Add32RI8 : kX86Add32RI; break;
+      case kOpOr:  opcode = byte_imm ? kX86Or32RI8  : kX86Or32RI;  break;
+      case kOpAdc: opcode = byte_imm ? kX86Adc32RI8 : kX86Adc32RI; break;
+      // case kOpSbb: opcode = kX86Sbb32RI; break;
+      case kOpAnd: opcode = byte_imm ? kX86And32RI8 : kX86And32RI; break;
+      case kOpSub: opcode = byte_imm ? kX86Sub32RI8 : kX86Sub32RI; break;
+      case kOpXor: opcode = byte_imm ? kX86Xor32RI8 : kX86Xor32RI; break;
+      case kOpCmp: opcode = byte_imm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
+      case kOpMov:
+        /*
+         * Moving the constant zero into register can be specialized as an xor of the register.
+         * However, that sets eflags while the move does not. For that reason here, always do
+         * the move and if caller is flexible, they should be calling LoadConstantNoClobber instead.
+         */
+        opcode = kX86Mov32RI;
+        break;
+      case kOpMul:
+        opcode = byte_imm ? kX86Imul32RRI8 : kX86Imul32RRI;
+        return NewLIR3(opcode, r_dest_src1.GetReg(), r_dest_src1.GetReg(), value);
+      default:
+        LOG(FATAL) << "Bad case in OpRegImm " << op;
+    }
   }
+  CHECK(!r_dest_src1.Is64Bit() || X86Mir2Lir::EncodingMap[opcode].kind == kReg64Imm) << "OpRegImm(" << op << ")";
   return NewLIR2(opcode, r_dest_src1.GetReg(), value);
 }
 
@@ -464,7 +474,7 @@
                      r_src.GetReg() /* index */, value /* scale */, 0 /* disp */);
     } else if (op == kOpAdd) {  // lea add special case
       return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src.GetReg() /* base */,
-                     r4sib_no_index /* index */, 0 /* scale */, value /* disp */);
+                     rs_rX86_SP.GetReg() /* index: r4sib_no_index */, 0 /* scale */, value /* disp */);
     }
     OpRegCopy(r_dest, r_src);
   }
@@ -578,6 +588,13 @@
       // TODO: double store is to unaligned address
       DCHECK_EQ((displacement & 0x3), 0);
       break;
+    case kWord:
+      if (Gen64Bit()) {
+        opcode = is_array ? kX86Mov64RA  : kX86Mov64RM;
+        CHECK_EQ(is_array, false);
+        CHECK_EQ(r_dest.IsFloat(), false);
+        break;
+      }  // else fall-through to k32 case
     case k32:
     case kSingle:
     case kReference:  // TODO: update for reference decompression on 64-bit targets.
@@ -689,10 +706,6 @@
 
 LIR* X86Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
                               OpSize size) {
-  // TODO: base this on target.
-  if (size == kWord) {
-    size = k32;
-  }
   return LoadBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_dest,
                              size);
 }
@@ -711,11 +724,23 @@
       if (r_src.IsFloat()) {
         opcode = is_array ? kX86MovsdAR : kX86MovsdMR;
       } else {
-        opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
+        if (Gen64Bit()) {
+          opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
+        } else {
+          // TODO(64): pair = true;
+          opcode = is_array ? kX86Mov32AR  : kX86Mov32MR;
+        }
       }
       // TODO: double store is to unaligned address
       DCHECK_EQ((displacement & 0x3), 0);
       break;
+    case kWord:
+      if (Gen64Bit()) {
+        opcode = is_array ? kX86Mov64AR  : kX86Mov64MR;
+        CHECK_EQ(is_array, false);
+        CHECK_EQ(r_src.IsFloat(), false);
+        break;
+      }  // else fall-through to k32 case
     case k32:
     case kSingle:
     case kReference:
@@ -785,10 +810,6 @@
 
 LIR* X86Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement,
                                RegStorage r_src, OpSize size) {
-  // TODO: base this on target.
-  if (size == kWord) {
-    size = k32;
-  }
   return StoreBaseIndexedDisp(r_base, RegStorage::InvalidReg(), 0, displacement, r_src, size);
 }
 
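Note: with the 64-bit kOpAdd/kOpSub cases in OpRegImm, the prologue's stack
decrement can now select kX86Sub64RI8 for small frames. Under the standard
x86-64 encoding that the new EncodingMap entries describe, the expected
bytes for "sub rsp, 56" are:

    #include <cstdint>
    #include <vector>

    // Illustrative only: "sub rsp, 56" as kX86Sub64RI8 should emit it.
    std::vector<uint8_t> SubRsp64Example() {
      return {0x48,   // REX.W from the skeleton's prefix1
              0x83,   // group-1 opcode: r/m64, imm8
              0xEC,   // ModRM: mod=11, /5 (sub), rm=rsp
              0x38};  // imm8 = 56
    }
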
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 77d716f..c8c2542 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -124,9 +124,10 @@
   rDX            = r2,
   r3             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 3,
   rBX            = r3,
-  r4sp           = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
-  rX86_SP        = r4sp,
-  r4sib_no_index = r4sp,
+  r4sp_32        = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 4,
+  rX86_SP_32     = r4sp_32,
+  r4sp_64        = RegStorage::k64BitSolo | RegStorage::kCoreRegister | 4,
+  rX86_SP_64     = r4sp_64,
   r5             = RegStorage::k32BitSolo | RegStorage::kCoreRegister | 5,
   rBP            = r5,
   r5sib_no_base  = r5,
@@ -191,8 +192,9 @@
 constexpr RegStorage rs_rDX = rs_r2;
 constexpr RegStorage rs_r3(RegStorage::kValid | r3);
 constexpr RegStorage rs_rBX = rs_r3;
-constexpr RegStorage rs_r4sp(RegStorage::kValid | r4sp);
-constexpr RegStorage rs_rX86_SP = rs_r4sp;
+constexpr RegStorage rs_rX86_SP_64(RegStorage::kValid | r4sp_64);
+constexpr RegStorage rs_rX86_SP_32(RegStorage::kValid | r4sp_32);
+extern RegStorage rs_rX86_SP;
 constexpr RegStorage rs_r5(RegStorage::kValid | r5);
 constexpr RegStorage rs_rBP = rs_r5;
 constexpr RegStorage rs_r6(RegStorage::kValid | r6);
@@ -228,35 +230,31 @@
 constexpr RegStorage rs_qr6(RegStorage::kValid | qr6);
 constexpr RegStorage rs_qr7(RegStorage::kValid | qr7);
 
-// TODO: elminate these #defines?
-#define rX86_ARG0 rAX
-#define rs_rX86_ARG0 rs_rAX
-#define rX86_ARG1 rCX
-#define rs_rX86_ARG1 rs_rCX
-#define rX86_ARG2 rDX
-#define rs_rX86_ARG2 rs_rDX
-#define rX86_ARG3 rBX
-#define rs_rX86_ARG3 rs_rBX
-#define rX86_FARG0 rAX
-#define rs_rX86_FARG0 rs_rAX
-#define rX86_FARG1 rCX
-#define rs_rX86_FARG1 rs_rCX
-#define rX86_FARG2 rDX
-#define rs_rX86_FARG2 rs_rDX
-#define rX86_FARG3 rBX
-#define rs_rX86_FARG3 rs_rBX
-#define rX86_RET0 rAX
-#define rs_rX86_RET0 rs_rAX
-#define rX86_RET1 rDX
-#define rs_rX86_RET1 rs_rDX
-#define rX86_INVOKE_TGT rAX
-#define rs_rX86_INVOKE_TGT rs_rAX
-#define rX86_LR RegStorage::kInvalidRegVal
-#define rX86_SUSPEND RegStorage::kInvalidRegVal
-#define rX86_SELF RegStorage::kInvalidRegVal
-#define rX86_COUNT rCX
-#define rs_rX86_COUNT rs_rCX
-#define rX86_PC RegStorage::kInvalidRegVal
+extern X86NativeRegisterPool rX86_ARG0;
+extern X86NativeRegisterPool rX86_ARG1;
+extern X86NativeRegisterPool rX86_ARG2;
+extern X86NativeRegisterPool rX86_ARG3;
+extern X86NativeRegisterPool rX86_FARG0;
+extern X86NativeRegisterPool rX86_FARG1;
+extern X86NativeRegisterPool rX86_FARG2;
+extern X86NativeRegisterPool rX86_FARG3;
+extern X86NativeRegisterPool rX86_RET0;
+extern X86NativeRegisterPool rX86_RET1;
+extern X86NativeRegisterPool rX86_INVOKE_TGT;
+extern X86NativeRegisterPool rX86_COUNT;
+
+extern RegStorage rs_rX86_ARG0;
+extern RegStorage rs_rX86_ARG1;
+extern RegStorage rs_rX86_ARG2;
+extern RegStorage rs_rX86_ARG3;
+extern RegStorage rs_rX86_FARG0;
+extern RegStorage rs_rX86_FARG1;
+extern RegStorage rs_rX86_FARG2;
+extern RegStorage rs_rX86_FARG3;
+extern RegStorage rs_rX86_RET0;
+extern RegStorage rs_rX86_RET1;
+extern RegStorage rs_rX86_INVOKE_TGT;
+extern RegStorage rs_rX86_COUNT;
 
 // RegisterLocation templates return values (r_V0, or r_V0/r_V1).
 const RegLocation x86_loc_c_return
@@ -313,10 +311,10 @@
   opcode ## 16RR, opcode ## 16RM, opcode ## 16RA, opcode ## 16RT, \
   opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, opcode ## 16TI, \
   opcode ## 16RI8, opcode ## 16MI8, opcode ## 16AI8, opcode ## 16TI8, \
-  opcode ## 32MR, opcode ## 32AR, opcode ## 32TR,  \
-  opcode ## 32RR, opcode ## 32RM, opcode ## 32RA, opcode ## 32RT, \
-  opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \
-  opcode ## 32RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8
+  opcode ## 32MR, opcode ## 64MR, opcode ## 32AR, opcode ## 64AR, opcode ## 32TR,  \
+  opcode ## 32RR, opcode ## 32RM, opcode ## 64RM, opcode ## 32RA, opcode ## 64RA, opcode ## 32RT, opcode ## 64RT, \
+  opcode ## 32RI, opcode ## 64RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \
+  opcode ## 32RI8, opcode ## 64RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8
   BinaryOpCode(kX86Add),
   BinaryOpCode(kX86Or),
   BinaryOpCode(kX86Adc),
@@ -335,9 +333,9 @@
   kX86Mov16MR, kX86Mov16AR, kX86Mov16TR,
   kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT,
   kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI,
-  kX86Mov32MR, kX86Mov32AR, kX86Mov32TR,
-  kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT,
-  kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI,
+  kX86Mov32MR, kX86Mov64MR, kX86Mov32AR, kX86Mov64AR, kX86Mov32TR,
+  kX86Mov32RR, kX86Mov32RM, kX86Mov64RM, kX86Mov32RA, kX86Mov64RA, kX86Mov32RT, kX86Mov64RT,
+  kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, kX86Mov64TI,
   kX86Lea32RM,
   kX86Lea32RA,
   // RRC - Register Register ConditionCode - cond_opcode reg1, reg2
@@ -479,11 +477,11 @@
   kNullary,                                // Opcode that takes no arguments.
   kPrefix2Nullary,                         // Opcode that takes no arguments, but 2 prefixes.
   kRegOpcode,                              // Shorter form of R instruction kind (opcode+rd)
-  kReg, kMem, kArray,                      // R, M and A instruction kinds.
-  kMemReg, kArrayReg, kThreadReg,          // MR, AR and TR instruction kinds.
-  kRegReg, kRegMem, kRegArray, kRegThread,  // RR, RM, RA and RT instruction kinds.
+  kReg, kReg64, kMem, kArray,              // R, R64, M and A instruction kinds.
+  kMemReg, kMemReg64, kArrayReg, kArrayReg64, kThreadReg,  // MR, MR64, AR, AR64 and TR instruction kinds.
+  kRegReg, kRegMem, kRegArray, kRegThread, kReg64Thread,  // RR, RM, RA, RT and R64T instruction kinds.
   kRegRegStore,                            // RR following the store modrm reg-reg encoding rather than the load.
-  kRegImm, kMemImm, kArrayImm, kThreadImm,  // RI, MI, AI and TI instruction kinds.
+  kRegImm, kReg64Imm, kMemImm, kArrayImm, kThreadImm,  // RI, R64I, MI, AI and TI instruction kinds.
   kRegRegImm, kRegMemImm, kRegArrayImm,    // RRI, RMI and RAI instruction kinds.
   kMovRegImm,                              // Shorter form move RI.
   kRegRegImmRev,                           // RRI with first reg in r/m
@@ -532,6 +530,12 @@
 
 // Segment override instruction prefix used for quick TLS access to Thread::Current().
 #define THREAD_PREFIX 0x64
+#define THREAD_PREFIX_GS 0x65
+
+// 64 Bit Operand Size
+#define REX_W 0x48
+// Extension of the ModR/M reg field
+#define REX_R 0x44
 
 #define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127))
 #define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767))
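
Note: REX_W's value follows from the REX prefix layout 0100WRXB: the fixed
high nibble is 0x40 and bit 3 (W) selects 64-bit operand size. The R/X/B
bits extend the ModR/M and SIB register fields to r8-r15 and are not used
by this patch yet (cf. TARGET_REX_SUPPORT). A one-line check:

    #include <cstdint>

    // REX prefix bits: 0 1 0 0 W R X B.
    constexpr uint8_t kRexBase = 0x40;
    constexpr uint8_t kRexW = kRexBase | (1 << 3);  // W bit
    static_assert(kRexW == 0x48, "matches the REX_W define above");
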
diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h
index 979f516..3387c50 100644
--- a/compiler/dex/reg_storage.h
+++ b/compiler/dex/reg_storage.h
@@ -137,6 +137,10 @@
     return ((reg_ & k64BitMask) == k64Bits);
   }
 
+  bool Is64BitSolo() const {
+    return ((reg_ & kShapeMask) == k64BitSolo);
+  }
+
   bool IsPair() const {
     return ((reg_ & kShapeMask) == k64BitPair);
   }
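
Note: Is64Bit() is true for both a solo 64-bit register and a 64-bit pair,
whereas the new Is64BitSolo() accepts only the former; OpRegThreadMem relies
on that to route rX86_SP_64-style solo registers to the REX.W encodings
while pairs keep the 32-bit paths. A shape-only sketch (illustrative
constants, not the real RegStorage bit values):

    // Illustrative model of the storage shapes involved.
    enum Shape { k32SoloEx, k64SoloEx, k64PairEx };
    constexpr bool Is64BitEx(Shape s)     { return s != k32SoloEx; }
    constexpr bool Is64BitSoloEx(Shape s) { return s == k64SoloEx; }
    static_assert(Is64BitEx(k64PairEx) && !Is64BitSoloEx(k64PairEx),
                  "a pair is 64-bit but not 64-bit solo");
    static_assert(Is64BitSoloEx(k64SoloEx), "solo 64-bit register");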