Merge "art: rename aarch64 target to arm64"
diff --git a/build/Android.common.mk b/build/Android.common.mk
index 415d810..0f4ade3 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -182,6 +182,9 @@
 # Make host builds easier to debug and profile by not omitting the frame pointer.
 ART_HOST_CFLAGS += -fno-omit-frame-pointer
 
+# Workaround differences in inttypes.h.
+ART_HOST_CFLAGS += -D__STDC_FORMAT_MACROS=1
+
 # To use oprofile_android --callgraph, uncomment this and recompile with "mmm art -B -j16"
 # ART_TARGET_CFLAGS += -fno-omit-frame-pointer -marm -mapcs
 
diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc
index 75883b7..9e83210 100644
--- a/compiler/dex/local_value_numbering.cc
+++ b/compiler/dex/local_value_numbering.cc
@@ -380,9 +380,6 @@
           }
           mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
         }
-        if (mir->meta.throw_insn != NULL) {
-          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
-        }
         // Use side effect to note range check completed.
         (void)LookupValue(ARRAY_REF, array, index, NO_VALUE);
         // Establish value number for loaded register. Note use of memory version.
@@ -421,9 +418,6 @@
           }
           mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK;
         }
-        if (mir->meta.throw_insn != NULL) {
-          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
-        }
         // Use side effect to note range check completed.
         (void)LookupValue(ARRAY_REF, array, index, NO_VALUE);
         // Rev the memory version
@@ -447,9 +441,6 @@
         } else {
           null_checked_.insert(base);
         }
-        if (mir->meta.throw_insn != NULL) {
-          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
-        }
         uint16_t field_ref = mir->dalvikInsn.vC;
         uint16_t memory_version = GetMemoryVersion(base, field_ref);
         if (opcode == Instruction::IGET_WIDE) {
@@ -479,9 +470,6 @@
         } else {
           null_checked_.insert(base);
         }
-        if (mir->meta.throw_insn != NULL) {
-          mir->meta.throw_insn->optimization_flags |= mir->optimization_flags;
-        }
         uint16_t field_ref = mir->dalvikInsn.vC;
         AdvanceMemoryVersion(base, field_ref);
       }
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index 8d1653f..856ae52 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -126,9 +126,6 @@
   bottom_block->terminated_by_return = orig_block->terminated_by_return;
   orig_block->terminated_by_return = false;
 
-  /* Add it to the quick lookup cache */
-  dex_pc_to_block_map_.Put(bottom_block->start_offset, bottom_block->id);
-
   /* Handle the taken path */
   bottom_block->taken = orig_block->taken;
   if (bottom_block->taken != NullBasicBlockId) {
@@ -177,19 +174,29 @@
   }
 
   // Associate dex instructions in the bottom block with the new container.
-  MIR* p = bottom_block->first_mir_insn;
-  while (p != NULL) {
+  DCHECK(insn != nullptr);
+  DCHECK(insn != orig_block->first_mir_insn);
+  DCHECK(insn == bottom_block->first_mir_insn);
+  DCHECK_EQ(insn->offset, bottom_block->start_offset);
+  DCHECK(static_cast<int>(insn->dalvikInsn.opcode) == kMirOpCheck ||
+         !IsPseudoMirOp(insn->dalvikInsn.opcode));
+  DCHECK_EQ(dex_pc_to_block_map_.Get(insn->offset), orig_block->id);
+  MIR* p = insn;
+  dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
+  while (p != bottom_block->last_mir_insn) {
+    p = p->next;
+    DCHECK(p != nullptr);
     int opcode = p->dalvikInsn.opcode;
     /*
      * Some messiness here to ensure that we only enter real opcodes and only the
      * first half of a potentially throwing instruction that has been split into
-     * CHECK and work portions.  The 2nd half of a split operation will have a non-null
-     * throw_insn pointer that refers to the 1st half.
+     * CHECK and work portions. Since the 2nd half of a split operation is always
+     * the first in a BasicBlock, we can't hit it here.
      */
-    if ((opcode == kMirOpCheck) || (!IsPseudoMirOp(opcode) && (p->meta.throw_insn == NULL))) {
+    if ((opcode == kMirOpCheck) || !IsPseudoMirOp(opcode)) {
+      DCHECK_EQ(dex_pc_to_block_map_.Get(p->offset), orig_block->id);
       dex_pc_to_block_map_.Put(p->offset, bottom_block->id);
     }
-    p = (p == bottom_block->last_mir_insn) ? NULL : p->next;
   }
 
   return bottom_block;
@@ -508,7 +515,6 @@
       static_cast<Instruction::Code>(kMirOpCheck);
   // Associate the two halves
   insn->meta.throw_insn = new_insn;
-  new_insn->meta.throw_insn = insn;
   AppendMIR(new_block, new_insn);
   return new_block;
 }
diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h
index b68e699..4666d1e 100644
--- a/compiler/dex/mir_graph.h
+++ b/compiler/dex/mir_graph.h
@@ -253,8 +253,10 @@
   union {
     // Incoming edges for phi node.
     BasicBlockId* phi_incoming;
-    // Establish link between two halves of throwing instructions.
+    // Establish link from check instruction (kMirOpCheck) to the actual throwing instruction.
     MIR* throw_insn;
+    // Fused cmp branch condition.
+    ConditionCode ccode;
   } meta;
 };
 
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index ee9f28e..5c41520 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -259,7 +259,7 @@
             if ((ccode != kCondNv) &&
                 (mir->ssa_rep->defs[0] == mir_next->ssa_rep->uses[0]) &&
                 (GetSSAUseCount(mir->ssa_rep->defs[0]) == 1)) {
-              mir_next->dalvikInsn.arg[0] = ccode;
+              mir_next->meta.ccode = ccode;
               switch (opcode) {
                 case Instruction::CMPL_FLOAT:
                   mir_next->dalvikInsn.opcode =
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 2bc579a..3668dc0 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_
 #define ART_COMPILER_DEX_QUICK_ARM_CODEGEN_ARM_H_
 
+#include "arm_lir.h"
 #include "dex/compiler_internals.h"
 
 namespace art {
@@ -94,9 +95,9 @@
                      RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
                           RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
@@ -110,9 +111,9 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset,
                                 ThrowKind kind);
     RegLocation GenDivRem(RegLocation rl_dest, int reg_lo, int reg_hi, bool is_div);
@@ -195,6 +196,9 @@
     void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void InsertFixupBefore(LIR* prev_lir, LIR* orig_lir, LIR* new_lir);
     void AssignDataOffsets();
+    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                          RegLocation rl_src2, bool is_div, bool check_zero);
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 1a9d9c5..46542e1 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -209,7 +209,7 @@
     NewLIR2(kThumb2Vcmps, rl_src1.low_reg, rl_src2.low_reg);
   }
   NewLIR0(kThumb2Fmstat);
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  ConditionCode ccode = mir->meta.ccode;
   switch (ccode) {
     case kCondEq:
     case kCondNe:
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 86ae75e..71c3492 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -228,7 +228,7 @@
   RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
   // Normalize such that if either operand is constant, src2 will be constant.
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  ConditionCode ccode = mir->meta.ccode;
   if (rl_src1.is_const) {
     std::swap(rl_src1, rl_src2);
     ccode = FlipComparisonOrder(ccode);
@@ -444,6 +444,17 @@
   return NULL;
 }
 
+RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, bool is_div, bool check_zero) {
+  LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
+  return rl_dest;
+}
+
+RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
+  return rl_dest;
+}
+
 RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, int reg1, int lit,
                                      bool is_div) {
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -795,8 +806,8 @@
   return (abs(mir_graph_->SRegToVReg(rl_src.s_reg_low) - mir_graph_->SRegToVReg(rl_dest.s_reg_low)) == 1);
 }
 
-void ArmMir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
+void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
     /*
      * To pull off inline multiply, we have a worst-case requirement of 8 temporary
      * registers.  Normally for Arm, we get 5.  We can get to 6 by including
@@ -868,27 +879,27 @@
     UnmarkTemp(rARM_LR);
 }
 
-void ArmMir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenAddLong for Arm";
 }
 
-void ArmMir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenSubLong for Arm";
 }
 
-void ArmMir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenAndLong for Arm";
 }
 
-void ArmMir2Lir::GenOrLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenOrLong for Arm";
 }
 
-void ArmMir2Lir::GenXorLong(RegLocation rl_dest, RegLocation rl_src1,
+void ArmMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of genXoLong for Arm";
 }
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 7591041..ceec7d5 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -14,10 +14,12 @@
  * limitations under the License.
  */
 
+#include "codegen_arm.h"
+
+#include <inttypes.h>
+
 #include <string>
 
-#include "arm_lir.h"
-#include "codegen_arm.h"
 #include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir-inl.h"
 
@@ -407,9 +409,8 @@
              strcpy(tbuf, cc_names[operand]);
              break;
            case 't':
-             snprintf(tbuf, arraysize(tbuf), "0x%08x (L%p)",
-                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 +
-                 (operand << 1),
+             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
+                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
                  lir->target);
              break;
            case 'u': {
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 2ce7ecd..12ecfff 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -34,7 +34,7 @@
 void DumpMappingTable(const char* table_name, const char* descriptor, const char* name,
                       const Signature& signature, uint32_t size, It first) {
   if (size != 0) {
-    std::string line(StringPrintf("\n  %s %s%s_%s_table[%zu] = {", table_name,
+    std::string line(StringPrintf("\n  %s %s%s_%s_table[%u] = {", table_name,
                      descriptor, name, signature.ToString().c_str(), size));
     std::replace(line.begin(), line.end(), ';', '_');
     LOG(INFO) << line;
@@ -234,8 +234,8 @@
                                                lir, base_addr));
         std::string op_operands(BuildInsnString(GetTargetInstFmt(lir->opcode),
                                                     lir, base_addr));
-        LOG(INFO) << StringPrintf("%05x: %-9s%s%s",
-                                  reinterpret_cast<unsigned int>(base_addr + offset),
+        LOG(INFO) << StringPrintf("%5p: %-9s%s%s",
+                                  base_addr + offset,
                                   op_name.c_str(), op_operands.c_str(),
                                   lir->flags.is_nop ? "(nop)" : "");
       }
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index daf21df..1f00b2a 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1382,6 +1382,9 @@
       }
       rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv);
       done = true;
+    } else if (cu_->instruction_set == kX86) {
+      rl_result = GenDivRem(rl_dest, rl_src1, rl_src2, op == kOpDiv, check_zero);
+      done = true;
     } else if (cu_->instruction_set == kThumb2) {
       if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) {
         // Use ARM SDIV instruction for division.  For remainder we also need to
@@ -1650,6 +1653,9 @@
         rl_src = LoadValue(rl_src, kCoreReg);
         rl_result = GenDivRemLit(rl_dest, rl_src.low_reg, lit, is_div);
         done = true;
+      } else if (cu_->instruction_set == kX86) {
+        rl_result = GenDivRemLit(rl_dest, rl_src, lit, is_div);
+        done = true;
       } else if (cu_->instruction_set == kThumb2) {
         if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) {
           // Use ARM SDIV instruction for division.  For remainder we also need to
@@ -1718,7 +1724,7 @@
     case Instruction::ADD_LONG:
     case Instruction::ADD_LONG_2ADDR:
       if (cu_->instruction_set != kThumb2) {
-        GenAddLong(rl_dest, rl_src1, rl_src2);
+        GenAddLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpAdd;
@@ -1727,7 +1733,7 @@
     case Instruction::SUB_LONG:
     case Instruction::SUB_LONG_2ADDR:
       if (cu_->instruction_set != kThumb2) {
-        GenSubLong(rl_dest, rl_src1, rl_src2);
+        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpSub;
@@ -1736,7 +1742,7 @@
     case Instruction::MUL_LONG:
     case Instruction::MUL_LONG_2ADDR:
       if (cu_->instruction_set == kThumb2) {
-        GenMulLong(rl_dest, rl_src1, rl_src2);
+        GenMulLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       } else {
         call_out = true;
@@ -1762,7 +1768,7 @@
     case Instruction::AND_LONG_2ADDR:
     case Instruction::AND_LONG:
       if (cu_->instruction_set == kX86) {
-        return GenAndLong(rl_dest, rl_src1, rl_src2);
+        return GenAndLong(opcode, rl_dest, rl_src1, rl_src2);
       }
       first_op = kOpAnd;
       second_op = kOpAnd;
@@ -1770,7 +1776,7 @@
     case Instruction::OR_LONG:
     case Instruction::OR_LONG_2ADDR:
       if (cu_->instruction_set == kX86) {
-        GenOrLong(rl_dest, rl_src1, rl_src2);
+        GenOrLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpOr;
@@ -1779,7 +1785,7 @@
     case Instruction::XOR_LONG:
     case Instruction::XOR_LONG_2ADDR:
       if (cu_->instruction_set == kX86) {
-        GenXorLong(rl_dest, rl_src1, rl_src2);
+        GenXorLong(opcode, rl_dest, rl_src1, rl_src2);
         return;
       }
       first_op = kOpXor;
diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc
index 8f2f6ad..65582dd 100644
--- a/compiler/dex/quick/gen_loadstore.cc
+++ b/compiler/dex/quick/gen_loadstore.cc
@@ -294,6 +294,53 @@
   }
 }
 
+void Mir2Lir::StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src) {
+  DCHECK_EQ(IsFpReg(rl_src.low_reg), IsFpReg(rl_src.high_reg));
+  DCHECK(rl_dest.wide);
+  DCHECK(rl_src.wide);
+  DCHECK_EQ(rl_src.location, kLocPhysReg);
+
+  if (rl_dest.location == kLocPhysReg) {
+    OpRegCopyWide(rl_dest.low_reg, rl_dest.high_reg, rl_src.low_reg, rl_src.high_reg);
+  } else {
+    // Just re-assign the registers.  Dest gets Src's regs.
+    rl_dest.low_reg = rl_src.low_reg;
+    rl_dest.high_reg = rl_src.high_reg;
+    rl_dest.location = kLocPhysReg;
+    Clobber(rl_src.low_reg);
+    Clobber(rl_src.high_reg);
+  }
+
+  // Dest is now live and dirty (until/if we flush it to home location).
+  MarkLive(rl_dest.low_reg, rl_dest.s_reg_low);
+
+  // Does this wide value live in two registers (or one vector one)?
+  if (rl_dest.low_reg != rl_dest.high_reg) {
+    MarkLive(rl_dest.high_reg, GetSRegHi(rl_dest.s_reg_low));
+    MarkDirty(rl_dest);
+    MarkPair(rl_dest.low_reg, rl_dest.high_reg);
+  } else {
+    // This must be an x86 vector register value,
+    DCHECK(IsFpReg(rl_dest.low_reg) && (cu_->instruction_set == kX86));
+    MarkDirty(rl_dest);
+  }
+
+  ResetDefLocWide(rl_dest);
+  if ((IsDirty(rl_dest.low_reg) ||
+      IsDirty(rl_dest.high_reg)) &&
+      (oat_live_out(rl_dest.s_reg_low) ||
+      oat_live_out(GetSRegHi(rl_dest.s_reg_low)))) {
+    LIR *def_start = last_lir_insn_;
+    DCHECK_EQ((mir_graph_->SRegToVReg(rl_dest.s_reg_low)+1),
+              mir_graph_->SRegToVReg(GetSRegHi(rl_dest.s_reg_low)));
+    StoreBaseDispWide(TargetReg(kSp), SRegOffset(rl_dest.s_reg_low),
+                      rl_dest.low_reg, rl_dest.high_reg);
+    MarkClean(rl_dest);
+    LIR *def_end = last_lir_insn_;
+    MarkDefWide(rl_dest, def_start, def_end);
+  }
+}
+
 /* Utilities to load the current Method* */
 void Mir2Lir::LoadCurrMethodDirect(int r_tgt) {
   LoadValueDirectFixed(mir_graph_->GetMethodLoc(), r_tgt);
@@ -303,4 +350,47 @@
   return LoadValue(mir_graph_->GetMethodLoc(), kCoreReg);
 }
 
+RegLocation Mir2Lir::ForceTemp(RegLocation loc) {
+  DCHECK(!loc.wide);
+  DCHECK(loc.location == kLocPhysReg);
+  DCHECK(!IsFpReg(loc.low_reg));
+  DCHECK(!IsFpReg(loc.high_reg));
+  if (IsTemp(loc.low_reg)) {
+    Clobber(loc.low_reg);
+  } else {
+    int temp_low = AllocTemp();
+    OpRegCopy(temp_low, loc.low_reg);
+    loc.low_reg = temp_low;
+  }
+
+  // Ensure that this doesn't represent the original SR any more.
+  loc.s_reg_low = INVALID_SREG;
+  return loc;
+}
+
+RegLocation Mir2Lir::ForceTempWide(RegLocation loc) {
+  DCHECK(loc.wide);
+  DCHECK(loc.location == kLocPhysReg);
+  DCHECK(!IsFpReg(loc.low_reg));
+  DCHECK(!IsFpReg(loc.high_reg));
+  if (IsTemp(loc.low_reg)) {
+    Clobber(loc.low_reg);
+  } else {
+    int temp_low = AllocTemp();
+    OpRegCopy(temp_low, loc.low_reg);
+    loc.low_reg = temp_low;
+  }
+  if (IsTemp(loc.high_reg)) {
+    Clobber(loc.high_reg);
+  } else {
+    int temp_high = AllocTemp();
+    OpRegCopy(temp_high, loc.high_reg);
+    loc.high_reg = temp_high;
+  }
+
+  // Ensure that this doesn't represent the original SR any more.
+  loc.s_reg_low = INVALID_SREG;
+  return loc;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index a5a14d5..aca93f5 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h
@@ -94,9 +94,9 @@
                      RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
                                   RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
@@ -110,9 +110,9 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset,
                                 ThrowKind kind);
     RegLocation GenDivRem(RegLocation rl_dest, int reg_lo, int reg_hi, bool is_div);
@@ -175,6 +175,9 @@
 
   private:
     void ConvertShortToLongBranch(LIR* lir);
+    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                          RegLocation rl_src2, bool is_div, bool check_zero);
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/mips/int_mips.cc b/compiler/dex/quick/mips/int_mips.cc
index 180d56c..013041a 100644
--- a/compiler/dex/quick/mips/int_mips.cc
+++ b/compiler/dex/quick/mips/int_mips.cc
@@ -250,6 +250,17 @@
   return rl_result;
 }
 
+RegLocation MipsMir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, bool is_div, bool check_zero) {
+  LOG(FATAL) << "Unexpected use of GenDivRem for Mips";
+  return rl_dest;
+}
+
+RegLocation MipsMir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit, bool is_div) {
+  LOG(FATAL) << "Unexpected use of GenDivRemLit for Mips";
+  return rl_dest;
+}
+
 void MipsMir2Lir::OpLea(int rBase, int reg1, int reg2, int scale, int offset) {
   LOG(FATAL) << "Unexpected use of OpLea for Arm";
 }
@@ -356,13 +367,13 @@
   return NULL;
 }
 
-void MipsMir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenMulLong for Mips";
 }
 
-void MipsMir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -383,8 +394,8 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void MipsMir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   rl_src2 = LoadValueWide(rl_src2, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
@@ -425,18 +436,19 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
-void MipsMir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
+void MipsMir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1,
                              RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenAndLong for Mips";
 }
 
-void MipsMir2Lir::GenOrLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
+void MipsMir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenOrLong for Mips";
 }
 
-void MipsMir2Lir::GenXorLong(RegLocation rl_dest, RegLocation rl_src1,
-                             RegLocation rl_src2) {
+void MipsMir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
+                             RegLocation rl_src1, RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenXorLong for Mips";
 }
 
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index 1aee06c..b744adc 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -15,12 +15,15 @@
  */
 
 #include "codegen_mips.h"
+
+#include <inttypes.h>
+
+#include <string>
+
 #include "dex/compiler_internals.h"
 #include "dex/quick/mir_to_lir-inl.h"
 #include "mips_lir.h"
 
-#include <string>
-
 namespace art {
 
 static int core_regs[] = {r_ZERO, r_AT, r_V0, r_V1, r_A0, r_A1, r_A2, r_A3,
@@ -203,9 +206,9 @@
              snprintf(tbuf, arraysize(tbuf), "%d", operand*2);
              break;
            case 't':
-             snprintf(tbuf, arraysize(tbuf), "0x%08x (L%p)",
-                      reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 2),
-                      lir->target);
+             snprintf(tbuf, arraysize(tbuf), "0x%08" PRIxPTR " (L%p)",
+                 reinterpret_cast<uintptr_t>(base_addr) + lir->offset + 4 + (operand << 1),
+                 lir->target);
              break;
            case 'T':
              snprintf(tbuf, arraysize(tbuf), "0x%08x", operand << 2);
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 6281eff..94db134 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -762,6 +762,8 @@
       // Combine check and work halves of throwing instruction.
       MIR* work_half = mir->meta.throw_insn;
       mir->dalvikInsn.opcode = work_half->dalvikInsn.opcode;
+      mir->optimization_flags = work_half->optimization_flags;
+      mir->meta = work_half->meta;  // Whatever the work_half had, we need to copy it.
       opcode = work_half->dalvikInsn.opcode;
       SSARepresentation* ssa_rep = work_half->ssa_rep;
       work_half->ssa_rep = mir->ssa_rep;
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index f9d9e9e..b59ec5e 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -628,6 +628,18 @@
      */
     void StoreValueWide(RegLocation rl_dest, RegLocation rl_src);
 
+    /**
+     * @brief Used to do the final store in a wide destination as per bytecode semantics.
+     * @see StoreValueWide
+     * @param rl_dest The destination dalvik register location.
+     * @param rl_src The source register location. It must be kLocPhysReg
+     *
+     * This is used for x86 two operand computations, where we have computed the correct
+     * register values that now need to be properly registered.  This is used to avoid an
+     * extra pair of register copies that would result if StoreValueWide was called.
+     */
+    void StoreFinalValueWide(RegLocation rl_dest, RegLocation rl_src);
+
     // Shared by all targets - implemented in mir_to_lir.cc.
     void CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list);
     void HandleExtendedMethodMIR(BasicBlock* bb, MIR* mir);
@@ -696,11 +708,14 @@
     // Required for target - Dalvik-level generators.
     virtual void GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                    RegLocation rl_src1, RegLocation rl_src2) = 0;
-    virtual void GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenMulLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
-    virtual void GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenAddLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
-    virtual void GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenAndLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
     virtual void GenArithOpDouble(Instruction::Code opcode,
                                   RegLocation rl_dest, RegLocation rl_src1,
@@ -728,11 +743,14 @@
     virtual bool GenInlinedPeek(CallInfo* info, OpSize size) = 0;
     virtual bool GenInlinedPoke(CallInfo* info, OpSize size) = 0;
     virtual void GenNegLong(RegLocation rl_dest, RegLocation rl_src) = 0;
-    virtual void GenOrLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenOrLong(Instruction::Code,
+                           RegLocation rl_dest, RegLocation rl_src1,
                            RegLocation rl_src2) = 0;
-    virtual void GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenSubLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
-    virtual void GenXorLong(RegLocation rl_dest, RegLocation rl_src1,
+    virtual void GenXorLong(Instruction::Code,
+                            RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
     virtual LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base,
                                 int offset, ThrowKind kind) = 0;
@@ -740,6 +758,25 @@
                                   bool is_div) = 0;
     virtual RegLocation GenDivRemLit(RegLocation rl_dest, int reg_lo, int lit,
                                      bool is_div) = 0;
+    /*
+     * @brief Generate an integer div or rem operation by a literal.
+     * @param rl_dest Destination Location.
+     * @param rl_src1 Numerator Location.
+     * @param rl_src2 Divisor Location.
+     * @param is_div 'true' if this is a division, 'false' for a remainder.
+     * @param check_zero 'true' if an exception should be generated if the divisor is 0.
+     */
+    virtual RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                                  RegLocation rl_src2, bool is_div, bool check_zero) = 0;
+    /*
+     * @brief Generate an integer div or rem operation by a literal.
+     * @param rl_dest Destination Location.
+     * @param rl_src Numerator Location.
+     * @param lit Divisor.
+     * @param is_div 'true' if this is a division, 'false' for a remainder.
+     */
+    virtual RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1,
+                                     int lit, bool is_div) = 0;
     virtual void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) = 0;
 
@@ -837,6 +874,20 @@
       return cu_;
     }
 
+    /*
+     * @brief Force a location (in a register) into a temporary register
+     * @param loc location of result
+     * @returns update location
+     */
+    RegLocation ForceTemp(RegLocation loc);
+
+    /*
+     * @brief Force a wide location (in registers) into temporary registers
+     * @param loc location of result
+     * @returns update location
+     */
+    RegLocation ForceTempWide(RegLocation loc);
+
   private:
     void GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest,
                             RegLocation rl_src);
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 1dcff65..5e1c4d1 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -242,12 +242,13 @@
   UNARY_ENCODING_MAP(Not, 0x2, IS_STORE, 0,           R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
   UNARY_ENCODING_MAP(Neg, 0x3, IS_STORE, SETS_CCODES, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""),
 
-  UNARY_ENCODING_MAP(Mul,     0x4, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
-  UNARY_ENCODING_MAP(Imul,    0x5, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
-  UNARY_ENCODING_MAP(Divmod,  0x6, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
-  UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kRegRegReg, IS_UNARY_OP | REG_USE0, DaM, kRegRegMem, IS_BINARY_OP | REG_USE0, DaA, kRegRegArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
+  UNARY_ENCODING_MAP(Mul,     0x4, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
+  UNARY_ENCODING_MAP(Imul,    0x5, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEA,  REG_DEFAD_USEA,  "ax,al,", "dx:ax,ax,", "edx:eax,eax,"),
+  UNARY_ENCODING_MAP(Divmod,  0x6, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
+  UNARY_ENCODING_MAP(Idivmod, 0x7, 0, SETS_CCODES, DaR, kReg, IS_UNARY_OP | REG_USE0, DaM, kMem, IS_BINARY_OP | REG_USE0, DaA, kArray, IS_QUAD_OP | REG_USE01, 0, REG_DEFA_USEA, REG_DEFAD_USEAD, REG_DEFAD_USEAD, "ah:al,ax,", "dx:ax,dx:ax,", "edx:eax,edx:eax,"),
 #undef UNARY_ENCODING_MAP
 
+  { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA,                                  { 0, 0, 0x99, 0, 0, 0, 0, 0 }, "Cdq", "" },
   { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0,                                 { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" },
   { kX86Push32R,  kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0, 0, 0x50, 0,    0, 0, 0, 0 }, "Push32R",  "!0r" },
   { kX86Pop32R,   kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD,  { 0, 0, 0x58, 0,    0, 0, 0, 0 }, "Pop32R",   "!0r" },
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 816f2d0..9cc4efd 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -94,9 +94,9 @@
                      RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark);
     void GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                            RegLocation rl_src1, RegLocation rl_shift);
-    void GenMulLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAddLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenAndLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAddLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenAndLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpDouble(Instruction::Code opcode, RegLocation rl_dest,
                                   RegLocation rl_src1, RegLocation rl_src2);
     void GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest,
@@ -110,9 +110,9 @@
     bool GenInlinedPeek(CallInfo* info, OpSize size);
     bool GenInlinedPoke(CallInfo* info, OpSize size);
     void GenNegLong(RegLocation rl_dest, RegLocation rl_src);
-    void GenOrLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenSubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
-    void GenXorLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenOrLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenSubLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
+    void GenXorLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     LIR* GenRegMemCheck(ConditionCode c_code, int reg1, int base, int offset,
                                 ThrowKind kind);
     LIR* GenMemImmedCheck(ConditionCode c_code, int base, int offset, int check_value,
@@ -136,6 +136,49 @@
     void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src);
     void GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& special);
+    /*
+     * @brief Generate a two address long operation with a constant value
+     * @param rl_dest location of result
+     * @param rl_src constant source operand
+     * @param op Opcode to be generated
+     */
+    void GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+    /*
+     * @brief Generate a three address long operation with a constant value
+     * @param rl_dest location of result
+     * @param rl_src1 source operand
+     * @param rl_src2 constant source operand
+     * @param op Opcode to be generated
+     */
+    void GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
+                        RegLocation rl_src2, Instruction::Code op);
+
+    /**
+      * @brief Generate a long arithmetic operation.
+      * @param rl_dest The destination.
+      * @param rl_src1 First operand.
+      * @param rl_src2 Second operand.
+      * @param op The DEX opcode for the operation.
+      * @param is_commutative The sources can be swapped if needed.
+      */
+    void GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
+                      RegLocation rl_src2, Instruction::Code op, bool is_commutative);
+
+    /**
+      * @brief Generate a two operand long arithmetic operation.
+      * @param rl_dest The destination.
+      * @param rl_src Second operand.
+      * @param op The DEX opcode for the operation.
+      */
+    void GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
+
+    /**
+      * @brief Generate a long operation.
+      * @param rl_dest The destination.  Must be in a register
+      * @param rl_src The other operand.  May be in a register or in memory.
+      * @param op The DEX opcode for the operation.
+      */
+    void GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op);
 
     // Single operation generators.
     LIR* OpUnconditionalBranch(LIR* target);
@@ -230,6 +273,70 @@
                                   int64_t val, ConditionCode ccode);
     void OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg);
     void GenConstWide(RegLocation rl_dest, int64_t value);
+
+    /*
+     * @brief Return the correct x86 opcode for the Dex operation
+     * @param op Dex opcode for the operation
+     * @param loc Register location of the operand
+     * @param is_high_op 'true' if this is an operation on the high word
+     * @param value Immediate value for the operation.  Used for byte variants
+     * @returns the correct x86 opcode to perform the operation
+     */
+    X86OpCode GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value);
+
+    /*
+     * @brief Return the correct x86 opcode for the Dex operation
+     * @param op Dex opcode for the operation
+     * @param dest location of the destination.  May be register or memory.
+     * @param rhs Location for the rhs of the operation.  May be in register or memory.
+     * @param is_high_op 'true' if this is an operation on the high word
+     * @returns the correct x86 opcode to perform the operation
+     * @note at most one location may refer to memory
+     */
+    X86OpCode GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
+                        bool is_high_op);
+
+    /*
+     * @brief Is this operation a no-op for this opcode and value
+     * @param op Dex opcode for the operation
+     * @param value Immediate value for the operation.
+     * @returns 'true' if the operation will have no effect
+     */
+    bool IsNoOp(Instruction::Code op, int32_t value);
+
+    /*
+     * @brief Dump a RegLocation using printf
+     * @param loc Register location to dump
+     */
+    static void DumpRegLocation(RegLocation loc);
+
+    /**
+     * @brief Calculate magic number and shift for a given divisor
+     * @param divisor divisor number for calculation
+     * @param magic hold calculated magic number
+     * @param shift hold calculated shift
+     */
+    void CalculateMagicAndShift(int divisor, int& magic, int& shift);
+
+    /*
+     * @brief Generate an integer div or rem operation.
+     * @param rl_dest Destination Location.
+     * @param rl_src1 Numerator Location.
+     * @param rl_src2 Divisor Location.
+     * @param is_div 'true' if this is a division, 'false' for a remainder.
+     * @param check_zero 'true' if an exception should be generated if the divisor is 0.
+     */
+    RegLocation GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                                  RegLocation rl_src2, bool is_div, bool check_zero);
+
+    /*
+     * @brief Generate an integer div or rem operation by a literal.
+     * @param rl_dest Destination Location.
+     * @param rl_src Numerator Location.
+     * @param lit Divisor.
+     * @param is_div 'true' if this is a division, 'false' for a remainder.
+     */
+    RegLocation GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, int lit, bool is_div);
 };
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 6272498..006fe76 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -303,7 +303,7 @@
     rl_src2 = LoadValue(rl_src2, kFPReg);
     NewLIR2(kX86UcomissRR, rl_src1.low_reg, rl_src2.low_reg);
   }
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  ConditionCode ccode = mir->meta.ccode;
   switch (ccode) {
     case kCondEq:
       if (!gt_bias) {
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 01479a9..ccae130 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -187,7 +187,7 @@
   LIR* taken = &block_label_list_[bb->taken];
   RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
-  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  ConditionCode ccode = mir->meta.ccode;
 
   if (rl_src1.is_const) {
     std::swap(rl_src1, rl_src2);
@@ -284,18 +284,261 @@
   OpCmpImmBranch(ccode, low_reg, val_lo, taken);
 }
 
+void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
+  // It does not make sense to calculate magic and shift for zero divisor.
+  DCHECK_NE(divisor, 0);
+
+  /* According to H.S.Warren's Hacker's Delight Chapter 10 and
+   * T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication.
+   * The magic number M and shift S can be calculated in the following way:
+   * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
+   * where divisor(d) >=2.
+   * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
+   * where divisor(d) <= -2.
+   * Thus nc can be calculated like:
+   * nc = 2^31 + 2^31 % d - 1, where d >= 2
+   * nc = -2^31 + (2^31 + 1) % d, where d >= 2.
+   *
+   * So the shift p is the smallest p satisfying
+   * 2^p > nc * (d - 2^p % d), where d >= 2
+   * 2^p > nc * (d + 2^p % d), where d <= -2.
+   *
+   * the magic number M is calcuated by
+   * M = (2^p + d - 2^p % d) / d, where d >= 2
+   * M = (2^p - d - 2^p % d) / d, where d <= -2.
+   *
+   * Notice that p is always bigger than or equal to 32, so we just return 32-p as
+   * the shift number S.
+   */
+
+  int32_t p = 31;
+  const uint32_t two31 = 0x80000000U;
+
+  // Initialize the computations.
+  uint32_t abs_d = (divisor >= 0) ? divisor : -divisor;
+  uint32_t tmp = two31 + (static_cast<uint32_t>(divisor) >> 31);
+  uint32_t abs_nc = tmp - 1 - tmp % abs_d;
+  uint32_t quotient1 = two31 / abs_nc;
+  uint32_t remainder1 = two31 % abs_nc;
+  uint32_t quotient2 = two31 / abs_d;
+  uint32_t remainder2 = two31 % abs_d;
+
+  /*
+   * To avoid handling both positive and negative divisor, Hacker's Delight
+   * introduces a method to handle these 2 cases together to avoid duplication.
+   */
+  uint32_t delta;
+  do {
+    p++;
+    quotient1 = 2 * quotient1;
+    remainder1 = 2 * remainder1;
+    if (remainder1 >= abs_nc) {
+      quotient1++;
+      remainder1 = remainder1 - abs_nc;
+    }
+    quotient2 = 2 * quotient2;
+    remainder2 = 2 * remainder2;
+    if (remainder2 >= abs_d) {
+      quotient2++;
+      remainder2 = remainder2 - abs_d;
+    }
+    delta = abs_d - remainder2;
+  } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
+
+  magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
+  shift = p - 32;
+}
+
 RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, int reg_lo,
                                      int lit, bool is_div) {
   LOG(FATAL) << "Unexpected use of GenDivRemLit for x86";
   return rl_dest;
 }
 
+RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src,
+                                     int imm, bool is_div) {
+  // Use a multiply (and fixup) to perform an int div/rem by a constant.
+
+  // We have to use fixed registers, so flush all the temps.
+  FlushAllRegs();
+  LockCallTemps();  // Prepare for explicit register usage.
+
+  // Assume that the result will be in EDX.
+  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+                          r2, INVALID_REG, INVALID_SREG, INVALID_SREG};
+
+  // handle 0x80000000 / -1 special case.
+  LIR *minint_branch = 0;
+  if (imm == -1) {
+    if (is_div) {
+      LoadValueDirectFixed(rl_src, r0);
+      OpRegImm(kOpCmp, r0, 0x80000000);
+      minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq);
+
+      // for x != MIN_INT, x / -1 == -x.
+      NewLIR1(kX86Neg32R, r0);
+
+      LIR* branch_around = NewLIR1(kX86Jmp8, 0);
+      // The target for cmp/jmp above.
+      minint_branch->target = NewLIR0(kPseudoTargetLabel);
+      // EAX already contains the right value (0x80000000),
+      branch_around->target = NewLIR0(kPseudoTargetLabel);
+    } else {
+      // x % -1 == 0.
+      LoadConstantNoClobber(r0, 0);
+    }
+    // For this case, return the result in EAX.
+    rl_result.low_reg = r0;
+  } else {
+    DCHECK(imm <= -2 || imm >= 2);
+    // Use H.S.Warren's Hacker's Delight Chapter 10 and
+    // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication.
+    int magic, shift;
+    CalculateMagicAndShift(imm, magic, shift);
+
+    /*
+     * For imm >= 2,
+     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0
+     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0.
+     * For imm <= -2,
+     *     int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0
+     *     int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0.
+     * We implement this algorithm in the following way:
+     * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX
+     * 2. if imm > 0 and magic < 0, add numerator to EDX
+     *    if imm < 0 and magic > 0, sub numerator from EDX
+     * 3. if S !=0, SAR S bits for EDX
+     * 4. add 1 to EDX if EDX < 0
+     * 5. Thus, EDX is the quotient
+     */
+
+    // Numerator into EAX.
+    int numerator_reg = -1;
+    if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) {
+      // We will need the value later.
+      if (rl_src.location == kLocPhysReg) {
+        // We can use it directly.
+        DCHECK(rl_src.low_reg != r0 && rl_src.low_reg != r2);
+        numerator_reg = rl_src.low_reg;
+      } else {
+        LoadValueDirectFixed(rl_src, r1);
+        numerator_reg = r1;
+      }
+      OpRegCopy(r0, numerator_reg);
+    } else {
+      // Only need this once.  Just put it into EAX.
+      LoadValueDirectFixed(rl_src, r0);
+    }
+
+    // EDX = magic.
+    LoadConstantNoClobber(r2, magic);
+
+    // EDX:EAX = magic & dividend.
+    NewLIR1(kX86Imul32DaR, r2);
+
+    if (imm > 0 && magic < 0) {
+      // Add numerator to EDX.
+      DCHECK_NE(numerator_reg, -1);
+      NewLIR2(kX86Add32RR, r2, numerator_reg);
+    } else if (imm < 0 && magic > 0) {
+      DCHECK_NE(numerator_reg, -1);
+      NewLIR2(kX86Sub32RR, r2, numerator_reg);
+    }
+
+    // Do we need the shift?
+    if (shift != 0) {
+      // Shift EDX by 'shift' bits.
+      NewLIR2(kX86Sar32RI, r2, shift);
+    }
+
+    // Add 1 to EDX if EDX < 0.
+
+    // Move EDX to EAX.
+    OpRegCopy(r0, r2);
+
+    // Move sign bit to bit 0, zeroing the rest.
+    NewLIR2(kX86Shr32RI, r2, 31);
+
+    // EDX = EDX + EAX.
+    NewLIR2(kX86Add32RR, r2, r0);
+
+    // Quotient is in EDX.
+    if (!is_div) {
+      // We need to compute the remainder.
+      // Remainder is divisor - (quotient * imm).
+      DCHECK_NE(numerator_reg, -1);
+      OpRegCopy(r0, numerator_reg);
+
+      // EAX = numerator * imm.
+      OpRegRegImm(kOpMul, r2, r2, imm);
+
+      // EDX -= EAX.
+      NewLIR2(kX86Sub32RR, r0, r2);
+
+      // For this case, return the result in EAX.
+      rl_result.low_reg = r0;
+    }
+  }
+
+  return rl_result;
+}
+
 RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, int reg_lo,
                                   int reg_hi, bool is_div) {
   LOG(FATAL) << "Unexpected use of GenDivRem for x86";
   return rl_dest;
 }
 
+RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
+                                  RegLocation rl_src2, bool is_div, bool check_zero) {
+  // We have to use fixed registers, so flush all the temps.
+  FlushAllRegs();
+  LockCallTemps();  // Prepare for explicit register usage.
+
+  // Load LHS into EAX.
+  LoadValueDirectFixed(rl_src1, r0);
+
+  // Load RHS into EBX.
+  LoadValueDirectFixed(rl_src2, r1);
+
+  // Copy LHS sign bit into EDX.
+  NewLIR0(kx86Cdq32Da);
+
+  if (check_zero) {
+    // Handle division by zero case.
+    GenImmedCheck(kCondEq, r1, 0, kThrowDivZero);
+  }
+
+  // Have to catch 0x80000000/-1 case, or we will get an exception!
+  OpRegImm(kOpCmp, r1, -1);
+  LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+
+  // RHS is -1.
+  OpRegImm(kOpCmp, r0, 0x80000000);
+  LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe);
+
+  // In 0x80000000/-1 case.
+  if (!is_div) {
+    // For DIV, EAX is already right. For REM, we need EDX 0.
+    LoadConstantNoClobber(r2, 0);
+  }
+  LIR* done = NewLIR1(kX86Jmp8, 0);
+
+  // Expected case.
+  minus_one_branch->target = NewLIR0(kPseudoTargetLabel);
+  minint_branch->target = minus_one_branch->target;
+  NewLIR1(kX86Idivmod32DaR, r1);
+  done->target = NewLIR0(kPseudoTargetLabel);
+
+  // Result is in EAX for div and EDX for rem.
+  RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed,
+                          r0, INVALID_REG, INVALID_SREG, INVALID_SREG};
+  if (!is_div) {
+    rl_result.low_reg = r2;
+  }
+  return rl_result;
+}
+
 bool X86Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
   DCHECK_EQ(cu_->instruction_set, kX86);
 
@@ -512,100 +755,174 @@
   return NULL;
 }
 
-void X86Mir2Lir::GenMulLong(RegLocation rl_dest, RegLocation rl_src1,
+void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
   LOG(FATAL) << "Unexpected use of GenX86Long for x86";
 }
-void X86Mir2Lir::GenAddLong(RegLocation rl_dest, RegLocation rl_src1,
-                         RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) + (r2:r3)
-  OpRegReg(kOpAdd, r0, r2);  // r0 = r0 + r2
-  OpRegReg(kOpAdc, r1, r3);  // r1 = r1 + r3 + CF
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+
+void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src,
+                                   Instruction::Code op) {
+  DCHECK_EQ(rl_dest.location, kLocPhysReg);
+  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
+  if (rl_src.location == kLocPhysReg) {
+    // Both operands are in registers.
+    if (rl_dest.low_reg == rl_src.high_reg) {
+      // The registers are the same, so we would clobber it before the use.
+      int temp_reg = AllocTemp();
+      OpRegCopy(temp_reg, rl_dest.low_reg);
+      rl_src.high_reg = temp_reg;
+    }
+    NewLIR2(x86op, rl_dest.low_reg, rl_src.low_reg);
+
+    x86op = GetOpcode(op, rl_dest, rl_src, true);
+    NewLIR2(x86op, rl_dest.high_reg, rl_src.high_reg);
+    FreeTemp(rl_src.low_reg);
+    FreeTemp(rl_src.high_reg);
+    return;
+  }
+
+  // RHS is in memory.
+  DCHECK((rl_src.location == kLocDalvikFrame) ||
+         (rl_src.location == kLocCompilerTemp));
+  int rBase = TargetReg(kSp);
+  int displacement = SRegOffset(rl_src.s_reg_low);
+
+  LIR *lir = NewLIR3(x86op, rl_dest.low_reg, rBase, displacement + LOWORD_OFFSET);
+  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                          true /* is_load */, true /* is64bit */);
+  x86op = GetOpcode(op, rl_dest, rl_src, true);
+  lir = NewLIR3(x86op, rl_dest.high_reg, rBase, displacement + HIWORD_OFFSET);
+  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                          true /* is_load */, true /* is64bit */);
 }
 
-void X86Mir2Lir::GenSubLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) + (r2:r3)
-  OpRegReg(kOpSub, r0, r2);  // r0 = r0 - r2
-  OpRegReg(kOpSbc, r1, r3);  // r1 = r1 - r3 - CF
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
+  rl_dest = UpdateLocWide(rl_dest);
+  if (rl_dest.location == kLocPhysReg) {
+    // Ensure we are in a register pair
+    RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+
+    rl_src = UpdateLocWide(rl_src);
+    GenLongRegOrMemOp(rl_result, rl_src, op);
+    StoreFinalValueWide(rl_dest, rl_result);
+    return;
+  }
+
+  // It wasn't in registers, so it better be in memory.
+  DCHECK((rl_dest.location == kLocDalvikFrame) ||
+         (rl_dest.location == kLocCompilerTemp));
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+
+  // Operate directly into memory.
+  X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false);
+  int rBase = TargetReg(kSp);
+  int displacement = SRegOffset(rl_dest.s_reg_low);
+
+  LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, rl_src.low_reg);
+  AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                          false /* is_load */, true /* is64bit */);
+  x86op = GetOpcode(op, rl_dest, rl_src, true);
+  lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, rl_src.high_reg);
+  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                          false /* is_load */, true /* is64bit */);
+  FreeTemp(rl_src.low_reg);
+  FreeTemp(rl_src.high_reg);
 }
 
-void X86Mir2Lir::GenAndLong(RegLocation rl_dest, RegLocation rl_src1,
-                            RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) & (r2:r3)
-  OpRegReg(kOpAnd, r0, r2);  // r0 = r0 & r2
-  OpRegReg(kOpAnd, r1, r3);  // r1 = r1 & r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1,
+                              RegLocation rl_src2, Instruction::Code op,
+                              bool is_commutative) {
+  // Is this really a 2 operand operation?
+  switch (op) {
+    case Instruction::ADD_LONG_2ADDR:
+    case Instruction::SUB_LONG_2ADDR:
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::OR_LONG_2ADDR:
+    case Instruction::XOR_LONG_2ADDR:
+      GenLongArith(rl_dest, rl_src2, op);
+      return;
+    default:
+      break;
+  }
+
+  if (rl_dest.location == kLocPhysReg) {
+    RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg);
+
+    // We are about to clobber the LHS, so it needs to be a temp.
+    rl_result = ForceTempWide(rl_result);
+
+    // Perform the operation using the RHS.
+    rl_src2 = UpdateLocWide(rl_src2);
+    GenLongRegOrMemOp(rl_result, rl_src2, op);
+
+    // And now record that the result is in the temp.
+    StoreFinalValueWide(rl_dest, rl_result);
+    return;
+  }
+
+  // It wasn't in registers, so it better be in memory.
+  DCHECK((rl_dest.location == kLocDalvikFrame) ||
+         (rl_dest.location == kLocCompilerTemp));
+  rl_src1 = UpdateLocWide(rl_src1);
+  rl_src2 = UpdateLocWide(rl_src2);
+
+  // Get one of the source operands into temporary register.
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  if (IsTemp(rl_src1.low_reg) && IsTemp(rl_src1.high_reg)) {
+    GenLongRegOrMemOp(rl_src1, rl_src2, op);
+  } else if (is_commutative) {
+    rl_src2 = LoadValueWide(rl_src2, kCoreReg);
+    // We need at least one of them to be a temporary.
+    if (!(IsTemp(rl_src2.low_reg) && IsTemp(rl_src2.high_reg))) {
+      rl_src1 = ForceTempWide(rl_src1);
+    }
+    GenLongRegOrMemOp(rl_src1, rl_src2, op);
+  } else {
+    // Need LHS to be the temp.
+    rl_src1 = ForceTempWide(rl_src1);
+    GenLongRegOrMemOp(rl_src1, rl_src2, op);
+  }
+
+  StoreFinalValueWide(rl_dest, rl_src1);
 }
 
-void X86Mir2Lir::GenOrLong(RegLocation rl_dest,
-                           RegLocation rl_src1, RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) | (r2:r3)
-  OpRegReg(kOpOr, r0, r2);  // r0 = r0 | r2
-  OpRegReg(kOpOr, r1, r3);  // r1 = r1 | r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
-}
-
-void X86Mir2Lir::GenXorLong(RegLocation rl_dest,
+void X86Mir2Lir::GenAddLong(Instruction::Code opcode, RegLocation rl_dest,
                             RegLocation rl_src1, RegLocation rl_src2) {
-  // TODO: fixed register usage here as we only have 4 temps and temporary allocation isn't smart
-  // enough.
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src1, r0, r1);
-  LoadValueDirectWideFixed(rl_src2, r2, r3);
-  // Compute (r1:r0) = (r1:r0) ^ (r2:r3)
-  OpRegReg(kOpXor, r0, r2);  // r0 = r0 ^ r2
-  OpRegReg(kOpXor, r1, r3);  // r1 = r1 ^ r3
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
-  StoreValueWide(rl_dest, rl_result);
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
+}
+
+void X86Mir2Lir::GenSubLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false);
+}
+
+void X86Mir2Lir::GenAndLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
+}
+
+void X86Mir2Lir::GenOrLong(Instruction::Code opcode, RegLocation rl_dest,
+                           RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
+}
+
+void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest,
+                            RegLocation rl_src1, RegLocation rl_src2) {
+  GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true);
 }
 
 void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
-  FlushAllRegs();
-  LockCallTemps();  // Prepare for explicit register usage
-  LoadValueDirectWideFixed(rl_src, r0, r1);
-  // Compute (r1:r0) = -(r1:r0)
-  OpRegReg(kOpNeg, r0, r0);  // r0 = -r0
-  OpRegImm(kOpAdc, r1, 0);   // r1 = r1 + CF
-  OpRegReg(kOpNeg, r1, r1);  // r1 = -r1
-  RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, kVectorNotUsed, r0, r1,
-                          INVALID_SREG, INVALID_SREG};
+  rl_src = LoadValueWide(rl_src, kCoreReg);
+  RegLocation rl_result = ForceTempWide(rl_src);
+  if (rl_dest.low_reg == rl_src.high_reg) {
+    // The registers are the same, so we would clobber it before the use.
+    int temp_reg = AllocTemp();
+    OpRegCopy(temp_reg, rl_result.low_reg);
+    rl_result.high_reg = temp_reg;
+  }
+  OpRegReg(kOpNeg, rl_result.low_reg, rl_result.low_reg);    // rLow = -rLow
+  OpRegImm(kOpAdc, rl_result.high_reg, 0);                   // rHigh = rHigh + CF
+  OpRegReg(kOpNeg, rl_result.high_reg, rl_result.high_reg);  // rHigh = -rHigh
   StoreValueWide(rl_dest, rl_result);
 }
 
@@ -748,8 +1065,241 @@
 
 void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode,
                                    RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) {
-  // Default - bail to non-const handler.
-  GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+  switch (opcode) {
+    case Instruction::ADD_LONG:
+    case Instruction::AND_LONG:
+    case Instruction::OR_LONG:
+    case Instruction::XOR_LONG:
+      if (rl_src2.is_const) {
+        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+      } else {
+        DCHECK(rl_src1.is_const);
+        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
+      }
+      break;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if (rl_src2.is_const) {
+        GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode);
+      } else {
+        GenSubLong(opcode, rl_dest, rl_src1, rl_src2);
+      }
+      break;
+    case Instruction::ADD_LONG_2ADDR:
+    case Instruction::OR_LONG_2ADDR:
+    case Instruction::XOR_LONG_2ADDR:
+    case Instruction::AND_LONG_2ADDR:
+      if (rl_src2.is_const) {
+        GenLongImm(rl_dest, rl_src2, opcode);
+      } else {
+        DCHECK(rl_src1.is_const);
+        GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode);
+      }
+      break;
+    default:
+      // Default - bail to non-const handler.
+      GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2);
+      break;
+  }
+}
+
+bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) {
+  switch (op) {
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::AND_LONG:
+      return value == -1;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      return value == 0;
+    default:
+      return false;
+  }
+}
+
+X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs,
+                                bool is_high_op) {
+  bool rhs_in_mem = rhs.location != kLocPhysReg;
+  bool dest_in_mem = dest.location != kLocPhysReg;
+  DCHECK(!rhs_in_mem || !dest_in_mem);
+  switch (op) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      if (dest_in_mem) {
+        return is_high_op ? kX86Adc32MR : kX86Add32MR;
+      } else if (rhs_in_mem) {
+        return is_high_op ? kX86Adc32RM : kX86Add32RM;
+      }
+      return is_high_op ? kX86Adc32RR : kX86Add32RR;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if (dest_in_mem) {
+        return is_high_op ? kX86Sbb32MR : kX86Sub32MR;
+      } else if (rhs_in_mem) {
+        return is_high_op ? kX86Sbb32RM : kX86Sub32RM;
+      }
+      return is_high_op ? kX86Sbb32RR : kX86Sub32RR;
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::AND_LONG:
+      if (dest_in_mem) {
+        return kX86And32MR;
+      }
+      return rhs_in_mem ? kX86And32RM : kX86And32RR;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      if (dest_in_mem) {
+        return kX86Or32MR;
+      }
+      return rhs_in_mem ? kX86Or32RM : kX86Or32RR;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      if (dest_in_mem) {
+        return kX86Xor32MR;
+      }
+      return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << op;
+      return kX86Add32RR;
+  }
+}
+
+X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op,
+                                int32_t value) {
+  bool in_mem = loc.location != kLocPhysReg;
+  bool byte_imm = IS_SIMM8(value);
+  DCHECK(in_mem || !IsFpReg(loc.low_reg));
+  switch (op) {
+    case Instruction::ADD_LONG:
+    case Instruction::ADD_LONG_2ADDR:
+      if (byte_imm) {
+        if (in_mem) {
+          return is_high_op ? kX86Adc32MI8 : kX86Add32MI8;
+        }
+        return is_high_op ? kX86Adc32RI8 : kX86Add32RI8;
+      }
+      if (in_mem) {
+        return is_high_op ? kX86Adc32MI : kX86Add32MI;
+      }
+      return is_high_op ? kX86Adc32RI : kX86Add32RI;
+    case Instruction::SUB_LONG:
+    case Instruction::SUB_LONG_2ADDR:
+      if (byte_imm) {
+        if (in_mem) {
+          return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8;
+        }
+        return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8;
+      }
+      if (in_mem) {
+        return is_high_op ? kX86Sbb32MI : kX86Sub32MI;
+      }
+      return is_high_op ? kX86Sbb32RI : kX86Sub32RI;
+    case Instruction::AND_LONG_2ADDR:
+    case Instruction::AND_LONG:
+      if (byte_imm) {
+        return in_mem ? kX86And32MI8 : kX86And32RI8;
+      }
+      return in_mem ? kX86And32MI : kX86And32RI;
+    case Instruction::OR_LONG:
+    case Instruction::OR_LONG_2ADDR:
+      if (byte_imm) {
+        return in_mem ? kX86Or32MI8 : kX86Or32RI8;
+      }
+      return in_mem ? kX86Or32MI : kX86Or32RI;
+    case Instruction::XOR_LONG:
+    case Instruction::XOR_LONG_2ADDR:
+      if (byte_imm) {
+        return in_mem ? kX86Xor32MI8 : kX86Xor32RI8;
+      }
+      return in_mem ? kX86Xor32MI : kX86Xor32RI;
+    default:
+      LOG(FATAL) << "Unexpected opcode: " << op;
+      return kX86Add32MI;
+  }
+}
+
+void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
+  DCHECK(rl_src.is_const);
+  int64_t val = mir_graph_->ConstantValueWide(rl_src);
+  int32_t val_lo = Low32Bits(val);
+  int32_t val_hi = High32Bits(val);
+  rl_dest = UpdateLocWide(rl_dest);
+
+  // Can we just do this into memory?
+  if ((rl_dest.location == kLocDalvikFrame) ||
+      (rl_dest.location == kLocCompilerTemp)) {
+    int rBase = TargetReg(kSp);
+    int displacement = SRegOffset(rl_dest.s_reg_low);
+
+    if (!IsNoOp(op, val_lo)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
+      LIR *lir = NewLIR3(x86op, rBase, displacement + LOWORD_OFFSET, val_lo);
+      AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2,
+                              false /* is_load */, true /* is64bit */);
+    }
+    if (!IsNoOp(op, val_hi)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
+      LIR *lir = NewLIR3(x86op, rBase, displacement + HIWORD_OFFSET, val_hi);
+      AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                                false /* is_load */, true /* is64bit */);
+    }
+    return;
+  }
+
+  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
+  DCHECK_EQ(rl_result.location, kLocPhysReg);
+  DCHECK(!IsFpReg(rl_result.low_reg));
+
+  if (!IsNoOp(op, val_lo)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
+    NewLIR2(x86op, rl_result.low_reg, val_lo);
+  }
+  if (!IsNoOp(op, val_hi)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
+    NewLIR2(x86op, rl_result.high_reg, val_hi);
+  }
+  StoreValueWide(rl_dest, rl_result);
+}
+
+void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1,
+                                RegLocation rl_src2, Instruction::Code op) {
+  DCHECK(rl_src2.is_const);
+  int64_t val = mir_graph_->ConstantValueWide(rl_src2);
+  int32_t val_lo = Low32Bits(val);
+  int32_t val_hi = High32Bits(val);
+  rl_dest = UpdateLocWide(rl_dest);
+  rl_src1 = UpdateLocWide(rl_src1);
+
+  // Can we do this directly into the destination registers?
+  if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg &&
+      rl_dest.low_reg == rl_src1.low_reg && rl_dest.high_reg == rl_src1.high_reg &&
+      !IsFpReg(rl_dest.low_reg)) {
+    if (!IsNoOp(op, val_lo)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo);
+      NewLIR2(x86op, rl_dest.low_reg, val_lo);
+    }
+    if (!IsNoOp(op, val_hi)) {
+      X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi);
+      NewLIR2(x86op, rl_dest.high_reg, val_hi);
+    }
+    return;
+  }
+
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  DCHECK_EQ(rl_src1.location, kLocPhysReg);
+
+  // We need the values to be in a temporary
+  RegLocation rl_result = ForceTempWide(rl_src1);
+  if (!IsNoOp(op, val_lo)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo);
+    NewLIR2(x86op, rl_result.low_reg, val_lo);
+  }
+  if (!IsNoOp(op, val_hi)) {
+    X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi);
+    NewLIR2(x86op, rl_result.high_reg, val_hi);
+  }
+
+  StoreFinalValueWide(rl_dest, rl_result);
 }
 
 }  // namespace art
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 5c993c5..f223548 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -243,9 +243,9 @@
             }
             break;
           case 't':
-            buf += StringPrintf("0x%08x (L%p)",
-                                reinterpret_cast<uintptr_t>(base_addr)
-                                + lir->offset + operand, lir->target);
+            buf += StringPrintf("0x%08" PRIxPTR " (L%p)",
+                                reinterpret_cast<uintptr_t>(base_addr) + lir->offset + operand,
+                                lir->target);
             break;
           default:
             buf += StringPrintf("DecodeError '%c'", fmt[i]);
@@ -679,31 +679,24 @@
   }
 
   DCHECK_NE(loc.s_reg_low, INVALID_SREG);
-  if (IsFpReg(loc.low_reg) && reg_class != kCoreReg) {
-    // Need a wide vector register.
-    low_reg = AllocTypedTemp(true, reg_class);
-    loc.low_reg = low_reg;
-    loc.high_reg = low_reg;  // Play nice with existing code.
-    loc.vec_len = kVectorLength8;
-    if (update) {
-      loc.location = kLocPhysReg;
-      MarkLive(loc.low_reg, loc.s_reg_low);
-    }
+  DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
+
+  new_regs = AllocTypedTempPair(loc.fp, reg_class);
+  loc.low_reg = new_regs & 0xff;
+  loc.high_reg = (new_regs >> 8) & 0xff;
+
+  if (loc.low_reg == loc.high_reg) {
     DCHECK(IsFpReg(loc.low_reg));
+    loc.vec_len = kVectorLength8;
   } else {
-    DCHECK_NE(GetSRegHi(loc.s_reg_low), INVALID_SREG);
-
-    new_regs = AllocTypedTempPair(loc.fp, reg_class);
-    loc.low_reg = new_regs & 0xff;
-    loc.high_reg = (new_regs >> 8) & 0xff;
-
     MarkPair(loc.low_reg, loc.high_reg);
-    if (update) {
-      loc.location = kLocPhysReg;
-      MarkLive(loc.low_reg, loc.s_reg_low);
+  }
+  if (update) {
+    loc.location = kLocPhysReg;
+    MarkLive(loc.low_reg, loc.s_reg_low);
+    if (loc.low_reg != loc.high_reg) {
       MarkLive(loc.high_reg, GetSRegHi(loc.s_reg_low));
     }
-    DCHECK(!IsFpReg(loc.low_reg) || ((loc.low_reg & 0x1) == 0));
   }
   return loc;
 }
@@ -796,4 +789,23 @@
   // Just use the standard code to do the generation.
   Mir2Lir::GenConstWide(rl_dest, value);
 }
+
+// TODO: Merge with existing RegLocation dumper in vreg_analysis.cc
+void X86Mir2Lir::DumpRegLocation(RegLocation loc) {
+  LOG(INFO)  << "location: " << loc.location << ','
+             << (loc.wide ? " w" : "  ")
+             << (loc.defined ? " D" : "  ")
+             << (loc.is_const ? " c" : "  ")
+             << (loc.fp ? " F" : "  ")
+             << (loc.core ? " C" : "  ")
+             << (loc.ref ? " r" : "  ")
+             << (loc.high_word ? " h" : "  ")
+             << (loc.home ? " H" : "  ")
+             << " vec_len: " << loc.vec_len
+             << ", low: " << static_cast<int>(loc.low_reg)
+             << ", high: " << static_cast<int>(loc.high_reg)
+             << ", s_reg: " << loc.s_reg_low
+             << ", orig: " << loc.orig_sreg;
+}
+
 }  // namespace art
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 91c39fa..a2c215c 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -435,15 +435,37 @@
                      displacement + LOWORD_OFFSET);
     } else {
       if (rBase == r_dest) {
-        load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
-                        displacement + HIWORD_OFFSET);
-        load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
-                       displacement + LOWORD_OFFSET);
+        if (r_dest_hi == r_index) {
+          // We can't use either register for the first load.
+          int temp = AllocTemp();
+          load2 = NewLIR5(opcode, temp, rBase, r_index, scale,
+                          displacement + HIWORD_OFFSET);
+          load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
+                         displacement + LOWORD_OFFSET);
+          OpRegCopy(r_dest_hi, temp);
+          FreeTemp(temp);
+        } else {
+          load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
+                          displacement + HIWORD_OFFSET);
+          load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
+                         displacement + LOWORD_OFFSET);
+        }
       } else {
-        load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
-                       displacement + LOWORD_OFFSET);
-        load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
-                        displacement + HIWORD_OFFSET);
+        if (r_dest == r_index) {
+          // We can't use either register for the first load.
+          int temp = AllocTemp();
+          load = NewLIR5(opcode, temp, rBase, r_index, scale,
+                         displacement + LOWORD_OFFSET);
+          load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
+                          displacement + HIWORD_OFFSET);
+          OpRegCopy(r_dest, temp);
+          FreeTemp(temp);
+        } else {
+          load = NewLIR5(opcode, r_dest, rBase, r_index, scale,
+                         displacement + LOWORD_OFFSET);
+          load2 = NewLIR5(opcode, r_dest_hi, rBase, r_index, scale,
+                          displacement + HIWORD_OFFSET);
+        }
       }
     }
   }
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 1488f5d..d7f61fc 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -316,6 +316,7 @@
   UnaryOpcode(kX86Imul, DaR, DaM, DaA),
   UnaryOpcode(kX86Divmod,  DaR, DaM, DaA),
   UnaryOpcode(kX86Idivmod, DaR, DaM, DaA),
+  kx86Cdq32Da,
   kX86Bswap32R,
   kX86Push32R, kX86Pop32R,
 #undef UnaryOpcode
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index c51ea7b..6d82f0a 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -571,6 +571,9 @@
     reg_is_opcode = true;
     store = true;
     break;
+  case 0x99:
+    opcode << "cdq";
+    break;
   case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7:
     opcode << "mov";
     immediate_bytes = 1;
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 30bf623..a7e25cb 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -17,7 +17,6 @@
 #ifndef ART_RUNTIME_BASE_MUTEX_INL_H_
 #define ART_RUNTIME_BASE_MUTEX_INL_H_
 
-#define __STDC_FORMAT_MACROS 1
 #include <inttypes.h>
 
 #include "mutex.h"
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index 6e8736a..8fccd6d 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -16,9 +16,12 @@
 
 #include "dex_instruction-inl.h"
 
+#include <inttypes.h>
+
+#include <iomanip>
+
 #include "dex_file-inl.h"
 #include "utils.h"
-#include <iomanip>
 
 namespace art {
 
@@ -403,7 +406,8 @@
           os << StringPrintf("%s v%d, #int %+d // 0x%x", opcode, VRegA_21h(), value, value);
         } else {
           uint64_t value = static_cast<uint64_t>(VRegB_21h()) << 48;
-          os << StringPrintf("%s v%d, #long %+lld // 0x%llx", opcode, VRegA_21h(), value, value);
+          os << StringPrintf("%s v%d, #long %+" PRId64 " // 0x%" PRIx64, opcode, VRegA_21h(),
+                             value, value);
         }
       }
       break;
@@ -611,7 +615,7 @@
       }
       break;
     }
-    case k51l: os << StringPrintf("%s v%d, #%+lld", opcode, VRegA_51l(), VRegB_51l()); break;
+    case k51l: os << StringPrintf("%s v%d, #%+" PRId64, opcode, VRegA_51l(), VRegB_51l()); break;
     default: os << " unknown format (" << DumpHex(5) << ")"; break;
   }
   return os.str();
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index b3b24ba..2f7c38a 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -64,15 +64,16 @@
     prot = PROT_READ;
     flags = MAP_PRIVATE;
   }
-  int64_t file_length = file_->GetLength();
-  if (file_length < 0) {
-    errno = -file_length;
+  int64_t temp_file_length = file_->GetLength();
+  if (temp_file_length < 0) {
+    errno = -temp_file_length;
     *error_msg = StringPrintf("Failed to get length of file: '%s' fd=%d: %s",
                               file_->GetPath().c_str(), file_->Fd(), strerror(errno));
     return false;
   }
+  size_t file_length = static_cast<size_t>(temp_file_length);
   if (file_length < sizeof(llvm::ELF::Elf32_Ehdr)) {
-    *error_msg = StringPrintf("File size of %lld bytes not large enough to contain ELF header of "
+    *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF header of "
                               "%zd bytes: '%s'", file_length, sizeof(llvm::ELF::Elf32_Ehdr),
                               file_->GetPath().c_str());
     return false;
@@ -89,7 +90,7 @@
     // then remap to cover program header
     size_t program_header_size = header_->e_phoff + (header_->e_phentsize * header_->e_phnum);
     if (file_length < program_header_size) {
-      *error_msg = StringPrintf("File size of %lld bytes not large enough to contain ELF program "
+      *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF program "
                                 "header of %zd bytes: '%s'", file_length,
                                 sizeof(llvm::ELF::Elf32_Ehdr), file_->GetPath().c_str());
       return false;
@@ -632,7 +633,14 @@
     // non-zero, the segments require the specific address specified,
     // which either was specified in the file because we already set
     // base_address_ after the first zero segment).
-    int64_t file_length = file_->GetLength();
+    int64_t temp_file_length = file_->GetLength();
+    if (temp_file_length < 0) {
+      errno = -temp_file_length;
+      *error_msg = StringPrintf("Failed to get length of file: '%s' fd=%d: %s",
+                                file_->GetPath().c_str(), file_->Fd(), strerror(errno));
+      return false;
+    }
+    size_t file_length = static_cast<size_t>(temp_file_length);
     if (program_header.p_vaddr == 0) {
       std::string reservation_name("ElfFile reservation for ");
       reservation_name += file_->GetPath();
@@ -666,7 +674,7 @@
       flags |= MAP_PRIVATE;
     }
     if (file_length < (program_header.p_offset + program_header.p_memsz)) {
-      *error_msg = StringPrintf("File size of %lld bytes not large enough to contain ELF segment "
+      *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF segment "
                                 "%d of %d bytes: '%s'", file_length, i,
                                 program_header.p_offset + program_header.p_memsz,
                                 file_->GetPath().c_str());
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 51b238c..21e942e 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -326,7 +326,7 @@
    * Extract the table index from an indirect reference.
    */
   static uint32_t ExtractIndex(IndirectRef iref) {
-    uint32_t uref = (uint32_t) iref;
+    uintptr_t uref = reinterpret_cast<uintptr_t>(iref);
     return (uref >> 2) & 0xffff;
   }
 
@@ -337,8 +337,8 @@
   IndirectRef ToIndirectRef(const mirror::Object* /*o*/, uint32_t tableIndex) const {
     DCHECK_LT(tableIndex, 65536U);
     uint32_t serialChunk = slot_data_[tableIndex].serial;
-    uint32_t uref = serialChunk << 20 | (tableIndex << 2) | kind_;
-    return (IndirectRef) uref;
+    uintptr_t uref = serialChunk << 20 | (tableIndex << 2) | kind_;
+    return reinterpret_cast<IndirectRef>(uref);
   }
 
   /*
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 4aa7f13..e372c26 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -638,13 +638,14 @@
    * go to sleep indefinitely.
    */
   while (event_thread_id_ != 0) {
-    VLOG(jdwp) << StringPrintf("event in progress (%#llx), %#llx sleeping", event_thread_id_, threadId);
+    VLOG(jdwp) << StringPrintf("event in progress (%#" PRIx64 "), %#" PRIx64 " sleeping",
+                               event_thread_id_, threadId);
     waited = true;
     event_thread_cond_.Wait(self);
   }
 
   if (waited || threadId != 0) {
-    VLOG(jdwp) << StringPrintf("event token grabbed (%#llx)", threadId);
+    VLOG(jdwp) << StringPrintf("event token grabbed (%#" PRIx64 ")", threadId);
   }
   if (threadId != 0) {
     event_thread_id_ = threadId;
@@ -664,7 +665,7 @@
   MutexLock mu(self, event_thread_lock_);
 
   CHECK_NE(event_thread_id_, 0U);
-  VLOG(jdwp) << StringPrintf("cleared event token (%#llx)", event_thread_id_);
+  VLOG(jdwp) << StringPrintf("cleared event token (%#" PRIx64 ")", event_thread_id_);
 
   event_thread_id_ = 0;
 
@@ -820,7 +821,8 @@
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
                  << basket.className << "." << Dbg::GetMethodName(pLoc->method_id)
-                 << StringPrintf(" thread=%#llx dex_pc=%#llx)", basket.threadId, pLoc->dex_pc);
+                 << StringPrintf(" thread=%#" PRIx64 "  dex_pc=%#" PRIx64 ")",
+                                 basket.threadId, pLoc->dex_pc);
 
       suspend_policy = scanSuspendPolicy(match_list, match_count);
       VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
@@ -885,7 +887,7 @@
 
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
-                 << StringPrintf("thread=%#llx", basket.threadId) << ")";
+                 << StringPrintf("thread=%#" PRIx64, basket.threadId) << ")";
 
       suspend_policy = scanSuspendPolicy(match_list, match_count);
       VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
@@ -968,8 +970,8 @@
     FindMatchingEvents(EK_EXCEPTION, &basket, match_list, &match_count);
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total)"
-                 << StringPrintf(" thread=%#llx", basket.threadId)
-                 << StringPrintf(" exceptId=%#llx", exceptionId)
+                 << StringPrintf(" thread=%#" PRIx64, basket.threadId)
+                 << StringPrintf(" exceptId=%#" PRIx64, exceptionId)
                  << " caught=" << basket.caught << ")"
                  << "  throw: " << *pThrowLoc;
       if (pCatchLoc->class_id == 0) {
@@ -1036,7 +1038,7 @@
     FindMatchingEvents(EK_CLASS_PREPARE, &basket, match_list, &match_count);
     if (match_count != 0) {
       VLOG(jdwp) << "EVENT: " << match_list[0]->eventKind << "(" << match_count << " total) "
-                 << StringPrintf("thread=%#llx", basket.threadId) << ") " << signature;
+                 << StringPrintf("thread=%#" PRIx64, basket.threadId) << ") " << signature;
 
       suspend_policy = scanSuspendPolicy(match_list, match_count);
       VLOG(jdwp) << "  suspend_policy=" << suspend_policy;
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 523d892..6522a62 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -48,7 +48,7 @@
 std::string DescribeRefTypeId(const RefTypeId& ref_type_id) {
   std::string signature("unknown");
   Dbg::GetSignature(ref_type_id, &signature);
-  return StringPrintf("%#llx (%s)", ref_type_id, signature.c_str());
+  return StringPrintf("%#" PRIx64 " (%s)", ref_type_id, signature.c_str());
 }
 
 // Helper function: write a variable-width value into the output input buffer.
@@ -99,8 +99,9 @@
 
   int32_t arg_count = request.ReadSigned32("argument count");
 
-  VLOG(jdwp) << StringPrintf("    --> thread_id=%#llx object_id=%#llx", thread_id, object_id);
-  VLOG(jdwp) << StringPrintf("        class_id=%#llx method_id=%x %s.%s", class_id,
+  VLOG(jdwp) << StringPrintf("    --> thread_id=%#" PRIx64 " object_id=%#" PRIx64,
+                             thread_id, object_id);
+  VLOG(jdwp) << StringPrintf("        class_id=%#" PRIx64 " method_id=%x %s.%s", class_id,
                              method_id, Dbg::GetClassName(class_id).c_str(),
                              Dbg::GetMethodName(method_id).c_str());
   VLOG(jdwp) << StringPrintf("        %d args:", arg_count);
@@ -111,7 +112,8 @@
     argTypes[i] = request.ReadTag();
     size_t width = Dbg::GetTagWidth(argTypes[i]);
     argValues[i] = request.ReadValue(width);
-    VLOG(jdwp) << "          " << argTypes[i] << StringPrintf("(%zd): %#llx", width, argValues[i]);
+    VLOG(jdwp) << "          " << argTypes[i] << StringPrintf("(%zd): %#" PRIx64, width,
+                                                              argValues[i]);
   }
 
   uint32_t options = request.ReadUnsigned32("InvokeOptions bit flags");
@@ -143,7 +145,8 @@
     expandBufAdd1(pReply, JT_OBJECT);
     expandBufAddObjectId(pReply, exceptObjId);
 
-    VLOG(jdwp) << "  --> returned " << resultTag << StringPrintf(" %#llx (except=%#llx)", resultValue, exceptObjId);
+    VLOG(jdwp) << "  --> returned " << resultTag
+        << StringPrintf(" %#" PRIx64 " (except=%#" PRIx64 ")", resultValue, exceptObjId);
 
     /* show detailed debug output */
     if (resultTag == JT_STRING && exceptObjId == 0) {
@@ -526,7 +529,7 @@
   if (status != ERR_NONE) {
     return status;
   }
-  VLOG(jdwp) << StringPrintf("    --> ObjectId %#llx", class_object_id);
+  VLOG(jdwp) << StringPrintf("    --> ObjectId %#" PRIx64, class_object_id);
   expandBufAddObjectId(pReply, class_object_id);
   return ERR_NONE;
 }
@@ -936,7 +939,7 @@
   if (error != ERR_NONE) {
     return error;
   }
-  VLOG(jdwp) << StringPrintf("  Name of thread %#llx is \"%s\"", thread_id, name.c_str());
+  VLOG(jdwp) << StringPrintf("  Name of thread %#" PRIx64 " is \"%s\"", thread_id, name.c_str());
   expandBufAddUtf8String(pReply, name);
 
   return ERR_NONE;
@@ -1335,7 +1338,7 @@
         ObjectId thread_id = request.ReadThreadId();
         uint32_t size = request.ReadUnsigned32("step size");
         uint32_t depth = request.ReadUnsigned32("step depth");
-        VLOG(jdwp) << StringPrintf("    Step: thread=%#llx", thread_id)
+        VLOG(jdwp) << StringPrintf("    Step: thread=%#" PRIx64, thread_id)
                      << " size=" << JdwpStepSize(size) << " depth=" << JdwpStepDepth(depth);
 
         mod.step.threadId = thread_id;
@@ -1640,7 +1643,7 @@
   std::string result;
   result += "REQUEST: ";
   result += GetCommandName(request);
-  result += StringPrintf(" (length=%d id=0x%06x)", request.GetLength(), request.GetId());
+  result += StringPrintf(" (length=%zu id=0x%06x)", request.GetLength(), request.GetId());
   return result;
 }
 
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 127ebfa..928f53d 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -156,11 +156,11 @@
   errno = 0;
   ssize_t actual = netState->WriteBufferedPacket(iov);
   if (static_cast<size_t>(actual) != expected) {
-    PLOG(ERROR) << StringPrintf("Failed to send JDWP packet %c%c%c%c to debugger (%d of %d)",
-                                static_cast<uint8_t>(type >> 24),
-                                static_cast<uint8_t>(type >> 16),
-                                static_cast<uint8_t>(type >> 8),
-                                static_cast<uint8_t>(type),
+    PLOG(ERROR) << StringPrintf("Failed to send JDWP packet %c%c%c%c to debugger (%zd of %zu)",
+                                static_cast<char>(type >> 24),
+                                static_cast<char>(type >> 16),
+                                static_cast<char>(type >> 8),
+                                static_cast<char>(type),
                                 actual, expected);
   }
 }
@@ -175,7 +175,7 @@
   errno = 0;
   ssize_t actual = netState->WritePacket(pReq);
   if (static_cast<size_t>(actual) != expandBufGetLength(pReq)) {
-    PLOG(ERROR) << StringPrintf("Failed to send JDWP packet to debugger (%d of %d)",
+    PLOG(ERROR) << StringPrintf("Failed to send JDWP packet to debugger (%zd of %zu)",
                                 actual, expandBufGetLength(pReq));
   }
 }
@@ -607,7 +607,7 @@
 std::ostream& operator<<(std::ostream& os, const JdwpLocation& rhs) {
   os << "JdwpLocation["
      << Dbg::GetClassName(rhs.class_id) << "." << Dbg::GetMethodName(rhs.method_id)
-     << "@" << StringPrintf("%#llx", rhs.dex_pc) << " " << rhs.type_tag << "]";
+     << "@" << StringPrintf("%#" PRIx64, rhs.dex_pc) << " " << rhs.type_tag << "]";
   return os;
 }
 
diff --git a/runtime/jdwp/jdwp_request.cc b/runtime/jdwp/jdwp_request.cc
index a9dd1e1..7b15d6d 100644
--- a/runtime/jdwp/jdwp_request.cc
+++ b/runtime/jdwp/jdwp_request.cc
@@ -16,6 +16,8 @@
 
 #include "jdwp/jdwp.h"
 
+#include <inttypes.h>
+
 #include "base/stringprintf.h"
 #include "jdwp/jdwp_priv.h"
 
@@ -98,7 +100,7 @@
 
 ObjectId Request::ReadObjectId(const char* specific_kind) {
   ObjectId id = Read8BE();
-  VLOG(jdwp) << StringPrintf("    %s id %#llx", specific_kind, id);
+  VLOG(jdwp) << StringPrintf("    %s id %#" PRIx64, specific_kind, id);
   return id;
 }
 
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index b07cf5c..d3b8236 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -16,7 +16,6 @@
 
 #include "mem_map.h"
 
-#define __STDC_FORMAT_MACROS 1
 #include <inttypes.h>
 #include <backtrace/BacktraceMap.h>