Support longs in the register allocator for x86_64.

Change-Id: I7fb6dfb761bc5cf9e5705682032855a0a70ca867
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index c3a322c..cc995f7 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -226,7 +226,7 @@
 }
 
 template<typename T>
-void HGraphBuilder::Binop_32x(const Instruction& instruction, Primitive::Type type) {
+void HGraphBuilder::Binop_23x(const Instruction& instruction, Primitive::Type type) {
   HInstruction* first = LoadLocal(instruction.VRegB(), type);
   HInstruction* second = LoadLocal(instruction.VRegC(), type);
   current_block_->AddInstruction(new (arena_) T(type, first, second));
@@ -501,22 +501,22 @@
     }
 
     case Instruction::ADD_INT: {
-      Binop_32x<HAdd>(instruction, Primitive::kPrimInt);
+      Binop_23x<HAdd>(instruction, Primitive::kPrimInt);
       break;
     }
 
     case Instruction::ADD_LONG: {
-      Binop_32x<HAdd>(instruction, Primitive::kPrimLong);
+      Binop_23x<HAdd>(instruction, Primitive::kPrimLong);
       break;
     }
 
     case Instruction::SUB_INT: {
-      Binop_32x<HSub>(instruction, Primitive::kPrimInt);
+      Binop_23x<HSub>(instruction, Primitive::kPrimInt);
       break;
     }
 
     case Instruction::SUB_LONG: {
-      Binop_32x<HSub>(instruction, Primitive::kPrimLong);
+      Binop_23x<HSub>(instruction, Primitive::kPrimLong);
       break;
     }
 
@@ -573,6 +573,11 @@
       UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
       break;
 
+    case Instruction::CMP_LONG: {
+      Binop_23x<HCompare>(instruction, Primitive::kPrimLong);
+      break;
+    }
+
     case Instruction::NOP:
       break;
 
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 0852a26..ee32ca8 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -73,7 +73,7 @@
   bool InitializeParameters(uint16_t number_of_parameters);
 
   template<typename T>
-  void Binop_32x(const Instruction& instruction, Primitive::Type type);
+  void Binop_23x(const Instruction& instruction, Primitive::Type type);
 
   template<typename T>
   void Binop_12x(const Instruction& instruction, Primitive::Type type);
@@ -84,11 +84,8 @@
   template<typename T>
   void Binop_22s(const Instruction& instruction, bool reverse);
 
-  template<typename T>
-  void If_22t(const Instruction& instruction, int32_t dex_offset);
-
-  template<typename T>
-  void If_21t(const Instruction& instruction, int32_t dex_offset);
+  template<typename T> void If_21t(const Instruction& instruction, int32_t dex_offset);
+  template<typename T> void If_22t(const Instruction& instruction, int32_t dex_offset);
 
   void BuildReturn(const Instruction& instruction, Primitive::Type type);
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 83621e0..ae2f030 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -90,6 +90,7 @@
   virtual void SetupBlockedRegisters(bool* blocked_registers) const = 0;
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0;
+  virtual InstructionSet GetInstructionSet() const = 0;
 
   void RecordPcInfo(uint32_t dex_pc) {
     struct PcInfo pc_info;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ec3c815..d87c14b 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -905,6 +905,48 @@
          locations->InAt(0).AsArm().AsCoreRegister(), ShifterOperand(1));
 }
 
+void LocationsBuilderARM::VisitCompare(HCompare* compare) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  compare->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
+  Label greater, done;
+  LocationSummary* locations = compare->GetLocations();
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong: {
+      Register output = locations->Out().AsArm().AsCoreRegister();
+      ArmManagedRegister left = locations->InAt(0).AsArm();
+      ArmManagedRegister right = locations->InAt(1).AsArm();
+      Label less, greater, done;
+      __ cmp(left.AsRegisterPairHigh(),
+             ShifterOperand(right.AsRegisterPairHigh()));  // Signed compare.
+      __ b(&less, LT);
+      __ b(&greater, GT);
+      __ cmp(left.AsRegisterPairLow(),
+             ShifterOperand(right.AsRegisterPairLow()));  // Unsigned compare.
+      __ LoadImmediate(output, 0);
+      __ b(&done, EQ);
+      __ b(&less, CC);
+
+      __ Bind(&greater);
+      __ LoadImmediate(output, 1);
+      __ b(&done);
+
+      __ Bind(&less);
+      __ LoadImmediate(output, -1);
+
+      __ Bind(&done);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType();
+  }
+}
+
 void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 712a24c..c46c1b1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -171,6 +171,10 @@
     return &move_resolver_;
   }
 
+  virtual InstructionSet GetInstructionSet() const OVERRIDE {
+    return InstructionSet::kArm;
+  }
+
  private:
   // Helper method to move a 32bits value between two locations.
   void Move32(Location destination, Location source);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f624f3c..572d494 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -81,12 +81,23 @@
                                                        bool* blocked_registers) const {
   switch (type) {
     case Primitive::kPrimLong: {
-      size_t reg = AllocateFreeRegisterInternal(
-          GetBlockedRegisterPairs(blocked_registers), kNumberOfRegisterPairs);
+      bool* blocked_register_pairs = GetBlockedRegisterPairs(blocked_registers);
+      size_t reg = AllocateFreeRegisterInternal(blocked_register_pairs, kNumberOfRegisterPairs);
       X86ManagedRegister pair =
           X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(reg));
       blocked_registers[pair.AsRegisterPairLow()] = true;
       blocked_registers[pair.AsRegisterPairHigh()] = true;
+      // Block all other register pairs that share a register with `pair`.
+      for (int i = 0; i < kNumberOfRegisterPairs; i++) {
+        X86ManagedRegister current =
+            X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
+        if (current.AsRegisterPairLow() == pair.AsRegisterPairLow()
+            || current.AsRegisterPairLow() == pair.AsRegisterPairHigh()
+            || current.AsRegisterPairHigh() == pair.AsRegisterPairLow()
+            || current.AsRegisterPairHigh() == pair.AsRegisterPairHigh()) {
+          blocked_register_pairs[i] = true;
+        }
+      }
       return pair;
     }
 
@@ -901,6 +912,46 @@
   __ xorl(out.AsX86().AsCpuRegister(), Immediate(1));
 }
 
+void LocationsBuilderX86::VisitCompare(HCompare* compare) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  compare->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) {
+  Label greater, done;
+  LocationSummary* locations = compare->GetLocations();
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong: {
+      Label less, greater, done;
+      Register output = locations->Out().AsX86().AsCpuRegister();
+      X86ManagedRegister left = locations->InAt(0).AsX86();
+      X86ManagedRegister right = locations->InAt(1).AsX86();
+      __ cmpl(left.AsRegisterPairHigh(), right.AsRegisterPairHigh());
+      __ j(kLess, &less);  // Signed compare.
+      __ j(kGreater, &greater);  // Signed compare.
+      __ cmpl(left.AsRegisterPairLow(), right.AsRegisterPairLow());
+      __ movl(output, Immediate(0));
+      __ j(kEqual, &done);
+      __ j(kBelow, &less);  // Unsigned compare.
+
+      __ Bind(&greater);
+      __ movl(output, Immediate(1));
+      __ jmp(&done);
+
+      __ Bind(&less);
+      __ movl(output, Immediate(-1));
+
+      __ Bind(&done);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType();
+  }
+}
+
 void LocationsBuilderX86::VisitPhi(HPhi* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index acc670e..8a8216a 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -173,6 +173,10 @@
     return &move_resolver_;
   }
 
+  virtual InstructionSet GetInstructionSet() const OVERRIDE {
+    return InstructionSet::kX86;
+  }
+
  private:
   // Helper method to move a 32bits value between two locations.
   void Move32(Location destination, Location source);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 283f1f5..dc1d616 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -228,7 +228,9 @@
   }
 }
 
-void CodeGeneratorX86_64::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
+void CodeGeneratorX86_64::Move(HInstruction* instruction,
+                               Location location,
+                               HInstruction* move_for) {
   if (instruction->AsIntConstant() != nullptr) {
     Immediate imm(instruction->AsIntConstant()->GetValue());
     if (location.IsRegister()) {
@@ -383,7 +385,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(comp);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
+  locations->SetOut(Location::RequiresRegister());
   comp->SetLocations(locations);
 }
 
@@ -444,6 +446,39 @@
   VisitCondition(comp);
 }
 
+void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+  compare->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
+  Label greater, done;
+  LocationSummary* locations = compare->GetLocations();
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimLong:
+      __ cmpq(locations->InAt(0).AsX86_64().AsCpuRegister(),
+              locations->InAt(1).AsX86_64().AsCpuRegister());
+      break;
+    default:
+      LOG(FATAL) << "Unimplemented compare type " << compare->InputAt(0)->GetType();
+  }
+
+  __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(0));
+  __ j(kEqual, &done);
+  __ j(kGreater, &greater);
+
+  __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(-1));
+  __ jmp(&done);
+
+  __ Bind(&greater);
+  __ movl(locations->Out().AsX86_64().AsCpuRegister(), Immediate(1));
+
+  __ Bind(&done);
+}
+
 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
   // TODO: Support constant locations.
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
@@ -463,7 +498,7 @@
 }
 
 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant) {
-  // Will be generated at use site.
+  codegen_->Move(constant, constant->GetLocations()->Out(), nullptr);
 }
 
 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
@@ -812,10 +847,13 @@
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
       __ movq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
-    } else {
-      DCHECK(destination.IsStackSlot());
+    } else if (destination.IsStackSlot()) {
       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
               source.AsX86_64().AsCpuRegister());
+    } else {
+      DCHECK(destination.IsDoubleStackSlot());
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
+              source.AsX86_64().AsCpuRegister());
     }
   } else if (source.IsStackSlot()) {
     if (destination.IsRegister()) {
@@ -826,18 +864,27 @@
       __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
       __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     }
+  } else if (source.IsDoubleStackSlot()) {
+    if (destination.IsRegister()) {
+      __ movq(destination.AsX86_64().AsX86_64().AsCpuRegister(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
+    } else {
+      DCHECK(destination.IsDoubleStackSlot());
+      __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
+      __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
+    }
   } else {
     LOG(FATAL) << "Unimplemented";
   }
 }
 
-void ParallelMoveResolverX86_64::Exchange(CpuRegister reg, int mem) {
+void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
   __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
-  __ movl(Address(CpuRegister(RSP), mem), CpuRegister(reg));
-  __ movl(CpuRegister(reg), CpuRegister(TMP));
+  __ movl(Address(CpuRegister(RSP), mem), reg);
+  __ movl(reg, CpuRegister(TMP));
 }
 
-void ParallelMoveResolverX86_64::Exchange(int mem1, int mem2) {
+void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
   ScratchRegisterScope ensure_scratch(
       this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
 
@@ -850,6 +897,25 @@
           CpuRegister(ensure_scratch.GetRegister()));
 }
 
+void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
+  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
+  __ movq(Address(CpuRegister(RSP), mem), reg);
+  __ movq(reg, CpuRegister(TMP));
+}
+
+void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
+  ScratchRegisterScope ensure_scratch(
+      this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+
+  int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
+  __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
+  __ movq(CpuRegister(ensure_scratch.GetRegister()),
+          Address(CpuRegister(RSP), mem2 + stack_offset));
+  __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
+  __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
+          CpuRegister(ensure_scratch.GetRegister()));
+}
+
 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
   MoveOperands* move = moves_.Get(index);
   Location source = move->GetSource();
@@ -858,11 +924,17 @@
   if (source.IsRegister() && destination.IsRegister()) {
     __ xchgq(destination.AsX86_64().AsCpuRegister(), source.AsX86_64().AsCpuRegister());
   } else if (source.IsRegister() && destination.IsStackSlot()) {
-    Exchange(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex());
+    Exchange32(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsRegister()) {
-    Exchange(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex());
+    Exchange32(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex());
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
-    Exchange(destination.GetStackIndex(), source.GetStackIndex());
+    Exchange32(destination.GetStackIndex(), source.GetStackIndex());
+  } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
+    Exchange64(source.AsX86_64().AsCpuRegister(), destination.GetStackIndex());
+  } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
+    Exchange64(destination.AsX86_64().AsCpuRegister(), source.GetStackIndex());
+  } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
+    Exchange64(destination.GetStackIndex(), source.GetStackIndex());
   } else {
     LOG(FATAL) << "Unimplemented";
   }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index f07df29..d347a4f 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -69,8 +69,10 @@
   X86_64Assembler* GetAssembler() const;
 
  private:
-  void Exchange(CpuRegister reg, int mem);
-  void Exchange(int mem1, int mem2);
+  void Exchange32(CpuRegister reg, int mem);
+  void Exchange32(int mem1, int mem2);
+  void Exchange64(CpuRegister reg, int mem);
+  void Exchange64(int mem1, int mem2);
 
   CodeGeneratorX86_64* const codegen_;
 
@@ -170,6 +172,10 @@
   virtual void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
+  virtual InstructionSet GetInstructionSet() const OVERRIDE {
+    return InstructionSet::kX86_64;
+  }
+
  private:
   // Helper method to move a value between two locations.
   void Move(Location destination, Location source);
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index a49ce64..f033e2e 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -108,9 +108,11 @@
       } else {
         codegen_.DumpCoreRegister(output_, location.reg().RegId());
       }
-    } else {
-      DCHECK(location.IsStackSlot());
+    } else if (location.IsStackSlot()) {
       output_ << location.GetStackIndex() << "(sp)";
+    } else {
+      DCHECK(location.IsDoubleStackSlot());
+      output_ << "2x" << location.GetStackIndex() << "(sp)";
     }
   }
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 503f31d..9292084 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -414,6 +414,7 @@
   M(ReturnVoid)                                            \
   M(StoreLocal)                                            \
   M(Sub)                                                   \
+  M(Compare)                                               \
 
 
 #define FORWARD_DECLARATION(type) class H##type;
@@ -986,6 +987,22 @@
 };
 
 
+// Instruction to check how two inputs compare to each other.
+// Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1.
+class HCompare : public HBinaryOperation {
+ public:
+  HCompare(Primitive::Type type, HInstruction* first, HInstruction* second)
+      : HBinaryOperation(Primitive::kPrimInt, first, second) {
+    DCHECK_EQ(type, first->GetType());
+    DCHECK_EQ(type, second->GetType());
+  }
+
+  DECLARE_INSTRUCTION(Compare);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HCompare);
+};
+
 // A local in the graph. Corresponds to a Dex register.
 class HLocal : public HTemplateInstruction<0> {
  public:
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 1f4cb41..68130dd 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -55,7 +55,7 @@
          it.Advance()) {
       HInstruction* current = it.Current();
       if (current->NeedsEnvironment()) return false;
-      if (current->GetType() == Primitive::kPrimLong) return false;
+      if (current->GetType() == Primitive::kPrimLong && instruction_set != kX86_64) return false;
       if (current->GetType() == Primitive::kPrimFloat) return false;
       if (current->GetType() == Primitive::kPrimDouble) return false;
     }
@@ -139,7 +139,7 @@
         current->SetFrom(position + 1);
         current->SetRegister(output.reg().RegId());
         BlockRegister(output, position, position + 1, instruction->GetType());
-      } else if (output.IsStackSlot()) {
+      } else if (output.IsStackSlot() || output.IsDoubleStackSlot()) {
         current->SetSpillSlot(output.GetStackIndex());
       }
       for (size_t i = 0; i < instruction->InputCount(); ++i) {
@@ -430,7 +430,7 @@
 // we spill `current` instead.
 bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) {
   size_t first_register_use = current->FirstRegisterUse();
-  if (current->FirstRegisterUse() == kNoLifetime) {
+  if (first_register_use == kNoLifetime) {
     AllocateSpillSlotFor(current);
     return false;
   }
@@ -559,6 +559,10 @@
   }
 }
 
+static bool NeedTwoSpillSlot(Primitive::Type type) {
+  return type == Primitive::kPrimLong || type == Primitive::kPrimDouble;
+}
+
 void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
   LiveInterval* parent = interval->GetParent();
 
@@ -581,6 +585,43 @@
   }
   size_t end = last_sibling->GetEnd();
 
+  if (NeedTwoSpillSlot(parent->GetType())) {
+    AllocateTwoSpillSlots(parent, end);
+  } else {
+    AllocateOneSpillSlot(parent, end);
+  }
+}
+
+void RegisterAllocator::AllocateTwoSpillSlots(LiveInterval* parent, size_t end) {
+  // Find an available spill slot.
+  size_t slot = 0;
+  for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
+    // We check if it is less rather than less or equal because the parallel move
+    // resolver does not work when a single spill slot needs to be exchanged with
+    // a double spill slot. The strict comparison avoids needing to exchange these
+    // locations at the same lifetime position.
+    if (spill_slots_.Get(slot) < parent->GetStart()
+        && (slot == (e - 1) || spill_slots_.Get(slot + 1) < parent->GetStart())) {
+      break;
+    }
+  }
+
+  if (slot == spill_slots_.Size()) {
+    // We need a new spill slot.
+    spill_slots_.Add(end);
+    spill_slots_.Add(end);
+  } else if (slot == spill_slots_.Size() - 1) {
+    spill_slots_.Put(slot, end);
+    spill_slots_.Add(end);
+  } else {
+    spill_slots_.Put(slot, end);
+    spill_slots_.Put(slot + 1, end);
+  }
+
+  parent->SetSpillSlot(slot * kVRegSize);
+}
+
+void RegisterAllocator::AllocateOneSpillSlot(LiveInterval* parent, size_t end) {
   // Find an available spill slot.
   size_t slot = 0;
   for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
@@ -604,7 +645,11 @@
     return Location::RegisterLocation(ManagedRegister(interval->GetRegister()));
   } else {
     DCHECK(interval->GetParent()->HasSpillSlot());
-    return Location::StackSlot(interval->GetParent()->GetSpillSlot());
+    if (NeedTwoSpillSlot(interval->GetType())) {
+      return Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot());
+    } else {
+      return Location::StackSlot(interval->GetParent()->GetSpillSlot());
+    }
   }
 }
 
@@ -750,7 +795,9 @@
     // We spill eagerly, so move must be at definition.
     InsertMoveAfter(interval->GetDefinedBy(),
                     Location::RegisterLocation(ManagedRegister(interval->GetRegister())),
-                    Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+                    NeedTwoSpillSlot(interval->GetType())
+                        ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
+                        : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
   }
   UsePosition* use = current->GetFirstUse();
 
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index e63122f..7d4cd1a 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -93,6 +93,8 @@
 
   // Allocate a spill slot for the given interval.
   void AllocateSpillSlotFor(LiveInterval* interval);
+  void AllocateOneSpillSlot(LiveInterval* interval, size_t end);
+  void AllocateTwoSpillSlots(LiveInterval* interval, size_t end);
 
   // Connect adjacent siblings within blocks.
   void ConnectSiblings(LiveInterval* interval);
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 41d1529..4d5d613 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -949,6 +949,14 @@
 }
 
 
+void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());  // andq only supports 32b immediate.
+  EmitRex64(reg);
+  EmitComplex(4, Operand(reg), imm);
+}
+
+
 void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitOptionalRex32(dst, src);
@@ -972,6 +980,14 @@
 }
 
 
+void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitRex64(dst, src);
+  EmitUint8(0x33);
+  EmitOperand(dst.LowBits(), Operand(src));
+}
+
+
 void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());  // xorq only supports 32b immediate.
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 9aa5a54..7514854 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -391,12 +391,14 @@
 
   void andl(CpuRegister dst, const Immediate& imm);
   void andl(CpuRegister dst, CpuRegister src);
+  void andq(CpuRegister dst, const Immediate& imm);
 
   void orl(CpuRegister dst, const Immediate& imm);
   void orl(CpuRegister dst, CpuRegister src);
 
   void xorl(CpuRegister dst, CpuRegister src);
   void xorq(CpuRegister dst, const Immediate& imm);
+  void xorq(CpuRegister dst, CpuRegister src);
 
   void addl(CpuRegister dst, CpuRegister src);
   void addl(CpuRegister reg, const Immediate& imm);
diff --git a/test/405-optimizing-long-allocator/expected.txt b/test/405-optimizing-long-allocator/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/405-optimizing-long-allocator/expected.txt
diff --git a/test/405-optimizing-long-allocator/info.txt b/test/405-optimizing-long-allocator/info.txt
new file mode 100644
index 0000000..b6b31ae
--- /dev/null
+++ b/test/405-optimizing-long-allocator/info.txt
@@ -0,0 +1 @@
+Tests with long for the optimizing compiler's register allocator.
diff --git a/test/405-optimizing-long-allocator/src/Main.java b/test/405-optimizing-long-allocator/src/Main.java
new file mode 100644
index 0000000..9fd840b
--- /dev/null
+++ b/test/405-optimizing-long-allocator/src/Main.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Note that $opt$ is a marker for the optimizing compiler to ensure
+// it compiles these methods.
+
+public class Main {
+  public static void main(String[] args) {
+
+    expectEquals(4, $opt$TestLostCopy());
+    expectEquals(-10, $opt$TestTwoLive());
+    expectEquals(-20, $opt$TestThreeLive());
+    expectEquals(5, $opt$TestFourLive());
+    expectEquals(10, $opt$TestMultipleLive());
+    expectEquals(1, $opt$TestWithBreakAndContinue());
+    expectEquals(-15, $opt$testSpillInIf(5, 6, 7));
+    expectEquals(-567, $opt$TestAgressiveLive1(1, 2, 3, 4, 5, 6, 7));
+    expectEquals(-77, $opt$TestAgressiveLive2(1, 2, 3, 4, 5, 6, 7));
+
+    expectEquals(-55834574850L, $opt$testSpillInIf(5, 6L << 32, 7L << 32));
+    expectEquals(-73014444553L, $opt$TestAgressiveLive1(
+        1L << 32, (1L << 32) + 1, 3L << 32, 4L << 32, 5L << 32, 6L << 32, (1L << 32) + 2));
+    expectEquals(-124554051632L, $opt$TestAgressiveLive2(
+        1L << 32, (1L << 32) + 1, 3L << 32, 4L << 32, 5L << 32, 6L << 32, 7L << 32));
+  }
+
+  public static long $opt$TestLostCopy() {
+    long a = 0;
+    long b = 0;
+    do {
+      b = a;
+      a++;
+    } while (a != 5);
+    return b;
+  }
+
+  public static long $opt$TestTwoLive() {
+    long a = 0;
+    long b = 0;
+    do {
+      a++;
+      b += 3;
+    } while (a != 5);
+    return a - b;
+  }
+
+  public static long $opt$TestThreeLive() {
+    long a = 0;
+    long b = 0;
+    long c = 0;
+    do {
+      a++;
+      b += 3;
+      c += 2;
+    } while (a != 5);
+    return a - b - c;
+  }
+
+  public static long $opt$TestFourLive() {
+    long a = 0;
+    long b = 0;
+    long c = 0;
+    long d = 0;
+    do {
+      a++;
+      b += 3;
+      c += 2;
+      d++;
+    } while (a != 5);
+    return d;
+  }
+
+  public static long $opt$TestMultipleLive() {
+    long a = 0;
+    long b = 0;
+    long c = 0;
+    long d = 0;
+    long e = 0;
+    long f = 0;
+    long g = 0;
+    do {
+      a++;
+      b++;
+      c++;
+      d++;
+      e += 3;
+      f += 2;
+      g += 2;
+    } while (a != 5);
+    return f;
+  }
+
+  public static long $opt$TestWithBreakAndContinue() {
+    long a = 0;
+    long b = 0;
+    do {
+      a++;
+      if (a == 2) {
+        continue;
+      }
+      b++;
+      if (a == 5) {
+        break;
+      }
+    } while (true);
+    return a - b;
+  }
+
+  public static long $opt$testSpillInIf(long a, long b, long c) {
+    long d = 0;
+    long e = 0;
+    if (a == 5) {
+      b++;
+      c++;
+      d += 2;
+      e += 3;
+    }
+
+    return a - b - c - d - e;
+  }
+
+  public static long $opt$TestAgressiveLive1(long a, long b, long c, long d, long e, long f, long g) {
+    long h = a - b;
+    long i = c - d;
+    long j = e - f;
+    long k = 42 + g - a;
+    do {
+      b++;
+      while (k != 1) {
+        --k;
+        ++i;
+        if (i == 9) {
+          ++i;
+        }
+        j += 5;
+      }
+      k = 9;
+      h++;
+    } while (h != 5);
+    return a - b - c - d - e - f - g - h - i - j - k;
+  }
+
+  public static long $opt$TestAgressiveLive2(long a, long b, long c, long d, long e, long f, long g) {
+    long h = a - b;
+    long i = c - d;
+    long j = e - f;
+    long k = 42 + g - a;
+    do {
+      h++;
+    } while (h != 5);
+    return a - b - c - d - e - f - g - h - i - j - k;
+  }
+
+  public static void expectEquals(long expected, long value) {
+    if (expected != value) {
+      throw new Error("Expected: " + expected + ", got: " + value);
+    }
+  }
+}