Merge "Fix neg-float & neg-double for null values in opt. compiler."
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 6375326..42f9eba 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -689,19 +689,25 @@
   }
   if (destination.IsRegisterPair()) {
     if (source.IsRegisterPair()) {
-      __ Mov(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
-      __ Mov(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
+      EmitParallelMoves(
+          Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
+          Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
+          Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
+          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()));
     } else if (source.IsFpuRegister()) {
       UNIMPLEMENTED(FATAL);
     } else if (source.IsQuickParameter()) {
       uint16_t register_index = source.GetQuickParameterRegisterIndex();
       uint16_t stack_index = source.GetQuickParameterStackIndex();
       InvokeDexCallingConvention calling_convention;
-      __ Mov(destination.AsRegisterPairLow<Register>(),
-             calling_convention.GetRegisterAt(register_index));
-      __ LoadFromOffset(kLoadWord, destination.AsRegisterPairHigh<Register>(),
-             SP, calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize());
+      EmitParallelMoves(
+          Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
+          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
+          Location::StackSlot(
+              calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()),
+          Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()));
     } else {
+      // No conflict possible, so just do the moves.
       DCHECK(source.IsDoubleStackSlot());
       if (destination.AsRegisterPairLow<Register>() == R1) {
         DCHECK_EQ(destination.AsRegisterPairHigh<Register>(), R2);
@@ -725,22 +731,21 @@
     uint16_t register_index = destination.GetQuickParameterRegisterIndex();
     uint16_t stack_index = destination.GetQuickParameterStackIndex();
     if (source.IsRegisterPair()) {
-      __ Mov(calling_convention.GetRegisterAt(register_index),
-             source.AsRegisterPairLow<Register>());
-      __ StoreToOffset(kStoreWord, source.AsRegisterPairHigh<Register>(),
-             SP, calling_convention.GetStackOffsetOf(stack_index + 1));
+      UNIMPLEMENTED(FATAL);
     } else if (source.IsFpuRegister()) {
       UNIMPLEMENTED(FATAL);
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ LoadFromOffset(
-          kLoadWord, calling_convention.GetRegisterAt(register_index), SP, source.GetStackIndex());
-      __ LoadFromOffset(kLoadWord, R0, SP, source.GetHighStackIndex(kArmWordSize));
-      __ StoreToOffset(kStoreWord, R0, SP, calling_convention.GetStackOffsetOf(stack_index + 1));
+      EmitParallelMoves(
+          Location::StackSlot(source.GetStackIndex()),
+          Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
+          Location::StackSlot(source.GetHighStackIndex(kArmWordSize)),
+          Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1)));
     }
   } else {
     DCHECK(destination.IsDoubleStackSlot());
     if (source.IsRegisterPair()) {
+      // No conflict possible, so just do the moves.
       if (source.AsRegisterPairLow<Register>() == R1) {
         DCHECK_EQ(source.AsRegisterPairHigh<Register>(), R2);
         __ StoreToOffset(kStoreWord, R1, SP, destination.GetStackIndex());
@@ -753,21 +758,24 @@
       InvokeDexCallingConvention calling_convention;
       uint16_t register_index = source.GetQuickParameterRegisterIndex();
       uint16_t stack_index = source.GetQuickParameterStackIndex();
+      // Just move the low part. The only time a source is a quick parameter is
+      // when moving the parameter to its stack locations. And the (Java) caller
+      // of this method has already done that.
       __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(register_index),
-             SP, destination.GetStackIndex());
-      __ LoadFromOffset(kLoadWord, R0,
-             SP, calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize());
-      __ StoreToOffset(kStoreWord, R0, SP, destination.GetHighStackIndex(kArmWordSize));
+                       SP, destination.GetStackIndex());
+      DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(),
+                static_cast<size_t>(destination.GetHighStackIndex(kArmWordSize)));
     } else if (source.IsFpuRegisterPair()) {
       __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()),
                         SP,
                         destination.GetStackIndex());
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
-      __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
-      __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize));
-      __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
+      EmitParallelMoves(
+          Location::StackSlot(source.GetStackIndex()),
+          Location::StackSlot(destination.GetStackIndex()),
+          Location::StackSlot(source.GetHighStackIndex(kArmWordSize)),
+          Location::StackSlot(destination.GetHighStackIndex(kArmWordSize)));
     }
   }
 }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 01fd701..ac2a084 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -603,19 +603,25 @@
   }
   if (destination.IsRegisterPair()) {
     if (source.IsRegisterPair()) {
-      __ movl(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
-      __ movl(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
+      EmitParallelMoves(
+          Location::RegisterLocation(source.AsRegisterPairHigh<Register>()),
+          Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()),
+          Location::RegisterLocation(source.AsRegisterPairLow<Register>()),
+          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()));
     } else if (source.IsFpuRegister()) {
       LOG(FATAL) << "Unimplemented";
     } else if (source.IsQuickParameter()) {
       uint16_t register_index = source.GetQuickParameterRegisterIndex();
       uint16_t stack_index = source.GetQuickParameterStackIndex();
       InvokeDexCallingConvention calling_convention;
-      __ movl(destination.AsRegisterPairLow<Register>(),
-              calling_convention.GetRegisterAt(register_index));
-      __ movl(destination.AsRegisterPairHigh<Register>(), Address(ESP,
-          calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()));
+      EmitParallelMoves(
+          Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
+          Location::RegisterLocation(destination.AsRegisterPairLow<Register>()),
+          Location::StackSlot(
+              calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize()),
+          Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()));
     } else {
+      // No conflict possible, so just do the moves.
       DCHECK(source.IsDoubleStackSlot());
       __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex()));
       __ movl(destination.AsRegisterPairHigh<Register>(),
@@ -625,18 +631,17 @@
     InvokeDexCallingConvention calling_convention;
     uint16_t register_index = destination.GetQuickParameterRegisterIndex();
     uint16_t stack_index = destination.GetQuickParameterStackIndex();
-    if (source.IsRegister()) {
-      __ movl(calling_convention.GetRegisterAt(register_index), source.AsRegisterPairLow<Register>());
-      __ movl(Address(ESP, calling_convention.GetStackOffsetOf(stack_index + 1)),
-              source.AsRegisterPairHigh<Register>());
+    if (source.IsRegisterPair()) {
+      LOG(FATAL) << "Unimplemented";
     } else if (source.IsFpuRegister()) {
       LOG(FATAL) << "Unimplemented";
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ movl(calling_convention.GetRegisterAt(register_index),
-              Address(ESP, source.GetStackIndex()));
-      __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize)));
-      __ popl(Address(ESP, calling_convention.GetStackOffsetOf(stack_index + 1)));
+      EmitParallelMoves(
+          Location::StackSlot(source.GetStackIndex()),
+          Location::RegisterLocation(calling_convention.GetRegisterAt(register_index)),
+          Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
+          Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index + 1)));
     }
   } else if (destination.IsFpuRegister()) {
     if (source.IsDoubleStackSlot()) {
@@ -647,13 +652,18 @@
   } else {
     DCHECK(destination.IsDoubleStackSlot()) << destination;
     if (source.IsRegisterPair()) {
+      // No conflict possible, so just do the moves.
       __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegisterPairLow<Register>());
       __ movl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)),
               source.AsRegisterPairHigh<Register>());
     } else if (source.IsQuickParameter()) {
+      // No conflict possible, so just do the move.
       InvokeDexCallingConvention calling_convention;
       uint16_t register_index = source.GetQuickParameterRegisterIndex();
       uint16_t stack_index = source.GetQuickParameterStackIndex();
+      // Just move the low part. The only time a source is a quick parameter is
+      // when moving the parameter to its stack locations. And the (Java) caller
+      // of this method has already done that.
       __ movl(Address(ESP, destination.GetStackIndex()),
               calling_convention.GetRegisterAt(register_index));
       DCHECK_EQ(calling_convention.GetStackOffsetOf(stack_index + 1) + GetFrameSize(),
@@ -662,10 +672,11 @@
       __ movsd(Address(ESP, destination.GetStackIndex()), source.As<XmmRegister>());
     } else {
       DCHECK(source.IsDoubleStackSlot());
-      __ pushl(Address(ESP, source.GetStackIndex()));
-      __ popl(Address(ESP, destination.GetStackIndex()));
-      __ pushl(Address(ESP, source.GetHighStackIndex(kX86WordSize)));
-      __ popl(Address(ESP, destination.GetHighStackIndex(kX86WordSize)));
+      EmitParallelMoves(
+          Location::StackSlot(source.GetStackIndex()),
+          Location::StackSlot(destination.GetStackIndex()),
+          Location::StackSlot(source.GetHighStackIndex(kX86WordSize)),
+          Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)));
     }
   }
 }
diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt
index 7ec3168..0f7001f 100644
--- a/test/800-smali/expected.txt
+++ b/test/800-smali/expected.txt
@@ -7,4 +7,5 @@
 b/18380491
 invoke-super abstract
 BadCaseInOpRegRegReg
+CmpLong
 Done!
diff --git a/test/800-smali/smali/CmpLong.smali b/test/800-smali/smali/CmpLong.smali
new file mode 100644
index 0000000..d54812f
--- /dev/null
+++ b/test/800-smali/smali/CmpLong.smali
@@ -0,0 +1,18 @@
+.class public LCmpLong;
+.super Ljava/lang/Object;
+
+
+.method public constructor <init>()V
+.registers 1
+       invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+       return-void
+.end method
+
+.method public static run()I
+.registers 5000
+       const-wide v100, 5678233453L
+       move-wide/from16 v101, v100
+       const-wide v4, 5678233453L
+       cmp-long v0, v101, v4
+       return v0
+.end method
diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java
index abb53de..f2c1ab5 100644
--- a/test/800-smali/src/Main.java
+++ b/test/800-smali/src/Main.java
@@ -63,6 +63,7 @@
         testCases.add(new TestCase("invoke-super abstract", "B18380491ConcreteClass", "foo",
             new Object[]{0}, new AbstractMethodError(), null));
         testCases.add(new TestCase("BadCaseInOpRegRegReg", "BadCaseInOpRegRegReg", "getInt", null, null, 2));
+        testCases.add(new TestCase("CmpLong", "CmpLong", "run", null, null, 0));
     }
 
     public void runTests() {