Revert "Revert "Revert "Revert "[optimizing] Improve x86 shifts""""

This reverts commit 2a7a1d7808f003bea908023ebd11eb442d2fca39.

Fix the problem that a long long >> 63 got the wrong answer.  The
problem was that a shr was used instead of a sar.

Change-Id: I0327f79c718016ddec9272a605fc50ec15ec4566
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 5ee091f..8aa7796 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -2740,17 +2740,12 @@
       new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
 
   switch (op->GetResultType()) {
-    case Primitive::kPrimInt: {
-      locations->SetInAt(0, Location::RequiresRegister());
-      // The shift count needs to be in CL.
-      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
-      locations->SetOut(Location::SameAsFirstInput());
-      break;
-    }
+    case Primitive::kPrimInt:
     case Primitive::kPrimLong: {
+      // Can't have Location::Any() and output SameAsFirstInput()
       locations->SetInAt(0, Location::RequiresRegister());
-      // The shift count needs to be in CL.
-      locations->SetInAt(1, Location::RegisterLocation(ECX));
+      // The shift count needs to be in CL or a constant.
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
       locations->SetOut(Location::SameAsFirstInput());
       break;
     }
@@ -2769,6 +2764,7 @@
 
   switch (op->GetResultType()) {
     case Primitive::kPrimInt: {
+      DCHECK(first.IsRegister());
       Register first_reg = first.AsRegister<Register>();
       if (second.IsRegister()) {
         Register second_reg = second.AsRegister<Register>();
@@ -2781,7 +2777,11 @@
           __ shrl(first_reg, second_reg);
         }
       } else {
-        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+        int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue;
+        if (shift == 0) {
+          return;
+        }
+        Immediate imm(shift);
         if (op->IsShl()) {
           __ shll(first_reg, imm);
         } else if (op->IsShr()) {
@@ -2793,14 +2793,29 @@
       break;
     }
     case Primitive::kPrimLong: {
-      Register second_reg = second.AsRegister<Register>();
-      DCHECK_EQ(ECX, second_reg);
-      if (op->IsShl()) {
-        GenerateShlLong(first, second_reg);
-      } else if (op->IsShr()) {
-        GenerateShrLong(first, second_reg);
+      if (second.IsRegister()) {
+        Register second_reg = second.AsRegister<Register>();
+        DCHECK_EQ(ECX, second_reg);
+        if (op->IsShl()) {
+          GenerateShlLong(first, second_reg);
+        } else if (op->IsShr()) {
+          GenerateShrLong(first, second_reg);
+        } else {
+          GenerateUShrLong(first, second_reg);
+        }
       } else {
-        GenerateUShrLong(first, second_reg);
+        // Shift by a constant.
+        int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue;
+        // Nothing to do if the shift is 0, as the input is already the output.
+        if (shift != 0) {
+          if (op->IsShl()) {
+            GenerateShlLong(first, shift);
+          } else if (op->IsShr()) {
+            GenerateShrLong(first, shift);
+          } else {
+            GenerateUShrLong(first, shift);
+          }
+        }
       }
       break;
     }
@@ -2809,6 +2824,30 @@
   }
 }
 
+void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
+  Register low = loc.AsRegisterPairLow<Register>();
+  Register high = loc.AsRegisterPairHigh<Register>();
+  if (shift == 32) {
+    // Shift by 32 is easy. High gets low, and low gets 0.
+    codegen_->EmitParallelMoves(
+        loc.ToLow(),
+        loc.ToHigh(),
+        Primitive::kPrimInt,
+        Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
+        loc.ToLow(),
+        Primitive::kPrimInt);
+  } else if (shift > 32) {
+    // Low part becomes 0.  High part is low part << (shift-32).
+    __ movl(high, low);
+    __ shll(high, Immediate(shift - 32));
+    __ xorl(low, low);
+  } else {
+    // Between 1 and 31.
+    __ shld(high, low, Immediate(shift));
+    __ shll(low, Immediate(shift));
+  }
+}
+
 void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
   Label done;
   __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
@@ -2820,6 +2859,27 @@
   __ Bind(&done);
 }
 
+void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
+  Register low = loc.AsRegisterPairLow<Register>();
+  Register high = loc.AsRegisterPairHigh<Register>();
+  if (shift == 32) {
+    // Need to copy the sign.
+    DCHECK_NE(low, high);
+    __ movl(low, high);
+    __ sarl(high, Immediate(31));
+  } else if (shift > 32) {
+    DCHECK_NE(low, high);
+    // High part becomes sign. Low part is shifted by shift - 32.
+    __ movl(low, high);
+    __ sarl(high, Immediate(31));
+    __ sarl(low, Immediate(shift - 32));
+  } else {
+    // Between 1 and 31.
+    __ shrd(low, high, Immediate(shift));
+    __ sarl(high, Immediate(shift));
+  }
+}
+
 void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
   Label done;
   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
@@ -2831,6 +2891,30 @@
   __ Bind(&done);
 }
 
+void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
+  Register low = loc.AsRegisterPairLow<Register>();
+  Register high = loc.AsRegisterPairHigh<Register>();
+  if (shift == 32) {
+    // Shift by 32 is easy. Low gets high, and high gets 0.
+    codegen_->EmitParallelMoves(
+        loc.ToHigh(),
+        loc.ToLow(),
+        Primitive::kPrimInt,
+        Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
+        loc.ToHigh(),
+        Primitive::kPrimInt);
+  } else if (shift > 32) {
+    // Low part is high >> (shift - 32). High part becomes 0.
+    __ movl(low, high);
+    __ shrl(low, Immediate(shift - 32));
+    __ xorl(high, high);
+  } else {
+    // Between 1 and 31.
+    __ shrd(low, high, Immediate(shift));
+    __ shrl(high, Immediate(shift));
+  }
+}
+
 void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
   Label done;
   __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 79dec7a..5a5a37b 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -166,6 +166,9 @@
   void GenerateShlLong(const Location& loc, Register shifter);
   void GenerateShrLong(const Location& loc, Register shifter);
   void GenerateUShrLong(const Location& loc, Register shifter);
+  void GenerateShlLong(const Location& loc, int shift);
+  void GenerateShrLong(const Location& loc, int shift);
+  void GenerateUShrLong(const Location& loc, int shift);
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 329698c..f2541a2 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1292,32 +1292,62 @@
 
 
 void X86Assembler::shll(Register reg, const Immediate& imm) {
-  EmitGenericShift(4, reg, imm);
+  EmitGenericShift(4, Operand(reg), imm);
 }
 
 
 void X86Assembler::shll(Register operand, Register shifter) {
-  EmitGenericShift(4, operand, shifter);
+  EmitGenericShift(4, Operand(operand), shifter);
+}
+
+
+void X86Assembler::shll(const Address& address, const Immediate& imm) {
+  EmitGenericShift(4, address, imm);
+}
+
+
+void X86Assembler::shll(const Address& address, Register shifter) {
+  EmitGenericShift(4, address, shifter);
 }
 
 
 void X86Assembler::shrl(Register reg, const Immediate& imm) {
-  EmitGenericShift(5, reg, imm);
+  EmitGenericShift(5, Operand(reg), imm);
 }
 
 
 void X86Assembler::shrl(Register operand, Register shifter) {
-  EmitGenericShift(5, operand, shifter);
+  EmitGenericShift(5, Operand(operand), shifter);
+}
+
+
+void X86Assembler::shrl(const Address& address, const Immediate& imm) {
+  EmitGenericShift(5, address, imm);
+}
+
+
+void X86Assembler::shrl(const Address& address, Register shifter) {
+  EmitGenericShift(5, address, shifter);
 }
 
 
 void X86Assembler::sarl(Register reg, const Immediate& imm) {
-  EmitGenericShift(7, reg, imm);
+  EmitGenericShift(7, Operand(reg), imm);
 }
 
 
 void X86Assembler::sarl(Register operand, Register shifter) {
-  EmitGenericShift(7, operand, shifter);
+  EmitGenericShift(7, Operand(operand), shifter);
+}
+
+
+void X86Assembler::sarl(const Address& address, const Immediate& imm) {
+  EmitGenericShift(7, address, imm);
+}
+
+
+void X86Assembler::sarl(const Address& address, Register shifter) {
+  EmitGenericShift(7, address, shifter);
 }
 
 
@@ -1330,6 +1360,15 @@
 }
 
 
+void X86Assembler::shld(Register dst, Register src, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xA4);
+  EmitRegisterOperand(src, dst);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86Assembler::shrd(Register dst, Register src, Register shifter) {
   DCHECK_EQ(ECX, shifter);
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1339,6 +1378,15 @@
 }
 
 
+void X86Assembler::shrd(Register dst, Register src, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0xAC);
+  EmitRegisterOperand(src, dst);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86Assembler::negl(Register reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF7);
@@ -1622,28 +1670,28 @@
 
 
 void X86Assembler::EmitGenericShift(int reg_or_opcode,
-                                    Register reg,
+                                    const Operand& operand,
                                     const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int8());
   if (imm.value() == 1) {
     EmitUint8(0xD1);
-    EmitOperand(reg_or_opcode, Operand(reg));
+    EmitOperand(reg_or_opcode, operand);
   } else {
     EmitUint8(0xC1);
-    EmitOperand(reg_or_opcode, Operand(reg));
+    EmitOperand(reg_or_opcode, operand);
     EmitUint8(imm.value() & 0xFF);
   }
 }
 
 
 void X86Assembler::EmitGenericShift(int reg_or_opcode,
-                                    Register operand,
+                                    const Operand& operand,
                                     Register shifter) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK_EQ(shifter, ECX);
   EmitUint8(0xD3);
-  EmitOperand(reg_or_opcode, Operand(operand));
+  EmitOperand(reg_or_opcode, operand);
 }
 
 static dwarf::Reg DWARFReg(Register reg) {
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 7fc8ef0..946c96d 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -430,12 +430,20 @@
 
   void shll(Register reg, const Immediate& imm);
   void shll(Register operand, Register shifter);
+  void shll(const Address& address, const Immediate& imm);
+  void shll(const Address& address, Register shifter);
   void shrl(Register reg, const Immediate& imm);
   void shrl(Register operand, Register shifter);
+  void shrl(const Address& address, const Immediate& imm);
+  void shrl(const Address& address, Register shifter);
   void sarl(Register reg, const Immediate& imm);
   void sarl(Register operand, Register shifter);
+  void sarl(const Address& address, const Immediate& imm);
+  void sarl(const Address& address, Register shifter);
   void shld(Register dst, Register src, Register shifter);
+  void shld(Register dst, Register src, const Immediate& imm);
   void shrd(Register dst, Register src, Register shifter);
+  void shrd(Register dst, Register src, const Immediate& imm);
 
   void negl(Register reg);
   void notl(Register reg);
@@ -620,8 +628,8 @@
   void EmitLabelLink(Label* label);
   void EmitNearLabelLink(Label* label);
 
-  void EmitGenericShift(int rm, Register reg, const Immediate& imm);
-  void EmitGenericShift(int rm, Register operand, Register shifter);
+  void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm);
+  void EmitGenericShift(int rm, const Operand& operand, Register shifter);
 
   DISALLOW_COPY_AND_ASSIGN(X86Assembler);
 };