Add X86 bsf and rotate instructions

These are for use in new intrinsics.  Bsf (Bit Scan Forward) is used in
{Long,Integer}NumberOfTrailingZeros and the rotates are used in
{Long,Integer}Rotate{Left,Right}.

Change-Id: Icb599d7e1eec4e4ea9e5b4f0b1654c7b8d4de678
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index e3962b4..04e815a 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -158,6 +158,20 @@
   EmitUint8(0xC8 + dst);
 }
 
+void X86Assembler::bsfl(Register dst, Register src) {  // BSF, register source.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);  // 0x0F 0xBC: two-byte opcode for BSF (bit scan forward).
+  EmitUint8(0xBC);
+  EmitRegisterOperand(dst, src);  // Operand bytes follow the opcode.
+}
+
+void X86Assembler::bsfl(Register dst, const Address& src) {  // BSF, memory source.
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);  // 0x0F 0xBC: two-byte opcode for BSF (bit scan forward).
+  EmitUint8(0xBC);
+  EmitOperand(dst, src);  // Operand bytes follow the opcode.
+}
+
 void X86Assembler::bsrl(Register dst, Register src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x0F);
@@ -1423,6 +1437,26 @@
 }
 
 
+void X86Assembler::roll(Register reg, const Immediate& imm) {  // Rotate left by an immediate.
+  EmitGenericShift(0, Operand(reg), imm);  // 0: presumably ROL's /0 extension (confirm).
+}
+
+
+void X86Assembler::roll(Register operand, Register shifter) {  // Rotate left by register count.
+  EmitGenericShift(0, Operand(operand), shifter);  // 0: presumably ROL's /0 extension (confirm).
+}
+
+
+void X86Assembler::rorl(Register reg, const Immediate& imm) {  // Rotate right by an immediate.
+  EmitGenericShift(1, Operand(reg), imm);  // 1: presumably ROR's /1 extension (confirm).
+}
+
+
+void X86Assembler::rorl(Register operand, Register shifter) {  // Rotate right by register count.
+  EmitGenericShift(1, Operand(operand), shifter);  // 1: presumably ROR's /1 extension (confirm).
+}
+
+
 void X86Assembler::negl(Register reg) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF7);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 7d7b3d3..af78663 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -319,9 +319,16 @@
   void movntl(const Address& dst, Register src);
 
   void bswapl(Register dst);
+  void bsfl(Register dst, Register src);  // Bit scan forward, register source.
+  void bsfl(Register dst, const Address& src);  // Bit scan forward, memory source.
   void bsrl(Register dst, Register src);
   void bsrl(Register dst, const Address& src);
 
+  void rorl(Register reg, const Immediate& imm);  // Rotate right by an immediate count.
+  void rorl(Register operand, Register shifter);  // Rotate right by a register count.
+  void roll(Register reg, const Immediate& imm);  // Rotate left by an immediate count.
+  void roll(Register operand, Register shifter);  // Rotate left by a register count.
+
   void movzxb(Register dst, ByteRegister src);
   void movzxb(Register dst, const Address& src);
   void movsxb(Register dst, ByteRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 9ac54af..16f9db4 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -32,6 +32,10 @@
 
 class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, x86::Register,
                                               x86::XmmRegister, x86::Immediate> {
+ public:
+  typedef AssemblerTest<x86::X86Assembler, x86::Register,
+                         x86::XmmRegister, x86::Immediate> Base;
+
  protected:
   std::string GetArchitectureString() OVERRIDE {
     return "x86";
@@ -230,6 +234,19 @@
   DriverStr(expected, "rep_movsw");
 }
 
+TEST_F(AssemblerX86Test, Bsfl) {  // bsfl reg, reg; text puts {reg2} before {reg1}.
+  DriverStr(RepeatRR(&x86::X86Assembler::bsfl, "bsfl %{reg2}, %{reg1}"), "bsfl");
+}
+
+TEST_F(AssemblerX86Test, BsflAddress) {  // bsfl with a base + index*4 + 12 memory source.
+  GetAssembler()->bsfl(x86::Register(x86::EDI), x86::Address(
+      x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+  const char* expected =
+    "bsfl 0xc(%EDI,%EBX,4), %EDI\n";  // Memory source first, destination register last.
+
+  DriverStr(expected, "bsfl_address");
+}
+
 TEST_F(AssemblerX86Test, Bsrl) {
   DriverStr(RepeatRR(&x86::X86Assembler::bsrl, "bsrl %{reg2}, %{reg1}"), "bsrl");
 }
@@ -243,6 +260,52 @@
   DriverStr(expected, "bsrl_address");
 }
 
+// The register-count form of rorl only allows CL as the shift count.
+std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
+  std::ostringstream str;  // Collects the assembly text for each emitted instruction.
+
+  std::vector<x86::Register*> registers = assembler_test->GetRegisters();
+
+  x86::Register shifter(x86::ECX);  // Count register is fixed; the text below names it %cl.
+  for (auto reg : registers) {  // One rorl per register, all using the same count register.
+    assembler->rorl(*reg, shifter);
+    str << "rorl %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86Test, RorlReg) {  // Register-count form, driven by rorl_fn.
+  DriverFn(&rorl_fn, "rorl");
+}
+
+TEST_F(AssemblerX86Test, RorlImm) {  // Immediate count; 1U presumably is the imm byte size.
+  DriverStr(RepeatRI(&x86::X86Assembler::rorl, 1U, "rorl ${imm}, %{reg}"), "rorli");
+}
+
+// The register-count form of roll only allows CL as the shift count.
+std::string roll_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
+  std::ostringstream str;  // Collects the assembly text for each emitted instruction.
+
+  std::vector<x86::Register*> registers = assembler_test->GetRegisters();
+
+  x86::Register shifter(x86::ECX);  // Count register is fixed; the text below names it %cl.
+  for (auto reg : registers) {  // One roll per register, all using the same count register.
+    assembler->roll(*reg, shifter);
+    str << "roll %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
+  }
+
+  return str.str();
+}
+
+TEST_F(AssemblerX86Test, RollReg) {  // Register-count form, driven by roll_fn.
+  DriverFn(&roll_fn, "roll");
+}
+
+TEST_F(AssemblerX86Test, RollImm) {  // Immediate count; 1U presumably is the imm byte size.
+  DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
+}
+
 /////////////////
 // Near labels //
 /////////////////