Replace rotate patterns and invokes with HRor IR.

Replace bitfield rotate patterns, in both their constant- and
register-distance forms, and rotateRight/rotateLeft intrinsic invokes,
with the new HRor IR.

Where k is a constant and r is a register, and with the UShr and Shl on
either side of a |, +, or ^, the following patterns are replaced (an
illustrative source-level example follows the list):

  x >>> #k OP x << #(reg_size - k)
  x >>> #k OP x << #-k

  x >>> r OP x << (#reg_size - r)
  x >>> (#reg_size - r) OP x << r

  x >>> r OP x << -r
  x >>> -r OP x << r
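
For illustration, the first constant pattern corresponds to the usual
rotate-right idiom in Java source; a minimal sketch, not taken from the
changed tests:

  int rotateRight(int x, int k) {
    // The shift distances sum to the register size, so the UShr, Shl
    // and the OP fold to a single Ror by k.
    return (x >>> k) | (x << (32 - k));
  }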

Implemented for ARM/ARM64 & X86/X86_64.

Tests changed to not be inlined, to prevent the optimizer from folding
them away. Additional tests added for constant rotate amounts; a sketch
of such a case follows.
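
A constant-distance case might look like the following sketch (method
name hypothetical, not taken from the actual test files):

  public static long rotateRightBy5(long value) {
    // Matches x >>> #k OP x << #-k with k = 5 and folds to a single
    // 64-bit Ror by 5, since 5 + (-5) == 0 (mod 64).
    return (value >>> 5) | (value << -5);
  }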

Change-Id: I5847d104c0a0348e5792be6c5072ce5090ca2c34
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ac6b5e8..5abbb37 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3234,6 +3234,146 @@
   }
 }
 
+void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations) {
+  Register in = locations->InAt(0).AsRegister<Register>();
+  Location rhs = locations->InAt(1);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (rhs.IsConstant()) {
+    // Arm32 and Thumb2 assemblers require a rotation in the interval [1,31],
+    // so map all rotations onto a positive equivalent in that range.
+    // (e.g. a rotation right by -2 bits == right by 30 bits.)
+    uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F;
+    if (rot) {
+      __ Ror(out, in, rot);
+    } else if (out != in) {
+      __ Mov(out, in);
+    }
+  } else {
+    __ Ror(out, in, rhs.AsRegister<Register>());
+  }
+}
+
+// Gain some speed by mapping long rotates onto equivalent pairs of 32-bit
+// operations: a rotation of a word or more is a swap of the input registers
+// (effectively rotating by the first 32 bits of the distance) followed by a
+// sub-word sized rotation of the result.
+void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
+  Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+  Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Location rhs = locations->InAt(1);
+  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
+  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+  if (rhs.IsConstant()) {
+    uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
+    // Map all rotations to positive equivalents on the interval [0,63].
+    rot &= kMaxLongShiftValue;
+    // For rotates over a word in size, 'pre-rotate' by 32 bits to keep the
+    // rotate logic below to a simple pair of orrs.
+    // (e.g. 34 bits == in_reg swap + 2 bits right.)
+    if (rot >= kArmBitsPerWord) {
+      rot -= kArmBitsPerWord;
+      std::swap(in_reg_hi, in_reg_lo);
+    }
+    // Rotate, or mov to out for zero or word size rotations.
+    if (rot != 0u) {
+      __ Lsr(out_reg_hi, in_reg_hi, rot);
+      __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, kArmBitsPerWord - rot));
+      __ Lsr(out_reg_lo, in_reg_lo, rot);
+      __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, kArmBitsPerWord - rot));
+    } else {
+      __ Mov(out_reg_lo, in_reg_lo);
+      __ Mov(out_reg_hi, in_reg_hi);
+    }
+  } else {
+    Register shift_right = locations->GetTemp(0).AsRegister<Register>();
+    Register shift_left = locations->GetTemp(1).AsRegister<Register>();
+    Label end;
+    Label shift_by_less_than_32;
+
+    __ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
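+    // The result of the Lsrs is discarded; the point is to set the carry flag
+    // to bit 5 of the distance (the last bit shifted out), which says whether
+    // the rotation is by a word or more. kCcKeep on the rsb preserves the
+    // flag for the branch below.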
+    __ Lsrs(shift_left, rhs.AsRegister<Register>(), 6);
+    __ rsb(shift_left, shift_right, ShifterOperand(kArmBitsPerWord), AL, kCcKeep);
+    __ b(&shift_by_less_than_32, CC);
+
+    // Fall through: the rotation is by 32+shift_right, i.e. a left rotation
+    // of the register pair by shift_left = 32 - shift_right.
+    // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
+    // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
+    __ Lsl(out_reg_hi, in_reg_hi, shift_left);
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ Lsr(shift_left, in_reg_hi, shift_right);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
+    __ b(&end);
+
+    __ Bind(&shift_by_less_than_32);  // Rotation is by shift_right, less than one word.
+    // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
+    // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
+    __ Lsr(out_reg_hi, in_reg_hi, shift_right);
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ Lsl(shift_right, in_reg_hi, shift_left);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
+
+    __ Bind(&end);
+  }
+}
+
+void LocationsBuilderARM::HandleRotate(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      if (ror->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant()));
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARM::HandleRotate(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  Primitive::Type type = ror->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      HandleIntegerRotate(locations);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      HandleLongRotate(locations);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << type;
+  }
+}
+
+void LocationsBuilderARM::VisitRor(HRor* op) {
+  HandleRotate(op);
+}
+
+void InstructionCodeGeneratorARM::VisitRor(HRor* op) {
+  HandleRotate(op);
+}
+
 void LocationsBuilderARM::HandleShift(HBinaryOperation* op) {
   DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 193add2..8193c28 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -170,6 +170,9 @@
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
+  void HandleIntegerRotate(LocationSummary* locations);
+  void HandleLongRotate(LocationSummary* locations);
+  void HandleRotate(HRor* ror);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -213,6 +216,9 @@
   void GenerateOrrConst(Register out, Register first, uint32_t value);
   void GenerateEorConst(Register out, Register first, uint32_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleIntegerRotate(LocationSummary* locations);
+  void HandleLongRotate(LocationSummary* locations);
+  void HandleRotate(HRor* ror);
   void HandleShift(HBinaryOperation* operation);
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateWideAtomicStore(Register addr, uint32_t offset,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 04acd9d..5a32530 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1791,6 +1791,17 @@
         __ Orr(dst, lhs, rhs);
       } else if (instr->IsSub()) {
         __ Sub(dst, lhs, rhs);
+      } else if (instr->IsRor()) {
+        if (rhs.IsImmediate()) {
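+          // Rotating by a multiple of the register size is the identity, so
+          // only the low bits of the distance matter; mask accordingly.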
+          uint32_t shift = rhs.immediate() & (lhs.SizeInBits() - 1);
+          __ Ror(dst, lhs, shift);
+        } else {
+          // Ensure the shift distance is in the same size register as the
+          // result. If we are rotating a long and the distance comes in a w
+          // register originally, no sxtw is needed to use it as an x, since
+          // the distance is always taken modulo reg_bits anyway.
+          __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
+        }
       } else {
         DCHECK(instr->IsXor());
         __ Eor(dst, lhs, rhs);
@@ -4258,6 +4269,14 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderARM64::VisitRor(HRor* ror) {
+  HandleBinaryOp(ror);
+}
+
+void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
+  HandleBinaryOp(ror);
+}
+
 void LocationsBuilderARM64::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 9dc9167..af6a1ce 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -3767,6 +3767,16 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
+void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
 void LocationsBuilderMIPS::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index bc5eb31..b58b610 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3519,6 +3519,16 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
 void LocationsBuilderMIPS64::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 2fb87d3..faf2777 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -3760,6 +3760,92 @@
   __ Bind(&done);
 }
 
+void LocationsBuilderX86::VisitRor(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimLong:
+      // Add the temporary needed.
+      locations->AddTemp(Location::RequiresRegister());
+      FALLTHROUGH_INTENDED;
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL (unless it is a constant).
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorX86::VisitRor(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  if (ror->GetResultType() == Primitive::kPrimInt) {
+    Register first_reg = first.AsRegister<Register>();
+    if (second.IsRegister()) {
+      Register second_reg = second.AsRegister<Register>();
+      __ rorl(first_reg, second_reg);
+    } else {
+      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+      __ rorl(first_reg, imm);
+    }
+    return;
+  }
+
+  DCHECK_EQ(ror->GetResultType(), Primitive::kPrimLong);
+  Register first_reg_lo = first.AsRegisterPairLow<Register>();
+  Register first_reg_hi = first.AsRegisterPairHigh<Register>();
+  Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
+  if (second.IsRegister()) {
+    Register second_reg = second.AsRegister<Register>();
+    DCHECK_EQ(second_reg, ECX);
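+    // Rotate the pair right by the distance modulo 32 with double-precision
+    // shifts; since shrd only sees the count mod 32, swap the halves via cmov
+    // when bit 5 of the distance is set (a rotation by 32 or more).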
+    __ movl(temp_reg, first_reg_hi);
+    __ shrd(first_reg_hi, first_reg_lo, second_reg);
+    __ shrd(first_reg_lo, temp_reg, second_reg);
+    __ movl(temp_reg, first_reg_hi);
+    __ testl(second_reg, Immediate(32));
+    __ cmovl(kNotEqual, first_reg_hi, first_reg_lo);
+    __ cmovl(kNotEqual, first_reg_lo, temp_reg);
+  } else {
+    int32_t shift_amt =
+      CodeGenerator::GetInt64ValueOf(second.GetConstant()) & kMaxLongShiftValue;
+    if (shift_amt == 0) {
+      // Already fine.
+      return;
+    }
+    if (shift_amt == 32) {
+      // Just swap.
+      __ movl(temp_reg, first_reg_lo);
+      __ movl(first_reg_lo, first_reg_hi);
+      __ movl(first_reg_hi, temp_reg);
+      return;
+    }
+
+    Immediate imm(shift_amt);
+    // Save the contents of the low register.
+    __ movl(temp_reg, first_reg_lo);
+
+    // Shift right into low, feeding bits from high.
+    __ shrd(first_reg_lo, first_reg_hi, imm);
+
+    // Shift right into high, feeding bits from the original low.
+    __ shrd(first_reg_hi, temp_reg, imm);
+
+    // shrd only uses the shift amount modulo 32, so finish rotations of more
+    // than a word by swapping the halves.
+    if (shift_amt > 32) {
+      __ movl(temp_reg, first_reg_lo);
+      __ movl(first_reg_lo, first_reg_hi);
+      __ movl(first_reg_hi, temp_reg);
+    }
+  }
+}
+
 void LocationsBuilderX86::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 4618be9..69129bf 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3754,6 +3754,55 @@
   }
 }
 
+void LocationsBuilderX86_64::VisitRor(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      // The shift count needs to be in CL (unless it is a constant).
+      locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
+  Location second = locations->InAt(1);
+
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt:
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.AsRegister<CpuRegister>();
+        __ rorl(first_reg, second_reg);
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+        __ rorl(first_reg, imm);
+      }
+      break;
+    case Primitive::kPrimLong:
+      if (second.IsRegister()) {
+        CpuRegister second_reg = second.AsRegister<CpuRegister>();
+        __ rorq(first_reg, second_reg);
+      } else {
+        Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
+        __ rorq(first_reg, imm);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
   HandleShift(shl);
 }
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index af8b8b5..10d8343 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -202,6 +202,11 @@
     return true;
   }
 
+  // Our code generator ensures shift distances are within an encodable range.
+  if (instr->IsRor()) {
+    return true;
+  }
+
   int64_t value = CodeGenerator::GetInt64ValueOf(constant);
 
   if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index c16b872..dfc363f 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -850,7 +850,7 @@
 
 void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) {
   VisitInstruction(op);
-  if (op->IsUShr() || op->IsShr() || op->IsShl()) {
+  if (op->IsUShr() || op->IsShr() || op->IsShl() || op->IsRor()) {
     if (PrimitiveKind(op->InputAt(1)->GetType()) != Primitive::kPrimInt) {
       AddError(StringPrintf(
           "Shift operation %s %d has a non-int kind second input: "
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2f3df7f..6946265 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -39,6 +39,12 @@
     }
   }
 
+  bool ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+  bool TryReplaceWithRotate(HBinaryOperation* instruction);
+  bool TryReplaceWithRotateConstantPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+  bool TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+  bool TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op, HUShr* ushr, HShl* shl);
+
   bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop);
   void VisitShift(HBinaryOperation* shift);
 
@@ -77,6 +83,7 @@
 
   bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
 
+  void SimplifyRotate(HInvoke* invoke, bool is_left);
   void SimplifySystemArrayCopy(HInvoke* invoke);
   void SimplifyStringEquals(HInvoke* invoke);
 
@@ -173,6 +180,161 @@
   }
 }
 
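+// Returns whether the Sub computes (c - other) with c a multiple of reg_bits,
+// i.e. a shift distance of the (#reg_size - r) form.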
+static bool IsSubRegBitsMinusOther(HSub* sub, size_t reg_bits, HInstruction* other) {
+  return (sub->GetRight() == other &&
+          sub->GetLeft()->IsConstant() &&
+          (Int64FromConstant(sub->GetLeft()->AsConstant()) & (reg_bits - 1)) == 0);
+}
+
+bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op,
+                                                        HUShr* ushr,
+                                                        HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  HRor* ror = new (GetGraph()->GetArena()) HRor(ushr->GetType(),
+                                                ushr->GetLeft(),
+                                                ushr->GetRight());
+  op->GetBlock()->ReplaceAndRemoveInstructionWith(op, ror);
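+  // The replaced UShr and Shl, and the computations of their distances, may
+  // now be dead; remove whatever has no remaining uses.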
+  if (!ushr->HasUses()) {
+    ushr->GetBlock()->RemoveInstruction(ushr);
+  }
+  if (!ushr->GetRight()->HasUses()) {
+    ushr->GetRight()->GetBlock()->RemoveInstruction(ushr->GetRight());
+  }
+  if (!shl->HasUses()) {
+    shl->GetBlock()->RemoveInstruction(shl);
+  }
+  if (!shl->GetRight()->HasUses()) {
+    shl->GetRight()->GetBlock()->RemoveInstruction(shl->GetRight());
+  }
+  return true;
+}
+
+// Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation.
+bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) {
+  // This simplification is currently supported on ARM, ARM64, x86 and x86-64.
+  // TODO: Implement it for MIPS/64.
+  const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
+  switch (instruction_set) {
+    case kArm:
+    case kArm64:
+    case kThumb2:
+    case kX86:
+    case kX86_64:
+      break;
+    default:
+      return false;
+  }
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  HInstruction* left = op->GetLeft();
+  HInstruction* right = op->GetRight();
+  // If we have an UShr and a Shl (in either order).
+  if ((left->IsUShr() && right->IsShl()) || (left->IsShl() && right->IsUShr())) {
+    HUShr* ushr = left->IsUShr() ? left->AsUShr() : right->AsUShr();
+    HShl* shl = left->IsShl() ? left->AsShl() : right->AsShl();
+    DCHECK(Primitive::IsIntOrLongType(ushr->GetType()));
+    if (ushr->GetType() == shl->GetType() &&
+        ushr->GetLeft() == shl->GetLeft()) {
+      if (ushr->GetRight()->IsConstant() && shl->GetRight()->IsConstant()) {
+        // Shift distances are both constant, try replacing with Ror if they
+        // add up to the register size.
+        return TryReplaceWithRotateConstantPattern(op, ushr, shl);
+      } else if (ushr->GetRight()->IsSub() || shl->GetRight()->IsSub()) {
+        // Shift distances are potentially of the form x and (reg_size - x).
+        return TryReplaceWithRotateRegisterSubPattern(op, ushr, shl);
+      } else if (ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg()) {
+        // Shift distances are potentially of the form d and -d.
+        return TryReplaceWithRotateRegisterNegPattern(op, ushr, shl);
+      }
+    }
+  }
+  return false;
+}
+
+// Try replacing code looking like (x >>> #rdist OP x << #ldist):
+//    UShr dst, x,   #rdist
+//    Shl  tmp, x,   #ldist
+//    OP   dst, dst, tmp
+// or like (x >>> #rdist OP x << #-ldist):
+//    UShr dst, x,   #rdist
+//    Shl  tmp, x,   #-ldist
+//    OP   dst, dst, tmp
+// with
+//    Ror  dst, x,   #rdist
+bool InstructionSimplifierVisitor::TryReplaceWithRotateConstantPattern(HBinaryOperation* op,
+                                                                       HUShr* ushr,
+                                                                       HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte;
+  size_t rdist = Int64FromConstant(ushr->GetRight()->AsConstant());
+  size_t ldist = Int64FromConstant(shl->GetRight()->AsConstant());
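+  // Accept any pair of distances summing to 0 mod reg_bits; this covers both
+  // the (#reg_size - k) and the #-k forms of the left-shift distance.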
+  if (((ldist + rdist) & (reg_bits - 1)) == 0) {
+    ReplaceRotateWithRor(op, ushr, shl);
+    return true;
+  }
+  return false;
+}
+
+// Try replacing code looking like (x >>> -d OP x << d):
+//    Neg  neg, d
+//    UShr dst, x,   neg
+//    Shl  tmp, x,   d
+//    OP   dst, dst, tmp
+// with
+//    Neg  neg, d
+//    Ror  dst, x,   neg
+// *** OR ***
+// Replace code looking like (x >>> d OP x << -d):
+//    UShr dst, x,   d
+//    Neg  neg, d
+//    Shl  tmp, x,   neg
+//    OP   dst, dst, tmp
+// with
+//    Ror  dst, x,   d
+bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterNegPattern(HBinaryOperation* op,
+                                                                          HUShr* ushr,
+                                                                          HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  DCHECK(ushr->GetRight()->IsNeg() || shl->GetRight()->IsNeg());
+  bool neg_is_left = shl->GetRight()->IsNeg();
+  HNeg* neg = neg_is_left ? shl->GetRight()->AsNeg() : ushr->GetRight()->AsNeg();
+  // And the shift distance being negated is the distance being shifted the other way.
+  if (neg->InputAt(0) == (neg_is_left ? ushr->GetRight() : shl->GetRight())) {
+    return ReplaceRotateWithRor(op, ushr, shl);
+  }
+  return false;
+}
+
+// Try replacing code looking like (x >>> d OP x << (#bits - d)):
+//    UShr dst, x,     d
+//    Sub  ld,  #bits, d
+//    Shl  tmp, x,     ld
+//    OP   dst, dst,   tmp
+// with
+//    Ror  dst, x,     d
+// *** OR ***
+// Replace code looking like (x >>> (#bits - d) OP x << d):
+//    Sub  rd,  #bits, d
+//    UShr dst, x,     rd
+//    Shl  tmp, x,     d
+//    OP   dst, dst,   tmp
+// with
+//    Neg  neg, d
+//    Ror  dst, x,     neg
+bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterSubPattern(HBinaryOperation* op,
+                                                                          HUShr* ushr,
+                                                                          HShl* shl) {
+  DCHECK(op->IsAdd() || op->IsXor() || op->IsOr());
+  DCHECK(ushr->GetRight()->IsSub() || shl->GetRight()->IsSub());
+  size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte;
+  HInstruction* shl_shift = shl->GetRight();
+  HInstruction* ushr_shift = ushr->GetRight();
+  if ((shl_shift->IsSub() && IsSubRegBitsMinusOther(shl_shift->AsSub(), reg_bits, ushr_shift)) ||
+      (ushr_shift->IsSub() && IsSubRegBitsMinusOther(ushr_shift->AsSub(), reg_bits, shl_shift))) {
+    return ReplaceRotateWithRor(op, ushr, shl);
+  }
+  return false;
+}
+
 void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) {
   HInstruction* obj = null_check->InputAt(0);
   if (!obj->CanBeNull()) {
@@ -530,7 +692,10 @@
     instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub);
     RecordSimplification();
     neg->GetBlock()->RemoveInstruction(neg);
+    return;
   }
+
+  TryReplaceWithRotate(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) {
@@ -906,7 +1071,10 @@
     //    src
     instruction->ReplaceWith(instruction->GetLeft());
     instruction->GetBlock()->RemoveInstruction(instruction);
+    return;
   }
+
+  TryReplaceWithRotate(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitShl(HShl* instruction) {
@@ -1027,6 +1195,8 @@
     RecordSimplification();
     return;
   }
+
+  TryReplaceWithRotate(instruction);
 }
 
 void InstructionSimplifierVisitor::VisitFakeString(HFakeString* instruction) {
@@ -1095,6 +1265,42 @@
   }
 }
 
+void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) {
+  DCHECK(invoke->IsInvokeStaticOrDirect());
+  DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic);
+  // This simplification is currently supported on ARM, ARM64, x86 and x86-64.
+  // TODO: Implement it for MIPS/64.
+  const InstructionSet instruction_set = GetGraph()->GetInstructionSet();
+  switch (instruction_set) {
+    case kArm:
+    case kArm64:
+    case kThumb2:
+    case kX86:
+    case kX86_64:
+      break;
+    default:
+      return;
+  }
+  HInstruction* value = invoke->InputAt(0);
+  HInstruction* distance = invoke->InputAt(1);
+  // Replace the invoke with an HRor.
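+  // A rotation left by d is equivalent to a rotation right by -d, so a left
+  // rotate is lowered as a Neg of the distance feeding the Ror.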
+  if (is_left) {
+    distance = new (GetGraph()->GetArena()) HNeg(distance->GetType(), distance);
+    invoke->GetBlock()->InsertInstructionBefore(distance, invoke);
+  }
+  HRor* ror = new (GetGraph()->GetArena()) HRor(value->GetType(), value, distance);
+  invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, ror);
+  // Remove ClinitCheck and LoadClass, if possible.
+  HInstruction* clinit = invoke->InputAt(invoke->InputCount() - 1);
+  if (clinit->IsClinitCheck() && !clinit->HasUses()) {
+    clinit->GetBlock()->RemoveInstruction(clinit);
+    HInstruction* ldclass = clinit->InputAt(0);
+    if (ldclass->IsLoadClass() && !ldclass->HasUses()) {
+      ldclass->GetBlock()->RemoveInstruction(ldclass);
+    }
+  }
+}
+
 static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potential_array) {
   if (potential_length->IsArrayLength()) {
     return potential_length->InputAt(0) == potential_array;
@@ -1165,6 +1371,12 @@
     SimplifyStringEquals(instruction);
   } else if (instruction->GetIntrinsic() == Intrinsics::kSystemArrayCopy) {
     SimplifySystemArrayCopy(instruction);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateRight ||
+             instruction->GetIntrinsic() == Intrinsics::kLongRotateRight) {
+    SimplifyRotate(instruction, false);
+  } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerRotateLeft ||
+             instruction->GetIntrinsic() == Intrinsics::kLongRotateLeft) {
+    SimplifyRotate(instruction, true);
   }
 }
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 5329b5c..b5cb611 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -240,178 +240,6 @@
   GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
 }
 
-static void GenIntegerRotate(LocationSummary* locations,
-                             ArmAssembler* assembler,
-                             bool is_left) {
-  Register in = locations->InAt(0).AsRegister<Register>();
-  Location rhs = locations->InAt(1);
-  Register out = locations->Out().AsRegister<Register>();
-
-  if (rhs.IsConstant()) {
-    // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
-    // so map all rotations to a +ve. equivalent in that range.
-    // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
-    uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue() & 0x1F;
-    if (rot) {
-      // Rotate, mapping left rotations to right equivalents if necessary.
-      // (e.g. left by 2 bits == right by 30.)
-      __ Ror(out, in, is_left ? (0x20 - rot) : rot);
-    } else if (out != in) {
-      __ Mov(out, in);
-    }
-  } else {
-    if (is_left) {
-      __ rsb(out, rhs.AsRegister<Register>(), ShifterOperand(0));
-      __ Ror(out, in, out);
-    } else {
-      __ Ror(out, in, rhs.AsRegister<Register>());
-    }
-  }
-}
-
-// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
-// rotates by swapping input regs (effectively rotating by the first 32-bits of
-// a larger rotation) or flipping direction (thus treating larger right/left
-// rotations as sub-word sized rotations in the other direction) as appropriate.
-static void GenLongRotate(LocationSummary* locations,
-                          ArmAssembler* assembler,
-                          bool is_left) {
-  Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
-  Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
-  Location rhs = locations->InAt(1);
-  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
-  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
-
-  if (rhs.IsConstant()) {
-    uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue();
-    // Map all left rotations to right equivalents.
-    if (is_left) {
-      rot = 0x40 - rot;
-    }
-    // Map all rotations to +ve. equivalents on the interval [0,63].
-    rot &= 0x3F;
-    // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
-    // logic below to a simple pair of binary orr.
-    // (e.g. 34 bits == in_reg swap + 2 bits right.)
-    if (rot >= 0x20) {
-      rot -= 0x20;
-      std::swap(in_reg_hi, in_reg_lo);
-    }
-    // Rotate, or mov to out for zero or word size rotations.
-    if (rot) {
-      __ Lsr(out_reg_hi, in_reg_hi, rot);
-      __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, 0x20 - rot));
-      __ Lsr(out_reg_lo, in_reg_lo, rot);
-      __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, 0x20 - rot));
-    } else {
-      __ Mov(out_reg_lo, in_reg_lo);
-      __ Mov(out_reg_hi, in_reg_hi);
-    }
-  } else {
-    Register shift_left = locations->GetTemp(0).AsRegister<Register>();
-    Register shift_right = locations->GetTemp(1).AsRegister<Register>();
-    Label end;
-    Label right;
-
-    __ and_(shift_left, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
-    __ Lsrs(shift_right, rhs.AsRegister<Register>(), 6);
-    __ rsb(shift_right, shift_left, ShifterOperand(0x20), AL, kCcKeep);
-
-    if (is_left) {
-      __ b(&right, CS);
-    } else {
-      __ b(&right, CC);
-      std::swap(shift_left, shift_right);
-    }
-
-    // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
-    // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
-    __ Lsl(out_reg_hi, in_reg_hi, shift_left);
-    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
-    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
-    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
-    __ Lsr(shift_left, in_reg_hi, shift_right);
-    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
-    __ b(&end);
-
-    // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
-    // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
-    __ Bind(&right);
-    __ Lsr(out_reg_hi, in_reg_hi, shift_right);
-    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
-    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
-    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
-    __ Lsl(shift_right, in_reg_hi, shift_left);
-    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
-
-    __ Bind(&end);
-  }
-}
-
-void IntrinsicLocationsBuilderARM::VisitIntegerRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitIntegerRotateRight(HInvoke* invoke) {
-  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ false);
-}
-
-void IntrinsicLocationsBuilderARM::VisitLongRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (invoke->InputAt(1)->IsConstant()) {
-    locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
-  } else {
-    locations->SetInAt(1, Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-  }
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitLongRotateRight(HInvoke* invoke) {
-  GenLongRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ false);
-}
-
-void IntrinsicLocationsBuilderARM::VisitIntegerRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitIntegerRotateLeft(HInvoke* invoke) {
-  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderARM::VisitLongRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kNoCall,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  if (invoke->InputAt(1)->IsConstant()) {
-    locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
-  } else {
-    locations->SetInAt(1, Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
-  }
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM::VisitLongRotateLeft(HInvoke* invoke) {
-  GenLongRotate(invoke->GetLocations(), GetAssembler(), /* is_left */ true);
-}
-
 static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
   Location in = locations->InAt(0);
   Location out = locations->Out();
@@ -1700,8 +1528,12 @@
 
 UNIMPLEMENTED_INTRINSIC(IntegerReverse)
 UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongReverse)
 UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
 UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 962c4d5..93a8224 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -330,103 +330,6 @@
   GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
 }
 
-static void GenRotateRight(LocationSummary* locations,
-                           Primitive::Type type,
-                           vixl::MacroAssembler* masm) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  Location in = locations->InAt(0);
-  Location out = locations->Out();
-  Operand rhs = OperandFrom(locations->InAt(1), type);
-
-  if (rhs.IsImmediate()) {
-    uint32_t shift = rhs.immediate() & (RegisterFrom(in, type).SizeInBits() - 1);
-    __ Ror(RegisterFrom(out, type),
-           RegisterFrom(in, type),
-           shift);
-  } else {
-    DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0);
-    __ Ror(RegisterFrom(out, type),
-           RegisterFrom(in, type),
-           rhs.reg());
-  }
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerRotateRight(HInvoke* invoke) {
-  GenRotateRight(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongRotateRight(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongRotateRight(HInvoke* invoke) {
-  GenRotateRight(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
-}
-
-static void GenRotateLeft(LocationSummary* locations,
-                           Primitive::Type type,
-                           vixl::MacroAssembler* masm) {
-  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
-
-  Location in = locations->InAt(0);
-  Location out = locations->Out();
-  Operand rhs = OperandFrom(locations->InAt(1), type);
-
-  if (rhs.IsImmediate()) {
-    uint32_t regsize = RegisterFrom(in, type).SizeInBits();
-    uint32_t shift = (regsize - rhs.immediate()) & (regsize - 1);
-    __ Ror(RegisterFrom(out, type), RegisterFrom(in, type), shift);
-  } else {
-    DCHECK(rhs.shift() == vixl::LSL && rhs.shift_amount() == 0);
-    __ Neg(RegisterFrom(out, type),
-           Operand(RegisterFrom(locations->InAt(1), type)));
-    __ Ror(RegisterFrom(out, type),
-           RegisterFrom(in, type),
-           RegisterFrom(out, type));
-  }
-}
-
-void IntrinsicLocationsBuilderARM64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  GenRotateLeft(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitLongRotateLeft(HInvoke* invoke) {
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
-  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitLongRotateLeft(HInvoke* invoke) {
-  GenRotateLeft(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
-}
-
 static void GenReverse(LocationSummary* locations,
                        Primitive::Type type,
                        vixl::MacroAssembler* masm) {
@@ -1527,6 +1430,10 @@
 void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
 }
 
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 5b67cde..53c0d2e 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2277,56 +2277,6 @@
   GenTrailingZeros(assembler, invoke, /* is_long */ true);
 }
 
-static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  // The shift count needs to be in CL or a constant.
-  locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, invoke->InputAt(1)));
-  locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void GenRotate(X86Assembler* assembler, HInvoke* invoke, bool is_left) {
-  LocationSummary* locations = invoke->GetLocations();
-  Register first_reg = locations->InAt(0).AsRegister<Register>();
-  Location second = locations->InAt(1);
-
-  if (second.IsRegister()) {
-    Register second_reg = second.AsRegister<Register>();
-    if (is_left) {
-      __ roll(first_reg, second_reg);
-    } else {
-      __ rorl(first_reg, second_reg);
-    }
-  } else {
-    Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
-    if (is_left) {
-      __ roll(first_reg, imm);
-    } else {
-      __ rorl(first_reg, imm);
-    }
-  }
-}
-
-void IntrinsicLocationsBuilderX86::VisitIntegerRotateLeft(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitIntegerRotateLeft(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86::VisitIntegerRotateRight(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitIntegerRotateRight(HInvoke* invoke) {
-  X86Assembler* assembler = down_cast<X86Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_left */ false);
-}
-
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -2337,6 +2287,8 @@
 
 UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
 UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ecd129f..a54445b 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2295,92 +2295,6 @@
   GenTrailingZeros(assembler, invoke, /* is_long */ true);
 }
 
-static void CreateRotateLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
-                                                           kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  // The shift count needs to be in CL or a constant.
-  locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, invoke->InputAt(1)));
-  locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void GenRotate(X86_64Assembler* assembler, HInvoke* invoke, bool is_long, bool is_left) {
-  LocationSummary* locations = invoke->GetLocations();
-  CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
-  Location second = locations->InAt(1);
-
-  if (is_long) {
-    if (second.IsRegister()) {
-      CpuRegister second_reg = second.AsRegister<CpuRegister>();
-      if (is_left) {
-        __ rolq(first_reg, second_reg);
-      } else {
-        __ rorq(first_reg, second_reg);
-      }
-    } else {
-      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue);
-      if (is_left) {
-        __ rolq(first_reg, imm);
-      } else {
-        __ rorq(first_reg, imm);
-      }
-    }
-  } else {
-    if (second.IsRegister()) {
-      CpuRegister second_reg = second.AsRegister<CpuRegister>();
-      if (is_left) {
-        __ roll(first_reg, second_reg);
-      } else {
-        __ rorl(first_reg, second_reg);
-      }
-    } else {
-      Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
-      if (is_left) {
-        __ roll(first_reg, imm);
-      } else {
-        __ rorl(first_reg, imm);
-      }
-    }
-  }
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateLeft(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitIntegerRotateRight(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitIntegerRotateRight(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ false, /* is_left */ false);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongRotateLeft(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongRotateLeft(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ true);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitLongRotateRight(HInvoke* invoke) {
-  CreateRotateLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitLongRotateRight(HInvoke* invoke) {
-  X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
-  GenRotate(assembler, invoke, /* is_long */ true, /* is_left */ false);
-}
-
 // Unimplemented intrinsics.
 
 #define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
@@ -2390,6 +2304,10 @@
 }
 
 UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
+UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
+UNIMPLEMENTED_INTRINSIC(LongRotateRight)
 
 #undef UNIMPLEMENTED_INTRINSIC
 
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 9d3c88c..f50de28 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1085,6 +1085,7 @@
   M(Rem, BinaryOperation)                                               \
   M(Return, Instruction)                                                \
   M(ReturnVoid, Instruction)                                            \
+  M(Ror, BinaryOperation)                                               \
   M(Shl, BinaryOperation)                                               \
   M(Shr, BinaryOperation)                                               \
   M(StaticFieldGet, Instruction)                                        \
@@ -4201,6 +4202,44 @@
   DISALLOW_COPY_AND_ASSIGN(HXor);
 };
 
+class HRor : public HBinaryOperation {
+ public:
+  HRor(Primitive::Type result_type, HInstruction* value, HInstruction* distance)
+      : HBinaryOperation(result_type, value, distance) {}
+
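+  // A rotation by a multiple of the register size leaves the value unchanged;
+  // that case is taken early below, as shifting by reg_bits itself would be
+  // undefined.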
+  template <typename T, typename U, typename V>
+  T Compute(T x, U y, V max_shift_value) const {
+    static_assert(std::is_same<V, typename std::make_unsigned<T>::type>::value,
+                  "V is not the unsigned integer type corresponding to T");
+    V ux = static_cast<V>(x);
+    if ((y & max_shift_value) == 0) {
+      return static_cast<T>(ux);
+    } else {
+      const V reg_bits = sizeof(T) * 8;
+      return static_cast<T>(ux >> (y & max_shift_value)) |
+             static_cast<T>(ux << (reg_bits - (y & max_shift_value)));
+    }
+  }
+
+  HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetIntConstant(
+        Compute(x->GetValue(), y->GetValue(), kMaxIntShiftValue), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HIntConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+  }
+  HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+    return GetBlock()->GetGraph()->GetLongConstant(
+        Compute(x->GetValue(), y->GetValue(), kMaxLongShiftValue), GetDexPc());
+  }
+
+  DECLARE_INSTRUCTION(Ror);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HRor);
+};
+
 // The value of a parameter in this method. Its location depends on
 // the calling convention.
 class HParameterValue : public HExpression<0> {
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index e843935..18405f2 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -17,6 +17,8 @@
 #ifndef ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
 #define ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
 
+#include "nodes.h"
+
 namespace art {
 
 class HArm64DataProcWithShifterOp : public HExpression<2> {