ARM/ARM64: Intrinsics - numberOfTrailingZeros, rotateLeft, rotateRight

Change-Id: I2a07c279756ee804fb7c129416bdc4a3962e93ed
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 69a3e62..b7dc1df 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -266,6 +266,227 @@
   GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
 }
 
+static void GenNumberOfTrailingZeros(LocationSummary* locations,
+                                     Primitive::Type type,
+                                     ArmAssembler* assembler) {
+  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (type == Primitive::kPrimLong) {
+    Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+    Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+    Label end;
+    __ rbit(out, in_reg_lo);
+    __ clz(out, out);
+    __ CompareAndBranchIfNonZero(in_reg_lo, &end);
+    __ rbit(out, in_reg_hi);
+    __ clz(out, out);
+    __ AddConstant(out, 32);
+    __ Bind(&end);
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    __ rbit(out, in);
+    __ clz(out, out);
+  }
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void GenIntegerRotate(LocationSummary* locations,
+                             ArmAssembler* assembler,
+                             bool is_left) {
+  Register in = locations->InAt(0).AsRegister<Register>();
+  Location rhs = locations->InAt(1);
+  Register out = locations->Out().AsRegister<Register>();
+
+  if (rhs.IsConstant()) {
+    // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
+    // so map all rotations to a +ve. equivalent in that range.
+    // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
+    uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue() & 0x1F;
+    if (rot) {
+      // Rotate, mapping left rotations to right equivalents if necessary.
+      // (e.g. left by 2 bits == right by 30.)
+      __ Ror(out, in, is_left ? (0x20 - rot) : rot);
+    } else if (out != in) {
+      __ Mov(out, in);
+    }
+  } else {
+    if (is_left) {
+      __ rsb(out, rhs.AsRegister<Register>(), ShifterOperand(0));
+      __ Ror(out, in, out);
+    } else {
+      __ Ror(out, in, rhs.AsRegister<Register>());
+    }
+  }
+}
+
+// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
+// rotates by swapping input regs (effectively rotating by the first 32-bits of
+// a larger rotation) or flipping direction (thus treating larger right/left
+// rotations as sub-word sized rotations in the other direction) as appropriate.
+static void GenLongRotate(LocationSummary* locations,
+                          ArmAssembler* assembler,
+                          bool is_left) {
+  Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+  Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+  Location rhs = locations->InAt(1);
+  Register out_reg_lo = locations->Out().AsRegisterPairLow<Register>();
+  Register out_reg_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+  if (rhs.IsConstant()) {
+    uint32_t rot = rhs.GetConstant()->AsIntConstant()->GetValue();
+    // Map all left rotations to right equivalents.
+    if (is_left) {
+      rot = 0x40 - rot;
+    }
+    // Map all rotations to +ve. equivalents on the interval [0,63].
+    rot &= 0x3F;
+    // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
+    // logic below to a simple pair of binary orr.
+    // (e.g. 34 bits == in_reg swap + 2 bits right.)
+    if (rot >= 0x20) {
+      rot -= 0x20;
+      std::swap(in_reg_hi, in_reg_lo);
+    }
+    // Rotate, or mov to out for zero or word size rotations.
+    if (rot) {
+      __ Lsr(out_reg_hi, in_reg_hi, rot);
+      __ orr(out_reg_hi, out_reg_hi, ShifterOperand(in_reg_lo, arm::LSL, 0x20 - rot));
+      __ Lsr(out_reg_lo, in_reg_lo, rot);
+      __ orr(out_reg_lo, out_reg_lo, ShifterOperand(in_reg_hi, arm::LSL, 0x20 - rot));
+    } else {
+      __ Mov(out_reg_lo, in_reg_lo);
+      __ Mov(out_reg_hi, in_reg_hi);
+    }
+  } else {
+    Register shift_left = locations->GetTemp(0).AsRegister<Register>();
+    Register shift_right = locations->GetTemp(1).AsRegister<Register>();
+    Label end;
+    Label right;
+
+    __ and_(shift_left, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
+    __ Lsrs(shift_right, rhs.AsRegister<Register>(), 6);
+    __ rsb(shift_right, shift_left, ShifterOperand(0x20), AL, kCcKeep);
+
+    if (is_left) {
+      __ b(&right, CS);
+    } else {
+      __ b(&right, CC);
+      std::swap(shift_left, shift_right);
+    }
+
+    // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
+    // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
+    __ Lsl(out_reg_hi, in_reg_hi, shift_left);
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ Lsr(shift_left, in_reg_hi, shift_right);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
+    __ b(&end);
+
+    // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
+    // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
+    __ Bind(&right);
+    __ Lsr(out_reg_hi, in_reg_hi, shift_right);
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ add(out_reg_hi, out_reg_hi, ShifterOperand(out_reg_lo));
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ Lsl(shift_right, in_reg_hi, shift_left);
+    __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
+
+    __ Bind(&end);
+  }
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerRotateRight(HInvoke* invoke) {
+  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), false /* is_left */);
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongRotateRight(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (invoke->InputAt(1)->IsConstant()) {
+    locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongRotateRight(HInvoke* invoke) {
+  GenLongRotate(invoke->GetLocations(), GetAssembler(), false /* is_left */);
+}
+
+void IntrinsicLocationsBuilderARM::VisitIntegerRotateLeft(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerRotateLeft(HInvoke* invoke) {
+  GenIntegerRotate(invoke->GetLocations(), GetAssembler(), true /* is_left */);
+}
+
+void IntrinsicLocationsBuilderARM::VisitLongRotateLeft(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (invoke->InputAt(1)->IsConstant()) {
+    locations->SetInAt(1, Location::ConstantLocation(invoke->InputAt(1)->AsConstant()));
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARM::VisitLongRotateLeft(HInvoke* invoke) {
+  GenLongRotate(invoke->GetLocations(), GetAssembler(), true /* is_left */);
+}
+
 static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
   Location in = locations->InAt(0);
   Location out = locations->Out();