Integer.bitCount and Long.bitCount intrinsics for ARM64

Change-Id: If6180acc90239e52e5d33901b65e194d1ca7e248
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 7a4a6ef..2e1198c 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -46,6 +46,7 @@
 using helpers::SRegisterFrom;
 using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
+using helpers::InputRegisterAt;
 
 namespace {
 
@@ -367,6 +368,40 @@
   GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler());
 }
 
+static void GenBitCount(HInvoke* instr, bool is_long, vixl::MacroAssembler* masm) {
+  // Emit a bit count of input 0: move it to a SIMD scratch register, count the bits set in each
+  // byte lane (CNT), sum the lanes (ADDV) and move the sum to the output register.
+  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
+  DCHECK_EQ(instr->InputAt(0)->GetType(), is_long ? Primitive::kPrimLong : Primitive::kPrimInt);
+
+  Location out = instr->GetLocations()->Out();
+  UseScratchRegisterScope temps(masm);
+  Register src = InputRegisterAt(instr, 0);
+  FPRegister fpr = is_long ? temps.AcquireD() : temps.AcquireS();
+  Register dst = is_long ? XRegisterFrom(out) : WRegisterFrom(out);
+
+  __ Fmov(fpr, src);
+  __ Cnt(fpr.V8B(), fpr.V8B());  // Per-byte bit counts.
+  __ Addv(fpr.B(), fpr.V8B());   // Sum of the eight byte counts.
+  __ Fmov(dst, fpr);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);  // Locations are set up by the common helper.
+}
+
+void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, /* is_long */ true, GetVIXLAssembler());  // GenBitCount DCHECKs a long input.
+}
+
+void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);  // Locations are set up by the common helper.
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, /* is_long */ false, GetVIXLAssembler());  // GenBitCount DCHECKs an int input.
+}
+
 static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
   LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
@@ -1672,8 +1707,6 @@
   __ Bind(&done);
 }
 
-UNIMPLEMENTED_INTRINSIC(ARM64, IntegerBitCount)
-UNIMPLEMENTED_INTRINSIC(ARM64, LongBitCount)
 UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ARM64, SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)