Introduce Long.divideUnsigned() intrinsic on ARM64.

Extend the 082-inline-execute test with Long.divideUnsigned() cases and
move the divideUnsigned intrinsic code ahead of the VarHandle code so
that the Integer and Long variants sit next to the shared
GenerateDivideUnsigned() helper.
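
For reference, here is a minimal sketch of the unsigned-division
semantics the intrinsic must preserve. The name udivReference is
hypothetical and used only for illustration; it is not part of this
change:

  // Reference model of Long.divideUnsigned(), for illustration only.
  static long udivReference(long dividend, long divisor) {
    if (divisor < 0L) {  // divisor >= 2^63 when read as unsigned
      return Long.compareUnsigned(dividend, divisor) < 0 ? 0L : 1L;
    }
    if (dividend >= 0L) {
      return dividend / divisor;  // throws ArithmeticException if divisor == 0
    }
    // dividend >= 2^63 as unsigned: halve, divide, then correct by at most 1.
    long quotient = ((dividend >>> 1) / divisor) << 1;
    long remainder = dividend - quotient * divisor;
    return quotient + (Long.compareUnsigned(remainder, divisor) >= 0 ? 1L : 0L);
  }

On AArch64 a single UDIV instruction covers all of the branches above,
since UDIV is defined over the full unsigned 64-bit range. UDIV by zero
yields 0 rather than trapping, so the intrinsic emits an explicit Cbz
on the divisor and falls back to the managed implementation, which
throws the ArithmeticException.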

Test: 082-inline-execute
Test: testrunner.py --target --optimizing --64
Bug: 156736938
Change-Id: I8acf3db184da30640aff36cf02570ae0c6d2b88c
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index b02b32e..a547551 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -3342,6 +3342,43 @@
   GenerateFP16Compare(invoke, codegen_, masm, ls);
 }
 
+static void GenerateDivideUnsigned(HInvoke* invoke, CodeGeneratorARM64* codegen) {
+  LocationSummary* locations = invoke->GetLocations();
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
+  DataType::Type type = invoke->GetType();
+  DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
+
+  Register dividend = RegisterFrom(locations->InAt(0), type);
+  Register divisor = RegisterFrom(locations->InAt(1), type);
+  Register out = RegisterFrom(locations->Out(), type);
+
+  // If the divisor is zero, bail out to the managed implementation, which throws.
+  SlowPathCodeARM64* slow_path =
+      new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen->AddSlowPath(slow_path);
+  __ Cbz(divisor, slow_path->GetEntryLabel());
+
+  __ Udiv(out, dividend, divisor);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
+  CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
+  GenerateDivideUnsigned(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
+  CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitLongDivideUnsigned(HInvoke* invoke) {
+  GenerateDivideUnsigned(invoke, codegen_);
+}
+
 // Check access mode and the primitive type from VarHandle.varType.
 // The `var_type_no_rb`, if valid, shall be filled with VarHandle.varType read without read barrier.
 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
@@ -3709,28 +3746,6 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
-void IntrinsicLocationsBuilderARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
-  CreateIntIntToIntSlowPathCallLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
-  LocationSummary* locations = invoke->GetLocations();
-  MacroAssembler* masm = GetVIXLAssembler();
-  Register dividend = WRegisterFrom(locations->InAt(0));
-  Register divisor = WRegisterFrom(locations->InAt(1));
-  Register out = WRegisterFrom(locations->Out());
-
-  // Check if divisor is zero, bail to managed implementation to handle.
-  SlowPathCodeARM64* slow_path =
-      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
-  codegen_->AddSlowPath(slow_path);
-  __ Cbz(divisor, slow_path->GetEntryLabel());
-
-  __ Udiv(out, dividend, divisor);
-
-  __ Bind(slow_path->GetExitLabel());
-}
-
 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
 
 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index b8df060..19f55e1 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -3018,6 +3018,7 @@
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerDivideUnsigned)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongDivideUnsigned)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 87880d7..045cacd 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -4501,6 +4501,7 @@
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(X86, LongDivideUnsigned)
 UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
 UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
 UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index eaa3abe..7ebc632 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2735,6 +2735,7 @@
 UNIMPLEMENTED_INTRINSIC(X86_64, FP16GreaterEquals)
 UNIMPLEMENTED_INTRINSIC(X86_64, FP16Less)
 UNIMPLEMENTED_INTRINSIC(X86_64, FP16LessEquals)
+UNIMPLEMENTED_INTRINSIC(X86_64, LongDivideUnsigned)
 
 UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
diff --git a/runtime/image.cc b/runtime/image.cc
index 13c9bd0..d91106a 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -29,7 +29,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '8', '6', '\0' };  // Integer.divideUnsigned
+const uint8_t ImageHeader::kImageVersion[] = { '0', '8', '7', '\0' };  // Long.divideUnsigned
 
 ImageHeader::ImageHeader(uint32_t image_reservation_size,
                          uint32_t component_count,
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
index ced4132..c174ede 100644
--- a/runtime/interpreter/interpreter_intrinsics.cc
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -440,6 +440,7 @@
     UNIMPLEMENTED_CASE(FloatIsNaN /* (F)Z */)
     UNIMPLEMENTED_CASE(FloatIntBitsToFloat /* (I)F */)
     UNIMPLEMENTED_CASE(IntegerDivideUnsigned /* (II)I */)
+    UNIMPLEMENTED_CASE(LongDivideUnsigned /* (JJ)J */)
     INTRINSIC_CASE(IntegerReverse)
     INTRINSIC_CASE(IntegerReverseBytes)
     INTRINSIC_CASE(IntegerBitCount)
diff --git a/runtime/intrinsics_list.h b/runtime/intrinsics_list.h
index edfa159..be1ca46 100644
--- a/runtime/intrinsics_list.h
+++ b/runtime/intrinsics_list.h
@@ -111,6 +111,7 @@
   V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Long;", "reverseBytes", "(J)J") \
   V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Long;", "bitCount", "(J)I") \
   V(LongCompare, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Long;", "compare", "(JJ)I") \
+  V(LongDivideUnsigned, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kCanThrow, "Ljava/lang/Long;", "divideUnsigned", "(JJ)J") \
   V(LongHighestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Long;", "highestOneBit", "(J)J") \
   V(LongLowestOneBit, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Long;", "lowestOneBit", "(J)J") \
   V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Long;", "numberOfLeadingZeros", "(J)I") \
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index b565985..5597947 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -83,6 +83,7 @@
     test_Memory_pokeInt();
     test_Memory_pokeLong();
     test_Integer_divideUnsigned();
+    test_Long_divideUnsigned();
     test_Integer_numberOfTrailingZeros();
     test_Long_numberOfTrailingZeros();
     test_Integer_rotateRight();
@@ -1402,6 +1403,40 @@
     }
   }
 
+
+  private static final long BIG_LONG_VALUE = 739287620162442240L;
+
+  public static void test_Long_divideUnsigned() {
+    Assert.assertEquals(Long.divideUnsigned(100L, 10L), 10L);
+    Assert.assertEquals(Long.divideUnsigned(100L, 1L), 100L);
+    Assert.assertEquals(Long.divideUnsigned(1024L, 128L), 8L);
+    Assert.assertEquals(Long.divideUnsigned(12345678L, 264L), 46763L);
+    Assert.assertEquals(Long.divideUnsigned(13L, 5L), 2L);
+    Assert.assertEquals(Long.divideUnsigned(-2L, 2L), Long.MAX_VALUE);
+    Assert.assertEquals(Long.divideUnsigned(-1L, 2L), Long.MAX_VALUE);
+    Assert.assertEquals(Long.divideUnsigned(100000L, -1L), 0L);
+    Assert.assertEquals(Long.divideUnsigned(Long.MAX_VALUE, -1L), 0L);
+    Assert.assertEquals(Long.divideUnsigned(-2L, -1L), 0L);
+    Assert.assertEquals(Long.divideUnsigned(-1L, -2L), 1L);
+    Assert.assertEquals(Long.divideUnsigned(-173448L, 13L), 1418980313362259859L);
+    Assert.assertEquals(Long.divideUnsigned(Long.MIN_VALUE, 2L), (1L << 62));
+    Assert.assertEquals(Long.divideUnsigned(-1L, Long.MIN_VALUE), 1L);
+    Assert.assertEquals(Long.divideUnsigned(Long.MAX_VALUE, Long.MIN_VALUE), 0L);
+    Assert.assertEquals(Long.divideUnsigned(Long.MIN_VALUE, Long.MAX_VALUE), 1L);
+    Assert.assertEquals(Long.divideUnsigned(Long.MAX_VALUE, 1L), Long.MAX_VALUE);
+    Assert.assertEquals(Long.divideUnsigned(Long.MIN_VALUE, 1L), Long.MIN_VALUE);
+    Assert.assertEquals(Long.divideUnsigned(BIG_LONG_VALUE, BIG_LONG_VALUE), 1L);
+    Assert.assertEquals(Long.divideUnsigned(BIG_LONG_VALUE, 1L), BIG_LONG_VALUE);
+    Assert.assertEquals(Long.divideUnsigned(BIG_LONG_VALUE, 1024L), 721960566564885L);
+    Assert.assertEquals(Long.divideUnsigned(BIG_LONG_VALUE, 0x1FFFFFFFFL), 86064406L);
+
+    try {
+      Long.divideUnsigned(1L, 0L);
+      Assert.fail("Unreachable");
+    } catch (ArithmeticException expected) {
+    }
+  }
+
   public static void test_Integer_numberOfLeadingZeros() {
     Assert.assertEquals(Integer.numberOfLeadingZeros(0), Integer.SIZE);
     Assert.assertEquals(Integer.numberOfLeadingZeros(1), Integer.SIZE - 1);