Add an IsInitialized check in AOT code with clinit entry checks.

The investigations behind
https://android-review.googlesource.com/c/platform/art/+/2214701 showed
that it can take some time for a class to become visibly initialized. To
avoid a busy loop between the AOT code and the resolution trampoline, we
also add an IsInitialized check which, if it passes, is followed by a
memory barrier and then execution of the compiled code.
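
For reference, a minimal sketch (plain C++, not the generated arm64
code; the Status values and helper functions below are made up for
illustration only) of the check order the entry code now follows:

  #include <atomic>

  // Hypothetical status ordering mirroring the entry checks:
  // initializing < initialized < visibly initialized.
  enum class Status { kInitializing = 1, kInitialized = 2, kVisiblyInitialized = 3 };

  bool SelfIsTheInitializingThread() { return false; }  // placeholder
  void RunCompiledCode() {}                             // placeholder
  void GoToResolutionTrampoline() {}                    // placeholder

  void EntryCheck(Status status) {
    if (status >= Status::kVisiblyInitialized) {
      // Visibly initialized: run the compiled code directly.
      RunCompiledCode();
    } else if (status >= Status::kInitialized) {
      // Initialized but not yet visibly so: a memory barrier is enough,
      // no need to bounce through the resolution trampoline.
      std::atomic_thread_fence(std::memory_order_seq_cst);
      RunCompiledCode();
    } else if (status == Status::kInitializing && SelfIsTheInitializingThread()) {
      // The executing thread is the one running <clinit>.
      RunCompiledCode();
    } else {
      GoToResolutionTrampoline();
    }
  }

The actual implementation performs the same comparisons on the shifted
class status, as shown in the diff below.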

Test: test.py
Bug: 162110941
Change-Id: I6c36cde6ebd12b1f81281eb8a684b496f266e3ea
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 17407a5..7fb6d3c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1242,6 +1242,7 @@
   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
     UseScratchRegisterScope temps(masm);
     vixl::aarch64::Label resolution;
+    vixl::aarch64::Label memory_barrier;
 
     Register temp1 = temps.AcquireW();
     Register temp2 = temps.AcquireW();
@@ -1255,6 +1256,11 @@
     __ Cmp(temp2, shifted_visibly_initialized_value);
     __ B(hs, &frame_entry_label_);
 
+    // Check if we're initialized and jump to code that does a memory barrier if
+    // so.
+    __ Cmp(temp2, shifted_initialized_value);
+    __ B(hs, &memory_barrier);
+
     // Check if we're initializing and the thread initializing is the one
     // executing the code.
     __ Cmp(temp2, shifted_initializing_value);
@@ -1271,6 +1277,9 @@
         GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline);
     __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value()));
     __ Br(temp1.X());
+
+    __ Bind(&memory_barrier);
+    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
   __ Bind(&frame_entry_label_);