Add an IsInitialized check in AOT code with clinit checks at entry.

The investigation behind
https://android-review.googlesource.com/c/platform/art/+/2214701 showed
that it can take some time for a class to become visibly initialized. To
avoid a busy loop between the AOT code and the resolution trampoline, we
also add an IsInitialized check: if the class is initialized but not yet
visibly initialized, we execute a memory barrier and then run the
compiled code.
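
Roughly, the new entry check behaves like the sketch below (illustrative
C++, not the emitted assembly; Status(), SelfThread(), ClinitThread(),
MemoryBarrier(), RunCompiledCode(), and Resolve() are hypothetical
stand-ins for what the generated code actually does):

  // Illustrative sketch of the emitted entry check; helper names are
  // hypothetical stand-ins, not real ART APIs.
  uint32_t status = Status();  // Shifted status of the declaring class.
  if (status >= shifted_visibly_initialized_value) {
    return RunCompiledCode();        // Fast path: no barrier needed.
  }
  if (status >= shifted_initialized_value) {
    MemoryBarrier();                 // New: see <clinit>'s stores instead
    return RunCompiledCode();        // of bouncing off the trampoline.
  }
  if (status >= shifted_initializing_value && SelfThread() == ClinitThread()) {
    return RunCompiledCode();        // Reentrant call during <clinit>.
  }
  return Resolve();                  // Slow path: resolution trampoline.

The kAnyAny barrier on the new path ensures the thread observes all
stores made during class initialization before the compiled code runs.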

Test: test.py
Bug: 162110941
Change-Id: I6c36cde6ebd12b1f81281eb8a684b496f266e3ea
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 6b85aaa..578a341 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -68,6 +68,8 @@
     enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
 constexpr uint32_t shifted_initializing_value =
     enum_cast<uint32_t>(ClassStatus::kInitializing) << (status_lsb_position % kBitsPerByte);
+constexpr uint32_t shifted_initialized_value =
+    enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
 
 class Assembler;
 class CodeGenerator;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 17407a5..7fb6d3c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1242,6 +1242,7 @@
   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
     UseScratchRegisterScope temps(masm);
     vixl::aarch64::Label resolution;
+    vixl::aarch64::Label memory_barrier;
 
     Register temp1 = temps.AcquireW();
     Register temp2 = temps.AcquireW();
@@ -1255,6 +1256,11 @@
     __ Cmp(temp2, shifted_visibly_initialized_value);
     __ B(hs, &frame_entry_label_);
 
+    // Check if we're initialized and, if so, jump to the code that emits a
+    // memory barrier before falling through to the compiled code.
+    __ Cmp(temp2, shifted_initialized_value);
+    __ B(hs, &memory_barrier);
+
     // Check if we're initializing and the thread initializing is the one
     // executing the code.
     __ Cmp(temp2, shifted_initializing_value);
@@ -1271,6 +1277,9 @@
         GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline);
     __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value()));
     __ Br(temp1.X());
+
+    __ Bind(&memory_barrier);
+    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
   __ Bind(&frame_entry_label_);
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 0850e2f..0753d36 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2246,6 +2246,7 @@
   if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
     UseScratchRegisterScope temps(GetVIXLAssembler());
     vixl32::Label resolution;
+    vixl32::Label memory_barrier;
 
     // Check if we're visibly initialized.
 
@@ -2265,6 +2266,11 @@
     __ Cmp(temp2, shifted_visibly_initialized_value);
     __ B(cs, &frame_entry_label_);
 
+    // Check if we're initialized and, if so, jump to the code that emits a
+    // memory barrier before falling through to the compiled code.
+    __ Cmp(temp2, shifted_initialized_value);
+    __ B(cs, &memory_barrier);
+
     // Check if we're initializing and the thread initializing is the one
     // executing the code.
     __ Cmp(temp2, shifted_initializing_value);
@@ -2281,6 +2287,9 @@
         GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline);
     __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value()));
     __ Bx(temp1);
+
+    __ Bind(&memory_barrier);
+    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 
   __ Bind(&frame_entry_label_);