Add isInitializing check in nterp header.

This simplifies Instrumentation::GetCodeForInvoke and will be consistent
with the code we generate for AOT in:
https://android-review.googlesource.com/c/platform/art/+/2163021

Bug: 162110941
Test: test.py
Change-Id: Ic4d7747c7729df1281a2ab309589005c41542a31
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 9c8c220..60bbde4 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -1371,6 +1371,11 @@
       success = linker->EnsureInitialized(soa.Self(), h_called_class, true, true);
     }
     if (success) {
+      // When the clinit check is at entry of the AOT/nterp code, we do the clinit check
+      // before doing the suspend check. To ensure the code sees the latest
+      // version of the class (the code doesn't do a read barrier to reduce
+      // size), do a suspend check now.
+      self->CheckSuspend();
       instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
       // Check if we need instrumented code here. Since resolution stubs could suspend, it is
       // possible that we instrumented the entry points after we started executing the resolution
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 8735dcf..f4584ad 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -1308,14 +1308,10 @@
   DCHECK(!method->IsProxyMethod()) << method->PrettyMethod();
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   const void* code = method->GetEntryPointFromQuickCompiledCodePtrSize(kRuntimePointerSize);
-  // If we don't have the instrumentation, the resolution stub, the
-  // interpreter, or the nterp with clinit as entrypoint, just return the current entrypoint,
+  // If the entrypoint is not the instrumentation entrypoint, the resolution stub,
+  // or the interpreter bridge, just return the current entrypoint,
   // assuming it's the most optimized.
-  // We don't want to return the nterp with clinit entrypoint as it calls the
-  // resolution stub, and the resolution stub will call `GetCodeForInvoke` to know the actual
-  // code to invoke.
   if (code != GetQuickInstrumentationEntryPoint() &&
-      code != interpreter::GetNterpWithClinitEntryPoint() &&
       !class_linker->IsQuickResolutionStub(code) &&
       !class_linker->IsQuickToInterpreterBridge(code)) {
     return code;
diff --git a/runtime/interpreter/mterp/arm64ng/main.S b/runtime/interpreter/mterp/arm64ng/main.S
index 81d6b7b..66432d1 100644
--- a/runtime/interpreter/mterp/arm64ng/main.S
+++ b/runtime/interpreter/mterp/arm64ng/main.S
@@ -1591,10 +1591,20 @@
  */
 
 OAT_ENTRY ExecuteNterpWithClinitImpl, EndExecuteNterpWithClinitImpl
-    ldr wip, [x0, ART_METHOD_DECLARING_CLASS_OFFSET]
-    ldrb wip, [ip, MIRROR_CLASS_IS_VISIBLY_INITIALIZED_OFFSET]
-    cmp ip, #MIRROR_CLASS_IS_VISIBLY_INITIALIZED_VALUE
-    bcs ExecuteNterpImpl
+    // For simplicity, we don't do a read barrier here, but instead rely
+    // on art_quick_resolution_trampoline to always have a suspend point before
+    // calling back here.
+    ldr wip, [x0, #ART_METHOD_DECLARING_CLASS_OFFSET]
+    ldrb wip2, [ip, #MIRROR_CLASS_IS_VISIBLY_INITIALIZED_OFFSET]
+    cmp ip2, #MIRROR_CLASS_IS_VISIBLY_INITIALIZED_VALUE
+    b.hs ExecuteNterpImpl
+    cmp ip2, #MIRROR_CLASS_IS_INITIALIZING_VALUE
+    b.lo .Lresolution_trampoline
+    ldr wip2, [ip, #MIRROR_CLASS_CLINIT_THREAD_ID_OFFSET]
+    ldr wip, [xSELF, #THREAD_TID_OFFSET]
+    cmp wip, wip2
+    b.eq ExecuteNterpImpl
+.Lresolution_trampoline:
     b art_quick_resolution_trampoline
 EndExecuteNterpWithClinitImpl:
 
diff --git a/runtime/interpreter/mterp/armng/main.S b/runtime/interpreter/mterp/armng/main.S
index f89db40..320a792 100644
--- a/runtime/interpreter/mterp/armng/main.S
+++ b/runtime/interpreter/mterp/armng/main.S
@@ -1609,10 +1609,19 @@
  */
 
 OAT_ENTRY ExecuteNterpWithClinitImpl, EndExecuteNterpWithClinitImpl
-    ldr ip, [r0, ART_METHOD_DECLARING_CLASS_OFFSET]
-    ldrb ip, [ip, MIRROR_CLASS_IS_VISIBLY_INITIALIZED_OFFSET]
+    // For simplicity, we don't do a read barrier here, but instead rely
+    // on art_quick_resolution_trampoline to always have a suspend point before
+    // calling back here.
+    ldr r4, [r0, ART_METHOD_DECLARING_CLASS_OFFSET]
+    ldrb ip, [r4, MIRROR_CLASS_IS_VISIBLY_INITIALIZED_OFFSET]
     cmp ip, #MIRROR_CLASS_IS_VISIBLY_INITIALIZED_VALUE
     bcs ExecuteNterpImpl
+    cmp ip, #MIRROR_CLASS_IS_INITIALIZING_VALUE
+    blo art_quick_resolution_trampoline
+    ldr r4, [r4, #MIRROR_CLASS_CLINIT_THREAD_ID_OFFSET]
+    ldr ip, [rSELF, #THREAD_TID_OFFSET]
+    cmp r4, ip
+    beq ExecuteNterpImpl
     b art_quick_resolution_trampoline
 EndExecuteNterpWithClinitImpl:
 
diff --git a/runtime/interpreter/mterp/x86_64ng/main.S b/runtime/interpreter/mterp/x86_64ng/main.S
index 3e476db..6fc1777 100644
--- a/runtime/interpreter/mterp/x86_64ng/main.S
+++ b/runtime/interpreter/mterp/x86_64ng/main.S
@@ -1695,9 +1695,17 @@
  */
 
 OAT_ENTRY ExecuteNterpWithClinitImpl, EndExecuteNterpWithClinitImpl
+    // For simplicity, we don't do a read barrier here, but instead rely
+    // on art_quick_resolution_trampoline to always have a suspend point before
+    // calling back here.
     movl ART_METHOD_DECLARING_CLASS_OFFSET(%rdi), %r10d
     cmpb  $$(MIRROR_CLASS_IS_VISIBLY_INITIALIZED_VALUE), MIRROR_CLASS_IS_VISIBLY_INITIALIZED_OFFSET(%r10d)
     jae ExecuteNterpImpl
+    cmpb  $$(MIRROR_CLASS_IS_INITIALIZING_VALUE), MIRROR_CLASS_IS_VISIBLY_INITIALIZED_OFFSET(%r10d)
+    jb art_quick_resolution_trampoline
+    movl MIRROR_CLASS_CLINIT_THREAD_ID_OFFSET(%r10d), %r10d
+    cmpl %r10d, rSELF:THREAD_TID_OFFSET
+    je ExecuteNterpImpl
     jmp art_quick_resolution_trampoline
 EndExecuteNterpWithClinitImpl:
 
diff --git a/runtime/interpreter/mterp/x86ng/main.S b/runtime/interpreter/mterp/x86ng/main.S
index 7872520..42e2d18 100644
--- a/runtime/interpreter/mterp/x86ng/main.S
+++ b/runtime/interpreter/mterp/x86ng/main.S
@@ -1759,11 +1759,23 @@
 
 OAT_ENTRY ExecuteNterpWithClinitImpl, EndExecuteNterpWithClinitImpl
     push %esi
+    // For simplicity, we don't do a read barrier here, but instead rely
+    // on art_quick_resolution_trampoline to always have a suspend point before
+    // calling back here.
     movl ART_METHOD_DECLARING_CLASS_OFFSET(%eax), %esi
     cmpb $$(MIRROR_CLASS_IS_VISIBLY_INITIALIZED_VALUE), MIRROR_CLASS_IS_VISIBLY_INITIALIZED_OFFSET(%esi)
+    jae .Lcontinue_execute_nterp
+    cmpb  $$(MIRROR_CLASS_IS_INITIALIZING_VALUE), MIRROR_CLASS_IS_VISIBLY_INITIALIZED_OFFSET(%esi)
+    jb .Linvoke_trampoline
+    movl MIRROR_CLASS_CLINIT_THREAD_ID_OFFSET(%esi), %esi
+    cmpl %esi, rSELF:THREAD_TID_OFFSET
+    je .Lcontinue_execute_nterp
+.Linvoke_trampoline:
     pop %esi
-    jae ExecuteNterpImpl
     jmp art_quick_resolution_trampoline
+.Lcontinue_execute_nterp:
+    pop %esi
+    jmp ExecuteNterpImpl
 EndExecuteNterpWithClinitImpl:
 
 OAT_ENTRY ExecuteNterpImpl, EndExecuteNterpImpl
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 39f32bc..0b0ef48 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -571,6 +571,10 @@
     return OFFSET_OF_OBJECT_MEMBER(Class, object_size_alloc_fast_path_);
   }
 
+  static constexpr MemberOffset ClinitThreadIdOffset() {
+    return OFFSET_OF_OBJECT_MEMBER(Class, clinit_thread_id_);
+  }
+
   ALWAYS_INLINE void SetObjectSize(uint32_t new_object_size) REQUIRES_SHARED(Locks::mutator_lock_);
 
   void SetObjectSizeAllocFastPath(uint32_t new_object_size) REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/thread.h b/runtime/thread.h
index 183bc56..fd97829 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -759,6 +759,13 @@
   }
 
   template<PointerSize pointer_size>
+  static constexpr ThreadOffset<pointer_size> TidOffset() {
+    return ThreadOffset<pointer_size>(
+        OFFSETOF_MEMBER(Thread, tls32_) +
+        OFFSETOF_MEMBER(tls_32bit_sized_values, tid));
+  }
+
+  template<PointerSize pointer_size>
   static constexpr ThreadOffset<pointer_size> InterruptedOffset() {
     return ThreadOffset<pointer_size>(
         OFFSETOF_MEMBER(Thread, tls32_) +
diff --git a/tools/cpp-define-generator/mirror_class.def b/tools/cpp-define-generator/mirror_class.def
index c01aab3..af396bf 100644
--- a/tools/cpp-define-generator/mirror_class.def
+++ b/tools/cpp-define-generator/mirror_class.def
@@ -56,3 +56,8 @@
 ASM_DEFINE(MIRROR_CLASS_IS_VISIBLY_INITIALIZED_VALUE,
            art::enum_cast<uint32_t>(art::ClassStatus::kVisiblyInitialized) <<
                (art::SubtypeCheckBits::BitStructSizeOf() % art::kBitsPerByte))
+ASM_DEFINE(MIRROR_CLASS_IS_INITIALIZING_VALUE,
+           art::enum_cast<uint32_t>(art::ClassStatus::kInitializing) <<
+               (art::SubtypeCheckBits::BitStructSizeOf() % art::kBitsPerByte))
+ASM_DEFINE(MIRROR_CLASS_CLINIT_THREAD_ID_OFFSET,
+           art::mirror::Class::ClinitThreadIdOffset().Int32Value())
diff --git a/tools/cpp-define-generator/thread.def b/tools/cpp-define-generator/thread.def
index d796542..97033fc 100644
--- a/tools/cpp-define-generator/thread.def
+++ b/tools/cpp-define-generator/thread.def
@@ -71,3 +71,5 @@
            art::Thread::ReadBarrierMarkEntryPointsOffset<art::kRuntimePointerSize>(0))
 ASM_DEFINE(THREAD_SHARED_METHOD_HOTNESS_OFFSET,
            art::Thread::SharedMethodHotnessOffset<art::kRuntimePointerSize>().Int32Value())
+ASM_DEFINE(THREAD_TID_OFFSET,
+           art::Thread::TidOffset<art::kRuntimePointerSize>().Int32Value())