Reland "Add clinit checks at entry for some boot image methods."

This reverts commit 0ae89052f7213701b8b3a782266e84b3d3600dbf.

Bug: 162110941
Bug: 238472973

Reason for revert: Relanding with a fix that removes the code which forced
the use of clinit entrypoints in debug mode.
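
For reference, the check each backend now emits at method entry is
conceptually equivalent to the following sketch (illustrative pseudocode
only, not part of the patch; GetStatusByte() and GetClinitThreadId() are
stand-in names for the raw field loads the generated code performs, and
`self` denotes the current Thread):

  // Runs before the frame is constructed, so a thrown exception still
  // yields the correct stack trace. No read barrier is emitted; the
  // resolution trampoline does a suspend check before re-entering here.
  ObjPtr<mirror::Class> cls = method->GetDeclaringClass<kWithoutReadBarrier>();
  uint8_t status = cls->GetStatusByte();
  if (status >= shifted_visibly_initialized_value) {
    // Visibly initialized: fall through to the regular frame entry.
  } else if (status >= shifted_initializing_value &&
             cls->GetClinitThreadId() == self->GetTid()) {
    // This thread is the one running <clinit>: fall through as well.
  } else {
    // Tail-call the quick resolution trampoline, which initializes the
    // class and then re-enters the compiled code from the top.
    TailCall(art_quick_resolution_trampoline);  // hypothetical helper
  }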

Change-Id: Ibc04e91b09deaa1ac23d32b9e45281f3299d2981
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 51cd999..a531bc9 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -23,6 +23,7 @@
 
 #include "arch/instruction_set.h"
 #include "arch/instruction_set_features.h"
+#include "art_method-inl.h"
 #include "base/runtime_debug.h"
 #include "base/string_view_cpp20.h"
 #include "base/variant_map.h"
@@ -146,14 +147,39 @@
 
 bool CompilerOptions::IsImageClass(const char* descriptor) const {
   // Historical note: We used to hold the set indirectly and there was a distinction between an
-  // empty set and a null, null meaning to include all classes. However, the distiction has been
+  // empty set and a null, null meaning to include all classes. However, the distinction has been
   // removed; if we don't have a profile, we treat it as an empty set of classes. b/77340429
   return image_classes_.find(std::string_view(descriptor)) != image_classes_.end();
 }
 
+bool CompilerOptions::IsPreloadedClass(const char* pretty_descriptor) const {
+  return preloaded_classes_.find(std::string_view(pretty_descriptor)) != preloaded_classes_.end();
+}
+
 const VerificationResults* CompilerOptions::GetVerificationResults() const {
   DCHECK(Runtime::Current()->IsAotCompiler());
   return verification_results_;
 }
 
+bool CompilerOptions::ShouldCompileWithClinitCheck(ArtMethod* method) const {
+  if (method != nullptr &&
+      Runtime::Current()->IsAotCompiler() &&
+      method->IsStatic() &&
+      !method->IsConstructor() &&
+      // Compiled code for native methods never does a clinit check, so we may keep the
+      // resolution trampoline as the entrypoint for such methods. This means that, post
+      // zygote fork, their entrypoint may get dirtied. We could resolve this by either:
+      // - Making these methods use the generic JNI entrypoint, but that's not
+      //   desirable for a method that is in the profile.
+      // - Ensuring the declaring class of such native methods is always in the
+      //   preloaded-classes list.
+      // - Emitting the clinit check in the compiled code of native methods.
+      !method->IsNative()) {
+    ScopedObjectAccess soa(Thread::Current());
+    ObjPtr<mirror::Class> cls = method->GetDeclaringClass<kWithoutReadBarrier>();
+    return cls->IsInBootImageAndNotInPreloadedClasses();
+  }
+  return false;
+}
+
 }  // namespace art
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 1bffdb1..20f54bd 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -44,6 +44,7 @@
 class Arm64RelativePatcherTest;
 }  // namespace linker
 
+class ArtMethod;
 class DexFile;
 enum class InstructionSet;
 class InstructionSetFeatures;
@@ -300,6 +301,10 @@
 
   bool IsImageClass(const char* descriptor) const;
 
+  // Returns whether the given `pretty_descriptor` is in the list of preloaded
+  // classes. `pretty_descriptor` should be the result of calling `PrettyDescriptor`.
+  bool IsPreloadedClass(const char* pretty_descriptor) const;
+
   const VerificationResults* GetVerificationResults() const;
 
   bool ParseCompilerOptions(const std::vector<std::string>& options,
@@ -383,6 +388,12 @@
     return ContainsElement(GetDexFilesForOatFile(), dex_file);
   }
 
+  // Returns whether to add initialization checks at the entry of this method: we do so for a
+  // static non-constructor method in the boot classpath whose class isn't initialized at
+  // compile time and won't be initialized by the zygote. This avoids relying on the
+  // resolution trampoline, which would require dirtying the method's entrypoint at runtime.
+  bool ShouldCompileWithClinitCheck(ArtMethod* method) const;
+
  private:
   bool ParseDumpInitFailures(const std::string& option, std::string* error_msg);
   bool ParseRegisterAllocationStrategy(const std::string& option, std::string* error_msg);
@@ -408,6 +419,10 @@
   // Must not be empty for real boot image, only for tests pretending to compile boot image.
   HashSet<std::string> image_classes_;
 
+  // Classes listed in the preloaded-classes file, used for boot image and
+  // boot image extension compilation.
+  HashSet<std::string> preloaded_classes_;
+
   // Results of AOT verification.
   const VerificationResults* verification_results_;
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 7b46e13..de247a9 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -37,6 +37,7 @@
 #include "optimizing_compiler_stats.h"
 #include "read_barrier_option.h"
 #include "stack.h"
+#include "subtype_check.h"
 #include "utils/assembler.h"
 #include "utils/label.h"
 
@@ -60,6 +61,14 @@
 static const ReadBarrierOption gCompilerReadBarrierOption =
     gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
 
+constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+constexpr size_t status_byte_offset =
+    mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+constexpr uint32_t shifted_visibly_initialized_value =
+    enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
+constexpr uint32_t shifted_initializing_value =
+    enum_cast<uint32_t>(ClassStatus::kInitializing) << (status_lsb_position % kBitsPerByte);
+
 class Assembler;
 class CodeGenerator;
 class CompilerOptions;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index eb95541..17407a5 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1233,6 +1233,45 @@
 
 void CodeGeneratorARM64::GenerateFrameEntry() {
   MacroAssembler* masm = GetVIXLAssembler();
+
+  // Check if we need to generate the clinit check. We will jump to the
+  // resolution stub if the class is not initialized and the executing thread is
+  // not the thread initializing it.
+  // We do this before constructing the frame to get the correct stack trace if
+  // an exception is thrown.
+  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+    UseScratchRegisterScope temps(masm);
+    vixl::aarch64::Label resolution;
+
+    Register temp1 = temps.AcquireW();
+    Register temp2 = temps.AcquireW();
+
+    // Check if we're visibly initialized.
+
+    // We don't emit a read barrier here to save on code size. We rely on the
+    // resolution trampoline to do a suspend check before re-entering this code.
+    __ Ldr(temp1, MemOperand(kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
+    __ Ldrb(temp2, HeapOperand(temp1, status_byte_offset));
+    __ Cmp(temp2, shifted_visibly_initialized_value);
+    __ B(hs, &frame_entry_label_);
+
+    // Check if we're initializing and the thread initializing is the one
+    // executing the code.
+    __ Cmp(temp2, shifted_initializing_value);
+    __ B(lo, &resolution);
+
+    __ Ldr(temp1, HeapOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+    __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArm64PointerSize>().Int32Value()));
+    __ Cmp(temp1, temp2);
+    __ B(eq, &frame_entry_label_);
+    __ Bind(&resolution);
+
+    // Jump to the resolution stub.
+    ThreadOffset64 entrypoint_offset =
+        GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline);
+    __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value()));
+    __ Br(temp1.X());
+  }
   __ Bind(&frame_entry_label_);
 
   bool do_overflow_check =
@@ -1904,11 +1943,6 @@
                                                                      Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
-  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
-  const size_t status_byte_offset =
-      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
-  constexpr uint32_t shifted_visibly_initialized_value =
-      enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
 
   // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize
   // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index bf8e896..0850e2f 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2237,6 +2237,52 @@
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+
+  // Check if we need to generate the clinit check. We will jump to the
+  // resolution stub if the class is not initialized and the executing thread is
+  // not the thread initializing it.
+  // We do this before constructing the frame to get the correct stack trace if
+  // an exception is thrown.
+  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    vixl32::Label resolution;
+
+    // Check if we're visibly initialized.
+
+    vixl32::Register temp1 = temps.Acquire();
+    // Use r4 as the other temporary register.
+    DCHECK(!blocked_core_registers_[R4]);
+    DCHECK(!kCoreCalleeSaves.Includes(r4));
+    vixl32::Register temp2 = r4;
+    for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
+      DCHECK(!reg.Is(r4));
+    }
+
+    // We don't emit a read barrier here to save on code size. We rely on the
+    // resolution trampoline to do a suspend check before re-entering this code.
+    __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
+    __ Ldrb(temp2, MemOperand(temp1, status_byte_offset));
+    __ Cmp(temp2, shifted_visibly_initialized_value);
+    __ B(cs, &frame_entry_label_);
+
+    // Check if we're initializing and the thread initializing is the one
+    // executing the code.
+    __ Cmp(temp2, shifted_initializing_value);
+    __ B(lo, &resolution);
+
+    __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+    __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value()));
+    __ Cmp(temp1, temp2);
+    __ B(eq, &frame_entry_label_);
+    __ Bind(&resolution);
+
+    // Jump to the resolution stub.
+    ThreadOffset32 entrypoint_offset =
+        GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline);
+    __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value()));
+    __ Bx(temp1);
+  }
+
   __ Bind(&frame_entry_label_);
 
   if (HasEmptyFrame()) {
@@ -7625,12 +7671,7 @@
     LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   vixl32::Register temp = temps.Acquire();
-  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
-  constexpr uint32_t shifted_visibly_initialized_value =
-      enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << status_lsb_position;
-
-  const size_t status_offset = mirror::Class::StatusOffset().SizeValue();
-  GetAssembler()->LoadFromOffset(kLoadWord, temp, class_reg, status_offset);
+  __ Ldrb(temp, MemOperand(class_reg, status_byte_offset));
   __ Cmp(temp, shifted_visibly_initialized_value);
   __ B(lo, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f4529be..ce27083 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1261,6 +1261,44 @@
 
 void CodeGeneratorX86::GenerateFrameEntry() {
   __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
+
+  // Check if we need to generate the clinit check. We will jump to the
+  // resolution stub if the class is not initialized and the executing thread is
+  // not the thread initializing it.
+  // We do this before constructing the frame to get the correct stack trace if
+  // an exception is thrown.
+  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+    NearLabel continue_execution, resolution;
+    // We'll use EBP as temporary.
+    __ pushl(EBP);
+    // Check if we're visibly initialized.
+
+    // We don't emit a read barrier here to save on code size. We rely on the
+    // resolution trampoline to do a suspend check before re-entering this code.
+    __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value()));
+    __ cmpb(Address(EBP,  status_byte_offset), Immediate(shifted_visibly_initialized_value));
+    __ j(kAboveEqual, &continue_execution);
+
+    // Check if we're initializing and the thread initializing is the one
+    // executing the code.
+    __ cmpb(Address(EBP,  status_byte_offset), Immediate(shifted_initializing_value));
+    __ j(kBelow, &resolution);
+
+    __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+    __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value()));
+    __ j(kEqual, &continue_execution);
+    __ Bind(&resolution);
+
+    __ popl(EBP);
+    // Jump to the resolution stub.
+    ThreadOffset32 entrypoint_offset =
+        GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline);
+    __ fs()->jmp(Address::Absolute(entrypoint_offset));
+
+    __ Bind(&continue_execution);
+    __ popl(EBP);
+  }
+
   __ Bind(&frame_entry_label_);
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
@@ -7233,12 +7271,6 @@
 
 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
     SlowPathCode* slow_path, Register class_reg) {
-  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
-  const size_t status_byte_offset =
-      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
-  constexpr uint32_t shifted_visibly_initialized_value =
-      enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
-
   __ cmpb(Address(class_reg,  status_byte_offset), Immediate(shifted_visibly_initialized_value));
   __ j(kBelow, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index d31a630..b1db993 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1653,6 +1653,44 @@
 
 void CodeGeneratorX86_64::GenerateFrameEntry() {
   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
+
+  // Check if we need to generate the clinit check. We will jump to the
+  // resolution stub if the class is not initialized and the executing thread is
+  // not the thread initializing it.
+  // We do this before constructing the frame to get the correct stack trace if
+  // an exception is thrown.
+  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+    NearLabel resolution;
+    // Check if we're visibly initialized.
+
+    // We don't emit a read barrier here to save on code size. We rely on the
+    // resolution trampoline to do a suspend check before re-entering this code.
+    __ movl(CpuRegister(TMP),
+            Address(CpuRegister(kMethodRegisterArgument),
+                    ArtMethod::DeclaringClassOffset().Int32Value()));
+    __ cmpb(Address(CpuRegister(TMP),  status_byte_offset),
+            Immediate(shifted_visibly_initialized_value));
+    __ j(kAboveEqual, &frame_entry_label_);
+
+    // Check if we're initializing and the thread initializing is the one
+    // executing the code.
+    __ cmpb(Address(CpuRegister(TMP),  status_byte_offset), Immediate(shifted_initializing_value));
+    __ j(kBelow, &resolution);
+
+    __ movl(CpuRegister(TMP),
+            Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value()));
+    __ gs()->cmpl(
+        CpuRegister(TMP),
+        Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true));
+    __ j(kEqual, &frame_entry_label_);
+    __ Bind(&resolution);
+
+    // Jump to the resolution stub.
+    ThreadOffset64 entrypoint_offset =
+        GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline);
+    __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true));
+  }
+
   __ Bind(&frame_entry_label_);
   bool skip_overflow_check = IsLeafMethod()
       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
@@ -6282,12 +6320,6 @@
 
 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
     SlowPathCode* slow_path, CpuRegister class_reg) {
-  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
-  const size_t status_byte_offset =
-      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
-  constexpr uint32_t shifted_visibly_initialized_value =
-      enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
-
   __ cmpb(Address(class_reg,  status_byte_offset), Immediate(shifted_visibly_initialized_value));
   __ j(kBelow, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());