Reland "Add clinit checks at entry for some boot image methods."

This reverts commit 0ae89052f7213701b8b3a782266e84b3d3600dbf.

Bug: 162110941
Bug: 238472973

Reason for revert: This reland removes the code that forced the use of
clinit entrypoints in debug mode.

Change-Id: Ibc04e91b09deaa1ac23d32b9e45281f3299d2981
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 7b46e13..de247a9 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -37,6 +37,7 @@
 #include "optimizing_compiler_stats.h"
 #include "read_barrier_option.h"
 #include "stack.h"
+#include "subtype_check.h"
 #include "utils/assembler.h"
 #include "utils/label.h"
@@ -60,6 +61,14 @@
 static const ReadBarrierOption gCompilerReadBarrierOption =
     gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
+constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+constexpr size_t status_byte_offset =
+    mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+constexpr uint32_t shifted_visibly_initialized_value =
+    enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
+constexpr uint32_t shifted_initializing_value =
+    enum_cast<uint32_t>(ClassStatus::kInitializing) << (status_lsb_position % kBitsPerByte);
 class Assembler;
 class CodeGenerator;
 class CompilerOptions;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index eb95541..17407a5 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1233,6 +1233,45 @@
 void CodeGeneratorARM64::GenerateFrameEntry() {
   MacroAssembler* masm = GetVIXLAssembler();
+  // Check if we need to generate the clinit check. We will jump to the
+  // resolution stub if the class is not initialized and the executing thread is
+  // not the thread initializing it.
+  // We do this before constructing the frame to get the correct stack trace if
+  // an exception is thrown.
+  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+    UseScratchRegisterScope temps(masm);
+    vixl::aarch64::Label resolution;
+    Register temp1 = temps.AcquireW();
+    Register temp2 = temps.AcquireW();
+    // Check if we're visibly initialized.
+    // We don't emit a read barrier here to save on code size. We rely on the
+    // resolution trampoline to do a suspend check before re-entering this code.
+    __ Ldr(temp1, MemOperand(kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
+    __ Ldrb(temp2, HeapOperand(temp1, status_byte_offset));
+    __ Cmp(temp2, shifted_visibly_initialized_value);
+    __ B(hs, &frame_entry_label_);
+    // Check if we're initializing and the thread initializing is the one
+    // executing the code.
+    __ Cmp(temp2, shifted_initializing_value);
+    __ B(lo, &resolution);
+    __ Ldr(temp1, HeapOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+    __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArm64PointerSize>().Int32Value()));
+    __ Cmp(temp1, temp2);
+    __ B(eq, &frame_entry_label_);
+    __ Bind(&resolution);
+    // Jump to the resolution stub.
+    ThreadOffset64 entrypoint_offset =
+        GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline);
+    __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value()));
+    __ Br(temp1.X());
+  }
   __ Bind(&frame_entry_label_);
   bool do_overflow_check =
@@ -1904,11 +1943,6 @@
                                                                      Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
-  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
-  const size_t status_byte_offset =
-      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
-  constexpr uint32_t shifted_visibly_initialized_value =
-      enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
   // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize
   // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code
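diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index bf8e896..0850e2f 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc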
@@ -2237,6 +2237,52 @@
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
+  // Check if we need to generate the clinit check. We will jump to the
+  // resolution stub if the class is not initialized and the executing thread is
+  // not the thread initializing it.
+  // We do this before constructing the frame to get the correct stack trace if
+  // an exception is thrown.
+  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    vixl32::Label resolution;
+    // Check if we're visibly initialized.
+    vixl32::Register temp1 = temps.Acquire();
+    // Use r4 as other temporary register.
+    DCHECK(!blocked_core_registers_[R4]);
+    DCHECK(!kCoreCalleeSaves.Includes(r4));
+    vixl32::Register temp2 = r4;
+    for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
+      DCHECK(!reg.Is(r4));
+    }
+    // We don't emit a read barrier here to save on code size. We rely on the
+    // resolution trampoline to do a suspend check before re-entering this code.
+    __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
+    __ Ldrb(temp2, MemOperand(temp1, status_byte_offset));
+    __ Cmp(temp2, shifted_visibly_initialized_value);
+    __ B(cs, &frame_entry_label_);
+    // Check if we're initializing and the thread initializing is the one
+    // executing the code.
+    __ Cmp(temp2, shifted_initializing_value);
+    __ B(lo, &resolution);
+    __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+    __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value()));
+    __ Cmp(temp1, temp2);
+    __ B(eq, &frame_entry_label_);
+    __ Bind(&resolution);
+    // Jump to the resolution stub.
+    ThreadOffset32 entrypoint_offset =
+        GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline);
+    __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value()));
+    __ Bx(temp1);
+  }
   __ Bind(&frame_entry_label_);
   if (HasEmptyFrame()) {
@@ -7625,12 +7671,7 @@
     LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   vixl32::Register temp = temps.Acquire();
-  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
-  constexpr uint32_t shifted_visibly_initialized_value =
-      enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << status_lsb_position;
-  const size_t status_offset = mirror::Class::StatusOffset().SizeValue();
-  GetAssembler()->LoadFromOffset(kLoadWord, temp, class_reg, status_offset);
+  __ Ldrb(temp, MemOperand(class_reg, status_byte_offset));
   __ Cmp(temp, shifted_visibly_initialized_value);
   __ B(lo, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f4529be..ce27083 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1261,6 +1261,44 @@
 void CodeGeneratorX86::GenerateFrameEntry() {
   __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
+  // Check if we need to generate the clinit check. We will jump to the
+  // resolution stub if the class is not initialized and the executing thread is
+  // not the thread initializing it.
+  // We do this before constructing the frame to get the correct stack trace if
+  // an exception is thrown.
+  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+    NearLabel continue_execution, resolution;
+    // We'll use EBP as temporary.
+    __ pushl(EBP);
+    // Check if we're visibly initialized.
+    // We don't emit a read barrier here to save on code size. We rely on the
+    // resolution trampoline to do a suspend check before re-entering this code.
+    __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value()));
+    __ cmpb(Address(EBP,  status_byte_offset), Immediate(shifted_visibly_initialized_value));
+    __ j(kAboveEqual, &continue_execution);
+    // Check if we're initializing and the thread initializing is the one
+    // executing the code.
+    __ cmpb(Address(EBP,  status_byte_offset), Immediate(shifted_initializing_value));
+    __ j(kBelow, &resolution);
+    __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+    __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value()));
+    __ j(kEqual, &continue_execution);
+    __ Bind(&resolution);
+    __ popl(EBP);
+    // Jump to the resolution stub.
+    ThreadOffset32 entrypoint_offset =
+        GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline);
+    __ fs()->jmp(Address::Absolute(entrypoint_offset));
+    __ Bind(&continue_execution);
+    __ popl(EBP);
+  }
   __ Bind(&frame_entry_label_);
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
@@ -7233,12 +7271,6 @@
 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
     SlowPathCode* slow_path, Register class_reg) {
-  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
-  const size_t status_byte_offset =
-      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
-  constexpr uint32_t shifted_visibly_initialized_value =
-      enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
   __ cmpb(Address(class_reg,  status_byte_offset), Immediate(shifted_visibly_initialized_value));
   __ j(kBelow, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index d31a630..b1db993 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1653,6 +1653,44 @@
 void CodeGeneratorX86_64::GenerateFrameEntry() {
   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
+  // Check if we need to generate the clinit check. We will jump to the
+  // resolution stub if the class is not initialized and the executing thread is
+  // not the thread initializing it.
+  // We do this before constructing the frame to get the correct stack trace if
+  // an exception is thrown.
+  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+    NearLabel resolution;
+    // Check if we're visibly initialized.
+    // We don't emit a read barrier here to save on code size. We rely on the
+    // resolution trampoline to do a suspend check before re-entering this code.
+    __ movl(CpuRegister(TMP),
+            Address(CpuRegister(kMethodRegisterArgument),
+                    ArtMethod::DeclaringClassOffset().Int32Value()));
+    __ cmpb(Address(CpuRegister(TMP),  status_byte_offset),
+            Immediate(shifted_visibly_initialized_value));
+    __ j(kAboveEqual, &frame_entry_label_);
+    // Check if we're initializing and the thread initializing is the one
+    // executing the code.
+    __ cmpb(Address(CpuRegister(TMP),  status_byte_offset), Immediate(shifted_initializing_value));
+    __ j(kBelow, &resolution);
+    __ movl(CpuRegister(TMP),
+            Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value()));
+    __ gs()->cmpl(
+        CpuRegister(TMP),
+        Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true));
+    __ j(kEqual, &frame_entry_label_);
+    __ Bind(&resolution);
+    // Jump to the resolution stub.
+    ThreadOffset64 entrypoint_offset =
+        GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline);
+    __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true));
+  }
   __ Bind(&frame_entry_label_);
   bool skip_overflow_check = IsLeafMethod()
       && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
@@ -6282,12 +6320,6 @@
 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
     SlowPathCode* slow_path, CpuRegister class_reg) {
-  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
-  const size_t status_byte_offset =
-      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
-  constexpr uint32_t shifted_visibly_initialized_value =
-      enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
   __ cmpb(Address(class_reg,  status_byte_offset), Immediate(shifted_visibly_initialized_value));
   __ j(kBelow, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());