Diffstat (limited to 'compiler')
-rw-r--r--   compiler/driver/compiler_options.cc             | 28
-rw-r--r--   compiler/driver/compiler_options.h              | 15
-rw-r--r--   compiler/optimizing/code_generator.h            |  9
-rw-r--r--   compiler/optimizing/code_generator_arm64.cc     | 44
-rw-r--r--   compiler/optimizing/code_generator_arm_vixl.cc  | 53
-rw-r--r--   compiler/optimizing/code_generator_x86.cc       | 44
-rw-r--r--   compiler/optimizing/code_generator_x86_64.cc    | 44
7 files changed, 213 insertions, 24 deletions
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 51cd999b6d..a531bc91ff 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -23,6 +23,7 @@
 
 #include "arch/instruction_set.h"
 #include "arch/instruction_set_features.h"
+#include "art_method-inl.h"
 #include "base/runtime_debug.h"
 #include "base/string_view_cpp20.h"
 #include "base/variant_map.h"
@@ -146,14 +147,39 @@ bool CompilerOptions::ParseCompilerOptions(const std::vector<std::string>& optio
 
 bool CompilerOptions::IsImageClass(const char* descriptor) const {
   // Historical note: We used to hold the set indirectly and there was a distinction between an
-  // empty set and a null, null meaning to include all classes. However, the distiction has been
+  // empty set and a null, null meaning to include all classes. However, the distinction has been
   // removed; if we don't have a profile, we treat it as an empty set of classes. b/77340429
   return image_classes_.find(std::string_view(descriptor)) != image_classes_.end();
 }
 
+bool CompilerOptions::IsPreloadedClass(const char* pretty_descriptor) const {
+  return preloaded_classes_.find(std::string_view(pretty_descriptor)) != preloaded_classes_.end();
+}
+
 const VerificationResults* CompilerOptions::GetVerificationResults() const {
   DCHECK(Runtime::Current()->IsAotCompiler());
   return verification_results_;
 }
 
+bool CompilerOptions::ShouldCompileWithClinitCheck(ArtMethod* method) const {
+  if (method != nullptr &&
+      Runtime::Current()->IsAotCompiler() &&
+      method->IsStatic() &&
+      !method->IsConstructor() &&
+      // Compiled code for native methods never do a clinit check, so we may put the resolution
+      // trampoline for native methods. This means that it's possible post zygote fork for the
+      // entry to be dirtied. We could resolve this by either:
+      // - Make these methods use the generic JNI entrypoint, but that's not
+      //   desirable for a method that is in the profile.
+      // - Ensure the declaring class of such native methods are always in the
+      //   preloaded-classes list.
+      // - Emit the clinit check in the compiled code of native methods.
+      !method->IsNative()) {
+    ScopedObjectAccess soa(Thread::Current());
+    ObjPtr<mirror::Class> cls = method->GetDeclaringClass<kWithoutReadBarrier>();
+    return cls->IsInBootImageAndNotInPreloadedClasses();
+  }
+  return false;
+}
+
 }  // namespace art
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 1bffdb11ed..20f54bdecd 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -44,6 +44,7 @@ namespace linker {
 class Arm64RelativePatcherTest;
 }  // namespace linker
 
+class ArtMethod;
 class DexFile;
 enum class InstructionSet;
 class InstructionSetFeatures;
@@ -300,6 +301,10 @@ class CompilerOptions final {
 
   bool IsImageClass(const char* descriptor) const;
 
+  // Returns whether the given `pretty_descriptor` is in the list of preloaded
+  // classes. `pretty_descriptor` should be the result of calling `PrettyDescriptor`.
+  bool IsPreloadedClass(const char* pretty_descriptor) const;
+
   const VerificationResults* GetVerificationResults() const;
 
   bool ParseCompilerOptions(const std::vector<std::string>& options,
@@ -383,6 +388,12 @@ class CompilerOptions final {
     return ContainsElement(GetDexFilesForOatFile(), dex_file);
   }
 
+  // If this is a static non-constructor method in the boot classpath, and its class isn't
+  // initialized at compile-time, or won't be initialized by the zygote, add
+  // initialization checks at entry. This will avoid the need of trampolines
+  // which at runtime we will need to dirty after initialization.
+  bool ShouldCompileWithClinitCheck(ArtMethod* method) const;
+
  private:
   bool ParseDumpInitFailures(const std::string& option, std::string* error_msg);
   bool ParseRegisterAllocationStrategy(const std::string& option, std::string* error_msg);
@@ -408,6 +419,10 @@ class CompilerOptions final {
   // Must not be empty for real boot image, only for tests pretending to compile boot image.
   HashSet<std::string> image_classes_;
 
+  // Classes listed in the preloaded-classes file, used for boot image and
+  // boot image extension compilation.
+  HashSet<std::string> preloaded_classes_;
+
   // Results of AOT verification.
   const VerificationResults* verification_results_;
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 7b46e13a44..de247a98b9 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -37,6 +37,7 @@
 #include "optimizing_compiler_stats.h"
 #include "read_barrier_option.h"
 #include "stack.h"
+#include "subtype_check.h"
 #include "utils/assembler.h"
 #include "utils/label.h"
 
@@ -60,6 +61,14 @@ static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
 static const ReadBarrierOption gCompilerReadBarrierOption =
     gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
 
+constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
+constexpr size_t status_byte_offset =
+    mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
+constexpr uint32_t shifted_visibly_initialized_value =
+    enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
+constexpr uint32_t shifted_initializing_value =
+    enum_cast<uint32_t>(ClassStatus::kInitializing) << (status_lsb_position % kBitsPerByte);
+
 class Assembler;
 class CodeGenerator;
 class CompilerOptions;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index eb95541db1..17407a59cd 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1233,6 +1233,45 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
 
 void CodeGeneratorARM64::GenerateFrameEntry() {
   MacroAssembler* masm = GetVIXLAssembler();
+
+  // Check if we need to generate the clinit check. We will jump to the
+  // resolution stub if the class is not initialized and the executing thread is
+  // not the thread initializing it.
+  // We do this before constructing the frame to get the correct stack trace if
+  // an exception is thrown.
+  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+    UseScratchRegisterScope temps(masm);
+    vixl::aarch64::Label resolution;
+
+    Register temp1 = temps.AcquireW();
+    Register temp2 = temps.AcquireW();
+
+    // Check if we're visibly initialized.
+
+    // We don't emit a read barrier here to save on code size. We rely on the
+    // resolution trampoline to do a suspend check before re-entering this code.
+    __ Ldr(temp1, MemOperand(kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
+    __ Ldrb(temp2, HeapOperand(temp1, status_byte_offset));
+    __ Cmp(temp2, shifted_visibly_initialized_value);
+    __ B(hs, &frame_entry_label_);
+
+    // Check if we're initializing and the thread initializing is the one
+    // executing the code.
+    __ Cmp(temp2, shifted_initializing_value);
+    __ B(lo, &resolution);
+
+    __ Ldr(temp1, HeapOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+    __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArm64PointerSize>().Int32Value()));
+    __ Cmp(temp1, temp2);
+    __ B(eq, &frame_entry_label_);
+    __ Bind(&resolution);
+
+    // Jump to the resolution stub.
+    ThreadOffset64 entrypoint_offset =
+        GetThreadOffset<kArm64PointerSize>(kQuickQuickResolutionTrampoline);
+    __ Ldr(temp1.X(), MemOperand(tr, entrypoint_offset.Int32Value()));
+    __ Br(temp1.X());
+  }
   __ Bind(&frame_entry_label_);
 
   bool do_overflow_check =
@@ -1904,11 +1943,6 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
                                                                      Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   Register temp = temps.AcquireW();
-  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
-  const size_t status_byte_offset =
-      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
-  constexpr uint32_t shifted_visibly_initialized_value =
-      enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
 
   // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize
   // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index bf8e896a9c..0850e2f4cd 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2237,6 +2237,52 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
   DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+
+  // Check if we need to generate the clinit check. We will jump to the
+  // resolution stub if the class is not initialized and the executing thread is
+  // not the thread initializing it.
+  // We do this before constructing the frame to get the correct stack trace if
+  // an exception is thrown.
+  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    vixl32::Label resolution;
+
+    // Check if we're visibly initialized.
+
+    vixl32::Register temp1 = temps.Acquire();
+    // Use r4 as other temporary register.
+    DCHECK(!blocked_core_registers_[R4]);
+    DCHECK(!kCoreCalleeSaves.Includes(r4));
+    vixl32::Register temp2 = r4;
+    for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
+      DCHECK(!reg.Is(r4));
+    }
+
+    // We don't emit a read barrier here to save on code size. We rely on the
+    // resolution trampoline to do a suspend check before re-entering this code.
+    __ Ldr(temp1, MemOperand(kMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()));
+    __ Ldrb(temp2, MemOperand(temp1, status_byte_offset));
+    __ Cmp(temp2, shifted_visibly_initialized_value);
+    __ B(cs, &frame_entry_label_);
+
+    // Check if we're initializing and the thread initializing is the one
+    // executing the code.
+    __ Cmp(temp2, shifted_initializing_value);
+    __ B(lo, &resolution);
+
+    __ Ldr(temp1, MemOperand(temp1, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+    __ Ldr(temp2, MemOperand(tr, Thread::TidOffset<kArmPointerSize>().Int32Value()));
+    __ Cmp(temp1, temp2);
+    __ B(eq, &frame_entry_label_);
+    __ Bind(&resolution);
+
+    // Jump to the resolution stub.
+    ThreadOffset32 entrypoint_offset =
+        GetThreadOffset<kArmPointerSize>(kQuickQuickResolutionTrampoline);
+    __ Ldr(temp1, MemOperand(tr, entrypoint_offset.Int32Value()));
+    __ Bx(temp1);
+  }
+
   __ Bind(&frame_entry_label_);
 
   if (HasEmptyFrame()) {
@@ -7625,12 +7671,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
     LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
   UseScratchRegisterScope temps(GetVIXLAssembler());
   vixl32::Register temp = temps.Acquire();
-  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
-  constexpr uint32_t shifted_visibly_initialized_value =
-      enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << status_lsb_position;
-
-  const size_t status_offset = mirror::Class::StatusOffset().SizeValue();
-  GetAssembler()->LoadFromOffset(kLoadWord, temp, class_reg, status_offset);
+  __ Ldrb(temp, MemOperand(class_reg, status_byte_offset));
   __ Cmp(temp, shifted_visibly_initialized_value);
   __ B(lo, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f4529bec7a..ce27083d00 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1261,6 +1261,44 @@ void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
 
 void CodeGeneratorX86::GenerateFrameEntry() {
   __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
+
+  // Check if we need to generate the clinit check. We will jump to the
+  // resolution stub if the class is not initialized and the executing thread is
+  // not the thread initializing it.
+  // We do this before constructing the frame to get the correct stack trace if
+  // an exception is thrown.
+  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+    NearLabel continue_execution, resolution;
+    // We'll use EBP as temporary.
+    __ pushl(EBP);
+    // Check if we're visibly initialized.
+
+    // We don't emit a read barrier here to save on code size. We rely on the
+    // resolution trampoline to do a suspend check before re-entering this code.
+    __ movl(EBP, Address(kMethodRegisterArgument, ArtMethod::DeclaringClassOffset().Int32Value()));
+    __ cmpb(Address(EBP, status_byte_offset), Immediate(shifted_visibly_initialized_value));
+    __ j(kAboveEqual, &continue_execution);
+
+    // Check if we're initializing and the thread initializing is the one
+    // executing the code.
+    __ cmpb(Address(EBP, status_byte_offset), Immediate(shifted_initializing_value));
+    __ j(kBelow, &resolution);
+
+    __ movl(EBP, Address(EBP, mirror::Class::ClinitThreadIdOffset().Int32Value()));
+    __ fs()->cmpl(EBP, Address::Absolute(Thread::TidOffset<kX86PointerSize>().Int32Value()));
+    __ j(kEqual, &continue_execution);
+    __ Bind(&resolution);
+
+    __ popl(EBP);
+    // Jump to the resolution stub.
+    ThreadOffset32 entrypoint_offset =
+        GetThreadOffset<kX86PointerSize>(kQuickQuickResolutionTrampoline);
+    __ fs()->jmp(Address::Absolute(entrypoint_offset));
+
+    __ Bind(&continue_execution);
+    __ popl(EBP);
+  }
+
   __ Bind(&frame_entry_label_);
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
@@ -7233,12 +7271,6 @@ void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
 
 void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
     SlowPathCode* slow_path, Register class_reg) {
-  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
-  const size_t status_byte_offset =
-      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
-  constexpr uint32_t shifted_visibly_initialized_value =
-      enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
-
   __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
   __ j(kBelow, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index d31a6303b4..b1db993be1 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1653,6 +1653,44 @@ void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
 
 void CodeGeneratorX86_64::GenerateFrameEntry() {
   __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
+
+  // Check if we need to generate the clinit check. We will jump to the
+  // resolution stub if the class is not initialized and the executing thread is
+  // not the thread initializing it.
+  // We do this before constructing the frame to get the correct stack trace if
+  // an exception is thrown.
+  if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
+    NearLabel resolution;
+    // Check if we're visibly initialized.
+
+    // We don't emit a read barrier here to save on code size. We rely on the
+    // resolution trampoline to do a suspend check before re-entering this code.
+    __ movl(CpuRegister(TMP),
+            Address(CpuRegister(kMethodRegisterArgument),
+                    ArtMethod::DeclaringClassOffset().Int32Value()));
+    __ cmpb(Address(CpuRegister(TMP), status_byte_offset),
+            Immediate(shifted_visibly_initialized_value));
+    __ j(kAboveEqual, &frame_entry_label_);
+
+    // Check if we're initializing and the thread initializing is the one
+    // executing the code.
+    __ cmpb(Address(CpuRegister(TMP), status_byte_offset), Immediate(shifted_initializing_value));
+    __ j(kBelow, &resolution);
+
+    __ movl(CpuRegister(TMP),
+            Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value()));
+    __ gs()->cmpl(
+        CpuRegister(TMP),
+        Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true));
+    __ j(kEqual, &frame_entry_label_);
+    __ Bind(&resolution);
+
+    // Jump to the resolution stub.
+    ThreadOffset64 entrypoint_offset =
+        GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline);
+    __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true));
+  }
+
   __ Bind(&frame_entry_label_);
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
@@ -6282,12 +6320,6 @@ void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
 
 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
     SlowPathCode* slow_path, CpuRegister class_reg) {
-  constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
-  const size_t status_byte_offset =
-      mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
-  constexpr uint32_t shifted_visibly_initialized_value =
-      enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
-
   __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
   __ j(kBelow, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
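
Note on the shared constants: all four backends above rely on the same trick, now expressed by the constants hoisted into code_generator.h. Instead of loading the full 32-bit status field of mirror::Class and materializing a large immediate (see the CMP comment kept in the arm64 hunk), the generated code loads the single byte that holds the ClassStatus bits and compares it against the pre-shifted constants. The standalone C++ sketch below illustrates why the one-byte compare is equivalent to checking the whole status word; the concrete layout used here (28 subtype-check bits, status in the top four bits, little-endian byte order, offsets relative to the status field itself rather than to the whole mirror::Class object) is an assumption for illustration only — in ART the real values come from SubtypeCheckBits::BitStructSizeOf() and the ClassStatus enum.

// Standalone sketch (not part of this change): why a single-byte compare against
// shifted_visibly_initialized_value matches a check of the full 32-bit status word.
// All numeric values below are assumptions for illustration.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <initializer_list>

namespace {

enum class ClassStatus : uint32_t {
  kNotReady = 0,
  kInitializing = 14,        // assumed value; only the ordering matters here
  kVisiblyInitialized = 15,  // assumed value; the highest status
};

constexpr size_t kBitsPerByte = 8;
constexpr size_t status_lsb_position = 28;  // assumed SubtypeCheckBits size
// Offset of the byte holding the status bits, relative to the status field itself
// (the real constant also adds mirror::Class::StatusOffset()).
constexpr size_t status_byte_offset = status_lsb_position / kBitsPerByte;  // byte 3
constexpr uint32_t shifted_visibly_initialized_value =
    static_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);

// Builds the 32-bit word: status in the high bits, subtype-check bits below it.
constexpr uint32_t MakeStatusWord(ClassStatus status, uint32_t subtype_bits) {
  return (static_cast<uint32_t>(status) << status_lsb_position) | subtype_bits;
}

}  // namespace

int main() {
  for (uint32_t subtype_bits : {0u, 0x0FFFFFFFu}) {  // the low 28 bits are "don't care"
    for (ClassStatus s : {ClassStatus::kNotReady,
                          ClassStatus::kInitializing,
                          ClassStatus::kVisiblyInitialized}) {
      const uint32_t word = MakeStatusWord(s, subtype_bits);

      // What the generated code does: load one byte at status_byte_offset and
      // compare it (unsigned) against the pre-shifted status constant.
      // Assumes a little-endian target, which is what these backends emit for.
      uint8_t status_byte;
      std::memcpy(&status_byte,
                  reinterpret_cast<const uint8_t*>(&word) + status_byte_offset,
                  sizeof(status_byte));
      const bool byte_check = status_byte >= shifted_visibly_initialized_value;

      // The check it stands in for: extract the full status and compare it.
      const bool full_check =
          (word >> status_lsb_position) >=
          static_cast<uint32_t>(ClassStatus::kVisiblyInitialized);

      assert(byte_check == full_check);
    }
  }
  return 0;
}

Under these assumptions the assert never fires for any combination of status and subtype-check bits: on a little-endian target the byte at status_byte_offset holds exactly the top bits of the word, so the unsigned byte compare orders class statuses the same way the full-word compare would, regardless of the subtype-check bits below.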