Diffstat (limited to 'compiler/optimizing')

 compiler/optimizing/code_generator.cc                  |   4
 compiler/optimizing/code_generator_arm64.cc            | 145
 compiler/optimizing/code_generator_arm64.h             |  19
 compiler/optimizing/code_generator_arm_vixl.cc         | 127
 compiler/optimizing/code_generator_arm_vixl.h          |  19
 compiler/optimizing/code_generator_mips.cc             |  59
 compiler/optimizing/code_generator_mips64.cc           |  59
 compiler/optimizing/code_generator_x86.cc              |  51
 compiler/optimizing/code_generator_x86_64.cc           |  51
 compiler/optimizing/nodes.h                            |   7
 compiler/optimizing/optimizing_compiler.cc             |  13
 compiler/optimizing/prepare_for_register_allocation.cc |   4
 compiler/optimizing/scheduler.cc                       |  49
 compiler/optimizing/scheduler.h                        |  42
 compiler/optimizing/scheduler_arm.h                    |   5
 compiler/optimizing/scheduler_arm64.h                  |   4
 compiler/optimizing/scheduler_test.cc                  |  15
 compiler/optimizing/stack_map_stream.h                 |   3
 18 files changed, 305 insertions(+), 371 deletions(-)
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index b0a05da0b1..f6a104b7c2 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -737,14 +737,12 @@ void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
 
 void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) {
   DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall);
+  DCHECK(!cls->MustGenerateClinitCheck());
   LocationSummary* locations = cls->GetLocations();
   MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
   if (cls->NeedsAccessCheck()) {
     CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
     InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
-  } else if (cls->MustGenerateClinitCheck()) {
-    CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
-    InvokeRuntime(kQuickInitializeStaticStorage, cls, cls->GetDexPc());
   } else {
     CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
     InvokeRuntime(kQuickInitializeType, cls, cls->GetDexPc());
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 8a5cbcade0..00bf2f1c51 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -307,35 +307,41 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
 
 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  LoadClassSlowPathARM64(HLoadClass* cls,
-                         HInstruction* at,
-                         uint32_t dex_pc,
-                         bool do_clinit)
-      : SlowPathCodeARM64(at),
-        cls_(cls),
-        dex_pc_(dex_pc),
-        do_clinit_(do_clinit) {
+  LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at)
+      : SlowPathCodeARM64(at), cls_(cls) {
     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
   }
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     Location out = locations->Out();
-    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    const uint32_t dex_pc = instruction_->GetDexPc();
+    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
+    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
 
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    dex::TypeIndex type_index = cls_->GetTypeIndex();
-    __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
-    QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
-                                                : kQuickInitializeType;
-    arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
-    if (do_clinit_) {
-      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
-    } else {
+    if (must_resolve_type) {
+      DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile()));
+      dex::TypeIndex type_index = cls_->GetTypeIndex();
+      __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
+      arm64_codegen->InvokeRuntime(kQuickInitializeType, instruction_, dex_pc, this);
       CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+      // If we also must_do_clinit, the resolved type is now in the correct register.
+    } else {
+      DCHECK(must_do_clinit);
+      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
+      arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
+                                  source,
+                                  cls_->GetType());
+    }
+    if (must_do_clinit) {
+      arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
     }
 
     // Move the class to the desired location.
@@ -354,12 +360,6 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
   // The class this slow path will load.
   HLoadClass* const cls_;
 
-  // The dex PC of `at_`.
-  const uint32_t dex_pc_;
-
-  // Whether to initialize the class.
-  const bool do_clinit_;
-
   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
 };
 
@@ -1403,9 +1403,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
-                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-      jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
-                                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
+                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
@@ -1420,16 +1418,6 @@ void CodeGeneratorARM64::EmitJumpTables() {
 
 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
   EmitJumpTables();
-
-  // Emit JIT baker read barrier slow paths.
-  DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty());
-  for (auto& entry : jit_baker_read_barrier_slow_paths_) {
-    uint32_t encoded_data = entry.first;
-    vixl::aarch64::Label* slow_path_entry = &entry.second.label;
-    __ Bind(slow_path_entry);
-    CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name */ nullptr);
-  }
-
   // Ensure we emit the literal pool.
   __ FinalizeCode();
 
@@ -3194,8 +3182,8 @@ void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
 
 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
   // We assume the class is not null.
-  SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(
-      check->GetLoadClass(), check, check->GetDexPc(), true);
+  SlowPathCodeARM64* slow_path =
+      new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check);
   codegen_->AddSlowPath(slow_path);
   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
 }
@@ -4746,18 +4734,9 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
   return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
 }
 
-void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
-  ExactAssemblyScope guard(GetVIXLAssembler(), 1 * vixl::aarch64::kInstructionSize);
-  if (Runtime::Current()->UseJitCompilation()) {
-    auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
-    vixl::aarch64::Label* slow_path_entry = &it->second.label;
-    __ cbnz(mr, slow_path_entry);
-  } else {
-    baker_read_barrier_patches_.emplace_back(custom_data);
-    vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label;
-    __ bind(cbnz_label);
-    __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
- } +vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) { + baker_read_barrier_patches_.emplace_back(custom_data); + return &baker_read_barrier_patches_.back().label; } vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( @@ -5192,8 +5171,8 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA bool do_clinit = cls->MustGenerateClinitCheck(); if (generate_null_check || do_clinit) { DCHECK(cls->CanCallRuntime()); - SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64( - cls, cls, cls->GetDexPc(), do_clinit); + SlowPathCodeARM64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ Cbz(out, slow_path->GetEntryLabel()); @@ -6276,14 +6255,14 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad( if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. - if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { + if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && + !Runtime::Current()->UseJitCompilation()) { // Query `art::Thread::Current()->GetIsGcMarking()` (stored in // the Marking Register) to decide whether we need to enter // the slow path to mark the GC root. // - // We use shared thunks for the slow path; shared within the method - // for JIT, across methods for AOT. That thunk checks the reference - // and jumps to the entrypoint if needed. + // We use link-time generated thunks for the slow path. That thunk + // checks the reference and jumps to the entrypoint if needed. // // lr = &return_address; // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. @@ -6297,18 +6276,20 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad( DCHECK(temps.IsAvailable(ip1)); temps.Exclude(ip0, ip1); uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); + vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); - ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); + EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); vixl::aarch64::Label return_address; __ adr(lr, &return_address); if (fixup_label != nullptr) { - __ bind(fixup_label); + __ Bind(fixup_label); } static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, "GC root LDR must be 2 instruction (8B) before the return address label."); __ ldr(root_reg, MemOperand(obj.X(), offset)); - EmitBakerReadBarrierCbnz(custom_data); - __ bind(&return_address); + __ Bind(cbnz_label); + __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + __ Bind(&return_address); } else { // Query `art::Thread::Current()->GetIsGcMarking()` (stored in // the Marking Register) to decide whether we need to enter @@ -6380,17 +6361,18 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - if (kBakerReadBarrierLinkTimeThunksEnableForFields && !use_load_acquire) { + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !use_load_acquire && + !Runtime::Current()->UseJitCompilation()) { // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the // Marking Register) to decide whether we need to enter the slow // path to mark the reference. 
Then, in the slow path, check the // gray bit in the lock word of the reference's holder (`obj`) to // decide whether to mark `ref` or not. // - // We use shared thunks for the slow path; shared within the method - // for JIT, across methods for AOT. That thunk checks the holder - // and jumps to the entrypoint if needed. If the holder is not gray, - // it creates a fake dependency and returns to the LDR instruction. + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. // // lr = &gray_return_address; // if (mr) { // Thread::Current()->GetIsGcMarking() @@ -6416,13 +6398,15 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins DCHECK(temps.IsAvailable(ip1)); temps.Exclude(ip0, ip1); uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode()); + vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); { - ExactAssemblyScope guard(GetVIXLAssembler(), + EmissionCheckScope guard(GetVIXLAssembler(), (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); vixl::aarch64::Label return_address; __ adr(lr, &return_address); - EmitBakerReadBarrierCbnz(custom_data); + __ Bind(cbnz_label); + __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), "Field LDR must be 1 instruction (4B) before the return address label; " " 2 instructions (8B) for heap poisoning."); @@ -6432,7 +6416,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins MaybeRecordImplicitNullCheck(instruction); } GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - __ bind(&return_address); + __ Bind(&return_address); } MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); return; @@ -6468,17 +6452,17 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); size_t scale_factor = DataType::SizeShift(DataType::Type::kReference); - if (kBakerReadBarrierLinkTimeThunksEnableForArrays) { + if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation()) { // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the // Marking Register) to decide whether we need to enter the slow // path to mark the reference. Then, in the slow path, check the // gray bit in the lock word of the reference's holder (`obj`) to // decide whether to mark `ref` or not. // - // We use shared thunks for the slow path; shared within the method - // for JIT, across methods for AOT. That thunk checks the holder - // and jumps to the entrypoint if needed. If the holder is not gray, - // it creates a fake dependency and returns to the LDR instruction. + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. 
// // lr = &gray_return_address; // if (mr) { // Thread::Current()->GetIsGcMarking() @@ -6499,21 +6483,23 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins DCHECK(temps.IsAvailable(ip1)); temps.Exclude(ip0, ip1); uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode()); + vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); __ Add(temp.X(), obj.X(), Operand(data_offset)); { - ExactAssemblyScope guard(GetVIXLAssembler(), + EmissionCheckScope guard(GetVIXLAssembler(), (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); vixl::aarch64::Label return_address; __ adr(lr, &return_address); - EmitBakerReadBarrierCbnz(custom_data); + __ Bind(cbnz_label); + __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), "Array LDR must be 1 instruction (4B) before the return address label; " " 2 instructions (8B) for heap poisoning."); __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); DCHECK(!needs_null_check); // The thunk cannot handle the null check. GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - __ bind(&return_address); + __ Bind(&return_address); } MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); return; @@ -7002,12 +6988,7 @@ void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler, UNREACHABLE(); } - // For JIT, the slow path is considered part of the compiled method, - // so JIT should pass null as `debug_name`. Tests may not have a runtime. - DCHECK(Runtime::Current() == nullptr || - !Runtime::Current()->UseJitCompilation() || - debug_name == nullptr); - if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) { + if (GetCompilerOptions().GenerateAnyDebugInfo()) { std::ostringstream oss; oss << "BakerReadBarrierThunk"; switch (kind) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index c07d1eaf95..93bab3180c 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -619,9 +619,9 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::StringIndex string_index, vixl::aarch64::Label* adrp_label = nullptr); - // Emit the CBNZ instruction for baker read barrier and record - // the associated patch for AOT or slow path for JIT. - void EmitBakerReadBarrierCbnz(uint32_t custom_data); + // Add a new baker read barrier patch and return the label to be bound + // before the CBNZ instruction. + vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data); vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file, @@ -928,19 +928,6 @@ class CodeGeneratorARM64 : public CodeGenerator { // Patches for class literals in JIT compiled code. TypeToLiteralMap jit_class_patches_; - // Baker read barrier slow paths, mapping custom data (uint32_t) to label. - // Wrap the label to work around vixl::aarch64::Label being non-copyable - // and non-moveable and as such unusable in ArenaSafeMap<>. 
- struct LabelWrapper { - LabelWrapper(const LabelWrapper& src) - : label() { - DCHECK(!src.label.IsLinked() && !src.label.IsBound()); - } - LabelWrapper() = default; - vixl::aarch64::Label label; - }; - ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_; - friend class linker::Arm64RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64); }; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 836a989f22..d1b5bcb66e 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -108,6 +108,14 @@ constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10; // Marker that code is yet to be, and must, be implemented. #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented " +static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) { + ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes); + __ bind(patch_label); + vixl32::Label placeholder_label; + __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time. + __ bind(&placeholder_label); +} + static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) { return rt.IsLow() && rn.IsLow() && offset < 32u; } @@ -501,29 +509,39 @@ class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: - LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit) - : SlowPathCodeARMVIXL(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) { + LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at) + : SlowPathCodeARMVIXL(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConventionARMVIXL calling_convention; - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ Mov(calling_convention.GetRegisterAt(0), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage - : kQuickInitializeType; - arm_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); - } else { + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ Mov(calling_convention.GetRegisterAt(0), type_index.index_); + arm_codegen->InvokeRuntime(kQuickInitializeType, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. + } else { + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? 
out : locations->InAt(0); + arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source); + } + if (must_do_clinit) { + arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. @@ -541,12 +559,6 @@ class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. - const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL); }; @@ -2344,9 +2356,7 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Always save the LR register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(LR)); // Give D30 and D31 as scratch register to VIXL. The register allocator only works on @@ -2402,16 +2412,6 @@ void CodeGeneratorARMVIXL::FixJumpTables() { void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { FixJumpTables(); - - // Emit JIT baker read barrier slow paths. - DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty()); - for (auto& entry : jit_baker_read_barrier_slow_paths_) { - uint32_t encoded_data = entry.first; - vixl::aarch32::Label* slow_path_entry = &entry.second.label; - __ Bind(slow_path_entry); - CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name */ nullptr); - } - GetAssembler()->FinalizeCode(); CodeGenerator::Finalize(allocator); @@ -7508,8 +7508,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); LoadClassSlowPathARMVIXL* slow_path = - new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); @@ -7555,10 +7554,7 @@ void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. LoadClassSlowPathARMVIXL* slow_path = - new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), - check, - check->GetDexPc(), - /* do_clinit */ true); + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); } @@ -8796,14 +8792,14 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. 
- if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { + if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && + !Runtime::Current()->UseJitCompilation()) { // Query `art::Thread::Current()->GetIsGcMarking()` (stored in // the Marking Register) to decide whether we need to enter // the slow path to mark the GC root. // - // We use shared thunks for the slow path; shared within the method - // for JIT, across methods for AOT. That thunk checks the reference - // and jumps to the entrypoint if needed. + // We use link-time generated thunks for the slow path. That thunk + // checks the reference and jumps to the entrypoint if needed. // // lr = &return_address; // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. @@ -8816,6 +8812,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( temps.Exclude(ip); bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow); + vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes); vixl32::Label return_address; @@ -8826,7 +8823,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( DCHECK_LT(offset, kReferenceLoadMinFarOffset); ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset)); - EmitBakerReadBarrierBne(custom_data); + EmitPlaceholderBne(this, bne_label); __ Bind(&return_address); DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET @@ -8889,17 +8886,17 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - if (kBakerReadBarrierLinkTimeThunksEnableForFields) { + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the // Marking Register) to decide whether we need to enter the slow // path to mark the reference. Then, in the slow path, check the // gray bit in the lock word of the reference's holder (`obj`) to // decide whether to mark `ref` or not. // - // We use shared thunks for the slow path; shared within the method - // for JIT, across methods for AOT. That thunk checks the holder - // and jumps to the entrypoint if needed. If the holder is not gray, - // it creates a fake dependency and returns to the LDR instruction. + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. 
// // lr = &gray_return_address; // if (mr) { // Thread::Current()->GetIsGcMarking() @@ -8928,6 +8925,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i UseScratchRegisterScope temps(GetVIXLAssembler()); temps.Exclude(ip); uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow); + vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); { vixl::EmissionCheckScope guard( @@ -8936,7 +8934,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i vixl32::Label return_address; EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); __ cmp(mr, Operand(0)); - EmitBakerReadBarrierBne(custom_data); + EmitPlaceholderBne(this, bne_label); ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); if (needs_null_check) { @@ -8982,17 +8980,17 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); ScaleFactor scale_factor = TIMES_4; - if (kBakerReadBarrierLinkTimeThunksEnableForArrays) { + if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation()) { // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the // Marking Register) to decide whether we need to enter the slow // path to mark the reference. Then, in the slow path, check the // gray bit in the lock word of the reference's holder (`obj`) to // decide whether to mark `ref` or not. // - // We use shared thunks for the slow path; shared within the method - // for JIT, across methods for AOT. That thunk checks the holder - // and jumps to the entrypoint if needed. If the holder is not gray, - // it creates a fake dependency and returns to the LDR instruction. + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. // // lr = &gray_return_address; // if (mr) { // Thread::Current()->GetIsGcMarking() @@ -9012,6 +9010,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i UseScratchRegisterScope temps(GetVIXLAssembler()); temps.Exclude(ip); uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode()); + vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); __ Add(data_reg, obj, Operand(data_offset)); { @@ -9021,7 +9020,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i vixl32::Label return_address; EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); __ cmp(mr, Operand(0)); - EmitBakerReadBarrierBne(custom_data); + EmitPlaceholderBne(this, bne_label); ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); DCHECK(!needs_null_check); // The thunk cannot handle the null check. 
@@ -9492,20 +9491,9 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa return &patches->back(); } -void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) { - ExactAssemblyScope eas(GetVIXLAssembler(), 1 * k32BitT32InstructionSizeInBytes); - if (Runtime::Current()->UseJitCompilation()) { - auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data); - vixl::aarch32::Label* slow_path_entry = &it->second.label; - __ b(ne, EncodingSize(Wide), slow_path_entry); - } else { - baker_read_barrier_patches_.emplace_back(custom_data); - vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label; - __ bind(patch_label); - vixl32::Label placeholder_label; - __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time. - __ bind(&placeholder_label); - } +vixl32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) { + baker_read_barrier_patches_.emplace_back(custom_data); + return &baker_read_barrier_patches_.back().label; } VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) { @@ -10097,12 +10085,7 @@ void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assemb UNREACHABLE(); } - // For JIT, the slow path is considered part of the compiled method, - // so JIT should pass null as `debug_name`. Tests may not have a runtime. - DCHECK(Runtime::Current() == nullptr || - !Runtime::Current()->UseJitCompilation() || - debug_name == nullptr); - if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) { + if (GetCompilerOptions().GenerateAnyDebugInfo()) { std::ostringstream oss; oss << "BakerReadBarrierThunk"; switch (kind) { diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index ef82f2e904..fc8cf98173 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -589,9 +589,9 @@ class CodeGeneratorARMVIXL : public CodeGenerator { PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, dex::StringIndex string_index); - // Emit the BNE instruction for baker read barrier and record - // the associated patch for AOT or slow path for JIT. - void EmitBakerReadBarrierBne(uint32_t custom_data); + // Add a new baker read barrier patch and return the label to be bound + // before the BNE instruction. + vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data); VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address); VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, @@ -916,19 +916,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Patches for class literals in JIT compiled code. TypeToLiteralMap jit_class_patches_; - // Baker read barrier slow paths, mapping custom data (uint32_t) to label. - // Wrap the label to work around vixl::aarch32::Label being non-copyable - // and non-moveable and as such unusable in ArenaSafeMap<>. 
- struct LabelWrapper { - LabelWrapper(const LabelWrapper& src) - : label() { - DCHECK(!src.label.IsReferenced() && !src.label.IsBound()); - } - LabelWrapper() = default; - vixl::aarch32::Label label; - }; - ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_; - friend class linker::Thumb2RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL); }; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 4aed2c091c..60bbf4c9f0 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -222,35 +222,41 @@ class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS { class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { public: - LoadClassSlowPathMIPS(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCodeMIPS(at), - cls_(cls), - dex_pc_(dex_pc), - do_clinit_(do_clinit) { + LoadClassSlowPathMIPS(HLoadClass* cls, HInstruction* at) + : SlowPathCodeMIPS(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - InvokeRuntimeCallingConvention calling_convention; - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage - : kQuickInitializeType; - mips_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); - } else { + InvokeRuntimeCallingConvention calling_convention; + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), mips_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); + mips_codegen->InvokeRuntime(kQuickInitializeType, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. + } else { + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + mips_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + source, + cls_->GetType()); + } + if (must_do_clinit) { + mips_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. @@ -272,12 +278,6 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. 
- const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS); }; @@ -3598,11 +3598,8 @@ void LocationsBuilderMIPS::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorMIPS::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. - SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS( - check->GetLoadClass(), - check, - check->GetDexPc(), - true); + SlowPathCodeMIPS* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).AsRegister<Register>()); @@ -8277,8 +8274,8 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + SlowPathCodeMIPS* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ Beqz(out, slow_path->GetEntryLabel()); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 75169139cd..81d86a9a3f 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -175,35 +175,41 @@ class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - LoadClassSlowPathMIPS64(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCodeMIPS64(at), - cls_(cls), - dex_pc_(dex_pc), - do_clinit_(do_clinit) { + LoadClassSlowPathMIPS64(HLoadClass* cls, HInstruction* at) + : SlowPathCodeMIPS64(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - InvokeRuntimeCallingConvention calling_convention; - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? 
kQuickInitializeStaticStorage - : kQuickInitializeType; - mips64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); - } else { + InvokeRuntimeCallingConvention calling_convention; + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), mips64_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); + mips64_codegen->InvokeRuntime(kQuickInitializeType, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. + } else { + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + source, + cls_->GetType()); + } + if (must_do_clinit) { + mips64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. @@ -225,12 +231,6 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. - const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS64); }; @@ -3153,11 +3153,8 @@ void LocationsBuilderMIPS64::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. - SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64( - check->GetLoadClass(), - check, - check->GetDexPc(), - true); + SlowPathCodeMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).AsRegister<GpuRegister>()); @@ -6315,8 +6312,8 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + SlowPathCodeMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ Beqzc(out, slow_path->GetEntryLabel()); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 30436eef9c..83ce734797 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -255,36 +255,42 @@ class LoadStringSlowPathX86 : public SlowPathCode { class LoadClassSlowPathX86 : public SlowPathCode { public: - LoadClassSlowPathX86(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) { + LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at) + : SlowPathCode(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { 
LocationSummary* locations = instruction_->GetLocations(); + Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_)); - x86_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage - : kQuickInitializeType, - instruction_, - dex_pc_, - this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); - } else { + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_)); + x86_codegen->InvokeRuntime(kQuickInitializeType, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. + } else { + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source); + } + if (must_do_clinit) { + x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. - Location out = locations->Out(); if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); x86_codegen->Move32(out, Location::RegisterLocation(EAX)); @@ -299,12 +305,6 @@ class LoadClassSlowPathX86 : public SlowPathCode { // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. - const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86); }; @@ -6588,8 +6588,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { @@ -6636,8 +6635,8 @@ void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) { // We assume the class to not be null. 
- SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86( - check->GetLoadClass(), check, check->GetDexPc(), true); + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).AsRegister<Register>()); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 0d7837e70f..aabf2e0be4 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -239,34 +239,41 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { class LoadClassSlowPathX86_64 : public SlowPathCode { public: - LoadClassSlowPathX86_64(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) { + LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at) + : SlowPathCode(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); + Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); // Custom calling convention: RAX serves as both input and output. - __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_)); - x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType, - instruction_, - dex_pc_, - this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); - } else { + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ movl(CpuRegister(RAX), Immediate(type_index.index_)); + x86_64_codegen->InvokeRuntime(kQuickInitializeType, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. + } else { + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + x86_64_codegen->Move(Location::RegisterLocation(RAX), source); + } + if (must_do_clinit) { + x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } - Location out = locations->Out(); // Move the class to the desired location. if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); @@ -283,12 +290,6 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. 
- const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64); }; @@ -5927,8 +5928,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ testl(out, out); @@ -5973,8 +5974,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { // We assume the class to not be null. - SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64( - check->GetLoadClass(), check, check->GetDexPc(), true); + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).AsRegister<CpuRegister>()); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 16a7417301..8b9e1da0d3 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -6284,6 +6284,13 @@ class HLoadClass FINAL : public HInstruction { bool IsInBootImage() const { return GetPackedFlag<kFlagIsInBootImage>(); } bool MustGenerateClinitCheck() const { return GetPackedFlag<kFlagGenerateClInitCheck>(); } + bool MustResolveTypeOnSlowPath() const { + // Check that this instruction has a slow path. + DCHECK(GetLoadKind() != LoadKind::kRuntimeCall); // kRuntimeCall calls on main path. + DCHECK(GetLoadKind() == LoadKind::kBssEntry || MustGenerateClinitCheck()); + return GetLoadKind() == LoadKind::kBssEntry; + } + void MarkInBootImage() { SetPackedFlag<kFlagIsInBootImage>(true); } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index d96746fdd7..b2733ee1f2 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -1101,15 +1101,18 @@ static void CreateJniStackMap(ArenaStack* arena_stack, const JniCompiledMethod& jni_compiled_method, /* out */ ArenaVector<uint8_t>* stack_map) { ScopedArenaAllocator allocator(arena_stack); - StackMapStream stack_map_stream(&allocator, jni_compiled_method.GetInstructionSet()); - stack_map_stream.BeginMethod( + // StackMapStream is quite large, so allocate it using the ScopedArenaAllocator + // to stay clear of the frame size limit. 
+ std::unique_ptr<StackMapStream> stack_map_stream( + new (&allocator) StackMapStream(&allocator, jni_compiled_method.GetInstructionSet())); + stack_map_stream->BeginMethod( jni_compiled_method.GetFrameSize(), jni_compiled_method.GetCoreSpillMask(), jni_compiled_method.GetFpSpillMask(), /* num_dex_registers */ 0); - stack_map_stream.EndMethod(); - stack_map->resize(stack_map_stream.PrepareForFillIn()); - stack_map_stream.FillInCodeInfo(MemoryRegion(stack_map->data(), stack_map->size())); + stack_map_stream->EndMethod(); + stack_map->resize(stack_map_stream->PrepareForFillIn()); + stack_map_stream->FillInCodeInfo(MemoryRegion(stack_map->data(), stack_map->size())); } CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 831bccc90a..060613d349 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -150,7 +150,9 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { if (can_merge_with_load_class && !load_class->HasUses()) { load_class->GetBlock()->RemoveInstruction(load_class); } - } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) { + } else if (can_merge_with_load_class && + load_class->GetLoadKind() != HLoadClass::LoadKind::kRuntimeCall) { + DCHECK(!load_class->NeedsAccessCheck()); // Pass the initialization duty to the `HLoadClass` instruction, // and remove the instruction from the graph. DCHECK(load_class->HasEnvironment()); diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index 588ea03d69..1aa16f45bc 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -545,60 +545,67 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingN void HScheduler::Schedule(HGraph* graph) { // We run lsa here instead of in a separate pass to better control whether we // should run the analysis or not. + const HeapLocationCollector* heap_location_collector = nullptr; LoadStoreAnalysis lsa(graph); if (!only_optimize_loop_blocks_ || graph->HasLoops()) { lsa.Run(); - scheduling_graph_.SetHeapLocationCollector(lsa.GetHeapLocationCollector()); + heap_location_collector = &lsa.GetHeapLocationCollector(); } for (HBasicBlock* block : graph->GetReversePostOrder()) { if (IsSchedulable(block)) { - Schedule(block); + Schedule(block, heap_location_collector); } } } -void HScheduler::Schedule(HBasicBlock* block) { - ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator_->Adapter(kArenaAllocScheduler)); +void HScheduler::Schedule(HBasicBlock* block, + const HeapLocationCollector* heap_location_collector) { + ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack()); + ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator.Adapter(kArenaAllocScheduler)); // Build the scheduling graph. 
- scheduling_graph_.Clear(); + SchedulingGraph scheduling_graph(this, &allocator, heap_location_collector); for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); CHECK_EQ(instruction->GetBlock(), block) << instruction->DebugName() << " is in block " << instruction->GetBlock()->GetBlockId() << ", and expected in block " << block->GetBlockId(); - SchedulingNode* node = scheduling_graph_.AddNode(instruction, IsSchedulingBarrier(instruction)); + SchedulingNode* node = scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction)); CalculateLatency(node); scheduling_nodes.push_back(node); } - if (scheduling_graph_.Size() <= 1) { - scheduling_graph_.Clear(); + if (scheduling_graph.Size() <= 1) { return; } cursor_ = block->GetLastInstruction(); + // The list of candidates for scheduling. A node becomes a candidate when all + // its predecessors have been scheduled. + ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler)); + // Find the initial candidates for scheduling. - candidates_.clear(); for (SchedulingNode* node : scheduling_nodes) { if (!node->HasUnscheduledSuccessors()) { node->MaybeUpdateCriticalPath(node->GetLatency()); - candidates_.push_back(node); + candidates.push_back(node); } } - ScopedArenaVector<SchedulingNode*> initial_candidates(allocator_->Adapter(kArenaAllocScheduler)); + ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler)); if (kDumpDotSchedulingGraphs) { // Remember the list of initial candidates for debug output purposes. - initial_candidates.assign(candidates_.begin(), candidates_.end()); + initial_candidates.assign(candidates.begin(), candidates.end()); } // Schedule all nodes. - while (!candidates_.empty()) { - Schedule(selector_->PopHighestPriorityNode(&candidates_, scheduling_graph_)); + selector_->Reset(); + while (!candidates.empty()) { + SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph); + Schedule(node, &candidates); } if (kDumpDotSchedulingGraphs) { @@ -607,11 +614,12 @@ void HScheduler::Schedule(HBasicBlock* block) { std::stringstream description; description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx()) << " B" << block->GetBlockId(); - scheduling_graph_.DumpAsDotGraph(description.str(), initial_candidates); + scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates); } } -void HScheduler::Schedule(SchedulingNode* scheduling_node) { +void HScheduler::Schedule(SchedulingNode* scheduling_node, + /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) { // Check whether any of the node's predecessors will be valid candidates after // this node is scheduled. uint32_t path_to_node = scheduling_node->GetCriticalPath(); @@ -620,7 +628,7 @@ void HScheduler::Schedule(SchedulingNode* scheduling_node) { path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency()); predecessor->DecrementNumberOfUnscheduledSuccessors(); if (!predecessor->HasUnscheduledSuccessors()) { - candidates_.push_back(predecessor); + candidates->push_back(predecessor); } } for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) { @@ -630,7 +638,7 @@ void HScheduler::Schedule(SchedulingNode* scheduling_node) { // correctness. So we do not use them to compute the critical path. 
predecessor->DecrementNumberOfUnscheduledSuccessors(); if (!predecessor->HasUnscheduledSuccessors()) { - candidates_.push_back(predecessor); + candidates->push_back(predecessor); } } @@ -779,7 +787,6 @@ bool HInstructionScheduling::Run(bool only_optimize_loop_blocks, #if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm) // Phase-local allocator that allocates scheduler internal data structures like // scheduling nodes, internel nodes map, dependencies, etc. - ScopedArenaAllocator allocator(graph_->GetArenaStack()); CriticalPathSchedulingNodeSelector critical_path_selector; RandomSchedulingNodeSelector random_selector; SchedulingNodeSelector* selector = schedule_randomly @@ -795,7 +802,7 @@ bool HInstructionScheduling::Run(bool only_optimize_loop_blocks, switch (instruction_set_) { #ifdef ART_ENABLE_CODEGEN_arm64 case InstructionSet::kArm64: { - arm64::HSchedulerARM64 scheduler(&allocator, selector); + arm64::HSchedulerARM64 scheduler(selector); scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks); scheduler.Schedule(graph_); break; @@ -805,7 +812,7 @@ bool HInstructionScheduling::Run(bool only_optimize_loop_blocks, case InstructionSet::kThumb2: case InstructionSet::kArm: { arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_); - arm::HSchedulerARM scheduler(&allocator, selector, &arm_latency_visitor); + arm::HSchedulerARM scheduler(selector, &arm_latency_visitor); scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks); scheduler.Schedule(graph_); break; diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index c7683e04a7..fd48d844e6 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -251,12 +251,14 @@ class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> { */ class SchedulingGraph : public ValueObject { public: - SchedulingGraph(const HScheduler* scheduler, ScopedArenaAllocator* allocator) + SchedulingGraph(const HScheduler* scheduler, + ScopedArenaAllocator* allocator, + const HeapLocationCollector* heap_location_collector) : scheduler_(scheduler), allocator_(allocator), contains_scheduling_barrier_(false), nodes_map_(allocator_->Adapter(kArenaAllocScheduler)), - heap_location_collector_(nullptr) {} + heap_location_collector_(heap_location_collector) {} SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) { std::unique_ptr<SchedulingNode> node( @@ -268,15 +270,6 @@ class SchedulingGraph : public ValueObject { return result; } - void Clear() { - nodes_map_.clear(); - contains_scheduling_barrier_ = false; - } - - void SetHeapLocationCollector(const HeapLocationCollector& heap_location_collector) { - heap_location_collector_ = &heap_location_collector; - } - SchedulingNode* GetNode(const HInstruction* instr) const { auto it = nodes_map_.find(instr); if (it == nodes_map_.end()) { @@ -329,7 +322,7 @@ class SchedulingGraph : public ValueObject { ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_; - const HeapLocationCollector* heap_location_collector_; + const HeapLocationCollector* const heap_location_collector_; }; /* @@ -377,6 +370,7 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor { class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> { public: + virtual void Reset() {} virtual SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) = 0; virtual ~SchedulingNodeSelector() {} @@ -418,6 +412,7 @@ class 
CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector { public: CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {} + void Reset() OVERRIDE { prev_select_ = nullptr; } SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) OVERRIDE; @@ -434,16 +429,11 @@ class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector { class HScheduler { public: - HScheduler(ScopedArenaAllocator* allocator, - SchedulingLatencyVisitor* latency_visitor, - SchedulingNodeSelector* selector) - : allocator_(allocator), - latency_visitor_(latency_visitor), + HScheduler(SchedulingLatencyVisitor* latency_visitor, SchedulingNodeSelector* selector) + : latency_visitor_(latency_visitor), selector_(selector), only_optimize_loop_blocks_(true), - scheduling_graph_(this, allocator), - cursor_(nullptr), - candidates_(allocator_->Adapter(kArenaAllocScheduler)) {} + cursor_(nullptr) {} virtual ~HScheduler() {} void Schedule(HGraph* graph); @@ -454,8 +444,9 @@ class HScheduler { virtual bool IsSchedulingBarrier(const HInstruction* instruction) const; protected: - void Schedule(HBasicBlock* block); - void Schedule(SchedulingNode* scheduling_node); + void Schedule(HBasicBlock* block, const HeapLocationCollector* heap_location_collector); + void Schedule(SchedulingNode* scheduling_node, + /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates); void Schedule(HInstruction* instruction); // Any instruction returning `false` via this method will prevent its @@ -476,19 +467,12 @@ class HScheduler { node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency()); } - ScopedArenaAllocator* const allocator_; SchedulingLatencyVisitor* const latency_visitor_; SchedulingNodeSelector* const selector_; bool only_optimize_loop_blocks_; - // We instantiate the members below as part of this class to avoid - // instantiating them locally for every chunk scheduled. - SchedulingGraph scheduling_graph_; // A pointer indicating where the next instruction to be scheduled will be inserted. HInstruction* cursor_; - // The list of candidates for scheduling. A node becomes a candidate when all - // its predecessors have been scheduled. 
- ScopedArenaVector<SchedulingNode*> candidates_; private: DISALLOW_COPY_AND_ASSIGN(HScheduler); diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h index 0cb8684376..2f369486b3 100644 --- a/compiler/optimizing/scheduler_arm.h +++ b/compiler/optimizing/scheduler_arm.h @@ -137,10 +137,9 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { class HSchedulerARM : public HScheduler { public: - HSchedulerARM(ScopedArenaAllocator* allocator, - SchedulingNodeSelector* selector, + HSchedulerARM(SchedulingNodeSelector* selector, SchedulingLatencyVisitorARM* arm_latency_visitor) - : HScheduler(allocator, arm_latency_visitor, selector) {} + : HScheduler(arm_latency_visitor, selector) {} ~HSchedulerARM() OVERRIDE {} bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index 4f394d5e16..0d2f8d9fa0 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -134,8 +134,8 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { class HSchedulerARM64 : public HScheduler { public: - HSchedulerARM64(ScopedArenaAllocator* allocator, SchedulingNodeSelector* selector) - : HScheduler(allocator, &arm64_latency_visitor_, selector) {} + explicit HSchedulerARM64(SchedulingNodeSelector* selector) + : HScheduler(&arm64_latency_visitor_, selector) {} ~HSchedulerARM64() OVERRIDE {} bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index 7079e07ae1..fe23fb4cff 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -146,7 +146,9 @@ class SchedulerTest : public OptimizingUnitTest { environment->SetRawEnvAt(1, mul); mul->AddEnvUseAt(div_check->GetEnvironment(), 1); - SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator()); + SchedulingGraph scheduling_graph(scheduler, + GetScopedAllocator(), + /* heap_location_collector */ nullptr); // Instructions must be inserted in reverse order into the scheduling graph. 
for (HInstruction* instr : ReverseRange(block_instructions)) { scheduling_graph.AddNode(instr); @@ -276,11 +278,10 @@ class SchedulerTest : public OptimizingUnitTest { entry->AddInstruction(instr); } - SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator()); HeapLocationCollector heap_location_collector(graph_); heap_location_collector.VisitBasicBlock(entry); heap_location_collector.BuildAliasingMatrix(); - scheduling_graph.SetHeapLocationCollector(heap_location_collector); + SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator(), &heap_location_collector); for (HInstruction* instr : ReverseRange(block_instructions)) { // Build scheduling graph with memory access aliasing information @@ -354,13 +355,13 @@ class SchedulerTest : public OptimizingUnitTest { #if defined(ART_ENABLE_CODEGEN_arm64) TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) { CriticalPathSchedulingNodeSelector critical_path_selector; - arm64::HSchedulerARM64 scheduler(GetScopedAllocator(), &critical_path_selector); + arm64::HSchedulerARM64 scheduler(&critical_path_selector); TestBuildDependencyGraphAndSchedule(&scheduler); } TEST_F(SchedulerTest, ArrayAccessAliasingARM64) { CriticalPathSchedulingNodeSelector critical_path_selector; - arm64::HSchedulerARM64 scheduler(GetScopedAllocator(), &critical_path_selector); + arm64::HSchedulerARM64 scheduler(&critical_path_selector); TestDependencyGraphOnAliasingArrayAccesses(&scheduler); } #endif @@ -369,14 +370,14 @@ TEST_F(SchedulerTest, ArrayAccessAliasingARM64) { TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) { CriticalPathSchedulingNodeSelector critical_path_selector; arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr); - arm::HSchedulerARM scheduler(GetScopedAllocator(), &critical_path_selector, &arm_latency_visitor); + arm::HSchedulerARM scheduler(&critical_path_selector, &arm_latency_visitor); TestBuildDependencyGraphAndSchedule(&scheduler); } TEST_F(SchedulerTest, ArrayAccessAliasingARM) { CriticalPathSchedulingNodeSelector critical_path_selector; arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr); - arm::HSchedulerARM scheduler(GetScopedAllocator(), &critical_path_selector, &arm_latency_visitor); + arm::HSchedulerARM scheduler(&critical_path_selector, &arm_latency_visitor); TestDependencyGraphOnAliasingArrayAccesses(&scheduler); } #endif diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index a5f7ff3a49..cd04ff042c 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -34,7 +34,7 @@ namespace art { * Collects and builds stack maps for a method. All the stack maps * for a method are placed in a CodeInfo object. */ -class StackMapStream : public ValueObject { +class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { public: explicit StackMapStream(ScopedArenaAllocator* allocator, InstructionSet instruction_set) : instruction_set_(instruction_set), @@ -53,6 +53,7 @@ class StackMapStream : public ValueObject { current_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)), previous_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)), dex_register_timestamp_(allocator->Adapter(kArenaAllocStackMapStream)), + expected_num_dex_registers_(0u), temp_dex_register_mask_(allocator, 32, true, kArenaAllocStackMapStream), temp_dex_register_map_(allocator->Adapter(kArenaAllocStackMapStream)) { } |
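
The stack_map_stream.h hunk above turns StackMapStream from a ValueObject into a DeletableArenaObject<kArenaAllocStackMapStream>, which is what lets the JNI path in optimizing_compiler.cc placement-new the stream on a ScopedArenaAllocator and own it through std::unique_ptr. A minimal sketch of that allocation pattern follows; the helper name, the include paths and the std::vector output buffer are illustrative assumptions, not code from the patch:

#include <cstdint>
#include <memory>
#include <vector>

#include "base/memory_region.h"
#include "base/scoped_arena_allocator.h"
#include "stack_map_stream.h"

namespace art {

// Hypothetical helper mirroring the JNI stack map construction in
// optimizing_compiler.cc: emit an empty stack map for a stub whose frame
// layout is already known.
std::vector<uint8_t> BuildJniStackMap(ArenaStack* arena_stack,
                                      InstructionSet isa,
                                      size_t frame_size_in_bytes,
                                      uint32_t core_spill_mask,
                                      uint32_t fp_spill_mask) {
  ScopedArenaAllocator allocator(arena_stack);
  // Placement-new on the scoped arena; DeletableArenaObject is what makes
  // owning this through unique_ptr legal even though the memory is arena-backed.
  std::unique_ptr<StackMapStream> stream(
      new (&allocator) StackMapStream(&allocator, isa));
  stream->BeginMethod(frame_size_in_bytes,
                      core_spill_mask,
                      fp_spill_mask,
                      /* num_dex_registers */ 0);
  stream->EndMethod();
  std::vector<uint8_t> stack_map(stream->PrepareForFillIn());
  stream->FillInCodeInfo(MemoryRegion(stack_map.data(), stack_map.size()));
  return stack_map;
}

}  // namespace art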
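
On the scheduler side, HScheduler no longer holds an arena, a SchedulingGraph or a candidate list: Schedule(HBasicBlock*) now creates a block-local ScopedArenaAllocator, builds the graph and candidate vector from it, and calls selector_->Reset() before popping candidates, so only the selector and latency visitor outlive a block. A caller-side sketch of the slimmed-down construction, assuming an arm64 build; the wrapper function and include paths are illustrative, not part of the patch:

#include "scheduler.h"
#include "scheduler_arm64.h"

namespace art {

// Illustrative wrapper: drive the refactored scheduler over a whole graph.
static void ScheduleGraphForArm64(HGraph* graph, bool only_optimize_loop_blocks) {
  CriticalPathSchedulingNodeSelector critical_path_selector;
  // No ScopedArenaAllocator argument any more; per-block arenas are created
  // inside HScheduler::Schedule(HBasicBlock*, const HeapLocationCollector*).
  arm64::HSchedulerARM64 scheduler(&critical_path_selector);
  scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
  scheduler.Schedule(graph);
}

}  // namespace art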
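
Because SchedulingGraph now takes the HeapLocationCollector (possibly null) at construction and no longer exposes Clear() or SetHeapLocationCollector(), code that wants aliasing-aware dependencies must build the collector first, as the updated scheduler_test.cc does. A condensed sketch of that ordering, assuming `scheduler`, `block`, `graph_` and GetScopedAllocator() come from a test fixture like the one above:

// Run load-store analysis for the block before the graph exists, then hand
// the collector to the SchedulingGraph constructor (pass nullptr to build a
// graph without aliasing information).
HeapLocationCollector heap_location_collector(graph_);
heap_location_collector.VisitBasicBlock(block);
heap_location_collector.BuildAliasingMatrix();

SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator(), &heap_location_collector);
// Instructions are still added in reverse order, as in HScheduler::Schedule().
for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
  scheduling_graph.AddNode(it.Current());
}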