| -rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 103 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 19 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.cc | 83 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.h | 19 |
| -rw-r--r-- | compiler/optimizing/stack_map_stream.cc | 4 |
| -rw-r--r-- | runtime/oat.h | 4 |
| -rw-r--r-- | runtime/stack_map.cc | 19 |
| -rw-r--r-- | runtime/stack_map.h | 8 |
| -rw-r--r-- | runtime/thread.cc | 6 |
9 files changed, 171 insertions, 94 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5a3b22cabd..260920cb0c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1413,7 +1413,9 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
-                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
+                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
+                                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
@@ -1428,6 +1430,16 @@ void CodeGeneratorARM64::EmitJumpTables() {
 }
 
 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
   EmitJumpTables();
+
+  // Emit JIT baker read barrier slow paths.
+  DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty());
+  for (auto& entry : jit_baker_read_barrier_slow_paths_) {
+    uint32_t encoded_data = entry.first;
+    vixl::aarch64::Label* slow_path_entry = &entry.second.label;
+    __ Bind(slow_path_entry);
+    CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name */ nullptr);
+  }
+
   // Ensure we emit the literal pool.
   __ FinalizeCode();
@@ -4746,9 +4758,18 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
   return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
 }
 
-vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
-  baker_read_barrier_patches_.emplace_back(custom_data);
-  return &baker_read_barrier_patches_.back().label;
+void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
+  ExactAssemblyScope guard(GetVIXLAssembler(), 1 * vixl::aarch64::kInstructionSize);
+  if (Runtime::Current()->UseJitCompilation()) {
+    auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
+    vixl::aarch64::Label* slow_path_entry = &it->second.label;
+    __ cbnz(mr, slow_path_entry);
+  } else {
+    baker_read_barrier_patches_.emplace_back(custom_data);
+    vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label;
+    __ bind(cbnz_label);
+    __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+  }
 }
 
 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
@@ -6255,14 +6276,14 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad(
   if (kUseBakerReadBarrier) {
     // Fast path implementation of art::ReadBarrier::BarrierForRoot when
     // Baker's read barrier are used.
-    if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
-        !Runtime::Current()->UseJitCompilation()) {
+    if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
       // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
       // the Marking Register) to decide whether we need to enter
       // the slow path to mark the GC root.
       //
-      // We use link-time generated thunks for the slow path. That thunk
-      // checks the reference and jumps to the entrypoint if needed.
+      // We use shared thunks for the slow path; shared within the method
+      // for JIT, across methods for AOT. That thunk checks the reference
+      // and jumps to the entrypoint if needed.
       //
       //   lr = &return_address;
       //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
@@ -6276,20 +6297,18 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad(
       DCHECK(temps.IsAvailable(ip1));
       temps.Exclude(ip0, ip1);
       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
-      vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
 
-      EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
+      ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
       vixl::aarch64::Label return_address;
       __ adr(lr, &return_address);
       if (fixup_label != nullptr) {
-        __ Bind(fixup_label);
+        __ bind(fixup_label);
       }
       static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
                     "GC root LDR must be 2 instruction (8B) before the return address label.");
       __ ldr(root_reg, MemOperand(obj.X(), offset));
-      __ Bind(cbnz_label);
-      __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
-      __ Bind(&return_address);
+      EmitBakerReadBarrierCbnz(custom_data);
+      __ bind(&return_address);
     } else {
       // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
       // the Marking Register) to decide whether we need to enter
@@ -6361,18 +6380,17 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
-  if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
-      !use_load_acquire &&
-      !Runtime::Current()->UseJitCompilation()) {
+  if (kBakerReadBarrierLinkTimeThunksEnableForFields && !use_load_acquire) {
     // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
     // Marking Register) to decide whether we need to enter the slow
     // path to mark the reference. Then, in the slow path, check the
     // gray bit in the lock word of the reference's holder (`obj`) to
    // decide whether to mark `ref` or not.
     //
-    // We use link-time generated thunks for the slow path. That thunk checks
-    // the holder and jumps to the entrypoint if needed. If the holder is not
-    // gray, it creates a fake dependency and returns to the LDR instruction.
+    // We use shared thunks for the slow path; shared within the method
+    // for JIT, across methods for AOT. That thunk checks the holder
+    // and jumps to the entrypoint if needed. If the holder is not gray,
+    // it creates a fake dependency and returns to the LDR instruction.
     //
     //     lr = &gray_return_address;
     //     if (mr) {  // Thread::Current()->GetIsGcMarking()
@@ -6398,15 +6416,13 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
     DCHECK(temps.IsAvailable(ip1));
     temps.Exclude(ip0, ip1);
     uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode());
-    vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
 
     {
-      EmissionCheckScope guard(GetVIXLAssembler(),
+      ExactAssemblyScope guard(GetVIXLAssembler(),
                                (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
       vixl::aarch64::Label return_address;
       __ adr(lr, &return_address);
-      __ Bind(cbnz_label);
-      __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+      EmitBakerReadBarrierCbnz(custom_data);
      static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                    "Field LDR must be 1 instruction (4B) before the return address label; "
                    " 2 instructions (8B) for heap poisoning.");
@@ -6415,8 +6431,12 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
       if (needs_null_check) {
         MaybeRecordImplicitNullCheck(instruction);
       }
-      GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-      __ Bind(&return_address);
+      // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
+      // macro instructions disallowed in ExactAssemblyScope.
+      if (kPoisonHeapReferences) {
+        __ neg(ref_reg, Operand(ref_reg));
+      }
+      __ bind(&return_address);
     }
     MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
     return;
@@ -6452,17 +6472,17 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins
                 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
 
-  if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
-      !Runtime::Current()->UseJitCompilation()) {
+  if (kBakerReadBarrierLinkTimeThunksEnableForArrays) {
     // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
     // Marking Register) to decide whether we need to enter the slow
     // path to mark the reference. Then, in the slow path, check the
     // gray bit in the lock word of the reference's holder (`obj`) to
     // decide whether to mark `ref` or not.
     //
-    // We use link-time generated thunks for the slow path. That thunk checks
-    // the holder and jumps to the entrypoint if needed. If the holder is not
-    // gray, it creates a fake dependency and returns to the LDR instruction.
+    // We use shared thunks for the slow path; shared within the method
+    // for JIT, across methods for AOT. That thunk checks the holder
+    // and jumps to the entrypoint if needed. If the holder is not gray,
+    // it creates a fake dependency and returns to the LDR instruction.
     //
     //     lr = &gray_return_address;
     //     if (mr) {  // Thread::Current()->GetIsGcMarking()
@@ -6483,23 +6503,25 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins
     DCHECK(temps.IsAvailable(ip1));
     temps.Exclude(ip0, ip1);
     uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
-    vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
 
     __ Add(temp.X(), obj.X(), Operand(data_offset));
     {
-      EmissionCheckScope guard(GetVIXLAssembler(),
+      ExactAssemblyScope guard(GetVIXLAssembler(),
                                (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
       vixl::aarch64::Label return_address;
      __ adr(lr, &return_address);
-      __ Bind(cbnz_label);
-      __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
+      EmitBakerReadBarrierCbnz(custom_data);
      static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                    "Array LDR must be 1 instruction (4B) before the return address label; "
                    " 2 instructions (8B) for heap poisoning.");
      __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
      DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
-      GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
-      __ Bind(&return_address);
+      // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
+      // macro instructions disallowed in ExactAssemblyScope.
+      if (kPoisonHeapReferences) {
+        __ neg(ref_reg, Operand(ref_reg));
+      }
+      __ bind(&return_address);
     }
     MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
     return;
@@ -6988,7 +7010,12 @@ void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
       UNREACHABLE();
   }
 
-  if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+  // For JIT, the slow path is considered part of the compiled method,
+  // so JIT should pass null as `debug_name`. Tests may not have a runtime.
+  DCHECK(Runtime::Current() == nullptr ||
+         !Runtime::Current()->UseJitCompilation() ||
+         debug_name == nullptr);
+  if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
     std::ostringstream oss;
     oss << "BakerReadBarrierThunk";
     switch (kind) {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 93bab3180c..c07d1eaf95 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -619,9 +619,9 @@ class CodeGeneratorARM64 : public CodeGenerator {
                                        dex::StringIndex string_index,
                                        vixl::aarch64::Label* adrp_label = nullptr);
 
-  // Add a new baker read barrier patch and return the label to be bound
-  // before the CBNZ instruction.
-  vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+  // Emit the CBNZ instruction for baker read barrier and record
+  // the associated patch for AOT or slow path for JIT.
+  void EmitBakerReadBarrierCbnz(uint32_t custom_data);
 
   vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
   vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file,
@@ -928,6 +928,19 @@ class CodeGeneratorARM64 : public CodeGenerator {
   // Patches for class literals in JIT compiled code.
   TypeToLiteralMap jit_class_patches_;
 
+  // Baker read barrier slow paths, mapping custom data (uint32_t) to label.
+  // Wrap the label to work around vixl::aarch64::Label being non-copyable
+  // and non-moveable and as such unusable in ArenaSafeMap<>.
+  struct LabelWrapper {
+    LabelWrapper(const LabelWrapper& src)
+        : label() {
+      DCHECK(!src.label.IsLinked() && !src.label.IsBound());
+    }
+    LabelWrapper() = default;
+    vixl::aarch64::Label label;
+  };
+  ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_;
+
   friend class linker::Arm64RelativePatcherTest;
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
 };
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index e7b2c7853f..47d7360a5d 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -108,14 +108,6 @@ constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
 // Marker that code is yet to be, and must, be implemented.
 #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
 
-static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) {
-  ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes);
-  __ bind(patch_label);
-  vixl32::Label placeholder_label;
-  __ b(ne, EncodingSize(Wide), &placeholder_label);  // Placeholder, patched at link-time.
-  __ bind(&placeholder_label);
-}
-
 static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
   return rt.IsLow() && rn.IsLow() && offset < 32u;
 }
@@ -2365,7 +2357,9 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
-                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
+                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+      jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
+                                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
   // Give D30 and D31 as scratch register to VIXL. The register allocator only works on
@@ -2421,6 +2415,16 @@ void CodeGeneratorARMVIXL::FixJumpTables() {
 }
 
 void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
   FixJumpTables();
+
+  // Emit JIT baker read barrier slow paths.
+  DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty());
+  for (auto& entry : jit_baker_read_barrier_slow_paths_) {
+    uint32_t encoded_data = entry.first;
+    vixl::aarch32::Label* slow_path_entry = &entry.second.label;
+    __ Bind(slow_path_entry);
+    CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name */ nullptr);
+  }
+
   GetAssembler()->FinalizeCode();
   CodeGenerator::Finalize(allocator);
@@ -8793,14 +8797,14 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
   if (kUseBakerReadBarrier) {
     // Fast path implementation of art::ReadBarrier::BarrierForRoot when
     // Baker's read barrier are used.
-    if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
-        !Runtime::Current()->UseJitCompilation()) {
+    if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
      // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
      // the Marking Register) to decide whether we need to enter
      // the slow path to mark the GC root.
      //
-      // We use link-time generated thunks for the slow path. That thunk
-      // checks the reference and jumps to the entrypoint if needed.
+      // We use shared thunks for the slow path; shared within the method
+      // for JIT, across methods for AOT. That thunk checks the reference
+      // and jumps to the entrypoint if needed.
      //
      //   lr = &return_address;
      //   GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
@@ -8813,7 +8817,6 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
       temps.Exclude(ip);
       bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
-      vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
       vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
       vixl32::Label return_address;
@@ -8824,7 +8827,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
       DCHECK_LT(offset, kReferenceLoadMinFarOffset);
       ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
       __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
-      EmitPlaceholderBne(this, bne_label);
+      EmitBakerReadBarrierBne(custom_data);
       __ Bind(&return_address);
       DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
                 narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
@@ -8887,17 +8890,17 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
-  if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
-      !Runtime::Current()->UseJitCompilation()) {
+  if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
     // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
     // Marking Register) to decide whether we need to enter the slow
     // path to mark the reference. Then, in the slow path, check the
     // gray bit in the lock word of the reference's holder (`obj`) to
     // decide whether to mark `ref` or not.
     //
-    // We use link-time generated thunks for the slow path. That thunk checks
-    // the holder and jumps to the entrypoint if needed. If the holder is not
-    // gray, it creates a fake dependency and returns to the LDR instruction.
+    // We use shared thunks for the slow path; shared within the method
+    // for JIT, across methods for AOT. That thunk checks the holder
+    // and jumps to the entrypoint if needed. If the holder is not gray,
+    // it creates a fake dependency and returns to the LDR instruction.
     //
     //     lr = &gray_return_address;
     //     if (mr) {  // Thread::Current()->GetIsGcMarking()
@@ -8926,7 +8929,6 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
     UseScratchRegisterScope temps(GetVIXLAssembler());
     temps.Exclude(ip);
     uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow);
-    vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
     {
       vixl::EmissionCheckScope guard(
@@ -8935,7 +8937,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
       vixl32::Label return_address;
       EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
       __ cmp(mr, Operand(0));
-      EmitPlaceholderBne(this, bne_label);
+      EmitBakerReadBarrierBne(custom_data);
       ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
       __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
       if (needs_null_check) {
@@ -8981,17 +8983,17 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
                 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
   ScaleFactor scale_factor = TIMES_4;
 
-  if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
-      !Runtime::Current()->UseJitCompilation()) {
+  if (kBakerReadBarrierLinkTimeThunksEnableForArrays) {
     // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
     // Marking Register) to decide whether we need to enter the slow
     // path to mark the reference. Then, in the slow path, check the
     // gray bit in the lock word of the reference's holder (`obj`) to
     // decide whether to mark `ref` or not.
     //
-    // We use link-time generated thunks for the slow path. That thunk checks
-    // the holder and jumps to the entrypoint if needed. If the holder is not
-    // gray, it creates a fake dependency and returns to the LDR instruction.
+    // We use shared thunks for the slow path; shared within the method
+    // for JIT, across methods for AOT. That thunk checks the holder
+    // and jumps to the entrypoint if needed. If the holder is not gray,
+    // it creates a fake dependency and returns to the LDR instruction.
     //
     //     lr = &gray_return_address;
     //     if (mr) {  // Thread::Current()->GetIsGcMarking()
@@ -9011,7 +9013,6 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
     UseScratchRegisterScope temps(GetVIXLAssembler());
     temps.Exclude(ip);
     uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
-    vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
 
     __ Add(data_reg, obj, Operand(data_offset));
     {
@@ -9021,7 +9022,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
       vixl32::Label return_address;
       EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
       __ cmp(mr, Operand(0));
-      EmitPlaceholderBne(this, bne_label);
+      EmitBakerReadBarrierBne(custom_data);
       ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
       __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
       DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
@@ -9492,9 +9493,20 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa
   return &patches->back();
 }
 
-vixl32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) {
-  baker_read_barrier_patches_.emplace_back(custom_data);
-  return &baker_read_barrier_patches_.back().label;
+void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
+  ExactAssemblyScope eas(GetVIXLAssembler(), 1 * k32BitT32InstructionSizeInBytes);
+  if (Runtime::Current()->UseJitCompilation()) {
+    auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
+    vixl::aarch32::Label* slow_path_entry = &it->second.label;
+    __ b(ne, EncodingSize(Wide), slow_path_entry);
+  } else {
+    baker_read_barrier_patches_.emplace_back(custom_data);
+    vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label;
+    __ bind(patch_label);
+    vixl32::Label placeholder_label;
+    __ b(ne, EncodingSize(Wide), &placeholder_label);  // Placeholder, patched at link-time.
+    __ bind(&placeholder_label);
+  }
 }
 
 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
@@ -10086,7 +10098,12 @@ void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assemb
       UNREACHABLE();
   }
 
-  if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+  // For JIT, the slow path is considered part of the compiled method,
+  // so JIT should pass null as `debug_name`. Tests may not have a runtime.
+  DCHECK(Runtime::Current() == nullptr ||
+         !Runtime::Current()->UseJitCompilation() ||
+         debug_name == nullptr);
+  if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
     std::ostringstream oss;
     oss << "BakerReadBarrierThunk";
     switch (kind) {
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index fc8cf98173..ef82f2e904 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -589,9 +589,9 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
   PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file,
                                               dex::StringIndex string_index);
 
-  // Add a new baker read barrier patch and return the label to be bound
-  // before the BNE instruction.
-  vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+  // Emit the BNE instruction for baker read barrier and record
+  // the associated patch for AOT or slow path for JIT.
+  void EmitBakerReadBarrierBne(uint32_t custom_data);
 
   VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
   VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file,
@@ -916,6 +916,19 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
   // Patches for class literals in JIT compiled code.
   TypeToLiteralMap jit_class_patches_;
 
+  // Baker read barrier slow paths, mapping custom data (uint32_t) to label.
+  // Wrap the label to work around vixl::aarch32::Label being non-copyable
+  // and non-moveable and as such unusable in ArenaSafeMap<>.
+  struct LabelWrapper {
+    LabelWrapper(const LabelWrapper& src)
+        : label() {
+      DCHECK(!src.label.IsReferenced() && !src.label.IsBound());
+    }
+    LabelWrapper() = default;
+    vixl::aarch32::Label label;
+  };
+  ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_;
+
   friend class linker::Thumb2RelativePatcherTest;
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL);
 };
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 8858cbe941..3918b65a62 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -301,10 +301,10 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() {
   EncodeVarintBits(out, fp_spill_mask_);
   EncodeVarintBits(out, num_dex_registers_);
   EncodeTable(out, stack_maps_);
-  EncodeTable(out, inline_infos_);
-  EncodeTable(out, method_infos_);
   EncodeTable(out, register_masks_);
   EncodeTable(out, stack_masks_);
+  EncodeTable(out, inline_infos_);
+  EncodeTable(out, method_infos_);
   EncodeTable(out, dex_register_masks_);
   EncodeTable(out, dex_register_maps_);
   EncodeTable(out, dex_register_catalog_);
diff --git a/runtime/oat.h b/runtime/oat.h
index 0a34ea0a24..037c8f9c1a 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@ class InstructionSetFeatures;
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  // Last oat version changed reason: Pass Class reference to clinit entrypoint.
-  static constexpr uint8_t kOatVersion[] = { '1', '6', '0', '\0' };
+  // Last oat version changed reason: Add stack map fast path for GC.
+  static constexpr uint8_t kOatVersion[] = { '1', '6', '1', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index 1b78b5ac1d..d1000c5375 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -56,13 +56,16 @@ void CodeInfo::Decode(const uint8_t* data, DecodeFlags flags) {
   fp_spill_mask_ = DecodeVarintBits(reader);
   number_of_dex_registers_ = DecodeVarintBits(reader);
   DecodeTable(stack_maps_, reader, data);
+  DecodeTable(register_masks_, reader, data);
+  DecodeTable(stack_masks_, reader, data);
+  if (flags & DecodeFlags::GcMasksOnly) {
+    return;
+  }
   DecodeTable(inline_infos_, reader, data);
   DecodeTable(method_infos_, reader, data);
   if (flags & DecodeFlags::InlineInfoOnly) {
     return;
   }
-  DecodeTable(register_masks_, reader, data);
-  DecodeTable(stack_masks_, reader, data);
   DecodeTable(dex_register_masks_, reader, data);
   DecodeTable(dex_register_maps_, reader, data);
   DecodeTable(dex_register_catalog_, reader, data);
@@ -97,10 +100,10 @@ size_t CodeInfo::Dedupe(std::vector<uint8_t>* out, const uint8_t* in, DedupeMap*
   EncodeVarintBits(writer, DecodeVarintBits(reader));  // fp_spill_mask_.
   EncodeVarintBits(writer, DecodeVarintBits(reader));  // number_of_dex_registers_.
   DedupeTable<StackMap>(writer, reader, dedupe_map);
-  DedupeTable<InlineInfo>(writer, reader, dedupe_map);
-  DedupeTable<MethodInfo>(writer, reader, dedupe_map);
   DedupeTable<RegisterMask>(writer, reader, dedupe_map);
   DedupeTable<MaskInfo>(writer, reader, dedupe_map);
+  DedupeTable<InlineInfo>(writer, reader, dedupe_map);
+  DedupeTable<MethodInfo>(writer, reader, dedupe_map);
   DedupeTable<MaskInfo>(writer, reader, dedupe_map);
   DedupeTable<DexRegisterMapInfo>(writer, reader, dedupe_map);
   DedupeTable<DexRegisterInfo>(writer, reader, dedupe_map);
@@ -211,10 +214,10 @@ void CodeInfo::AddSizeStats(/*out*/ Stats* parent) const {
   Stats* stats = parent->Child("CodeInfo");
   stats->AddBytes(Size());
   AddTableSizeStats<StackMap>("StackMaps", stack_maps_, stats);
-  AddTableSizeStats<InlineInfo>("InlineInfos", inline_infos_, stats);
-  AddTableSizeStats<MethodInfo>("MethodInfo", method_infos_, stats);
   AddTableSizeStats<RegisterMask>("RegisterMasks", register_masks_, stats);
   AddTableSizeStats<MaskInfo>("StackMasks", stack_masks_, stats);
+  AddTableSizeStats<InlineInfo>("InlineInfos", inline_infos_, stats);
+  AddTableSizeStats<MethodInfo>("MethodInfo", method_infos_, stats);
   AddTableSizeStats<MaskInfo>("DexRegisterMasks", dex_register_masks_, stats);
   AddTableSizeStats<DexRegisterMapInfo>("DexRegisterMaps", dex_register_maps_, stats);
   AddTableSizeStats<DexRegisterInfo>("DexRegisterCatalog", dex_register_catalog_, stats);
@@ -276,10 +279,10 @@ void CodeInfo::Dump(VariableIndentationOutputStream* vios,
   vios->Stream() << "CodeInfo\n";
   ScopedIndentation indent1(vios);
   DumpTable<StackMap>(vios, "StackMaps", stack_maps_, verbose);
-  DumpTable<InlineInfo>(vios, "InlineInfos", inline_infos_, verbose);
-  DumpTable<MethodInfo>(vios, "MethodInfo", method_infos_, verbose);
   DumpTable<RegisterMask>(vios, "RegisterMasks", register_masks_, verbose);
   DumpTable<MaskInfo>(vios, "StackMasks", stack_masks_, verbose, true /* is_mask */);
+  DumpTable<InlineInfo>(vios, "InlineInfos", inline_infos_, verbose);
+  DumpTable<MethodInfo>(vios, "MethodInfo", method_infos_, verbose);
   DumpTable<MaskInfo>(vios, "DexRegisterMasks", dex_register_masks_, verbose, true /* is_mask */);
   DumpTable<DexRegisterMapInfo>(vios, "DexRegisterMaps", dex_register_maps_, verbose);
   DumpTable<DexRegisterInfo>(vios, "DexRegisterCatalog", dex_register_catalog_, verbose);
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index acde3e3a90..d6db05a3b8 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -271,9 +271,11 @@ class CodeInfo {
  public:
   enum DecodeFlags {
     Default = 0,
+    // Limits the decoding only to the data needed by GC.
+    GcMasksOnly = 1,
     // Limits the decoding only to the main stack map table and inline info table.
     // This is sufficient for many use cases and makes the header decoding faster.
-    InlineInfoOnly = 1,
+    InlineInfoOnly = 2,
   };
 
   explicit CodeInfo(const uint8_t* data, DecodeFlags flags = DecodeFlags::Default) {
@@ -446,10 +448,10 @@ class CodeInfo {
   uint32_t fp_spill_mask_;
   uint32_t number_of_dex_registers_;
   BitTable<StackMap> stack_maps_;
-  BitTable<InlineInfo> inline_infos_;
-  BitTable<MethodInfo> method_infos_;
   BitTable<RegisterMask> register_masks_;
   BitTable<MaskInfo> stack_masks_;
+  BitTable<InlineInfo> inline_infos_;
+  BitTable<MethodInfo> method_infos_;
   BitTable<MaskInfo> dex_register_masks_;
   BitTable<DexRegisterMapInfo> dex_register_maps_;
   BitTable<DexRegisterInfo> dex_register_catalog_;
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 69ac01e1f0..0703a074d5 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3604,7 +3604,9 @@ class ReferenceMapVisitor : public StackVisitor {
       StackReference<mirror::Object>* vreg_base = reinterpret_cast<StackReference<mirror::Object>*>(
           reinterpret_cast<uintptr_t>(cur_quick_frame));
       uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc());
-      CodeInfo code_info(method_header);
+      CodeInfo code_info(method_header, kPrecise
+          ? CodeInfo::DecodeFlags::Default  // We will need dex register maps.
+          : CodeInfo::DecodeFlags::GcMasksOnly);
       StackMap map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
       DCHECK(map.IsValid());
 
@@ -3621,7 +3623,7 @@ class ReferenceMapVisitor : public StackVisitor {
             vreg_info.VisitStack(&new_ref, i, this);
             if (ref != new_ref) {
               ref_addr->Assign(new_ref);
-             }
+            }
           }
         }
       }
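
Note on the code generator changes above: every Baker read barrier site encodes its register/kind information into a `custom_data` key, the emit helper branches to one slow-path label per distinct key, and `Finalize()` materializes each pending label as a thunk at the end of the JIT-compiled method (AOT instead records a patch for a link-time thunk). The following is a minimal standalone sketch of that sharing pattern only; it uses `std::map` and a toy `TinyEmitter`/`Label` in place of `ArenaSafeMap<>`, VIXL labels and `CompileBakerReadBarrierThunk()`, so all names in it are illustrative rather than ART's API.

#include <cstdint>
#include <cstdio>
#include <map>
#include <vector>

// Illustrative stand-ins for VIXL labels and the assembler.
struct Label {
  int bound_at = -1;       // instruction index where the thunk starts
  std::vector<int> uses;   // sites that branch to this slow path
};

class TinyEmitter {
 public:
  // Called at each read barrier site: branch to the shared slow path
  // for this encoded data, creating the label on first use.
  void EmitSlowPathBranch(uint32_t custom_data) {
    Label& entry = slow_paths_[custom_data];  // FindOrAdd equivalent.
    entry.uses.push_back(next_instruction_++);
  }

  // Called once at the end of the method: bind each pending label and
  // emit its (simulated) thunk body exactly once.
  void Finalize() {
    for (auto& [data, label] : slow_paths_) {
      label.bound_at = next_instruction_;
      next_instruction_ += EmitThunkBody(data);
    }
  }

  void Dump() const {
    for (const auto& [data, label] : slow_paths_) {
      std::printf("data=%#x thunk@%d used by %zu sites\n",
                  static_cast<unsigned>(data), label.bound_at, label.uses.size());
    }
  }

 private:
  int EmitThunkBody(uint32_t /*custom_data*/) { return 8; }  // pretend each thunk is 8 instructions

  int next_instruction_ = 0;
  std::map<uint32_t, Label> slow_paths_;  // keyed like jit_baker_read_barrier_slow_paths_
};

int main() {
  TinyEmitter emitter;
  emitter.EmitSlowPathBranch(0x101);  // two field loads with the same registers...
  emitter.EmitSlowPathBranch(0x101);  // ...reuse the same slow path entry
  emitter.EmitSlowPathBranch(0x202);  // a GC root load gets its own entry
  emitter.Finalize();
  emitter.Dump();
}

The key is the same `custom_data` value the AOT path records as a patch; keying the map on it is what lets identical barrier configurations within one JIT-compiled method share a single thunk copy.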
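The `LabelWrapper` added to both code generator headers exists only because VIXL labels are neither copyable nor movable, while `ArenaSafeMap<>` (like `std::map`) may copy the mapped value when inserting; the wrapper supplies a copy constructor that creates a fresh label and asserts the source label was never used. A cut-down illustration of the same workaround using the standard library follows; `NonMovableLabel` is hypothetical and `assert` stands in for `DCHECK`.

#include <cassert>
#include <cstdint>
#include <map>

// Stand-in for vixl::aarch64::Label: not copyable, not movable.
class NonMovableLabel {
 public:
  NonMovableLabel() = default;
  NonMovableLabel(const NonMovableLabel&) = delete;
  NonMovableLabel& operator=(const NonMovableLabel&) = delete;
  bool IsBound() const { return bound_; }
  bool IsLinked() const { return linked_; }
  void Bind() { bound_ = true; }
 private:
  bool bound_ = false;
  bool linked_ = false;
};

// The wrapper makes the type copyable *only* while the label is still unused,
// which is all the map needs when inserting a default-constructed entry.
struct LabelWrapper {
  LabelWrapper() = default;
  LabelWrapper(const LabelWrapper& src) : label() {
    assert(!src.label.IsLinked() && !src.label.IsBound());
  }
  NonMovableLabel label;
};

int main() {
  std::map<uint32_t, LabelWrapper> slow_paths;
  // Inserting a temporary requires the wrapper to be copyable; the raw
  // label type could not be stored this way.
  auto [it, inserted] = slow_paths.emplace(0x42u, LabelWrapper());
  it->second.label.Bind();  // afterwards the label is used in place
  return (inserted && it->second.label.IsBound()) ? 0 : 1;
}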
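The stack map reordering in the runtime files works because `CodeInfo::Decode()` reads the bit tables in storage order and can simply stop once it has what the caller asked for; moving the register and stack masks ahead of the inline/method info tables is what makes the new `GcMasksOnly` early-out cheap (and is why the oat version is bumped). The sketch below shows the idea only, with a toy one-byte-length table format instead of ART's `BitTable` machinery; `FakeCodeInfo` and `ReadTable` are made-up names.

#include <cstddef>
#include <cstdint>

// Decode flags mirroring the idea in stack_map.h: each flag names a prefix
// of the table layout that is sufficient for one class of callers.
enum DecodeFlags {
  kDefault = 0,         // decode everything
  kGcMasksOnly = 1,     // stack maps + register/stack masks, enough for GC
  kInlineInfoOnly = 2,  // additionally inline/method info, but no dex registers
};

struct Table { const uint8_t* data = nullptr; size_t size = 0; };

struct FakeCodeInfo {
  Table stack_maps, register_masks, stack_masks;
  Table inline_infos, method_infos;
  Table dex_register_masks, dex_register_maps, dex_register_catalog;

  // Tables are laid out back to back; decoding is just walking a cursor,
  // so an early return skips everything stored after the needed prefix.
  void Decode(const uint8_t* cursor, DecodeFlags flags) {
    cursor = ReadTable(&stack_maps, cursor);
    cursor = ReadTable(&register_masks, cursor);
    cursor = ReadTable(&stack_masks, cursor);
    if (flags == kGcMasksOnly) return;        // GC stack visits stop here.
    cursor = ReadTable(&inline_infos, cursor);
    cursor = ReadTable(&method_infos, cursor);
    if (flags == kInlineInfoOnly) return;     // plain stack walkers stop here.
    cursor = ReadTable(&dex_register_masks, cursor);
    cursor = ReadTable(&dex_register_maps, cursor);
    cursor = ReadTable(&dex_register_catalog, cursor);
  }

 private:
  // Toy format: a one-byte size followed by the payload.
  static const uint8_t* ReadTable(Table* table, const uint8_t* cursor) {
    table->size = *cursor++;
    table->data = cursor;
    return cursor + table->size;
  }
};

int main() {
  const uint8_t blob[8] = {};  // eight empty tables, one size byte each
  FakeCodeInfo gc_view;
  gc_view.Decode(blob, kGcMasksOnly);  // stops after the first three tables
  FakeCodeInfo full_view;
  full_view.Decode(blob, kDefault);    // walks all eight tables
  return (gc_view.inline_infos.data == nullptr &&
          full_view.inline_infos.data != nullptr) ? 0 : 1;
}

This is also the reason the `ReferenceMapVisitor` change in thread.cc selects `Default` only for precise visits (which need dex register maps) and `GcMasksOnly` otherwise.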