Diffstat (limited to 'compiler')
26 files changed, 1719 insertions, 323 deletions
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index aa529f8352..d0f66e2d8e 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -121,14 +121,15 @@ class LinkerPatch { enum class Type : uint8_t { kMethod, kCall, - kCallRelative, // NOTE: Actual patching is instruction_set-dependent. + kCallRelative, // NOTE: Actual patching is instruction_set-dependent. kType, - kTypeRelative, // NOTE: Actual patching is instruction_set-dependent. - kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent. + kTypeRelative, // NOTE: Actual patching is instruction_set-dependent. + kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent. kString, - kStringRelative, // NOTE: Actual patching is instruction_set-dependent. - kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent. - kDexCacheArray, // NOTE: Actual patching is instruction_set-dependent. + kStringRelative, // NOTE: Actual patching is instruction_set-dependent. + kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent. + kDexCacheArray, // NOTE: Actual patching is instruction_set-dependent. + kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent. }; static LinkerPatch MethodPatch(size_t literal_offset, @@ -215,13 +216,21 @@ class LinkerPatch { const DexFile* target_dex_file, uint32_t pc_insn_offset, uint32_t element_offset) { - DCHECK(IsUint<32>(element_offset)); LinkerPatch patch(literal_offset, Type::kDexCacheArray, target_dex_file); patch.pc_insn_offset_ = pc_insn_offset; patch.element_offset_ = element_offset; return patch; } + static LinkerPatch BakerReadBarrierBranchPatch(size_t literal_offset, + uint32_t custom_value1 = 0u, + uint32_t custom_value2 = 0u) { + LinkerPatch patch(literal_offset, Type::kBakerReadBarrierBranch, nullptr); + patch.baker_custom_value1_ = custom_value1; + patch.baker_custom_value2_ = custom_value2; + return patch; + } + LinkerPatch(const LinkerPatch& other) = default; LinkerPatch& operator=(const LinkerPatch& other) = default; @@ -241,6 +250,7 @@ class LinkerPatch { case Type::kStringRelative: case Type::kStringBssEntry: case Type::kDexCacheArray: + case Type::kBakerReadBarrierBranch: return true; default: return false; @@ -301,6 +311,16 @@ class LinkerPatch { return pc_insn_offset_; } + uint32_t GetBakerCustomValue1() const { + DCHECK(patch_type_ == Type::kBakerReadBarrierBranch); + return baker_custom_value1_; + } + + uint32_t GetBakerCustomValue2() const { + DCHECK(patch_type_ == Type::kBakerReadBarrierBranch); + return baker_custom_value2_; + } + private: LinkerPatch(size_t literal_offset, Type patch_type, const DexFile* target_dex_file) : target_dex_file_(target_dex_file), @@ -314,6 +334,7 @@ class LinkerPatch { } const DexFile* target_dex_file_; + // TODO: Clean up naming. Some patched locations are literals but others are not. uint32_t literal_offset_ : 24; // Method code size up to 16MiB. Type patch_type_ : 8; union { @@ -322,10 +343,12 @@ class LinkerPatch { uint32_t type_idx_; // Type index for Type patches. uint32_t string_idx_; // String index for String patches. uint32_t element_offset_; // Element offset in the dex cache arrays. 
+ uint32_t baker_custom_value1_; static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(string_idx_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(element_offset_) == sizeof(cmp1_), "needed by relational operators"); + static_assert(sizeof(baker_custom_value1_) == sizeof(cmp1_), "needed by relational operators"); }; union { // Note: To avoid uninitialized padding on 64-bit systems, we use `size_t` for `cmp2_`. @@ -334,7 +357,9 @@ class LinkerPatch { // Literal offset of the insn loading PC (same as literal_offset if it's the same insn, // may be different if the PC-relative addressing needs multiple insns). uint32_t pc_insn_offset_; + uint32_t baker_custom_value2_; static_assert(sizeof(pc_insn_offset_) <= sizeof(cmp2_), "needed by relational operators"); + static_assert(sizeof(baker_custom_value2_) <= sizeof(cmp2_), "needed by relational operators"); }; friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs); diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc index 2471f798be..f55d5a6fb8 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.cc +++ b/compiler/linker/arm/relative_patcher_arm_base.cc @@ -24,6 +24,118 @@ namespace art { namespace linker { +class ArmBaseRelativePatcher::ThunkData { + public: + ThunkData(std::vector<uint8_t> code, uint32_t max_next_offset) + : code_(code), + offsets_(), + max_next_offset_(max_next_offset), + pending_offset_(0u) { + DCHECK(NeedsNextThunk()); // The data is constructed only when we expect to need the thunk. + } + + ThunkData(ThunkData&& src) = default; + + size_t CodeSize() const { + return code_.size(); + } + + ArrayRef<const uint8_t> GetCode() const { + return ArrayRef<const uint8_t>(code_); + } + + bool NeedsNextThunk() const { + return max_next_offset_ != 0u; + } + + uint32_t MaxNextOffset() const { + DCHECK(NeedsNextThunk()); + return max_next_offset_; + } + + void ClearMaxNextOffset() { + DCHECK(NeedsNextThunk()); + max_next_offset_ = 0u; + } + + void SetMaxNextOffset(uint32_t max_next_offset) { + DCHECK(!NeedsNextThunk()); + max_next_offset_ = max_next_offset; + } + + // Adjust the MaxNextOffset() down if needed to fit the code before the next thunk. + // Returns true if it was adjusted, false if the old value was kept. + bool MakeSpaceBefore(const ThunkData& next_thunk, size_t alignment) { + DCHECK(NeedsNextThunk()); + DCHECK(next_thunk.NeedsNextThunk()); + DCHECK_ALIGNED_PARAM(MaxNextOffset(), alignment); + DCHECK_ALIGNED_PARAM(next_thunk.MaxNextOffset(), alignment); + if (next_thunk.MaxNextOffset() - CodeSize() < MaxNextOffset()) { + max_next_offset_ = RoundDown(next_thunk.MaxNextOffset() - CodeSize(), alignment); + return true; + } else { + return false; + } + } + + uint32_t ReserveOffset(size_t offset) { + DCHECK(NeedsNextThunk()); + DCHECK_LE(offset, max_next_offset_); + max_next_offset_ = 0u; // The reserved offset should satisfy all pending references. 
+ offsets_.push_back(offset); + return offset + CodeSize(); + } + + bool HasReservedOffset() const { + return !offsets_.empty(); + } + + uint32_t LastReservedOffset() const { + DCHECK(HasReservedOffset()); + return offsets_.back(); + } + + bool HasPendingOffset() const { + return pending_offset_ != offsets_.size(); + } + + uint32_t GetPendingOffset() const { + DCHECK(HasPendingOffset()); + return offsets_[pending_offset_]; + } + + void MarkPendingOffsetAsWritten() { + DCHECK(HasPendingOffset()); + ++pending_offset_; + } + + bool HasWrittenOffset() const { + return pending_offset_ != 0u; + } + + uint32_t LastWrittenOffset() const { + DCHECK(HasWrittenOffset()); + return offsets_[pending_offset_ - 1u]; + } + + private: + std::vector<uint8_t> code_; // The code of the thunk. + std::vector<uint32_t> offsets_; // Offsets at which the thunk needs to be written. + uint32_t max_next_offset_; // The maximum offset at which the next thunk can be placed. + uint32_t pending_offset_; // The index of the next offset to write. +}; + +class ArmBaseRelativePatcher::PendingThunkComparator { + public: + bool operator()(const ThunkData* lhs, const ThunkData* rhs) const { + DCHECK(lhs->HasPendingOffset()); + DCHECK(rhs->HasPendingOffset()); + // The top of the heap is defined to contain the highest element and we want to pick + // the thunk with the smallest pending offset, so use the reverse ordering, i.e. ">". + return lhs->GetPendingOffset() > rhs->GetPendingOffset(); + } +}; + uint32_t ArmBaseRelativePatcher::ReserveSpace(uint32_t offset, const CompiledMethod* compiled_method, MethodReference method_ref) { @@ -31,151 +143,305 @@ uint32_t ArmBaseRelativePatcher::ReserveSpace(uint32_t offset, } uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset, - MethodReference(nullptr, 0u), - aligned_offset); - if (needs_thunk) { - // All remaining patches will be handled by this thunk. - DCHECK(!unprocessed_patches_.empty()); - DCHECK_LE(aligned_offset - unprocessed_patches_.front().second, max_positive_displacement_); - unprocessed_patches_.clear(); - - thunk_locations_.push_back(aligned_offset); - offset = aligned_offset + thunk_code_.size(); + // For multi-oat compilations (boot image), ReserveSpaceEnd() is called for each oat file. + // Since we do not know here whether this is the last file or whether the next opportunity + // to place thunk will be soon enough, we need to reserve all needed thunks now. Code for + // subsequent oat files can still call back to them. + if (!unprocessed_method_call_patches_.empty()) { + ResolveMethodCalls(offset, MethodReference(nullptr, DexFile::kDexNoIndex)); + } + for (ThunkData* data : unreserved_thunks_) { + uint32_t thunk_offset = CompiledCode::AlignCode(offset, instruction_set_); + offset = data->ReserveOffset(thunk_offset); } + unreserved_thunks_.clear(); + // We also need to delay initiating the pending_thunks_ until the call to WriteThunks(). + // Check that the `pending_thunks_.capacity()` indicates that no WriteThunks() has taken place. + DCHECK_EQ(pending_thunks_.capacity(), 0u); return offset; } uint32_t ArmBaseRelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { - if (current_thunk_to_write_ == thunk_locations_.size()) { - return offset; + if (pending_thunks_.capacity() == 0u) { + if (thunks_.empty()) { + return offset; + } + // First call to WriteThunks(), prepare the thunks for writing. 
+ pending_thunks_.reserve(thunks_.size()); + for (auto& entry : thunks_) { + ThunkData* data = &entry.second; + if (data->HasPendingOffset()) { + pending_thunks_.push_back(data); + } + } + std::make_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator()); } uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) { - ++current_thunk_to_write_; + while (!pending_thunks_.empty() && + pending_thunks_.front()->GetPendingOffset() == aligned_offset) { + // Write alignment bytes and code. uint32_t aligned_code_delta = aligned_offset - offset; - if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { + if (aligned_code_delta != 0u && UNLIKELY(!WriteCodeAlignment(out, aligned_code_delta))) { return 0u; } - if (UNLIKELY(!WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk_code_)))) { + if (UNLIKELY(!WriteThunk(out, pending_thunks_.front()->GetCode()))) { return 0u; } - offset = aligned_offset + thunk_code_.size(); + offset = aligned_offset + pending_thunks_.front()->CodeSize(); + // Mark the thunk as written at the pending offset and update the `pending_thunks_` heap. + std::pop_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator()); + pending_thunks_.back()->MarkPendingOffsetAsWritten(); + if (pending_thunks_.back()->HasPendingOffset()) { + std::push_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator()); + } else { + pending_thunks_.pop_back(); + } + aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); } + DCHECK(pending_thunks_.empty() || pending_thunks_.front()->GetPendingOffset() > aligned_offset); return offset; } ArmBaseRelativePatcher::ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, - InstructionSet instruction_set, - std::vector<uint8_t> thunk_code, - uint32_t max_positive_displacement, - uint32_t max_negative_displacement) - : provider_(provider), instruction_set_(instruction_set), thunk_code_(thunk_code), - max_positive_displacement_(max_positive_displacement), - max_negative_displacement_(max_negative_displacement), - thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() { + InstructionSet instruction_set) + : provider_(provider), + instruction_set_(instruction_set), + thunks_(), + unprocessed_method_call_patches_(), + method_call_thunk_(nullptr), + pending_thunks_() { +} + +ArmBaseRelativePatcher::~ArmBaseRelativePatcher() { + // All work done by member destructors. } uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset, const CompiledMethod* compiled_method, MethodReference method_ref, uint32_t max_extra_space) { - uint32_t quick_code_size = compiled_method->GetQuickCode().size(); - uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader)); - uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size); - // Adjust for extra space required by the subclass. - next_aligned_offset = compiled_method->AlignCode(next_aligned_offset + max_extra_space); - // TODO: ignore unprocessed patches targeting this method if they can reach quick_code_offset. - // We need the MethodReference for that. 
- if (!unprocessed_patches_.empty() && - next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) { - bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset, - method_ref, - next_aligned_offset); - if (needs_thunk) { - // A single thunk will cover all pending patches. - unprocessed_patches_.clear(); - uint32_t thunk_location = CompiledMethod::AlignCode(offset, instruction_set_); - thunk_locations_.push_back(thunk_location); - offset = thunk_location + thunk_code_.size(); + // Adjust code size for extra space required by the subclass. + uint32_t max_code_size = compiled_method->GetQuickCode().size() + max_extra_space; + uint32_t code_offset; + uint32_t next_aligned_offset; + while (true) { + code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader)); + next_aligned_offset = compiled_method->AlignCode(code_offset + max_code_size); + if (unreserved_thunks_.empty() || + unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset) { + break; } - } - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.GetType() == LinkerPatch::Type::kCallRelative) { - unprocessed_patches_.emplace_back(patch.TargetMethod(), - quick_code_offset + patch.LiteralOffset()); + ThunkData* thunk = unreserved_thunks_.front(); + if (thunk == method_call_thunk_) { + ResolveMethodCalls(code_offset, method_ref); + // This may have changed `method_call_thunk_` data, so re-check if we need to reserve. + if (unreserved_thunks_.empty() || + unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset) { + break; + } + // We need to process the new `front()` whether it's still the `method_call_thunk_` or not. + thunk = unreserved_thunks_.front(); + } + unreserved_thunks_.pop_front(); + uint32_t thunk_offset = CompiledCode::AlignCode(offset, instruction_set_); + offset = thunk->ReserveOffset(thunk_offset); + if (thunk == method_call_thunk_) { + // All remaining method call patches will be handled by this thunk. + DCHECK(!unprocessed_method_call_patches_.empty()); + DCHECK_LE(thunk_offset - unprocessed_method_call_patches_.front().GetPatchOffset(), + MaxPositiveDisplacement(ThunkType::kMethodCall)); + unprocessed_method_call_patches_.clear(); } } + + // Process patches and check that adding thunks for the current method did not push any + // thunks (previously existing or newly added) before `next_aligned_offset`. This is + // essentially a check that we never compile a method that's too big. The calls or branches + // from the method should be able to reach beyond the end of the method and over any pending + // thunks. (The number of different thunks should be relatively low and their code short.) + ProcessPatches(compiled_method, code_offset); + CHECK(unreserved_thunks_.empty() || + unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset); + return offset; } -uint32_t ArmBaseRelativePatcher::CalculateDisplacement(uint32_t patch_offset, - uint32_t target_offset) { +uint32_t ArmBaseRelativePatcher::CalculateMethodCallDisplacement(uint32_t patch_offset, + uint32_t target_offset) { + DCHECK(method_call_thunk_ != nullptr); // Unsigned arithmetic with its well-defined overflow behavior is just fine here. uint32_t displacement = target_offset - patch_offset; + uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall); + uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall); // NOTE: With unsigned arithmetic we do mean to use && rather than || below. 
- if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) { + if (displacement > max_positive_displacement && displacement < -max_negative_displacement) { // Unwritten thunks have higher offsets, check if it's within range. - DCHECK(current_thunk_to_write_ == thunk_locations_.size() || - thunk_locations_[current_thunk_to_write_] > patch_offset); - if (current_thunk_to_write_ != thunk_locations_.size() && - thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) { - displacement = thunk_locations_[current_thunk_to_write_] - patch_offset; + DCHECK(!method_call_thunk_->HasPendingOffset() || + method_call_thunk_->GetPendingOffset() > patch_offset); + if (method_call_thunk_->HasPendingOffset() && + method_call_thunk_->GetPendingOffset() - patch_offset <= max_positive_displacement) { + displacement = method_call_thunk_->GetPendingOffset() - patch_offset; } else { // We must have a previous thunk then. - DCHECK_NE(current_thunk_to_write_, 0u); - DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset); - displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset; - DCHECK(displacement >= -max_negative_displacement_); + DCHECK(method_call_thunk_->HasWrittenOffset()); + DCHECK_LT(method_call_thunk_->LastWrittenOffset(), patch_offset); + displacement = method_call_thunk_->LastWrittenOffset() - patch_offset; + DCHECK_GE(displacement, -max_negative_displacement); } } return displacement; } -bool ArmBaseRelativePatcher::ReserveSpaceProcessPatches(uint32_t quick_code_offset, - MethodReference method_ref, - uint32_t next_aligned_offset) { - // Process as many patches as possible, stop only on unresolved targets or calls too far back. - while (!unprocessed_patches_.empty()) { - MethodReference patch_ref = unprocessed_patches_.front().first; - uint32_t patch_offset = unprocessed_patches_.front().second; - DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset); - if (patch_ref.dex_file == method_ref.dex_file && - patch_ref.dex_method_index == method_ref.dex_method_index) { - DCHECK_GT(quick_code_offset, patch_offset); - if (quick_code_offset - patch_offset > max_positive_displacement_) { - return true; +uint32_t ArmBaseRelativePatcher::GetThunkTargetOffset(const ThunkKey& key, uint32_t patch_offset) { + auto it = thunks_.find(key); + CHECK(it != thunks_.end()); + const ThunkData& data = it->second; + if (data.HasWrittenOffset()) { + uint32_t offset = data.LastWrittenOffset(); + DCHECK_LT(offset, patch_offset); + if (patch_offset - offset <= MaxNegativeDisplacement(key.GetType())) { + return offset; + } + } + DCHECK(data.HasPendingOffset()); + uint32_t offset = data.GetPendingOffset(); + DCHECK_GT(offset, patch_offset); + DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key.GetType())); + return offset; +} + +void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_method, + uint32_t code_offset) { + for (const LinkerPatch& patch : compiled_method->GetPatches()) { + uint32_t patch_offset = code_offset + patch.LiteralOffset(); + ThunkType key_type = static_cast<ThunkType>(-1); + ThunkData* old_data = nullptr; + if (patch.GetType() == LinkerPatch::Type::kCallRelative) { + key_type = ThunkType::kMethodCall; + unprocessed_method_call_patches_.emplace_back(patch_offset, patch.TargetMethod()); + if (method_call_thunk_ == nullptr) { + ThunkKey key(key_type, ThunkParams{{ 0u, 0u }}); // NOLINT(whitespace/braces) + uint32_t max_next_offset = 
CalculateMaxNextOffset(patch_offset, key_type); + auto it = thunks_.Put(key, ThunkData(CompileThunk(key), max_next_offset)); + method_call_thunk_ = &it->second; + AddUnreservedThunk(method_call_thunk_); + } else { + old_data = method_call_thunk_; } - } else { - auto result = provider_->FindMethodOffset(patch_ref); - if (!result.first) { - // If still unresolved, check if we have a thunk within range. - if (thunk_locations_.empty() || - patch_offset - thunk_locations_.back() > max_negative_displacement_) { - // No thunk in range, we need a thunk if the next aligned offset - // is out of range, or if we're at the end of all code. - return (next_aligned_offset - patch_offset > max_positive_displacement_) || - (quick_code_offset == next_aligned_offset); // End of code. + } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) { + ThunkKey key = GetBakerReadBarrierKey(patch); + key_type = key.GetType(); + auto lb = thunks_.lower_bound(key); + if (lb == thunks_.end() || thunks_.key_comp()(key, lb->first)) { + uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type); + auto it = thunks_.PutBefore(lb, key, ThunkData(CompileThunk(key), max_next_offset)); + AddUnreservedThunk(&it->second); + } else { + old_data = &lb->second; + } + } + if (old_data != nullptr) { + // Shared path where an old thunk may need an update. + DCHECK(key_type != static_cast<ThunkType>(-1)); + DCHECK(!old_data->HasReservedOffset() || old_data->LastReservedOffset() < patch_offset); + if (old_data->NeedsNextThunk()) { + // Patches for a method are ordered by literal offset, so if we still need to place + // this thunk for a previous patch, that thunk shall be in range for this patch. + DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key_type)); + } else { + if (!old_data->HasReservedOffset() || + patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key_type)) { + old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key_type)); + AddUnreservedThunk(old_data); + } + } + } + } +} + +void ArmBaseRelativePatcher::AddUnreservedThunk(ThunkData* data) { + DCHECK(data->NeedsNextThunk()); + size_t index = unreserved_thunks_.size(); + while (index != 0u && data->MaxNextOffset() < unreserved_thunks_[index - 1u]->MaxNextOffset()) { + --index; + } + unreserved_thunks_.insert(unreserved_thunks_.begin() + index, data); + // We may need to update the max next offset(s) if the thunk code would not fit. + size_t alignment = GetInstructionSetAlignment(instruction_set_); + if (index + 1u != unreserved_thunks_.size()) { + // Note: Ignore the return value as we need to process previous thunks regardless. + data->MakeSpaceBefore(*unreserved_thunks_[index + 1u], alignment); + } + // Make space for previous thunks. Once we find a pending thunk that does + // not need an adjustment, we can stop. + while (index != 0u && unreserved_thunks_[index - 1u]->MakeSpaceBefore(*data, alignment)) { + --index; + data = unreserved_thunks_[index]; + } +} + +void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset, + MethodReference method_ref) { + DCHECK(!unreserved_thunks_.empty()); + DCHECK(!unprocessed_method_call_patches_.empty()); + DCHECK(method_call_thunk_ != nullptr); + uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall); + uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall); + // Process as many patches as possible, stop only on unresolved targets or calls too far back. 
+ while (!unprocessed_method_call_patches_.empty()) { + MethodReference target_method = unprocessed_method_call_patches_.front().GetTargetMethod(); + uint32_t patch_offset = unprocessed_method_call_patches_.front().GetPatchOffset(); + DCHECK(!method_call_thunk_->HasReservedOffset() || + method_call_thunk_->LastReservedOffset() <= patch_offset); + if (!method_call_thunk_->HasReservedOffset() || + patch_offset - method_call_thunk_->LastReservedOffset() > max_negative_displacement) { + // No previous thunk in range, check if we can reach the target directly. + if (target_method.dex_file == method_ref.dex_file && + target_method.dex_method_index == method_ref.dex_method_index) { + DCHECK_GT(quick_code_offset, patch_offset); + if (quick_code_offset - patch_offset > max_positive_displacement) { + break; } } else { + auto result = provider_->FindMethodOffset(target_method); + if (!result.first) { + break; + } uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_); if (target_offset >= patch_offset) { - DCHECK_LE(target_offset - patch_offset, max_positive_displacement_); - } else { - // When calling back, check if we have a thunk that's closer than the actual target. - if (!thunk_locations_.empty()) { - target_offset = std::max(target_offset, thunk_locations_.back()); - } - if (patch_offset - target_offset > max_negative_displacement_) { - return true; - } + DCHECK_LE(target_offset - patch_offset, max_positive_displacement); + } else if (patch_offset - target_offset > max_negative_displacement) { + break; } } } - unprocessed_patches_.pop_front(); + unprocessed_method_call_patches_.pop_front(); } - return false; + if (!unprocessed_method_call_patches_.empty()) { + // Try to adjust the max next offset in `method_call_thunk_`. Do this conservatively only if + // the thunk shall be at the end of the `unreserved_thunks_` to avoid dealing with overlaps. + uint32_t new_max_next_offset = + unprocessed_method_call_patches_.front().GetPatchOffset() + max_positive_displacement; + if (new_max_next_offset > + unreserved_thunks_.back()->MaxNextOffset() + unreserved_thunks_.back()->CodeSize()) { + method_call_thunk_->ClearMaxNextOffset(); + method_call_thunk_->SetMaxNextOffset(new_max_next_offset); + if (method_call_thunk_ != unreserved_thunks_.back()) { + RemoveElement(unreserved_thunks_, method_call_thunk_); + unreserved_thunks_.push_back(method_call_thunk_); + } + } + } else { + // We have resolved all method calls, we do not need a new thunk anymore. 
+ method_call_thunk_->ClearMaxNextOffset(); + RemoveElement(unreserved_thunks_, method_call_thunk_); + } +} + +inline uint32_t ArmBaseRelativePatcher::CalculateMaxNextOffset(uint32_t patch_offset, + ThunkType type) { + return RoundDown(patch_offset + MaxPositiveDisplacement(type), + GetInstructionSetAlignment(instruction_set_)); } } // namespace linker diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h index 25fd35e1d6..2cb1b6c535 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.h +++ b/compiler/linker/arm/relative_patcher_arm_base.h @@ -18,9 +18,11 @@ #define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ #include <deque> +#include <vector> #include "linker/relative_patcher.h" #include "method_reference.h" +#include "safe_map.h" namespace art { namespace linker { @@ -35,32 +37,138 @@ class ArmBaseRelativePatcher : public RelativePatcher { protected: ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, - InstructionSet instruction_set, - std::vector<uint8_t> thunk_code, - uint32_t max_positive_displacement, - uint32_t max_negative_displacement); + InstructionSet instruction_set); + ~ArmBaseRelativePatcher(); + + enum class ThunkType { + kMethodCall, // Method call thunk. + kBakerReadBarrierField, // Baker read barrier, load field or array element at known offset. + kBakerReadBarrierRoot, // Baker read barrier, GC root load. + }; + + struct BakerReadBarrierOffsetParams { + uint32_t holder_reg; // Holder object for reading lock word. + uint32_t base_reg; // Base register, different from holder for large offset. + // If base differs from holder, it should be a pre-defined + // register to limit the number of thunks we need to emit. + // The offset is retrieved using introspection. + }; + + struct BakerReadBarrierRootParams { + uint32_t root_reg; // The register holding the GC root. 
+ uint32_t dummy; + }; + + struct RawThunkParams { + uint32_t first; + uint32_t second; + }; + + union ThunkParams { + RawThunkParams raw_params; + BakerReadBarrierOffsetParams offset_params; + BakerReadBarrierRootParams root_params; + }; + + class ThunkKey { + public: + ThunkKey(ThunkType type, ThunkParams params) : type_(type), params_(params) { } + + ThunkType GetType() const { + return type_; + } + + BakerReadBarrierOffsetParams GetOffsetParams() const { + DCHECK(type_ == ThunkType::kBakerReadBarrierField); + return params_.offset_params; + } + + BakerReadBarrierRootParams GetRootParams() const { + DCHECK(type_ == ThunkType::kBakerReadBarrierRoot); + return params_.root_params; + } + + RawThunkParams GetRawParams() const { + return params_.raw_params; + } + + private: + ThunkType type_; + ThunkParams params_; + }; + + class ThunkKeyCompare { + public: + bool operator()(const ThunkKey& lhs, const ThunkKey& rhs) const { + if (lhs.GetType() != rhs.GetType()) { + return lhs.GetType() < rhs.GetType(); + } + if (lhs.GetRawParams().first != rhs.GetRawParams().first) { + return lhs.GetRawParams().first < rhs.GetRawParams().first; + } + return lhs.GetRawParams().second < rhs.GetRawParams().second; + } + }; uint32_t ReserveSpaceInternal(uint32_t offset, const CompiledMethod* compiled_method, MethodReference method_ref, uint32_t max_extra_space); - uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset); + uint32_t GetThunkTargetOffset(const ThunkKey& key, uint32_t patch_offset); + + uint32_t CalculateMethodCallDisplacement(uint32_t patch_offset, + uint32_t target_offset); + + virtual ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) = 0; + virtual std::vector<uint8_t> CompileThunk(const ThunkKey& key) = 0; + virtual uint32_t MaxPositiveDisplacement(ThunkType type) = 0; + virtual uint32_t MaxNegativeDisplacement(ThunkType type) = 0; private: - bool ReserveSpaceProcessPatches(uint32_t quick_code_offset, MethodReference method_ref, - uint32_t next_aligned_offset); + class ThunkData; + + void ProcessPatches(const CompiledMethod* compiled_method, uint32_t code_offset); + void AddUnreservedThunk(ThunkData* data); + + void ResolveMethodCalls(uint32_t quick_code_offset, MethodReference method_ref); + + uint32_t CalculateMaxNextOffset(uint32_t patch_offset, ThunkType type); RelativePatcherTargetProvider* const provider_; const InstructionSet instruction_set_; - const std::vector<uint8_t> thunk_code_; - const uint32_t max_positive_displacement_; - const uint32_t max_negative_displacement_; - std::vector<uint32_t> thunk_locations_; - size_t current_thunk_to_write_; - - // ReserveSpace() tracks unprocessed patches. - typedef std::pair<MethodReference, uint32_t> UnprocessedPatch; - std::deque<UnprocessedPatch> unprocessed_patches_; + + // The data for all thunks. + // SafeMap<> nodes don't move after being inserted, so we can use direct pointers to the data. + using ThunkMap = SafeMap<ThunkKey, ThunkData, ThunkKeyCompare>; + ThunkMap thunks_; + + // ReserveSpace() tracks unprocessed method call patches. These may be resolved later. 
+ class UnprocessedMethodCallPatch { + public: + UnprocessedMethodCallPatch(uint32_t patch_offset, MethodReference target_method) + : patch_offset_(patch_offset), target_method_(target_method) { } + + uint32_t GetPatchOffset() const { + return patch_offset_; + } + + MethodReference GetTargetMethod() const { + return target_method_; + } + + private: + uint32_t patch_offset_; + MethodReference target_method_; + }; + std::deque<UnprocessedMethodCallPatch> unprocessed_method_call_patches_; + // Once we have compiled a method call thunk, cache pointer to the data. + ThunkData* method_call_thunk_; + + // Thunks + std::deque<ThunkData*> unreserved_thunks_; + + class PendingThunkComparator; + std::vector<ThunkData*> pending_thunks_; // Heap with the PendingThunkComparator. friend class Arm64RelativePatcherTest; friend class Thumb2RelativePatcherTest; diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index fa49fc4e6a..1a5d79ce70 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -23,9 +23,17 @@ namespace art { namespace linker { +// PC displacement from patch location; Thumb2 PC is always at instruction address + 4. +static constexpr int32_t kPcDisplacement = 4; + +// Maximum positive and negative displacement for method call measured from the patch location. +// (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from +// the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.) +constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; +constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement; + Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider) - : ArmBaseRelativePatcher(provider, kThumb2, CompileThunkCode(), - kMaxPositiveDisplacement, kMaxNegativeDisplacement) { + : ArmBaseRelativePatcher(provider, kThumb2) { } void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, @@ -36,7 +44,7 @@ void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, DCHECK_EQ(literal_offset & 1u, 0u); DCHECK_EQ(patch_offset & 1u, 0u); DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit. - uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); + uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u); displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. DCHECK_EQ(displacement & 1u, 0u); DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed. @@ -76,7 +84,20 @@ void Thumb2RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, SetInsn32(code, literal_offset, insn); } -std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() { +void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED) { + LOG(FATAL) << "UNIMPLEMENTED"; +} + +ArmBaseRelativePatcher::ThunkKey Thumb2RelativePatcher::GetBakerReadBarrierKey( + const LinkerPatch& patch ATTRIBUTE_UNUSED) { + LOG(FATAL) << "UNIMPLEMENTED"; + UNREACHABLE(); +} + +std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) { + DCHECK(key.GetType() == ThunkType::kMethodCall); // The thunk just uses the entry point in the ArtMethod. 
This works even for calls // to the generic JNI and interpreter trampolines. ArenaPool pool; @@ -93,6 +114,16 @@ std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() { return thunk_code; } +uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(ThunkType type) { + DCHECK(type == ThunkType::kMethodCall); + return kMaxMethodCallPositiveDisplacement; +} + +uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(ThunkType type) { + DCHECK(type == ThunkType::kMethodCall); + return kMaxMethodCallNegativeDisplacement; +} + void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { DCHECK_LE(offset + 4u, code->size()); DCHECK_EQ(offset & 1u, 0u); diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h index d85739c51f..ab37802d0f 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -34,24 +34,24 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { const LinkerPatch& patch, uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset) OVERRIDE; - private: - static std::vector<uint8_t> CompileThunkCode(); + protected: + ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE; + std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; + uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE; + uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE; + private: void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset); template <typename Vector> static uint32_t GetInsn32(Vector* code, uint32_t offset); - // PC displacement from patch location; Thumb2 PC is always at instruction address + 4. - static constexpr int32_t kPcDisplacement = 4; - - // Maximum positive and negative displacement measured from the patch location. - // (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from - // the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.) 
- static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; - static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement; + friend class Thumb2RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher); }; diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc index eace3d4326..f08270d934 100644 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -63,7 +63,7 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset); std::vector<uint8_t> method2_raw_code(method2_size); ArrayRef<const uint8_t> method2_code(method2_raw_code); - AddCompiledMethod(MethodRef(2u), method2_code, ArrayRef<const LinkerPatch>()); + AddCompiledMethod(MethodRef(2u), method2_code); AddCompiledMethod(MethodRef(3u), method3_code, method3_patches); @@ -80,7 +80,7 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { } else { uint32_t thunk_end = CompiledCode::AlignCode(method3_offset - sizeof(OatQuickMethodHeader), kThumb2) + - ThunkSize(); + MethodCallThunkSize(); uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end); CHECK_EQ(result3.second, header_offset + sizeof(OatQuickMethodHeader) + 1 /* thumb mode */); return true; // Thunk present. @@ -94,24 +94,30 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { return result.second - 1 /* thumb mode */; } - uint32_t ThunkSize() { - return static_cast<Thumb2RelativePatcher*>(patcher_.get())->thunk_code_.size(); + std::vector<uint8_t> CompileMethodCallThunk() { + ArmBaseRelativePatcher::ThunkKey key( + ArmBaseRelativePatcher::ThunkType::kMethodCall, + ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }}); // NOLINT(whitespace/braces) + return static_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + uint32_t MethodCallThunkSize() { + return CompileMethodCallThunk().size(); } bool CheckThunk(uint32_t thunk_offset) { - Thumb2RelativePatcher* patcher = static_cast<Thumb2RelativePatcher*>(patcher_.get()); - ArrayRef<const uint8_t> expected_code(patcher->thunk_code_); + const std::vector<uint8_t> expected_code = CompileMethodCallThunk(); if (output_.size() < thunk_offset + expected_code.size()) { LOG(ERROR) << "output_.size() == " << output_.size() << " < " << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); return false; } ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); - if (linked_code == expected_code) { + if (linked_code == ArrayRef<const uint8_t>(expected_code)) { return true; } // Log failure info. 
- DumpDiff(expected_code, linked_code); + DumpDiff(ArrayRef<const uint8_t>(expected_code), linked_code); return false; } @@ -357,9 +363,10 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { uint32_t method3_offset = GetMethodOffset(3u); ASSERT_TRUE(IsAligned<kArmAlignment>(method3_offset)); uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader); + uint32_t thunk_size = MethodCallThunkSize(); uint32_t thunk_offset = - RoundDown(method3_header_offset - ThunkSize(), GetInstructionSetAlignment(kThumb2)); - DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()), + RoundDown(method3_header_offset - thunk_size, GetInstructionSetAlignment(kThumb2)); + DCHECK_EQ(thunk_offset + thunk_size + CodeAlignmentSize(thunk_offset + thunk_size), method3_header_offset); ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset)); uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */); diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 9ddf200237..53797d280a 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -16,11 +16,17 @@ #include "linker/arm64/relative_patcher_arm64.h" +#include "arch/arm64/asm_support_arm64.h" #include "arch/arm64/instruction_set_features_arm64.h" #include "art_method.h" +#include "base/bit_utils.h" #include "compiled_method.h" #include "driver/compiler_driver.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" #include "linker/output_stream.h" +#include "lock_word.h" +#include "mirror/object.h" +#include "mirror/array-inl.h" #include "oat.h" #include "oat_quick_method_header.h" #include "utils/arm64/assembler_arm64.h" @@ -30,17 +36,52 @@ namespace linker { namespace { +// Maximum positive and negative displacement for method call measured from the patch location. +// (Signed 28 bit displacement with the last two bits 0 has range [-2^27, 2^27-4] measured from +// the ARM64 PC pointing to the BL.) +constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 27) - 4u; +constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 27); + +// Maximum positive and negative displacement for a conditional branch measured from the patch +// location. (Signed 21 bit displacement with the last two bits 0 has range [-2^20, 2^20-4] +// measured from the ARM64 PC pointing to the B.cond.) +constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 4u; +constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20); + +// The ADRP thunk for erratum 843419 is 2 instructions, i.e. 8 bytes. 
+constexpr uint32_t kAdrpThunkSize = 8u; + inline bool IsAdrpPatch(const LinkerPatch& patch) { - return (patch.IsPcRelative() && patch.GetType() != LinkerPatch::Type::kCallRelative) && - patch.LiteralOffset() == patch.PcInsnOffset(); + switch (patch.GetType()) { + case LinkerPatch::Type::kMethod: + case LinkerPatch::Type::kCall: + case LinkerPatch::Type::kCallRelative: + case LinkerPatch::Type::kType: + case LinkerPatch::Type::kString: + case LinkerPatch::Type::kBakerReadBarrierBranch: + return false; + case LinkerPatch::Type::kTypeRelative: + case LinkerPatch::Type::kTypeBssEntry: + case LinkerPatch::Type::kStringRelative: + case LinkerPatch::Type::kStringBssEntry: + case LinkerPatch::Type::kDexCacheArray: + return patch.LiteralOffset() == patch.PcInsnOffset(); + } +} + +inline uint32_t MaxExtraSpace(size_t num_adrp, size_t code_size) { + if (num_adrp == 0u) { + return 0u; + } + uint32_t alignment_bytes = CompiledMethod::AlignCode(code_size, kArm64) - code_size; + return kAdrpThunkSize * num_adrp + alignment_bytes; } } // anonymous namespace Arm64RelativePatcher::Arm64RelativePatcher(RelativePatcherTargetProvider* provider, const Arm64InstructionSetFeatures* features) - : ArmBaseRelativePatcher(provider, kArm64, CompileThunkCode(), - kMaxPositiveDisplacement, kMaxNegativeDisplacement), + : ArmBaseRelativePatcher(provider, kArm64), fix_cortex_a53_843419_(features->NeedFixCortexA53_843419()), reserved_adrp_thunks_(0u), processed_adrp_thunks_(0u) { @@ -74,7 +115,9 @@ uint32_t Arm64RelativePatcher::ReserveSpace(uint32_t offset, ++num_adrp; } } - offset = ReserveSpaceInternal(offset, compiled_method, method_ref, kAdrpThunkSize * num_adrp); + ArrayRef<const uint8_t> code = compiled_method->GetQuickCode(); + uint32_t max_extra_space = MaxExtraSpace(num_adrp, code.size()); + offset = ReserveSpaceInternal(offset, compiled_method, method_ref, max_extra_space); if (num_adrp == 0u) { return offset; } @@ -82,7 +125,6 @@ uint32_t Arm64RelativePatcher::ReserveSpace(uint32_t offset, // Now that we have the actual offset where the code will be placed, locate the ADRP insns // that actually require the thunk. uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader)); - ArrayRef<const uint8_t> code = compiled_method->GetQuickCode(); uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size()); DCHECK(compiled_method != nullptr); for (const LinkerPatch& patch : compiled_method->GetPatches()) { @@ -146,7 +188,7 @@ void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code, DCHECK_EQ(literal_offset & 3u, 0u); DCHECK_EQ(patch_offset & 3u, 0u); DCHECK_EQ(target_offset & 3u, 0u); - uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u); + uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u); DCHECK_EQ(displacement & 3u, 0u); DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed. uint32_t insn = (displacement & 0x0fffffffu) >> 2; @@ -253,15 +295,184 @@ void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, } } -std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() { - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. 
+void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset) { + DCHECK_ALIGNED(patch_offset, 4u); + uint32_t literal_offset = patch.LiteralOffset(); + DCHECK_ALIGNED(literal_offset, 4u); + DCHECK_LT(literal_offset, code->size()); + uint32_t insn = GetInsn(code, literal_offset); + DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000); // CBNZ Xt, +0 (unpatched) + ThunkKey key = GetBakerReadBarrierKey(patch); + if (kIsDebugBuild) { + // Check that the next instruction matches the expected LDR. + switch (key.GetType()) { + case ThunkType::kBakerReadBarrierField: { + DCHECK_GE(code->size() - literal_offset, 8u); + uint32_t next_insn = GetInsn(code, literal_offset + 4u); + // LDR (immediate) with correct base_reg. + CheckValidReg(next_insn & 0x1fu); // Check destination register. + CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetOffsetParams().base_reg << 5)); + break; + } + case ThunkType::kBakerReadBarrierRoot: { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn(code, literal_offset - 4u); + // LDR (immediate) with correct root_reg. + CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | key.GetRootParams().root_reg); + break; + } + default: + LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType()); + UNREACHABLE(); + } + } + uint32_t target_offset = GetThunkTargetOffset(key, patch_offset); + DCHECK_ALIGNED(target_offset, 4u); + uint32_t disp = target_offset - patch_offset; + DCHECK((disp >> 20) == 0u || (disp >> 20) == 4095u); // 21-bit signed. + insn |= (disp << (5 - 2)) & 0x00ffffe0u; // Shift bits 2-20 to 5-23. + SetInsn(code, literal_offset, insn); +} + +ArmBaseRelativePatcher::ThunkKey Arm64RelativePatcher::GetBakerReadBarrierKey( + const LinkerPatch& patch) { + DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch); + uint32_t value = patch.GetBakerCustomValue1(); + BakerReadBarrierKind type = BakerReadBarrierKindField::Decode(value); + ThunkParams params; + switch (type) { + case BakerReadBarrierKind::kField: + params.offset_params.base_reg = BakerReadBarrierFirstRegField::Decode(value); + CheckValidReg(params.offset_params.base_reg); + params.offset_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value); + CheckValidReg(params.offset_params.holder_reg); + break; + case BakerReadBarrierKind::kGcRoot: + params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value); + CheckValidReg(params.root_params.root_reg); + params.root_params.dummy = 0u; + DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg); + break; + default: + LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(type); + UNREACHABLE(); + } + constexpr uint8_t kTypeTranslationOffset = 1u; + static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset == + static_cast<uint32_t>(ThunkType::kBakerReadBarrierField), + "Thunk type translation check."); + static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset == + static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot), + "Thunk type translation check."); + return ThunkKey(static_cast<ThunkType>(static_cast<uint32_t>(type) + kTypeTranslationOffset), + params); +} + +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, + vixl::aarch64::Register base_reg, + vixl::aarch64::MemOperand& lock_word, + vixl::aarch64::Label* slow_path) { + using namespace vixl::aarch64; // NOLINT(build/namespaces) + 
// Load the lock word containing the rb_state. + __ Ldr(ip0.W(), lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path); + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == -4, "Check field LDR offset"); + static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == -4, "Check array LDR offset"); + __ Sub(lr, lr, 4); // Adjust the return address one instruction back to the LDR. + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(vixl::aarch64::ip0, LSR, 32)); + __ Br(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path. +} + +std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { + using namespace vixl::aarch64; // NOLINT(build/namespaces) ArenaPool pool; ArenaAllocator arena(&pool); arm64::Arm64Assembler assembler(&arena); - Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArm64PointerSize).Int32Value()); - assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); + + switch (key.GetType()) { + case ThunkType::kMethodCall: { + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64PointerSize).Int32Value()); + assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); + break; + } + case ThunkType::kBakerReadBarrierField: { + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint (in IP1) that performs further checks + // on the reference and marks it if needed. + auto holder_reg = Register::GetXRegFromCode(key.GetOffsetParams().holder_reg); + auto base_reg = Register::GetXRegFromCode(key.GetOffsetParams().base_reg); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + // If base_reg differs from holder_reg, the offset was too large and we must have + // emitted an explicit null check before the load. Otherwise, we need to null-check + // the holder as we do not necessarily do that check before going to the thunk. + vixl::aarch64::Label throw_npe; + if (holder_reg.Is(base_reg)) { + __ Cbz(holder_reg.W(), &throw_npe); + } + vixl::aarch64::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + __ Bind(&slow_path); + MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset. + __ Ubfx(ip0, ip0, 10, 12); // Extract the offset. + __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference. + __ Br(ip1); // Jump to the entrypoint. + if (holder_reg.Is(base_reg)) { + // Add null check slow path. The stack map is at the address pointed to by LR. 
+ __ Bind(&throw_npe); + int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value(); + __ Ldr(ip0, MemOperand(vixl::aarch64::x19, offset)); + __ Br(ip0); + } + break; + } + case ThunkType::kBakerReadBarrierRoot: { + // Check if the reference needs to be marked and if so (i.e. not null, not marked yet + // and it does not have a forwarding address), call the correct introspection entrypoint; + // otherwise return the reference (or the extracted forwarding address). + // There is no gray bit check for GC roots. + auto root_reg = Register::GetWRegFromCode(key.GetRootParams().root_reg); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + vixl::aarch64::Label return_label, not_marked, forwarding_address; + __ Cbz(root_reg, &return_label); + MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value()); + __ Ldr(ip0.W(), lock_word); + __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked); + __ Bind(&return_label); + __ Br(lr); + __ Bind(&not_marked); + __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1)); + __ B(&forwarding_address, mi); + // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to + // art_quick_read_barrier_mark_introspection_gc_roots. + __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET)); + __ Mov(ip0.W(), root_reg); + __ Br(ip1); + __ Bind(&forwarding_address); + __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift); + __ Br(lr); + break; + } + } + // Ensure we emit the literal pool. assembler.FinalizeCode(); std::vector<uint8_t> thunk_code(assembler.CodeSize()); @@ -270,6 +481,28 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() { return thunk_code; } +#undef __ + +uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(ThunkType type) { + switch (type) { + case ThunkType::kMethodCall: + return kMaxMethodCallPositiveDisplacement; + case ThunkType::kBakerReadBarrierField: + case ThunkType::kBakerReadBarrierRoot: + return kMaxBcondPositiveDisplacement; + } +} + +uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(ThunkType type) { + switch (type) { + case ThunkType::kMethodCall: + return kMaxMethodCallNegativeDisplacement; + case ThunkType::kBakerReadBarrierField: + case ThunkType::kBakerReadBarrierRoot: + return kMaxBcondNegativeDisplacement; + } +} + uint32_t Arm64RelativePatcher::PatchAdrp(uint32_t adrp, uint32_t disp) { return (adrp & 0x9f00001fu) | // Clear offset bits, keep ADRP with destination reg. // Bottom 12 bits are ignored, the next 2 lowest bits are encoded in bits 29-30. diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h index a4a80185dc..7887cea5e6 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.h +++ b/compiler/linker/arm64/relative_patcher_arm64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ #include "base/array_ref.h" +#include "base/bit_field.h" #include "linker/arm/relative_patcher_arm_base.h" namespace art { @@ -25,6 +26,27 @@ namespace linker { class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { public: + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kGcRoot, // GC root load.
+ kLast + }; + + static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg); + } + + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { + CheckValidReg(root_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); + } + Arm64RelativePatcher(RelativePatcherTargetProvider* provider, const Arm64InstructionSetFeatures* features); @@ -41,9 +63,33 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { const LinkerPatch& patch, uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset) OVERRIDE; + + protected: + static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u; + + ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE; + std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; + uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE; + uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE; private: - static std::vector<uint8_t> CompileThunkCode(); + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBitsForRegister = 5u; + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < 30u && reg != 16u && reg != 17u); + } + static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp); static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset, @@ -54,15 +100,6 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { template <typename Alloc> static uint32_t GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset); - // Maximum positive and negative displacement measured from the patch location. - // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from - // the ARM64 PC pointing to the BL.) - static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u; - static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27); - - // The ADRP thunk for erratum 843419 is 2 instructions, i.e. 8 bytes. - static constexpr uint32_t kAdrpThunkSize = 8u; - const bool fix_cortex_a53_843419_; // Map original patch_offset to thunk offset. 
std::vector<std::pair<uint32_t, uint32_t>> adrp_thunk_locations_; @@ -70,6 +107,8 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { size_t processed_adrp_thunks_; std::vector<uint8_t> current_method_thunks_; + friend class Arm64RelativePatcherTest; + DISALLOW_COPY_AND_ASSIGN(Arm64RelativePatcher); }; diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc index 9932c79a96..b4d35ab2a7 100644 --- a/compiler/linker/arm64/relative_patcher_arm64_test.cc +++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc @@ -14,8 +14,11 @@ * limitations under the License. */ +#include "base/casts.h" #include "linker/relative_patcher_test.h" #include "linker/arm64/relative_patcher_arm64.h" +#include "lock_word.h" +#include "mirror/object.h" #include "oat_quick_method_header.h" namespace art { @@ -32,6 +35,9 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { static const uint8_t kNopRawCode[]; static const ArrayRef<const uint8_t> kNopCode; + // NOP instruction. + static constexpr uint32_t kNopInsn = 0xd503201f; + // All branches can be created from kBlPlus0 or kBPlus0 by adding the low 26 bits. static constexpr uint32_t kBlPlus0 = 0x94000000u; static constexpr uint32_t kBPlus0 = 0x14000000u; @@ -40,7 +46,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { static constexpr uint32_t kBlPlusMax = 0x95ffffffu; static constexpr uint32_t kBlMinusMax = 0x96000000u; - // LDR immediate, 32-bit. + // LDR immediate, unsigned offset. static constexpr uint32_t kLdrWInsn = 0xb9400000u; // ADD/ADDS/SUB/SUBS immediate, 64-bit. @@ -61,6 +67,34 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { static constexpr uint32_t kLdrWSpRelInsn = 0xb94003edu; static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu; + // CBNZ x17, +0. Bits 5-23 are a placeholder for target offset from PC in units of 4-bytes. + static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011; + + void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { + CHECK_LE(pos, code->size()); + const uint8_t insn_code[] = { + static_cast<uint8_t>(insn), + static_cast<uint8_t>(insn >> 8), + static_cast<uint8_t>(insn >> 16), + static_cast<uint8_t>(insn >> 24), + }; + static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); + code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); + } + + void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) { + InsertInsn(code, code->size(), insn); + } + + std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) { + std::vector<uint8_t> raw_code; + raw_code.reserve(insns.size() * 4u); + for (uint32_t insn : insns) { + PushBackInsn(&raw_code, insn); + } + return raw_code; + } + uint32_t Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, const ArrayRef<const LinkerPatch>& method1_patches, const ArrayRef<const uint8_t>& last_method_code, @@ -93,8 +127,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { uint32_t chunk_code_size = chunk_size - CodeAlignmentSize(chunk_start) - sizeof(OatQuickMethodHeader); gap_code.resize(chunk_code_size, 0u); - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code), - ArrayRef<const LinkerPatch>()); + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code)); method_idx += 1u; chunk_start += chunk_size; chunk_size = kSmallChunkSize; // For all but the first chunk. 
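A minimal sketch (not part of the change itself) of how EncodeBakerReadBarrierFieldData()/EncodeBakerReadBarrierGcRootData() above lay out the 32-bit custom value of a kBakerReadBarrierBranch patch, assuming MinimumBitsToStore(kLast) evaluates to 2 bits for the kind; the Decode* helpers are hypothetical and only make the bit positions explicit.

#include <cstdint>

enum class BakerReadBarrierKind : uint8_t { kField = 0, kGcRoot = 1, kLast = 2 };

constexpr uint32_t kKindBits = 2u;            // MinimumBitsToStore(kLast) with kLast == 2.
constexpr uint32_t kRegBits = 5u;             // AArch64 register numbers 0-30.
constexpr uint32_t kInvalidEncodedReg = 31u;  // sp/zr marks "no second register".

constexpr uint32_t EncodeFieldData(uint32_t base_reg, uint32_t holder_reg) {
  // Bits [1:0] = kind, bits [6:2] = base_reg, bits [11:7] = holder_reg.
  return static_cast<uint32_t>(BakerReadBarrierKind::kField) |
         (base_reg << kKindBits) |
         (holder_reg << (kKindBits + kRegBits));
}

constexpr uint32_t EncodeGcRootData(uint32_t root_reg) {
  // Same layout with the second register slot holding kInvalidEncodedReg.
  return static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) |
         (root_reg << kKindBits) |
         (kInvalidEncodedReg << (kKindBits + kRegBits));
}

constexpr uint32_t DecodeFirstReg(uint32_t data) {
  return (data >> kKindBits) & ((1u << kRegBits) - 1u);
}

constexpr uint32_t DecodeSecondReg(uint32_t data) {
  return (data >> (kKindBits + kRegBits)) & ((1u << kRegBits) - 1u);
}

static_assert(DecodeFirstReg(EncodeFieldData(/* base_reg */ 5u, /* holder_reg */ 7u)) == 5u,
              "base register is recoverable");
static_assert(DecodeSecondReg(EncodeFieldData(/* base_reg */ 5u, /* holder_reg */ 7u)) == 7u,
              "holder register is recoverable");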
@@ -112,7 +145,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { // There may be a thunk before method2. if (last_result.second != last_method_offset) { // Thunk present. Check that there's only one. - uint32_t thunk_end = CompiledCode::AlignCode(gap_end, kArm64) + ThunkSize(); + uint32_t thunk_end = CompiledCode::AlignCode(gap_end, kArm64) + MethodCallThunkSize(); uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end); CHECK_EQ(last_result.second, header_offset + sizeof(OatQuickMethodHeader)); } @@ -126,37 +159,49 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { return result.second; } - uint32_t ThunkSize() { - return static_cast<Arm64RelativePatcher*>(patcher_.get())->thunk_code_.size(); + std::vector<uint8_t> CompileMethodCallThunk() { + ArmBaseRelativePatcher::ThunkKey key( + ArmBaseRelativePatcher::ThunkType::kMethodCall, + ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }}); // NOLINT(whitespace/braces) + return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + uint32_t MethodCallThunkSize() { + return CompileMethodCallThunk().size(); } bool CheckThunk(uint32_t thunk_offset) { - Arm64RelativePatcher* patcher = static_cast<Arm64RelativePatcher*>(patcher_.get()); - ArrayRef<const uint8_t> expected_code(patcher->thunk_code_); + const std::vector<uint8_t> expected_code = CompileMethodCallThunk(); if (output_.size() < thunk_offset + expected_code.size()) { LOG(ERROR) << "output_.size() == " << output_.size() << " < " << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); return false; } ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); - if (linked_code == expected_code) { + if (linked_code == ArrayRef<const uint8_t>(expected_code)) { return true; } // Log failure info. - DumpDiff(expected_code, linked_code); + DumpDiff(ArrayRef<const uint8_t>(expected_code), linked_code); return false; } + std::vector<uint8_t> GenNops(size_t num_nops) { + std::vector<uint8_t> result; + result.reserve(num_nops * 4u + 4u); + for (size_t i = 0; i != num_nops; ++i) { + PushBackInsn(&result, kNopInsn); + } + return result; + } + std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { std::vector<uint8_t> result; result.reserve(num_nops * 4u + 4u); for (size_t i = 0; i != num_nops; ++i) { - result.insert(result.end(), kNopCode.begin(), kNopCode.end()); + PushBackInsn(&result, kNopInsn); } - result.push_back(static_cast<uint8_t>(bl)); - result.push_back(static_cast<uint8_t>(bl >> 8)); - result.push_back(static_cast<uint8_t>(bl >> 16)); - result.push_back(static_cast<uint8_t>(bl >> 24)); + PushBackInsn(&result, bl); return result; } @@ -167,7 +212,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { std::vector<uint8_t> result; result.reserve(num_nops * 4u + 8u); for (size_t i = 0; i != num_nops; ++i) { - result.insert(result.end(), kNopCode.begin(), kNopCode.end()); + PushBackInsn(&result, kNopInsn); } CHECK_ALIGNED(method_offset, 4u); CHECK_ALIGNED(target_offset, 4u); @@ -188,14 +233,8 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { ((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5, // We take the sign bit from the disp, limiting disp to +- 2GiB. ((disp & 0x80000000) >> (31 - 23)); // sign bit in immhi is at bit 23. 
- result.push_back(static_cast<uint8_t>(adrp)); - result.push_back(static_cast<uint8_t>(adrp >> 8)); - result.push_back(static_cast<uint8_t>(adrp >> 16)); - result.push_back(static_cast<uint8_t>(adrp >> 24)); - result.push_back(static_cast<uint8_t>(use_insn)); - result.push_back(static_cast<uint8_t>(use_insn >> 8)); - result.push_back(static_cast<uint8_t>(use_insn >> 16)); - result.push_back(static_cast<uint8_t>(use_insn >> 24)); + PushBackInsn(&result, adrp); + PushBackInsn(&result, use_insn); return result; } @@ -208,7 +247,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { void TestNopsAdrpLdr(size_t num_nops, uint32_t dex_cache_arrays_begin, uint32_t element_offset) { dex_cache_arrays_begin_ = dex_cache_arrays_begin; auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset), LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, element_offset), }; @@ -233,7 +272,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { constexpr uint32_t kStringIndex = 1u; string_index_to_offset_map_.Put(kStringIndex, string_offset); auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u); // Unpatched. - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeStringPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), LinkerPatch::RelativeStringPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex), }; @@ -247,16 +286,6 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); } - void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { - CHECK_LE(pos, code->size()); - const uint8_t insn_code[] = { - static_cast<uint8_t>(insn), static_cast<uint8_t>(insn >> 8), - static_cast<uint8_t>(insn >> 16), static_cast<uint8_t>(insn >> 24), - }; - static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); - code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); - } - void PrepareNopsAdrpInsn2Ldr(size_t num_nops, uint32_t insn2, uint32_t dex_cache_arrays_begin, @@ -264,7 +293,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { dex_cache_arrays_begin_ = dex_cache_arrays_begin; auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. InsertInsn(&code, num_nops * 4u + 4u, insn2); - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset), LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, element_offset), }; @@ -279,7 +308,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { string_index_to_offset_map_.Put(kStringIndex, string_offset); auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u); // Unpatched. 
InsertInsn(&code, num_nops * 4u + 4u, insn2); - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeStringPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), LinkerPatch::RelativeStringPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex), }; @@ -329,7 +358,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { InsertInsn(&expected_thunk_code, 4u, b_in); ASSERT_EQ(expected_thunk_code.size(), 8u); - uint32_t thunk_size = ThunkSize(); + uint32_t thunk_size = MethodCallThunkSize(); ASSERT_EQ(thunk_offset + thunk_size, output_.size()); ASSERT_EQ(thunk_size, expected_thunk_code.size()); ArrayRef<const uint8_t> thunk_code(&output_[thunk_offset], thunk_size); @@ -433,6 +462,33 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10); TestAdrpInsn2Add(insn2, adrp_offset, has_thunk, string_offset); } + + std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) { + const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg)); + auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get()); + ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); + return patcher->CompileThunk(key); + } + + std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) { + LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)); + auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get()); + ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); + return patcher->CompileThunk(key); + } + + uint32_t GetOutputInsn(uint32_t offset) { + CHECK_LE(offset, output_.size()); + CHECK_GE(output_.size() - offset, 4u); + return (static_cast<uint32_t>(output_[offset]) << 0) | + (static_cast<uint32_t>(output_[offset + 1]) << 8) | + (static_cast<uint32_t>(output_[offset + 2]) << 16) | + (static_cast<uint32_t>(output_[offset + 3]) << 24); + } + + void TestBakerField(uint32_t offset, uint32_t root_reg); }; const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = { @@ -458,24 +514,22 @@ class Arm64RelativePatcherTestDenver64 : public Arm64RelativePatcherTest { }; TEST_F(Arm64RelativePatcherTestDefault, CallSelf) { - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); Link(); - static const uint8_t expected_code[] = { - 0x00, 0x00, 0x00, 0x94 - }; + const std::vector<uint8_t> expected_code = RawCode({kBlPlus0}); EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); } TEST_F(Arm64RelativePatcherTestDefault, CallOther) { - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - LinkerPatch method2_patches[] = { + const LinkerPatch method2_patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), }; AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); @@ -486,9 +540,7 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOther) { uint32_t diff_after = method2_offset - method1_offset; CHECK_ALIGNED(diff_after, 4u); ASSERT_LT(diff_after >> 2, 1u << 8); // Simple encoding, 
(diff_after >> 2) fits into 8 bits. - static const uint8_t method1_expected_code[] = { - static_cast<uint8_t>(diff_after >> 2), 0x00, 0x00, 0x94 - }; + const std::vector<uint8_t> method1_expected_code = RawCode({kBlPlus0 + (diff_after >> 2)}); EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); uint32_t diff_before = method1_offset - method2_offset; CHECK_ALIGNED(diff_before, 4u); @@ -498,7 +550,7 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOther) { } TEST_F(Arm64RelativePatcherTestDefault, CallTrampoline) { - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); @@ -518,7 +570,7 @@ TEST_F(Arm64RelativePatcherTestDefault, CallTrampolineTooFar) { constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs. ArrayRef<const uint8_t> last_method_code(last_method_raw_code); ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); - LinkerPatch last_method_patches[] = { + const LinkerPatch last_method_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, missing_method_index), }; @@ -551,7 +603,7 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) { ArrayRef<const uint8_t> method1_code(method1_raw_code); ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), }; @@ -577,7 +629,7 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarBefore) { constexpr uint32_t bl_offset_in_last_method = 0u * 4u; // After NOPs. ArrayRef<const uint8_t> last_method_code(last_method_raw_code); ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); - LinkerPatch last_method_patches[] = { + const LinkerPatch last_method_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), }; @@ -603,7 +655,7 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarAfter) { ArrayRef<const uint8_t> method1_code(method1_raw_code); ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). 
- LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), }; @@ -620,9 +672,10 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarAfter) { uint32_t last_method_offset = GetMethodOffset(last_method_idx); ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_offset)); uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader); + uint32_t thunk_size = MethodCallThunkSize(); uint32_t thunk_offset = - RoundDown(last_method_header_offset - ThunkSize(), GetInstructionSetAlignment(kArm64)); - DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()), + RoundDown(last_method_header_offset - thunk_size, GetInstructionSetAlignment(kArm64)); + DCHECK_EQ(thunk_offset + thunk_size + CodeAlignmentSize(thunk_offset + thunk_size), last_method_header_offset); uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1); CHECK_ALIGNED(diff, 4u); @@ -637,7 +690,7 @@ TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarBefore) { constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs. ArrayRef<const uint8_t> last_method_code(last_method_raw_code); ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); - LinkerPatch last_method_patches[] = { + const LinkerPatch last_method_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), }; @@ -832,5 +885,383 @@ TEST_FOR_OFFSETS(LDRW_SPREL_ADD_TEST, 0, 4) TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8) +void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + // LR and SP/ZR are reserved. + }; + DCHECK_ALIGNED(offset, 4u); + DCHECK_LT(offset, 16 * KB); + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + for (uint32_t holder_reg : valid_regs) { + uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg; + const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + uint32_t encoded_data = + Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + } + Link(); + + // All thunks are at the end. 
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + for (uint32_t holder_reg : valid_regs) { + ++method_idx; + uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); + uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); + uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg; + const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + ASSERT_TRUE( + CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + size_t gray_check_offset = thunk_offset; + if (holder_reg == base_reg) { + // Verify that the null-check CBZ uses the correct register, i.e. holder_reg. + ASSERT_GE(output_.size() - gray_check_offset, 4u); + ASSERT_EQ(0x34000000 | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001f); + gray_check_offset +=4u; + } + // Verify that the lock word for gray bit check is loaded from the holder address. + static constexpr size_t kGrayCheckInsns = 5; + ASSERT_GE(output_.size() - gray_check_offset, 4u * kGrayCheckInsns); + const uint32_t load_lock_word = + kLdrWInsn | + (mirror::Object::MonitorOffset().Uint32Value() << (10 - 2)) | + (holder_reg << 5) | + /* ip0 */ 16; + EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset)); + // Verify the gray bit check. + const uint32_t check_gray_bit_witout_offset = + 0x37000000 | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; + EXPECT_EQ(check_gray_bit_witout_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001f); + // Verify the fake dependency. + const uint32_t fake_dependency = + 0x8b408000 | // ADD Xd, Xn, Xm, LSR 32 + (/* ip0 */ 16 << 16) | // Xm = ip0 + (base_reg << 5) | // Xn = base_reg + base_reg; // Xd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn(gray_check_offset + 12u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); + } + } +} + +#define TEST_BAKER_FIELD(offset, root_reg) \ + TEST_F(Arm64RelativePatcherTestDefault, \ + BakerOffset##offset##_##root_reg) { \ + TestBakerField(offset, root_reg); \ + } + +TEST_BAKER_FIELD(/* offset */ 0, /* root_reg */ 0) +TEST_BAKER_FIELD(/* offset */ 8, /* root_reg */ 15) +TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* root_reg */ 29) + +TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) { + // One thunk in the middle with maximum distance branches to it from both sides. + // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`. 
+ constexpr uint32_t kLiteralOffset1 = 4; + const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = + Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4 + // allows the branch to reach that thunk. + size_t filler1_size = + 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + // Enforce thunk reservation with a tiny method. + AddCompiledMethod(MethodRef(3u), kNopCode); + + // Allow reaching the thunk from the very beginning of a method 1MiB away. Backward branch + // reaches the full 1MiB. Things to subtract: + // - thunk size and method 3 pre-header, rounded up (padding in between if needed) + // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) + // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). + size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); + size_t filler2_size = + 1 * MB - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArm64Alignment) + - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArm64Alignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 4u); + ArrayRef<const uint8_t> filler2_code(raw_filler2_code); + AddCompiledMethod(MethodRef(4u), filler2_code); + + constexpr uint32_t kLiteralOffset2 = 0; + const std::vector<uint8_t> raw_code2 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn}); + ArrayRef<const uint8_t> code2(raw_code2); + const LinkerPatch patches2[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), + }; + AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); + + Link(); + + uint32_t first_method_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(5u); + EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); + + const uint32_t cbnz_max_forward = kCbnzIP1Plus0Insn | 0x007fffe0; + const uint32_t cbnz_max_backward = kCbnzIP1Plus0Insn | 0x00800000; + const std::vector<uint8_t> expected_code1 = RawCode({kNopInsn, cbnz_max_forward, kLdrWInsn}); + const std::vector<uint8_t> expected_code2 = RawCode({cbnz_max_backward, kLdrWInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); +} + +TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) { + // Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction + // earlier, so the thunk is emitted before the filler. + // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`. 
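A sketch of the CBNZ displacement arithmetic behind the expected-code computations in these tests. PatchCbnz is a hypothetical helper mirroring the `kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2))` expressions and the 0x007fffe0/0x00800000 extremes above, not the patcher's actual implementation.

#include <cstdint>

constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u;  // CBNZ x17, +0 (imm19 == 0).

constexpr uint32_t PatchCbnz(uint32_t insn, int32_t displacement_in_bytes) {
  // imm19 occupies bits [23:5] and is measured in 4-byte units, so the reachable
  // range is [-2^20, 2^20 - 4] bytes, roughly +-1 MiB from the CBNZ itself.
  return (insn & ~(0x7ffffu << 5)) |
         (((static_cast<uint32_t>(displacement_in_bytes) >> 2) & 0x7ffffu) << 5);
}

// The extreme encodings match the cbnz_max_forward/cbnz_max_backward constants above.
static_assert(PatchCbnz(kCbnzIP1Plus0Insn, (1 << 20) - 4) == (kCbnzIP1Plus0Insn | 0x007fffe0u),
              "maximum forward displacement");
static_assert(PatchCbnz(kCbnzIP1Plus0Insn, -(1 << 20)) == (kCbnzIP1Plus0Insn | 0x00800000u),
              "maximum backward displacement");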
+ constexpr uint32_t kLiteralOffset1 = 0; + const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = + Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4 + // allows the branch to reach that thunk. + size_t filler1_size = + 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + Link(); + + const uint32_t cbnz_offset = RoundUp(raw_code1.size(), kArm64Alignment) - kLiteralOffset1; + const uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); + const std::vector<uint8_t> expected_code1 = RawCode({cbnz, kLdrWInsn, kNopInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); +} + +TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) { + // Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded + // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. + // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`. + constexpr uint32_t kLiteralOffset1 = 4; + const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = + Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4 + // allows the branch to reach that thunk. + size_t filler1_size = + 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + // Enforce thunk reservation with a tiny method. + AddCompiledMethod(MethodRef(3u), kNopCode); + + // If not for the extra NOP, this would allow reaching the thunk from the very beginning + // of a method 1MiB away. Backward branch reaches the full 1MiB. Things to subtract: + // - thunk size and method 3 pre-header, rounded up (padding in between if needed) + // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) + // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). 
+ size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); + size_t filler2_size = + 1 * MB - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArm64Alignment) + - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArm64Alignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 4u); + ArrayRef<const uint8_t> filler2_code(raw_filler2_code); + AddCompiledMethod(MethodRef(4u), filler2_code); + + // Extra NOP compared to BakerOffsetThunkInTheMiddle. + constexpr uint32_t kLiteralOffset2 = 4; + const std::vector<uint8_t> raw_code2 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); + ArrayRef<const uint8_t> code2(raw_code2); + const LinkerPatch patches2[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), + }; + AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); + + Link(); + + const uint32_t cbnz_max_forward = kCbnzIP1Plus0Insn | 0x007fffe0; + const uint32_t cbnz_last_offset = RoundUp(raw_code2.size(), kArm64Alignment) - kLiteralOffset2; + const uint32_t cbnz_last = kCbnzIP1Plus0Insn | (cbnz_last_offset << (5 - 2)); + const std::vector<uint8_t> expected_code1 = RawCode({kNopInsn, cbnz_max_forward, kLdrWInsn}); + const std::vector<uint8_t> expected_code2 = RawCode({kNopInsn, cbnz_last, kLdrWInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); +} + +TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + // LR and SP/ZR are reserved. + }; + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 4u; + uint32_t method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t ldr = kLdrWInsn | (/* offset */ 8 << (10 - 2)) | (/* base_reg */ 0 << 5) | root_reg; + const std::vector<uint8_t> raw_code = RawCode({ldr, kCbnzIP1Plus0Insn}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. 
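The thunk verification below (and in TestBakerField above) identifies the fast-path null check by masking the branch offset out of the instruction; a hypothetical helper makes the 0xff00001f mask explicit.

#include <cstdint>

constexpr bool IsCbzW(uint32_t insn, uint32_t reg) {
  // A 32-bit CBZ is 0x34000000 with Rt in bits [4:0] and the offset in imm19
  // (bits [23:5]); masking with 0xff00001f ignores the offset, so the check only
  // asserts "this is CBZ w<reg>" regardless of where the slow path continues.
  return (insn & 0xff00001fu) == (0x34000000u | reg);
}

static_assert(IsCbzW(0x34000000u | (5u << 5) | 3u, /* reg */ 3u), "CBZ w3, any offset");
static_assert(!IsCbzW(0x34000000u | 3u, /* reg */ 4u), "register mismatch is rejected");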
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); + method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); + uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); + uint32_t ldr = kLdrWInsn | (/* offset */ 8 << (10 - 2)) | (/* base_reg */ 0 << 5) | root_reg; + const std::vector<uint8_t> expected_code = RawCode({ldr, cbnz}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. + ASSERT_GE(output_.size() - thunk_offset, 4u); + ASSERT_EQ(0x34000000 | root_reg, GetOutputInsn(thunk_offset) & 0xff00001f); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); + } +} + +TEST_F(Arm64RelativePatcherTestDefault, BakerAndMethodCallInteraction) { + // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());` + // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily + // hold when we're reserving thunks of different sizes. This test exposes the situation + // by using Baker thunks and a method call thunk. + + // Add a method call patch that can reach to method 1 offset + 128MiB. + uint32_t method_idx = 0u; + constexpr size_t kMethodCallLiteralOffset = 4u; + constexpr uint32_t kMissingMethodIdx = 2u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0}); + const LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u), + }; + ArrayRef<const uint8_t> code1(raw_code1); + ++method_idx; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches)); + + // Skip kMissingMethodIdx. + ++method_idx; + ASSERT_EQ(kMissingMethodIdx, method_idx); + // Add a method with the right size that the method code for the next one starts 1MiB + // after code for method 1. + size_t filler_size = + 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> filler_code = GenNops(filler_size / 4u); + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); + // Add 126 methods with 1MiB code+header, making the code for the next method start 1MiB + // before the currently scheduled MaxNextOffset() for the method call thunk. + for (uint32_t i = 0; i != 126; ++i) { + filler_size = 1 * MB - sizeof(OatQuickMethodHeader); + filler_code = GenNops(filler_size / 4u); + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); + } + + // Add 2 Baker GC root patches to the last method, one that would allow the thunk at + // 1MiB + kArm64Alignment, i.e. 
kArm64Alignment after the method call thunk, and the + // second that needs it kArm64Alignment after that. Given the size of the GC root thunk + // is more than the space required by the method call thunk plus kArm64Alignment, + // this pushes the first GC root thunk's pending MaxNextOffset() before the method call + // thunk's pending MaxNextOffset() which needs to be adjusted. + ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArm64Alignment) + kArm64Alignment, + CompileBakerGcRootThunk(/* root_reg */ 0).size()); + static_assert(kArm64Alignment == 16, "Code below assumes kArm64Alignment == 16"); + constexpr size_t kBakerLiteralOffset1 = 4u + kArm64Alignment; + constexpr size_t kBakerLiteralOffset2 = 4u + 2 * kArm64Alignment; + // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | root_reg`. + const uint32_t ldr1 = kLdrWInsn | /* root_reg */ 1; + const uint32_t ldr2 = kLdrWInsn | /* root_reg */ 2; + const std::vector<uint8_t> last_method_raw_code = RawCode({ + kNopInsn, kNopInsn, kNopInsn, kNopInsn, // Padding before first GC root read barrier. + ldr1, kCbnzIP1Plus0Insn, // First GC root LDR with read barrier. + kNopInsn, kNopInsn, // Padding before second GC root read barrier. + ldr2, kCbnzIP1Plus0Insn, // Second GC root LDR with read barrier. + }); + uint32_t encoded_data1 = Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1); + uint32_t encoded_data2 = Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2); + const LinkerPatch last_method_patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1), + LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), + ArrayRef<const uint8_t>(last_method_raw_code), + ArrayRef<const LinkerPatch>(last_method_patches)); + + // The main purpose of the test is to check that Link() does not cause a crash. 
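A rough range comparison behind this test; the constant names are local to the sketch (values taken from the displacement comments elsewhere in this change) and only illustrate why a BL can span the 127 MiB layout while a Baker CBNZ cannot, which forces separately reserved Baker thunks and the MaxNextOffset() adjustment mentioned above.

#include <cstdint>

constexpr uint32_t kMiB = 1024u * 1024u;
constexpr uint32_t kMaxBlPositiveDisplacement = (1u << 27) - 4u;    // BL: signed 28-bit, 4-byte units.
constexpr uint32_t kMaxCbnzPositiveDisplacement = (1u << 20) - 4u;  // CBNZ: imm19, 4-byte units.

static_assert(127u * kMiB < kMaxBlPositiveDisplacement,
              "the method call branch can span the 127 MiB layout built here");
static_assert(127u * kMiB > kMaxCbnzPositiveDisplacement,
              "a Baker read barrier CBNZ must have its thunk within ~1 MiB");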
+ Link(); + + ASSERT_EQ(127 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u)); +} + } // namespace linker } // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc index fe5f9a948a..8da530f7cc 100644 --- a/compiler/linker/mips/relative_patcher_mips.cc +++ b/compiler/linker/mips/relative_patcher_mips.cc @@ -117,5 +117,11 @@ void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, (*code)[literal_low_offset + 1] = static_cast<uint8_t>(diff >> 8); } +void MipsRelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED) { + LOG(FATAL) << "UNIMPLEMENTED"; +} + } // namespace linker } // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h index 4ff2f2f24f..852a345aa6 100644 --- a/compiler/linker/mips/relative_patcher_mips.h +++ b/compiler/linker/mips/relative_patcher_mips.h @@ -41,6 +41,9 @@ class MipsRelativePatcher FINAL : public RelativePatcher { const LinkerPatch& patch, uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset) OVERRIDE; private: // We'll maximize the range of a single load instruction for dex cache array accesses diff --git a/compiler/linker/mips64/relative_patcher_mips64.cc b/compiler/linker/mips64/relative_patcher_mips64.cc index c47971635b..3488d6d21c 100644 --- a/compiler/linker/mips64/relative_patcher_mips64.cc +++ b/compiler/linker/mips64/relative_patcher_mips64.cc @@ -107,5 +107,11 @@ void Mips64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, (*code)[literal_offset + 5] = static_cast<uint8_t>(diff >> 8); } +void Mips64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED) { + LOG(FATAL) << "UNIMPLEMENTED"; +} + } // namespace linker } // namespace art diff --git a/compiler/linker/mips64/relative_patcher_mips64.h b/compiler/linker/mips64/relative_patcher_mips64.h index 8ef8cebe2f..f478d7f2ef 100644 --- a/compiler/linker/mips64/relative_patcher_mips64.h +++ b/compiler/linker/mips64/relative_patcher_mips64.h @@ -39,6 +39,9 @@ class Mips64RelativePatcher FINAL : public RelativePatcher { const LinkerPatch& patch, uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset) OVERRIDE; private: DISALLOW_COPY_AND_ASSIGN(Mips64RelativePatcher); diff --git a/compiler/linker/multi_oat_relative_patcher.h b/compiler/linker/multi_oat_relative_patcher.h index dbda03fd3b..247b29017e 100644 --- a/compiler/linker/multi_oat_relative_patcher.h +++ b/compiler/linker/multi_oat_relative_patcher.h @@ -112,6 +112,13 @@ class MultiOatRelativePatcher FINAL { relative_patcher_->PatchPcRelativeReference(code, patch, patch_offset, target_offset); } + void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset) { + patch_offset += adjustment_; + relative_patcher_->PatchBakerReadBarrierBranch(code, patch, patch_offset); + } + // Wrappers around RelativePatcher for statistics retrieval. 
uint32_t CodeAlignmentSize() const; uint32_t RelativeCallThunksSize() const; diff --git a/compiler/linker/multi_oat_relative_patcher_test.cc b/compiler/linker/multi_oat_relative_patcher_test.cc index 92a96a0bd3..951588a857 100644 --- a/compiler/linker/multi_oat_relative_patcher_test.cc +++ b/compiler/linker/multi_oat_relative_patcher_test.cc @@ -63,7 +63,7 @@ class MultiOatRelativePatcherTest : public testing::Test { if (next_write_call_thunk_ != 0u) { offset += next_write_call_thunk_; std::vector<uint8_t> thunk(next_write_call_thunk_, 'c'); - bool success = WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk)); + bool success = WriteThunk(out, ArrayRef<const uint8_t>(thunk)); CHECK(success); next_write_call_thunk_ = 0u; } @@ -95,6 +95,12 @@ class MultiOatRelativePatcherTest : public testing::Test { last_target_offset_ = target_offset; } + void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED) { + LOG(FATAL) << "UNIMPLEMENTED"; + } + uint32_t last_reserve_offset_ = 0u; MethodReference last_reserve_method_ = kNullMethodRef; uint32_t next_reserve_adjustment_ = 0u; diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc index f1538b10cc..ee49453938 100644 --- a/compiler/linker/relative_patcher.cc +++ b/compiler/linker/relative_patcher.cc @@ -75,6 +75,12 @@ std::unique_ptr<RelativePatcher> RelativePatcher::Create( LOG(FATAL) << "Unexpected relative dex cache array patch."; } + void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unexpected baker read barrier branch patch."; + } + private: DISALLOW_COPY_AND_ASSIGN(RelativePatcherNone); }; @@ -127,7 +133,7 @@ bool RelativePatcher::WriteCodeAlignment(OutputStream* out, uint32_t aligned_cod return true; } -bool RelativePatcher::WriteRelCallThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { +bool RelativePatcher::WriteThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { return false; } diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h index 15e955b2c6..38c8228422 100644 --- a/compiler/linker/relative_patcher.h +++ b/compiler/linker/relative_patcher.h @@ -109,6 +109,11 @@ class RelativePatcher { uint32_t patch_offset, uint32_t target_offset) = 0; + // Patch a branch to a Baker read barrier thunk. 
+ virtual void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset) = 0; + protected: RelativePatcher() : size_code_alignment_(0u), @@ -117,7 +122,7 @@ class RelativePatcher { } bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta); - bool WriteRelCallThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); + bool WriteThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); bool WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); private: diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h index 908cb412bf..d9a87a0cfd 100644 --- a/compiler/linker/relative_patcher_test.h +++ b/compiler/linker/relative_patcher_test.h @@ -76,9 +76,10 @@ class RelativePatcherTest : public testing::Test { return MethodReference(nullptr, method_idx); } - void AddCompiledMethod(MethodReference method_ref, - const ArrayRef<const uint8_t>& code, - const ArrayRef<const LinkerPatch>& patches) { + void AddCompiledMethod( + MethodReference method_ref, + const ArrayRef<const uint8_t>& code, + const ArrayRef<const LinkerPatch>& patches = ArrayRef<const LinkerPatch>()) { compiled_method_refs_.push_back(method_ref); compiled_methods_.emplace_back(new CompiledMethod( &driver_, @@ -169,6 +170,10 @@ class RelativePatcherTest : public testing::Test { patch, offset + patch.LiteralOffset(), target_offset); + } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) { + patcher_->PatchBakerReadBarrierBranch(&patched_code_, + patch, + offset + patch.LiteralOffset()); } else { LOG(FATAL) << "Bad patch type. " << patch.GetType(); UNREACHABLE(); diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc index 768d31abf4..6967b0b6c2 100644 --- a/compiler/linker/x86/relative_patcher_x86.cc +++ b/compiler/linker/x86/relative_patcher_x86.cc @@ -56,5 +56,11 @@ void X86RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, (*code)[literal_offset + 3u] = static_cast<uint8_t>(diff >> 24); } +void X86RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch ATTRIBUTE_UNUSED, + uint32_t patch_offset ATTRIBUTE_UNUSED) { + LOG(FATAL) << "UNIMPLEMENTED"; +} + } // namespace linker } // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86.h b/compiler/linker/x86/relative_patcher_x86.h index fbf9ad4671..63a8338722 100644 --- a/compiler/linker/x86/relative_patcher_x86.h +++ b/compiler/linker/x86/relative_patcher_x86.h @@ -30,6 +30,9 @@ class X86RelativePatcher FINAL : public X86BaseRelativePatcher { const LinkerPatch& patch, uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset) OVERRIDE; }; } // namespace linker diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc index 2ff69308c4..156ece9909 100644 --- a/compiler/linker/x86_64/relative_patcher_x86_64.cc +++ b/compiler/linker/x86_64/relative_patcher_x86_64.cc @@ -34,5 +34,11 @@ void X86_64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, reinterpret_cast<unaligned_int32_t*>(&(*code)[patch.LiteralOffset()])[0] = displacement; } +void X86_64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, + const LinkerPatch& patch ATTRIBUTE_UNUSED, + uint32_t patch_offset 
ATTRIBUTE_UNUSED) { + LOG(FATAL) << "UNIMPLEMENTED"; +} + } // namespace linker } // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.h b/compiler/linker/x86_64/relative_patcher_x86_64.h index 11bb6d59e3..4f3ec498cb 100644 --- a/compiler/linker/x86_64/relative_patcher_x86_64.h +++ b/compiler/linker/x86_64/relative_patcher_x86_64.h @@ -30,6 +30,9 @@ class X86_64RelativePatcher FINAL : public X86BaseRelativePatcher { const LinkerPatch& patch, uint32_t patch_offset, uint32_t target_offset) OVERRIDE; + void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset) OVERRIDE; }; } // namespace linker diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 105db1d2d0..1781643afd 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -1348,6 +1348,12 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { PatchObjectAddress(&patched_code_, literal_offset, type); break; } + case LinkerPatch::Type::kBakerReadBarrierBranch: { + writer_->relative_patcher_->PatchBakerReadBarrierBranch(&patched_code_, + patch, + offset_ + literal_offset); + break; + } default: { DCHECK(false) << "Unexpected linker patch type: " << patch.GetType(); break; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index b39a0e43fa..8faaec1de7 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -16,6 +16,7 @@ #include "code_generator_arm64.h" +#include "arch/arm64/asm_support_arm64.h" #include "arch/arm64/instruction_set_features_arm64.h" #include "art_method.h" #include "code_generator_utils.h" @@ -25,6 +26,7 @@ #include "gc/accounting/card_table.h" #include "intrinsics.h" #include "intrinsics_arm64.h" +#include "linker/arm64/relative_patcher_arm64.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "offsets.h" @@ -81,6 +83,26 @@ static constexpr int kCurrentMethodStackOffset = 0; // generates less code/data with a small num_entries. static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; +// Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle +// offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions. +// For the Baker read barrier implementation using link-generated thunks we need to split +// the offset explicitly. +constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB; + +// Flags controlling the use of link-time generated thunks for Baker read barriers. +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; + +// Some instructions have special requirements for a temporary, for example +// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require +// temp that's not an R0 (to avoid an extra move) and Baker read barrier field +// loads with large offsets need a fixed register to limit the number of link-time +// thunks we generate. For these and similar cases, we want to reserve a specific +// register that's neither callee-save nor an argument register. We choose x15. 
+inline Location FixedTempLocation() { + return Location::RegisterLocation(x15.GetCode()); +} + inline Condition ARM64Condition(IfCondition cond) { switch (cond) { case kCondEQ: return eq; @@ -298,23 +320,22 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { constexpr bool call_saves_everything_except_r0_ip0 = (!kUseReadBarrier || kUseBakerReadBarrier); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the page address of - // the entry which is in a scratch register. Make sure it's not used for saving/restoring - // registers. Exclude the scratch register also for non-Baker read barrier for simplicity. + InvokeRuntimeCallingConvention calling_convention; + // For HLoadClass/kBssEntry/kSaveEverything, the page address of the entry is in a temp + // register, make sure it's not clobbered by the call or by saving/restoring registers. DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); bool is_load_class_bss_entry = (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry); - UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler()); if (is_load_class_bss_entry) { - // This temp is a scratch register. DCHECK(bss_entry_temp_.IsValid()); - temps.Exclude(bss_entry_temp_); + DCHECK(!bss_entry_temp_.Is(calling_convention.GetRegisterAt(0))); + DCHECK( + !UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(bss_entry_temp_)); } __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; dex::TypeIndex type_index = cls_->GetTypeIndex(); __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_); QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage @@ -387,14 +408,15 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - // temp_ is a scratch register. Make sure it's not used for saving/restoring registers. - UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler()); - temps.Exclude(temp_); + InvokeRuntimeCallingConvention calling_convention; + // Make sure `temp_` is not clobbered by the call or by saving/restoring registers. 
+ DCHECK(temp_.IsValid()); + DCHECK(!temp_.Is(calling_convention.GetRegisterAt(0))); + DCHECK(!UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(temp_)); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_); arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); @@ -1416,6 +1438,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -2236,7 +2259,8 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { } } -void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { +void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); bool object_field_get_with_read_barrier = @@ -2250,7 +2274,17 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation() && + !field_info.IsVolatile()) { + // If link-time thunks for the Baker read barrier are enabled, for AOT + // non-volatile loads we need a temporary only if the offset is too big. + if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { + locations->AddTemp(FixedTempLocation()); + } + } else { + locations->AddTemp(Location::RequiresRegister()); + } } locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { @@ -2279,7 +2313,8 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, // Object FieldGet with Baker's read barrier case. // /* HeapReference<Object> */ out = *(base + offset) Register base = RegisterFrom(base_loc, Primitive::kPrimNot); - Register temp = WRegisterFrom(locations->GetTemp(0)); + Location maybe_temp = + (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); // Note that potential implicit null checks are handled in this // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( @@ -2287,7 +2322,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, out, base, offset, - temp, + maybe_temp, /* needs_null_check */ true, field_info.IsVolatile()); } else { @@ -2672,7 +2707,21 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
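An illustrative check of the 16 KiB threshold used above: the unsigned-offset form of LDR Wt, [Xn, #imm] scales a 12-bit immediate by the 4-byte access size, so 16 KiB is the first 4-byte-aligned offset that no longer fits in a single load and therefore needs the fixed x15 temp for an ADD before the LDR. The constant names below are local to this sketch.

#include <cstdint>

constexpr uint32_t kLdrWMaxImmOffset = 4095u * 4u;            // 16380: largest encodable offset.
constexpr uint32_t kReferenceLoadMinFarOffset = 16u * 1024u;  // 16384, as defined above.

static_assert(kLdrWMaxImmOffset < kReferenceLoadMinFarOffset,
              "offsets >= 16KiB cannot be encoded in a single LDR Wt, [Xn, #imm]");
static_assert(kReferenceLoadMinFarOffset - kLdrWMaxImmOffset == 4u,
              "16KiB is the smallest 4-byte-aligned offset that does not fit");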
// We need a temporary register for the read barrier marking slow // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation() && + instruction->GetIndex()->IsConstant()) { + // Array loads with constant index are treated as field loads. + // If link-time thunks for the Baker read barrier are enabled, for AOT + // constant index loads we need a temporary only if the offset is too big. + uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); + uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); + offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot); + if (offset >= kReferenceLoadMinFarOffset) { + locations->AddTemp(FixedTempLocation()); + } + } else { + locations->AddTemp(Location::RequiresRegister()); + } } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); @@ -2708,11 +2757,25 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Object ArrayGet with Baker's read barrier case. - Register temp = WRegisterFrom(locations->GetTemp(0)); // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. - codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. + offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); + Location maybe_temp = + (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + obj.W(), + offset, + maybe_temp, + /* needs_null_check */ true, + /* use_load_acquire */ false); + } else { + Register temp = WRegisterFrom(locations->GetTemp(0)); + codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true); + } } else { // General case. 
MemOperand source = HeapOperand(obj); @@ -3742,7 +3805,7 @@ void CodeGeneratorARM64::GenerateNop() { } void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { - HandleFieldGet(instruction); + HandleFieldGet(instruction, instruction->GetFieldInfo()); } void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { @@ -4544,6 +4607,11 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch( return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_); } +vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) { + baker_read_barrier_patches_.emplace_back(custom_data); + return &baker_read_barrier_patches_.back().label; +} + vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( const DexFile& dex_file, uint32_t offset_or_index, @@ -4642,7 +4710,8 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc pc_relative_string_patches_.size() + boot_image_type_patches_.size() + pc_relative_type_patches_.size() + - type_bss_entry_patches_.size(); + type_bss_entry_patches_.size() + + baker_read_barrier_patches_.size(); linker_patches->reserve(size); for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(), @@ -4676,6 +4745,10 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc target_type.dex_file, target_type.type_index.index_)); } + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(), + info.custom_data)); + } DCHECK_EQ(size, linker_patches->size()); } @@ -4788,8 +4861,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. - // Note that IP0 may be clobbered by saving/restoring the live register (only one thanks - // to the custom calling convention) or by marking, so we shall use IP1. + locations->AddTemp(FixedTempLocation()); RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConvention calling_convention; caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); @@ -4866,11 +4938,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA // Add ADRP with its PC-relative Class .bss entry patch. const DexFile& dex_file = cls->GetDexFile(); dex::TypeIndex type_index = cls->GetTypeIndex(); - // We can go to slow path even with non-zero reference and in that case marking - // can clobber IP0, so we need to use IP1 which shall be preserved. - bss_entry_temp = ip1; - UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); - temps.Exclude(bss_entry_temp); + bss_entry_temp = XRegisterFrom(cls->GetLocations()->GetTemp(0)); bss_entry_adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index); codegen_->EmitAdrpPlaceholder(bss_entry_adrp_label, bss_entry_temp); // Add LDR with its PC-relative Class patch. @@ -4977,8 +5045,7 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need. 
- // Note that IP0 may be clobbered by saving/restoring the live register (only one thanks - // to the custom calling convention) or by marking, so we shall use IP1. + locations->AddTemp(FixedTempLocation()); RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConvention calling_convention; caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); @@ -5029,11 +5096,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD const DexFile& dex_file = load->GetDexFile(); const dex::StringIndex string_index = load->GetStringIndex(); DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - // We could use IP0 as the marking shall not clobber IP0 if the reference is null and - // that's when we need the slow path. But let's not rely on such details and use IP1. - Register temp = ip1; - UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); - temps.Exclude(temp); + Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0)); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, temp); // Add LDR with its PC-relative String patch. @@ -5468,7 +5531,7 @@ void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) { } void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { - HandleFieldGet(instruction); + HandleFieldGet(instruction, instruction->GetFieldInfo()); } void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { @@ -5777,7 +5840,6 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister( Register out_reg = RegisterFrom(out, type); if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); - Register temp_reg = RegisterFrom(maybe_temp, type); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -5785,7 +5847,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister( out, out_reg, offset, - temp_reg, + maybe_temp, /* needs_null_check */ false, /* use_load_acquire */ false); } else { @@ -5793,6 +5855,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister( // Save the value of `out` into `maybe_temp` before overwriting it // in the following move operation, as we will need it for the // read barrier below. + Register temp_reg = RegisterFrom(maybe_temp, type); __ Mov(temp_reg, out_reg); // /* HeapReference<Object> */ out = *(out + offset) __ Ldr(out_reg, HeapOperand(out_reg, offset)); @@ -5820,13 +5883,12 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( CHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. - Register temp_reg = RegisterFrom(maybe_temp, type); // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, out, obj_reg, offset, - temp_reg, + maybe_temp, /* needs_null_check */ false, /* use_load_acquire */ false); } else { @@ -5857,52 +5919,97 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. - // - // Note that we do not actually check the value of - // `GetIsGcMarking()` to decide whether to mark the loaded GC - // root or not. Instead, we load into `temp` the read barrier - // mark entry point corresponding to register `root`. 
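Both kBssEntry paths above stop pinning IP1 through the scratch register scope and instead reserve a temp at register allocation time via FixedTempLocation(), presumably because the introspection thunks claim both IP0 and IP1 (they are explicitly excluded from the scratch scope further down). The helper's concrete register is defined elsewhere in code_generator_arm64.cc; the sketch below only shows its shape, with x15 as an illustrative assumption:

    #include <cstdint>

    // Minimal stand-in for art::Location, just enough to express the helper.
    struct LocationSketch {
      uint32_t reg_code;
      static LocationSketch RegisterLocation(uint32_t code) { return LocationSketch{code}; }
    };

    constexpr uint32_t kAssumedFixedTempRegCode = 15u;  // x15, an assumption

    LocationSketch FixedTempLocationSketch() {
      return LocationSketch::RegisterLocation(kAssumedFixedTempRegCode);
    }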
If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. - // - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. - // } - - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. - Register temp = lr; - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64( - instruction, root, /* entrypoint */ LocationFrom(temp)); - codegen_->AddSlowPath(slow_path); - - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ Ldr(temp, MemOperand(tr, entry_point_offset)); + if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use link-time generated thunks for the slow path. That thunk + // checks the reference and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - if (fixup_label == nullptr) { - __ Ldr(root_reg, MemOperand(obj, offset)); + UseScratchRegisterScope temps(GetVIXLAssembler()); + DCHECK(temps.IsAvailable(ip0)); + DCHECK(temps.IsAvailable(ip1)); + temps.Exclude(ip0, ip1); + uint32_t custom_data = + linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); + vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data); + + // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip0.GetCode(), 16u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); + __ Ldr(ip1, MemOperand(tr, entry_point_offset)); + EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + if (fixup_label != nullptr) { + __ Bind(fixup_label); + } + static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, + "GC root LDR must be 2 instruction (8B) before the return address label."); + __ ldr(root_reg, MemOperand(obj.X(), offset)); + __ Bind(cbnz_label); + __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + __ Bind(&return_address); } else { - codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj); + // Note that we do not actually check the value of + // `GetIsGcMarking()` to decide whether to mark the loaded GC + // root or not. Instead, we load into `temp` the read barrier + // mark entry point corresponding to register `root`. 
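The cbnz emitted above is a placeholder whose branch target is filled in by the relative patcher, and it is only reached while the GC is marking (the entrypoint register is null otherwise). Behaviorally, the link-time generated gc_root_thunk<root_reg> leaves a null root untouched and routes any other root through the mark entrypoint; a C++ model of that behavior (not the generated assembly) is:

    #include <cstdint>

    using ObjRef = uint32_t;                 // compressed reference stand-in
    using MarkFn = ObjRef (*)(ObjRef);

    // Model of gc_root_thunk<root_reg>: entered with lr at return_address.
    ObjRef GcRootThunkModel(ObjRef root, MarkFn mark_entrypoint) {
      if (root == 0u) {
        return root;                         // null roots need no marking
      }
      return mark_entrypoint(root);          // possibly forwarded reference
    }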
If `temp` + // is null, it means that `GetIsGcMarking()` is false, and vice + // versa. + // + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // } + + // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. + Register temp = lr; + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64( + instruction, root, /* entrypoint */ LocationFrom(temp)); + codegen_->AddSlowPath(slow_path); + + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ Ldr(temp, MemOperand(tr, entry_point_offset)); + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + if (fixup_label == nullptr) { + __ Ldr(root_reg, MemOperand(obj, offset)); + } else { + codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj); + } + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ Cbnz(temp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ Cbnz(temp, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); } else { // GC root loaded through a slow path for read barriers other // than Baker's. @@ -5932,13 +6039,76 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins Location ref, Register obj, uint32_t offset, - Register temp, + Location maybe_temp, bool needs_null_check, bool use_load_acquire) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !use_load_acquire && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. 
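In the non-thunk path kept above, each root register has its own pReadBarrierMarkRegNN slot in the Thread, so GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg()) boils down to a base offset plus one pointer-sized slot per register number; this is also why register code 16 (IP0) selects the pReadBarrierMarkIntrospection slot in the thunk paths. Roughly, with the base offset left as a placeholder for the real Thread entrypoint layout:

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kArm64PointerSizeBytes = 8u;

    // Shape of the per-register mark entrypoint offset computation.
    int32_t ReadBarrierMarkEntryPointOffsetSketch(size_t entrypoints_base_offset, size_t reg) {
      return static_cast<int32_t>(entrypoints_base_offset + reg * kArm64PointerSizeBytes);
    }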
If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // GcRoot<mirror::Object> root = *(obj+offset); + // gray_return_address: + + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + Register base = obj; + if (offset >= kReferenceLoadMinFarOffset) { + DCHECK(maybe_temp.IsRegister()); + base = WRegisterFrom(maybe_temp); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); + offset &= (kReferenceLoadMinFarOffset - 1u); + } + UseScratchRegisterScope temps(GetVIXLAssembler()); + DCHECK(temps.IsAvailable(ip0)); + DCHECK(temps.IsAvailable(ip1)); + temps.Exclude(ip0, ip1); + uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData( + base.GetCode(), + obj.GetCode()); + vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); + + // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip0.GetCode(), 16u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); + __ Ldr(ip1, MemOperand(tr, entry_point_offset)); + EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + __ Bind(cbnz_label); + __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == -4, + "Field LDR must be 1 instruction (4B) before the return address label."); + __ ldr(RegisterFrom(ref, Primitive::kPrimNot), MemOperand(base.X(), offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + __ Bind(&return_address); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) + Register temp = WRegisterFrom(maybe_temp); Location no_index = Location::NoLocation(); size_t no_scale_factor = 0u; GenerateReferenceLoadWithBakerReadBarrier(instruction, diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 869aad2942..58feea2423 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -351,7 +351,7 @@ class LocationsBuilderARM64 : public HGraphVisitor { private: void HandleBinaryOp(HBinaryOperation* instr); void HandleFieldSet(HInstruction* instruction); - void HandleFieldGet(HInstruction* instruction); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void HandleInvoke(HInvoke* instr); void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* instr); @@ -579,6 +579,10 @@ class CodeGeneratorARM64 : public CodeGenerator { uint32_t element_offset, vixl::aarch64::Label* adrp_label = nullptr); + // Add a new baker read barrier patch and return the label to be bound + // before the CBNZ instruction. 
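The field_thunk<holder_reg, base_reg> referenced in the comment above is emitted by the relative patcher: it inspects the holder's lock word read barrier state, and when the holder is not gray it returns to not_gray_return_address with a fake dependency so the guarded LDR executes normally; otherwise the marking entrypoint produces the (possibly forwarded) reference and execution resumes at gray_return_address. A behavioral C++ model, with the gray bit position treated as an assumption:

    #include <cstdint>

    using ObjRef = uint32_t;

    struct HolderSketch {
      uint32_t lock_word;
      ObjRef field;
    };

    constexpr uint32_t kAssumedGrayBit = 1u << 28;  // assumed read barrier state bit

    // Model of field_thunk<holder_reg, base_reg> plus the LDR it guards.
    ObjRef FieldLoadWithThunkModel(const HolderSketch& holder, ObjRef (*mark)(ObjRef)) {
      if ((holder.lock_word & kAssumedGrayBit) == 0u) {
        return holder.field;       // not gray: plain load (real code adds a fake dependency)
      }
      return mark(holder.field);   // gray: mark the freshly loaded reference
    }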
+ vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data); + vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral( const DexFile& dex_file, dex::StringIndex string_index); @@ -610,7 +614,7 @@ class CodeGeneratorARM64 : public CodeGenerator { Location ref, vixl::aarch64::Register obj, uint32_t offset, - vixl::aarch64::Register temp, + Location maybe_temp, bool needs_null_check, bool use_load_acquire); // Fast path implementation of ReadBarrier::Barrier for a heap @@ -738,6 +742,13 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Label* pc_insn_label; }; + struct BakerReadBarrierPatchInfo { + explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { } + + vixl::aarch64::Label label; + uint32_t custom_data; + }; + vixl::aarch64::Label* NewPcRelativePatch(const DexFile& dex_file, uint32_t offset_or_index, vixl::aarch64::Label* adrp_label, @@ -777,6 +788,8 @@ class CodeGeneratorARM64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // Baker read barrier patch info. + ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; // Patches for string literals in JIT compiled code. StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 423fd3c6ae..77dcb5a55f 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2507,9 +2507,11 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { // We use a block to end the scratch scope before the write barrier, thus // freeing the temporary registers so they can be used in `MarkGCCard`. UseScratchRegisterScope temps(masm); + Location temp3_loc; // Used only for Baker read barrier. Register temp3; if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - temp3 = WRegisterFrom(locations->GetTemp(2)); + temp3_loc = locations->GetTemp(2); + temp3 = WRegisterFrom(temp3_loc); } else { temp3 = temps.AcquireW(); } @@ -2527,7 +2529,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { temp1_loc, src.W(), class_offset, - temp2, + temp3_loc, /* needs_null_check */ false, /* use_load_acquire */ false); // Bail out if the source is not a non primitive array. @@ -2536,7 +2538,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { temp1_loc, temp1, component_offset, - temp2, + temp3_loc, /* needs_null_check */ false, /* use_load_acquire */ false); __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel()); @@ -2553,7 +2555,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { temp1_loc, dest.W(), class_offset, - temp2, + temp3_loc, /* needs_null_check */ false, /* use_load_acquire */ false); @@ -2570,7 +2572,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { temp2_loc, temp1, component_offset, - temp3, + temp3_loc, /* needs_null_check */ false, /* use_load_acquire */ false); __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); @@ -2589,7 +2591,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { temp2_loc, src.W(), class_offset, - temp3, + temp3_loc, /* needs_null_check */ false, /* use_load_acquire */ false); // Note: if heap poisoning is on, we are comparing two unpoisoned references here. 
@@ -2603,7 +2605,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
                                                     temp1_loc,
                                                     temp1,
                                                     component_offset,
-                                                    temp2,
+                                                    temp3_loc,
                                                     /* needs_null_check */ false,
                                                     /* use_load_acquire */ false);
         // /* HeapReference<Class> */ temp1 = temp1->super_class_
@@ -2687,7 +2689,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
                                                     temp1_loc,
                                                     src.W(),
                                                     class_offset,
-                                                    temp2,
+                                                    temp3_loc,
                                                     /* needs_null_check */ false,
                                                     /* use_load_acquire */ false);
       // /* HeapReference<Class> */ temp2 = temp1->component_type_
@@ -2695,7 +2697,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
                                                     temp2_loc,
                                                     temp1,
                                                     component_offset,
-                                                    temp3,
+                                                    temp3_loc,
                                                     /* needs_null_check */ false,
                                                     /* use_load_acquire */ false);
     __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());