ARM64: Use link-time generated thunks for Baker CC read barrier.
Remaining work for follow-up CLs:
- array loads,
- volatile field loads,
- use implicit null check in field thunk.
Test: Added tests to relative_patcher_arm64
Test: New run-test 160-read-barrier-stress
Test: m test-art-target-gtest on Nexus 6P.
Test: testrunner.py --target on Nexus 6P.
Bug: 29516974
Bug: 30126666
Bug: 36141117
Change-Id: Id68ff171c55a3f1bf1ac1b657f480531aa7b3710
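For reference, the scheme introduced below works as follows: the compiler emits an unpatched "CBNZ Xt, +0" at the patch location and records a Type::kBakerReadBarrierBranch patch whose custom value encodes the thunk parameters (registers); at link time the patcher compiles one thunk per distinct parameter combination and patches the CBNZ to branch to it. Below is a minimal sketch of how a code generator could record such a patch with the helpers added in this CL; the RecordBakerFieldPatch() wrapper and the `patches` vector are illustrative assumptions, not part of this change.

  #include <vector>

  #include "compiled_method.h"                      // LinkerPatch
  #include "linker/arm64/relative_patcher_arm64.h"  // Arm64RelativePatcher

  namespace art {
  namespace linker {

  // Hypothetical helper (illustration only): record a Baker read barrier branch
  // patch for a field load whose lock word is read through `holder_reg` and whose
  // data is loaded through `base_reg`. `cbnz_offset` is the code offset of the
  // placeholder "CBNZ Xt, +0" emitted by the code generator.
  void RecordBakerFieldPatch(std::vector<LinkerPatch>* patches,
                             size_t cbnz_offset,
                             uint32_t base_reg,
                             uint32_t holder_reg) {
    // Pack the thunk parameters into the 32-bit custom value; the patcher later
    // decodes it in GetBakerReadBarrierKey() to pick (or compile) the matching
    // thunk and patches the CBNZ displacement to branch to it.
    uint32_t custom_data =
        Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg);
    patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(cbnz_offset, custom_data));
  }

  }  // namespace linker
  }  // namespace art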
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index aa529f8..d0f66e2 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -121,14 +121,15 @@
enum class Type : uint8_t {
kMethod,
kCall,
- kCallRelative, // NOTE: Actual patching is instruction_set-dependent.
+ kCallRelative, // NOTE: Actual patching is instruction_set-dependent.
kType,
- kTypeRelative, // NOTE: Actual patching is instruction_set-dependent.
- kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent.
+ kTypeRelative, // NOTE: Actual patching is instruction_set-dependent.
+ kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent.
kString,
- kStringRelative, // NOTE: Actual patching is instruction_set-dependent.
- kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent.
- kDexCacheArray, // NOTE: Actual patching is instruction_set-dependent.
+ kStringRelative, // NOTE: Actual patching is instruction_set-dependent.
+ kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent.
+ kDexCacheArray, // NOTE: Actual patching is instruction_set-dependent.
+ kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent.
};
static LinkerPatch MethodPatch(size_t literal_offset,
@@ -215,13 +216,21 @@
const DexFile* target_dex_file,
uint32_t pc_insn_offset,
uint32_t element_offset) {
- DCHECK(IsUint<32>(element_offset));
LinkerPatch patch(literal_offset, Type::kDexCacheArray, target_dex_file);
patch.pc_insn_offset_ = pc_insn_offset;
patch.element_offset_ = element_offset;
return patch;
}
+ static LinkerPatch BakerReadBarrierBranchPatch(size_t literal_offset,
+ uint32_t custom_value1 = 0u,
+ uint32_t custom_value2 = 0u) {
+ LinkerPatch patch(literal_offset, Type::kBakerReadBarrierBranch, nullptr);
+ patch.baker_custom_value1_ = custom_value1;
+ patch.baker_custom_value2_ = custom_value2;
+ return patch;
+ }
+
LinkerPatch(const LinkerPatch& other) = default;
LinkerPatch& operator=(const LinkerPatch& other) = default;
@@ -241,6 +250,7 @@
case Type::kStringRelative:
case Type::kStringBssEntry:
case Type::kDexCacheArray:
+ case Type::kBakerReadBarrierBranch:
return true;
default:
return false;
@@ -301,6 +311,16 @@
return pc_insn_offset_;
}
+ uint32_t GetBakerCustomValue1() const {
+ DCHECK(patch_type_ == Type::kBakerReadBarrierBranch);
+ return baker_custom_value1_;
+ }
+
+ uint32_t GetBakerCustomValue2() const {
+ DCHECK(patch_type_ == Type::kBakerReadBarrierBranch);
+ return baker_custom_value2_;
+ }
+
private:
LinkerPatch(size_t literal_offset, Type patch_type, const DexFile* target_dex_file)
: target_dex_file_(target_dex_file),
@@ -314,6 +334,7 @@
}
const DexFile* target_dex_file_;
+ // TODO: Clean up naming. Some patched locations are literals but others are not.
uint32_t literal_offset_ : 24; // Method code size up to 16MiB.
Type patch_type_ : 8;
union {
@@ -322,10 +343,12 @@
uint32_t type_idx_; // Type index for Type patches.
uint32_t string_idx_; // String index for String patches.
uint32_t element_offset_; // Element offset in the dex cache arrays.
+ uint32_t baker_custom_value1_;
static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators");
static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators");
static_assert(sizeof(string_idx_) == sizeof(cmp1_), "needed by relational operators");
static_assert(sizeof(element_offset_) == sizeof(cmp1_), "needed by relational operators");
+ static_assert(sizeof(baker_custom_value1_) == sizeof(cmp1_), "needed by relational operators");
};
union {
// Note: To avoid uninitialized padding on 64-bit systems, we use `size_t` for `cmp2_`.
@@ -334,7 +357,9 @@
// Literal offset of the insn loading PC (same as literal_offset if it's the same insn,
// may be different if the PC-relative addressing needs multiple insns).
uint32_t pc_insn_offset_;
+ uint32_t baker_custom_value2_;
static_assert(sizeof(pc_insn_offset_) <= sizeof(cmp2_), "needed by relational operators");
+ static_assert(sizeof(baker_custom_value2_) <= sizeof(cmp2_), "needed by relational operators");
};
friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs);
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index 2471f79..f55d5a6 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -24,6 +24,118 @@
namespace art {
namespace linker {
+class ArmBaseRelativePatcher::ThunkData {
+ public:
+ ThunkData(std::vector<uint8_t> code, uint32_t max_next_offset)
+ : code_(code),
+ offsets_(),
+ max_next_offset_(max_next_offset),
+ pending_offset_(0u) {
+ DCHECK(NeedsNextThunk()); // The data is constructed only when we expect to need the thunk.
+ }
+
+ ThunkData(ThunkData&& src) = default;
+
+ size_t CodeSize() const {
+ return code_.size();
+ }
+
+ ArrayRef<const uint8_t> GetCode() const {
+ return ArrayRef<const uint8_t>(code_);
+ }
+
+ bool NeedsNextThunk() const {
+ return max_next_offset_ != 0u;
+ }
+
+ uint32_t MaxNextOffset() const {
+ DCHECK(NeedsNextThunk());
+ return max_next_offset_;
+ }
+
+ void ClearMaxNextOffset() {
+ DCHECK(NeedsNextThunk());
+ max_next_offset_ = 0u;
+ }
+
+ void SetMaxNextOffset(uint32_t max_next_offset) {
+ DCHECK(!NeedsNextThunk());
+ max_next_offset_ = max_next_offset;
+ }
+
+ // Adjust the MaxNextOffset() down if needed to fit the code before the next thunk.
+ // Returns true if it was adjusted, false if the old value was kept.
+ bool MakeSpaceBefore(const ThunkData& next_thunk, size_t alignment) {
+ DCHECK(NeedsNextThunk());
+ DCHECK(next_thunk.NeedsNextThunk());
+ DCHECK_ALIGNED_PARAM(MaxNextOffset(), alignment);
+ DCHECK_ALIGNED_PARAM(next_thunk.MaxNextOffset(), alignment);
+ if (next_thunk.MaxNextOffset() - CodeSize() < MaxNextOffset()) {
+ max_next_offset_ = RoundDown(next_thunk.MaxNextOffset() - CodeSize(), alignment);
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ uint32_t ReserveOffset(size_t offset) {
+ DCHECK(NeedsNextThunk());
+ DCHECK_LE(offset, max_next_offset_);
+ max_next_offset_ = 0u; // The reserved offset should satisfy all pending references.
+ offsets_.push_back(offset);
+ return offset + CodeSize();
+ }
+
+ bool HasReservedOffset() const {
+ return !offsets_.empty();
+ }
+
+ uint32_t LastReservedOffset() const {
+ DCHECK(HasReservedOffset());
+ return offsets_.back();
+ }
+
+ bool HasPendingOffset() const {
+ return pending_offset_ != offsets_.size();
+ }
+
+ uint32_t GetPendingOffset() const {
+ DCHECK(HasPendingOffset());
+ return offsets_[pending_offset_];
+ }
+
+ void MarkPendingOffsetAsWritten() {
+ DCHECK(HasPendingOffset());
+ ++pending_offset_;
+ }
+
+ bool HasWrittenOffset() const {
+ return pending_offset_ != 0u;
+ }
+
+ uint32_t LastWrittenOffset() const {
+ DCHECK(HasWrittenOffset());
+ return offsets_[pending_offset_ - 1u];
+ }
+
+ private:
+ std::vector<uint8_t> code_; // The code of the thunk.
+ std::vector<uint32_t> offsets_; // Offsets at which the thunk needs to be written.
+ uint32_t max_next_offset_; // The maximum offset at which the next thunk can be placed.
+ uint32_t pending_offset_; // The index of the next offset to write.
+};
+
+class ArmBaseRelativePatcher::PendingThunkComparator {
+ public:
+ bool operator()(const ThunkData* lhs, const ThunkData* rhs) const {
+ DCHECK(lhs->HasPendingOffset());
+ DCHECK(rhs->HasPendingOffset());
+ // The top of the heap is defined to contain the highest element and we want to pick
+ // the thunk with the smallest pending offset, so use the reverse ordering, i.e. ">".
+ return lhs->GetPendingOffset() > rhs->GetPendingOffset();
+ }
+};
+
uint32_t ArmBaseRelativePatcher::ReserveSpace(uint32_t offset,
const CompiledMethod* compiled_method,
MethodReference method_ref) {
@@ -31,151 +143,305 @@
}
uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) {
- uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
- bool needs_thunk = ReserveSpaceProcessPatches(aligned_offset,
- MethodReference(nullptr, 0u),
- aligned_offset);
- if (needs_thunk) {
- // All remaining patches will be handled by this thunk.
- DCHECK(!unprocessed_patches_.empty());
- DCHECK_LE(aligned_offset - unprocessed_patches_.front().second, max_positive_displacement_);
- unprocessed_patches_.clear();
-
- thunk_locations_.push_back(aligned_offset);
- offset = aligned_offset + thunk_code_.size();
+ // For multi-oat compilations (boot image), ReserveSpaceEnd() is called for each oat file.
+ // Since we do not know here whether this is the last file or whether the next opportunity
+ // to place a thunk will be soon enough, we need to reserve all needed thunks now. Code for
+ // subsequent oat files can still call back to them.
+ if (!unprocessed_method_call_patches_.empty()) {
+ ResolveMethodCalls(offset, MethodReference(nullptr, DexFile::kDexNoIndex));
}
+ for (ThunkData* data : unreserved_thunks_) {
+ uint32_t thunk_offset = CompiledCode::AlignCode(offset, instruction_set_);
+ offset = data->ReserveOffset(thunk_offset);
+ }
+ unreserved_thunks_.clear();
+ // We also need to delay initializing the pending_thunks_ until the call to WriteThunks().
+ // Check that the `pending_thunks_.capacity()` indicates that no WriteThunks() has taken place.
+ DCHECK_EQ(pending_thunks_.capacity(), 0u);
return offset;
}
uint32_t ArmBaseRelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) {
- if (current_thunk_to_write_ == thunk_locations_.size()) {
- return offset;
+ if (pending_thunks_.capacity() == 0u) {
+ if (thunks_.empty()) {
+ return offset;
+ }
+ // First call to WriteThunks(), prepare the thunks for writing.
+ pending_thunks_.reserve(thunks_.size());
+ for (auto& entry : thunks_) {
+ ThunkData* data = &entry.second;
+ if (data->HasPendingOffset()) {
+ pending_thunks_.push_back(data);
+ }
+ }
+ std::make_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator());
}
uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
- if (UNLIKELY(aligned_offset == thunk_locations_[current_thunk_to_write_])) {
- ++current_thunk_to_write_;
+ while (!pending_thunks_.empty() &&
+ pending_thunks_.front()->GetPendingOffset() == aligned_offset) {
+ // Write alignment bytes and code.
uint32_t aligned_code_delta = aligned_offset - offset;
- if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) {
+ if (aligned_code_delta != 0u && UNLIKELY(!WriteCodeAlignment(out, aligned_code_delta))) {
return 0u;
}
- if (UNLIKELY(!WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk_code_)))) {
+ if (UNLIKELY(!WriteThunk(out, pending_thunks_.front()->GetCode()))) {
return 0u;
}
- offset = aligned_offset + thunk_code_.size();
+ offset = aligned_offset + pending_thunks_.front()->CodeSize();
+ // Mark the thunk as written at the pending offset and update the `pending_thunks_` heap.
+ std::pop_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator());
+ pending_thunks_.back()->MarkPendingOffsetAsWritten();
+ if (pending_thunks_.back()->HasPendingOffset()) {
+ std::push_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator());
+ } else {
+ pending_thunks_.pop_back();
+ }
+ aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_);
}
+ DCHECK(pending_thunks_.empty() || pending_thunks_.front()->GetPendingOffset() > aligned_offset);
return offset;
}
ArmBaseRelativePatcher::ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider,
- InstructionSet instruction_set,
- std::vector<uint8_t> thunk_code,
- uint32_t max_positive_displacement,
- uint32_t max_negative_displacement)
- : provider_(provider), instruction_set_(instruction_set), thunk_code_(thunk_code),
- max_positive_displacement_(max_positive_displacement),
- max_negative_displacement_(max_negative_displacement),
- thunk_locations_(), current_thunk_to_write_(0u), unprocessed_patches_() {
+ InstructionSet instruction_set)
+ : provider_(provider),
+ instruction_set_(instruction_set),
+ thunks_(),
+ unprocessed_method_call_patches_(),
+ method_call_thunk_(nullptr),
+ pending_thunks_() {
+}
+
+ArmBaseRelativePatcher::~ArmBaseRelativePatcher() {
+ // All work done by member destructors.
}
uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset,
const CompiledMethod* compiled_method,
MethodReference method_ref,
uint32_t max_extra_space) {
- uint32_t quick_code_size = compiled_method->GetQuickCode().size();
- uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader));
- uint32_t next_aligned_offset = compiled_method->AlignCode(quick_code_offset + quick_code_size);
- // Adjust for extra space required by the subclass.
- next_aligned_offset = compiled_method->AlignCode(next_aligned_offset + max_extra_space);
- // TODO: ignore unprocessed patches targeting this method if they can reach quick_code_offset.
- // We need the MethodReference for that.
- if (!unprocessed_patches_.empty() &&
- next_aligned_offset - unprocessed_patches_.front().second > max_positive_displacement_) {
- bool needs_thunk = ReserveSpaceProcessPatches(quick_code_offset,
- method_ref,
- next_aligned_offset);
- if (needs_thunk) {
- // A single thunk will cover all pending patches.
- unprocessed_patches_.clear();
- uint32_t thunk_location = CompiledMethod::AlignCode(offset, instruction_set_);
- thunk_locations_.push_back(thunk_location);
- offset = thunk_location + thunk_code_.size();
+ // Adjust code size for extra space required by the subclass.
+ uint32_t max_code_size = compiled_method->GetQuickCode().size() + max_extra_space;
+ uint32_t code_offset;
+ uint32_t next_aligned_offset;
+ while (true) {
+ code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader));
+ next_aligned_offset = compiled_method->AlignCode(code_offset + max_code_size);
+ if (unreserved_thunks_.empty() ||
+ unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset) {
+ break;
+ }
+ ThunkData* thunk = unreserved_thunks_.front();
+ if (thunk == method_call_thunk_) {
+ ResolveMethodCalls(code_offset, method_ref);
+ // This may have changed `method_call_thunk_` data, so re-check if we need to reserve.
+ if (unreserved_thunks_.empty() ||
+ unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset) {
+ break;
+ }
+ // We need to process the new `front()` whether it's still the `method_call_thunk_` or not.
+ thunk = unreserved_thunks_.front();
+ }
+ unreserved_thunks_.pop_front();
+ uint32_t thunk_offset = CompiledCode::AlignCode(offset, instruction_set_);
+ offset = thunk->ReserveOffset(thunk_offset);
+ if (thunk == method_call_thunk_) {
+ // All remaining method call patches will be handled by this thunk.
+ DCHECK(!unprocessed_method_call_patches_.empty());
+ DCHECK_LE(thunk_offset - unprocessed_method_call_patches_.front().GetPatchOffset(),
+ MaxPositiveDisplacement(ThunkType::kMethodCall));
+ unprocessed_method_call_patches_.clear();
}
}
- for (const LinkerPatch& patch : compiled_method->GetPatches()) {
- if (patch.GetType() == LinkerPatch::Type::kCallRelative) {
- unprocessed_patches_.emplace_back(patch.TargetMethod(),
- quick_code_offset + patch.LiteralOffset());
- }
- }
+
+ // Process patches and check that adding thunks for the current method did not push any
+ // thunks (previously existing or newly added) before `next_aligned_offset`. This is
+ // essentially a check that we never compile a method that's too big. The calls or branches
+ // from the method should be able to reach beyond the end of the method and over any pending
+ // thunks. (The number of different thunks should be relatively low and their code short.)
+ ProcessPatches(compiled_method, code_offset);
+ CHECK(unreserved_thunks_.empty() ||
+ unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset);
+
return offset;
}
-uint32_t ArmBaseRelativePatcher::CalculateDisplacement(uint32_t patch_offset,
- uint32_t target_offset) {
+uint32_t ArmBaseRelativePatcher::CalculateMethodCallDisplacement(uint32_t patch_offset,
+ uint32_t target_offset) {
+ DCHECK(method_call_thunk_ != nullptr);
// Unsigned arithmetic with its well-defined overflow behavior is just fine here.
uint32_t displacement = target_offset - patch_offset;
+ uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall);
+ uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall);
// NOTE: With unsigned arithmetic we do mean to use && rather than || below.
- if (displacement > max_positive_displacement_ && displacement < -max_negative_displacement_) {
+ if (displacement > max_positive_displacement && displacement < -max_negative_displacement) {
// Unwritten thunks have higher offsets, check if it's within range.
- DCHECK(current_thunk_to_write_ == thunk_locations_.size() ||
- thunk_locations_[current_thunk_to_write_] > patch_offset);
- if (current_thunk_to_write_ != thunk_locations_.size() &&
- thunk_locations_[current_thunk_to_write_] - patch_offset < max_positive_displacement_) {
- displacement = thunk_locations_[current_thunk_to_write_] - patch_offset;
+ DCHECK(!method_call_thunk_->HasPendingOffset() ||
+ method_call_thunk_->GetPendingOffset() > patch_offset);
+ if (method_call_thunk_->HasPendingOffset() &&
+ method_call_thunk_->GetPendingOffset() - patch_offset <= max_positive_displacement) {
+ displacement = method_call_thunk_->GetPendingOffset() - patch_offset;
} else {
// We must have a previous thunk then.
- DCHECK_NE(current_thunk_to_write_, 0u);
- DCHECK_LT(thunk_locations_[current_thunk_to_write_ - 1], patch_offset);
- displacement = thunk_locations_[current_thunk_to_write_ - 1] - patch_offset;
- DCHECK(displacement >= -max_negative_displacement_);
+ DCHECK(method_call_thunk_->HasWrittenOffset());
+ DCHECK_LT(method_call_thunk_->LastWrittenOffset(), patch_offset);
+ displacement = method_call_thunk_->LastWrittenOffset() - patch_offset;
+ DCHECK_GE(displacement, -max_negative_displacement);
}
}
return displacement;
}
-bool ArmBaseRelativePatcher::ReserveSpaceProcessPatches(uint32_t quick_code_offset,
- MethodReference method_ref,
- uint32_t next_aligned_offset) {
- // Process as many patches as possible, stop only on unresolved targets or calls too far back.
- while (!unprocessed_patches_.empty()) {
- MethodReference patch_ref = unprocessed_patches_.front().first;
- uint32_t patch_offset = unprocessed_patches_.front().second;
- DCHECK(thunk_locations_.empty() || thunk_locations_.back() <= patch_offset);
- if (patch_ref.dex_file == method_ref.dex_file &&
- patch_ref.dex_method_index == method_ref.dex_method_index) {
- DCHECK_GT(quick_code_offset, patch_offset);
- if (quick_code_offset - patch_offset > max_positive_displacement_) {
- return true;
- }
- } else {
- auto result = provider_->FindMethodOffset(patch_ref);
- if (!result.first) {
- // If still unresolved, check if we have a thunk within range.
- if (thunk_locations_.empty() ||
- patch_offset - thunk_locations_.back() > max_negative_displacement_) {
- // No thunk in range, we need a thunk if the next aligned offset
- // is out of range, or if we're at the end of all code.
- return (next_aligned_offset - patch_offset > max_positive_displacement_) ||
- (quick_code_offset == next_aligned_offset); // End of code.
- }
+uint32_t ArmBaseRelativePatcher::GetThunkTargetOffset(const ThunkKey& key, uint32_t patch_offset) {
+ auto it = thunks_.find(key);
+ CHECK(it != thunks_.end());
+ const ThunkData& data = it->second;
+ if (data.HasWrittenOffset()) {
+ uint32_t offset = data.LastWrittenOffset();
+ DCHECK_LT(offset, patch_offset);
+ if (patch_offset - offset <= MaxNegativeDisplacement(key.GetType())) {
+ return offset;
+ }
+ }
+ DCHECK(data.HasPendingOffset());
+ uint32_t offset = data.GetPendingOffset();
+ DCHECK_GT(offset, patch_offset);
+ DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key.GetType()));
+ return offset;
+}
+
+void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_method,
+ uint32_t code_offset) {
+ for (const LinkerPatch& patch : compiled_method->GetPatches()) {
+ uint32_t patch_offset = code_offset + patch.LiteralOffset();
+ ThunkType key_type = static_cast<ThunkType>(-1);
+ ThunkData* old_data = nullptr;
+ if (patch.GetType() == LinkerPatch::Type::kCallRelative) {
+ key_type = ThunkType::kMethodCall;
+ unprocessed_method_call_patches_.emplace_back(patch_offset, patch.TargetMethod());
+ if (method_call_thunk_ == nullptr) {
+ ThunkKey key(key_type, ThunkParams{{ 0u, 0u }}); // NOLINT(whitespace/braces)
+ uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type);
+ auto it = thunks_.Put(key, ThunkData(CompileThunk(key), max_next_offset));
+ method_call_thunk_ = &it->second;
+ AddUnreservedThunk(method_call_thunk_);
} else {
- uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_);
- if (target_offset >= patch_offset) {
- DCHECK_LE(target_offset - patch_offset, max_positive_displacement_);
- } else {
- // When calling back, check if we have a thunk that's closer than the actual target.
- if (!thunk_locations_.empty()) {
- target_offset = std::max(target_offset, thunk_locations_.back());
- }
- if (patch_offset - target_offset > max_negative_displacement_) {
- return true;
- }
+ old_data = method_call_thunk_;
+ }
+ } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) {
+ ThunkKey key = GetBakerReadBarrierKey(patch);
+ key_type = key.GetType();
+ auto lb = thunks_.lower_bound(key);
+ if (lb == thunks_.end() || thunks_.key_comp()(key, lb->first)) {
+ uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type);
+ auto it = thunks_.PutBefore(lb, key, ThunkData(CompileThunk(key), max_next_offset));
+ AddUnreservedThunk(&it->second);
+ } else {
+ old_data = &lb->second;
+ }
+ }
+ if (old_data != nullptr) {
+ // Shared path where an old thunk may need an update.
+ DCHECK(key_type != static_cast<ThunkType>(-1));
+ DCHECK(!old_data->HasReservedOffset() || old_data->LastReservedOffset() < patch_offset);
+ if (old_data->NeedsNextThunk()) {
+ // Patches for a method are ordered by literal offset, so if we still need to place
+ // this thunk for a previous patch, that thunk shall be in range for this patch.
+ DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key_type));
+ } else {
+ if (!old_data->HasReservedOffset() ||
+ patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key_type)) {
+ old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key_type));
+ AddUnreservedThunk(old_data);
}
}
}
- unprocessed_patches_.pop_front();
}
- return false;
+}
+
+void ArmBaseRelativePatcher::AddUnreservedThunk(ThunkData* data) {
+ DCHECK(data->NeedsNextThunk());
+ size_t index = unreserved_thunks_.size();
+ while (index != 0u && data->MaxNextOffset() < unreserved_thunks_[index - 1u]->MaxNextOffset()) {
+ --index;
+ }
+ unreserved_thunks_.insert(unreserved_thunks_.begin() + index, data);
+ // We may need to update the max next offset(s) if the thunk code would not fit.
+ size_t alignment = GetInstructionSetAlignment(instruction_set_);
+ if (index + 1u != unreserved_thunks_.size()) {
+ // Note: Ignore the return value as we need to process previous thunks regardless.
+ data->MakeSpaceBefore(*unreserved_thunks_[index + 1u], alignment);
+ }
+ // Make space for previous thunks. Once we find a pending thunk that does
+ // not need an adjustment, we can stop.
+ while (index != 0u && unreserved_thunks_[index - 1u]->MakeSpaceBefore(*data, alignment)) {
+ --index;
+ data = unreserved_thunks_[index];
+ }
+}
+
+void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset,
+ MethodReference method_ref) {
+ DCHECK(!unreserved_thunks_.empty());
+ DCHECK(!unprocessed_method_call_patches_.empty());
+ DCHECK(method_call_thunk_ != nullptr);
+ uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall);
+ uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall);
+ // Process as many patches as possible, stop only on unresolved targets or calls too far back.
+ while (!unprocessed_method_call_patches_.empty()) {
+ MethodReference target_method = unprocessed_method_call_patches_.front().GetTargetMethod();
+ uint32_t patch_offset = unprocessed_method_call_patches_.front().GetPatchOffset();
+ DCHECK(!method_call_thunk_->HasReservedOffset() ||
+ method_call_thunk_->LastReservedOffset() <= patch_offset);
+ if (!method_call_thunk_->HasReservedOffset() ||
+ patch_offset - method_call_thunk_->LastReservedOffset() > max_negative_displacement) {
+ // No previous thunk in range, check if we can reach the target directly.
+ if (target_method.dex_file == method_ref.dex_file &&
+ target_method.dex_method_index == method_ref.dex_method_index) {
+ DCHECK_GT(quick_code_offset, patch_offset);
+ if (quick_code_offset - patch_offset > max_positive_displacement) {
+ break;
+ }
+ } else {
+ auto result = provider_->FindMethodOffset(target_method);
+ if (!result.first) {
+ break;
+ }
+ uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_);
+ if (target_offset >= patch_offset) {
+ DCHECK_LE(target_offset - patch_offset, max_positive_displacement);
+ } else if (patch_offset - target_offset > max_negative_displacement) {
+ break;
+ }
+ }
+ }
+ unprocessed_method_call_patches_.pop_front();
+ }
+ if (!unprocessed_method_call_patches_.empty()) {
+ // Try to adjust the max next offset in `method_call_thunk_`. Do this conservatively only if
+ // the thunk shall be at the end of the `unreserved_thunks_` to avoid dealing with overlaps.
+ uint32_t new_max_next_offset =
+ unprocessed_method_call_patches_.front().GetPatchOffset() + max_positive_displacement;
+ if (new_max_next_offset >
+ unreserved_thunks_.back()->MaxNextOffset() + unreserved_thunks_.back()->CodeSize()) {
+ method_call_thunk_->ClearMaxNextOffset();
+ method_call_thunk_->SetMaxNextOffset(new_max_next_offset);
+ if (method_call_thunk_ != unreserved_thunks_.back()) {
+ RemoveElement(unreserved_thunks_, method_call_thunk_);
+ unreserved_thunks_.push_back(method_call_thunk_);
+ }
+ }
+ } else {
+ // We have resolved all method calls, we do not need a new thunk anymore.
+ method_call_thunk_->ClearMaxNextOffset();
+ RemoveElement(unreserved_thunks_, method_call_thunk_);
+ }
+}
+
+inline uint32_t ArmBaseRelativePatcher::CalculateMaxNextOffset(uint32_t patch_offset,
+ ThunkType type) {
+ return RoundDown(patch_offset + MaxPositiveDisplacement(type),
+ GetInstructionSetAlignment(instruction_set_));
}
} // namespace linker
diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h
index 25fd35e..2cb1b6c 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.h
+++ b/compiler/linker/arm/relative_patcher_arm_base.h
@@ -18,9 +18,11 @@
#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_
#include <deque>
+#include <vector>
#include "linker/relative_patcher.h"
#include "method_reference.h"
+#include "safe_map.h"
namespace art {
namespace linker {
@@ -35,32 +37,138 @@
protected:
ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider,
- InstructionSet instruction_set,
- std::vector<uint8_t> thunk_code,
- uint32_t max_positive_displacement,
- uint32_t max_negative_displacement);
+ InstructionSet instruction_set);
+ ~ArmBaseRelativePatcher();
+
+ enum class ThunkType {
+ kMethodCall, // Method call thunk.
+ kBakerReadBarrierField, // Baker read barrier, load field or array element at known offset.
+ kBakerReadBarrierRoot, // Baker read barrier, GC root load.
+ };
+
+ struct BakerReadBarrierOffsetParams {
+ uint32_t holder_reg; // Holder object for reading lock word.
+ uint32_t base_reg; // Base register, different from holder for large offset.
+ // If base differs from holder, it should be a pre-defined
+ // register to limit the number of thunks we need to emit.
+ // The offset is retrieved using introspection.
+ };
+
+ struct BakerReadBarrierRootParams {
+ uint32_t root_reg; // The register holding the GC root.
+ uint32_t dummy;
+ };
+
+ struct RawThunkParams {
+ uint32_t first;
+ uint32_t second;
+ };
+
+ union ThunkParams {
+ RawThunkParams raw_params;
+ BakerReadBarrierOffsetParams offset_params;
+ BakerReadBarrierRootParams root_params;
+ };
+
+ class ThunkKey {
+ public:
+ ThunkKey(ThunkType type, ThunkParams params) : type_(type), params_(params) { }
+
+ ThunkType GetType() const {
+ return type_;
+ }
+
+ BakerReadBarrierOffsetParams GetOffsetParams() const {
+ DCHECK(type_ == ThunkType::kBakerReadBarrierField);
+ return params_.offset_params;
+ }
+
+ BakerReadBarrierRootParams GetRootParams() const {
+ DCHECK(type_ == ThunkType::kBakerReadBarrierRoot);
+ return params_.root_params;
+ }
+
+ RawThunkParams GetRawParams() const {
+ return params_.raw_params;
+ }
+
+ private:
+ ThunkType type_;
+ ThunkParams params_;
+ };
+
+ class ThunkKeyCompare {
+ public:
+ bool operator()(const ThunkKey& lhs, const ThunkKey& rhs) const {
+ if (lhs.GetType() != rhs.GetType()) {
+ return lhs.GetType() < rhs.GetType();
+ }
+ if (lhs.GetRawParams().first != rhs.GetRawParams().first) {
+ return lhs.GetRawParams().first < rhs.GetRawParams().first;
+ }
+ return lhs.GetRawParams().second < rhs.GetRawParams().second;
+ }
+ };
uint32_t ReserveSpaceInternal(uint32_t offset,
const CompiledMethod* compiled_method,
MethodReference method_ref,
uint32_t max_extra_space);
- uint32_t CalculateDisplacement(uint32_t patch_offset, uint32_t target_offset);
+ uint32_t GetThunkTargetOffset(const ThunkKey& key, uint32_t patch_offset);
+
+ uint32_t CalculateMethodCallDisplacement(uint32_t patch_offset,
+ uint32_t target_offset);
+
+ virtual ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) = 0;
+ virtual std::vector<uint8_t> CompileThunk(const ThunkKey& key) = 0;
+ virtual uint32_t MaxPositiveDisplacement(ThunkType type) = 0;
+ virtual uint32_t MaxNegativeDisplacement(ThunkType type) = 0;
private:
- bool ReserveSpaceProcessPatches(uint32_t quick_code_offset, MethodReference method_ref,
- uint32_t next_aligned_offset);
+ class ThunkData;
+
+ void ProcessPatches(const CompiledMethod* compiled_method, uint32_t code_offset);
+ void AddUnreservedThunk(ThunkData* data);
+
+ void ResolveMethodCalls(uint32_t quick_code_offset, MethodReference method_ref);
+
+ uint32_t CalculateMaxNextOffset(uint32_t patch_offset, ThunkType type);
RelativePatcherTargetProvider* const provider_;
const InstructionSet instruction_set_;
- const std::vector<uint8_t> thunk_code_;
- const uint32_t max_positive_displacement_;
- const uint32_t max_negative_displacement_;
- std::vector<uint32_t> thunk_locations_;
- size_t current_thunk_to_write_;
- // ReserveSpace() tracks unprocessed patches.
- typedef std::pair<MethodReference, uint32_t> UnprocessedPatch;
- std::deque<UnprocessedPatch> unprocessed_patches_;
+ // The data for all thunks.
+ // SafeMap<> nodes don't move after being inserted, so we can use direct pointers to the data.
+ using ThunkMap = SafeMap<ThunkKey, ThunkData, ThunkKeyCompare>;
+ ThunkMap thunks_;
+
+ // ReserveSpace() tracks unprocessed method call patches. These may be resolved later.
+ class UnprocessedMethodCallPatch {
+ public:
+ UnprocessedMethodCallPatch(uint32_t patch_offset, MethodReference target_method)
+ : patch_offset_(patch_offset), target_method_(target_method) { }
+
+ uint32_t GetPatchOffset() const {
+ return patch_offset_;
+ }
+
+ MethodReference GetTargetMethod() const {
+ return target_method_;
+ }
+
+ private:
+ uint32_t patch_offset_;
+ MethodReference target_method_;
+ };
+ std::deque<UnprocessedMethodCallPatch> unprocessed_method_call_patches_;
+ // Once we have compiled a method call thunk, cache pointer to the data.
+ ThunkData* method_call_thunk_;
+
+ // Thunks that still need their offsets reserved, in ascending MaxNextOffset() order.
+ std::deque<ThunkData*> unreserved_thunks_;
+
+ class PendingThunkComparator;
+ std::vector<ThunkData*> pending_thunks_; // Heap with the PendingThunkComparator.
friend class Arm64RelativePatcherTest;
friend class Thumb2RelativePatcherTest;
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index fa49fc4..1a5d79c 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -23,9 +23,17 @@
namespace art {
namespace linker {
+// PC displacement from patch location; Thumb2 PC is always at instruction address + 4.
+static constexpr int32_t kPcDisplacement = 4;
+
+// Maximum positive and negative displacement for method call measured from the patch location.
+// (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from
+// the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.)
+constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement;
+constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement;
+
Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider)
- : ArmBaseRelativePatcher(provider, kThumb2, CompileThunkCode(),
- kMaxPositiveDisplacement, kMaxNegativeDisplacement) {
+ : ArmBaseRelativePatcher(provider, kThumb2) {
}
void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code,
@@ -36,7 +44,7 @@
DCHECK_EQ(literal_offset & 1u, 0u);
DCHECK_EQ(patch_offset & 1u, 0u);
DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit.
- uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
+ uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u);
displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch.
DCHECK_EQ(displacement & 1u, 0u);
DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed.
@@ -76,7 +84,20 @@
SetInsn32(code, literal_offset, insn);
}
-std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() {
+void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+ const LinkerPatch& patch ATTRIBUTE_UNUSED,
+ uint32_t patch_offset ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "UNIMPLEMENTED";
+}
+
+ArmBaseRelativePatcher::ThunkKey Thumb2RelativePatcher::GetBakerReadBarrierKey(
+ const LinkerPatch& patch ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "UNIMPLEMENTED";
+ UNREACHABLE();
+}
+
+std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) {
+ DCHECK(key.GetType() == ThunkType::kMethodCall);
// The thunk just uses the entry point in the ArtMethod. This works even for calls
// to the generic JNI and interpreter trampolines.
ArenaPool pool;
@@ -93,6 +114,16 @@
return thunk_code;
}
+uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(ThunkType type) {
+ DCHECK(type == ThunkType::kMethodCall);
+ return kMaxMethodCallPositiveDisplacement;
+}
+
+uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(ThunkType type) {
+ DCHECK(type == ThunkType::kMethodCall);
+ return kMaxMethodCallNegativeDisplacement;
+}
+
void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
DCHECK_LE(offset + 4u, code->size());
DCHECK_EQ(offset & 1u, 0u);
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index d85739c..ab37802 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -34,24 +34,24 @@
const LinkerPatch& patch,
uint32_t patch_offset,
uint32_t target_offset) OVERRIDE;
+ void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset) OVERRIDE;
+
+ protected:
+ ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE;
+ std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE;
+ uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE;
+ uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE;
private:
- static std::vector<uint8_t> CompileThunkCode();
-
void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value);
static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset);
template <typename Vector>
static uint32_t GetInsn32(Vector* code, uint32_t offset);
- // PC displacement from patch location; Thumb2 PC is always at instruction address + 4.
- static constexpr int32_t kPcDisplacement = 4;
-
- // Maximum positive and negative displacement measured from the patch location.
- // (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from
- // the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.)
- static constexpr uint32_t kMaxPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement;
- static constexpr uint32_t kMaxNegativeDisplacement = (1u << 24) - kPcDisplacement;
+ friend class Thumb2RelativePatcherTest;
DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher);
};
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index eace3d4..f08270d 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -63,7 +63,7 @@
const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset);
std::vector<uint8_t> method2_raw_code(method2_size);
ArrayRef<const uint8_t> method2_code(method2_raw_code);
- AddCompiledMethod(MethodRef(2u), method2_code, ArrayRef<const LinkerPatch>());
+ AddCompiledMethod(MethodRef(2u), method2_code);
AddCompiledMethod(MethodRef(3u), method3_code, method3_patches);
@@ -80,7 +80,7 @@
} else {
uint32_t thunk_end =
CompiledCode::AlignCode(method3_offset - sizeof(OatQuickMethodHeader), kThumb2) +
- ThunkSize();
+ MethodCallThunkSize();
uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end);
CHECK_EQ(result3.second, header_offset + sizeof(OatQuickMethodHeader) + 1 /* thumb mode */);
return true; // Thunk present.
@@ -94,24 +94,30 @@
return result.second - 1 /* thumb mode */;
}
- uint32_t ThunkSize() {
- return static_cast<Thumb2RelativePatcher*>(patcher_.get())->thunk_code_.size();
+ std::vector<uint8_t> CompileMethodCallThunk() {
+ ArmBaseRelativePatcher::ThunkKey key(
+ ArmBaseRelativePatcher::ThunkType::kMethodCall,
+ ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }}); // NOLINT(whitespace/braces)
+ return static_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
+ }
+
+ uint32_t MethodCallThunkSize() {
+ return CompileMethodCallThunk().size();
}
bool CheckThunk(uint32_t thunk_offset) {
- Thumb2RelativePatcher* patcher = static_cast<Thumb2RelativePatcher*>(patcher_.get());
- ArrayRef<const uint8_t> expected_code(patcher->thunk_code_);
+ const std::vector<uint8_t> expected_code = CompileMethodCallThunk();
if (output_.size() < thunk_offset + expected_code.size()) {
LOG(ERROR) << "output_.size() == " << output_.size() << " < "
<< "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size());
return false;
}
ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size());
- if (linked_code == expected_code) {
+ if (linked_code == ArrayRef<const uint8_t>(expected_code)) {
return true;
}
// Log failure info.
- DumpDiff(expected_code, linked_code);
+ DumpDiff(ArrayRef<const uint8_t>(expected_code), linked_code);
return false;
}
@@ -357,9 +363,10 @@
uint32_t method3_offset = GetMethodOffset(3u);
ASSERT_TRUE(IsAligned<kArmAlignment>(method3_offset));
uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader);
+ uint32_t thunk_size = MethodCallThunkSize();
uint32_t thunk_offset =
- RoundDown(method3_header_offset - ThunkSize(), GetInstructionSetAlignment(kThumb2));
- DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()),
+ RoundDown(method3_header_offset - thunk_size, GetInstructionSetAlignment(kThumb2));
+ DCHECK_EQ(thunk_offset + thunk_size + CodeAlignmentSize(thunk_offset + thunk_size),
method3_header_offset);
ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset));
uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */);
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 9ddf200..53797d2 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -16,11 +16,17 @@
#include "linker/arm64/relative_patcher_arm64.h"
+#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
+#include "base/bit_utils.h"
#include "compiled_method.h"
#include "driver/compiler_driver.h"
+#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "linker/output_stream.h"
+#include "lock_word.h"
+#include "mirror/object.h"
+#include "mirror/array-inl.h"
#include "oat.h"
#include "oat_quick_method_header.h"
#include "utils/arm64/assembler_arm64.h"
@@ -30,17 +36,52 @@
namespace {
+// Maximum positive and negative displacement for method call measured from the patch location.
+// (Signed 28 bit displacement with the last two bits 0 has range [-2^27, 2^27-4] measured from
+// the ARM64 PC pointing to the BL.)
+constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 27) - 4u;
+constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 27);
+
+// Maximum positive and negative displacement for a conditional branch measured from the patch
+// location. (Signed 21 bit displacement with the last two bits 0 has range [-2^20, 2^20-4]
+// measured from the ARM64 PC pointing to the B.cond.)
+constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 4u;
+constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20);
+
+// The ADRP thunk for erratum 843419 is 2 instructions, i.e. 8 bytes.
+constexpr uint32_t kAdrpThunkSize = 8u;
+
inline bool IsAdrpPatch(const LinkerPatch& patch) {
- return (patch.IsPcRelative() && patch.GetType() != LinkerPatch::Type::kCallRelative) &&
- patch.LiteralOffset() == patch.PcInsnOffset();
+ switch (patch.GetType()) {
+ case LinkerPatch::Type::kMethod:
+ case LinkerPatch::Type::kCall:
+ case LinkerPatch::Type::kCallRelative:
+ case LinkerPatch::Type::kType:
+ case LinkerPatch::Type::kString:
+ case LinkerPatch::Type::kBakerReadBarrierBranch:
+ return false;
+ case LinkerPatch::Type::kTypeRelative:
+ case LinkerPatch::Type::kTypeBssEntry:
+ case LinkerPatch::Type::kStringRelative:
+ case LinkerPatch::Type::kStringBssEntry:
+ case LinkerPatch::Type::kDexCacheArray:
+ return patch.LiteralOffset() == patch.PcInsnOffset();
+ }
+}
+
+inline uint32_t MaxExtraSpace(size_t num_adrp, size_t code_size) {
+ if (num_adrp == 0u) {
+ return 0u;
+ }
+ uint32_t alignment_bytes = CompiledMethod::AlignCode(code_size, kArm64) - code_size;
+ return kAdrpThunkSize * num_adrp + alignment_bytes;
}
} // anonymous namespace
Arm64RelativePatcher::Arm64RelativePatcher(RelativePatcherTargetProvider* provider,
const Arm64InstructionSetFeatures* features)
- : ArmBaseRelativePatcher(provider, kArm64, CompileThunkCode(),
- kMaxPositiveDisplacement, kMaxNegativeDisplacement),
+ : ArmBaseRelativePatcher(provider, kArm64),
fix_cortex_a53_843419_(features->NeedFixCortexA53_843419()),
reserved_adrp_thunks_(0u),
processed_adrp_thunks_(0u) {
@@ -74,7 +115,9 @@
++num_adrp;
}
}
- offset = ReserveSpaceInternal(offset, compiled_method, method_ref, kAdrpThunkSize * num_adrp);
+ ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
+ uint32_t max_extra_space = MaxExtraSpace(num_adrp, code.size());
+ offset = ReserveSpaceInternal(offset, compiled_method, method_ref, max_extra_space);
if (num_adrp == 0u) {
return offset;
}
@@ -82,7 +125,6 @@
// Now that we have the actual offset where the code will be placed, locate the ADRP insns
// that actually require the thunk.
uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader));
- ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size());
DCHECK(compiled_method != nullptr);
for (const LinkerPatch& patch : compiled_method->GetPatches()) {
@@ -146,7 +188,7 @@
DCHECK_EQ(literal_offset & 3u, 0u);
DCHECK_EQ(patch_offset & 3u, 0u);
DCHECK_EQ(target_offset & 3u, 0u);
- uint32_t displacement = CalculateDisplacement(patch_offset, target_offset & ~1u);
+ uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u);
DCHECK_EQ(displacement & 3u, 0u);
DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed.
uint32_t insn = (displacement & 0x0fffffffu) >> 2;
@@ -253,15 +295,184 @@
}
}
-std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() {
- // The thunk just uses the entry point in the ArtMethod. This works even for calls
- // to the generic JNI and interpreter trampolines.
+void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset) {
+ DCHECK_ALIGNED(patch_offset, 4u);
+ uint32_t literal_offset = patch.LiteralOffset();
+ DCHECK_ALIGNED(literal_offset, 4u);
+ DCHECK_LT(literal_offset, code->size());
+ uint32_t insn = GetInsn(code, literal_offset);
+ DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000); // CBNZ Xt, +0 (unpatched)
+ ThunkKey key = GetBakerReadBarrierKey(patch);
+ if (kIsDebugBuild) {
+ // Check that the next instruction matches the expected LDR.
+ switch (key.GetType()) {
+ case ThunkType::kBakerReadBarrierField: {
+ DCHECK_GE(code->size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn(code, literal_offset + 4u);
+ // LDR (immediate) with correct base_reg.
+ CheckValidReg(next_insn & 0x1fu); // Check destination register.
+ CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetOffsetParams().base_reg << 5));
+ break;
+ }
+ case ThunkType::kBakerReadBarrierRoot: {
+ DCHECK_GE(literal_offset, 4u);
+ uint32_t prev_insn = GetInsn(code, literal_offset - 4u);
+ // LDR (immediate) with correct root_reg.
+ CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | key.GetRootParams().root_reg);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType());
+ UNREACHABLE();
+ }
+ }
+ uint32_t target_offset = GetThunkTargetOffset(key, patch_offset);
+ DCHECK_ALIGNED(target_offset, 4u);
+ uint32_t disp = target_offset - patch_offset;
+ DCHECK((disp >> 20) == 0u || (disp >> 20) == 4095u); // 21-bit signed.
+ insn |= (disp << (5 - 2)) & 0x00ffffe0u; // Shift bits 2-20 to 5-23.
+ SetInsn(code, literal_offset, insn);
+}
+
+ArmBaseRelativePatcher::ThunkKey Arm64RelativePatcher::GetBakerReadBarrierKey(
+ const LinkerPatch& patch) {
+ DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch);
+ uint32_t value = patch.GetBakerCustomValue1();
+ BakerReadBarrierKind type = BakerReadBarrierKindField::Decode(value);
+ ThunkParams params;
+ switch (type) {
+ case BakerReadBarrierKind::kField:
+ params.offset_params.base_reg = BakerReadBarrierFirstRegField::Decode(value);
+ CheckValidReg(params.offset_params.base_reg);
+ params.offset_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value);
+ CheckValidReg(params.offset_params.holder_reg);
+ break;
+ case BakerReadBarrierKind::kGcRoot:
+ params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value);
+ CheckValidReg(params.root_params.root_reg);
+ params.root_params.dummy = 0u;
+ DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(type);
+ UNREACHABLE();
+ }
+ constexpr uint8_t kTypeTranslationOffset = 1u;
+ static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset ==
+ static_cast<uint32_t>(ThunkType::kBakerReadBarrierField),
+ "Thunk type translation check.");
+ static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset ==
+ static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot),
+ "Thunk type translation check.");
+ return ThunkKey(static_cast<ThunkType>(static_cast<uint32_t>(type) + kTypeTranslationOffset),
+ params);
+}
+
+#define __ assembler.GetVIXLAssembler()->
+
+static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
+ vixl::aarch64::Register base_reg,
+ vixl::aarch64::MemOperand& lock_word,
+ vixl::aarch64::Label* slow_path) {
+ using namespace vixl::aarch64; // NOLINT(build/namespaces)
+ // Load the lock word containing the rb_state.
+ __ Ldr(ip0.W(), lock_word);
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
+ static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == -4, "Check field LDR offset");
+ static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == -4, "Check array LDR offset");
+ __ Sub(lr, lr, 4); // Adjust the return address one instruction back to the LDR.
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ __ Add(base_reg, base_reg, Operand(vixl::aarch64::ip0, LSR, 32));
+ __ Br(lr); // And return back to the function.
+ // Note: The fake dependency is unnecessary for the slow path.
+}
+
+std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
+ using namespace vixl::aarch64; // NOLINT(build/namespaces)
ArenaPool pool;
ArenaAllocator arena(&pool);
arm64::Arm64Assembler assembler(&arena);
- Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArm64PointerSize).Int32Value());
- assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+
+ switch (key.GetType()) {
+ case ThunkType::kMethodCall: {
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArm64PointerSize).Int32Value());
+ assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+ break;
+ }
+ case ThunkType::kBakerReadBarrierField: {
+ // Check if the holder is gray and, if not, add fake dependency to the base register
+ // and return to the LDR instruction to load the reference. Otherwise, use introspection
+ // to load the reference and call the entrypoint (in IP1) that performs further checks
+ // on the reference and marks it if needed.
+ auto holder_reg = Register::GetXRegFromCode(key.GetOffsetParams().holder_reg);
+ auto base_reg = Register::GetXRegFromCode(key.GetOffsetParams().base_reg);
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip0, ip1);
+ // If base_reg differs from holder_reg, the offset was too large and we must have
+ // emitted an explicit null check before the load. Otherwise, we need to null-check
+ // the holder as we do not necessarily do that check before going to the thunk.
+ vixl::aarch64::Label throw_npe;
+ if (holder_reg.Is(base_reg)) {
+ __ Cbz(holder_reg.W(), &throw_npe);
+ }
+ vixl::aarch64::Label slow_path;
+ MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+ __ Bind(&slow_path);
+ MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
+ __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset.
+ __ Ubfx(ip0, ip0, 10, 12); // Extract the offset.
+ __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference.
+ __ Br(ip1); // Jump to the entrypoint.
+ if (holder_reg.Is(base_reg)) {
+ // Add null check slow path. The stack map is at the address pointed to by LR.
+ __ Bind(&throw_npe);
+ int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value();
+ __ Ldr(ip0, MemOperand(vixl::aarch64::x19, offset));
+ __ Br(ip0);
+ }
+ break;
+ }
+ case ThunkType::kBakerReadBarrierRoot: {
+ // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
+ // and it does not have a forwarding address), call the correct introspection entrypoint;
+ // otherwise return the reference (or the extracted forwarding address).
+ // There is no gray bit check for GC roots.
+ auto root_reg = Register::GetWRegFromCode(key.GetRootParams().root_reg);
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip0, ip1);
+ vixl::aarch64::Label return_label, not_marked, forwarding_address;
+ __ Cbz(root_reg, &return_label);
+ MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
+ __ Ldr(ip0.W(), lock_word);
+ __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
+ __ Bind(&return_label);
+ __ Br(lr);
+ __ Bind(&not_marked);
+ __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
+ __ B(&forwarding_address, mi);
+ // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
+ // art_quick_read_barrier_mark_introspection_gc_roots.
+ __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
+ __ Mov(ip0.W(), root_reg);
+ __ Br(ip1);
+ __ Bind(&forwarding_address);
+ __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
+ __ Br(lr);
+ break;
+ }
+ }
+
// Ensure we emit the literal pool.
assembler.FinalizeCode();
std::vector<uint8_t> thunk_code(assembler.CodeSize());
@@ -270,6 +481,28 @@
return thunk_code;
}
+#undef __
+
+uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(ThunkType type) {
+ switch (type) {
+ case ThunkType::kMethodCall:
+ return kMaxMethodCallPositiveDisplacement;
+ case ThunkType::kBakerReadBarrierField:
+ case ThunkType::kBakerReadBarrierRoot:
+ return kMaxBcondPositiveDisplacement;
+ }
+}
+
+uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(ThunkType type) {
+ switch (type) {
+ case ThunkType::kMethodCall:
+ return kMaxMethodCallNegativeDisplacement;
+ case ThunkType::kBakerReadBarrierField:
+ case ThunkType::kBakerReadBarrierRoot:
+ return kMaxBcondNegativeDisplacement;
+ }
+}
+
uint32_t Arm64RelativePatcher::PatchAdrp(uint32_t adrp, uint32_t disp) {
return (adrp & 0x9f00001fu) | // Clear offset bits, keep ADRP with destination reg.
// Bottom 12 bits are ignored, the next 2 lowest bits are encoded in bits 29-30.
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index a4a8018..7887cea 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_
#include "base/array_ref.h"
+#include "base/bit_field.h"
#include "linker/arm/relative_patcher_arm_base.h"
namespace art {
@@ -25,6 +26,27 @@
class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher {
public:
+ enum class BakerReadBarrierKind : uint8_t {
+ kField, // Field get or array get with constant offset (i.e. constant index).
+ kGcRoot, // GC root load.
+ kLast
+ };
+
+ static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
+ CheckValidReg(base_reg);
+ CheckValidReg(holder_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(holder_reg);
+ }
+
+ static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
+ CheckValidReg(root_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
+ BakerReadBarrierFirstRegField::Encode(root_reg) |
+ BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+ }
+
Arm64RelativePatcher(RelativePatcherTargetProvider* provider,
const Arm64InstructionSetFeatures* features);
@@ -41,9 +63,33 @@
const LinkerPatch& patch,
uint32_t patch_offset,
uint32_t target_offset) OVERRIDE;
+ void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset) OVERRIDE;
+
+ protected:
+ static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u;
+
+ ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE;
+ std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE;
+ uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE;
+ uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE;
private:
- static std::vector<uint8_t> CompileThunkCode();
+ static constexpr size_t kBitsForBakerReadBarrierKind =
+ MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
+ static constexpr size_t kBitsForRegister = 5u;
+ using BakerReadBarrierKindField =
+ BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
+ using BakerReadBarrierFirstRegField =
+ BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>;
+ using BakerReadBarrierSecondRegField =
+ BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>;
+
+ static void CheckValidReg(uint32_t reg) {
+ DCHECK(reg < 30u && reg != 16u && reg != 17u);
+ }
+
static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp);
static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset,
@@ -54,15 +100,6 @@
template <typename Alloc>
static uint32_t GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset);
- // Maximum positive and negative displacement measured from the patch location.
- // (Signed 28 bit displacement with the last bit 0 has range [-2^27, 2^27-4] measured from
- // the ARM64 PC pointing to the BL.)
- static constexpr uint32_t kMaxPositiveDisplacement = (1u << 27) - 4u;
- static constexpr uint32_t kMaxNegativeDisplacement = (1u << 27);
-
- // The ADRP thunk for erratum 843419 is 2 instructions, i.e. 8 bytes.
- static constexpr uint32_t kAdrpThunkSize = 8u;
-
const bool fix_cortex_a53_843419_;
// Map original patch_offset to thunk offset.
std::vector<std::pair<uint32_t, uint32_t>> adrp_thunk_locations_;
@@ -70,6 +107,8 @@
size_t processed_adrp_thunks_;
std::vector<uint8_t> current_method_thunks_;
+ friend class Arm64RelativePatcherTest;
+
DISALLOW_COPY_AND_ASSIGN(Arm64RelativePatcher);
};
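The custom data produced by EncodeBakerReadBarrierFieldData()/EncodeBakerReadBarrierGcRootData() above packs a BakerReadBarrierKind and two 5-bit register numbers into the 32-bit value carried by the LinkerPatch. A minimal standalone sketch of that packing with plain shifts, under the assumption that MinimumBitsToStore(kLast) == 2 so the kind occupies the two lowest bits:

    #include <cassert>
    #include <cstdint>

    // Assumed layout: bits [0,2) = kind, bits [2,7) = first register, bits [7,12) = second register.
    enum class Kind : uint32_t { kField = 0, kGcRoot = 1 };
    constexpr uint32_t kKindBits = 2u;
    constexpr uint32_t kRegBits = 5u;
    constexpr uint32_t kInvalidReg = 31u;  // sp/zr, used when there is no second register.

    constexpr uint32_t EncodeFieldData(uint32_t base_reg, uint32_t holder_reg) {
      return static_cast<uint32_t>(Kind::kField) |
             (base_reg << kKindBits) |
             (holder_reg << (kKindBits + kRegBits));
    }

    constexpr uint32_t EncodeGcRootData(uint32_t root_reg) {
      return static_cast<uint32_t>(Kind::kGcRoot) |
             (root_reg << kKindBits) |
             (kInvalidReg << (kKindBits + kRegBits));
    }

    int main() {
      uint32_t data = EncodeFieldData(/* base_reg */ 5u, /* holder_reg */ 19u);
      // Decoding recovers the kind and both registers, as GetBakerReadBarrierKey() must do.
      assert((data & ((1u << kKindBits) - 1u)) == static_cast<uint32_t>(Kind::kField));
      assert(((data >> kKindBits) & ((1u << kRegBits) - 1u)) == 5u);
      assert(((data >> (kKindBits + kRegBits)) & ((1u << kRegBits) - 1u)) == 19u);
      return 0;
    }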
diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc
index 9932c79..b4d35ab 100644
--- a/compiler/linker/arm64/relative_patcher_arm64_test.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc
@@ -14,8 +14,11 @@
* limitations under the License.
*/
+#include "base/casts.h"
#include "linker/relative_patcher_test.h"
#include "linker/arm64/relative_patcher_arm64.h"
+#include "lock_word.h"
+#include "mirror/object.h"
#include "oat_quick_method_header.h"
namespace art {
@@ -32,6 +35,9 @@
static const uint8_t kNopRawCode[];
static const ArrayRef<const uint8_t> kNopCode;
+ // NOP instruction.
+ static constexpr uint32_t kNopInsn = 0xd503201f;
+
// All branches can be created from kBlPlus0 or kBPlus0 by adding the low 26 bits.
static constexpr uint32_t kBlPlus0 = 0x94000000u;
static constexpr uint32_t kBPlus0 = 0x14000000u;
@@ -40,7 +46,7 @@
static constexpr uint32_t kBlPlusMax = 0x95ffffffu;
static constexpr uint32_t kBlMinusMax = 0x96000000u;
- // LDR immediate, 32-bit.
+ // LDR immediate, unsigned offset.
static constexpr uint32_t kLdrWInsn = 0xb9400000u;
// ADD/ADDS/SUB/SUBS immediate, 64-bit.
@@ -61,6 +67,34 @@
static constexpr uint32_t kLdrWSpRelInsn = 0xb94003edu;
static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu;
+ // CBNZ x17, +0. Bits 5-23 are a placeholder for the target offset from PC in units of 4 bytes.
+ static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u;
+
+ void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
+ CHECK_LE(pos, code->size());
+ const uint8_t insn_code[] = {
+ static_cast<uint8_t>(insn),
+ static_cast<uint8_t>(insn >> 8),
+ static_cast<uint8_t>(insn >> 16),
+ static_cast<uint8_t>(insn >> 24),
+ };
+ static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code).");
+ code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
+ }
+
+ void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) {
+ InsertInsn(code, code->size(), insn);
+ }
+
+ std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) {
+ std::vector<uint8_t> raw_code;
+ raw_code.reserve(insns.size() * 4u);
+ for (uint32_t insn : insns) {
+ PushBackInsn(&raw_code, insn);
+ }
+ return raw_code;
+ }
+
uint32_t Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code,
const ArrayRef<const LinkerPatch>& method1_patches,
const ArrayRef<const uint8_t>& last_method_code,
@@ -93,8 +127,7 @@
uint32_t chunk_code_size =
chunk_size - CodeAlignmentSize(chunk_start) - sizeof(OatQuickMethodHeader);
gap_code.resize(chunk_code_size, 0u);
- AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code),
- ArrayRef<const LinkerPatch>());
+ AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code));
method_idx += 1u;
chunk_start += chunk_size;
chunk_size = kSmallChunkSize; // For all but the first chunk.
@@ -112,7 +145,7 @@
// There may be a thunk before method2.
if (last_result.second != last_method_offset) {
// Thunk present. Check that there's only one.
- uint32_t thunk_end = CompiledCode::AlignCode(gap_end, kArm64) + ThunkSize();
+ uint32_t thunk_end = CompiledCode::AlignCode(gap_end, kArm64) + MethodCallThunkSize();
uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end);
CHECK_EQ(last_result.second, header_offset + sizeof(OatQuickMethodHeader));
}
@@ -126,37 +159,49 @@
return result.second;
}
- uint32_t ThunkSize() {
- return static_cast<Arm64RelativePatcher*>(patcher_.get())->thunk_code_.size();
+ std::vector<uint8_t> CompileMethodCallThunk() {
+ ArmBaseRelativePatcher::ThunkKey key(
+ ArmBaseRelativePatcher::ThunkType::kMethodCall,
+ ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }}); // NOLINT(whitespace/braces)
+ return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key);
+ }
+
+ uint32_t MethodCallThunkSize() {
+ return CompileMethodCallThunk().size();
}
bool CheckThunk(uint32_t thunk_offset) {
- Arm64RelativePatcher* patcher = static_cast<Arm64RelativePatcher*>(patcher_.get());
- ArrayRef<const uint8_t> expected_code(patcher->thunk_code_);
+ const std::vector<uint8_t> expected_code = CompileMethodCallThunk();
if (output_.size() < thunk_offset + expected_code.size()) {
LOG(ERROR) << "output_.size() == " << output_.size() << " < "
<< "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size());
return false;
}
ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size());
- if (linked_code == expected_code) {
+ if (linked_code == ArrayRef<const uint8_t>(expected_code)) {
return true;
}
// Log failure info.
- DumpDiff(expected_code, linked_code);
+ DumpDiff(ArrayRef<const uint8_t>(expected_code), linked_code);
return false;
}
+ std::vector<uint8_t> GenNops(size_t num_nops) {
+ std::vector<uint8_t> result;
+ result.reserve(num_nops * 4u + 4u);
+ for (size_t i = 0; i != num_nops; ++i) {
+ PushBackInsn(&result, kNopInsn);
+ }
+ return result;
+ }
+
std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) {
std::vector<uint8_t> result;
result.reserve(num_nops * 4u + 4u);
for (size_t i = 0; i != num_nops; ++i) {
- result.insert(result.end(), kNopCode.begin(), kNopCode.end());
+ PushBackInsn(&result, kNopInsn);
}
- result.push_back(static_cast<uint8_t>(bl));
- result.push_back(static_cast<uint8_t>(bl >> 8));
- result.push_back(static_cast<uint8_t>(bl >> 16));
- result.push_back(static_cast<uint8_t>(bl >> 24));
+ PushBackInsn(&result, bl);
return result;
}
@@ -167,7 +212,7 @@
std::vector<uint8_t> result;
result.reserve(num_nops * 4u + 8u);
for (size_t i = 0; i != num_nops; ++i) {
- result.insert(result.end(), kNopCode.begin(), kNopCode.end());
+ PushBackInsn(&result, kNopInsn);
}
CHECK_ALIGNED(method_offset, 4u);
CHECK_ALIGNED(target_offset, 4u);
@@ -188,14 +233,8 @@
((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5,
// We take the sign bit from the disp, limiting disp to +- 2GiB.
((disp & 0x80000000) >> (31 - 23)); // sign bit in immhi is at bit 23.
- result.push_back(static_cast<uint8_t>(adrp));
- result.push_back(static_cast<uint8_t>(adrp >> 8));
- result.push_back(static_cast<uint8_t>(adrp >> 16));
- result.push_back(static_cast<uint8_t>(adrp >> 24));
- result.push_back(static_cast<uint8_t>(use_insn));
- result.push_back(static_cast<uint8_t>(use_insn >> 8));
- result.push_back(static_cast<uint8_t>(use_insn >> 16));
- result.push_back(static_cast<uint8_t>(use_insn >> 24));
+ PushBackInsn(&result, adrp);
+ PushBackInsn(&result, use_insn);
return result;
}
@@ -208,7 +247,7 @@
void TestNopsAdrpLdr(size_t num_nops, uint32_t dex_cache_arrays_begin, uint32_t element_offset) {
dex_cache_arrays_begin_ = dex_cache_arrays_begin;
auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched.
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset),
LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, element_offset),
};
@@ -233,7 +272,7 @@
constexpr uint32_t kStringIndex = 1u;
string_index_to_offset_map_.Put(kStringIndex, string_offset);
auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u); // Unpatched.
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::RelativeStringPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex),
LinkerPatch::RelativeStringPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex),
};
@@ -247,16 +286,6 @@
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
}
- void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
- CHECK_LE(pos, code->size());
- const uint8_t insn_code[] = {
- static_cast<uint8_t>(insn), static_cast<uint8_t>(insn >> 8),
- static_cast<uint8_t>(insn >> 16), static_cast<uint8_t>(insn >> 24),
- };
- static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code).");
- code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
- }
-
void PrepareNopsAdrpInsn2Ldr(size_t num_nops,
uint32_t insn2,
uint32_t dex_cache_arrays_begin,
@@ -264,7 +293,7 @@
dex_cache_arrays_begin_ = dex_cache_arrays_begin;
auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched.
InsertInsn(&code, num_nops * 4u + 4u, insn2);
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::DexCacheArrayPatch(num_nops * 4u , nullptr, num_nops * 4u, element_offset),
LinkerPatch::DexCacheArrayPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, element_offset),
};
@@ -279,7 +308,7 @@
string_index_to_offset_map_.Put(kStringIndex, string_offset);
auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u); // Unpatched.
InsertInsn(&code, num_nops * 4u + 4u, insn2);
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::RelativeStringPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex),
LinkerPatch::RelativeStringPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex),
};
@@ -329,7 +358,7 @@
InsertInsn(&expected_thunk_code, 4u, b_in);
ASSERT_EQ(expected_thunk_code.size(), 8u);
- uint32_t thunk_size = ThunkSize();
+ uint32_t thunk_size = MethodCallThunkSize();
ASSERT_EQ(thunk_offset + thunk_size, output_.size());
ASSERT_EQ(thunk_size, expected_thunk_code.size());
ArrayRef<const uint8_t> thunk_code(&output_[thunk_offset], thunk_size);
@@ -433,6 +462,33 @@
uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10);
TestAdrpInsn2Add(insn2, adrp_offset, has_thunk, string_offset);
}
+
+ std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) {
+ const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+ 0u, Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg));
+ auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get());
+ ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
+ return patcher->CompileThunk(key);
+ }
+
+ std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) {
+ LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+ 0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg));
+ auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get());
+ ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
+ return patcher->CompileThunk(key);
+ }
+
+ uint32_t GetOutputInsn(uint32_t offset) {
+ CHECK_LE(offset, output_.size());
+ CHECK_GE(output_.size() - offset, 4u);
+ return (static_cast<uint32_t>(output_[offset]) << 0) |
+ (static_cast<uint32_t>(output_[offset + 1]) << 8) |
+ (static_cast<uint32_t>(output_[offset + 2]) << 16) |
+ (static_cast<uint32_t>(output_[offset + 3]) << 24);
+ }
+
+ void TestBakerField(uint32_t offset, uint32_t root_reg);
};
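The InsertInsn()/RawCode()/GetOutputInsn() helpers above emit and read back A64 instructions in little-endian byte order. A tiny standalone illustration of that round trip, not tied to the test fixture:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    int main() {
      constexpr uint32_t kNopInsn = 0xd503201fu;  // A64 NOP.
      // Emit one instruction little-endian, as InsertInsn() does.
      std::vector<uint8_t> code;
      for (int shift = 0; shift != 32; shift += 8) {
        code.push_back(static_cast<uint8_t>(kNopInsn >> shift));
      }
      assert(code.size() == 4u);
      // Read it back, as GetOutputInsn() does.
      uint32_t insn = static_cast<uint32_t>(code[0]) |
                      (static_cast<uint32_t>(code[1]) << 8) |
                      (static_cast<uint32_t>(code[2]) << 16) |
                      (static_cast<uint32_t>(code[3]) << 24);
      assert(insn == kNopInsn);
      return 0;
    }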
const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = {
@@ -458,24 +514,22 @@
};
TEST_F(Arm64RelativePatcherTestDefault, CallSelf) {
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
};
AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
Link();
- static const uint8_t expected_code[] = {
- 0x00, 0x00, 0x00, 0x94
- };
+ const std::vector<uint8_t> expected_code = RawCode({kBlPlus0});
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
}
TEST_F(Arm64RelativePatcherTestDefault, CallOther) {
- LinkerPatch method1_patches[] = {
+ const LinkerPatch method1_patches[] = {
LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
};
AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
- LinkerPatch method2_patches[] = {
+ const LinkerPatch method2_patches[] = {
LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
};
AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
@@ -486,9 +540,7 @@
uint32_t diff_after = method2_offset - method1_offset;
CHECK_ALIGNED(diff_after, 4u);
ASSERT_LT(diff_after >> 2, 1u << 8); // Simple encoding, (diff_after >> 2) fits into 8 bits.
- static const uint8_t method1_expected_code[] = {
- static_cast<uint8_t>(diff_after >> 2), 0x00, 0x00, 0x94
- };
+ const std::vector<uint8_t> method1_expected_code = RawCode({kBlPlus0 + (diff_after >> 2)});
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code)));
uint32_t diff_before = method1_offset - method2_offset;
CHECK_ALIGNED(diff_before, 4u);
@@ -498,7 +550,7 @@
}
TEST_F(Arm64RelativePatcherTestDefault, CallTrampoline) {
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
};
AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
@@ -518,7 +570,7 @@
constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs.
ArrayRef<const uint8_t> last_method_code(last_method_raw_code);
ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size());
- LinkerPatch last_method_patches[] = {
+ const LinkerPatch last_method_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, missing_method_index),
};
@@ -551,7 +603,7 @@
ArrayRef<const uint8_t> method1_code(method1_raw_code);
ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap().
- LinkerPatch method1_patches[] = {
+ const LinkerPatch method1_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx),
};
@@ -577,7 +629,7 @@
constexpr uint32_t bl_offset_in_last_method = 0u * 4u; // After NOPs.
ArrayRef<const uint8_t> last_method_code(last_method_raw_code);
ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size());
- LinkerPatch last_method_patches[] = {
+ const LinkerPatch last_method_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u),
};
@@ -603,7 +655,7 @@
ArrayRef<const uint8_t> method1_code(method1_raw_code);
ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap().
- LinkerPatch method1_patches[] = {
+ const LinkerPatch method1_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx),
};
@@ -620,9 +672,10 @@
uint32_t last_method_offset = GetMethodOffset(last_method_idx);
ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_offset));
uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader);
+ uint32_t thunk_size = MethodCallThunkSize();
uint32_t thunk_offset =
- RoundDown(last_method_header_offset - ThunkSize(), GetInstructionSetAlignment(kArm64));
- DCHECK_EQ(thunk_offset + ThunkSize() + CodeAlignmentSize(thunk_offset + ThunkSize()),
+ RoundDown(last_method_header_offset - thunk_size, GetInstructionSetAlignment(kArm64));
+ DCHECK_EQ(thunk_offset + thunk_size + CodeAlignmentSize(thunk_offset + thunk_size),
last_method_header_offset);
uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1);
CHECK_ALIGNED(diff, 4u);
@@ -637,7 +690,7 @@
constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs.
ArrayRef<const uint8_t> last_method_code(last_method_raw_code);
ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size());
- LinkerPatch last_method_patches[] = {
+ const LinkerPatch last_method_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u),
};
@@ -832,5 +885,383 @@
TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8)
+void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved.
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ // LR and SP/ZR are reserved.
+ };
+ DCHECK_ALIGNED(offset, 4u);
+ DCHECK_LT(offset, 16 * KB);
+ constexpr size_t kMethodCodeSize = 8u;
+ constexpr size_t kLiteralOffset = 0u;
+ uint32_t method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ for (uint32_t holder_reg : valid_regs) {
+ uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg;
+ const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ uint32_t encoded_data =
+ Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
+ };
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment);
+ method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ for (uint32_t holder_reg : valid_regs) {
+ ++method_idx;
+ uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset);
+ uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
+ uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg;
+ const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size());
+ ASSERT_TRUE(
+ CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ size_t gray_check_offset = thunk_offset;
+ if (holder_reg == base_reg) {
+ // Verify that the null-check CBZ uses the correct register, i.e. holder_reg.
+ ASSERT_GE(output_.size() - gray_check_offset, 4u);
+ ASSERT_EQ(0x34000000 | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001f);
+ gray_check_offset += 4u;
+ }
+ // Verify that the lock word for gray bit check is loaded from the holder address.
+ static constexpr size_t kGrayCheckInsns = 5;
+ ASSERT_GE(output_.size() - gray_check_offset, 4u * kGrayCheckInsns);
+ const uint32_t load_lock_word =
+ kLdrWInsn |
+ (mirror::Object::MonitorOffset().Uint32Value() << (10 - 2)) |
+ (holder_reg << 5) |
+ /* ip0 */ 16;
+ EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset));
+ // Verify the gray bit check.
+ const uint32_t check_gray_bit_without_offset =
+ 0x37000000 | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16;
+ EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001f);
+ // Verify the fake dependency.
+ const uint32_t fake_dependency =
+ 0x8b408000 | // ADD Xd, Xn, Xm, LSR 32
+ (/* ip0 */ 16 << 16) | // Xm = ip0
+ (base_reg << 5) | // Xn = base_reg
+ base_reg; // Xd = base_reg
+ EXPECT_EQ(fake_dependency, GetOutputInsn(gray_check_offset + 12u));
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment);
+ }
+ }
+}
+
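TestBakerField() above encodes its LDRs by hand. For reference, a minimal sketch of the A64 "LDR (immediate, unsigned offset), 32-bit" form it relies on: the byte offset, scaled by 4, goes into imm12 at bits 10-21, the base register into Rn at bits 5-9, and the destination into Rt at bits 0-4.

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kLdrWInsn = 0xb9400000u;  // LDR Wt, [Xn, #imm12 * 4].

    // Equivalent to `kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | dest_reg`
    // for a 4-byte aligned offset below 16KiB.
    constexpr uint32_t EncodeLdrW(uint32_t byte_offset, uint32_t base_reg, uint32_t dest_reg) {
      return kLdrWInsn | ((byte_offset / 4u) << 10) | (base_reg << 5) | dest_reg;
    }

    int main() {
      // LDR w0, [x1, #8]: imm12 = 2, Rn = 1, Rt = 0.
      assert(EncodeLdrW(/* byte_offset */ 8u, /* base_reg */ 1u, /* dest_reg */ 0u) == 0xb9400820u);
      return 0;
    }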
+#define TEST_BAKER_FIELD(offset, root_reg) \
+ TEST_F(Arm64RelativePatcherTestDefault, \
+ BakerOffset##offset##_##root_reg) { \
+ TestBakerField(offset, root_reg); \
+ }
+
+TEST_BAKER_FIELD(/* offset */ 0, /* root_reg */ 0)
+TEST_BAKER_FIELD(/* offset */ 8, /* root_reg */ 15)
+TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* root_reg */ 29)
+
+TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) {
+ // One thunk in the middle with maximum distance branches to it from both sides.
+ // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+ constexpr uint32_t kLiteralOffset1 = 4;
+ const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
+ ArrayRef<const uint8_t> code1(raw_code1);
+ uint32_t encoded_data =
+ Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ const LinkerPatch patches1[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+ // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4
+ // allows the branch to reach that thunk.
+ size_t filler1_size =
+ 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment);
+ std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u);
+ ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+ AddCompiledMethod(MethodRef(2u), filler1_code);
+
+ // Enforce thunk reservation with a tiny method.
+ AddCompiledMethod(MethodRef(3u), kNopCode);
+
+ // Allow reaching the thunk from the very beginning of a method 1MiB away. Backward branch
+ // reaches the full 1MiB. Things to subtract:
+ // - thunk size and method 3 pre-header, rounded up (padding in between if needed)
+ // - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
+ // - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
+ size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size();
+ size_t filler2_size =
+ 1 * MB - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArm64Alignment)
+ - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArm64Alignment)
+ - sizeof(OatQuickMethodHeader);
+ std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 4u);
+ ArrayRef<const uint8_t> filler2_code(raw_filler2_code);
+ AddCompiledMethod(MethodRef(4u), filler2_code);
+
+ constexpr uint32_t kLiteralOffset2 = 0;
+ const std::vector<uint8_t> raw_code2 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn});
+ ArrayRef<const uint8_t> code2(raw_code2);
+ const LinkerPatch patches2[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2));
+
+ Link();
+
+ uint32_t first_method_offset = GetMethodOffset(1u);
+ uint32_t last_method_offset = GetMethodOffset(5u);
+ EXPECT_EQ(2 * MB, last_method_offset - first_method_offset);
+
+ const uint32_t cbnz_max_forward = kCbnzIP1Plus0Insn | 0x007fffe0;
+ const uint32_t cbnz_max_backward = kCbnzIP1Plus0Insn | 0x00800000;
+ const std::vector<uint8_t> expected_code1 = RawCode({kNopInsn, cbnz_max_forward, kLdrWInsn});
+ const std::vector<uint8_t> expected_code2 = RawCode({cbnz_max_backward, kLdrWInsn});
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
+}
+
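The cbnz_max_forward and cbnz_max_backward values in the test above correspond to the extreme imm19 encodings of CBNZ: bits 5-23 hold a signed word offset, so the largest forward displacement is ((1 << 18) - 1) * 4 = 1MiB - 4 and the largest backward displacement is -(1 << 18) * 4 = -1MiB. A small standalone check of those constants:

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u;  // CBNZ x17, +0.

    // Patch a byte displacement into the imm19 field (bits 5-23), as the linker does.
    constexpr uint32_t PatchCbnz(int32_t byte_disp) {
      return kCbnzIP1Plus0Insn | (((static_cast<uint32_t>(byte_disp) >> 2) & 0x7ffffu) << 5);
    }

    int main() {
      assert(PatchCbnz((1 << 20) - 4) == (kCbnzIP1Plus0Insn | 0x007fffe0u));  // Max forward.
      assert(PatchCbnz(-(1 << 20)) == (kCbnzIP1Plus0Insn | 0x00800000u));     // Max backward.
      return 0;
    }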
+TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) {
+ // Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction
+ // earlier, so the thunk is emitted before the filler.
+ // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+ constexpr uint32_t kLiteralOffset1 = 0;
+ const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn});
+ ArrayRef<const uint8_t> code1(raw_code1);
+ uint32_t encoded_data =
+ Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ const LinkerPatch patches1[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+ // Allow thunk at 1MiB offset from the start of the method above. With the CBNZ at literal
+ // offset 0, the branch cannot reach a thunk placed after the filler, so the thunk must be
+ // emitted before the filler, right after the first method.
+ size_t filler1_size =
+ 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment);
+ std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u);
+ ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+ AddCompiledMethod(MethodRef(2u), filler1_code);
+
+ Link();
+
+ const uint32_t cbnz_offset = RoundUp(raw_code1.size(), kArm64Alignment) - kLiteralOffset1;
+ const uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
+ const std::vector<uint8_t> expected_code1 = RawCode({cbnz, kLdrWInsn, kNopInsn});
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) {
+ // Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded
+ // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end.
+ // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`.
+ constexpr uint32_t kLiteralOffset1 = 4;
+ const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
+ ArrayRef<const uint8_t> code1(raw_code1);
+ uint32_t encoded_data =
+ Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ const LinkerPatch patches1[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+ // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4
+ // allows the branch to reach that thunk.
+ size_t filler1_size =
+ 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment);
+ std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u);
+ ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+ AddCompiledMethod(MethodRef(2u), filler1_code);
+
+ // Enforce thunk reservation with a tiny method.
+ AddCompiledMethod(MethodRef(3u), kNopCode);
+
+ // If not for the extra NOP, this would allow reaching the thunk from the very beginning
+ // of a method 1MiB away. Backward branch reaches the full 1MiB. Things to subtract:
+ // - thunk size and method 3 pre-header, rounded up (padding in between if needed)
+ // - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
+ // - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
+ size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size();
+ size_t filler2_size =
+ 1 * MB - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArm64Alignment)
+ - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArm64Alignment)
+ - sizeof(OatQuickMethodHeader);
+ std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 4u);
+ ArrayRef<const uint8_t> filler2_code(raw_filler2_code);
+ AddCompiledMethod(MethodRef(4u), filler2_code);
+
+ // Extra NOP compared to BakerOffsetThunkInTheMiddle.
+ constexpr uint32_t kLiteralOffset2 = 4;
+ const std::vector<uint8_t> raw_code2 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn});
+ ArrayRef<const uint8_t> code2(raw_code2);
+ const LinkerPatch patches2[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2));
+
+ Link();
+
+ const uint32_t cbnz_max_forward = kCbnzIP1Plus0Insn | 0x007fffe0;
+ const uint32_t cbnz_last_offset = RoundUp(raw_code2.size(), kArm64Alignment) - kLiteralOffset2;
+ const uint32_t cbnz_last = kCbnzIP1Plus0Insn | (cbnz_last_offset << (5 - 2));
+ const std::vector<uint8_t> expected_code1 = RawCode({kNopInsn, cbnz_max_forward, kLdrWInsn});
+ const std::vector<uint8_t> expected_code2 = RawCode({kNopInsn, cbnz_last, kLdrWInsn});
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, BakerGcRoot) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved.
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
+ // LR and SP/ZR are reserved.
+ };
+ constexpr size_t kMethodCodeSize = 8u;
+ constexpr size_t kLiteralOffset = 4u;
+ uint32_t method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t ldr = kLdrWInsn | (/* offset */ 8 << (10 - 2)) | (/* base_reg */ 0 << 5) | root_reg;
+ const std::vector<uint8_t> raw_code = RawCode({ldr, kCbnzIP1Plus0Insn});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(
+ kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)),
+ };
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment);
+ method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset);
+ uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2));
+ uint32_t ldr = kLdrWInsn | (/* offset */ 8 << (10 - 2)) | (/* base_reg */ 0 << 5) | root_reg;
+ const std::vector<uint8_t> expected_code = RawCode({ldr, cbnz});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size());
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
+ ASSERT_GE(output_.size() - thunk_offset, 4u);
+ ASSERT_EQ(0x34000000 | root_reg, GetOutputInsn(thunk_offset) & 0xff00001f);
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment);
+ }
+}
+
+TEST_F(Arm64RelativePatcherTestDefault, BakerAndMethodCallInteraction) {
+ // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());`
+ // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily
+ // hold when we're reserving thunks of different sizes. This test exposes the situation
+ // by using Baker thunks and a method call thunk.
+
+ // Add a method call patch that can reach up to method 1 offset + 128MiB.
+ uint32_t method_idx = 0u;
+ constexpr size_t kMethodCallLiteralOffset = 4u;
+ constexpr uint32_t kMissingMethodIdx = 2u;
+ const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0});
+ const LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, kMissingMethodIdx),
+ };
+ ArrayRef<const uint8_t> code1(raw_code1);
+ ++method_idx;
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches));
+
+ // Skip kMissingMethodIdx.
+ ++method_idx;
+ ASSERT_EQ(kMissingMethodIdx, method_idx);
+ // Add a method with the right size so that the code for the next method starts 1MiB
+ // after the code for method 1.
+ size_t filler_size =
+ 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment)
+ - sizeof(OatQuickMethodHeader);
+ std::vector<uint8_t> filler_code = GenNops(filler_size / 4u);
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
+ // Add 126 methods with 1MiB code+header, making the code for the next method start 1MiB
+ // before the currently scheduled MaxNextOffset() for the method call thunk.
+ for (uint32_t i = 0; i != 126; ++i) {
+ filler_size = 1 * MB - sizeof(OatQuickMethodHeader);
+ filler_code = GenNops(filler_size / 4u);
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
+ }
+
+ // Add 2 Baker GC root patches to the last method: one that would allow its thunk at
+ // 1MiB + kArm64Alignment, i.e. kArm64Alignment after the method call thunk, and a
+ // second one that needs its thunk kArm64Alignment after that. Since the GC root thunk
+ // is larger than the space required by the method call thunk plus kArm64Alignment,
+ // this pushes the first GC root thunk's pending MaxNextOffset() before the method call
+ // thunk's pending MaxNextOffset(), which then needs to be adjusted.
+ ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArm64Alignment) + kArm64Alignment,
+ CompileBakerGcRootThunk(/* root_reg */ 0).size());
+ static_assert(kArm64Alignment == 16, "Code below assumes kArm64Alignment == 16");
+ constexpr size_t kBakerLiteralOffset1 = 4u + kArm64Alignment;
+ constexpr size_t kBakerLiteralOffset2 = 4u + 2 * kArm64Alignment;
+ // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | root_reg`.
+ const uint32_t ldr1 = kLdrWInsn | /* root_reg */ 1;
+ const uint32_t ldr2 = kLdrWInsn | /* root_reg */ 2;
+ const std::vector<uint8_t> last_method_raw_code = RawCode({
+ kNopInsn, kNopInsn, kNopInsn, kNopInsn, // Padding before first GC root read barrier.
+ ldr1, kCbnzIP1Plus0Insn, // First GC root LDR with read barrier.
+ kNopInsn, kNopInsn, // Padding before second GC root read barrier.
+ ldr2, kCbnzIP1Plus0Insn, // Second GC root LDR with read barrier.
+ });
+ uint32_t encoded_data1 = Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1);
+ uint32_t encoded_data2 = Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2);
+ const LinkerPatch last_method_patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1),
+ LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2),
+ };
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx),
+ ArrayRef<const uint8_t>(last_method_raw_code),
+ ArrayRef<const LinkerPatch>(last_method_patches));
+
+ // The main purpose of the test is to check that Link() does not cause a crash.
+ Link();
+
+ ASSERT_EQ(127 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u));
+}
+
} // namespace linker
} // namespace art
diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc
index fe5f9a9..8da530f 100644
--- a/compiler/linker/mips/relative_patcher_mips.cc
+++ b/compiler/linker/mips/relative_patcher_mips.cc
@@ -117,5 +117,11 @@
(*code)[literal_low_offset + 1] = static_cast<uint8_t>(diff >> 8);
}
+void MipsRelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+ const LinkerPatch& patch ATTRIBUTE_UNUSED,
+ uint32_t patch_offset ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "UNIMPLEMENTED";
+}
+
} // namespace linker
} // namespace art
diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h
index 4ff2f2f..852a345 100644
--- a/compiler/linker/mips/relative_patcher_mips.h
+++ b/compiler/linker/mips/relative_patcher_mips.h
@@ -41,6 +41,9 @@
const LinkerPatch& patch,
uint32_t patch_offset,
uint32_t target_offset) OVERRIDE;
+ void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset) OVERRIDE;
private:
// We'll maximize the range of a single load instruction for dex cache array accesses
diff --git a/compiler/linker/mips64/relative_patcher_mips64.cc b/compiler/linker/mips64/relative_patcher_mips64.cc
index c479716..3488d6d 100644
--- a/compiler/linker/mips64/relative_patcher_mips64.cc
+++ b/compiler/linker/mips64/relative_patcher_mips64.cc
@@ -107,5 +107,11 @@
(*code)[literal_offset + 5] = static_cast<uint8_t>(diff >> 8);
}
+void Mips64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+ const LinkerPatch& patch ATTRIBUTE_UNUSED,
+ uint32_t patch_offset ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "UNIMPLEMENTED";
+}
+
} // namespace linker
} // namespace art
diff --git a/compiler/linker/mips64/relative_patcher_mips64.h b/compiler/linker/mips64/relative_patcher_mips64.h
index 8ef8ceb..f478d7f 100644
--- a/compiler/linker/mips64/relative_patcher_mips64.h
+++ b/compiler/linker/mips64/relative_patcher_mips64.h
@@ -39,6 +39,9 @@
const LinkerPatch& patch,
uint32_t patch_offset,
uint32_t target_offset) OVERRIDE;
+ void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset) OVERRIDE;
private:
DISALLOW_COPY_AND_ASSIGN(Mips64RelativePatcher);
diff --git a/compiler/linker/multi_oat_relative_patcher.h b/compiler/linker/multi_oat_relative_patcher.h
index dbda03f..247b290 100644
--- a/compiler/linker/multi_oat_relative_patcher.h
+++ b/compiler/linker/multi_oat_relative_patcher.h
@@ -112,6 +112,13 @@
relative_patcher_->PatchPcRelativeReference(code, patch, patch_offset, target_offset);
}
+ void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset) {
+ patch_offset += adjustment_;
+ relative_patcher_->PatchBakerReadBarrierBranch(code, patch, patch_offset);
+ }
+
// Wrappers around RelativePatcher for statistics retrieval.
uint32_t CodeAlignmentSize() const;
uint32_t RelativeCallThunksSize() const;
diff --git a/compiler/linker/multi_oat_relative_patcher_test.cc b/compiler/linker/multi_oat_relative_patcher_test.cc
index 92a96a0..951588a 100644
--- a/compiler/linker/multi_oat_relative_patcher_test.cc
+++ b/compiler/linker/multi_oat_relative_patcher_test.cc
@@ -63,7 +63,7 @@
if (next_write_call_thunk_ != 0u) {
offset += next_write_call_thunk_;
std::vector<uint8_t> thunk(next_write_call_thunk_, 'c');
- bool success = WriteRelCallThunk(out, ArrayRef<const uint8_t>(thunk));
+ bool success = WriteThunk(out, ArrayRef<const uint8_t>(thunk));
CHECK(success);
next_write_call_thunk_ = 0u;
}
@@ -95,6 +95,12 @@
last_target_offset_ = target_offset;
}
+ void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+ const LinkerPatch& patch ATTRIBUTE_UNUSED,
+ uint32_t patch_offset ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "UNIMPLEMENTED";
+ }
+
uint32_t last_reserve_offset_ = 0u;
MethodReference last_reserve_method_ = kNullMethodRef;
uint32_t next_reserve_adjustment_ = 0u;
diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc
index f1538b1..ee49453 100644
--- a/compiler/linker/relative_patcher.cc
+++ b/compiler/linker/relative_patcher.cc
@@ -75,6 +75,12 @@
LOG(FATAL) << "Unexpected relative dex cache array patch.";
}
+ void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+ const LinkerPatch& patch ATTRIBUTE_UNUSED,
+ uint32_t patch_offset ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "Unexpected baker read barrier branch patch.";
+ }
+
private:
DISALLOW_COPY_AND_ASSIGN(RelativePatcherNone);
};
@@ -127,7 +133,7 @@
return true;
}
-bool RelativePatcher::WriteRelCallThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) {
+bool RelativePatcher::WriteThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) {
if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) {
return false;
}
diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h
index 15e955b..38c8228 100644
--- a/compiler/linker/relative_patcher.h
+++ b/compiler/linker/relative_patcher.h
@@ -109,6 +109,11 @@
uint32_t patch_offset,
uint32_t target_offset) = 0;
+ // Patch a branch to a Baker read barrier thunk.
+ virtual void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset) = 0;
+
protected:
RelativePatcher()
: size_code_alignment_(0u),
@@ -117,7 +122,7 @@
}
bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
- bool WriteRelCallThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk);
+ bool WriteThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk);
bool WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk);
private:
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index 908cb41..d9a87a0 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -76,9 +76,10 @@
return MethodReference(nullptr, method_idx);
}
- void AddCompiledMethod(MethodReference method_ref,
- const ArrayRef<const uint8_t>& code,
- const ArrayRef<const LinkerPatch>& patches) {
+ void AddCompiledMethod(
+ MethodReference method_ref,
+ const ArrayRef<const uint8_t>& code,
+ const ArrayRef<const LinkerPatch>& patches = ArrayRef<const LinkerPatch>()) {
compiled_method_refs_.push_back(method_ref);
compiled_methods_.emplace_back(new CompiledMethod(
&driver_,
@@ -169,6 +170,10 @@
patch,
offset + patch.LiteralOffset(),
target_offset);
+ } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) {
+ patcher_->PatchBakerReadBarrierBranch(&patched_code_,
+ patch,
+ offset + patch.LiteralOffset());
} else {
LOG(FATAL) << "Bad patch type. " << patch.GetType();
UNREACHABLE();
diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc
index 768d31a..6967b0b 100644
--- a/compiler/linker/x86/relative_patcher_x86.cc
+++ b/compiler/linker/x86/relative_patcher_x86.cc
@@ -56,5 +56,11 @@
(*code)[literal_offset + 3u] = static_cast<uint8_t>(diff >> 24);
}
+void X86RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+ const LinkerPatch& patch ATTRIBUTE_UNUSED,
+ uint32_t patch_offset ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "UNIMPLEMENTED";
+}
+
} // namespace linker
} // namespace art
diff --git a/compiler/linker/x86/relative_patcher_x86.h b/compiler/linker/x86/relative_patcher_x86.h
index fbf9ad4..63a8338 100644
--- a/compiler/linker/x86/relative_patcher_x86.h
+++ b/compiler/linker/x86/relative_patcher_x86.h
@@ -30,6 +30,9 @@
const LinkerPatch& patch,
uint32_t patch_offset,
uint32_t target_offset) OVERRIDE;
+ void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset) OVERRIDE;
};
} // namespace linker
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc
index 2ff6930..156ece9 100644
--- a/compiler/linker/x86_64/relative_patcher_x86_64.cc
+++ b/compiler/linker/x86_64/relative_patcher_x86_64.cc
@@ -34,5 +34,11 @@
reinterpret_cast<unaligned_int32_t*>(&(*code)[patch.LiteralOffset()])[0] = displacement;
}
+void X86_64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
+ const LinkerPatch& patch ATTRIBUTE_UNUSED,
+ uint32_t patch_offset ATTRIBUTE_UNUSED) {
+ LOG(FATAL) << "UNIMPLEMENTED";
+}
+
} // namespace linker
} // namespace art
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.h b/compiler/linker/x86_64/relative_patcher_x86_64.h
index 11bb6d5..4f3ec49 100644
--- a/compiler/linker/x86_64/relative_patcher_x86_64.h
+++ b/compiler/linker/x86_64/relative_patcher_x86_64.h
@@ -30,6 +30,9 @@
const LinkerPatch& patch,
uint32_t patch_offset,
uint32_t target_offset) OVERRIDE;
+ void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+ const LinkerPatch& patch,
+ uint32_t patch_offset) OVERRIDE;
};
} // namespace linker
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 105db1d..1781643 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1348,6 +1348,12 @@
PatchObjectAddress(&patched_code_, literal_offset, type);
break;
}
+ case LinkerPatch::Type::kBakerReadBarrierBranch: {
+ writer_->relative_patcher_->PatchBakerReadBarrierBranch(&patched_code_,
+ patch,
+ offset_ + literal_offset);
+ break;
+ }
default: {
DCHECK(false) << "Unexpected linker patch type: " << patch.GetType();
break;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 794e05c..3d93553 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm64.h"
+#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "code_generator_utils.h"
@@ -25,6 +26,7 @@
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
+#include "linker/arm64/relative_patcher_arm64.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "offsets.h"
@@ -80,6 +82,26 @@
// generates less code/data with a small num_entries.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+// A reference load (except object array loads) uses LDR Wt, [Xn, #offset], which can handle
+// offsets below 16KiB. For offsets >= 16KiB, the load must be emitted as two or more
+// instructions. For the Baker read barrier implementation using link-time generated thunks
+// we need to split the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// Some instructions have special requirements for a temporary. For example,
+// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
+// a temp that is not R0 (to avoid an extra move), and Baker read barrier field
+// loads with large offsets need a fixed register to limit the number of link-time
+// thunks we generate. For these and similar cases, we want to reserve a specific
+// register that's neither callee-save nor an argument register. We choose x15.
+inline Location FixedTempLocation() {
+ return Location::RegisterLocation(x15.GetCode());
+}
+
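When a field offset does not fit the LDR immediate (offset >= kReferenceLoadMinFarOffset), the fixed temp above is expected to receive an adjusted base so that the remaining offset fits. A sketch of that split, under the assumption that the generator rounds the offset down to a multiple of 16KiB (the emitting code in GenerateFieldLoadWithBakerReadBarrier is not shown in this hunk):

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kReferenceLoadMinFarOffset = 16 * 1024;

    struct SplitOffset {
      uint32_t add_immediate;  // Added to the base into the fixed temp (x15).
      uint32_t ldr_offset;     // Small enough for LDR (immediate, unsigned offset).
    };

    // Assumed scheme: high part goes into an ADD, low part stays in the LDR.
    constexpr SplitOffset Split(uint32_t offset) {
      return { offset & ~(kReferenceLoadMinFarOffset - 1u),
               offset & (kReferenceLoadMinFarOffset - 1u) };
    }

    int main() {
      SplitOffset s = Split(/* offset */ 0x12344u);  // Some large, 4-byte aligned offset.
      assert(s.add_immediate == 0x10000u);
      assert(s.ldr_offset == 0x2344u);
      assert(s.ldr_offset < kReferenceLoadMinFarOffset);
      return 0;
    }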
inline Condition ARM64Condition(IfCondition cond) {
switch (cond) {
case kCondEQ: return eq;
@@ -297,23 +319,22 @@
constexpr bool call_saves_everything_except_r0_ip0 = (!kUseReadBarrier || kUseBakerReadBarrier);
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the page address of
- // the entry which is in a scratch register. Make sure it's not used for saving/restoring
- // registers. Exclude the scratch register also for non-Baker read barrier for simplicity.
+ InvokeRuntimeCallingConvention calling_convention;
+ // For HLoadClass/kBssEntry/kSaveEverything, the page address of the entry is in a temp
+ // register, make sure it's not clobbered by the call or by saving/restoring registers.
DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
bool is_load_class_bss_entry =
(cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry);
- UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
if (is_load_class_bss_entry) {
- // This temp is a scratch register.
DCHECK(bss_entry_temp_.IsValid());
- temps.Exclude(bss_entry_temp_);
+ DCHECK(!bss_entry_temp_.Is(calling_convention.GetRegisterAt(0)));
+ DCHECK(
+ !UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(bss_entry_temp_));
}
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
- InvokeRuntimeCallingConvention calling_convention;
dex::TypeIndex type_index = cls_->GetTypeIndex();
__ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
@@ -386,14 +407,15 @@
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- // temp_ is a scratch register. Make sure it's not used for saving/restoring registers.
- UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
- temps.Exclude(temp_);
+ InvokeRuntimeCallingConvention calling_convention;
+ // Make sure `temp_` is not clobbered by the call or by saving/restoring registers.
+ DCHECK(temp_.IsValid());
+ DCHECK(!temp_.Is(calling_convention.GetRegisterAt(0)));
+ DCHECK(!UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(temp_));
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
- InvokeRuntimeCallingConvention calling_convention;
const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
__ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
@@ -1415,6 +1437,7 @@
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -2206,7 +2229,8 @@
}
}
-void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
+void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
+ const FieldInfo& field_info) {
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
bool object_field_get_with_read_barrier =
@@ -2220,7 +2244,17 @@
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation() &&
+ !field_info.IsVolatile()) {
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // non-volatile loads we need a temporary only if the offset is too big.
+ if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(FixedTempLocation());
+ }
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -2249,7 +2283,8 @@
// Object FieldGet with Baker's read barrier case.
// /* HeapReference<Object> */ out = *(base + offset)
Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
- Register temp = WRegisterFrom(locations->GetTemp(0));
+ Location maybe_temp =
+ (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
// Note that potential implicit null checks are handled in this
// CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
@@ -2257,7 +2292,7 @@
out,
base,
offset,
- temp,
+ maybe_temp,
/* needs_null_check */ true,
field_info.IsVolatile());
} else {
@@ -2642,7 +2677,21 @@
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation() &&
+ instruction->GetIndex()->IsConstant()) {
+ // Array loads with constant index are treated as field loads.
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // constant index loads we need a temporary only if the offset is too big.
+ uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
+ uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
+ offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ if (offset >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(FixedTempLocation());
+ }
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
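To make the constant-index threshold above concrete: the element offset is the array data offset plus (index << 2) for object arrays, so with a hypothetical data offset of 12 bytes (the actual value of CodeGenerator::GetArrayDataOffset() is not shown here) the fixed temp is only needed once the constant index reaches (16KiB - 12) / 4 = 4093. A small worked check:

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t kReferenceLoadMinFarOffset = 16 * 1024;
    constexpr uint32_t kHypotheticalDataOffset = 12u;  // Assumed for illustration only.
    constexpr uint32_t kComponentSizeShift = 2u;       // HeapReference<Object> is 4 bytes.

    constexpr bool NeedsFixedTemp(uint32_t index) {
      return kHypotheticalDataOffset + (index << kComponentSizeShift) >= kReferenceLoadMinFarOffset;
    }

    int main() {
      assert(!NeedsFixedTemp(4092u));  // 12 + 16368 = 16380 < 16384.
      assert(NeedsFixedTemp(4093u));   // 12 + 16372 = 16384 >= 16384.
      return 0;
    }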
@@ -2678,11 +2727,25 @@
if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Object ArrayGet with Baker's read barrier case.
- Register temp = WRegisterFrom(locations->GetTemp(0));
// Note that a potential implicit null check is handled in the
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
- codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ Location maybe_temp =
+ (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ obj.W(),
+ offset,
+ maybe_temp,
+ /* needs_null_check */ true,
+ /* use_load_acquire */ false);
+ } else {
+ Register temp = WRegisterFrom(locations->GetTemp(0));
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
+ }
} else {
// General case.
MemOperand source = HeapOperand(obj);
@@ -3712,7 +3775,7 @@
}
void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
- HandleFieldGet(instruction);
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
}
void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -4514,6 +4577,11 @@
return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
}
+vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
+ baker_read_barrier_patches_.emplace_back(custom_data);
+ return &baker_read_barrier_patches_.back().label;
+}
+
vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
const DexFile& dex_file,
uint32_t offset_or_index,
@@ -4612,7 +4680,8 @@
pc_relative_string_patches_.size() +
boot_image_type_patches_.size() +
pc_relative_type_patches_.size() +
- type_bss_entry_patches_.size();
+ type_bss_entry_patches_.size() +
+ baker_read_barrier_patches_.size();
linker_patches->reserve(size);
for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
@@ -4646,6 +4715,10 @@
target_type.dex_file,
target_type.type_index.index_));
}
+ for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+ linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
+ info.custom_data));
+ }
DCHECK_EQ(size, linker_patches->size());
}
@@ -4758,8 +4831,7 @@
if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution or initialization and marking to save everything we need.
- // Note that IP0 may be clobbered by saving/restoring the live register (only one thanks
- // to the custom calling convention) or by marking, so we shall use IP1.
+ locations->AddTemp(FixedTempLocation());
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConvention calling_convention;
caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
@@ -4836,11 +4908,7 @@
// Add ADRP with its PC-relative Class .bss entry patch.
const DexFile& dex_file = cls->GetDexFile();
dex::TypeIndex type_index = cls->GetTypeIndex();
- // We can go to slow path even with non-zero reference and in that case marking
- // can clobber IP0, so we need to use IP1 which shall be preserved.
- bss_entry_temp = ip1;
- UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
- temps.Exclude(bss_entry_temp);
+ bss_entry_temp = XRegisterFrom(cls->GetLocations()->GetTemp(0));
bss_entry_adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
codegen_->EmitAdrpPlaceholder(bss_entry_adrp_label, bss_entry_temp);
// Add LDR with its PC-relative Class patch.
@@ -4947,8 +5015,7 @@
if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString and marking to save everything we need.
- // Note that IP0 may be clobbered by saving/restoring the live register (only one thanks
- // to the custom calling convention) or by marking, so we shall use IP1.
+ locations->AddTemp(FixedTempLocation());
RegisterSet caller_saves = RegisterSet::Empty();
InvokeRuntimeCallingConvention calling_convention;
caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
@@ -4999,11 +5066,7 @@
const DexFile& dex_file = load->GetDexFile();
const dex::StringIndex string_index = load->GetStringIndex();
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- // We could use IP0 as the marking shall not clobber IP0 if the reference is null and
- // that's when we need the slow path. But let's not rely on such details and use IP1.
- Register temp = ip1;
- UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
- temps.Exclude(temp);
+ Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0));
vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
codegen_->EmitAdrpPlaceholder(adrp_label, temp);
// Add LDR with its PC-relative String patch.
@@ -5438,7 +5501,7 @@
}
void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
- HandleFieldGet(instruction);
+ HandleFieldGet(instruction, instruction->GetFieldInfo());
}
void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
@@ -5747,7 +5810,6 @@
Register out_reg = RegisterFrom(out, type);
if (read_barrier_option == kWithReadBarrier) {
CHECK(kEmitCompilerReadBarrier);
- Register temp_reg = RegisterFrom(maybe_temp, type);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
@@ -5755,7 +5817,7 @@
out,
out_reg,
offset,
- temp_reg,
+ maybe_temp,
/* needs_null_check */ false,
/* use_load_acquire */ false);
} else {
@@ -5763,6 +5825,7 @@
// Save the value of `out` into `maybe_temp` before overwriting it
// in the following move operation, as we will need it for the
// read barrier below.
+ Register temp_reg = RegisterFrom(maybe_temp, type);
__ Mov(temp_reg, out_reg);
// /* HeapReference<Object> */ out = *(out + offset)
__ Ldr(out_reg, HeapOperand(out_reg, offset));
@@ -5790,13 +5853,12 @@
CHECK(kEmitCompilerReadBarrier);
if (kUseBakerReadBarrier) {
// Load with fast path based Baker's read barrier.
- Register temp_reg = RegisterFrom(maybe_temp, type);
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
out,
obj_reg,
offset,
- temp_reg,
+ maybe_temp,
/* needs_null_check */ false,
/* use_load_acquire */ false);
} else {
@@ -5827,52 +5889,97 @@
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barriers are used.
- //
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`. If `temp`
- // is null, it means that `GetIsGcMarking()` is false, and vice
- // versa.
- //
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
- // }
+ if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` the read barrier mark introspection entrypoint.
+ // If `temp` is null, it means that `GetIsGcMarking()` is false, and
+ // vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk
+ // checks the reference and jumps to the entrypoint if needed.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &return_address;
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) {
+ // goto gc_root_thunk<root_reg>(lr)
+ // }
+ // return_address:
- // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
- Register temp = lr;
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
- instruction, root, /* entrypoint */ LocationFrom(temp));
- codegen_->AddSlowPath(slow_path);
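+ // Exclude ip0 and ip1 from scratch register allocation here; ip1 holds the introspection
+ // entrypoint (loaded below) and must stay live until the link-time patched CBNZ.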
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+ uint32_t custom_data =
+ linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+ vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ Ldr(temp, MemOperand(tr, entry_point_offset));
-
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- if (fixup_label == nullptr) {
- __ Ldr(root_reg, MemOperand(obj, offset));
+ // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip0.GetCode(), 16u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+ __ Ldr(ip1, MemOperand(tr, entry_point_offset));
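+ // The ADR, LDR and CBNZ below must be contiguous (the EmissionCheckScope blocks pools)
+ // so that the LDR stays exactly two instructions before the return address.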
+ EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ if (fixup_label != nullptr) {
+ __ Bind(fixup_label);
+ }
+ static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
+ "GC root LDR must be 2 instruction (8B) before the return address label.");
+ __ ldr(root_reg, MemOperand(obj.X(), offset));
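+ // The linker resolves the following CBNZ to a branch to the root_reg-specific thunk;
+ // the branch is only taken when the entrypoint in ip1 is non-null, i.e. when the GC
+ // is marking.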
+ __ Bind(cbnz_label);
+ __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ __ Bind(&return_address);
} else {
- codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
- }
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
+ // }
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ Cbnz(temp, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Register temp = lr;
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(
+ instruction, root, /* entrypoint */ LocationFrom(temp));
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ Ldr(temp, MemOperand(tr, entry_point_offset));
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ if (fixup_label == nullptr) {
+ __ Ldr(root_reg, MemOperand(obj, offset));
+ } else {
+ codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
+ }
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ Cbnz(temp, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
@@ -5902,13 +6009,76 @@
Location ref,
Register obj,
uint32_t offset,
- Register temp,
+ Location maybe_temp,
bool needs_null_check,
bool use_load_acquire) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !use_load_acquire &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` the read barrier mark introspection entrypoint.
+ // If `temp` is null, it means that `GetIsGcMarking()` is false, and
+ // vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = *(obj+offset);
+ // gray_return_address:
+
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ DCHECK(maybe_temp.IsRegister());
+ base = WRegisterFrom(maybe_temp);
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ }
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+ uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base.GetCode(),
+ obj.GetCode());
+ vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
+
+ // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip0.GetCode(), 16u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+ __ Ldr(ip1, MemOperand(tr, entry_point_offset));
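+ // As for GC roots, the ADR, CBNZ and LDR below must be contiguous so that the LDR
+ // stays exactly one instruction before the return address.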
+ EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ __ Bind(cbnz_label);
+ __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == -4,
+ "Field LDR must be 1 instruction (4B) before the return address label.");
+ __ ldr(RegisterFrom(ref, Primitive::kPrimNot), MemOperand(base.X(), offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ __ Bind(&return_address);
+ return;
+ }
+
// /* HeapReference<Object> */ ref = *(obj + offset)
+ Register temp = WRegisterFrom(maybe_temp);
Location no_index = Location::NoLocation();
size_t no_scale_factor = 0u;
GenerateReferenceLoadWithBakerReadBarrier(instruction,
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 10d8b84..723507b 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -351,7 +351,7 @@
private:
void HandleBinaryOp(HBinaryOperation* instr);
void HandleFieldSet(HInstruction* instruction);
- void HandleFieldGet(HInstruction* instruction);
+ void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
void HandleInvoke(HInvoke* instr);
void HandleCondition(HCondition* instruction);
void HandleShift(HBinaryOperation* instr);
@@ -578,6 +578,10 @@
uint32_t element_offset,
vixl::aarch64::Label* adrp_label = nullptr);
+ // Add a new Baker read barrier patch and return the label to be bound
+ // before the CBNZ instruction.
+ vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageStringLiteral(
const DexFile& dex_file,
dex::StringIndex string_index);
@@ -609,7 +613,7 @@
Location ref,
vixl::aarch64::Register obj,
uint32_t offset,
- vixl::aarch64::Register temp,
+ Location maybe_temp,
bool needs_null_check,
bool use_load_acquire);
// Fast path implementation of ReadBarrier::Barrier for a heap
@@ -737,6 +741,13 @@
vixl::aarch64::Label* pc_insn_label;
};
+ struct BakerReadBarrierPatchInfo {
+ explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+ vixl::aarch64::Label label;
+ uint32_t custom_data;
+ };
+
vixl::aarch64::Label* NewPcRelativePatch(const DexFile& dex_file,
uint32_t offset_or_index,
vixl::aarch64::Label* adrp_label,
@@ -776,6 +787,8 @@
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // Baker read barrier patch info.
+ ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
// Patches for string literals in JIT compiled code.
StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 423fd3c..77dcb5a 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2507,9 +2507,11 @@
// We use a block to end the scratch scope before the write barrier, thus
// freeing the temporary registers so they can be used in `MarkGCCard`.
UseScratchRegisterScope temps(masm);
+ Location temp3_loc; // Used only for Baker read barrier.
Register temp3;
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- temp3 = WRegisterFrom(locations->GetTemp(2));
+ temp3_loc = locations->GetTemp(2);
+ temp3 = WRegisterFrom(temp3_loc);
} else {
temp3 = temps.AcquireW();
}
@@ -2527,7 +2529,7 @@
temp1_loc,
src.W(),
class_offset,
- temp2,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
// Bail out if the source is not a non primitive array.
@@ -2536,7 +2538,7 @@
temp1_loc,
temp1,
component_offset,
- temp2,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
__ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
@@ -2553,7 +2555,7 @@
temp1_loc,
dest.W(),
class_offset,
- temp2,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
@@ -2570,7 +2572,7 @@
temp2_loc,
temp1,
component_offset,
- temp3,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
__ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
@@ -2589,7 +2591,7 @@
temp2_loc,
src.W(),
class_offset,
- temp3,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
// Note: if heap poisoning is on, we are comparing two unpoisoned references here.
@@ -2603,7 +2605,7 @@
temp1_loc,
temp1,
component_offset,
- temp2,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
// /* HeapReference<Class> */ temp1 = temp1->super_class_
@@ -2687,7 +2689,7 @@
temp1_loc,
src.W(),
class_offset,
- temp2,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
// /* HeapReference<Class> */ temp2 = temp1->component_type_
@@ -2695,7 +2697,7 @@
temp2_loc,
temp1,
component_offset,
- temp3,
+ temp3_loc,
/* needs_null_check */ false,
/* use_load_acquire */ false);
__ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());