ARM: Link-time generated thunks for Baker CC read barrier.
Remaining work for follow-up CLs:
- use implicit null check in field thunk,
- use 16-bit LDRs for fields and GC roots.
Test: m test-art-target-gtest
Test: testrunner.py --target on Nexus 6P.
Test: testrunner.py --target on Nexus 6P with heap poisoning enabled.
Test: Repeat the above tests with ART_USE_OLD_ARM_BACKEND=true.
Bug: 29516974
Bug: 30126666
Bug: 36141117
Change-Id: Iad5addab72d790a9d61879f61f2e75b246bcdf5a
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index f08270d..8bc3eb4 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -14,8 +14,12 @@
* limitations under the License.
*/
+#include "base/casts.h"
#include "linker/relative_patcher_test.h"
#include "linker/arm/relative_patcher_thumb2.h"
+#include "lock_word.h"
+#include "mirror/array-inl.h"
+#include "mirror/object.h"
#include "oat_quick_method_header.h"
namespace art {
@@ -34,13 +38,99 @@
static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode;
static const uint32_t kPcInsnOffset;
+ // The PC in Thumb mode is 4 bytes after the instruction location.
+ static constexpr uint32_t kPcAdjustment = 4u;
+
// Branches within range [-256, 256) can be created from these by adding the low 8 bits.
- static constexpr uint32_t kBlPlus0 = 0xf000f800;
- static constexpr uint32_t kBlMinus256 = 0xf7ffff00;
+ static constexpr uint32_t kBlPlus0 = 0xf000f800u;
+ static constexpr uint32_t kBlMinus256 = 0xf7ffff00u;
// Special BL values.
- static constexpr uint32_t kBlPlusMax = 0xf3ffd7ff;
- static constexpr uint32_t kBlMinusMax = 0xf400d000;
+ static constexpr uint32_t kBlPlusMax = 0xf3ffd7ffu;
+ static constexpr uint32_t kBlMinusMax = 0xf400d000u;
+
+ // BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset.
+ static constexpr uint32_t kBneWPlus0 = 0xf0408000u;
+
+ // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn.
+ static constexpr uint32_t kLdrWInsn = 0xf8d00000u;
+
+ // LDR immediate, negative offset, encoding T4. Bits 0-7 are the offset to subtract.
+ static constexpr uint32_t kLdrNegativeOffset = 0xf8500c00u;
+
+ // LDR register, lsl #2. Bits 4-5 are the imm2, i.e. the lsl shift.
+ static constexpr uint32_t kLdrRegLsl2 = 0xf8500020u;
+
+ // NOP instructions.
+ static constexpr uint32_t kNopInsn = 0xbf00u;
+ static constexpr uint32_t kNopWInsn = 0xf3af8000u;
+
+ void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) {
+ CHECK_LE(pos, code->size());
+ if (IsUint<16>(insn)) {
+ const uint8_t insn_code[] = {
+ static_cast<uint8_t>(insn),
+ static_cast<uint8_t>(insn >> 8),
+ };
+ static_assert(sizeof(insn_code) == 2u, "Invalid sizeof(insn_code).");
+ code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
+ } else {
+ const uint8_t insn_code[] = {
+ static_cast<uint8_t>(insn >> 16),
+ static_cast<uint8_t>(insn >> 24),
+ static_cast<uint8_t>(insn),
+ static_cast<uint8_t>(insn >> 8),
+ };
+ static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code).");
+ code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code));
+ }
+ }
+
+ void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) {
+ InsertInsn(code, code->size(), insn);
+ }
+
+ std::vector<uint8_t> GenNops(size_t num_nops) {
+ std::vector<uint8_t> result;
+ result.reserve(num_nops * 2u);
+ for (size_t i = 0; i != num_nops; ++i) {
+ PushBackInsn(&result, kNopInsn);
+ }
+ return result;
+ }
+
+ std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) {
+ std::vector<uint8_t> raw_code;
+ size_t number_of_16_bit_insns =
+ std::count_if(insns.begin(), insns.end(), [](uint32_t x) { return IsUint<16>(x); });
+ raw_code.reserve(insns.size() * 4u - number_of_16_bit_insns * 2u);
+ for (uint32_t insn : insns) {
+ PushBackInsn(&raw_code, insn);
+ }
+ return raw_code;
+ }
+
+ uint32_t BneWWithOffset(uint32_t bne_offset, uint32_t target_offset) {
+ if (!IsAligned<2u>(bne_offset)) {
+ LOG(ERROR) << "Unaligned bne_offset: " << bne_offset;
+ return 0xffffffffu; // Fails code diff later.
+ }
+ if (!IsAligned<2u>(target_offset)) {
+ LOG(ERROR) << "Unaligned target_offset: " << target_offset;
+ return 0xffffffffu; // Fails code diff later.
+ }
+ uint32_t diff = target_offset - bne_offset - kPcAdjustment;
+ DCHECK_ALIGNED(diff, 2u);
+ if ((diff >> 20) != 0 && (diff >> 20) != 0xfffu) {
+ LOG(ERROR) << "Target out of range: " << diff;
+ return 0xffffffffu; // Fails code diff later.
+ }
+ return kBneWPlus0 | ((diff >> 1) & 0x7ffu) // imm11
+ | (((diff >> 12) & 0x3fu) << 16) // imm6
+ | (((diff >> 18) & 1) << 13) // J1
+ | (((diff >> 19) & 1) << 11) // J2
+ | (((diff >> 20) & 1) << 26); // S
+ }
bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code,
const ArrayRef<const LinkerPatch>& method1_patches,
@@ -125,19 +215,57 @@
std::vector<uint8_t> result;
result.reserve(num_nops * 2u + 4u);
for (size_t i = 0; i != num_nops; ++i) {
- result.push_back(0x00);
- result.push_back(0xbf);
+ PushBackInsn(&result, kNopInsn);
}
- result.push_back(static_cast<uint8_t>(bl >> 16));
- result.push_back(static_cast<uint8_t>(bl >> 24));
- result.push_back(static_cast<uint8_t>(bl));
- result.push_back(static_cast<uint8_t>(bl >> 8));
+ PushBackInsn(&result, bl);
return result;
}
void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset);
void TestStringReference(uint32_t string_offset);
void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
+
+ std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) {
+ const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+ 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg));
+ auto* patcher = down_cast<Thumb2RelativePatcher*>(patcher_.get());
+ ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
+ return patcher->CompileThunk(key);
+ }
+
+ std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) {
+ LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+ 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg));
+ auto* patcher = down_cast<Thumb2RelativePatcher*>(patcher_.get());
+ ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
+ return patcher->CompileThunk(key);
+ }
+
+ std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) {
+ LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
+ 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg));
+ auto* patcher = down_cast<Thumb2RelativePatcher*>(patcher_.get());
+ ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch);
+ return patcher->CompileThunk(key);
+ }
+
+ uint32_t GetOutputInsn32(uint32_t offset) {
+ CHECK_LE(offset, output_.size());
+ CHECK_GE(output_.size() - offset, 4u);
+ return (static_cast<uint32_t>(output_[offset]) << 16) |
+ (static_cast<uint32_t>(output_[offset + 1]) << 24) |
+ (static_cast<uint32_t>(output_[offset + 2]) << 0) |
+ (static_cast<uint32_t>(output_[offset + 3]) << 8);
+ }
+
+ uint16_t GetOutputInsn16(uint32_t offset) {
+ CHECK_LE(offset, output_.size());
+ CHECK_GE(output_.size() - offset, 2u);
+ return (static_cast<uint32_t>(output_[offset]) << 0) |
+ (static_cast<uint32_t>(output_[offset + 1]) << 8);
+ }
+
+ void TestBakerField(uint32_t offset, uint32_t ref_reg);
};
const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = {
@@ -164,7 +292,7 @@
void Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin,
uint32_t element_offset) {
dex_cache_arrays_begin_ = dex_cache_arrays_begin;
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::DexCacheArrayPatch(0u, nullptr, kPcInsnOffset, element_offset),
LinkerPatch::DexCacheArrayPatch(4u, nullptr, kPcInsnOffset, element_offset),
};
@@ -175,7 +303,7 @@
void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) {
constexpr uint32_t kStringIndex = 1u;
string_index_to_offset_map_.Put(kStringIndex, string_offset);
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex),
LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex),
};
@@ -214,7 +342,7 @@
}
TEST_F(Thumb2RelativePatcherTest, CallSelf) {
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
};
AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
@@ -227,11 +355,11 @@
}
TEST_F(Thumb2RelativePatcherTest, CallOther) {
- LinkerPatch method1_patches[] = {
+ const LinkerPatch method1_patches[] = {
LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
};
AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches));
- LinkerPatch method2_patches[] = {
+ const LinkerPatch method2_patches[] = {
LinkerPatch::RelativeCodePatch(0u, nullptr, 1u),
};
AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches));
@@ -254,7 +382,7 @@
}
TEST_F(Thumb2RelativePatcherTest, CallTrampoline) {
- LinkerPatch patches[] = {
+ const LinkerPatch patches[] = {
LinkerPatch::RelativeCodePatch(0u, nullptr, 2u),
};
AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches));
@@ -274,7 +402,7 @@
constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs.
ArrayRef<const uint8_t> method3_code(method3_raw_code);
ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
- LinkerPatch method3_patches[] = {
+ const LinkerPatch method3_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index),
};
@@ -303,7 +431,7 @@
constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs.
ArrayRef<const uint8_t> method1_code(method1_raw_code);
ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
- LinkerPatch method1_patches[] = {
+ const LinkerPatch method1_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u),
};
@@ -325,7 +453,7 @@
constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs.
ArrayRef<const uint8_t> method3_code(method3_raw_code);
ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
- LinkerPatch method3_patches[] = {
+ const LinkerPatch method3_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u),
};
@@ -347,7 +475,7 @@
constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs.
ArrayRef<const uint8_t> method1_code(method1_raw_code);
ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size());
- LinkerPatch method1_patches[] = {
+ const LinkerPatch method1_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u),
};
@@ -382,7 +510,7 @@
constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs.
ArrayRef<const uint8_t> method3_code(method3_raw_code);
ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size());
- LinkerPatch method3_patches[] = {
+ const LinkerPatch method3_patches[] = {
LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u),
};
@@ -445,5 +573,535 @@
ASSERT_LT(GetMethodOffset(1u), 0xfcu);
}
+void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
+ };
+ DCHECK_ALIGNED(offset, 4u);
+ DCHECK_LT(offset, 4 * KB);
+ constexpr size_t kMethodCodeSize = 8u;
+ constexpr size_t kLiteralOffset = 0u;
+ uint32_t method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ for (uint32_t holder_reg : valid_regs) {
+ uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12);
+ const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ uint32_t encoded_data =
+ Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
+ };
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ for (uint32_t holder_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12);
+ const std::vector<uint8_t> expected_code = RawCode({bne, ldr});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne;
+ ASSERT_TRUE(
+ CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ size_t gray_check_offset = thunk_offset;
+ if (holder_reg == base_reg) {
+ // Verify that the null-check uses the correct register, i.e. holder_reg.
+ if (holder_reg < 8) {
+ ASSERT_GE(output_.size() - gray_check_offset, 2u);
+ ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+ gray_check_offset +=2u;
+ } else {
+ ASSERT_GE(output_.size() - gray_check_offset, 6u);
+ ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+ ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
+ gray_check_offset += 6u;
+ }
+ }
+ // Verify that the lock word for gray bit check is loaded from the holder address.
+ ASSERT_GE(output_.size() - gray_check_offset,
+ 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+ const uint32_t load_lock_word =
+ kLdrWInsn |
+ (holder_reg << 16) |
+ (/* IP */ 12 << 12) |
+ mirror::Object::MonitorOffset().Uint32Value();
+ ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset));
+ // Verify the gray bit check.
+ DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate.
+ uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
+ const uint32_t tst_gray_bit_without_offset =
+ 0xf0100f00 | (/* IP */ 12 << 16)
+ | (((ror_shift >> 4) & 1) << 26) // i
+ | (((ror_shift >> 1) & 7) << 12) // imm3
+ | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift).
+ EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u));
+ EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE
+ // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset").
+ const uint32_t fake_dependency =
+ 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+ (/* IP */ 12) | // Rm = IP
+ (base_reg << 16) | // Rn = base_reg
+ (base_reg << 8); // Rd = base_reg
+ EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u));
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+ }
+}
+
+#define TEST_BAKER_FIELD(offset, ref_reg) \
+ TEST_F(Thumb2RelativePatcherTest, \
+ BakerOffset##offset##_##ref_reg) { \
+ TestBakerField(offset, ref_reg); \
+ }
+
+TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 7)
+TEST_BAKER_FIELD(/* offset */ 0xffc, /* ref_reg */ 11)
+
+TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) {
+ // One thunk in the middle with maximum distance branches to it from both sides.
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
+ constexpr uint32_t kLiteralOffset1 = 6u;
+ const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
+ ArrayRef<const uint8_t> code1(raw_code1);
+ uint32_t encoded_data =
+ Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ const LinkerPatch patches1[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+ constexpr uint32_t expected_thunk_offset =
+ kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u);
+ static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned.");
+ size_t filler1_size = expected_thunk_offset -
+ RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment);
+ std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u);
+ ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+ AddCompiledMethod(MethodRef(2u), filler1_code);
+
+ // Enforce thunk reservation with a tiny method.
+ AddCompiledMethod(MethodRef(3u), kNopCode);
+
+ constexpr uint32_t kLiteralOffset2 = 4;
+ static_assert(IsAligned<kArmAlignment>(kLiteralOffset2 + kPcAdjustment),
+ "PC for BNE must be aligned.");
+
+ // Allow reaching the thunk from the very beginning of a method almost 1MiB away. Backward branch
+ // reaches the full 1MiB but we need to take PC adjustment into account. Things to subtract:
+ // - thunk size and method 3 pre-header, rounded up (padding in between if needed)
+ // - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
+ // - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
+ size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size();
+ size_t filler2_size =
+ 1 * MB - (kLiteralOffset2 + kPcAdjustment)
+ - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
+ - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+ - sizeof(OatQuickMethodHeader);
+ std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u);
+ ArrayRef<const uint8_t> filler2_code(raw_filler2_code);
+ AddCompiledMethod(MethodRef(4u), filler2_code);
+
+ const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn});
+ ArrayRef<const uint8_t> code2(raw_code2);
+ const LinkerPatch patches2[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2));
+
+ Link();
+
+ uint32_t first_method_offset = GetMethodOffset(1u);
+ uint32_t last_method_offset = GetMethodOffset(5u);
+ EXPECT_EQ(2 * MB, last_method_offset - first_method_offset);
+
+ const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff;
+ const uint32_t bne_max_backward = kBneWPlus0 | 0x04000000;
+ const std::vector<uint8_t> expected_code1 =
+ RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn});
+ const std::vector<uint8_t> expected_code2 = RawCode({kNopWInsn, bne_max_backward, kLdrWInsn});
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) {
+ // Based on the first part of BakerOffsetThunkInTheMiddle but the BNE is one instruction
+ // earlier, so the thunk is emitted before the filler.
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
+ constexpr uint32_t kLiteralOffset1 = 4u;
+ const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn});
+ ArrayRef<const uint8_t> code1(raw_code1);
+ uint32_t encoded_data =
+ Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ const LinkerPatch patches1[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+ constexpr uint32_t expected_thunk_offset =
+ kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement + 2 */ (1u << 20);
+ static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned.");
+ size_t filler1_size = expected_thunk_offset -
+ RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment);
+ std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u);
+ ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+ AddCompiledMethod(MethodRef(2u), filler1_code);
+
+ Link();
+
+ const uint32_t bne = BneWWithOffset(kLiteralOffset1, RoundUp(raw_code1.size(), kArmAlignment));
+ const std::vector<uint8_t> expected_code1 = RawCode({kNopWInsn, bne, kLdrWInsn, kNopInsn});
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast) {
+ // Based on the BakerOffsetThunkInTheMiddle but the BNE in the last method is preceded
+ // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end.
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`.
+ constexpr uint32_t kLiteralOffset1 = 6u;
+ const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
+ ArrayRef<const uint8_t> code1(raw_code1);
+ uint32_t encoded_data =
+ Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ const LinkerPatch patches1[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1));
+
+ constexpr uint32_t expected_thunk_offset =
+ kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u);
+ static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned.");
+ size_t filler1_size = expected_thunk_offset -
+ RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment);
+ std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u);
+ ArrayRef<const uint8_t> filler1_code(raw_filler1_code);
+ AddCompiledMethod(MethodRef(2u), filler1_code);
+
+ // Enforce thunk reservation with a tiny method.
+ AddCompiledMethod(MethodRef(3u), kNopCode);
+
+ constexpr uint32_t kReachableFromOffset2 = 4;
+ constexpr uint32_t kLiteralOffset2 = kReachableFromOffset2 + 2;
+ static_assert(IsAligned<kArmAlignment>(kReachableFromOffset2 + kPcAdjustment),
+ "PC for BNE must be aligned.");
+
+ // If not for the extra NOP, this would allow reaching the thunk from the BNE
+ // of a method 1MiB away. Backward branch reaches the full 1MiB but we need to take
+ // PC adjustment into account. Things to subtract:
+ // - thunk size and method 3 pre-header, rounded up (padding in between if needed)
+ // - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
+ // - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
+ size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size();
+ size_t filler2_size =
+ 1 * MB - (kReachableFromOffset2 + kPcAdjustment)
+ - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
+ - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+ - sizeof(OatQuickMethodHeader);
+ std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u);
+ ArrayRef<const uint8_t> filler2_code(raw_filler2_code);
+ AddCompiledMethod(MethodRef(4u), filler2_code);
+
+ // Extra 16-bit NOP compared to BakerOffsetThunkInTheMiddle.
+ const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
+ ArrayRef<const uint8_t> code2(raw_code2);
+ const LinkerPatch patches2[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data),
+ };
+ AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2));
+
+ Link();
+
+ uint32_t first_method_offset = GetMethodOffset(1u);
+ uint32_t last_method_offset = GetMethodOffset(5u);
+ EXPECT_EQ(2 * MB, last_method_offset - first_method_offset);
+
+ const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff;
+ const uint32_t bne_last =
+ BneWWithOffset(kLiteralOffset2, RoundUp(raw_code2.size(), kArmAlignment));
+ const std::vector<uint8_t> expected_code1 =
+ RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn});
+ const std::vector<uint8_t> expected_code2 =
+ RawCode({kNopWInsn, kNopInsn, bne_last, kLdrWInsn});
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1)));
+ ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerArray) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
+ };
+ auto ldr = [](uint32_t base_reg) {
+ uint32_t index_reg = (base_reg == 0u) ? 1u : 0u;
+ uint32_t ref_reg = (base_reg == 2) ? 3u : 2u;
+ return kLdrRegLsl2 | index_reg | (base_reg << 16) | (ref_reg << 12);
+ };
+ constexpr size_t kMethodCodeSize = 8u;
+ constexpr size_t kLiteralOffset = 0u;
+ uint32_t method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ ++method_idx;
+ const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(
+ kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)),
+ };
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size());
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ // Verify that the lock word for gray bit check is loaded from the correct address
+ // before the base_reg which points to the array data.
+ ASSERT_GE(output_.size() - thunk_offset,
+ 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+ int32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+ int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset;
+ ASSERT_LT(offset, 0);
+ ASSERT_GT(offset, -256);
+ const uint32_t load_lock_word =
+ kLdrNegativeOffset |
+ (-offset & 0xffu) |
+ (base_reg << 16) |
+ (/* IP */ 12 << 12);
+ EXPECT_EQ(load_lock_word, GetOutputInsn32(thunk_offset));
+ // Verify the gray bit check.
+ DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate.
+ uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
+ const uint32_t tst_gray_bit_without_offset =
+ 0xf0100f00 | (/* IP */ 12 << 16)
+ | (((ror_shift >> 4) & 1) << 26) // i
+ | (((ror_shift >> 1) & 7) << 12) // imm3
+ | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift).
+ EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(thunk_offset + 4u));
+ EXPECT_EQ(0xd100u, GetOutputInsn16(thunk_offset + 8u) & 0xff00u); // BNE
+ // Verify the fake dependency.
+ const uint32_t fake_dependency =
+ 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+ (/* IP */ 12) | // Rm = IP
+ (base_reg << 16) | // Rn = base_reg
+ (base_reg << 8); // Rd = base_reg
+ EXPECT_EQ(fake_dependency, GetOutputInsn32(thunk_offset + 14u));
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
+ };
+ constexpr size_t kMethodCodeSize = 8u;
+ constexpr size_t kLiteralOffset = 4u;
+ uint32_t method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
+ const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(
+ kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)),
+ };
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
+ const std::vector<uint8_t> expected_code = RawCode({ldr, bne});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size());
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
+ if (root_reg < 8) {
+ ASSERT_GE(output_.size() - thunk_offset, 2u);
+ ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+ } else {
+ ASSERT_GE(output_.size() - thunk_offset, 6u);
+ ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+ ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
+ }
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) {
+ // Test 1MiB of patches to the same thunk to stress-test different large offsets.
+ // (The low bits are not that important but the location of the high bits is easy to get wrong.)
+ std::vector<uint8_t> code;
+ code.reserve(1 * MB);
+ const size_t num_patches = 1 * MB / 8u;
+ std::vector<LinkerPatch> patches;
+ patches.reserve(num_patches);
+ const uint32_t ldr =
+ kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0);
+ for (size_t i = 0; i != num_patches; ++i) {
+ PushBackInsn(&code, ldr);
+ PushBackInsn(&code, kBneWPlus0);
+ patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data));
+ }
+ ASSERT_EQ(1 * MB, code.size());
+ ASSERT_EQ(num_patches, patches.size());
+ AddCompiledMethod(MethodRef(1u),
+ ArrayRef<const uint8_t>(code),
+ ArrayRef<const LinkerPatch>(patches));
+ Link();
+
+ // The thunk is right after the method code.
+ DCHECK_ALIGNED(1 * MB, kArmAlignment);
+ std::vector<uint8_t> expected_code;
+ for (size_t i = 0; i != num_patches; ++i) {
+ PushBackInsn(&expected_code, ldr);
+ PushBackInsn(&expected_code, BneWWithOffset(8u * i + 4u, 1 * MB));
+ patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data));
+ }
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) {
+ // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());`
+ // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily
+ // hold when we're reserving thunks of different sizes. This test exposes the situation
+ // by using Baker thunks and a method call thunk.
+
+ // Add a method call patch that can reach to method 1 offset + 16MiB.
+ uint32_t method_idx = 0u;
+ constexpr size_t kMethodCallLiteralOffset = 2u;
+ constexpr uint32_t kMissingMethodIdx = 2u;
+ const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0});
+ const LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u),
+ };
+ ArrayRef<const uint8_t> code1(raw_code1);
+ ++method_idx;
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches));
+
+ // Skip kMissingMethodIdx.
+ ++method_idx;
+ ASSERT_EQ(kMissingMethodIdx, method_idx);
+ // Add a method with the right size that the method code for the next one starts 1MiB
+ // after code for method 1.
+ size_t filler_size =
+ 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+ - sizeof(OatQuickMethodHeader);
+ std::vector<uint8_t> filler_code = GenNops(filler_size / 2u);
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
+ // Add 14 methods with 1MiB code+header, making the code for the next method start 1MiB
+ // before the currently scheduled MaxNextOffset() for the method call thunk.
+ for (uint32_t i = 0; i != 14; ++i) {
+ filler_size = 1 * MB - sizeof(OatQuickMethodHeader);
+ filler_code = GenNops(filler_size / 2u);
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
+ }
+
+ // Add 2 Baker GC root patches to the last method, one that would allow the thunk at
+ // 1MiB + kArmAlignment, i.e. kArmAlignment after the method call thunk, and the
+ // second that needs it kArmAlignment after that. Given the size of the GC root thunk
+ // is more than the space required by the method call thunk plus kArmAlignment,
+ // this pushes the first GC root thunk's pending MaxNextOffset() before the method call
+ // thunk's pending MaxNextOffset() which needs to be adjusted.
+ ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment,
+ CompileBakerGcRootThunk(/* root_reg */ 0).size());
+ static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8");
+ constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment;
+ constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment;
+ // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | (root_reg << 12)`.
+ const uint32_t ldr1 = kLdrWInsn | (/* root_reg */ 1 << 12);
+ const uint32_t ldr2 = kLdrWInsn | (/* root_reg */ 2 << 12);
+ const std::vector<uint8_t> last_method_raw_code = RawCode({
+ kNopInsn, // Padding before first GC root read barrier.
+ ldr1, kBneWPlus0, // First GC root LDR with read barrier.
+ ldr2, kBneWPlus0, // Second GC root LDR with read barrier.
+ });
+ uint32_t encoded_data1 =
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1);
+ uint32_t encoded_data2 =
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2);
+ const LinkerPatch last_method_patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1),
+ LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2),
+ };
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx),
+ ArrayRef<const uint8_t>(last_method_raw_code),
+ ArrayRef<const LinkerPatch>(last_method_patches));
+
+ // The main purpose of the test is to check that Link() does not cause a crash.
+ Link();
+
+ ASSERT_EQ(15 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u));
+}
+
} // namespace linker
} // namespace art