ARM/AOT: Allow 16-bit LDR for Baker read barrier loads.
Test: m test-art-target-gtest
Test: testrunner.py --target on Nexus 6P.
Test: testrunner.py --target on Nexus 6P with heap poisoning enabled.
Test: Repeat the above tests with ART_USE_OLD_ARM_BACKEND=true.
Bug: 29516974
Bug: 30126666
Bug: 36141117
Change-Id: I458f2ec5fe9abead4db06c7595d992945096fb68
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index ced52ff..a98aedf 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -18,6 +18,7 @@
#include "arch/arm/asm_support_arm.h"
#include "art_method.h"
+#include "base/bit_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "lock_word.h"
@@ -112,12 +113,22 @@
// Check that the next instruction matches the expected LDR.
switch (kind) {
case BakerReadBarrierKind::kField: {
- DCHECK_GE(code->size() - literal_offset, 8u);
- uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
- // LDR (immediate) with correct base_reg.
- CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
- const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
- CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ if (width == BakerReadBarrierWidth::kWide) {
+ DCHECK_GE(code->size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
+ // LDR (immediate), encoding T3, with correct base_reg.
+ CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+ } else {
+ DCHECK_GE(code->size() - literal_offset, 6u);
+ uint32_t next_insn = GetInsn16(code, literal_offset + 4u);
+ // LDR (immediate), encoding T1, with correct base_reg.
+ CheckValidReg(next_insn & 0x7u); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
+ }
break;
}
case BakerReadBarrierKind::kArray: {
@@ -131,11 +142,20 @@
break;
}
case BakerReadBarrierKind::kGcRoot: {
- DCHECK_GE(literal_offset, 4u);
- uint32_t prev_insn = GetInsn32(code, literal_offset - 4u);
- // LDR (immediate) with correct root_reg.
- const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
- CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ if (width == BakerReadBarrierWidth::kWide) {
+ DCHECK_GE(literal_offset, 4u);
+ uint32_t prev_insn = GetInsn32(code, literal_offset - 4u);
+ // LDR (immediate), encoding T3, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+ } else {
+ DCHECK_GE(literal_offset, 2u);
+ uint32_t prev_insn = GetInsn16(code, literal_offset - 2u);
+ // LDR (immediate), encoding T1, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
+ }
break;
}
default:
@@ -160,7 +180,8 @@
static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler,
vixl::aarch32::Register base_reg,
vixl::aarch32::MemOperand& lock_word,
- vixl::aarch32::Label* slow_path) {
+ vixl::aarch32::Label* slow_path,
+ int32_t raw_ldr_offset) {
using namespace vixl::aarch32; // NOLINT(build/namespaces)
// Load the lock word containing the rb_state.
__ Ldr(ip, lock_word);
@@ -169,14 +190,7 @@
static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
__ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
__ B(ne, slow_path, /* is_far_target */ false);
- static_assert(
- BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
- "Field and array LDR offsets must be the same to reuse the same code.");
- // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 instruction (4B) before the return address label; "
- " 2 instructions (8B) for heap poisoning.");
- __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
+ __ Add(lr, lr, raw_ldr_offset);
// Introduce a dependency on the lock_word including rb_state,
// to prevent load-load reordering, and without using
// a memory barrier (which would be more expensive).
@@ -199,6 +213,7 @@
CheckValidReg(base_reg.GetCode());
Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
CheckValidReg(holder_reg.GetCode());
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip);
// If base_reg differs from holder_reg, the offset was too large and we must have
@@ -210,16 +225,30 @@
}
vixl::aarch32::Label slow_path;
MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
- EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+ const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
__ Bind(&slow_path);
const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
- BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET;
- MemOperand ldr_half_address(lr, ldr_offset + 2);
- __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
- __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
- __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
+ raw_ldr_offset;
+ Register ep_reg(kBakerCcEntrypointRegister);
+ if (width == BakerReadBarrierWidth::kWide) {
+ MemOperand ldr_half_address(lr, ldr_offset + 2);
+ __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
+ __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
+ __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
+ } else {
+ MemOperand ldr_address(lr, ldr_offset);
+ __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1.
+ __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint
+ ep_reg, // for narrow LDR.
+ Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
+ __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4.
+ __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference.
+ }
// Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
- __ Bx(Register(kBakerCcEntrypointRegister)); // Jump to the entrypoint.
+ __ Bx(ep_reg); // Jump to the entrypoint.
if (holder_reg.Is(base_reg)) {
// Add null check slow path. The stack map is at the address pointed to by LR.
__ Bind(&throw_npe);
@@ -233,6 +262,7 @@
Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
CheckValidReg(base_reg.GetCode());
DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+ DCHECK(BakerReadBarrierWidth::kWide == BakerReadBarrierWidthField::Decode(encoded_data));
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip);
vixl::aarch32::Label slow_path;
@@ -240,10 +270,11 @@
mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
- EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+ const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
__ Bind(&slow_path);
const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
- BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+ raw_ldr_offset;
MemOperand ldr_address(lr, ldr_offset + 2);
__ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
// i.e. Rm+32 because the scale in imm2 is 2.
@@ -261,6 +292,7 @@
Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
CheckValidReg(root_reg.GetCode());
DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
temps.Exclude(ip);
vixl::aarch32::Label return_label, not_marked, forwarding_address;
@@ -280,7 +312,10 @@
// Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
// to art_quick_read_barrier_mark_introspection_gc_roots.
Register ep_reg(kBakerCcEntrypointRegister);
- __ Add(ep_reg, ep_reg, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
+ int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET;
+ __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
__ Mov(ip, root_reg);
__ Bx(ep_reg);
__ Bind(&forwarding_address);
@@ -344,7 +379,7 @@
void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) {
DCHECK_LE(offset + 4u, code->size());
- DCHECK_EQ(offset & 1u, 0u);
+ DCHECK_ALIGNED(offset, 2u);
uint8_t* addr = &(*code)[offset];
addr[0] = (value >> 16) & 0xff;
addr[1] = (value >> 24) & 0xff;
@@ -354,7 +389,7 @@
uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) {
DCHECK_LE(offset + 4u, code.size());
- DCHECK_EQ(offset & 1u, 0u);
+ DCHECK_ALIGNED(offset, 2u);
const uint8_t* addr = &code[offset];
return
(static_cast<uint32_t>(addr[0]) << 16) +
@@ -369,5 +404,18 @@
return GetInsn32(ArrayRef<const uint8_t>(*code), offset);
}
+uint32_t Thumb2RelativePatcher::GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset) {
+ DCHECK_LE(offset + 2u, code.size());
+ DCHECK_ALIGNED(offset, 2u);
+ const uint8_t* addr = &code[offset];
+ return (static_cast<uint32_t>(addr[0]) << 0) + (static_cast<uint32_t>(addr[1]) << 8);
+}
+
+template <typename Vector>
+uint32_t Thumb2RelativePatcher::GetInsn16(Vector* code, uint32_t offset) {
+ static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
+ return GetInsn16(ArrayRef<const uint8_t>(*code), offset);
+}
+
} // namespace linker
} // namespace art
diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h
index 7fad245..7e787d2 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.h
+++ b/compiler/linker/arm/relative_patcher_thumb2.h
@@ -35,26 +35,37 @@
public:
static constexpr uint32_t kBakerCcEntrypointRegister = 4u;
- static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
+ static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg,
+ uint32_t holder_reg,
+ bool narrow) {
CheckValidReg(base_reg);
CheckValidReg(holder_reg);
+ DCHECK(!narrow || base_reg < 8u) << base_reg;
+ BakerReadBarrierWidth width =
+ narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
BakerReadBarrierFirstRegField::Encode(base_reg) |
- BakerReadBarrierSecondRegField::Encode(holder_reg);
+ BakerReadBarrierSecondRegField::Encode(holder_reg) |
+ BakerReadBarrierWidthField::Encode(width);
}
static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
CheckValidReg(base_reg);
return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
BakerReadBarrierFirstRegField::Encode(base_reg) |
- BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+ BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide);
}
- static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
+ static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) {
CheckValidReg(root_reg);
+ DCHECK(!narrow || root_reg < 8u) << root_reg;
+ BakerReadBarrierWidth width =
+ narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
BakerReadBarrierFirstRegField::Encode(root_reg) |
- BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg);
+ BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(width);
}
explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider);
@@ -86,6 +97,12 @@
kLast
};
+ enum class BakerReadBarrierWidth : uint8_t {
+ kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled).
+ kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled).
+ kLast
+ };
+
static constexpr size_t kBitsForBakerReadBarrierKind =
MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
static constexpr size_t kBitsForRegister = 4u;
@@ -95,9 +112,14 @@
BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>;
using BakerReadBarrierSecondRegField =
BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>;
+ static constexpr size_t kBitsForBakerReadBarrierWidth =
+ MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast));
+ using BakerReadBarrierWidthField = BitField<BakerReadBarrierWidth,
+ kBitsForBakerReadBarrierKind + 2 * kBitsForRegister,
+ kBitsForBakerReadBarrierWidth>;
static void CheckValidReg(uint32_t reg) {
- DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister);
+ DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister) << reg;
}
void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data);
@@ -108,6 +130,11 @@
template <typename Vector>
static uint32_t GetInsn32(Vector* code, uint32_t offset);
+ static uint32_t GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset);
+
+ template <typename Vector>
+ static uint32_t GetInsn16(Vector* code, uint32_t offset);
+
friend class Thumb2RelativePatcherTest;
DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher);
diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc
index 2e28349..af5fa40 100644
--- a/compiler/linker/arm/relative_patcher_thumb2_test.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc
@@ -52,6 +52,9 @@
// BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset.
static constexpr uint32_t kBneWPlus0 = 0xf0408000u;
+ // LDR immediate, 16-bit, encoding T1. Bits 6-10 are imm5, 0-2 are Rt, 3-5 are Rn.
+ static constexpr uint32_t kLdrInsn = 0x6800u;
+
// LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn.
static constexpr uint32_t kLdrWInsn = 0xf8d00000u;
@@ -223,9 +226,11 @@
void TestStringReference(uint32_t string_offset);
void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset);
- std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) {
+ std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg,
+ uint32_t holder_reg,
+ bool narrow) {
const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
- 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg));
+ 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg, narrow));
ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
@@ -237,9 +242,9 @@
return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
- std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) {
+ std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg, bool narrow) {
LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch(
- 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg));
+ 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow));
ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch);
return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key);
}
@@ -260,7 +265,8 @@
(static_cast<uint32_t>(output_[offset + 1]) << 8);
}
- void TestBakerField(uint32_t offset, uint32_t ref_reg);
+ void TestBakerFieldWide(uint32_t offset, uint32_t ref_reg);
+ void TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg);
};
const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = {
@@ -568,7 +574,7 @@
ASSERT_LT(GetMethodOffset(1u), 0xfcu);
}
-void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) {
+void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) {
uint32_t valid_regs[] = {
0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
8, 9, 10, 11, // IP, SP, LR and PC are reserved.
@@ -584,8 +590,8 @@
const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
ASSERT_EQ(kMethodCodeSize, raw_code.size());
ArrayRef<const uint8_t> code(raw_code);
- uint32_t encoded_data =
- Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base_reg, holder_reg, /* narrow */ false);
const LinkerPatch patches[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
};
@@ -608,7 +614,8 @@
ASSERT_TRUE(
CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
- std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg);
+ std::vector<uint8_t> expected_thunk =
+ CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ false);
ASSERT_GT(output_.size(), thunk_offset);
ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
@@ -666,15 +673,131 @@
}
}
-#define TEST_BAKER_FIELD(offset, ref_reg) \
- TEST_F(Thumb2RelativePatcherTest, \
- BakerOffset##offset##_##ref_reg) { \
- TestBakerField(offset, ref_reg); \
+void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
+ };
+ DCHECK_ALIGNED(offset, 4u);
+ DCHECK_LT(offset, 32u);
+ constexpr size_t kMethodCodeSize = 6u;
+ constexpr size_t kLiteralOffset = 0u;
+ uint32_t method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ if (base_reg >= 8u) {
+ continue;
+ }
+ for (uint32_t holder_reg : valid_regs) {
+ uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
+ const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base_reg, holder_reg, /* narrow */ true);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data),
+ };
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t base_reg : valid_regs) {
+ if (base_reg >= 8u) {
+ continue;
+ }
+ for (uint32_t holder_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
+ const std::vector<uint8_t> expected_code = RawCode({bne, ldr});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne;
+ ASSERT_TRUE(
+ CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk =
+ CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ true);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ size_t gray_check_offset = thunk_offset;
+ if (holder_reg == base_reg) {
+ // Verify that the null-check uses the correct register, i.e. holder_reg.
+ if (holder_reg < 8) {
+ ASSERT_GE(output_.size() - gray_check_offset, 2u);
+ ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+ gray_check_offset +=2u;
+ } else {
+ ASSERT_GE(output_.size() - gray_check_offset, 6u);
+ ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+ ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
+ gray_check_offset += 6u;
+ }
+ }
+ // Verify that the lock word for gray bit check is loaded from the holder address.
+ ASSERT_GE(output_.size() - gray_check_offset,
+ 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u);
+ const uint32_t load_lock_word =
+ kLdrWInsn |
+ (holder_reg << 16) |
+ (/* IP */ 12 << 12) |
+ mirror::Object::MonitorOffset().Uint32Value();
+ ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset));
+ // Verify the gray bit check.
+ DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate.
+ uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift);
+ const uint32_t tst_gray_bit_without_offset =
+ 0xf0100f00 | (/* IP */ 12 << 16)
+ | (((ror_shift >> 4) & 1) << 26) // i
+ | (((ror_shift >> 1) & 7) << 12) // imm3
+ | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift).
+ EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u));
+ EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE
+ // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset").
+ const uint32_t fake_dependency =
+ 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00)
+ (/* IP */ 12) | // Rm = IP
+ (base_reg << 16) | // Rn = base_reg
+ (base_reg << 8); // Rd = base_reg
+ EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u));
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+ }
+}
+
+#define TEST_BAKER_FIELD_WIDE(offset, ref_reg) \
+ TEST_F(Thumb2RelativePatcherTest, \
+ BakerOffsetWide##offset##_##ref_reg) { \
+ TestBakerFieldWide(offset, ref_reg); \
}
-TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0)
-TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 7)
-TEST_BAKER_FIELD(/* offset */ 0xffc, /* ref_reg */ 11)
+TEST_BAKER_FIELD_WIDE(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD_WIDE(/* offset */ 8, /* ref_reg */ 3)
+TEST_BAKER_FIELD_WIDE(/* offset */ 28, /* ref_reg */ 7)
+TEST_BAKER_FIELD_WIDE(/* offset */ 0xffc, /* ref_reg */ 11)
+
+#define TEST_BAKER_FIELD_NARROW(offset, ref_reg) \
+ TEST_F(Thumb2RelativePatcherTest, \
+ BakerOffsetNarrow##offset##_##ref_reg) { \
+ TestBakerFieldNarrow(offset, ref_reg); \
+ }
+
+TEST_BAKER_FIELD_NARROW(/* offset */ 0, /* ref_reg */ 0)
+TEST_BAKER_FIELD_NARROW(/* offset */ 8, /* ref_reg */ 3)
+TEST_BAKER_FIELD_NARROW(/* offset */ 28, /* ref_reg */ 7)
TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) {
// One thunk in the middle with maximum distance branches to it from both sides.
@@ -682,8 +805,8 @@
constexpr uint32_t kLiteralOffset1 = 6u;
const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
ArrayRef<const uint8_t> code1(raw_code1);
- uint32_t encoded_data =
- Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
const LinkerPatch patches1[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
};
@@ -710,7 +833,8 @@
// - thunk size and method 3 pre-header, rounded up (padding in between if needed)
// - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
// - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
- size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size();
+ size_t thunk_size =
+ CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size();
size_t filler2_size =
1 * MB - (kLiteralOffset2 + kPcAdjustment)
- RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
@@ -749,8 +873,8 @@
constexpr uint32_t kLiteralOffset1 = 4u;
const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn});
ArrayRef<const uint8_t> code1(raw_code1);
- uint32_t encoded_data =
- Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
const LinkerPatch patches1[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
};
@@ -779,8 +903,8 @@
constexpr uint32_t kLiteralOffset1 = 6u;
const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn});
ArrayRef<const uint8_t> code1(raw_code1);
- uint32_t encoded_data =
- Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0);
+ uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false);
const LinkerPatch patches1[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data),
};
@@ -809,7 +933,8 @@
// - thunk size and method 3 pre-header, rounded up (padding in between if needed)
// - method 3 code and method 4 pre-header, rounded up (padding in between if needed)
// - method 4 header (let there be no padding between method 4 code and method 5 pre-header).
- size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size();
+ size_t thunk_size =
+ CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size();
size_t filler2_size =
1 * MB - (kReachableFromOffset2 + kPcAdjustment)
- RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment)
@@ -929,7 +1054,7 @@
}
}
-TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) {
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) {
uint32_t valid_regs[] = {
0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
8, 9, 10, 11, // IP, SP, LR and PC are reserved.
@@ -945,7 +1070,8 @@
ArrayRef<const uint8_t> code(raw_code);
const LinkerPatch patches[] = {
LinkerPatch::BakerReadBarrierBranchPatch(
- kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)),
+ kLiteralOffset,
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ false)),
};
AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
}
@@ -962,7 +1088,67 @@
ASSERT_EQ(kMethodCodeSize, expected_code.size());
EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
- std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg);
+ std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ false);
+ ASSERT_GT(output_.size(), thunk_offset);
+ ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
+ ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
+ expected_thunk.size());
+ if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) {
+ DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk);
+ ASSERT_TRUE(false);
+ }
+
+ // Verify that the fast-path null-check uses the correct register, i.e. root_reg.
+ if (root_reg < 8) {
+ ASSERT_GE(output_.size() - thunk_offset, 2u);
+ ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
+ } else {
+ ASSERT_GE(output_.size() - thunk_offset, 6u);
+ ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
+ ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
+ }
+ // Do not check the rest of the implementation.
+
+ // The next thunk follows on the next aligned offset.
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment);
+ }
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) {
+ uint32_t valid_regs[] = {
+ 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
+ // Not appplicable to high registers.
+ };
+ constexpr size_t kMethodCodeSize = 6u;
+ constexpr size_t kLiteralOffset = 2u;
+ uint32_t method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
+ const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
+ ASSERT_EQ(kMethodCodeSize, raw_code.size());
+ ArrayRef<const uint8_t> code(raw_code);
+ const LinkerPatch patches[] = {
+ LinkerPatch::BakerReadBarrierBranchPatch(
+ kLiteralOffset,
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ true)),
+ };
+ AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches));
+ }
+ Link();
+
+ // All thunks are at the end.
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
+ method_idx = 0u;
+ for (uint32_t root_reg : valid_regs) {
+ ++method_idx;
+ uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
+ uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
+ const std::vector<uint8_t> expected_code = RawCode({ldr, bne});
+ ASSERT_EQ(kMethodCodeSize, expected_code.size());
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code)));
+
+ std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ true);
ASSERT_GT(output_.size(), thunk_offset);
ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size());
ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset,
@@ -973,14 +1159,8 @@
}
// Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg.
- if (root_reg < 8) {
- ASSERT_GE(output_.size() - thunk_offset, 2u);
- ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
- } else {
- ASSERT_GE(output_.size() - thunk_offset, 6u);
- ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u);
- ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ
- }
+ ASSERT_GE(output_.size() - thunk_offset, 2u);
+ ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u);
// Do not check the rest of the implementation.
// The next thunk follows on the next aligned offset.
@@ -998,7 +1178,8 @@
patches.reserve(num_patches);
const uint32_t ldr =
kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12);
- uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0);
+ uint32_t encoded_data =
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0, /* narrow */ false);
for (size_t i = 0; i != num_patches; ++i) {
PushBackInsn(&code, ldr);
PushBackInsn(&code, kBneWPlus0);
@@ -1067,7 +1248,7 @@
// this pushes the first GC root thunk's pending MaxNextOffset() before the method call
// thunk's pending MaxNextOffset() which needs to be adjusted.
ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment,
- CompileBakerGcRootThunk(/* root_reg */ 0).size());
+ CompileBakerGcRootThunk(/* root_reg */ 0, /* narrow */ false).size());
static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8");
constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment;
constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment;
@@ -1080,9 +1261,9 @@
ldr2, kBneWPlus0, // Second GC root LDR with read barrier.
});
uint32_t encoded_data1 =
- Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1);
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1, /* narrow */ false);
uint32_t encoded_data2 =
- Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2);
+ Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2, /* narrow */ false);
const LinkerPatch last_method_patches[] = {
LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1),
LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2),
diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h
index d1ab410..02a5b1e 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.h
+++ b/compiler/linker/arm64/relative_patcher_arm64.h
@@ -100,7 +100,7 @@
BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>;
static void CheckValidReg(uint32_t reg) {
- DCHECK(reg < 30u && reg != 16u && reg != 17u);
+ DCHECK(reg < 30u && reg != 16u && reg != 17u) << reg;
}
void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 35dccd6..8650aee 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -90,13 +90,17 @@
}
static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) {
- DCHECK(down_cast<Thumb2Assembler*>(codegen->GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(codegen->GetAssembler()));
__ BindTrackedLabel(bne_label);
Label placeholder_label;
__ b(&placeholder_label, NE); // Placeholder, patched at link-time.
__ Bind(&placeholder_label);
}
+static inline bool CanEmitNarrowLdr(Register rt, Register rn, uint32_t offset) {
+ return ArmAssembler::IsLowRegister(rt) && ArmAssembler::IsLowRegister(rn) && offset < 32u;
+}
+
static constexpr int kRegListThreshold = 4;
// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
@@ -8049,8 +8053,9 @@
// return_address:
CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg);
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow);
Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8063,16 +8068,18 @@
Label return_address;
__ AdrCode(LR, &return_address);
__ CmpConstant(kBakerCcEntrypointRegister, 0);
- static_assert(
- BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
- "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
// Currently the offset is always within range. If that changes,
// we shall have to split the load the same way as for fields.
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
- ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
__ LoadFromOffset(kLoadWord, root_reg, obj, offset);
EmitPlaceholderBne(codegen_, bne_label);
__ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
} else {
// Note that we do not actually check the value of
// `GetIsGcMarking()` to decide whether to mark the loaded GC
@@ -8172,10 +8179,12 @@
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ Register ref_reg = ref.AsRegister<Register>();
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
Register base = obj;
if (offset >= kReferenceLoadMinFarOffset) {
base = temp.AsRegister<Register>();
@@ -8183,10 +8192,14 @@
static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
__ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u));
offset &= (kReferenceLoadMinFarOffset - 1u);
+ // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+ // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+ // increase the overall code size when taking the generated thunks into account.
+ DCHECK(!narrow);
}
CheckLastTempIsBakerCcEntrypointRegister(instruction);
uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj);
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj, narrow);
Label* bne_label = NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8199,19 +8212,20 @@
Label return_address;
__ AdrCode(LR, &return_address);
__ CmpConstant(kBakerCcEntrypointRegister, 0);
- ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
- Register ref_reg = ref.AsRegister<Register>();
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit());
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow);
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
__ LoadFromOffset(kLoadWord, ref_reg, base, offset);
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
__ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
return;
}
@@ -8257,7 +8271,7 @@
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
@@ -8282,15 +8296,15 @@
Label return_address;
__ AdrCode(LR, &return_address);
__ CmpConstant(kBakerCcEntrypointRegister, 0);
- ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
+ ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ int old_position = GetAssembler()->GetBuffer()->GetPosition();
__ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor));
DCHECK(!needs_null_check); // The thunk cannot handle the null check.
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
__ Bind(&return_address);
+ DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
return;
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index ed0a64c..54aa03c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -6094,7 +6094,7 @@
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6189,7 +6189,7 @@
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 8417f84..b2e0a91 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -124,6 +124,10 @@
__ bind(&placeholder_label);
}
+static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
+ return rt.IsLow() && rn.IsLow() && offset < 32u;
+}
+
class EmitAdrCode {
public:
EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
@@ -8158,8 +8162,9 @@
UseScratchRegisterScope temps(GetVIXLAssembler());
ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
- uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+ uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(
+ root_reg.GetCode(), narrow);
vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8174,15 +8179,16 @@
vixl32::Label return_address;
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(kBakerCcEntrypointRegister, Operand(0));
- static_assert(
- BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
- "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
// Currently the offset is always within range. If that changes,
// we shall have to split the load the same way as for fields.
DCHECK_LT(offset, kReferenceLoadMinFarOffset);
- __ ldr(EncodingSize(Wide), root_reg, MemOperand(obj, offset));
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
EmitPlaceholderBne(codegen_, bne_label);
__ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
} else {
// Note that we do not actually check the value of
// `GetIsGcMarking()` to decide whether to mark the loaded GC
@@ -8283,10 +8289,12 @@
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
vixl32::Register base = obj;
if (offset >= kReferenceLoadMinFarOffset) {
base = RegisterFrom(temp);
@@ -8294,12 +8302,15 @@
static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
__ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
offset &= (kReferenceLoadMinFarOffset - 1u);
+ // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
+ // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
+ // increase the overall code size when taking the generated thunks into account.
+ DCHECK(!narrow);
}
UseScratchRegisterScope temps(GetVIXLAssembler());
ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
- base.GetCode(),
- obj.GetCode());
+ base.GetCode(), obj.GetCode(), narrow);
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
// entrypoint_reg =
@@ -8316,19 +8327,24 @@
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(kBakerCcEntrypointRegister, Operand(0));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
- vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
- __ ldr(EncodingSize(Wide), ref_reg, MemOperand(base, offset));
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
if (needs_null_check) {
MaybeRecordImplicitNullCheck(instruction);
}
- // Note: We need a Wide NEG for the unpoisoning.
+ // Note: We need a specific width for the unpoisoning NEG.
if (kPoisonHeapReferences) {
- __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ if (narrow) {
+ // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+ __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+ } else {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
}
__ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
return;
}
@@ -8374,7 +8390,7 @@
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> reference = data[index];
+ // HeapReference<mirror::Object> reference = data[index];
// gray_return_address:
DCHECK(index.IsValid());
@@ -8404,9 +8420,7 @@
EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
__ cmp(kBakerCcEntrypointRegister, Operand(0));
EmitPlaceholderBne(this, bne_label);
- static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
- " 2 32-bit instructions (8B) for heap poisoning.");
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
__ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
DCHECK(!needs_null_check); // The thunk cannot handle the null check.
// Note: We need a Wide NEG for the unpoisoning.
@@ -8414,6 +8428,8 @@
__ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
}
__ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
return;
}
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 5c36110..2ff9018 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -924,9 +924,11 @@
class ScopedForce32Bit {
public:
- explicit ScopedForce32Bit(Thumb2Assembler* assembler)
+ explicit ScopedForce32Bit(Thumb2Assembler* assembler, bool force = true)
: assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) {
- assembler->Force32Bit();
+ if (force) {
+ assembler->Force32Bit();
+ }
}
~ScopedForce32Bit() {
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index 1a5e39f..d6056c0 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -71,11 +71,15 @@
#undef FRAME_SIZE_SAVE_REFS_AND_ARGS
static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING;
#undef FRAME_SIZE_SAVE_EVERYTHING
+#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
#undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET
-#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET
-#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET
+#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
#undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
-#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET
+#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
} // namespace arm
namespace arm64 {
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index f1f1766..8f2fd6e 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -24,18 +24,25 @@
#define FRAME_SIZE_SAVE_REFS_AND_ARGS 112
#define FRAME_SIZE_SAVE_EVERYTHING 192
+// The offset from the art_quick_read_barrier_mark_introspection (used for field
+// loads with 32-bit LDR) to the entrypoint for field loads with 16-bit LDR,
+// i.e. art_quick_read_barrier_mark_introspection_narrow.
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET 0x20
+// The offsets from art_quick_read_barrier_mark_introspection to the GC root entrypoints,
+// i.e. art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}.
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET 0x80
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET 0xc0
// The offset from art_quick_read_barrier_mark_introspection to the array switch cases,
// i.e. art_quick_read_barrier_mark_introspection_arrays.
#define BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET 0x100
-// The offset from art_quick_read_barrier_mark_introspection to the GC root entrypoint,
-// i.e. art_quick_read_barrier_mark_introspection_gc_roots.
-#define BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET 0xc0
// The offset of the reference load LDR from the return address in LR for field loads.
#ifdef USE_HEAP_POISONING
-#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -8
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET -8
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET -4
#else
-#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -4
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET -4
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET -2
#endif
// The offset of the reference load LDR from the return address in LR for array loads.
#ifdef USE_HEAP_POISONING
@@ -44,7 +51,8 @@
#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -4
#endif
// The offset of the reference load LDR from the return address in LR for GC root loads.
-#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET -8
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET -8
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET -6
// Flag for enabling R4 optimization in arm runtime
// #define ARM_R4_SUSPEND_FLAG
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 6b72477..919b0af 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -53,8 +53,11 @@
extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_narrow(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*);
-extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_wide(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_narrow(
+ mirror::Object*);
// Used by soft float.
// Single-precision FP arithmetics.
@@ -86,18 +89,27 @@
qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr;
qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr;
- // Check that array switch cases are at appropriate offsets from the introspection entrypoint.
// For the alignment check, strip the Thumb mode bit.
DCHECK_ALIGNED(reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection) - 1u, 256u);
+ // Check the field narrow entrypoint offset from the introspection entrypoint.
+ intptr_t narrow_diff =
+ reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_narrow) -
+ reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
+ DCHECK_EQ(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET, narrow_diff);
+ // Check array switch cases offsets from the introspection entrypoint.
intptr_t array_diff =
reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_arrays) -
reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
DCHECK_EQ(BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET, array_diff);
- // Check that the GC root entrypoint is at appropriate offset from the introspection entrypoint.
- intptr_t gc_roots_diff =
- reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots) -
+ // Check the GC root entrypoint offsets from the introspection entrypoint.
+ intptr_t gc_roots_wide_diff =
+ reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots_wide) -
reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
- DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET, gc_roots_diff);
+ DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET, gc_roots_wide_diff);
+ intptr_t gc_roots_narrow_diff =
+ reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots_narrow) -
+ reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
+ DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET, gc_roots_narrow_diff);
// The register 12, i.e. IP, is reserved, so there is no art_quick_read_barrier_mark_reg12.
// We're using the entry to hold a pointer to the introspection entrypoint instead.
qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_introspection : nullptr;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index fa21208..d0c6728 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -2189,7 +2189,7 @@
.byte (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2
.endm
-#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
+#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
#error "Array and field introspection code sharing requires same LDR offset."
#endif
.macro BRBMI_ARRAY_LOAD index_reg
@@ -2208,7 +2208,10 @@
BRBMI_BKPT_FILL_4B
.endm
-.macro BRBMI_SLOW_PATH ldr_offset
+.macro BRBMI_RUNTIME_CALL
+ // Note: This macro generates exactly 22 bytes of code. The core register
+ // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions.
+
push {r0-r3, r7, lr} // Save return address and caller-save registers.
.cfi_adjust_cfa_offset 24
.cfi_rel_offset r0, 0
@@ -2234,11 +2237,72 @@
.cfi_restore r3
.cfi_restore r7
.cfi_restore lr
+.endm
+.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix
+ // If reference is null, just return it in the right register.
+ cmp ip, #0
+ beq .Lmark_introspection_return\label_suffix
+ // Use R4 as temp and check the mark bit of the reference.
+ ldr r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ tst r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+ beq .Lmark_introspection_unmarked\label_suffix
+.Lmark_introspection_return\label_suffix:
+.endm
+
+.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix
+.Lmark_introspection_unmarked\label_suffix:
+ // Check if the top two bits are one, if this is the case it is a forwarding address.
+#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
+ // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
+ // the highest bits and the "forwarding address" state to have all bits set.
+#error "Unexpected lock word state shift or forwarding address state value."
+#endif
+ cmp r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
+ bhs .Lmark_introspection_forwarding_address\label_suffix
+.endm
+
+.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix
+.Lmark_introspection_forwarding_address\label_suffix:
+ // Note: This macro generates exactly 22 bytes of code, the branch is near.
+
+ // Shift left by the forwarding address shift. This clears out the state bits since they are
+ // in the top 2 bits of the lock word.
+ lsl ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+ b .Lmark_introspection_return\label_suffix
+.endm
+
+.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
// Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
ldrh r4, [lr, #(-1 + \ldr_offset + 2)]
- lsr r4, r4, #12 // Extract `ref_reg`.
- b .Lmark_introspection_return_switch
+.endm
+
+.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
+ // Load the 16-bit instruction. Adjust for the thumb state in LR.
+ ldrh r4, [lr, #(-1 + \ldr_offset)]
+.endm
+
+.macro BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH gc_root_ldr_offset, label_suffix
+ .balign 64
+ .thumb_func
+ .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
+ .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
+ .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
+art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
+ BRBMI_RUNTIME_CALL
+ // Load the LDR (or the half of it) that contains Rt.
+ BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \gc_root_ldr_offset
+ b .Lmark_introspection_extract_register_and_return\label_suffix
+ // We've used 28 bytes since the "gc_roots" entrypoint (22 bytes for
+ // BRBMI_RUNTIME_CALL, 4 bytes for LDRH and 2 bytes for the branch). Squeeze
+ // the 6 byte forwarding address extraction here across the 32-byte boundary.
+ BRBMI_EXTRACT_FORWARDING_ADDRESS \label_suffix
+ // And the slow path taking exactly 30 bytes (6 bytes for the forwarding
+ // address check, 22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near
+ // branch) shall take the rest of the 32-byte section (within a cache line).
+ BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
+ BRBMI_RUNTIME_CALL
+ b .Lmark_introspection_return\label_suffix
.endm
/*
@@ -2249,14 +2313,16 @@
*
* The entrypoint is called through a thunk that differs across load kinds.
* For field and array loads the LDR instruction in generated code follows
- * the branch to the thunk, i.e. the LDR is at [LR, #(-4 - 1)] where the -1
- * is an adjustment for the Thumb mode bit in LR, and the thunk knows the
- * holder and performs the gray bit check, returning to the LDR instruction
- * if the object is not gray, so this entrypoint no longer needs to know
- * anything about the holder. For GC root loads, the LDR instruction in
- * generated code precedes the branch to the thunk, i.e. the LDR is at
- * [LR, #(-8 - 1)] where the -1 is again the Thumb mode bit adjustment, and
- * the thunk does not do the gray bit check.
+ * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning)
+ * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where
+ * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk
+ * knows the holder and performs the gray bit check, returning to the LDR
+ * instruction if the object is not gray, so this entrypoint no longer
+ * needs to know anything about the holder. For GC root loads, the LDR
+ * instruction in generated code precedes the branch to the thunk, i.e. the
+ * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1)
+ * where the -1 is again the Thumb mode bit adjustment, and the thunk does
+ * not do the gray bit check.
*
* For field accesses and array loads with a constant index the thunk loads
* the reference into IP using introspection and calls the main entrypoint,
@@ -2288,11 +2354,29 @@
*
* The code structure is
* art_quick_read_barrier_mark_introspection:
- * Over 128 bytes for the main entrypoint code.
- * Padding to 192 bytes if needed.
- * art_quick_read_barrier_mark_introspection_gc_roots:
- * GC root entrypoint code.
- * Padding to 256 bytes if needed.
+ * Up to 32 bytes code for main entrypoint fast-path code for fields
+ * (and array elements with constant offset) with LDR encoding T3;
+ * jumps to the switch in the "narrow" entrypoint.
+ * Padding to 32 bytes if needed.
+ * art_quick_read_barrier_mark_introspection_narrow:
+ * Up to 48 bytes code for fast path code for fields (and array
+ * elements with constant offset) with LDR encoding T1, ending in the
+ * return switch instruction TBB and the table with switch offsets.
+ * Padding to 80 bytes if needed.
+ * .Lmark_introspection_return_switch_case_r0:
+ * Exactly 48 bytes of code for the return switch cases (12 cases,
+ * including BKPT for the reserved registers).
+ * Ends at 128 bytes total.
+ * art_quick_read_barrier_mark_introspection_gc_roots_wide:
+ * GC root entrypoint code for LDR encoding T3 (28 bytes).
+ * Forwarding address extraction for LDR encoding T3 (6 bytes).
+ * Slow path for main entrypoint for LDR encoding T3 (30 bytes).
+ * Ends at 192 bytes total.
+ * art_quick_read_barrier_mark_introspection_gc_roots_narrow:
+ * GC root entrypoint code for LDR encoding T1 (28 bytes).
+ * Forwarding address extraction for LDR encoding T1 (6 bytes).
+ * Slow path for main entrypoint for LDR encoding T1 (30 bytes).
+ * Ends at 256 bytes total.
* art_quick_read_barrier_mark_introspection_arrays:
* Exactly 128 bytes for array load switch cases (16x2 instructions).
*/
@@ -2302,17 +2386,30 @@
// (R4 is reserved for the entrypoint address.)
// For heap poisoning, the reference is poisoned, so unpoison it first.
UNPOISON_HEAP_REF ip
- // If reference is null, just return it in the right register.
- cmp ip, #0
- beq .Lmark_introspection_return
- // Use R4 as temp and check the mark bit of the reference.
- ldr r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
- tst r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
- beq .Lmark_introspection_unmarked
-.Lmark_introspection_return:
- // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
- ldrh r4, [lr, #(-1 + BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET + 2)]
+ // Check for null or marked, lock word is loaded into IP.
+ BRBMI_CHECK_NULL_AND_MARKED _wide
+ // Load the half of the instruction that contains Rt.
+ BRBMI_LOAD_RETURN_REG_FROM_CODE_wide BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+.Lmark_introspection_extract_register_and_return_wide:
lsr r4, r4, #12 // Extract `ref_reg`.
+ b .Lmark_introspection_return_switch
+
+ .balign 32
+ .thumb_func
+ .type art_quick_read_barrier_mark_introspection_narrow, #function
+ .hidden art_quick_read_barrier_mark_introspection_narrow
+ .global art_quick_read_barrier_mark_introspection_narrow
+art_quick_read_barrier_mark_introspection_narrow:
+ // At this point, IP contains the reference, R4 can be freely used.
+ // (R4 is reserved for the entrypoint address.)
+ // For heap poisoning, the reference is poisoned, so unpoison it first.
+ UNPOISON_HEAP_REF ip
+ // Check for null or marked, lock word is loaded into R4.
+ BRBMI_CHECK_NULL_AND_MARKED _narrow
+ // Load the 16-bit instruction.
+ BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+.Lmark_introspection_extract_register_and_return_narrow:
+ and r4, r4, #7 // Extract `ref_reg`.
.Lmark_introspection_return_switch:
tbb [pc, r4] // Jump to the switch case.
.Lmark_introspection_return_table:
@@ -2320,32 +2417,8 @@
.balign 16
BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE
- .balign 16
-.Lmark_introspection_unmarked:
- // Check if the top two bits are one, if this is the case it is a forwarding address.
-#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
- // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
- // the highest bits and the "forwarding address" state to have all bits set.
-#error "Unexpected lock word state shift or forwarding address state value."
-#endif
- cmp r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
- bhs .Lmark_introspection_forwarding_address
- BRBMI_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET
-
- .balign 8
-.Lmark_introspection_forwarding_address:
- // Shift left by the forwarding address shift. This clears out the state bits since they are
- // in the top 2 bits of the lock word.
- lsl ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
- b .Lmark_introspection_return
-
- .balign 64
- .thumb_func
- .type art_quick_read_barrier_mark_introspection_gc_roots, #function
- .hidden art_quick_read_barrier_mark_introspection_gc_roots
- .global art_quick_read_barrier_mark_introspection_gc_roots
-art_quick_read_barrier_mark_introspection_gc_roots:
- BRBMI_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
+ BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
+ BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow
.balign 256
.thumb_func
diff --git a/runtime/oat.h b/runtime/oat.h
index a38eebc..e119b81 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- static constexpr uint8_t kOatVersion[] = { '1', '2', '4', '\0' }; // New compiler filter names.
+ static constexpr uint8_t kOatVersion[] = { '1', '2', '5', '\0' }; // ARM Baker narrow thunks.
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";