diff options
author | 2017-05-03 17:09:25 +0100 | |
---|---|---|
committer | 2017-05-09 15:43:34 +0100 | |
commit | 88abba2b0cb0151d89e16da3e64025878dc2f142 (patch) | |
tree | 231e5551a1b8d3c8bf162c9d0f30916b36ba2742 | |
parent | b9c3a99096c746b09af611e55e11b86600374011 (diff) |
ARM/AOT: Allow 16-bit LDR for Baker read barrier loads.
Test: m test-art-target-gtest
Test: testrunner.py --target on Nexus 6P.
Test: testrunner.py --target on Nexus 6P with heap poisoning enabled.
Test: Repeat the above tests with ART_USE_OLD_ARM_BACKEND=true.
Bug: 29516974
Bug: 30126666
Bug: 36141117
Change-Id: I458f2ec5fe9abead4db06c7595d992945096fb68
-rw-r--r-- | compiler/linker/arm/relative_patcher_thumb2.cc | 112 | ||||
-rw-r--r-- | compiler/linker/arm/relative_patcher_thumb2.h | 39 | ||||
-rw-r--r-- | compiler/linker/arm/relative_patcher_thumb2_test.cc | 245 | ||||
-rw-r--r-- | compiler/linker/arm64/relative_patcher_arm64.h | 2 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 50 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 4 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.cc | 56 | ||||
-rw-r--r-- | compiler/utils/arm/assembler_thumb2.h | 6 | ||||
-rw-r--r-- | runtime/arch/arch_test.cc | 10 | ||||
-rw-r--r-- | runtime/arch/arm/asm_support_arm.h | 20 | ||||
-rw-r--r-- | runtime/arch/arm/entrypoints_init_arm.cc | 24 | ||||
-rw-r--r-- | runtime/arch/arm/quick_entrypoints_arm.S | 179 | ||||
-rw-r--r-- | runtime/oat.h | 2 |
13 files changed, 567 insertions, 182 deletions
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index ced52ff07a..a98aedfc69 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -18,6 +18,7 @@ #include "arch/arm/asm_support_arm.h" #include "art_method.h" +#include "base/bit_utils.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "lock_word.h" @@ -112,12 +113,22 @@ void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* co // Check that the next instruction matches the expected LDR. switch (kind) { case BakerReadBarrierKind::kField: { - DCHECK_GE(code->size() - literal_offset, 8u); - uint32_t next_insn = GetInsn32(code, literal_offset + 4u); - // LDR (immediate) with correct base_reg. - CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. - const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(code->size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(code, literal_offset + 4u); + // LDR (immediate), encoding T3, with correct base_reg. + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); + } else { + DCHECK_GE(code->size() - literal_offset, 6u); + uint32_t next_insn = GetInsn16(code, literal_offset + 4u); + // LDR (immediate), encoding T1, with correct base_reg. + CheckValidReg(next_insn & 0x7u); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3)); + } break; } case BakerReadBarrierKind::kArray: { @@ -131,11 +142,20 @@ void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* co break; } case BakerReadBarrierKind::kGcRoot: { - DCHECK_GE(literal_offset, 4u); - uint32_t prev_insn = GetInsn32(code, literal_offset - 4u); - // LDR (immediate) with correct root_reg. - const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn32(code, literal_offset - 4u); + // LDR (immediate), encoding T3, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); + } else { + DCHECK_GE(literal_offset, 2u); + uint32_t prev_insn = GetInsn16(code, literal_offset - 2u); + // LDR (immediate), encoding T1, with correct root_reg. 
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg); + } break; } default: @@ -160,7 +180,8 @@ void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* co static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler, vixl::aarch32::Register base_reg, vixl::aarch32::MemOperand& lock_word, - vixl::aarch32::Label* slow_path) { + vixl::aarch32::Label* slow_path, + int32_t raw_ldr_offset) { using namespace vixl::aarch32; // NOLINT(build/namespaces) // Load the lock word containing the rb_state. __ Ldr(ip, lock_word); @@ -169,14 +190,7 @@ static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler, static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); __ B(ne, slow_path, /* is_far_target */ false); - static_assert( - BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET, - "Field and array LDR offsets must be the same to reuse the same code."); - // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + __ Add(lr, lr, raw_ldr_offset); // Introduce a dependency on the lock_word including rb_state, // to prevent load-load reordering, and without using // a memory barrier (which would be more expensive). @@ -199,6 +213,7 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& CheckValidReg(base_reg.GetCode()); Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data)); CheckValidReg(holder_reg.GetCode()); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip); // If base_reg differs from holder_reg, the offset was too large and we must have @@ -210,16 +225,30 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& } vixl::aarch32::Label slow_path; MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide) + ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET; + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); __ Bind(&slow_path); const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + - BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET; - MemOperand ldr_half_address(lr, ldr_offset + 2); - __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". - __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. - __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. + raw_ldr_offset; + Register ep_reg(kBakerCcEntrypointRegister); + if (width == BakerReadBarrierWidth::kWide) { + MemOperand ldr_half_address(lr, ldr_offset + 2); + __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". + __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. + __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. 
+ } else { + MemOperand ldr_address(lr, ldr_offset); + __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1. + __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint + ep_reg, // for narrow LDR. + Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)); + __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4. + __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference. + } // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. - __ Bx(Register(kBakerCcEntrypointRegister)); // Jump to the entrypoint. + __ Bx(ep_reg); // Jump to the entrypoint. if (holder_reg.Is(base_reg)) { // Add null check slow path. The stack map is at the address pointed to by LR. __ Bind(&throw_npe); @@ -233,6 +262,7 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); CheckValidReg(base_reg.GetCode()); DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidth::kWide == BakerReadBarrierWidthField::Decode(encoded_data)); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip); vixl::aarch32::Label slow_path; @@ -240,10 +270,11 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); DCHECK_LT(lock_word.GetOffsetImmediate(), 0); - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); __ Bind(&slow_path); const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + - BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; + raw_ldr_offset; MemOperand ldr_address(lr, ldr_offset + 2); __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", // i.e. Rm+32 because the scale in imm2 is 2. @@ -261,6 +292,7 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); CheckValidReg(root_reg.GetCode()); DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip); vixl::aarch32::Label return_label, not_marked, forwarding_address; @@ -280,7 +312,10 @@ void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister // to art_quick_read_barrier_mark_introspection_gc_roots. Register ep_reg(kBakerCcEntrypointRegister); - __ Add(ep_reg, ep_reg, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET)); + int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide) + ? 
BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET; + __ Add(ep_reg, ep_reg, Operand(entrypoint_offset)); __ Mov(ip, root_reg); __ Bx(ep_reg); __ Bind(&forwarding_address); @@ -344,7 +379,7 @@ uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) { void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { DCHECK_LE(offset + 4u, code->size()); - DCHECK_EQ(offset & 1u, 0u); + DCHECK_ALIGNED(offset, 2u); uint8_t* addr = &(*code)[offset]; addr[0] = (value >> 16) & 0xff; addr[1] = (value >> 24) & 0xff; @@ -354,7 +389,7 @@ void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offse uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) { DCHECK_LE(offset + 4u, code.size()); - DCHECK_EQ(offset & 1u, 0u); + DCHECK_ALIGNED(offset, 2u); const uint8_t* addr = &code[offset]; return (static_cast<uint32_t>(addr[0]) << 16) + @@ -369,5 +404,18 @@ uint32_t Thumb2RelativePatcher::GetInsn32(Vector* code, uint32_t offset) { return GetInsn32(ArrayRef<const uint8_t>(*code), offset); } +uint32_t Thumb2RelativePatcher::GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset) { + DCHECK_LE(offset + 2u, code.size()); + DCHECK_ALIGNED(offset, 2u); + const uint8_t* addr = &code[offset]; + return (static_cast<uint32_t>(addr[0]) << 0) + (static_cast<uint32_t>(addr[1]) << 8); +} + +template <typename Vector> +uint32_t Thumb2RelativePatcher::GetInsn16(Vector* code, uint32_t offset) { + static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); + return GetInsn16(ArrayRef<const uint8_t>(*code), offset); +} + } // namespace linker } // namespace art diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h index 7fad245856..7e787d2916 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -35,26 +35,37 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { public: static constexpr uint32_t kBakerCcEntrypointRegister = 4u; - static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { + static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, + uint32_t holder_reg, + bool narrow) { CheckValidReg(base_reg); CheckValidReg(holder_reg); + DCHECK(!narrow || base_reg < 8u) << base_reg; + BakerReadBarrierWidth width = + narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(holder_reg); + BakerReadBarrierSecondRegField::Encode(holder_reg) | + BakerReadBarrierWidthField::Encode(width); } static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { CheckValidReg(base_reg); return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); } - static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) { CheckValidReg(root_reg); + DCHECK(!narrow || root_reg < 8u) << root_reg; + BakerReadBarrierWidth width = + narrow ? 
BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | BakerReadBarrierFirstRegField::Encode(root_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(width); } explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); @@ -86,6 +97,12 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { kLast }; + enum class BakerReadBarrierWidth : uint8_t { + kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled). + kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled). + kLast + }; + static constexpr size_t kBitsForBakerReadBarrierKind = MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); static constexpr size_t kBitsForRegister = 4u; @@ -95,9 +112,14 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; using BakerReadBarrierSecondRegField = BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; + static constexpr size_t kBitsForBakerReadBarrierWidth = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast)); + using BakerReadBarrierWidthField = BitField<BakerReadBarrierWidth, + kBitsForBakerReadBarrierKind + 2 * kBitsForRegister, + kBitsForBakerReadBarrierWidth>; static void CheckValidReg(uint32_t reg) { - DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister); + DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister) << reg; } void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data); @@ -108,6 +130,11 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { template <typename Vector> static uint32_t GetInsn32(Vector* code, uint32_t offset); + static uint32_t GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset); + + template <typename Vector> + static uint32_t GetInsn16(Vector* code, uint32_t offset); + friend class Thumb2RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher); diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc index 2e28349231..af5fa40dc1 100644 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -52,6 +52,9 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { // BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset. static constexpr uint32_t kBneWPlus0 = 0xf0408000u; + // LDR immediate, 16-bit, encoding T1. Bits 6-10 are imm5, 0-2 are Rt, 3-5 are Rn. + static constexpr uint32_t kLdrInsn = 0x6800u; + // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn. 
static constexpr uint32_t kLdrWInsn = 0xf8d00000u; @@ -223,9 +226,11 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { void TestStringReference(uint32_t string_offset); void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) { + std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, + uint32_t holder_reg, + bool narrow) { const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg)); + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg, narrow)); ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); } @@ -237,9 +242,9 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); } - std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) { + std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg, bool narrow) { LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)); + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow)); ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); } @@ -260,7 +265,8 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { (static_cast<uint32_t>(output_[offset + 1]) << 8); } - void TestBakerField(uint32_t offset, uint32_t ref_reg); + void TestBakerFieldWide(uint32_t offset, uint32_t ref_reg); + void TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg); }; const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { @@ -568,7 +574,7 @@ TEST_F(Thumb2RelativePatcherTest, StringReference4) { ASSERT_LT(GetMethodOffset(1u), 0xfcu); } -void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) { +void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) { uint32_t valid_regs[] = { 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. 8, 9, 10, 11, // IP, SP, LR and PC are reserved. 
@@ -584,8 +590,8 @@ void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); ASSERT_EQ(kMethodCodeSize, raw_code.size()); ArrayRef<const uint8_t> code(raw_code); - uint32_t encoded_data = - Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + base_reg, holder_reg, /* narrow */ false); const LinkerPatch patches[] = { LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), }; @@ -608,7 +614,113 @@ void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg ASSERT_TRUE( CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg); + std::vector<uint8_t> expected_thunk = + CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ false); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + size_t gray_check_offset = thunk_offset; + if (holder_reg == base_reg) { + // Verify that the null-check uses the correct register, i.e. holder_reg. + if (holder_reg < 8) { + ASSERT_GE(output_.size() - gray_check_offset, 2u); + ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + gray_check_offset +=2u; + } else { + ASSERT_GE(output_.size() - gray_check_offset, 6u); + ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); + ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ + gray_check_offset += 6u; + } + } + // Verify that the lock word for gray bit check is loaded from the holder address. + ASSERT_GE(output_.size() - gray_check_offset, + 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); + const uint32_t load_lock_word = + kLdrWInsn | + (holder_reg << 16) | + (/* IP */ 12 << 12) | + mirror::Object::MonitorOffset().Uint32Value(); + ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); + // Verify the gray bit check. + DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. + uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); + const uint32_t tst_gray_bit_without_offset = + 0xf0100f00 | (/* IP */ 12 << 16) + | (((ror_shift >> 4) & 1) << 26) // i + | (((ror_shift >> 1) & 7) << 12) // imm3 + | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). + EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); + EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE + // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). + const uint32_t fake_dependency = + 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) + (/* IP */ 12) | // Rm = IP + (base_reg << 16) | // Rn = base_reg + (base_reg << 8); // Rd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. 
+ thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } + } +} + +void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. + }; + DCHECK_ALIGNED(offset, 4u); + DCHECK_LT(offset, 32u); + constexpr size_t kMethodCodeSize = 6u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + if (base_reg >= 8u) { + continue; + } + for (uint32_t holder_reg : valid_regs) { + uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; + const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + base_reg, holder_reg, /* narrow */ true); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + if (base_reg >= 8u) { + continue; + } + for (uint32_t holder_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; + const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; + ASSERT_TRUE( + CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = + CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ true); ASSERT_GT(output_.size(), thunk_offset); ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, @@ -666,15 +778,26 @@ void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg } } -#define TEST_BAKER_FIELD(offset, ref_reg) \ - TEST_F(Thumb2RelativePatcherTest, \ - BakerOffset##offset##_##ref_reg) { \ - TestBakerField(offset, ref_reg); \ +#define TEST_BAKER_FIELD_WIDE(offset, ref_reg) \ + TEST_F(Thumb2RelativePatcherTest, \ + BakerOffsetWide##offset##_##ref_reg) { \ + TestBakerFieldWide(offset, ref_reg); \ } -TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0) -TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 7) -TEST_BAKER_FIELD(/* offset */ 0xffc, /* ref_reg */ 11) +TEST_BAKER_FIELD_WIDE(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD_WIDE(/* offset */ 8, /* ref_reg */ 3) +TEST_BAKER_FIELD_WIDE(/* offset */ 28, /* ref_reg */ 7) +TEST_BAKER_FIELD_WIDE(/* offset */ 0xffc, /* ref_reg */ 11) + +#define TEST_BAKER_FIELD_NARROW(offset, ref_reg) \ + TEST_F(Thumb2RelativePatcherTest, \ + BakerOffsetNarrow##offset##_##ref_reg) { \ + TestBakerFieldNarrow(offset, ref_reg); \ + } + +TEST_BAKER_FIELD_NARROW(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD_NARROW(/* offset */ 8, /* ref_reg */ 3) +TEST_BAKER_FIELD_NARROW(/* offset */ 28, /* ref_reg */ 7) TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { // One thunk in the middle with maximum distance branches to it from both sides. 
@@ -682,8 +805,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { constexpr uint32_t kLiteralOffset1 = 6u; const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); const LinkerPatch patches1[] = { LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), }; @@ -710,7 +833,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { // - thunk size and method 3 pre-header, rounded up (padding in between if needed) // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). - size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); + size_t thunk_size = + CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); size_t filler2_size = 1 * MB - (kLiteralOffset2 + kPcAdjustment) - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) @@ -749,8 +873,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) { constexpr uint32_t kLiteralOffset1 = 4u; const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn}); ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); const LinkerPatch patches1[] = { LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), }; @@ -779,8 +903,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast constexpr uint32_t kLiteralOffset1 = 6u; const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); const LinkerPatch patches1[] = { LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), }; @@ -809,7 +933,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast // - thunk size and method 3 pre-header, rounded up (padding in between if needed) // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). 
- size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); + size_t thunk_size = + CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); size_t filler2_size = 1 * MB - (kReachableFromOffset2 + kPcAdjustment) - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) @@ -929,7 +1054,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerArray) { } } -TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) { +TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) { uint32_t valid_regs[] = { 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. 8, 9, 10, 11, // IP, SP, LR and PC are reserved. @@ -945,7 +1070,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) { ArrayRef<const uint8_t> code(raw_code); const LinkerPatch patches[] = { LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)), + kLiteralOffset, + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ false)), }; AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); } @@ -962,7 +1088,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) { ASSERT_EQ(kMethodCodeSize, expected_code.size()); EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg); + std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ false); ASSERT_GT(output_.size(), thunk_offset); ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, @@ -972,7 +1098,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) { ASSERT_TRUE(false); } - // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. + // Verify that the fast-path null-check uses the correct register, i.e. root_reg. if (root_reg < 8) { ASSERT_GE(output_.size() - thunk_offset, 2u); ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); @@ -988,6 +1114,60 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) { } } +TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + // Not appplicable to high registers. + }; + constexpr size_t kMethodCodeSize = 6u; + constexpr size_t kLiteralOffset = 2u; + uint32_t method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg; + const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ true)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. 
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg; + const std::vector<uint8_t> expected_code = RawCode({ldr, bne}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ true); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. + ASSERT_GE(output_.size() - thunk_offset, 2u); + ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } +} + TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) { // Test 1MiB of patches to the same thunk to stress-test different large offsets. // (The low bits are not that important but the location of the high bits is easy to get wrong.) @@ -998,7 +1178,8 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) { patches.reserve(num_patches); const uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0); + uint32_t encoded_data = + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0, /* narrow */ false); for (size_t i = 0; i != num_patches; ++i) { PushBackInsn(&code, ldr); PushBackInsn(&code, kBneWPlus0); @@ -1067,7 +1248,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) { // this pushes the first GC root thunk's pending MaxNextOffset() before the method call // thunk's pending MaxNextOffset() which needs to be adjusted. ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment, - CompileBakerGcRootThunk(/* root_reg */ 0).size()); + CompileBakerGcRootThunk(/* root_reg */ 0, /* narrow */ false).size()); static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8"); constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment; constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment; @@ -1080,9 +1261,9 @@ TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) { ldr2, kBneWPlus0, // Second GC root LDR with read barrier. 
}); uint32_t encoded_data1 = - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1); + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1, /* narrow */ false); uint32_t encoded_data2 = - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2); + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2, /* narrow */ false); const LinkerPatch last_method_patches[] = { LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1), LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2), diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h index d1ab410a7e..02a5b1ef8f 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.h +++ b/compiler/linker/arm64/relative_patcher_arm64.h @@ -100,7 +100,7 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; static void CheckValidReg(uint32_t reg) { - DCHECK(reg < 30u && reg != 16u && reg != 17u); + DCHECK(reg < 30u && reg != 16u && reg != 17u) << reg; } void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 35dccd645d..8650aee819 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -90,13 +90,17 @@ static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instru } static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) { - DCHECK(down_cast<Thumb2Assembler*>(codegen->GetAssembler())->IsForced32Bit()); + ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(codegen->GetAssembler())); __ BindTrackedLabel(bne_label); Label placeholder_label; __ b(&placeholder_label, NE); // Placeholder, patched at link-time. __ Bind(&placeholder_label); } +static inline bool CanEmitNarrowLdr(Register rt, Register rn, uint32_t offset) { + return ArmAssembler::IsLowRegister(rt) && ArmAssembler::IsLowRegister(rn) && offset < 32u; +} + static constexpr int kRegListThreshold = 4; // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, @@ -8049,8 +8053,9 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct // return_address: CheckLastTempIsBakerCcEntrypointRegister(instruction); + bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); uint32_t custom_data = - linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg); + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow); Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); // entrypoint_reg = @@ -8063,16 +8068,18 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct Label return_address; __ AdrCode(LR, &return_address); __ CmpConstant(kBakerCcEntrypointRegister, 0); - static_assert( - BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, - "GC root LDR must be 2 32-bit instructions (8B) before the return address label."); // Currently the offset is always within range. If that changes, // we shall have to split the load the same way as for fields. 
DCHECK_LT(offset, kReferenceLoadMinFarOffset); - ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); + DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit()); + ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow); + int old_position = GetAssembler()->GetBuffer()->GetPosition(); __ LoadFromOffset(kLoadWord, root_reg, obj, offset); EmitPlaceholderBne(codegen_, bne_label); __ Bind(&return_address); + DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(), + narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); } else { // Note that we do not actually check the value of // `GetIsGcMarking()` to decide whether to mark the loaded GC @@ -8172,10 +8179,12 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. - // GcRoot<mirror::Object> reference = *(obj+offset); + // HeapReference<mirror::Object> reference = *(obj+offset); // gray_return_address: DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + Register ref_reg = ref.AsRegister<Register>(); + bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset); Register base = obj; if (offset >= kReferenceLoadMinFarOffset) { base = temp.AsRegister<Register>(); @@ -8183,10 +8192,14 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); __ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u)); offset &= (kReferenceLoadMinFarOffset - 1u); + // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large + // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely + // increase the overall code size when taking the generated thunks into account. + DCHECK(!narrow); } CheckLastTempIsBakerCcEntrypointRegister(instruction); uint32_t custom_data = - linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj); + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj, narrow); Label* bne_label = NewBakerReadBarrierPatch(custom_data); // entrypoint_reg = @@ -8199,19 +8212,20 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr Label return_address; __ AdrCode(LR, &return_address); __ CmpConstant(kBakerCcEntrypointRegister, 0); - ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); EmitPlaceholderBne(this, bne_label); - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 32-bit instruction (4B) before the return address label; " - " 2 32-bit instructions (8B) for heap poisoning."); - Register ref_reg = ref.AsRegister<Register>(); DCHECK_LT(offset, kReferenceLoadMinFarOffset); + DCHECK(!down_cast<Thumb2Assembler*>(GetAssembler())->IsForced32Bit()); + ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()), !narrow); + int old_position = GetAssembler()->GetBuffer()->GetPosition(); __ LoadFromOffset(kLoadWord, ref_reg, base, offset); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); __ Bind(&return_address); + DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(), + narrow ? 
BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); return; } @@ -8257,7 +8271,7 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. - // GcRoot<mirror::Object> reference = data[index]; + // HeapReference<mirror::Object> reference = data[index]; // gray_return_address: DCHECK(index.IsValid()); @@ -8282,15 +8296,15 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr Label return_address; __ AdrCode(LR, &return_address); __ CmpConstant(kBakerCcEntrypointRegister, 0); - ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); EmitPlaceholderBne(this, bne_label); - static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Array LDR must be 1 32-bit instruction (4B) before the return address label; " - " 2 32-bit instructions (8B) for heap poisoning."); + ScopedForce32Bit maybe_force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); + int old_position = GetAssembler()->GetBuffer()->GetPosition(); __ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor)); DCHECK(!needs_null_check); // The thunk cannot handle the null check. GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); __ Bind(&return_address); + DCHECK_EQ(old_position - GetAssembler()->GetBuffer()->GetPosition(), + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); return; } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index ed0a64c0d9..54aa03cb5a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -6094,7 +6094,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. - // GcRoot<mirror::Object> reference = *(obj+offset); + // HeapReference<mirror::Object> reference = *(obj+offset); // gray_return_address: DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); @@ -6189,7 +6189,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. 
- // GcRoot<mirror::Object> reference = data[index]; + // HeapReference<mirror::Object> reference = data[index]; // gray_return_address: DCHECK(index.IsValid()); diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 8417f845a4..b2e0a9186e 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -124,6 +124,10 @@ static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Lab __ bind(&placeholder_label); } +static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) { + return rt.IsLow() && rn.IsLow() && offset < 32u; +} + class EmitAdrCode { public: EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label) @@ -8158,8 +8162,9 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( UseScratchRegisterScope temps(GetVIXLAssembler()); ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); - uint32_t custom_data = - linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); + bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); + uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData( + root_reg.GetCode(), narrow); vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); // entrypoint_reg = @@ -8174,15 +8179,16 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( vixl32::Label return_address; EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); __ cmp(kBakerCcEntrypointRegister, Operand(0)); - static_assert( - BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, - "GC root LDR must be 2 32-bit instructions (8B) before the return address label."); // Currently the offset is always within range. If that changes, // we shall have to split the load the same way as for fields. DCHECK_LT(offset, kReferenceLoadMinFarOffset); - __ ldr(EncodingSize(Wide), root_reg, MemOperand(obj, offset)); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset)); EmitPlaceholderBne(codegen_, bne_label); __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); } else { // Note that we do not actually check the value of // `GetIsGcMarking()` to decide whether to mark the loaded GC @@ -8283,10 +8289,12 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. 
- // GcRoot<mirror::Object> reference = *(obj+offset); + // HeapReference<mirror::Object> reference = *(obj+offset); // gray_return_address: DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset); vixl32::Register base = obj; if (offset >= kReferenceLoadMinFarOffset) { base = RegisterFrom(temp); @@ -8294,12 +8302,15 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); offset &= (kReferenceLoadMinFarOffset - 1u); + // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large + // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely + // increase the overall code size when taking the generated thunks into account. + DCHECK(!narrow); } UseScratchRegisterScope temps(GetVIXLAssembler()); ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - base.GetCode(), - obj.GetCode()); + base.GetCode(), obj.GetCode(), narrow); vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); // entrypoint_reg = @@ -8316,19 +8327,24 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); __ cmp(kBakerCcEntrypointRegister, Operand(0)); EmitPlaceholderBne(this, bne_label); - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 32-bit instruction (4B) before the return address label; " - " 2 32-bit instructions (8B) for heap poisoning."); - vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); - __ ldr(EncodingSize(Wide), ref_reg, MemOperand(base, offset)); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } - // Note: We need a Wide NEG for the unpoisoning. + // Note: We need a specific width for the unpoisoning NEG. if (kPoisonHeapReferences) { - __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + if (narrow) { + // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB). + __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0)); + } else { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } } __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); return; } @@ -8374,7 +8390,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. 
- // GcRoot<mirror::Object> reference = data[index]; + // HeapReference<mirror::Object> reference = data[index]; // gray_return_address: DCHECK(index.IsValid()); @@ -8404,9 +8420,7 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); __ cmp(kBakerCcEntrypointRegister, Operand(0)); EmitPlaceholderBne(this, bne_label); - static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Array LDR must be 1 32-bit instruction (4B) before the return address label; " - " 2 32-bit instructions (8B) for heap poisoning."); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); DCHECK(!needs_null_check); // The thunk cannot handle the null check. // Note: We need a Wide NEG for the unpoisoning. @@ -8414,6 +8428,8 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); } __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); return; } diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 5c36110cf6..2ff9018510 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -924,9 +924,11 @@ class Thumb2Assembler FINAL : public ArmAssembler { class ScopedForce32Bit { public: - explicit ScopedForce32Bit(Thumb2Assembler* assembler) + explicit ScopedForce32Bit(Thumb2Assembler* assembler, bool force = true) : assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) { - assembler->Force32Bit(); + if (force) { + assembler->Force32Bit(); + } } ~ScopedForce32Bit() { diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc index 1a5e39f0f7..d6056c0ece 100644 --- a/runtime/arch/arch_test.cc +++ b/runtime/arch/arch_test.cc @@ -71,11 +71,15 @@ static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARG #undef FRAME_SIZE_SAVE_REFS_AND_ARGS static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING; #undef FRAME_SIZE_SAVE_EVERYTHING +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET #undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET -#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET -#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET #undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET } // namespace arm namespace arm64 { diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h index f1f1766ad4..8f2fd6ecc9 100644 --- a/runtime/arch/arm/asm_support_arm.h +++ b/runtime/arch/arm/asm_support_arm.h @@ -24,18 +24,25 @@ #define FRAME_SIZE_SAVE_REFS_AND_ARGS 112 #define FRAME_SIZE_SAVE_EVERYTHING 192 +// The offset from the art_quick_read_barrier_mark_introspection (used for field +// loads with 32-bit LDR) to the entrypoint for field loads with 16-bit LDR, +// i.e. art_quick_read_barrier_mark_introspection_narrow. 
+#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET 0x20 +// The offsets from art_quick_read_barrier_mark_introspection to the GC root entrypoints, +// i.e. art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}. +#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET 0x80 +#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET 0xc0 // The offset from art_quick_read_barrier_mark_introspection to the array switch cases, // i.e. art_quick_read_barrier_mark_introspection_arrays. #define BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET 0x100 -// The offset from art_quick_read_barrier_mark_introspection to the GC root entrypoint, -// i.e. art_quick_read_barrier_mark_introspection_gc_roots. -#define BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET 0xc0 // The offset of the reference load LDR from the return address in LR for field loads. #ifdef USE_HEAP_POISONING -#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -8 +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET -8 +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET -4 #else -#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -4 +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET -4 +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET -2 #endif // The offset of the reference load LDR from the return address in LR for array loads. #ifdef USE_HEAP_POISONING @@ -44,7 +51,8 @@ #define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -4 #endif // The offset of the reference load LDR from the return address in LR for GC root loads. -#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET -8 +#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET -8 +#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET -6 // Flag for enabling R4 optimization in arm runtime // #define ARM_R4_SUSPEND_FLAG diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index 6b7247773a..919b0afc40 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -53,8 +53,11 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_narrow(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*); -extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_wide(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_narrow( + mirror::Object*); // Used by soft float. // Single-precision FP arithmetics. @@ -86,18 +89,27 @@ void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr; - // Check that array switch cases are at appropriate offsets from the introspection entrypoint. // For the alignment check, strip the Thumb mode bit. DCHECK_ALIGNED(reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection) - 1u, 256u); + // Check the field narrow entrypoint offset from the introspection entrypoint. 
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 6b7247773a..919b0afc40 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -53,8 +53,11 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);

extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_narrow(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*);
-extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_wide(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots_narrow(
+    mirror::Object*);

// Used by soft float.
// Single-precision FP arithmetics.
@@ -86,18 +89,27 @@ void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) {
  qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr;
  qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr;

-  // Check that array switch cases are at appropriate offsets from the introspection entrypoint.
  // For the alignment check, strip the Thumb mode bit.
  DCHECK_ALIGNED(reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection) - 1u, 256u);
+  // Check the field narrow entrypoint offset from the introspection entrypoint.
+  intptr_t narrow_diff =
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_narrow) -
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
+  DCHECK_EQ(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET, narrow_diff);
+  // Check array switch cases offsets from the introspection entrypoint.
  intptr_t array_diff =
      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_arrays) -
      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
  DCHECK_EQ(BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET, array_diff);
-  // Check that the GC root entrypoint is at appropriate offset from the introspection entrypoint.
-  intptr_t gc_roots_diff =
-      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots) -
+  // Check the GC root entrypoint offsets from the introspection entrypoint.
+  intptr_t gc_roots_wide_diff =
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots_wide) -
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
+  DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET, gc_roots_wide_diff);
+  intptr_t gc_roots_narrow_diff =
+      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots_narrow) -
      reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection);
-  DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET, gc_roots_diff);
+  DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET, gc_roots_narrow_diff);
  // The register 12, i.e. IP, is reserved, so there is no art_quick_read_barrier_mark_reg12.
  // We're using the entry to hold a pointer to the introspection entrypoint instead.
  qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_introspection : nullptr;
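The DCHECKs above exploit the fact that on Thumb every code address carries the mode bit (bit 0) set: the bit cancels when two entrypoint addresses are subtracted, so the raw difference can be compared against the asm_support_arm.h offsets directly, whereas the 256-byte alignment check must strip it first. A small illustration with made-up addresses:

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical Thumb entrypoint addresses (bit 0 set for Thumb mode);
  // these values are illustrative, not real entrypoint locations.
  intptr_t introspection = 0x70001001;
  intptr_t narrow = 0x70001021;         // introspection + 0x20
  intptr_t gc_roots_wide = 0x70001081;  // introspection + 0x80

  // The Thumb mode bit cancels in the subtraction...
  assert(narrow - introspection == 0x20);
  assert(gc_roots_wide - introspection == 0x80);
  // ...but must be stripped for the alignment check of the entrypoint itself.
  assert(((introspection - 1) & 255) == 0);
  return 0;
}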
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index fa21208fcb..d0c6728fd9 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -2189,7 +2189,7 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
    .byte (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2
.endm

-#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
+#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
#error "Array and field introspection code sharing requires same LDR offset."
#endif

.macro BRBMI_ARRAY_LOAD index_reg
@@ -2208,7 +2208,10 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
    BRBMI_BKPT_FILL_4B
.endm

-.macro BRBMI_SLOW_PATH ldr_offset
+.macro BRBMI_RUNTIME_CALL
+    // Note: This macro generates exactly 22 bytes of code. The core register
+    // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions.
+    push {r0-r3, r7, lr}            // Save return address and caller-save registers.
    .cfi_adjust_cfa_offset 24
    .cfi_rel_offset r0, 0
@@ -2234,11 +2237,72 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
    .cfi_restore r3
    .cfi_restore r7
    .cfi_restore lr
+.endm
+
+.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix
+    // If reference is null, just return it in the right register.
+    cmp ip, #0
+    beq .Lmark_introspection_return\label_suffix
+    // Use R4 as temp and check the mark bit of the reference.
+    ldr r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tst r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    beq .Lmark_introspection_unmarked\label_suffix
+.Lmark_introspection_return\label_suffix:
+.endm
+
+.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix
+.Lmark_introspection_unmarked\label_suffix:
+    // Check if the top two bits are one, if this is the case it is a forwarding address.
+#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
+    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
+    // the highest bits and the "forwarding address" state to have all bits set.
+#error "Unexpected lock word state shift or forwarding address state value."
+#endif
+    cmp r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
+    bhs .Lmark_introspection_forwarding_address\label_suffix
+.endm
+
+.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix
+.Lmark_introspection_forwarding_address\label_suffix:
+    // Note: This macro generates exactly 6 bytes of code, the branch is near.
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    lsl ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+    b .Lmark_introspection_return\label_suffix
+.endm
+
+.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
    // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
    ldrh r4, [lr, #(-1 + \ldr_offset + 2)]
-    lsr r4, r4, #12                   // Extract `ref_reg`.
-    b .Lmark_introspection_return_switch
+.endm
+
+.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
+    // Load the 16-bit instruction. Adjust for the thumb state in LR.
+    ldrh r4, [lr, #(-1 + \ldr_offset)]
+.endm
+
+.macro BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH gc_root_ldr_offset, label_suffix
+    .balign 64
+    .thumb_func
+    .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
+    .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
+    .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
+art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
+    BRBMI_RUNTIME_CALL
+    // Load the LDR (or the half of it) that contains Rt.
+    BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \gc_root_ldr_offset
+    b .Lmark_introspection_extract_register_and_return\label_suffix
+    // We've used 28 bytes since the "gc_roots" entrypoint (22 bytes for
+    // BRBMI_RUNTIME_CALL, 4 bytes for LDRH and 2 bytes for the branch). Squeeze
+    // the 6 byte forwarding address extraction here across the 32-byte boundary.
+    BRBMI_EXTRACT_FORWARDING_ADDRESS \label_suffix
+    // And the slow path taking exactly 30 bytes (6 bytes for the forwarding
+    // address check, 22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near
+    // branch) shall take the rest of the 32-byte section (within a cache line).
+    BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
+    BRBMI_RUNTIME_CALL
+    b .Lmark_introspection_return\label_suffix
.endm
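In C++ terms, the macros above do the following with the lock word: TST tests the mark bit; a single CMP/BHS recognizes the forwarding-address state because that state value (3) fills the top two bits, so any lock word at or above it must be a forwarding address; and one LSL both discards the state bits and rebuilds the aligned address. A sketch with illustrative bit positions; the mark bit position and the address shift are assumptions for this sketch, not values copied from the ART headers:

#include <cassert>
#include <cstdint>

// Illustrative constants mirroring the asm_support macros.
constexpr uint32_t kStateShift = 30;             // LOCK_WORD_STATE_SHIFT
constexpr uint32_t kStateForwardingAddress = 3;  // LOCK_WORD_STATE_FORWARDING_ADDRESS
constexpr uint32_t kMarkBitShifted = 1u << 29;   // LOCK_WORD_MARK_BIT_MASK_SHIFTED (assumed)
constexpr uint32_t kFwdAddressShift = 3;         // forwarding address shift (assumed)

bool IsMarked(uint32_t lock_word) {
  return (lock_word & kMarkBitShifted) != 0;  // "tst ...; beq" in the macro.
}

bool IsForwardingAddress(uint32_t lock_word) {
  // "cmp ...; bhs" works because the forwarding state sets both top bits.
  return lock_word >= (kStateForwardingAddress << kStateShift);
}

uint32_t ExtractForwardingAddress(uint32_t lock_word) {
  // One left shift clears the top state bits and restores the aligned address.
  return lock_word << kFwdAddressShift;
}

int main() {
  uint32_t fwd = (kStateForwardingAddress << kStateShift) | (0x12340u >> kFwdAddressShift);
  assert(!IsMarked(0u));
  assert(IsForwardingAddress(fwd));
  assert(ExtractForwardingAddress(fwd) == 0x12340u);
  return 0;
}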

    /*
@@ -2249,14 +2313,16 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
     *
     * The entrypoint is called through a thunk that differs across load kinds.
     * For field and array loads the LDR instruction in generated code follows
-     * the branch to the thunk, i.e. the LDR is at [LR, #(-4 - 1)] where the -1
-     * is an adjustment for the Thumb mode bit in LR, and the thunk knows the
-     * holder and performs the gray bit check, returning to the LDR instruction
-     * if the object is not gray, so this entrypoint no longer needs to know
-     * anything about the holder. For GC root loads, the LDR instruction in
-     * generated code precedes the branch to the thunk, i.e. the LDR is at
-     * [LR, #(-8 - 1)] where the -1 is again the Thumb mode bit adjustment, and
-     * the thunk does not do the gray bit check.
+     * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning)
+     * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where
+     * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk
+     * knows the holder and performs the gray bit check, returning to the LDR
+     * instruction if the object is not gray, so this entrypoint no longer
+     * needs to know anything about the holder. For GC root loads, the LDR
+     * instruction in generated code precedes the branch to the thunk, i.e. the
+     * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1)
+     * where the -1 is again the Thumb mode bit adjustment, and the thunk does
+     * not do the gray bit check.
     *
     * For field accesses and array loads with a constant index the thunk loads
     * the reference into IP using introspection and calls the main entrypoint,
@@ -2288,11 +2354,29 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
     *
     * The code structure is
     *   art_quick_read_barrier_mark_introspection:
-     *     Over 128 bytes for the main entrypoint code.
-     *     Padding to 192 bytes if needed.
-     *   art_quick_read_barrier_mark_introspection_gc_roots:
-     *     GC root entrypoint code.
-     *     Padding to 256 bytes if needed.
+     *     Up to 32 bytes code for main entrypoint fast-path code for fields
+     *     (and array elements with constant offset) with LDR encoding T3;
+     *     jumps to the switch in the "narrow" entrypoint.
+     *     Padding to 32 bytes if needed.
+     *   art_quick_read_barrier_mark_introspection_narrow:
+     *     Up to 48 bytes code for fast path code for fields (and array
+     *     elements with constant offset) with LDR encoding T1, ending in the
+     *     return switch instruction TBB and the table with switch offsets.
+     *     Padding to 80 bytes if needed.
+     *   .Lmark_introspection_return_switch_case_r0:
+     *     Exactly 48 bytes of code for the return switch cases (12 cases,
+     *     including BKPT for the reserved registers).
+     *     Ends at 128 bytes total.
+     *   art_quick_read_barrier_mark_introspection_gc_roots_wide:
+     *     GC root entrypoint code for LDR encoding T3 (28 bytes).
+     *     Forwarding address extraction for LDR encoding T3 (6 bytes).
+     *     Slow path for main entrypoint for LDR encoding T3 (30 bytes).
+     *     Ends at 192 bytes total.
+     *   art_quick_read_barrier_mark_introspection_gc_roots_narrow:
+     *     GC root entrypoint code for LDR encoding T1 (28 bytes).
+     *     Forwarding address extraction for LDR encoding T1 (6 bytes).
+     *     Slow path for main entrypoint for LDR encoding T1 (30 bytes).
+     *     Ends at 256 bytes total.
     *   art_quick_read_barrier_mark_introspection_arrays:
     *     Exactly 128 bytes for array load switch cases (16x2 instructions).
     */
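The entrypoints need the number of the register that the compiled LDR targets so the return switch can put the marked reference back in the right place, and the wide and narrow paths recover it differently: for the 32-bit LDR (encoding T3) Rt sits in bits 15:12 of the instruction's second halfword, hence the "lsr r4, r4, #12" below; for the 16-bit LDR (encoding T1) Rt sits in bits 2:0, hence "and r4, r4, #7". A sketch of the extraction with hand-assembled example encodings:

#include <cassert>
#include <cstdint>

// Encoding T3 (32-bit LDR): Rt is in bits 15:12 of the second halfword.
uint32_t RtFromWideLdrSecondHalfword(uint16_t halfword) {
  return halfword >> 12;  // Mirrors "lsr r4, r4, #12".
}

// Encoding T1 (16-bit LDR): Rt is in bits 2:0 of the instruction.
uint32_t RtFromNarrowLdr(uint16_t insn) {
  return insn & 0x7u;  // Mirrors "and r4, r4, #7".
}

int main() {
  // "ldr.w r1, [r0, #8]" assembles to 0xf8d0 0x1008 (encoding T3): Rt = 1.
  assert(RtFromWideLdrSecondHalfword(0x1008) == 1u);
  // "ldr r1, [r0, #8]" assembles to 0x6881 (encoding T1): Rt = 1.
  assert(RtFromNarrowLdr(0x6881) == 1u);
  return 0;
}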
@@ -2302,17 +2386,30 @@ ENTRY art_quick_read_barrier_mark_introspection
    // (R4 is reserved for the entrypoint address.)
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF ip
-    // If reference is null, just return it in the right register.
-    cmp ip, #0
-    beq .Lmark_introspection_return
-    // Use R4 as temp and check the mark bit of the reference.
-    ldr r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    tst r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
-    beq .Lmark_introspection_unmarked
-.Lmark_introspection_return:
-    // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
-    ldrh r4, [lr, #(-1 + BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET + 2)]
+    // Check for null or marked, lock word is loaded into R4.
+    BRBMI_CHECK_NULL_AND_MARKED _wide
+    // Load the half of the instruction that contains Rt.
+    BRBMI_LOAD_RETURN_REG_FROM_CODE_wide BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+.Lmark_introspection_extract_register_and_return_wide:
    lsr r4, r4, #12                   // Extract `ref_reg`.
+    b .Lmark_introspection_return_switch
+
+    .balign 32
+    .thumb_func
+    .type art_quick_read_barrier_mark_introspection_narrow, #function
+    .hidden art_quick_read_barrier_mark_introspection_narrow
+    .global art_quick_read_barrier_mark_introspection_narrow
+art_quick_read_barrier_mark_introspection_narrow:
+    // At this point, IP contains the reference, R4 can be freely used.
+    // (R4 is reserved for the entrypoint address.)
+    // For heap poisoning, the reference is poisoned, so unpoison it first.
+    UNPOISON_HEAP_REF ip
+    // Check for null or marked, lock word is loaded into R4.
+    BRBMI_CHECK_NULL_AND_MARKED _narrow
+    // Load the 16-bit instruction.
+    BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+.Lmark_introspection_extract_register_and_return_narrow:
+    and r4, r4, #7                    // Extract `ref_reg`.
.Lmark_introspection_return_switch:
    tbb [pc, r4]                      // Jump to the switch case.
.Lmark_introspection_return_table:
@@ -2320,32 +2417,8 @@ ENTRY art_quick_read_barrier_mark_introspection
    .balign 16
    BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE

-    .balign 16
-.Lmark_introspection_unmarked:
-    // Check if the top two bits are one, if this is the case it is a forwarding address.
-#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
-    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
-    // the highest bits and the "forwarding address" state to have all bits set.
-#error "Unexpected lock word state shift or forwarding address state value."
-#endif
-    cmp r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
-    bhs .Lmark_introspection_forwarding_address
-    BRBMI_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET
-
-    .balign 8
-.Lmark_introspection_forwarding_address:
-    // Shift left by the forwarding address shift. This clears out the state bits since they are
-    // in the top 2 bits of the lock word.
-    lsl ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
-    b .Lmark_introspection_return
-
-    .balign 64
-    .thumb_func
-    .type art_quick_read_barrier_mark_introspection_gc_roots, #function
-    .hidden art_quick_read_barrier_mark_introspection_gc_roots
-    .global art_quick_read_barrier_mark_introspection_gc_roots
-art_quick_read_barrier_mark_introspection_gc_roots:
-    BRBMI_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
+    BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
+    BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow

    .balign 256
    .thumb_func
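The "tbb [pc, r4]" dispatch above and the ".byte (case - table) / 2" entries in the switch-case macro rely on TBB's semantics: the branch target is the table base plus twice the byte fetched at the given index, and with the byte table placed immediately after the 4-byte TBB instruction the PC equals the table address. A sketch of that computation with hypothetical layout numbers (real case offsets come from the assembler, not from this arithmetic):

#include <cassert>
#include <cstdint>

// TBB ("table branch byte"): branch to base + 2 * table[index].
uint32_t TbbTarget(uint32_t table_base, const uint8_t* table, uint32_t index) {
  return table_base + 2u * table[index];
}

int main() {
  // Hypothetical layout: table at 0x1000, case for r0 at 0x1010, case for
  // r1 at 0x1014; the stored bytes are the halved distances, matching the
  // ".byte (case - table) / 2" pattern in the return switch table.
  const uint8_t table[] = { (0x1010 - 0x1000) / 2, (0x1014 - 0x1000) / 2 };
  assert(TbbTarget(0x1000, table, 0) == 0x1010u);
  assert(TbbTarget(0x1000, table, 1) == 0x1014u);
  return 0;
}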
diff --git a/runtime/oat.h b/runtime/oat.h
index a38eebc188..e119b81bff 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@ class InstructionSetFeatures;
class PACKED(4) OatHeader {
 public:
  static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '1', '2', '4', '\0' };  // New compiler filter names.
+  static constexpr uint8_t kOatVersion[] = { '1', '2', '5', '\0' };  // ARM Baker narrow thunks.

  static constexpr const char* kImageLocationKey = "image-location";
  static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";