diff options
-rw-r--r-- | compiler/linker/arm/relative_patcher_arm_base.h | 27 | ||||
-rw-r--r-- | compiler/linker/arm64/relative_patcher_arm64.cc | 60 | ||||
-rw-r--r-- | compiler/linker/arm64/relative_patcher_arm64.h | 13 | ||||
-rw-r--r-- | compiler/linker/arm64/relative_patcher_arm64_test.cc | 144 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 90 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_shared.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 14 |
7 files changed, 290 insertions, 59 deletions
diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h index 2cb1b6c535..47f840fd65 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.h +++ b/compiler/linker/arm/relative_patcher_arm_base.h @@ -43,10 +43,11 @@ class ArmBaseRelativePatcher : public RelativePatcher { enum class ThunkType { kMethodCall, // Method call thunk. kBakerReadBarrierField, // Baker read barrier, load field or array element at known offset. + kBakerReadBarrierArray, // Baker read barrier, array load with index in register. kBakerReadBarrierRoot, // Baker read barrier, GC root load. }; - struct BakerReadBarrierOffsetParams { + struct BakerReadBarrierFieldParams { uint32_t holder_reg; // Holder object for reading lock word. uint32_t base_reg; // Base register, different from holder for large offset. // If base differs from holder, it should be a pre-defined @@ -54,9 +55,16 @@ class ArmBaseRelativePatcher : public RelativePatcher { // The offset is retrieved using introspection. }; + struct BakerReadBarrierArrayParams { + uint32_t base_reg; // Reference to the start of the data. + uint32_t dummy; // Dummy field. + // The index register is retrieved using introspection + // to limit the number of thunks we need to emit. + }; + struct BakerReadBarrierRootParams { uint32_t root_reg; // The register holding the GC root. - uint32_t dummy; + uint32_t dummy; // Dummy field. 
}; struct RawThunkParams { @@ -66,8 +74,12 @@ class ArmBaseRelativePatcher : public RelativePatcher { union ThunkParams { RawThunkParams raw_params; - BakerReadBarrierOffsetParams offset_params; + BakerReadBarrierFieldParams field_params; + BakerReadBarrierArrayParams array_params; BakerReadBarrierRootParams root_params; + static_assert(sizeof(raw_params) == sizeof(field_params), "field_params size check"); + static_assert(sizeof(raw_params) == sizeof(array_params), "array_params size check"); + static_assert(sizeof(raw_params) == sizeof(root_params), "root_params size check"); }; class ThunkKey { @@ -78,9 +90,14 @@ class ArmBaseRelativePatcher : public RelativePatcher { return type_; } - BakerReadBarrierOffsetParams GetOffsetParams() const { + BakerReadBarrierFieldParams GetFieldParams() const { DCHECK(type_ == ThunkType::kBakerReadBarrierField); - return params_.offset_params; + return params_.field_params; + } + + BakerReadBarrierArrayParams GetArrayParams() const { + DCHECK(type_ == ThunkType::kBakerReadBarrierArray); + return params_.array_params; } BakerReadBarrierRootParams GetRootParams() const { diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 551c73b2a4..5c6fb504cf 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -29,6 +29,7 @@ #include "mirror/array-inl.h" #include "oat.h" #include "oat_quick_method_header.h" +#include "read_barrier.h" #include "utils/arm64/assembler_arm64.h" namespace art { @@ -313,7 +314,17 @@ void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* cod uint32_t next_insn = GetInsn(code, literal_offset + 4u); // LDR (immediate) with correct base_reg. CheckValidReg(next_insn & 0x1fu); // Check destination register. 
- CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetOffsetParams().base_reg << 5)); + CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetFieldParams().base_reg << 5)); + break; + } + case ThunkType::kBakerReadBarrierArray: { + DCHECK_GE(code->size() - literal_offset, 8u); + uint32_t next_insn = GetInsn(code, literal_offset + 4u); + // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL), + // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2]. + CheckValidReg(next_insn & 0x1fu); // Check destination register. + CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (key.GetArrayParams().base_reg << 5)); + CheckValidReg((next_insn >> 16) & 0x1f); // Check index register break; } case ThunkType::kBakerReadBarrierRoot: { @@ -344,10 +355,16 @@ ArmBaseRelativePatcher::ThunkKey Arm64RelativePatcher::GetBakerReadBarrierKey( ThunkParams params; switch (type) { case BakerReadBarrierKind::kField: - params.offset_params.base_reg = BakerReadBarrierFirstRegField::Decode(value); - CheckValidReg(params.offset_params.base_reg); - params.offset_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value); - CheckValidReg(params.offset_params.holder_reg); + params.field_params.base_reg = BakerReadBarrierFirstRegField::Decode(value); + CheckValidReg(params.field_params.base_reg); + params.field_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value); + CheckValidReg(params.field_params.holder_reg); + break; + case BakerReadBarrierKind::kArray: + params.array_params.base_reg = BakerReadBarrierFirstRegField::Decode(value); + CheckValidReg(params.array_params.base_reg); + params.array_params.dummy = 0u; + DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg); break; case BakerReadBarrierKind::kGcRoot: params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value); @@ -363,6 +380,9 @@ ArmBaseRelativePatcher::ThunkKey Arm64RelativePatcher::GetBakerReadBarrierKey( 
static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset == static_cast<uint32_t>(ThunkType::kBakerReadBarrierField), "Thunk type translation check."); + static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kArray) + kTypeTranslationOffset == + static_cast<uint32_t>(ThunkType::kBakerReadBarrierArray), + "Thunk type translation check."); static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset == static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot), "Thunk type translation check."); @@ -394,7 +414,7 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, // Introduce a dependency on the lock_word including rb_state, // to prevent load-load reordering, and without using // a memory barrier (which would be more expensive). - __ Add(base_reg, base_reg, Operand(vixl::aarch64::ip0, LSR, 32)); + __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); __ Br(lr); // And return back to the function. // Note: The fake dependency is unnecessary for the slow path. } @@ -419,8 +439,8 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { // and return to the LDR instruction to load the reference. Otherwise, use introspection // to load the reference and call the entrypoint (in IP1) that performs further checks // on the reference and marks it if needed. - auto holder_reg = Register::GetXRegFromCode(key.GetOffsetParams().holder_reg); - auto base_reg = Register::GetXRegFromCode(key.GetOffsetParams().base_reg); + auto holder_reg = Register::GetXRegFromCode(key.GetFieldParams().holder_reg); + auto base_reg = Register::GetXRegFromCode(key.GetFieldParams().base_reg); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip0, ip1); // If base_reg differs from holder_reg, the offset was too large and we must have @@ -444,11 +464,31 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { // Add null check slow path. 
The stack map is at the address pointed to by LR. __ Bind(&throw_npe); int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value(); - __ Ldr(ip0, MemOperand(vixl::aarch64::x19, offset)); + __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset)); __ Br(ip0); } break; } + case ThunkType::kBakerReadBarrierArray: { + auto base_reg = Register::GetXRegFromCode(key.GetArrayParams().base_reg); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + vixl::aarch64::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffset(), 0); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + __ Bind(&slow_path); + MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); + __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset. + __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set). + __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create + // a switch case target based on the index register. + __ Mov(ip0, base_reg); // Move the base register to ip0. + __ Br(ip1); // Jump to the entrypoint's array switch case. + break; + } case ThunkType::kBakerReadBarrierRoot: { // Check if the reference needs to be marked and if so (i.e. 
not null, not marked yet // and it does not have a forwarding address), call the correct introspection entrypoint; @@ -494,6 +534,7 @@ uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(ThunkType type) { case ThunkType::kMethodCall: return kMaxMethodCallPositiveDisplacement; case ThunkType::kBakerReadBarrierField: + case ThunkType::kBakerReadBarrierArray: case ThunkType::kBakerReadBarrierRoot: return kMaxBcondPositiveDisplacement; } @@ -504,6 +545,7 @@ uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(ThunkType type) { case ThunkType::kMethodCall: return kMaxMethodCallNegativeDisplacement; case ThunkType::kBakerReadBarrierField: + case ThunkType::kBakerReadBarrierArray: case ThunkType::kBakerReadBarrierRoot: return kMaxBcondNegativeDisplacement; } diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h index 7887cea5e6..71ab70eda9 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.h +++ b/compiler/linker/arm64/relative_patcher_arm64.h @@ -19,6 +19,7 @@ #include "base/array_ref.h" #include "base/bit_field.h" +#include "base/bit_utils.h" #include "linker/arm/relative_patcher_arm_base.h" namespace art { @@ -28,6 +29,7 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { public: enum class BakerReadBarrierKind : uint8_t { kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. kGcRoot, // GC root load. 
kLast }; @@ -40,6 +42,13 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { BakerReadBarrierSecondRegField::Encode(holder_reg); } + static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); + } + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { CheckValidReg(root_reg); return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | @@ -68,14 +77,14 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { uint32_t patch_offset) OVERRIDE; protected: - static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u; - ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE; std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE; uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE; private: + static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u; + static constexpr size_t kBitsForBakerReadBarrierKind = MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); static constexpr size_t kBitsForRegister = 5u; diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc index b4d35ab2a7..57ea886586 100644 --- a/compiler/linker/arm64/relative_patcher_arm64_test.cc +++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc @@ -18,6 +18,7 @@ #include "linker/relative_patcher_test.h" #include "linker/arm64/relative_patcher_arm64.h" #include "lock_word.h" +#include "mirror/array-inl.h" #include "mirror/object.h" #include "oat_quick_method_header.h" @@ -46,9 +47,15 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { static constexpr uint32_t kBlPlusMax = 0x95ffffffu; static constexpr uint32_t 
kBlMinusMax = 0x96000000u; - // LDR immediate, unsigned offset. + // LDR immediate, 32-bit, unsigned offset. static constexpr uint32_t kLdrWInsn = 0xb9400000u; + // LDR register, 32-bit, LSL #2. + static constexpr uint32_t kLdrWLsl2Insn = 0xb8607800u; + + // LDUR, 32-bit. + static constexpr uint32_t kLdurWInsn = 0xb8400000u; + // ADD/ADDS/SUB/SUBS immediate, 64-bit. static constexpr uint32_t kAddXInsn = 0x91000000u; static constexpr uint32_t kAddsXInsn = 0xb1000000u; @@ -68,7 +75,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu; // CBNZ x17, +0. Bits 5-23 are a placeholder for target offset from PC in units of 4-bytes. - static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011; + static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u; void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { CHECK_LE(pos, code->size()); @@ -188,7 +195,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { std::vector<uint8_t> GenNops(size_t num_nops) { std::vector<uint8_t> result; - result.reserve(num_nops * 4u + 4u); + result.reserve(num_nops * 4u); for (size_t i = 0; i != num_nops; ++i) { PushBackInsn(&result, kNopInsn); } @@ -228,7 +235,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { } else { LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn; } - uint32_t adrp = 0x90000000 | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64) + uint32_t adrp = 0x90000000u | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64) ((disp & 0x3000u) << (29 - 12)) | // immlo = ((disp & 0x3000u) >> 12) is at bit 29, ((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5, // We take the sign bit from the disp, limiting disp to +- 2GiB. 
@@ -471,6 +478,14 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { return patcher->CompileThunk(key); } + std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { + LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); + auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get()); + ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); + return patcher->CompileThunk(key); + } + std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) { LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( 0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)); @@ -488,7 +503,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { (static_cast<uint32_t>(output_[offset + 3]) << 24); } - void TestBakerField(uint32_t offset, uint32_t root_reg); + void TestBakerField(uint32_t offset, uint32_t ref_reg); }; const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = { @@ -885,7 +900,7 @@ TEST_FOR_OFFSETS(LDRW_SPREL_ADD_TEST, 0, 4) TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8) -void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg) { +void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) { uint32_t valid_regs[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. 
@@ -899,7 +914,7 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg uint32_t method_idx = 0u; for (uint32_t base_reg : valid_regs) { for (uint32_t holder_reg : valid_regs) { - uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg; + uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg; const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr}); ASSERT_EQ(kMethodCodeSize, raw_code.size()); ArrayRef<const uint8_t> code(raw_code); @@ -922,7 +937,7 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg ++method_idx; uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg; + uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg; const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr}); ASSERT_EQ(kMethodCodeSize, expected_code.size()); ASSERT_TRUE( @@ -942,7 +957,7 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg if (holder_reg == base_reg) { // Verify that the null-check CBZ uses the correct register, i.e. holder_reg. ASSERT_GE(output_.size() - gray_check_offset, 4u); - ASSERT_EQ(0x34000000 | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001f); + ASSERT_EQ(0x34000000u | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001fu); gray_check_offset +=4u; } // Verify that the lock word for gray bit check is loaded from the holder address. @@ -955,12 +970,12 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg /* ip0 */ 16; EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset)); // Verify the gray bit check. 
- const uint32_t check_gray_bit_witout_offset = - 0x37000000 | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; - EXPECT_EQ(check_gray_bit_witout_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001f); + const uint32_t check_gray_bit_without_offset = + 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; + EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001fu); // Verify the fake dependency. const uint32_t fake_dependency = - 0x8b408000 | // ADD Xd, Xn, Xm, LSR 32 + 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32 (/* ip0 */ 16 << 16) | // Xm = ip0 (base_reg << 5) | // Xn = base_reg base_reg; // Xd = base_reg @@ -973,19 +988,19 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg } } -#define TEST_BAKER_FIELD(offset, root_reg) \ +#define TEST_BAKER_FIELD(offset, ref_reg) \ TEST_F(Arm64RelativePatcherTestDefault, \ - BakerOffset##offset##_##root_reg) { \ - TestBakerField(offset, root_reg); \ + BakerOffset##offset##_##ref_reg) { \ + TestBakerField(offset, ref_reg); \ } -TEST_BAKER_FIELD(/* offset */ 0, /* root_reg */ 0) -TEST_BAKER_FIELD(/* offset */ 8, /* root_reg */ 15) -TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* root_reg */ 29) +TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 15) +TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* ref_reg */ 29) TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) { // One thunk in the middle with maximum distance branches to it from both sides. - // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
constexpr uint32_t kLiteralOffset1 = 4; const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); ArrayRef<const uint8_t> code1(raw_code1); @@ -1046,7 +1061,7 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) { TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) { // Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction // earlier, so the thunk is emitted before the filler. - // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. constexpr uint32_t kLiteralOffset1 = 0; const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn}); ArrayRef<const uint8_t> code1(raw_code1); @@ -1076,7 +1091,7 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) { TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) { // Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. - // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. constexpr uint32_t kLiteralOffset1 = 4; const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); ArrayRef<const uint8_t> code1(raw_code1); @@ -1132,7 +1147,88 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFr ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); } -TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) { +TEST_F(Arm64RelativePatcherTestDefault, BakerArray) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. 
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + // LR and SP/ZR are reserved. + }; + auto ldr = [](uint32_t base_reg) { + uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; + uint32_t ref_reg = (base_reg == 2) ? 3u : 2u; + return kLdrWLsl2Insn | (index_reg << 16) | (base_reg << 5) | ref_reg; + }; + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); + uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); + const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the lock word for gray bit check is loaded 
from the correct address + // before the base_reg which points to the array data. + static constexpr size_t kGrayCheckInsns = 5; + ASSERT_GE(output_.size() - thunk_offset, 4u * kGrayCheckInsns); + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; + ASSERT_LT(offset, 0); + const uint32_t load_lock_word = + kLdurWInsn | + ((offset & 0x1ffu) << 12) | + (base_reg << 5) | + /* ip0 */ 16; + EXPECT_EQ(load_lock_word, GetOutputInsn(thunk_offset)); + // Verify the gray bit check. + const uint32_t check_gray_bit_without_offset = + 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; + EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(thunk_offset + 4u) & 0xfff8001fu); + // Verify the fake dependency. + const uint32_t fake_dependency = + 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32 + (/* ip0 */ 16 << 16) | // Xm = ip0 + (base_reg << 5) | // Xn = base_reg + base_reg; // Xd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn(thunk_offset + 12u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); + } +} + +TEST_F(Arm64RelativePatcherTestDefault, BakerGcRoot) { uint32_t valid_regs[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. @@ -1180,7 +1276,7 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) { // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. ASSERT_GE(output_.size() - thunk_offset, 4u); - ASSERT_EQ(0x34000000 | root_reg, GetOutputInsn(thunk_offset) & 0xff00001f); + ASSERT_EQ(0x34000000u | root_reg, GetOutputInsn(thunk_offset) & 0xff00001fu); // Do not check the rest of the implementation. // The next thunk follows on the next aligned offset. 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 4629c54a17..eee832a732 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -91,6 +91,7 @@ constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB; // Flags controlling the use of link-time generated thunks for Baker read barriers. constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; // Some instructions have special requirements for a temporary, for example @@ -2759,6 +2760,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // Object ArrayGet with Baker's read barrier case. // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); if (index.IsConstant()) { // Array load with a constant index can be treated as a field load. offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); @@ -2769,12 +2771,12 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { obj.W(), offset, maybe_temp, - /* needs_null_check */ true, + /* needs_null_check */ false, /* use_load_acquire */ false); } else { Register temp = WRegisterFrom(locations->GetTemp(0)); codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true); + instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false); } } else { // General case. @@ -5928,9 +5930,9 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( !Runtime::Current()->UseJitCompilation()) { // Note that we do not actually check the value of `GetIsGcMarking()` // to decide whether to mark the loaded GC root or not. 
Instead, we - // load into `temp` the read barrier mark introspection entrypoint. - // If `temp` is null, it means that `GetIsGcMarking()` is false, and - // vice versa. + // load into `temp` (actually IP1) the read barrier mark introspection + // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is + // false, and vice versa. // // We use link-time generated thunks for the slow path. That thunk // checks the reference and jumps to the entrypoint if needed. @@ -6054,24 +6056,24 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins !use_load_acquire && !Runtime::Current()->UseJitCompilation()) { // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded GC root or not. Instead, we - // load into `temp` the read barrier mark introspection entrypoint. - // If `temp` is null, it means that `GetIsGcMarking()` is false, and - // vice versa. + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually IP1) the read barrier mark introspection + // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is + // false, and vice versa. // // We use link-time generated thunks for the slow path. That thunk checks // the holder and jumps to the entrypoint if needed. If the holder is not // gray, it creates a fake dependency and returns to the LDR instruction. // // temp = Thread::Current()->pReadBarrierMarkIntrospection - // lr = &return_address; + // lr = &gray_return_address; // if (temp != nullptr) { // goto field_thunk<holder_reg, base_reg>(lr) // } // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. 
- // GcRoot<mirror::Object> root = *(obj+offset); + // GcRoot<mirror::Object> reference = *(obj+offset); // gray_return_address: DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); @@ -6141,16 +6143,74 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot); + + if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually IP1) the read barrier mark introspection + // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is + // false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto array_thunk<base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. 
+ // GcRoot<mirror::Object> reference = data[index]; + // gray_return_address: + + DCHECK(index.IsValid()); + Register index_reg = RegisterFrom(index, Primitive::kPrimInt); + Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + DCHECK(temps.IsAvailable(ip0)); + DCHECK(temps.IsAvailable(ip1)); + temps.Exclude(ip0, ip1); + uint32_t custom_data = + linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode()); + vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); + + // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip0.GetCode(), 16u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); + __ Ldr(ip1, MemOperand(tr, entry_point_offset)); + __ Add(temp.X(), obj.X(), Operand(data_offset)); + EmissionCheckScope guard(GetVIXLAssembler(), + (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + __ Bind(cbnz_label); + __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Array LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); + return; + } + // Array cells are never volatile variables, therefore array loads // never use Load-Acquire instructions on ARM64. 
 const bool use_load_acquire = false; - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot); GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index 7d1f146587..c39e5f4d3b 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -247,6 +247,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access, access->GetType() == Primitive::kPrimNot) { // For object arrays, the read barrier instrumentation requires // the original array pointer. + // TODO: This can be relaxed for Baker CC. return false; } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 8368026e92..36c7df70ce 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -5377,10 +5377,16 @@ class HArrayGet FINAL : public HExpression<2> { } bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { // TODO: We can be smarter here. - // Currently, the array access is always preceded by an ArrayLength or a NullCheck - // which generates the implicit null check. There are cases when these can be removed - // to produce better code. If we ever add optimizations to do so we should allow an - // implicit check here (as long as the address falls in the first page). + // Currently, unless the array is the result of NewArray, the array access is always + // preceded by some form of a null check necessary for the bounds check, usually + // implicit null check on the ArrayLength input to BoundsCheck or Deoptimize for + // dynamic BCE. 
There are cases when these could be removed to produce better code. + // If we ever add optimizations to do so we should allow an implicit check here + // (as long as the address falls in the first page). + // + // As an example of such fancy optimization, we could eliminate BoundsCheck for + // a = cond ? new int[1] : null; + // a[0]; // The Phi does not need bounds check for either input. return false; } |