44 files changed, 3538 insertions, 455 deletions
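The heart of the Thumb2 changes below is patching a placeholder BNE into a conditional branch that targets a Baker read barrier thunk. As a minimal standalone sketch (not part of this commit; PackBcondT3 and the sample values are illustrative only), the signed 21-bit displacement packing performed by PatchBakerReadBarrierBranch() can be exercised in isolation:

#include <cassert>
#include <cstdint>

// Packs a signed 21-bit displacement (bit 0 clear) into a Thumb2 B<c>.W, encoding T3,
// mirroring the shifts in PatchBakerReadBarrierBranch() below.
uint32_t PackBcondT3(uint32_t insn, uint32_t disp) {
  assert((disp >> 20) == 0u || (disp >> 20) == 0xfffu);  // Must fit in signed 21 bits.
  return insn |
      ((disp << (26 - 20)) & 0x04000000u) |  // disp bit 20 -> insn bit 26, "S".
      ((disp >> (19 - 11)) & 0x00000800u) |  // disp bit 19 -> insn bit 11, "J2".
      ((disp >> (18 - 13)) & 0x00002000u) |  // disp bit 18 -> insn bit 13, "J1".
      ((disp << (16 - 12)) & 0x003f0000u) |  // disp bits 12-17 -> insn bits 16-21, "imm6".
      ((disp >> 1) & 0x000007ffu);           // disp bits 1-11 -> insn bits 0-10, "imm11".
}

int main() {
  // 0xf0408000 is the unpatched BNE +0 placeholder; a displacement of 8 bytes
  // lands in imm11 as 4, producing 0xf0408004.
  assert(PackBcondT3(0xf0408000u, 8u) == 0xf0408004u);
  return 0;
}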
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index 1a5d79ce70..f2ccc4bcd3 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -16,9 +16,15 @@ #include "linker/arm/relative_patcher_thumb2.h" +#include "arch/arm/asm_support_arm.h" #include "art_method.h" #include "compiled_method.h" -#include "utils/arm/assembler_thumb2.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" +#include "lock_word.h" +#include "mirror/object.h" +#include "mirror/array-inl.h" +#include "read_barrier.h" +#include "utils/arm/assembler_arm_vixl.h" namespace art { namespace linker { @@ -32,6 +38,12 @@ static constexpr int32_t kPcDisplacement = 4; constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement; +// Maximum positive and negative displacement for a conditional branch measured from the patch +// location. (Signed 21-bit displacement with the last bit 0 has range [-2^20, 2^20-2] measured +// from the Thumb2 PC pointing right after the B.cond, i.e. 4 bytes later than the patch location.) +constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u + kPcDisplacement; +constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20) - kPcDisplacement; + Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider) : ArmBaseRelativePatcher(provider, kThumb2) { } @@ -84,29 +96,244 @@ void Thumb2RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, SetInsn32(code, literal_offset, insn); } -void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; +void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, + const LinkerPatch& patch, + uint32_t patch_offset) { + DCHECK_ALIGNED(patch_offset, 2u); + uint32_t literal_offset = patch.LiteralOffset(); + DCHECK_ALIGNED(literal_offset, 2u); + DCHECK_LT(literal_offset, code->size()); + uint32_t insn = GetInsn32(code, literal_offset); + DCHECK_EQ(insn, 0xf0408000); // BNE +0 (unpatched) + ThunkKey key = GetBakerReadBarrierKey(patch); + if (kIsDebugBuild) { + // Check that the next instruction matches the expected LDR. + switch (key.GetType()) { + case ThunkType::kBakerReadBarrierField: { + DCHECK_GE(code->size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(code, literal_offset + 4u); + // LDR (immediate) with correct base_reg. + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. + CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (key.GetFieldParams().base_reg << 16)); + break; + } + case ThunkType::kBakerReadBarrierArray: { + DCHECK_GE(code->size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(code, literal_offset + 4u); + // LDR (register) with correct base_reg and imm2=2, i.e. LDR Rt, [Rn, Rm, LSL #2]. + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. + CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (key.GetArrayParams().base_reg << 16)); + CheckValidReg(next_insn & 0xf); // Check index register. + break; + } + case ThunkType::kBakerReadBarrierRoot: { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn32(code, literal_offset - 4u); + // LDR (immediate) with correct root_reg.
+ CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (key.GetRootParams().root_reg << 12)); + break; + } + default: + LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType()); + UNREACHABLE(); + } + } + uint32_t target_offset = GetThunkTargetOffset(key, patch_offset); + DCHECK_ALIGNED(target_offset, 4u); + uint32_t disp = target_offset - (patch_offset + kPcDisplacement); + DCHECK((disp >> 20) == 0u || (disp >> 20) == 0xfffu); // 21-bit signed. + insn |= ((disp << (26 - 20)) & 0x04000000u) | // Shift bit 20 to 26, "S". + ((disp >> (19 - 11)) & 0x00000800u) | // Shift bit 19 to 11, "J2". + ((disp >> (18 - 13)) & 0x00002000u) | // Shift bit 18 to 13, "J1". + ((disp << (16 - 12)) & 0x003f0000u) | // Shift bits 12-17 to 16-21, "imm6". + ((disp >> (1 - 0)) & 0x000007ffu); // Shift bits 1-11 to 0-10, "imm11". + SetInsn32(code, literal_offset, insn); } ArmBaseRelativePatcher::ThunkKey Thumb2RelativePatcher::GetBakerReadBarrierKey( - const LinkerPatch& patch ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; - UNREACHABLE(); + const LinkerPatch& patch) { + DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch); + uint32_t value = patch.GetBakerCustomValue1(); + BakerReadBarrierKind type = BakerReadBarrierKindField::Decode(value); + ThunkParams params; + switch (type) { + case BakerReadBarrierKind::kField: + params.field_params.base_reg = BakerReadBarrierFirstRegField::Decode(value); + CheckValidReg(params.field_params.base_reg); + params.field_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value); + CheckValidReg(params.field_params.holder_reg); + break; + case BakerReadBarrierKind::kArray: + params.array_params.base_reg = BakerReadBarrierFirstRegField::Decode(value); + CheckValidReg(params.array_params.base_reg); + params.array_params.dummy = 0u; + DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg); + break; + case BakerReadBarrierKind::kGcRoot: + params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value); + CheckValidReg(params.root_params.root_reg); + params.root_params.dummy = 0u; + DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg); + break; + default: + LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(type); + UNREACHABLE(); + } + constexpr uint8_t kTypeTranslationOffset = 1u; + static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset == + static_cast<uint32_t>(ThunkType::kBakerReadBarrierField), + "Thunk type translation check."); + static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kArray) + kTypeTranslationOffset == + static_cast<uint32_t>(ThunkType::kBakerReadBarrierArray), + "Thunk type translation check."); + static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset == + static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot), + "Thunk type translation check."); + return ThunkKey(static_cast<ThunkType>(static_cast<uint32_t>(type) + kTypeTranslationOffset), + params); +} + +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler, + vixl::aarch32::Register base_reg, + vixl::aarch32::MemOperand& lock_word, + vixl::aarch32::Label* slow_path) { + using namespace vixl::aarch32; // NOLINT(build/namespaces) + // Load the lock word containing the rb_state. + __ Ldr(ip, lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state.
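// (The static_asserts just below pin this down: with white == 0 and gray == 1, TST against
// LockWord::kReadBarrierStateMaskShifted leaves Z set for white objects, so the subsequent
// BNE takes the slow path only when the object is gray.)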
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); + __ B(ne, slow_path, /* is_far_target */ false); + static_assert( + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET, + "Field and array LDR offsets must be the same to reuse the same code."); + // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Field LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(ip, LSR, 32)); + __ Bx(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path. } std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) { - DCHECK(key.GetType() == ThunkType::kMethodCall); - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. + using namespace vixl::aarch32; // NOLINT(build/namespaces) ArenaPool pool; ArenaAllocator arena(&pool); - arm::Thumb2Assembler assembler(&arena); - assembler.LoadFromOffset( - arm::kLoadWord, arm::PC, arm::R0, - ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); - assembler.bkpt(0); + arm::ArmVIXLAssembler assembler(&arena); + + switch (key.GetType()) { + case ThunkType::kMethodCall: + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + assembler.LoadFromOffset( + arm::kLoadWord, + vixl::aarch32::pc, + vixl::aarch32::r0, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + __ Bkpt(0); + break; + case ThunkType::kBakerReadBarrierField: { + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint (in kBakerCcEntrypointRegister) + // that performs further checks on the reference and marks it if needed. + Register holder_reg(key.GetFieldParams().holder_reg); + Register base_reg(key.GetFieldParams().base_reg); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + // If base_reg differs from holder_reg, the offset was too large and we must have + // emitted an explicit null check before the load. Otherwise, we need to null-check + // the holder as we do not necessarily do that check before going to the thunk. + vixl::aarch32::Label throw_npe; + if (holder_reg.Is(base_reg)) { + __ CompareAndBranchIfZero(holder_reg, &throw_npe, /* is_far_target */ false); + } + vixl::aarch32::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). 
*/ -1 + + BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET; + MemOperand ldr_half_address(lr, ldr_offset + 2); + __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". + __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. + __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. + // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. + __ Bx(Register(kBakerCcEntrypointRegister)); // Jump to the entrypoint. + if (holder_reg.Is(base_reg)) { + // Add null check slow path. The stack map is at the address pointed to by LR. + __ Bind(&throw_npe); + int32_t offset = GetThreadOffset<kArmPointerSize>(kQuickThrowNullPointer).Int32Value(); + __ Ldr(ip, MemOperand(/* Thread* */ vixl::aarch32::r9, offset)); + __ Bx(ip); + } + break; + } + case ThunkType::kBakerReadBarrierArray: { + Register base_reg(key.GetArrayParams().base_reg); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl::aarch32::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffsetImmediate(), 0); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; + MemOperand ldr_address(lr, ldr_offset + 2); + __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", + // i.e. Rm+32 because the scale in imm2 is 2. + Register ep_reg(kBakerCcEntrypointRegister); // Insert ip into the entrypoint address to create + __ Bfi(ep_reg, ip, 3, 6); // a switch case target based on the index register. + __ Mov(ip, base_reg); // Move the base register to IP. + __ Bx(ep_reg); // Jump to the entrypoint's array switch case. + break; + } + case ThunkType::kBakerReadBarrierRoot: { + // Check if the reference needs to be marked and if so (i.e. not null, not marked yet + // and it does not have a forwarding address), call the correct introspection entrypoint; + // otherwise return the reference (or the extracted forwarding address). + // There is no gray bit check for GC roots. + Register root_reg(key.GetRootParams().root_reg); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl::aarch32::Label return_label, not_marked, forwarding_address; + __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false); + MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value()); + __ Ldr(ip, lock_word); + __ Tst(ip, LockWord::kMarkBitStateMaskShifted); + __ B(eq, &not_marked); + __ Bind(&return_label); + __ Bx(lr); + __ Bind(&not_marked); + static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3, + "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in " + " the highest bits and the 'forwarding address' state to have all bits set"); + __ Cmp(ip, Operand(0xc0000000)); + __ B(hs, &forwarding_address); + // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister + // to art_quick_read_barrier_mark_introspection_gc_roots.
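// (The GC-root variant is assumed to sit at a fixed distance from the introspection
// entrypoint, so a single ADD of BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET from
// asm_support_arm.h retargets ep_reg without another load from the Thread*.)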
+ Register ep_reg(kBakerCcEntrypointRegister); + __ Add(ep_reg, ep_reg, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET)); + __ Mov(ip, root_reg); + __ Bx(ep_reg); + __ Bind(&forwarding_address); + __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift); + __ Bx(lr); + break; + } + } + assembler.FinalizeCode(); std::vector<uint8_t> thunk_code(assembler.CodeSize()); MemoryRegion code(thunk_code.data(), thunk_code.size()); @@ -114,14 +341,28 @@ std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) { return thunk_code; } +#undef __ + uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(ThunkType type) { - DCHECK(type == ThunkType::kMethodCall); - return kMaxMethodCallPositiveDisplacement; + switch (type) { + case ThunkType::kMethodCall: + return kMaxMethodCallPositiveDisplacement; + case ThunkType::kBakerReadBarrierField: + case ThunkType::kBakerReadBarrierArray: + case ThunkType::kBakerReadBarrierRoot: + return kMaxBcondPositiveDisplacement; + } } uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(ThunkType type) { - DCHECK(type == ThunkType::kMethodCall); - return kMaxMethodCallNegativeDisplacement; + switch (type) { + case ThunkType::kMethodCall: + return kMaxMethodCallNegativeDisplacement; + case ThunkType::kBakerReadBarrierField: + case ThunkType::kBakerReadBarrierArray: + case ThunkType::kBakerReadBarrierRoot: + return kMaxBcondNegativeDisplacement; + } } void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h index ab37802d0f..9eb06894d3 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -17,6 +17,10 @@ #ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ #define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ +#include "arch/arm/registers_arm.h" +#include "base/array_ref.h" +#include "base/bit_field.h" +#include "base/bit_utils.h" #include "linker/arm/relative_patcher_arm_base.h" namespace art { @@ -24,6 +28,37 @@ namespace linker { class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { public: + static constexpr uint32_t kBakerCcEntrypointRegister = 4u; + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. 
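// (kLast is a sentinel; MinimumBitsToStore(kLast) below sizes BakerReadBarrierKindField.)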
+ kLast + }; + + static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg); + } + + static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); + } + + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { + CheckValidReg(root_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); + } + explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); void PatchCall(std::vector<uint8_t>* code, @@ -45,6 +80,22 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE; private: + static constexpr uint32_t kInvalidEncodedReg = /* pc is invalid */ 15u; + + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBitsForRegister = 4u; + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister); + } + void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset); diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc index f08270d934..8bc3eb4505 100644 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -14,8 +14,12 @@ * limitations under the License. */ +#include "base/casts.h" #include "linker/relative_patcher_test.h" #include "linker/arm/relative_patcher_thumb2.h" +#include "lock_word.h" +#include "mirror/array-inl.h" +#include "mirror/object.h" #include "oat_quick_method_header.h" namespace art { @@ -34,13 +38,99 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; static const uint32_t kPcInsnOffset; + // The PC in Thumb mode is 4 bytes after the instruction location. + static constexpr uint32_t kPcAdjustment = 4u; + // Branches within range [-256, 256) can be created from these by adding the low 8 bits. - static constexpr uint32_t kBlPlus0 = 0xf000f800; - static constexpr uint32_t kBlMinus256 = 0xf7ffff00; + static constexpr uint32_t kBlPlus0 = 0xf000f800u; + static constexpr uint32_t kBlMinus256 = 0xf7ffff00u; // Special BL values. - static constexpr uint32_t kBlPlusMax = 0xf3ffd7ff; - static constexpr uint32_t kBlMinusMax = 0xf400d000; + static constexpr uint32_t kBlPlusMax = 0xf3ffd7ffu; + static constexpr uint32_t kBlMinusMax = 0xf400d000u; + + // BNE +0, 32-bit, encoding T3. 
Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset. + static constexpr uint32_t kBneWPlus0 = 0xf0408000u; + + // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-19 are Rn. + static constexpr uint32_t kLdrWInsn = 0xf8d00000u; + + // LDR immediate, negative offset, encoding T4. Bits 0-7 are the offset to subtract. + static constexpr uint32_t kLdrNegativeOffset = 0xf8500c00u; + + // LDR register, lsl #2. Bits 4-5 are the imm2, i.e. the lsl shift. + static constexpr uint32_t kLdrRegLsl2 = 0xf8500020u; + + // NOP instructions. + static constexpr uint32_t kNopInsn = 0xbf00u; + static constexpr uint32_t kNopWInsn = 0xf3af8000u; + + void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { + CHECK_LE(pos, code->size()); + if (IsUint<16>(insn)) { + const uint8_t insn_code[] = { + static_cast<uint8_t>(insn), + static_cast<uint8_t>(insn >> 8), + }; + static_assert(sizeof(insn_code) == 2u, "Invalid sizeof(insn_code)."); + code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); + } else { + const uint8_t insn_code[] = { + static_cast<uint8_t>(insn >> 16), + static_cast<uint8_t>(insn >> 24), + static_cast<uint8_t>(insn), + static_cast<uint8_t>(insn >> 8), + }; + static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); + code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); + } + } + + void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) { + InsertInsn(code, code->size(), insn); + } + + std::vector<uint8_t> GenNops(size_t num_nops) { + std::vector<uint8_t> result; + result.reserve(num_nops * 2u); + for (size_t i = 0; i != num_nops; ++i) { + PushBackInsn(&result, kNopInsn); + } + return result; + } + + std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) { + std::vector<uint8_t> raw_code; + size_t number_of_16_bit_insns = + std::count_if(insns.begin(), insns.end(), [](uint32_t x) { return IsUint<16>(x); }); + raw_code.reserve(insns.size() * 4u - number_of_16_bit_insns * 2u); + for (uint32_t insn : insns) { + PushBackInsn(&raw_code, insn); + } + return raw_code; + } + + uint32_t BneWWithOffset(uint32_t bne_offset, uint32_t target_offset) { + if (!IsAligned<2u>(bne_offset)) { + LOG(ERROR) << "Unaligned bne_offset: " << bne_offset; + return 0xffffffffu; // Fails code diff later. + } + if (!IsAligned<2u>(target_offset)) { + LOG(ERROR) << "Unaligned target_offset: " << target_offset; + return 0xffffffffu; // Fails code diff later. + } + uint32_t diff = target_offset - bne_offset - kPcAdjustment; + DCHECK_ALIGNED(diff, 2u); + if ((diff >> 20) != 0 && (diff >> 20) != 0xfffu) { + LOG(ERROR) << "Target out of range: " << diff; + return 0xffffffffu; // Fails code diff later.
+ } + return kBneWPlus0 | ((diff >> 1) & 0x7ffu) // imm11 + | (((diff >> 12) & 0x3fu) << 16) // imm6 + | (((diff >> 18) & 1) << 13) // J1 + | (((diff >> 19) & 1) << 11) // J2 + | (((diff >> 20) & 1) << 26); // S + } bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, const ArrayRef<const LinkerPatch>& method1_patches, @@ -125,19 +215,57 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { std::vector<uint8_t> result; result.reserve(num_nops * 2u + 4u); for (size_t i = 0; i != num_nops; ++i) { - result.push_back(0x00); - result.push_back(0xbf); + PushBackInsn(&result, kNopInsn); } - result.push_back(static_cast<uint8_t>(bl >> 16)); - result.push_back(static_cast<uint8_t>(bl >> 24)); - result.push_back(static_cast<uint8_t>(bl)); - result.push_back(static_cast<uint8_t>(bl >> 8)); + PushBackInsn(&result, bl); return result; } void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset); void TestStringReference(uint32_t string_offset); void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); + + std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) { + const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg)); + auto* patcher = down_cast<Thumb2RelativePatcher*>(patcher_.get()); + ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); + return patcher->CompileThunk(key); + } + + std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { + LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); + auto* patcher = down_cast<Thumb2RelativePatcher*>(patcher_.get()); + ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); + return patcher->CompileThunk(key); + } + + std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) { + LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)); + auto* patcher = down_cast<Thumb2RelativePatcher*>(patcher_.get()); + ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); + return patcher->CompileThunk(key); + } + + uint32_t GetOutputInsn32(uint32_t offset) { + CHECK_LE(offset, output_.size()); + CHECK_GE(output_.size() - offset, 4u); + return (static_cast<uint32_t>(output_[offset]) << 16) | + (static_cast<uint32_t>(output_[offset + 1]) << 24) | + (static_cast<uint32_t>(output_[offset + 2]) << 0) | + (static_cast<uint32_t>(output_[offset + 3]) << 8); + } + + uint16_t GetOutputInsn16(uint32_t offset) { + CHECK_LE(offset, output_.size()); + CHECK_GE(output_.size() - offset, 2u); + return (static_cast<uint32_t>(output_[offset]) << 0) | + (static_cast<uint32_t>(output_[offset + 1]) << 8); + } + + void TestBakerField(uint32_t offset, uint32_t ref_reg); }; const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { @@ -164,7 +292,7 @@ const uint32_t Thumb2RelativePatcherTest::kPcInsnOffset = 8u; void Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset) { dex_cache_arrays_begin_ = dex_cache_arrays_begin; - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::DexCacheArrayPatch(0u, nullptr, kPcInsnOffset, element_offset), LinkerPatch::DexCacheArrayPatch(4u, nullptr, kPcInsnOffset, element_offset), }; @@ -175,7 +303,7 @@ void 
Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_ void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) { constexpr uint32_t kStringIndex = 1u; string_index_to_offset_map_.Put(kStringIndex, string_offset); - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex), LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex), }; @@ -214,7 +342,7 @@ void Thumb2RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const Linker } TEST_F(Thumb2RelativePatcherTest, CallSelf) { - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); @@ -227,11 +355,11 @@ TEST_F(Thumb2RelativePatcherTest, CallSelf) { } TEST_F(Thumb2RelativePatcherTest, CallOther) { - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - LinkerPatch method2_patches[] = { + const LinkerPatch method2_patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), }; AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); @@ -254,7 +382,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOther) { } TEST_F(Thumb2RelativePatcherTest, CallTrampoline) { - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); @@ -274,7 +402,7 @@ TEST_F(Thumb2RelativePatcherTest, CallTrampolineTooFar) { constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. ArrayRef<const uint8_t> method3_code(method3_raw_code); ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - LinkerPatch method3_patches[] = { + const LinkerPatch method3_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index), }; @@ -303,7 +431,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) { constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs. ArrayRef<const uint8_t> method1_code(method1_raw_code); ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), }; @@ -325,7 +453,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) { constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs. ArrayRef<const uint8_t> method3_code(method3_raw_code); ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - LinkerPatch method3_patches[] = { + const LinkerPatch method3_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), }; @@ -347,7 +475,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs. ArrayRef<const uint8_t> method1_code(method1_raw_code); ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), }; @@ -382,7 +510,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. 
ArrayRef<const uint8_t> method3_code(method3_raw_code); ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - LinkerPatch method3_patches[] = { + const LinkerPatch method3_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), }; @@ -445,5 +573,535 @@ TEST_F(Thumb2RelativePatcherTest, StringReference4) { ASSERT_LT(GetMethodOffset(1u), 0xfcu); } +void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. + }; + DCHECK_ALIGNED(offset, 4u); + DCHECK_LT(offset, 4 * KB); + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + for (uint32_t holder_reg : valid_regs) { + uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); + const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + uint32_t encoded_data = + Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + for (uint32_t holder_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); + const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; + ASSERT_TRUE( + CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + size_t gray_check_offset = thunk_offset; + if (holder_reg == base_reg) { + // Verify that the null-check uses the correct register, i.e. holder_reg. + if (holder_reg < 8) { + ASSERT_GE(output_.size() - gray_check_offset, 2u); + ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + gray_check_offset +=2u; + } else { + ASSERT_GE(output_.size() - gray_check_offset, 6u); + ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); + ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ + gray_check_offset += 6u; + } + } + // Verify that the lock word for gray bit check is loaded from the holder address. 
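// (From the holder, not the load's base register: when holder_reg != base_reg, the compiled
// code already null-checked the holder explicitly, but the lock word being tested still
// belongs to the holder object rather than to the split address the LDR uses.)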
+ ASSERT_GE(output_.size() - gray_check_offset, + 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); + const uint32_t load_lock_word = + kLdrWInsn | + (holder_reg << 16) | + (/* IP */ 12 << 12) | + mirror::Object::MonitorOffset().Uint32Value(); + ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); + // Verify the gray bit check. + DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. + uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); + const uint32_t tst_gray_bit_without_offset = + 0xf0100f00 | (/* IP */ 12 << 16) + | (((ror_shift >> 4) & 1) << 26) // i + | (((ror_shift >> 1) & 7) << 12) // imm3 + | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). + EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); + EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE + // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). + const uint32_t fake_dependency = + 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) + (/* IP */ 12) | // Rm = IP + (base_reg << 16) | // Rn = base_reg + (base_reg << 8); // Rd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } + } +} + +#define TEST_BAKER_FIELD(offset, ref_reg) \ + TEST_F(Thumb2RelativePatcherTest, \ + BakerOffset##offset##_##ref_reg) { \ + TestBakerField(offset, ref_reg); \ + } + +TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 7) +TEST_BAKER_FIELD(/* offset */ 0xffc, /* ref_reg */ 11) + +TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { + // One thunk in the middle with maximum distance branches to it from both sides. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. + constexpr uint32_t kLiteralOffset1 = 6u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = + Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + constexpr uint32_t expected_thunk_offset = + kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); + static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); + size_t filler1_size = expected_thunk_offset - + RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + // Enforce thunk reservation with a tiny method. + AddCompiledMethod(MethodRef(3u), kNopCode); + + constexpr uint32_t kLiteralOffset2 = 4; + static_assert(IsAligned<kArmAlignment>(kLiteralOffset2 + kPcAdjustment), + "PC for BNE must be aligned."); + + // Allow reaching the thunk from the very beginning of a method almost 1MiB away. Backward branch + // reaches the full 1MiB but we need to take PC adjustment into account. 
Things to subtract: + // - thunk size and method 3 pre-header, rounded up (padding in between if needed) + // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) + // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). + size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); + size_t filler2_size = + 1 * MB - (kLiteralOffset2 + kPcAdjustment) + - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) + - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); + ArrayRef<const uint8_t> filler2_code(raw_filler2_code); + AddCompiledMethod(MethodRef(4u), filler2_code); + + const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code2(raw_code2); + const LinkerPatch patches2[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), + }; + AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); + + Link(); + + uint32_t first_method_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(5u); + EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); + + const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; + const uint32_t bne_max_backward = kBneWPlus0 | 0x04000000; + const std::vector<uint8_t> expected_code1 = + RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); + const std::vector<uint8_t> expected_code2 = RawCode({kNopWInsn, bne_max_backward, kLdrWInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) { + // Based on the first part of BakerOffsetThunkInTheMiddle but the BNE is one instruction + // earlier, so the thunk is emitted before the filler. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
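// (With all three fields zero, kLdrWInsn == 0xf8d00000 already encodes LDR r0, [r0, #0],
// so no register or offset bits need to be OR-ed in.)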
+ constexpr uint32_t kLiteralOffset1 = 4u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = + Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + constexpr uint32_t expected_thunk_offset = + kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement + 2 */ (1u << 20); + static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); + size_t filler1_size = expected_thunk_offset - + RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + Link(); + + const uint32_t bne = BneWWithOffset(kLiteralOffset1, RoundUp(raw_code1.size(), kArmAlignment)); + const std::vector<uint8_t> expected_code1 = RawCode({kNopWInsn, bne, kLdrWInsn, kNopInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast) { + // Based on the BakerOffsetThunkInTheMiddle but the BNE in the last method is preceded + // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. + constexpr uint32_t kLiteralOffset1 = 6u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = + Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + constexpr uint32_t expected_thunk_offset = + kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); + static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); + size_t filler1_size = expected_thunk_offset - + RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + // Enforce thunk reservation with a tiny method. + AddCompiledMethod(MethodRef(3u), kNopCode); + + constexpr uint32_t kReachableFromOffset2 = 4; + constexpr uint32_t kLiteralOffset2 = kReachableFromOffset2 + 2; + static_assert(IsAligned<kArmAlignment>(kReachableFromOffset2 + kPcAdjustment), + "PC for BNE must be aligned."); + + // If not for the extra NOP, this would allow reaching the thunk from the BNE + // of a method 1MiB away. Backward branch reaches the full 1MiB but we need to take + // PC adjustment into account. Things to subtract: + // - thunk size and method 3 pre-header, rounded up (padding in between if needed) + // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) + // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). 
+ size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); + size_t filler2_size = + 1 * MB - (kReachableFromOffset2 + kPcAdjustment) + - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) + - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); + ArrayRef<const uint8_t> filler2_code(raw_filler2_code); + AddCompiledMethod(MethodRef(4u), filler2_code); + + // Extra 16-bit NOP compared to BakerOffsetThunkInTheMiddle. + const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code2(raw_code2); + const LinkerPatch patches2[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), + }; + AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); + + Link(); + + uint32_t first_method_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(5u); + EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); + + const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; + const uint32_t bne_last = + BneWWithOffset(kLiteralOffset2, RoundUp(raw_code2.size(), kArmAlignment)); + const std::vector<uint8_t> expected_code1 = + RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); + const std::vector<uint8_t> expected_code2 = + RawCode({kNopWInsn, kNopInsn, bne_last, kLdrWInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerArray) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. + }; + auto ldr = [](uint32_t base_reg) { + uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; + uint32_t ref_reg = (base_reg == 2) ? 3u : 2u; + return kLdrRegLsl2 | index_reg | (base_reg << 16) | (ref_reg << 12); + }; + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. 
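// (Each method body here is kMethodCodeSize = 8 bytes, so rounding the last method's
// code size up to kArmAlignment gives the offset of the first linker-emitted thunk.)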
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the lock word for gray bit check is loaded from the correct address + // before the base_reg which points to the array data. + ASSERT_GE(output_.size() - thunk_offset, + 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; + ASSERT_LT(offset, 0); + ASSERT_GT(offset, -256); + const uint32_t load_lock_word = + kLdrNegativeOffset | + (-offset & 0xffu) | + (base_reg << 16) | + (/* IP */ 12 << 12); + EXPECT_EQ(load_lock_word, GetOutputInsn32(thunk_offset)); + // Verify the gray bit check. + DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. + uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); + const uint32_t tst_gray_bit_without_offset = + 0xf0100f00 | (/* IP */ 12 << 16) + | (((ror_shift >> 4) & 1) << 26) // i + | (((ror_shift >> 1) & 7) << 12) // imm3 + | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). + EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(thunk_offset + 4u)); + EXPECT_EQ(0xd100u, GetOutputInsn16(thunk_offset + 8u) & 0xff00u); // BNE + // Verify the fake dependency. + const uint32_t fake_dependency = + 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) + (/* IP */ 12) | // Rm = IP + (base_reg << 16) | // Rn = base_reg + (base_reg << 8); // Rd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn32(thunk_offset + 14u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } +} + +TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. 
+ }; + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 4u; + uint32_t method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12); + const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12); + const std::vector<uint8_t> expected_code = RawCode({ldr, bne}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. + if (root_reg < 8) { + ASSERT_GE(output_.size() - thunk_offset, 2u); + ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + } else { + ASSERT_GE(output_.size() - thunk_offset, 6u); + ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); + ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ + } + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } +} + +TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) { + // Test 1MiB of patches to the same thunk to stress-test different large offsets. + // (The low bits are not that important but the location of the high bits is easy to get wrong.) + std::vector<uint8_t> code; + code.reserve(1 * MB); + const size_t num_patches = 1 * MB / 8u; + std::vector<LinkerPatch> patches; + patches.reserve(num_patches); + const uint32_t ldr = + kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0); + for (size_t i = 0; i != num_patches; ++i) { + PushBackInsn(&code, ldr); + PushBackInsn(&code, kBneWPlus0); + patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data)); + } + ASSERT_EQ(1 * MB, code.size()); + ASSERT_EQ(num_patches, patches.size()); + AddCompiledMethod(MethodRef(1u), + ArrayRef<const uint8_t>(code), + ArrayRef<const LinkerPatch>(patches)); + Link(); + + // The thunk is right after the method code. 
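// (1MiB of 8-byte LDR+BNE pairs ends exactly at the 1 * MB boundary where the single
// shared thunk is placed, so the BNEs at offsets 8u * i + 4u cover displacements from
// almost 1MiB down to zero, exercising the S, J1, J2 and imm6 high bits.)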
+ DCHECK_ALIGNED(1 * MB, kArmAlignment); + std::vector<uint8_t> expected_code; + for (size_t i = 0; i != num_patches; ++i) { + PushBackInsn(&expected_code, ldr); + PushBackInsn(&expected_code, BneWWithOffset(8u * i + 4u, 1 * MB)); + } + EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) { + // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());` + // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily + // hold when we're reserving thunks of different sizes. This test exposes the situation + // by using Baker thunks and a method call thunk. + + // Add a method call patch that can reach up to method 1 offset + 16MiB. + uint32_t method_idx = 0u; + constexpr size_t kMethodCallLiteralOffset = 2u; + constexpr uint32_t kMissingMethodIdx = 2u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0}); + const LinkerPatch method1_patches[] = { + LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, kMissingMethodIdx), + }; + ArrayRef<const uint8_t> code1(raw_code1); + ++method_idx; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches)); + + // Skip kMissingMethodIdx. + ++method_idx; + ASSERT_EQ(kMissingMethodIdx, method_idx); + // Add a filler method sized so that the code for the next method starts 1MiB + // after the code for method 1. + size_t filler_size = + 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> filler_code = GenNops(filler_size / 2u); + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); + // Add 14 methods with 1MiB code+header, making the code for the next method start 1MiB + // before the currently scheduled MaxNextOffset() for the method call thunk. + for (uint32_t i = 0; i != 14; ++i) { + filler_size = 1 * MB - sizeof(OatQuickMethodHeader); + filler_code = GenNops(filler_size / 2u); + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); + } + + // Add 2 Baker GC root patches to the last method, one that would allow the thunk at + // 1MiB + kArmAlignment, i.e. kArmAlignment after the method call thunk, and the + // second that needs it kArmAlignment after that. Given the size of the GC root thunk + // is more than the space required by the method call thunk plus kArmAlignment, + // this pushes the first GC root thunk's pending MaxNextOffset() before the method call + // thunk's pending MaxNextOffset() which needs to be adjusted. + ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment, + CompileBakerGcRootThunk(/* root_reg */ 0).size()); + static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8"); + constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment; + constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment; + // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | (root_reg << 12)`. + const uint32_t ldr1 = kLdrWInsn | (/* root_reg */ 1 << 12); + const uint32_t ldr2 = kLdrWInsn | (/* root_reg */ 2 << 12); + const std::vector<uint8_t> last_method_raw_code = RawCode({ + kNopInsn, // Padding before first GC root read barrier. + ldr1, kBneWPlus0, // First GC root LDR with read barrier.
+ ldr2, kBneWPlus0, // Second GC root LDR with read barrier. + }); + uint32_t encoded_data1 = + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1); + uint32_t encoded_data2 = + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2); + const LinkerPatch last_method_patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1), + LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), + ArrayRef<const uint8_t>(last_method_raw_code), + ArrayRef<const LinkerPatch>(last_method_patches)); + + // The main purpose of the test is to check that Link() does not cause a crash. + Link(); + + ASSERT_EQ(15 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u)); +} + } // namespace linker } // namespace art diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index ebd578c5cd..3c6e277ff9 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -16,6 +16,7 @@ #include "code_generator_arm.h" +#include "arch/arm/asm_support_arm.h" #include "arch/arm/instruction_set_features_arm.h" #include "art_method.h" #include "code_generator_utils.h" @@ -25,6 +26,7 @@ #include "gc/accounting/card_table.h" #include "intrinsics.h" #include "intrinsics_arm.h" +#include "linker/arm/relative_patcher_thumb2.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "thread.h" @@ -60,10 +62,41 @@ static constexpr DRegister DTMP = D31; static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; +// Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle +// offset < 4KiB. For offsets >= 4KiB, the load shall be emitted as two or more instructions. +// For the Baker read barrier implementation using link-generated thunks we need to split +// the offset explicitly. +constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB; + +// Flags controlling the use of link-time generated thunks for Baker read barriers. +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; + +// The reserved entrypoint register for link-time generated thunks. +const Register kBakerCcEntrypointRegister = R4; + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value() +static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instruction) { + DCHECK_EQ(static_cast<uint32_t>(kBakerCcEntrypointRegister), + linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister); + DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u); + DCHECK_EQ(kBakerCcEntrypointRegister, + instruction->GetLocations()->GetTemp( + instruction->GetLocations()->GetTempCount() - 1u).AsRegister<Register>()); +} + +static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) { + DCHECK(down_cast<Thumb2Assembler*>(codegen->GetAssembler())->IsForced32Bit()); + __ BindTrackedLabel(bne_label); + Label placeholder_label; + __ b(&placeholder_label, NE); // Placeholder, patched at link-time. 
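// (Binding the label immediately after the branch encodes BNE +0, i.e. exactly the
// 0xf0408000 pattern that PatchBakerReadBarrierBranch() expects before patching.)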
+ __ Bind(&placeholder_label); +} + static constexpr int kRegListThreshold = 4; // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, @@ -1962,6 +1995,7 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -5281,7 +5315,18 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // If link-time thunks for the Baker read barrier are enabled, for AOT + // loads we need a temporary only if the offset is too big. + if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } else { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -5747,11 +5792,35 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. - // Also need for String compression feature. - if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) - || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation() && + instruction->GetIndex()->IsConstant()) { + // Array loads with constant index are treated as field loads. + // If link-time thunks for the Baker read barrier are enabled, for AOT + // constant index loads we need a temporary only if the offset is too big. + uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); + uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); + offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot); + if (offset >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation() && + !instruction->GetIndex()->IsConstant()) { + // We need a non-scratch temporary for the array data pointer. + locations->AddTemp(Location::RequiresRegister()); + // And we always need the reserved entrypoint register. 
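+ // (The link-time generated thunks are shared between methods and expect the
+ // entrypoint in this fixed register, so it is block-allocated here rather
+ // than left to the register allocator.)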
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ // Also need a temporary for the String compression feature.
 locations->AddTemp(Location::RequiresRegister());
 }
 }
@@ -5863,8 +5932,20 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
 Location temp = locations->GetTemp(0);
 // Note that a potential implicit null check is handled in this
 // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call.
- codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ data_offset += helpers::Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ locations->GetTemp(0),
+ /* needs_null_check */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+ }
 } else {
 Register out = out_loc.AsRegister<Register>();
 if (index.IsConstant()) {
@@ -6701,6 +6782,13 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
 // For non-Baker read barrier we have a temp-clobbering call.
 }
 }
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+ !Runtime::Current()->UseJitCompilation())) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
+ }
 }
 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -6880,6 +6968,9 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
 // that the kPrimNot result register is the same as the first argument register.
 locations->SetCustomSlowPathCallerSaves(caller_saves);
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
 } else {
 // For non-Baker read barrier we have a temp-clobbering call.
 }
@@ -7050,6 +7141,9 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
 // Note that TypeCheckSlowPathARM uses this register too.
 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
+ }
 }
 void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7923,48 +8017,93 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruct
 if (kUseBakerReadBarrier) {
 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
 // Baker's read barriers are used.
- //
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`.
If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. - // - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. - // } - - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. - Location temp = Location::RegisterLocation(LR); - SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( - instruction, root, /* entrypoint */ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk + // checks the reference and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: + + CheckLastTempIsBakerCcEntrypointRegister(instruction); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg); + Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(IP, 12); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); + __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset); + + Label return_address; + __ AdrCode(LR, &return_address); + __ CmpConstant(kBakerCcEntrypointRegister, 0); + static_assert( + BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, + "GC root LDR must be 2 32-bit instructions (8B) before the return address label."); + // Currently the offset is always within range. If that changes, + // we shall have to split the load the same way as for fields. + DCHECK_LT(offset, kReferenceLoadMinFarOffset); + ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + EmitPlaceholderBne(codegen_, bne_label); + __ Bind(&return_address); + } else { + // Note that we do not actually check the value of + // `GetIsGcMarking()` to decide whether to mark the loaded GC + // root or not. Instead, we load into `temp` the read barrier + // mark entry point corresponding to register `root`. If `temp` + // is null, it means that `GetIsGcMarking()` is false, and vice + // versa. + // + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // } + + // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. 
+ Location temp = Location::RegisterLocation(LR); + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( + instruction, root, /* entrypoint */ temp); + codegen_->AddSlowPath(slow_path); - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { // GC root loaded through a slow path for read barriers other // than Baker's. 
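For reference, the link-time thunk fast path above assembles to a fixed five-instruction
Thumb2 sequence. A sketch of the expected output (register and label names are
illustrative; the BNE target is filled in by the relative patcher):

    ldr   r4, [tr, #entry_point_offset]   @ entrypoint, null unless the GC is marking
    adr   lr, 1f                          @ return address for the thunk (Thumb bit set)
    cmp   r4, #0
    ldr.w rRoot, [rObj, #offset]          @ original GC root load
    bne.w <gc_root_thunk>                 @ placeholder BNE, patched at link time
  1:

The forced 32-bit LDR and BNE keep the root load exactly 8 bytes before the return
address, which is what the BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET static_assert
above verifies.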
@@ -7982,6 +8121,16 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruct
 }
 }
+void CodeGeneratorARM::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
+ if (!Runtime::Current()->UseJitCompilation()) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
+ }
+}
+
 void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
 Location ref,
 Register obj,
@@ -7991,6 +8140,69 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstr
 DCHECK(kEmitCompilerReadBarrier);
 DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = *(obj+offset);
+ // gray_return_address:
+
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ base = temp.AsRegister<Register>();
+ DCHECK_NE(base, kBakerCcEntrypointRegister);
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ }
+ CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj);
+ Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(IP, 12);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+
+ Label return_address;
+ __ AdrCode(LR, &return_address);
+ __ CmpConstant(kBakerCcEntrypointRegister, 0);
+ ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ EmitPlaceholderBne(this, bne_label);
+ static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
+ " 2 32-bit instructions (8B) for heap poisoning.");
+ Register ref_reg = ref.AsRegister<Register>();
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ __ LoadFromOffset(kLoadWord, ref_reg, base, offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ return;
+ }
+
 // /* HeapReference<Object> */ ref = *(obj + offset)
 Location no_index = Location::NoLocation();
 ScaleFactor no_scale_factor = TIMES_1;
@@ -8011,9 +8223,67 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstr
 static_assert(
 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ ScaleFactor scale_factor = TIMES_4;
+
+ if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto array_thunk<base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. The base register `data_reg` already
+ // // points to the array data.
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
+
+ DCHECK(index.IsValid());
+ Register index_reg = index.AsRegister<Register>();
+ Register ref_reg = ref.AsRegister<Register>();
+ Register data_reg = temp.AsRegister<Register>();
+ DCHECK_NE(data_reg, kBakerCcEntrypointRegister);
+
+ CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg);
+ Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(IP, 12);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+ __ AddConstant(data_reg, obj, data_offset);
+
+ Label return_address;
+ __ AdrCode(LR, &return_address);
+ __ CmpConstant(kBakerCcEntrypointRegister, 0);
+ ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ EmitPlaceholderBne(this, bne_label);
+ static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
+ " 2 32-bit instructions (8B) for heap poisoning.");
+ __ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); + return; + } + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier( instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check); } @@ -8379,6 +8649,11 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativePatch( return &patches->back(); } +Label* CodeGeneratorARM::NewBakerReadBarrierPatch(uint32_t custom_data) { + baker_read_barrier_patches_.emplace_back(custom_data); + return &baker_read_barrier_patches_.back().label; +} + Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file, dex::StringIndex string_index) { return boot_image_string_patches_.GetOrCreate( @@ -8445,7 +8720,8 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + boot_image_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size(); + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + + baker_read_barrier_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); @@ -8479,6 +8755,10 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche target_type.dex_file, target_type.type_index.index_)); } + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.Position(), + info.custom_data)); + } DCHECK_EQ(size, linker_patches->size()); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 86f2f21df7..6f007e100b 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -488,6 +488,11 @@ class CodeGeneratorARM : public CodeGenerator { PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); + + // Add a new baker read barrier patch and return the label to be bound + // before the BNE instruction. + Label* NewBakerReadBarrierPatch(uint32_t custom_data); + Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, dex::StringIndex string_index); Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index); @@ -503,6 +508,10 @@ class CodeGeneratorARM : public CodeGenerator { void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + // Maybe add the reserved entrypoint register as a temporary for field load. This temp + // is added only for AOT compilation if link-time generated thunks for fields are enabled. + void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations); + // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. 
 void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -616,6 +625,13 @@ class CodeGeneratorARM : public CodeGenerator {
 Literal*,
 TypeReferenceValueComparator>;
+ struct BakerReadBarrierPatchInfo {
+ explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+ Label label;
+ uint32_t custom_data;
+ };
+
 Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
 Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
 PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
@@ -648,6 +664,8 @@ class CodeGeneratorARM : public CodeGenerator {
 ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
 // PC-relative type patch info for kBssEntry.
 ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // Baker read barrier patch info.
+ ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 // Patches for string literals in JIT compiled code.
 StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index d65b3276c8..82f34b6375 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -16,6 +16,7 @@
 #include "code_generator_arm_vixl.h"
+#include "arch/arm/asm_support_arm.h"
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
 #include "code_generator_utils.h"
@@ -24,6 +25,7 @@
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
 #include "intrinsics_arm_vixl.h"
+#include "linker/arm/relative_patcher_thumb2.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "thread.h"
@@ -77,6 +79,20 @@ static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+// A reference load (except object array loads) uses LDR Rt, [Rn, #offset], which can handle
+// offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-time generated thunks we need to split
+// the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// The reserved entrypoint register for link-time generated thunks.
+const vixl32::Register kBakerCcEntrypointRegister = r4;
+
 #ifdef __
 #error "ARM Codegen VIXL macro-assembler macro already defined."
 #endif
@@ -88,6 +104,56 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 // Marker that code is yet to be, and must be, implemented.
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented " +static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps, + HInstruction* instruction) { + DCHECK(temps->IsAvailable(ip)); + temps->Exclude(ip); + DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister)); + DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(), + linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister); + DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u); + DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp( + instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister)); +} + +static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) { + ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes); + __ bind(patch_label); + vixl32::Label placeholder_label; + __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time. + __ bind(&placeholder_label); +} + +class EmitAdrCode { + public: + EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label) + : assembler_(assembler), rd_(rd), label_(label) { + ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes); + adr_location_ = assembler->GetCursorOffset(); + assembler->adr(EncodingSize(Wide), rd, label); + } + + ~EmitAdrCode() { + DCHECK(label_->IsBound()); + // The ADR emitted by the assembler does not set the Thumb mode bit we need. + // TODO: Maybe extend VIXL to allow ADR for return address? + uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_); + // Expecting ADR encoding T3 with `(offset & 1) == 0`. + DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u); // Check bits 24-31, except 26. + DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu); // Check bits 16-23. + DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode()); // Check bits 8-11 and 15. + DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u); // Check bit 0, i.e. the `offset & 1`. + // Add the Thumb mode bit. + raw_adr[2] |= 0x01u; + } + + private: + ArmVIXLMacroAssembler* const assembler_; + vixl32::Register rd_; + vixl32::Label* const label_; + int32_t adr_location_; +}; + // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, // for each live D registers they treat two corresponding S registers as live ones. // @@ -2012,6 +2078,7 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -5289,7 +5356,18 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // If link-time thunks for the Baker read barrier are enabled, for AOT + // loads we need a temporary only if the offset is too big. 
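+ // For example, a field at offset 0x1234 is split by
+ // GenerateFieldLoadWithBakerReadBarrier into ADD temp, obj, #0x1000 and
+ // LDR ref, [temp, #0x234], keeping the LDR immediate within its 4KiB range.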
+ if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } else { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -5756,11 +5834,35 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier. - // Also need for String compression feature. - if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) - || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier. + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation() && + instruction->GetIndex()->IsConstant()) { + // Array loads with constant index are treated as field loads. + // If link-time thunks for the Baker read barrier are enabled, for AOT + // constant index loads we need a temporary only if the offset is too big. + uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); + uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); + offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot); + if (offset >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation() && + !instruction->GetIndex()->IsConstant()) { + // We need a non-scratch temporary for the array data pointer. + locations->AddTemp(Location::RequiresRegister()); + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } else { + locations->AddTemp(Location::RequiresRegister()); + } + } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + // Also need a temporary for String compression feature. locations->AddTemp(Location::RequiresRegister()); } } @@ -5871,8 +5973,20 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call. - codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. 
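+ // For example, an object array load at constant index 5 becomes a field
+ // load at data_offset + (5 << 2); with the usual 12-byte data offset of
+ // 32-bit reference arrays that is offset 32.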
+ data_offset += Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ locations->GetTemp(0),
+ /* needs_null_check */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+ }
 } else {
 vixl32::Register out = OutputRegister(instruction);
 if (index.IsConstant()) {
@@ -6762,6 +6876,13 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
 // For non-Baker read barrier we have a temp-clobbering call.
 }
 }
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+ !Runtime::Current()->UseJitCompilation())) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ }
+ }
 }
 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -6938,6 +7059,9 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
 // that the kPrimNot result register is the same as the first argument register.
 locations->SetCustomSlowPathCallerSaves(caller_saves);
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ }
 } else {
 // For non-Baker read barrier we have a temp-clobbering call.
 }
@@ -7100,6 +7224,9 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
 // Note that TypeCheckSlowPathARM uses this register too.
 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
+ }
 }
 void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7998,48 +8125,96 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
 if (kUseBakerReadBarrier) {
 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
 // Baker's read barriers are used.
- //
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`. If `temp`
- // is null, it means that `GetIsGcMarking()` is false, and vice
- // versa.
- //
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
- // }
-
- // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
- Location temp = LocationFrom(lr);
- SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
- instruction, root, /* entrypoint */ temp);
- codegen_->AddSlowPath(slow_path);
+ if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not.
Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk + // checks the reference and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset); + UseScratchRegisterScope temps(GetVIXLAssembler()); + ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); + vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); + + vixl::EmissionCheckScope guard(GetVIXLAssembler(), + 4 * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(kBakerCcEntrypointRegister, Operand(0)); + static_assert( + BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, + "GC root LDR must be 2 32-bit instructions (8B) before the return address label."); + // Currently the offset is always within range. If that changes, + // we shall have to split the load the same way as for fields. + DCHECK_LT(offset, kReferenceLoadMinFarOffset); + __ ldr(EncodingSize(Wide), root_reg, MemOperand(obj, offset)); + EmitPlaceholderBne(codegen_, bne_label); + __ Bind(&return_address); + } else { + // Note that we do not actually check the value of + // `GetIsGcMarking()` to decide whether to mark the loaded GC + // root or not. Instead, we load into `temp` the read barrier + // mark entry point corresponding to register `root`. If `temp` + // is null, it means that `GetIsGcMarking()` is false, and vice + // versa. + // + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // } + + // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. 
+ Location temp = LocationFrom(lr); + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL( + instruction, root, /* entrypoint */ temp); + codegen_->AddSlowPath(slow_path); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset); + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { // GC root loaded through a slow path for read barriers other // than Baker's. @@ -8057,6 +8232,16 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( } } +void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields) { + if (!Runtime::Current()->UseJitCompilation()) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } + } +} + void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl32::Register obj, @@ -8066,6 +8251,75 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. 
That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = *(obj+offset);
+ // gray_return_address:
+
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ vixl32::Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ base = RegisterFrom(temp);
+ DCHECK(!base.Is(kBakerCcEntrypointRegister));
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ }
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base.GetCode(),
+ obj.GetCode());
+ vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+
+ vixl::EmissionCheckScope guard(
+ GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ EmitPlaceholderBne(this, bne_label);
+ static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
+ " 2 32-bit instructions (8B) for heap poisoning.");
+ vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ __ ldr(EncodingSize(Wide), ref_reg, MemOperand(base, offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // Note: We need a Wide NEG for the unpoisoning.
+ if (kPoisonHeapReferences) {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
+ __ Bind(&return_address);
+ return;
+ }
+
 // /* HeapReference<Object> */ ref = *(obj + offset)
 Location no_index = Location::NoLocation();
 ScaleFactor no_scale_factor = TIMES_1;
@@ -8086,9 +8340,73 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HI
 static_assert(
 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ ScaleFactor scale_factor = TIMES_4;
+
+ if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto array_thunk<base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. The base register `data_reg` already
+ // // points to the array data.
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
+
+ DCHECK(index.IsValid());
+ vixl32::Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
+ vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ vixl32::Register data_reg = RegisterFrom(temp, Primitive::kPrimInt); // Raw pointer.
+ DCHECK(!data_reg.Is(kBakerCcEntrypointRegister));
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode());
+ vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+ __ Add(data_reg, obj, Operand(data_offset));
+
+ vixl::EmissionCheckScope guard(
+ GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ EmitPlaceholderBne(this, bne_label);
+ static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
+ " 2 32-bit instructions (8B) for heap poisoning.");
+ __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ // Note: We need a Wide NEG for the unpoisoning.
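+ // (A narrow RSB would shrink the code and break the fixed 8-byte distance
+ // from the LDR to the return address that the static_assert above requires
+ // when heap poisoning is enabled.)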
+ if (kPoisonHeapReferences) { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } + __ Bind(&return_address); + return; + } + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier( instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check); } @@ -8497,6 +8815,11 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa return &patches->back(); } +vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) { + baker_read_barrier_patches_.emplace_back(custom_data); + return &baker_read_barrier_patches_.back().label; +} + VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageStringLiteral( const DexFile& dex_file, dex::StringIndex string_index) { @@ -8574,7 +8897,8 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + boot_image_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size(); + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + + baker_read_barrier_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); @@ -8608,6 +8932,10 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa target_type.dex_file, target_type.type_index.index_)); } + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(), + info.custom_data)); + } DCHECK_EQ(size, linker_patches->size()); } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 7281069102..7df1e296ee 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -572,6 +572,11 @@ class CodeGeneratorARMVIXL : public CodeGenerator { PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); + + // Add a new baker read barrier patch and return the label to be bound + // before the BNE instruction. + vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data); + VIXLUInt32Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, dex::StringIndex string_index); VIXLUInt32Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, @@ -588,6 +593,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + // Maybe add the reserved entrypoint register as a temporary for field load. This temp + // is added only for AOT compilation if link-time generated thunks for fields are enabled. + void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations); + // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. 
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -712,6 +721,13 @@ class CodeGeneratorARMVIXL : public CodeGenerator { VIXLUInt32Literal*, TypeReferenceValueComparator>; + struct BakerReadBarrierPatchInfo { + explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { } + + vixl::aarch32::Label label; + uint32_t custom_data; + }; + VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); VIXLUInt32Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); @@ -749,6 +765,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // Baker read barrier patch info. + ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; // Patches for string literals in JIT compiled code. StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 6a140458a6..aea901dec7 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -338,19 +338,21 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { // Ensure the inputs of `instruction` are defined in a block of the graph. for (HInstruction* input : instruction->GetInputs()) { - const HInstructionList& list = input->IsPhi() - ? input->GetBlock()->GetPhis() - : input->GetBlock()->GetInstructions(); if (input->GetBlock() == nullptr) { AddError(StringPrintf("Input %d of instruction %d is not in any " "basic block of the control-flow graph.", input->GetId(), instruction->GetId())); - } else if (!list.Contains(input)) { - AddError(StringPrintf("Input %d of instruction %d is not defined " - "in a basic block of the control-flow graph.", - input->GetId(), - instruction->GetId())); + } else { + const HInstructionList& list = input->IsPhi() + ? input->GetBlock()->GetPhis() + : input->GetBlock()->GetInstructions(); + if (!list.Contains(input)) { + AddError(StringPrintf("Input %d of instruction %d is not defined " + "in a basic block of the control-flow graph.", + input->GetId(), + instruction->GetId())); + } } } diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 750f9cc213..c784171fd7 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1648,6 +1648,8 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { // is clobbered by ReadBarrierMarkRegX entry points). Get an extra // temporary register from the register allocator. locations->AddTemp(Location::RequiresRegister()); + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen_); + arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations); } } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index fd8a37ae05..77d870bec2 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -2026,6 +2026,8 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // is clobbered by ReadBarrierMarkRegX entry points). Get an extra // temporary register from the register allocator. 
 locations->AddTemp(Location::RequiresRegister());
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
+ arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
 }
 }
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 0ed8a35338..0f24e81be2 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -652,6 +652,9 @@ class ArmAssembler : public Assembler {
 virtual void blx(Register rm, Condition cond = AL) = 0;
 virtual void bx(Register rm, Condition cond = AL) = 0;
+ // ADR instruction that loads a register with the address of the label, for branching to it.
+ virtual void AdrCode(Register rt, Label* label) = 0;
+
 // Memory barriers.
 virtual void dmb(DmbOptions flavor) = 0;
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 1e71d06b49..d7096b3c87 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -214,14 +214,14 @@ void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) {
 DCHECK_GE(dest_end, src_end);
 for (auto i = fixups_.rbegin(), end = fixups_.rend(); i != end; ++i) {
 Fixup* fixup = &*i;
+ size_t old_fixup_location = fixup->GetLocation();
 if (fixup->GetOriginalSize() == fixup->GetSize()) {
 // The size of this Fixup didn't change. To avoid moving the data
 // in small chunks, emit the code to its original position.
- fixup->Emit(&buffer_, adjusted_code_size);
 fixup->Finalize(dest_end - src_end);
+ fixup->Emit(old_fixup_location, &buffer_, adjusted_code_size);
 } else {
 // Move the data between the end of the fixup and src_end to its final location.
- size_t old_fixup_location = fixup->GetLocation();
 size_t src_begin = old_fixup_location + fixup->GetOriginalSizeInBytes();
 size_t data_size = src_end - src_begin;
 size_t dest_begin = dest_end - data_size;
@@ -230,7 +230,7 @@ void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) {
 dest_end = dest_begin - fixup->GetSizeInBytes();
 // Finalize the Fixup and emit the data to the new location.
 fixup->Finalize(dest_end - src_end);
- fixup->Emit(&buffer_, adjusted_code_size);
+ fixup->Emit(fixup->GetLocation(), &buffer_, adjusted_code_size);
 }
 }
 CHECK_EQ(src_end, dest_end);
@@ -1895,6 +1895,9 @@ inline size_t Thumb2Assembler::Fixup::SizeInBytes(Size size) {
 case kCbxz48Bit:
 return 6u;
+ case kCodeAddr4KiB:
+ return 4u;
+
 case kLiteral1KiB:
 return 2u;
 case kLiteral4KiB:
@@ -1973,6 +1976,15 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con
 diff -= 2; // Extra CMP Rn, #0, 16-bit.
 break;
+ case kCodeAddr4KiB:
+ // The ADR instruction rounds down the PC+4 to a multiple of 4, so if the PC
+ // isn't a multiple of 4, we need to adjust.
+ DCHECK_ALIGNED(diff, 2);
+ diff += location_ & 2;
+ // Add the Thumb mode bit.
+ diff += 1;
+ break;
+
 case kLiteral1KiB:
 case kLiteral4KiB:
 case kLongOrFPLiteral1KiB:
@@ -1987,8 +1999,8 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con
 diff = diff + (diff & 2);
 DCHECK_GE(diff, 0);
 break;
- case kLiteral1MiB:
- case kLiteral64KiB:
+ case kLiteral64KiB:
+ case kLiteral1MiB:
 case kLongOrFPLiteral64KiB:
 case kLiteralAddr64KiB:
 DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC.
@@ -2041,6 +2053,10 @@ bool Thumb2Assembler::Fixup::IsCandidateForEmitEarly() const {
 // We don't support conditional branches beyond +-1MiB.
 return true;
+ case kCodeAddr4KiB:
+ // ADR uses the aligned PC and as such the offset cannot be calculated early.
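+ // (For example, a fixup at location 0x102 has PC = 0x106 but ADR works from
+ // Align(PC, 4) = 0x104, so the final immediate depends on where the fixup
+ // ends up; see the `location_ & 2` adjustment in GetOffset() above.)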
+ return false; + case kLiteral1KiB: case kLiteral4KiB: case kLiteral64KiB: @@ -2087,6 +2103,10 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) // We don't support conditional branches beyond +-1MiB. break; + case kCodeAddr4KiB: + // We don't support Code address ADR beyond +4KiB. + break; + case kLiteral1KiB: DCHECK(!IsHighRegister(rn_)); if (IsUint<10>(GetOffset(current_code_size))) { @@ -2159,13 +2179,15 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) return current_code_size - old_code_size; } -void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) const { +void Thumb2Assembler::Fixup::Emit(uint32_t emit_location, + AssemblerBuffer* buffer, + uint32_t code_size) const { switch (GetSize()) { case kBranch16Bit: { DCHECK(type_ == kUnconditional || type_ == kConditional); DCHECK_EQ(type_ == kConditional, cond_ != AL); int16_t encoding = BEncoding16(GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kBranch32Bit: { @@ -2180,15 +2202,15 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c DCHECK_NE(encoding & B12, 0); encoding ^= B14 | B12; } - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kCbxz16Bit: { DCHECK(type_ == kCompareAndBranchXZero); int16_t encoding = CbxzEncoding16(rn_, GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kCbxz32Bit: { @@ -2196,8 +2218,8 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c DCHECK(cond_ == EQ || cond_ == NE); int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); int16_t b_encoding = BEncoding16(GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, cmp_encoding); - buffer->Store<int16_t>(location_ + 2, b_encoding); + buffer->Store<int16_t>(emit_location, cmp_encoding); + buffer->Store<int16_t>(emit_location + 2, b_encoding); break; } case kCbxz48Bit: { @@ -2205,24 +2227,32 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c DCHECK(cond_ == EQ || cond_ == NE); int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); int32_t b_encoding = BEncoding32(GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, cmp_encoding); - buffer->Store<int16_t>(location_ + 2u, b_encoding >> 16); - buffer->Store<int16_t>(location_ + 4u, static_cast<int16_t>(b_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, cmp_encoding); + buffer->Store<int16_t>(emit_location + 2u, b_encoding >> 16); + buffer->Store<int16_t>(emit_location + 4u, static_cast<int16_t>(b_encoding & 0xffff)); + break; + } + + case kCodeAddr4KiB: { + DCHECK(type_ == kLoadCodeAddr); + int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLiteral1KiB: { DCHECK(type_ == kLoadLiteralNarrow); int16_t encoding = LdrLitEncoding16(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kLiteral4KiB: { DCHECK(type_ == kLoadLiteralNarrow); // 
GetOffset() uses PC+4 but load literal uses AlignDown(PC+4, 4). Adjust offset accordingly. int32_t encoding = LdrLitEncoding32(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLiteral64KiB: { @@ -2242,11 +2272,11 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t mov_encoding = MovModImmEncoding32(rn_, offset & ~0xfff); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, offset & 0xfff); - buffer->Store<int16_t>(location_, mov_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, mov_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } case kLiteralFar: { @@ -2256,36 +2286,36 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, 0); - buffer->Store<int16_t>(location_, movw_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); - buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, movw_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } case kLiteralAddr1KiB: { DCHECK(type_ == kLoadLiteralAddr); int16_t encoding = AdrEncoding16(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kLiteralAddr4KiB: { DCHECK(type_ == kLoadLiteralAddr); int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLiteralAddr64KiB: { DCHECK(type_ == kLoadLiteralAddr); int32_t 
mov_encoding = MovwEncoding32(rn_, GetOffset(code_size)); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); - buffer->Store<int16_t>(location_, mov_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(emit_location, mov_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding); break; } case kLiteralAddrFar: { @@ -2294,29 +2324,29 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff); int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); - buffer->Store<int16_t>(location_, movw_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); - buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); + buffer->Store<int16_t>(emit_location, movw_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding); break; } case kLongOrFPLiteral1KiB: { int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size)); // DCHECKs type_. - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLongOrFPLiteral64KiB: { int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size)); int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u); // DCHECKs type_. - buffer->Store<int16_t>(location_, mov_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, mov_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } case kLongOrFPLiteralFar: { @@ -2325,13 +2355,13 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t movt_encoding = MovtEncoding32(IP, offset & ~0xffff); int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0); // DCHECKs type_. 
- buffer->Store<int16_t>(location_, movw_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); - buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, movw_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } } @@ -3331,6 +3361,19 @@ void Thumb2Assembler::bx(Register rm, Condition cond) { } +void Thumb2Assembler::AdrCode(Register rt, Label* label) { + uint32_t pc = buffer_.Size(); + FixupId branch_id = AddFixup(Fixup::LoadCodeAddress(pc, rt)); + CHECK(!label->IsBound()); + // ADR target must be an unbound label. Add it to a singly-linked list maintained within + // the code with the label serving as the head. + Emit16(static_cast<uint16_t>(label->position_)); + label->LinkTo(branch_id); + Emit16(0); + DCHECK_EQ(buffer_.Size() - pc, GetFixup(branch_id)->GetSizeInBytes()); +} + + void Thumb2Assembler::Push(Register rd, Condition cond) { str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond); } @@ -3405,7 +3448,7 @@ void Thumb2Assembler::Bind(Label* label) { break; } } - last_fixup.Emit(&buffer_, buffer_.Size()); + last_fixup.Emit(last_fixup.GetLocation(), &buffer_, buffer_.Size()); fixups_.pop_back(); } } diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 1c495aa7a7..5c36110cf6 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -268,6 +268,9 @@ class Thumb2Assembler FINAL : public ArmAssembler { void blx(Register rm, Condition cond = AL) OVERRIDE; void bx(Register rm, Condition cond = AL) OVERRIDE; + // ADR instruction loading a register for branching to the label, including the Thumb mode bit. + void AdrCode(Register rt, Label* label) OVERRIDE; + virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, @@ -377,6 +380,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { force_32bit_ = true; } + void Allow16Bit() { + force_32bit_ = false; + } + // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This // will generate a fixup. JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE; @@ -422,6 +429,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { kUnconditionalLink, // BL. kUnconditionalLinkX, // BLX. kCompareAndBranchXZero, // cbz/cbnz. + kLoadCodeAddr, // Get address of a code label, used for Baker read barriers. kLoadLiteralNarrow, // Load narrow integer literal. kLoadLiteralWide, // Load wide integer literal. kLoadLiteralAddr, // Load address of literal (used for jump table).
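For reference, the kCodeAddr4KiB offset computation in Fixup::GetOffset() above folds two corrections into the raw PC-relative difference: one for the ADR base alignment and one for the Thumb mode bit. A minimal stand-alone sketch (illustrative only; AdrCodeImmediate is a hypothetical name, and in the real code the PC+4 bias is applied by the shared GetOffset() logic):

#include <cassert>
#include <cstdint>

// Thumb2 ADR computes: result = AlignDown(PC, 4) + imm, where PC = location + 4.
// AdrCode() wants the loaded value to be the label address with the Thumb mode
// bit set, so the immediate must undo the base alignment and add the mode bit.
int32_t AdrCodeImmediate(uint32_t location, uint32_t target) {
  int32_t diff = static_cast<int32_t>(target - (location + 4u));
  assert((diff & 1) == 0);  // Thumb2 code labels are 2-byte aligned.
  diff += location & 2;     // AlignDown(PC, 4) loses 2 bytes when location % 4 == 2.
  diff += 1;                // Set the Thumb mode bit in the loaded address.
  return diff;
}

This dependence on the final location_ is also why IsCandidateForEmitEarly() returns false for kCodeAddr4KiB: the alignment correction cannot be computed until the fixup's position is final.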
@@ -442,6 +450,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { kCbxz32Bit, // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset. kCbxz48Bit, // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset. + // ADR variants. + kCodeAddr4KiB, // ADR rX, <label>; label must be after the ADR but within 4KiB range. + // Multi-instruction expansion is not supported. + // Load integer literal variants. // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes. kLiteral1KiB, @@ -492,6 +504,12 @@ class Thumb2Assembler FINAL : public ArmAssembler { cond, kCompareAndBranchXZero, kCbxz16Bit, location); } + // Code address. + static Fixup LoadCodeAddress(uint32_t location, Register rt) { + return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister, + AL, kLoadCodeAddr, kCodeAddr4KiB, location); + } + // Load narrow literal. static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) { DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB || @@ -550,6 +568,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { switch (GetOriginalSize()) { case kBranch32Bit: case kCbxz48Bit: + case kCodeAddr4KiB: case kLiteralFar: case kLiteralAddrFar: case kLongOrFPLiteralFar: @@ -623,7 +642,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Emit the branch instruction into the assembler buffer. This does the // encoding into the thumb instruction. - void Emit(AssemblerBuffer* buffer, uint32_t code_size) const; + void Emit(uint32_t emit_location, AssemblerBuffer* buffer, uint32_t code_size) const; private: Fixup(Register rn, Register rt2, SRegister sd, DRegister dd, @@ -903,6 +922,24 @@ class Thumb2Assembler FINAL : public ArmAssembler { FixupId last_fixup_id_; }; +class ScopedForce32Bit { + public: + explicit ScopedForce32Bit(Thumb2Assembler* assembler) + : assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) { + assembler->Force32Bit(); + } + + ~ScopedForce32Bit() { + if (!old_force_32bit_) { + assembler_->Allow16Bit(); + } + } + + private: + Thumb2Assembler* const assembler_; + const bool old_force_32bit_; +}; + } // namespace arm } // namespace art diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc index f1c6f67a7c..cf453b9a16 100644 --- a/dexlayout/dex_ir.cc +++ b/dexlayout/dex_ir.cc @@ -281,6 +281,16 @@ void Collections::ReadEncodedValue( item->SetDouble(conv.d); break; } + case DexFile::kDexAnnotationMethodType: { + const uint32_t proto_index = static_cast<uint32_t>(ReadVarWidth(data, length, false)); + item->SetProtoId(GetProtoId(proto_index)); + break; + } + case DexFile::kDexAnnotationMethodHandle: { + const uint32_t method_handle_index = static_cast<uint32_t>(ReadVarWidth(data, length, false)); + item->SetMethodHandle(GetMethodHandle(method_handle_index)); + break; + } case DexFile::kDexAnnotationString: { const uint32_t string_index = static_cast<uint32_t>(ReadVarWidth(data, length, false)); item->SetStringId(GetStringId(string_index)); @@ -766,6 +776,64 @@ ClassData* Collections::CreateClassData( return class_data; } +void Collections::CreateCallSitesAndMethodHandles(const DexFile& dex_file) { + // Iterate through the map list and set the offset of the CallSiteIds and MethodHandleItems. 
+ const DexFile::MapList* map = + reinterpret_cast<const DexFile::MapList*>(dex_file.Begin() + MapListOffset()); + for (uint32_t i = 0; i < map->size_; ++i) { + const DexFile::MapItem* item = map->list_ + i; + switch (item->type_) { + case DexFile::kDexTypeCallSiteIdItem: + SetCallSiteIdsOffset(item->offset_); + break; + case DexFile::kDexTypeMethodHandleItem: + SetMethodHandleItemsOffset(item->offset_); + break; + default: + break; + } + } + // Populate MethodHandleItems first (CallSiteIds may depend on them). + for (uint32_t i = 0; i < dex_file.NumMethodHandles(); i++) { + CreateMethodHandleItem(dex_file, i); + } + // Populate CallSiteIds. + for (uint32_t i = 0; i < dex_file.NumCallSiteIds(); i++) { + CreateCallSiteId(dex_file, i); + } +} + +void Collections::CreateCallSiteId(const DexFile& dex_file, uint32_t i) { + const DexFile::CallSiteIdItem& disk_call_site_id = dex_file.GetCallSiteId(i); + const uint8_t* disk_call_item_ptr = dex_file.Begin() + disk_call_site_id.data_off_; + EncodedArrayItem* call_site_item = + CreateEncodedArrayItem(disk_call_item_ptr, disk_call_site_id.data_off_); + + CallSiteId* call_site_id = new CallSiteId(call_site_item); + call_site_ids_.AddIndexedItem(call_site_id, CallSiteIdsOffset() + i * CallSiteId::ItemSize(), i); +} + +void Collections::CreateMethodHandleItem(const DexFile& dex_file, uint32_t i) { + const DexFile::MethodHandleItem& disk_method_handle = dex_file.GetMethodHandle(i); + uint16_t index = disk_method_handle.field_or_method_idx_; + DexFile::MethodHandleType type = + static_cast<DexFile::MethodHandleType>(disk_method_handle.method_handle_type_); + bool is_invoke = type == DexFile::MethodHandleType::kInvokeStatic || + type == DexFile::MethodHandleType::kInvokeInstance || + type == DexFile::MethodHandleType::kInvokeConstructor; + static_assert(DexFile::MethodHandleType::kLast == DexFile::MethodHandleType::kInvokeConstructor, + "Unexpected method handle types."); + IndexedItem* field_or_method_id; + if (is_invoke) { + field_or_method_id = GetMethodId(index); + } else { + field_or_method_id = GetFieldId(index); + } + MethodHandleItem* method_handle = new MethodHandleItem(type, field_or_method_id); + method_handle_items_.AddIndexedItem( + method_handle, MethodHandleItemsOffset() + i * MethodHandleItem::ItemSize(), i); +} + static uint32_t HeaderOffset(const dex_ir::Collections& collections ATTRIBUTE_UNUSED) { return 0; } @@ -823,6 +891,16 @@ static const FileSectionDescriptor kFileSectionDescriptors[] = { &dex_ir::Collections::ClassDefsSize, &dex_ir::Collections::ClassDefsOffset }, { + "CallSiteId", + DexFile::kDexTypeCallSiteIdItem, + &dex_ir::Collections::CallSiteIdsSize, + &dex_ir::Collections::CallSiteIdsOffset + }, { + "MethodHandle", + DexFile::kDexTypeMethodHandleItem, + &dex_ir::Collections::MethodHandleItemsSize, + &dex_ir::Collections::MethodHandleItemsOffset + }, { "StringData", DexFile::kDexTypeStringDataItem, &dex_ir::Collections::StringDatasSize, diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h index cad039550a..5692eb2b39 100644 --- a/dexlayout/dex_ir.h +++ b/dexlayout/dex_ir.h @@ -35,6 +35,7 @@ class AnnotationItem; class AnnotationsDirectoryItem; class AnnotationSetItem; class AnnotationSetRefList; +class CallSiteId; class ClassData; class ClassDef; class CodeItem; @@ -47,6 +48,7 @@ class FieldItem; class Header; class MapList; class MapItem; +class MethodHandleItem; class MethodId; class MethodItem; class ParameterAnnotation; @@ -65,6 +67,8 @@ static constexpr size_t kProtoIdItemSize = 12; static constexpr size_t 
kFieldIdItemSize = 8; static constexpr size_t kMethodIdItemSize = 8; static constexpr size_t kClassDefItemSize = 32; +static constexpr size_t kCallSiteIdItemSize = 4; +static constexpr size_t kMethodHandleItemSize = 8; // Visitor support class AbstractDispatcher { @@ -79,6 +83,8 @@ class AbstractDispatcher { virtual void Dispatch(const ProtoId* proto_id) = 0; virtual void Dispatch(const FieldId* field_id) = 0; virtual void Dispatch(const MethodId* method_id) = 0; + virtual void Dispatch(const CallSiteId* call_site_id) = 0; + virtual void Dispatch(const MethodHandleItem* method_handle_item) = 0; virtual void Dispatch(ClassData* class_data) = 0; virtual void Dispatch(ClassDef* class_def) = 0; virtual void Dispatch(FieldItem* field_item) = 0; @@ -165,6 +171,9 @@ class Collections { std::vector<std::unique_ptr<FieldId>>& FieldIds() { return field_ids_.Collection(); } std::vector<std::unique_ptr<MethodId>>& MethodIds() { return method_ids_.Collection(); } std::vector<std::unique_ptr<ClassDef>>& ClassDefs() { return class_defs_.Collection(); } + std::vector<std::unique_ptr<CallSiteId>>& CallSiteIds() { return call_site_ids_.Collection(); } + std::vector<std::unique_ptr<MethodHandleItem>>& MethodHandleItems() + { return method_handle_items_.Collection(); } std::map<uint32_t, std::unique_ptr<StringData>>& StringDatas() { return string_datas_.Collection(); } std::map<uint32_t, std::unique_ptr<TypeList>>& TypeLists() { return type_lists_.Collection(); } @@ -189,6 +198,10 @@ class Collections { void CreateFieldId(const DexFile& dex_file, uint32_t i); void CreateMethodId(const DexFile& dex_file, uint32_t i); void CreateClassDef(const DexFile& dex_file, uint32_t i); + void CreateCallSiteId(const DexFile& dex_file, uint32_t i); + void CreateMethodHandleItem(const DexFile& dex_file, uint32_t i); + + void CreateCallSitesAndMethodHandles(const DexFile& dex_file); TypeList* CreateTypeList(const DexFile::TypeList* type_list, uint32_t offset); EncodedArrayItem* CreateEncodedArrayItem(const uint8_t* static_data, uint32_t offset); @@ -207,6 +220,8 @@ class Collections { FieldId* GetFieldId(uint32_t index) { return FieldIds()[index].get(); } MethodId* GetMethodId(uint32_t index) { return MethodIds()[index].get(); } ClassDef* GetClassDef(uint32_t index) { return ClassDefs()[index].get(); } + CallSiteId* GetCallSiteId(uint32_t index) { return CallSiteIds()[index].get(); } + MethodHandleItem* GetMethodHandle(uint32_t index) { return MethodHandleItems()[index].get(); } StringId* GetStringIdOrNullPtr(uint32_t index) { return index == DexFile::kDexNoIndex ? 
nullptr : GetStringId(index); @@ -221,6 +236,8 @@ class Collections { uint32_t FieldIdsOffset() const { return field_ids_.GetOffset(); } uint32_t MethodIdsOffset() const { return method_ids_.GetOffset(); } uint32_t ClassDefsOffset() const { return class_defs_.GetOffset(); } + uint32_t CallSiteIdsOffset() const { return call_site_ids_.GetOffset(); } + uint32_t MethodHandleItemsOffset() const { return method_handle_items_.GetOffset(); } uint32_t StringDatasOffset() const { return string_datas_.GetOffset(); } uint32_t TypeListsOffset() const { return type_lists_.GetOffset(); } uint32_t EncodedArrayItemsOffset() const { return encoded_array_items_.GetOffset(); } @@ -240,6 +257,9 @@ class Collections { void SetFieldIdsOffset(uint32_t new_offset) { field_ids_.SetOffset(new_offset); } void SetMethodIdsOffset(uint32_t new_offset) { method_ids_.SetOffset(new_offset); } void SetClassDefsOffset(uint32_t new_offset) { class_defs_.SetOffset(new_offset); } + void SetCallSiteIdsOffset(uint32_t new_offset) { call_site_ids_.SetOffset(new_offset); } + void SetMethodHandleItemsOffset(uint32_t new_offset) + { method_handle_items_.SetOffset(new_offset); } void SetStringDatasOffset(uint32_t new_offset) { string_datas_.SetOffset(new_offset); } void SetTypeListsOffset(uint32_t new_offset) { type_lists_.SetOffset(new_offset); } void SetEncodedArrayItemsOffset(uint32_t new_offset) @@ -262,6 +282,8 @@ class Collections { uint32_t FieldIdsSize() const { return field_ids_.Size(); } uint32_t MethodIdsSize() const { return method_ids_.Size(); } uint32_t ClassDefsSize() const { return class_defs_.Size(); } + uint32_t CallSiteIdsSize() const { return call_site_ids_.Size(); } + uint32_t MethodHandleItemsSize() const { return method_handle_items_.Size(); } uint32_t StringDatasSize() const { return string_datas_.Size(); } uint32_t TypeListsSize() const { return type_lists_.Size(); } uint32_t EncodedArrayItemsSize() const { return encoded_array_items_.Size(); } @@ -288,6 +310,8 @@ class Collections { CollectionVector<FieldId> field_ids_; CollectionVector<MethodId> method_ids_; CollectionVector<ClassDef> class_defs_; + CollectionVector<CallSiteId> call_site_ids_; + CollectionVector<MethodHandleItem> method_handle_items_; CollectionMap<StringData> string_datas_; CollectionMap<TypeList> type_lists_; @@ -603,8 +627,10 @@ class EncodedValue { void SetDouble(double d) { u_.double_val_ = d; } void SetStringId(StringId* string_id) { u_.string_val_ = string_id; } void SetTypeId(TypeId* type_id) { u_.type_val_ = type_id; } + void SetProtoId(ProtoId* proto_id) { u_.proto_val_ = proto_id; } void SetFieldId(FieldId* field_id) { u_.field_val_ = field_id; } void SetMethodId(MethodId* method_id) { u_.method_val_ = method_id; } + void SetMethodHandle(MethodHandleItem* method_handle) { u_.method_handle_val_ = method_handle; } void SetEncodedArray(EncodedArrayItem* encoded_array) { encoded_array_.reset(encoded_array); } void SetEncodedAnnotation(EncodedAnnotation* encoded_annotation) { encoded_annotation_.reset(encoded_annotation); } @@ -619,8 +645,10 @@ class EncodedValue { double GetDouble() const { return u_.double_val_; } StringId* GetStringId() const { return u_.string_val_; } TypeId* GetTypeId() const { return u_.type_val_; } + ProtoId* GetProtoId() const { return u_.proto_val_; } FieldId* GetFieldId() const { return u_.field_val_; } MethodId* GetMethodId() const { return u_.method_val_; } + MethodHandleItem* GetMethodHandle() const { return u_.method_handle_val_; } EncodedArrayItem* GetEncodedArray() const { return encoded_array_.get(); 
} EncodedAnnotation* GetEncodedAnnotation() const { return encoded_annotation_.get(); } @@ -639,8 +667,10 @@ class EncodedValue { double double_val_; StringId* string_val_; TypeId* type_val_; + ProtoId* proto_val_; FieldId* field_val_; MethodId* method_val_; + MethodHandleItem* method_handle_val_; } u_; std::unique_ptr<EncodedArrayItem> encoded_array_; std::unique_ptr<EncodedAnnotation> encoded_annotation_; @@ -1087,6 +1117,48 @@ class AnnotationsDirectoryItem : public Item { DISALLOW_COPY_AND_ASSIGN(AnnotationsDirectoryItem); }; +class CallSiteId : public IndexedItem { + public: + explicit CallSiteId(EncodedArrayItem* call_site_item) : call_site_item_(call_site_item) { + size_ = kCallSiteIdItemSize; + } + ~CallSiteId() OVERRIDE { } + + static size_t ItemSize() { return kCallSiteIdItemSize; } + + EncodedArrayItem* CallSiteItem() const { return call_site_item_; } + + void Accept(AbstractDispatcher* dispatch) const { dispatch->Dispatch(this); } + + private: + EncodedArrayItem* call_site_item_; + + DISALLOW_COPY_AND_ASSIGN(CallSiteId); +}; + +class MethodHandleItem : public IndexedItem { + public: + MethodHandleItem(DexFile::MethodHandleType method_handle_type, IndexedItem* field_or_method_id) + : method_handle_type_(method_handle_type), + field_or_method_id_(field_or_method_id) { + size_ = kMethodHandleItemSize; + } + ~MethodHandleItem() OVERRIDE { } + + static size_t ItemSize() { return kMethodHandleItemSize; } + + DexFile::MethodHandleType GetMethodHandleType() const { return method_handle_type_; } + IndexedItem* GetFieldOrMethodId() const { return field_or_method_id_; } + + void Accept(AbstractDispatcher* dispatch) const { dispatch->Dispatch(this); } + + private: + DexFile::MethodHandleType method_handle_type_; + IndexedItem* field_or_method_id_; + + DISALLOW_COPY_AND_ASSIGN(MethodHandleItem); +}; + // TODO(sehr): implement MapList. class MapList : public Item { public: diff --git a/dexlayout/dex_ir_builder.cc b/dexlayout/dex_ir_builder.cc index d0c5bf964e..8eb726a64a 100644 --- a/dexlayout/dex_ir_builder.cc +++ b/dexlayout/dex_ir_builder.cc @@ -72,6 +72,8 @@ Header* DexIrBuilder(const DexFile& dex_file) { } // MapItem. collections.SetMapListOffset(disk_header.map_off_); + // CallSiteIds and MethodHandleItems. 
+ collections.CreateCallSitesAndMethodHandles(dex_file); CheckAndSetRemainingOffsets(dex_file, &collections); @@ -115,6 +117,14 @@ static void CheckAndSetRemainingOffsets(const DexFile& dex_file, Collections* co CHECK_EQ(item->size_, collections->ClassDefsSize()); CHECK_EQ(item->offset_, collections->ClassDefsOffset()); break; + case DexFile::kDexTypeCallSiteIdItem: + CHECK_EQ(item->size_, collections->CallSiteIdsSize()); + CHECK_EQ(item->offset_, collections->CallSiteIdsOffset()); + break; + case DexFile::kDexTypeMethodHandleItem: + CHECK_EQ(item->size_, collections->MethodHandleItemsSize()); + CHECK_EQ(item->offset_, collections->MethodHandleItemsOffset()); + break; case DexFile::kDexTypeMapList: CHECK_EQ(item->size_, 1u); CHECK_EQ(item->offset_, disk_header.map_off_); diff --git a/dexlayout/dex_writer.cc b/dexlayout/dex_writer.cc index 7ffa38bfd4..e1b828ca52 100644 --- a/dexlayout/dex_writer.cc +++ b/dexlayout/dex_writer.cc @@ -151,6 +151,12 @@ size_t DexWriter::WriteEncodedValue(dex_ir::EncodedValue* encoded_value, size_t length = EncodeDoubleValue(encoded_value->GetDouble(), buffer); start = 8 - length; break; + case DexFile::kDexAnnotationMethodType: + length = EncodeUIntValue(encoded_value->GetProtoId()->GetIndex(), buffer); + break; + case DexFile::kDexAnnotationMethodHandle: + length = EncodeUIntValue(encoded_value->GetMethodHandle()->GetIndex(), buffer); + break; case DexFile::kDexAnnotationString: length = EncodeUIntValue(encoded_value->GetStringId()->GetIndex(), buffer); break; @@ -485,6 +491,27 @@ void DexWriter::WriteClasses() { } } +void DexWriter::WriteCallSites() { + uint32_t call_site_off[1]; + for (std::unique_ptr<dex_ir::CallSiteId>& call_site_id : + header_->GetCollections().CallSiteIds()) { + call_site_off[0] = call_site_id->CallSiteItem()->GetOffset(); + Write(call_site_off, call_site_id->GetSize(), call_site_id->GetOffset()); + } +} + +void DexWriter::WriteMethodHandles() { + uint16_t method_handle_buff[4]; + for (std::unique_ptr<dex_ir::MethodHandleItem>& method_handle : + header_->GetCollections().MethodHandleItems()) { + method_handle_buff[0] = static_cast<uint16_t>(method_handle->GetMethodHandleType()); + method_handle_buff[1] = 0; // unused. + method_handle_buff[2] = method_handle->GetFieldOrMethodId()->GetIndex(); + method_handle_buff[3] = 0; // unused. + Write(method_handle_buff, method_handle->GetSize(), method_handle->GetOffset()); + } +} + struct MapItemContainer { MapItemContainer(uint32_t type, uint32_t size, uint32_t offset) : type_(type), size_(size), offset_(offset) { } @@ -528,6 +555,14 @@ void DexWriter::WriteMapItem() { queue.push(MapItemContainer(DexFile::kDexTypeClassDefItem, collection.ClassDefsSize(), collection.ClassDefsOffset())); } + if (collection.CallSiteIdsSize() != 0) { + queue.push(MapItemContainer(DexFile::kDexTypeCallSiteIdItem, collection.CallSiteIdsSize(), + collection.CallSiteIdsOffset())); + } + if (collection.MethodHandleItemsSize() != 0) { + queue.push(MapItemContainer(DexFile::kDexTypeMethodHandleItem, + collection.MethodHandleItemsSize(), collection.MethodHandleItemsOffset())); + } // Data section. 
queue.push(MapItemContainer(DexFile::kDexTypeMapList, 1, collection.MapListOffset())); @@ -618,10 +653,8 @@ void DexWriter::WriteHeader() { uint32_t class_defs_off = collections.ClassDefsOffset(); buffer[16] = class_defs_size; buffer[17] = class_defs_off; - uint32_t data_off = class_defs_off + class_defs_size * dex_ir::ClassDef::ItemSize(); - uint32_t data_size = file_size - data_off; - buffer[18] = data_size; - buffer[19] = data_off; + buffer[18] = header_->DataSize(); + buffer[19] = header_->DataOffset(); Write(buffer, 20 * sizeof(uint32_t), offset); } @@ -640,6 +673,8 @@ void DexWriter::WriteMemMap() { WriteDebugInfoItems(); WriteCodeItems(); WriteClasses(); + WriteCallSites(); + WriteMethodHandles(); WriteMapItem(); WriteHeader(); } diff --git a/dexlayout/dex_writer.h b/dexlayout/dex_writer.h index fb76e5ccfc..b396adf126 100644 --- a/dexlayout/dex_writer.h +++ b/dexlayout/dex_writer.h @@ -59,6 +59,8 @@ class DexWriter { void WriteDebugInfoItems(); void WriteCodeItems(); void WriteClasses(); + void WriteCallSites(); + void WriteMethodHandles(); void WriteMapItem(); void WriteHeader(); diff --git a/dexlayout/dexdiag.cc b/dexlayout/dexdiag.cc index 49c818593b..c577b6e105 100644 --- a/dexlayout/dexdiag.cc +++ b/dexlayout/dexdiag.cc @@ -296,21 +296,20 @@ static void ProcessOneDexMapping(uint64_t* pagemap, DisplayDexStatistics(start_page, end_page, section_resident_pages, sections, printer); } -static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) { +static bool IsVdexFileMapping(const std::string& mapped_name) { // Confirm that the map is from a vdex file. static const char* suffixes[] = { ".vdex" }; - std::string vdex_name; - bool found = false; - for (size_t j = 0; j < sizeof(suffixes) / sizeof(suffixes[0]); ++j) { - if (strstr(pm_map_name(map), suffixes[j]) != nullptr) { - vdex_name = pm_map_name(map); - found = true; - break; + for (const char* suffix : suffixes) { + size_t match_loc = mapped_name.find(suffix); + if (match_loc != std::string::npos && mapped_name.length() == match_loc + strlen(suffix)) { + return true; } } - if (!found) { - return true; - } + return false; +} + +static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) { + std::string vdex_name = pm_map_name(map); // Extract all the dex files from the vdex file. std::string error_msg; std::unique_ptr<VdexFile> vdex(VdexFile::Open(vdex_name, @@ -334,6 +333,7 @@ static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) { << ": error " << error_msg << std::endl; + return false; } // Open the page mapping (one uint64_t per page) for the entire vdex mapping. uint64_t* pagemap; @@ -385,21 +385,19 @@ static void ProcessOneOatMapping(uint64_t* pagemap, size_t size, Printer* printe printer->PrintSkipLine(); } -static bool DisplayMappingIfFromOatFile(pm_map_t* map, Printer* printer) { - // Confirm that the map is from a vdex file. +static bool IsOatFileMapping(const std::string& mapped_name) { + // Confirm that the map is from an oat file. 
static const char* suffixes[] = { ".odex", ".oat" }; - std::string vdex_name; - bool found = false; - for (size_t j = 0; j < sizeof(suffixes) / sizeof(suffixes[0]); ++j) { - if (strstr(pm_map_name(map), suffixes[j]) != nullptr) { - vdex_name = pm_map_name(map); - found = true; - break; + for (const char* suffix : suffixes) { + size_t match_loc = mapped_name.find(suffix); + if (match_loc != std::string::npos && mapped_name.length() == match_loc + strlen(suffix)) { + return true; } } - if (!found) { - return true; - } + return false; +} + +static bool DisplayMappingIfFromOatFile(pm_map_t* map, Printer* printer) { // Open the page mapping (one uint64_t per page) for the entire oat mapping. uint64_t* pagemap; size_t len; @@ -511,14 +509,20 @@ static int DexDiagMain(int argc, char* argv[]) { if (!FilterByNameContains(mapped_file_name, name_filters)) { continue; } - match_found = true; - if (!DisplayMappingIfFromVdexFile(maps[i], &printer)) { - return EXIT_FAILURE; - } else if (!DisplayMappingIfFromOatFile(maps[i], &printer)) { - return EXIT_FAILURE; + if (IsVdexFileMapping(mapped_file_name)) { + if (!DisplayMappingIfFromVdexFile(maps[i], &printer)) { + return EXIT_FAILURE; + } + match_found = true; + } else if (IsOatFileMapping(mapped_file_name)) { + if (!DisplayMappingIfFromOatFile(maps[i], &printer)) { + return EXIT_FAILURE; + } + match_found = true; } } if (!match_found) { + std::cerr << "No relevant memory maps were found." << std::endl; return EXIT_FAILURE; } #endif diff --git a/dexlayout/dexdiag_test.cc b/dexlayout/dexdiag_test.cc index d0d2af14e5..a0b3f32756 100644 --- a/dexlayout/dexdiag_test.cc +++ b/dexlayout/dexdiag_test.cc @@ -55,7 +55,6 @@ class DexDiagTest : public CommonRuntimeTest { } std::unique_ptr<OatFile> OpenOatAndVdexFiles() { - std::cout << "YO!" << std::endl; // Open the core.oat file.
// This is a little convoluted because we have to // get the location of the default core image (.../framework/core.oat), diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc index a857976021..1a5e39f0f7 100644 --- a/runtime/arch/arch_test.cc +++ b/runtime/arch/arch_test.cc @@ -71,6 +71,11 @@ static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARG #undef FRAME_SIZE_SAVE_REFS_AND_ARGS static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING; #undef FRAME_SIZE_SAVE_EVERYTHING +#undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET } // namespace arm namespace arm64 { @@ -83,6 +88,11 @@ static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARG #undef FRAME_SIZE_SAVE_REFS_AND_ARGS static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING; #undef FRAME_SIZE_SAVE_EVERYTHING +#undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET } // namespace arm64 namespace mips { diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h index c03bcae526..f1f1766ad4 100644 --- a/runtime/arch/arm/asm_support_arm.h +++ b/runtime/arch/arm/asm_support_arm.h @@ -24,6 +24,28 @@ #define FRAME_SIZE_SAVE_REFS_AND_ARGS 112 #define FRAME_SIZE_SAVE_EVERYTHING 192 +// The offset from art_quick_read_barrier_mark_introspection to the array switch cases, +// i.e. art_quick_read_barrier_mark_introspection_arrays. +#define BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET 0x100 +// The offset from art_quick_read_barrier_mark_introspection to the GC root entrypoint, +// i.e. art_quick_read_barrier_mark_introspection_gc_roots. +#define BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET 0xc0 + +// The offset of the reference load LDR from the return address in LR for field loads. +#ifdef USE_HEAP_POISONING +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -8 +#else +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -4 +#endif +// The offset of the reference load LDR from the return address in LR for array loads. +#ifdef USE_HEAP_POISONING +#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -8 +#else +#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -4 +#endif +// The offset of the reference load LDR from the return address in LR for GC root loads. 
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET -8 + // Flag for enabling R4 optimization in arm runtime // #define ARM_R4_SUSPEND_FLAG diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index d21d0c07a2..6b7247773a 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -17,6 +17,7 @@ #include <math.h> #include <string.h> +#include "arch/arm/asm_support_arm.h" #include "entrypoints/jni/jni_entrypoints.h" #include "entrypoints/quick/quick_alloc_entrypoints.h" #include "entrypoints/quick/quick_default_externs.h" @@ -51,6 +52,10 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*); + // Used by soft float. // Single-precision FP arithmetics. extern "C" float fmodf(float a, float b); // REM_FLOAT[_2ADDR] @@ -80,6 +85,22 @@ void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr; qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr; + + // Check that array switch cases are at appropriate offsets from the introspection entrypoint. + // For the alignment check, strip the Thumb mode bit. + DCHECK_ALIGNED(reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection) - 1u, 256u); + intptr_t array_diff = + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_arrays) - + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection); + DCHECK_EQ(BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET, array_diff); + // Check that the GC root entrypoint is at appropriate offset from the introspection entrypoint. + intptr_t gc_roots_diff = + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots) - + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection); + DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET, gc_roots_diff); + // The register 12, i.e. IP, is reserved, so there is no art_quick_read_barrier_mark_reg12. + // We're using the entry to hold a pointer to the introspection entrypoint instead. + qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_introspection : nullptr; } void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index a277edfa29..fa21208fcb 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -2146,6 +2146,216 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11 +// Helper macros for Baker CC read barrier mark introspection (BRBMI). 
+.macro BRBMI_FOR_12_REGISTERS macro_for_register, macro_for_reserved_register + \macro_for_register r0 + \macro_for_register r1 + \macro_for_register r2 + \macro_for_register r3 + \macro_for_reserved_register // R4 is reserved for the entrypoint address. + \macro_for_register r5 + \macro_for_register r6 + \macro_for_register r7 + \macro_for_register r8 + \macro_for_register r9 + \macro_for_register r10 + \macro_for_register r11 +.endm + +.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register + BRBMI_FOR_12_REGISTERS \macro_for_register, \macro_for_reserved_register + \macro_for_reserved_register // IP is reserved. + \macro_for_reserved_register // SP is reserved. + \macro_for_reserved_register // LR is reserved. + \macro_for_reserved_register // PC is reserved. +.endm + +.macro BRBMI_RETURN_SWITCH_CASE reg +.Lmark_introspection_return_switch_case_\reg: + mov \reg, ip + bx lr +.endm + +.macro BRBMI_BAD_RETURN_SWITCH_CASE +.Lmark_introspection_return_switch_case_bad: + BRBMI_BKPT_FILL_4B +.endm + +.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg + .byte (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2 +.endm + +.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET + .byte (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2 +.endm + +#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET +#error "Array and field introspection code sharing requires same LDR offset." +#endif +.macro BRBMI_ARRAY_LOAD index_reg + ldr ip, [ip, \index_reg, lsl #2] // 4 bytes. + b art_quick_read_barrier_mark_introspection // Should be 2 bytes, encoding T2. + .balign 8 // Add padding to 8 bytes. +.endm + +.macro BRBMI_BKPT_FILL_4B + bkpt 0 + bkpt 0 +.endm + +.macro BRBMI_BKPT_FILL_8B + BRBMI_BKPT_FILL_4B + BRBMI_BKPT_FILL_4B +.endm + +.macro BRBMI_SLOW_PATH ldr_offset + push {r0-r3, r7, lr} // Save return address and caller-save registers. + .cfi_adjust_cfa_offset 24 + .cfi_rel_offset r0, 0 + .cfi_rel_offset r1, 4 + .cfi_rel_offset r2, 8 + .cfi_rel_offset r3, 12 + .cfi_rel_offset r7, 16 + .cfi_rel_offset lr, 20 + + mov r0, ip // Pass the reference. + vpush {s0-s15} // save floating-point caller-save registers + .cfi_adjust_cfa_offset 64 + bl artReadBarrierMark // r0 <- artReadBarrierMark(obj) + vpop {s0-s15} // restore floating-point registers + .cfi_adjust_cfa_offset -64 + mov ip, r0 // Move reference to ip in preparation for return switch. + + pop {r0-r3, r7, lr} // Restore registers. + .cfi_adjust_cfa_offset -24 + .cfi_restore r0 + .cfi_restore r1 + .cfi_restore r2 + .cfi_restore r3 + .cfi_restore r7 + .cfi_restore lr + + // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR. + ldrh r4, [lr, #(-1 + \ldr_offset + 2)] + lsr r4, r4, #12 // Extract `ref_reg`. + b .Lmark_introspection_return_switch +.endm + + /* + * Use introspection to load a reference from the same address as the LDR + * instruction in generated code would load (unless loaded by the thunk, + * see below), call ReadBarrier::Mark() with that reference if needed + * and return it in the same register as the LDR instruction would load. + * + * The entrypoint is called through a thunk that differs across load kinds. + * For field and array loads the LDR instruction in generated code follows + * the branch to the thunk, i.e. 
the LDR is at [LR, #(-4 - 1)] where the -1 + * is an adjustment for the Thumb mode bit in LR, and the thunk knows the + * holder and performs the gray bit check, returning to the LDR instruction + * if the object is not gray, so this entrypoint no longer needs to know + * anything about the holder. For GC root loads, the LDR instruction in + * generated code precedes the branch to the thunk, i.e. the LDR is at + * [LR, #(-8 - 1)] where the -1 is again the Thumb mode bit adjustment, and + * the thunk does not do the gray bit check. + * + * For field accesses and array loads with a constant index the thunk loads + * the reference into IP using introspection and calls the main entrypoint, + * art_quick_read_barrier_mark_introspection. With heap poisoning enabled, + * the passed reference is poisoned. + * + * For array accesses with non-constant index, the thunk inserts the bits + * 0-5 of the LDR instruction into the entrypoint address, effectively + * calculating a switch case label based on the index register (bits 0-3) + * and adding an extra offset (bits 4-5 hold the shift which is always 2 + * for reference loads) to differentiate from the main entrypoint, then + * moves the base register to IP and jumps to the switch case. Therefore + * we need to align the main entrypoint to 512 bytes, accounting for + * a 256-byte offset followed by 16 array entrypoints starting at + * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR + * (register) and a branch to the main entrypoint. + * + * For GC root accesses we cannot use the main entrypoint because of the + * different offset where the LDR instruction in generated code is located. + * (And even with heap poisoning enabled, GC roots are not poisoned.) + * To re-use the same entrypoint pointer in generated code, we make sure + * that the gc root entrypoint (a copy of the entrypoint with a different + * offset for introspection loads) is located at a known offset (192 bytes, + * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main + * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves + * the root register to IP and jumps to the customized entrypoint, + * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also + * performs all the fast-path checks, so we need just the slow path. + * + * The code structure is + * art_quick_read_barrier_mark_introspection: + * Over 128 bytes for the main entrypoint code. + * Padding to 192 bytes if needed. + * art_quick_read_barrier_mark_introspection_gc_roots: + * GC root entrypoint code. + * Padding to 256 bytes if needed. + * art_quick_read_barrier_mark_introspection_arrays: + * Exactly 128 bytes for array load switch cases (16x2 instructions). + */ + .balign 512 +ENTRY art_quick_read_barrier_mark_introspection + // At this point, IP contains the reference, R4 can be freely used. + // (R4 is reserved for the entrypoint address.) + // For heap poisoning, the reference is poisoned, so unpoison it first. + UNPOISON_HEAP_REF ip + // If reference is null, just return it in the right register. + cmp ip, #0 + beq .Lmark_introspection_return + // Use R4 as temp and check the mark bit of the reference. + ldr r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + tst r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED + beq .Lmark_introspection_unmarked +.Lmark_introspection_return: + // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR. + ldrh r4, [lr, #(-1 + BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET + 2)] + lsr r4, r4, #12 // Extract `ref_reg`.
+.Lmark_introspection_return_switch: + tbb [pc, r4] // Jump to the switch case. +.Lmark_introspection_return_table: + BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET + .balign 16 + BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE + + .balign 16 +.Lmark_introspection_unmarked: + // Check if the top two bits are one, if this is the case it is a forwarding address. +#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) + // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in + // the highest bits and the "forwarding address" state to have all bits set. +#error "Unexpected lock word state shift or forwarding address state value." +#endif + cmp r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT) + bhs .Lmark_introspection_forwarding_address + BRBMI_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET + + .balign 8 +.Lmark_introspection_forwarding_address: + // Shift left by the forwarding address shift. This clears out the state bits since they are + // in the top 2 bits of the lock word. + lsl ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT + b .Lmark_introspection_return + + .balign 64 + .thumb_func + .type art_quick_read_barrier_mark_introspection_gc_roots, #function + .hidden art_quick_read_barrier_mark_introspection_gc_roots + .global art_quick_read_barrier_mark_introspection_gc_roots +art_quick_read_barrier_mark_introspection_gc_roots: + BRBMI_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET + + .balign 256 + .thumb_func + .type art_quick_read_barrier_mark_introspection_arrays, #function + .hidden art_quick_read_barrier_mark_introspection_arrays + .global art_quick_read_barrier_mark_introspection_arrays +art_quick_read_barrier_mark_introspection_arrays: + BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B +END art_quick_read_barrier_mark_introspection + .extern artInvokePolymorphic ENTRY art_quick_invoke_polymorphic SETUP_SAVE_REFS_AND_ARGS_FRAME r2 diff --git a/runtime/compiler_filter.cc b/runtime/compiler_filter.cc index dbfcdfe874..4847f38489 100644 --- a/runtime/compiler_filter.cc +++ b/runtime/compiler_filter.cc @@ -140,6 +140,26 @@ CompilerFilter::Filter CompilerFilter::GetNonProfileDependentFilterFrom(Filter f UNREACHABLE(); } +CompilerFilter::Filter CompilerFilter::GetSafeModeFilterFrom(Filter filter) { + // For safe mode, we should not return a filter that generates AOT compiled + // code. + switch (filter) { + case CompilerFilter::kAssumeVerified: + case CompilerFilter::kExtract: + case CompilerFilter::kVerify: + case CompilerFilter::kQuicken: + return filter; + + case CompilerFilter::kSpace: + case CompilerFilter::kSpeed: + case CompilerFilter::kEverything: + case CompilerFilter::kSpaceProfile: + case CompilerFilter::kSpeedProfile: + case CompilerFilter::kEverythingProfile: + return CompilerFilter::kQuicken; + } + UNREACHABLE(); +} bool CompilerFilter::IsAsGoodAs(Filter current, Filter target) { return current >= target; diff --git a/runtime/compiler_filter.h b/runtime/compiler_filter.h index 9cb54b14b6..f802439053 100644 --- a/runtime/compiler_filter.h +++ b/runtime/compiler_filter.h @@ -75,6 +75,9 @@ class CompilerFilter FINAL { // Returns a non-profile-guided version of the given filter. static Filter GetNonProfileDependentFilterFrom(Filter filter); + // Returns a filter suitable for safe mode. 
+ static Filter GetSafeModeFilterFrom(Filter filter); + // Returns true if the 'current' compiler filter is considered at least as // good as the 'target' compilation type. // For example: kSpeed is as good as kInterpretOnly, but kInterpretOnly is diff --git a/runtime/compiler_filter_test.cc b/runtime/compiler_filter_test.cc index a59165f958..383f4e3666 100644 --- a/runtime/compiler_filter_test.cc +++ b/runtime/compiler_filter_test.cc @@ -28,6 +28,13 @@ static void TestCompilerFilterName(CompilerFilter::Filter filter, std::string na EXPECT_EQ(name, CompilerFilter::NameOfFilter(filter)); } +static void TestSafeModeFilter(CompilerFilter::Filter expected, std::string name) { + CompilerFilter::Filter parsed; + EXPECT_TRUE(CompilerFilter::ParseCompilerFilter(name.c_str(), &parsed)); + EXPECT_EQ(expected, CompilerFilter::GetSafeModeFilterFrom(parsed)); +} + + // Verify the dexopt status values from dalvik.system.DexFile // match the OatFileAssistant::DexOptStatus values. TEST(CompilerFilterTest, ParseCompilerFilter) { @@ -47,4 +54,17 @@ TEST(CompilerFilterTest, ParseCompilerFilter) { EXPECT_FALSE(CompilerFilter::ParseCompilerFilter("super-awesome-filter", &filter)); } +TEST(CompilerFilterTest, SafeModeFilter) { + TestSafeModeFilter(CompilerFilter::kAssumeVerified, "assume-verified"); + TestSafeModeFilter(CompilerFilter::kExtract, "extract"); + TestSafeModeFilter(CompilerFilter::kVerify, "verify"); + TestSafeModeFilter(CompilerFilter::kQuicken, "quicken"); + TestSafeModeFilter(CompilerFilter::kQuicken, "space-profile"); + TestSafeModeFilter(CompilerFilter::kQuicken, "space"); + TestSafeModeFilter(CompilerFilter::kQuicken, "speed-profile"); + TestSafeModeFilter(CompilerFilter::kQuicken, "speed"); + TestSafeModeFilter(CompilerFilter::kQuicken, "everything-profile"); + TestSafeModeFilter(CompilerFilter::kQuicken, "everything"); +} + } // namespace art diff --git a/runtime/dex2oat_environment_test.h b/runtime/dex2oat_environment_test.h index e58c6f541e..6765407949 100644 --- a/runtime/dex2oat_environment_test.h +++ b/runtime/dex2oat_environment_test.h @@ -42,7 +42,16 @@ class Dex2oatEnvironmentTest : public CommonRuntimeTest { CommonRuntimeTest::SetUp(); // Create a scratch directory to work from. - scratch_dir_ = android_data_ + "/Dex2oatEnvironmentTest"; + + // Get the realpath of the android data. The oat dir should always point to the real location + // when generating oat files in dalvik-cache. This avoids complicating the unit tests + // when matching the expected paths. + UniqueCPtr<const char[]> android_data_real(realpath(android_data_.c_str(), nullptr)); + ASSERT_TRUE(android_data_real != nullptr) + << "Could not get the realpath of the android data " << android_data_ << ": " << strerror(errno); + + scratch_dir_.assign(android_data_real.get()); + scratch_dir_ += "/Dex2oatEnvironmentTest"; ASSERT_EQ(0, mkdir(scratch_dir_.c_str(), 0700)); // Create a subdirectory in scratch for odex files.
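To make the safe-mode mapping above concrete, a minimal usage sketch (illustrative only; SafeModeFilterName is a hypothetical helper, while ParseCompilerFilter(), GetSafeModeFilterFrom() and NameOfFilter() are the compiler_filter.h APIs exercised by this change, e.g. in DexFile_getSafeModeCompilerFilter below):

#include <string>
#include "compiler_filter.h"  // assumed include path within the runtime

// In safe mode every AOT-compiling filter collapses to kQuicken, while filters
// that only verify or extract pass through unchanged.
std::string SafeModeFilterName(const char* name) {
  art::CompilerFilter::Filter filter;
  if (!art::CompilerFilter::ParseCompilerFilter(name, &filter)) {
    return name;  // Unknown filter; leave it to the caller to report.
  }
  return art::CompilerFilter::NameOfFilter(
      art::CompilerFilter::GetSafeModeFilterFrom(filter));
}

Per the SafeModeFilter test above, SafeModeFilterName("speed") and SafeModeFilterName("everything-profile") both yield "quicken", while SafeModeFilterName("verify") stays "verify".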
diff --git a/runtime/dex_file.h b/runtime/dex_file.h index 1b18d21cb1..36c734197a 100644 --- a/runtime/dex_file.h +++ b/runtime/dex_file.h @@ -92,8 +92,8 @@ class DexFile { uint32_t method_ids_off_; // file offset of MethodIds array uint32_t class_defs_size_; // number of ClassDefs uint32_t class_defs_off_; // file offset of ClassDef array - uint32_t data_size_; // unused - uint32_t data_off_; // unused + uint32_t data_size_; // size of data section + uint32_t data_off_; // file offset of data section // Decode the dex magic version uint32_t GetVersion() const; diff --git a/runtime/dexopt_test.cc b/runtime/dexopt_test.cc index 24b1abbad4..3c8243a6c5 100644 --- a/runtime/dexopt_test.cc +++ b/runtime/dexopt_test.cc @@ -45,18 +45,23 @@ void DexoptTest::PostRuntimeCreate() { } void DexoptTest::GenerateOatForTest(const std::string& dex_location, - const std::string& oat_location, - CompilerFilter::Filter filter, - bool relocate, - bool pic, - bool with_alternate_image) { + const std::string& oat_location_in, + CompilerFilter::Filter filter, + bool relocate, + bool pic, + bool with_alternate_image) { std::string dalvik_cache = GetDalvikCache(GetInstructionSetString(kRuntimeISA)); std::string dalvik_cache_tmp = dalvik_cache + ".redirected"; - + std::string oat_location = oat_location_in; if (!relocate) { // Temporarily redirect the dalvik cache so dex2oat doesn't find the // relocated image file. ASSERT_EQ(0, rename(dalvik_cache.c_str(), dalvik_cache_tmp.c_str())) << strerror(errno); + // If the oat location is in dalvik cache, replace the cache path with the temporary one. + size_t pos = oat_location.find(dalvik_cache); + if (pos != std::string::npos) { + oat_location = oat_location.replace(pos, dalvik_cache.length(), dalvik_cache_tmp); + } } std::vector<std::string> args; @@ -90,6 +95,7 @@ void DexoptTest::GenerateOatForTest(const std::string& dex_location, if (!relocate) { // Restore the dalvik cache if needed. ASSERT_EQ(0, rename(dalvik_cache_tmp.c_str(), dalvik_cache.c_str())) << strerror(errno); + oat_location = oat_location_in; } // Verify the odex file was generated as expected. diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index 0617dae1ae..77554e8b30 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -622,6 +622,31 @@ static jstring DexFile_getNonProfileGuidedCompilerFilter(JNIEnv* env, return env->NewStringUTF(new_filter_str.c_str()); } +static jstring DexFile_getSafeModeCompilerFilter(JNIEnv* env, + jclass javaDexFileClass ATTRIBUTE_UNUSED, + jstring javaCompilerFilter) { + ScopedUtfChars compiler_filter(env, javaCompilerFilter); + if (env->ExceptionCheck()) { + return nullptr; + } + + CompilerFilter::Filter filter; + if (!CompilerFilter::ParseCompilerFilter(compiler_filter.c_str(), &filter)) { + return javaCompilerFilter; + } + + CompilerFilter::Filter new_filter = CompilerFilter::GetSafeModeFilterFrom(filter); + + // Filter stayed the same, return input. + if (filter == new_filter) { + return javaCompilerFilter; + } + + // Create a new string object and return.
+ std::string new_filter_str = CompilerFilter::NameOfFilter(new_filter); + return env->NewStringUTF(new_filter_str.c_str()); +} + static jboolean DexFile_isBackedByOatFile(JNIEnv* env, jclass, jobject cookie) { const OatFile* oat_file = nullptr; std::vector<const DexFile*> dex_files; @@ -695,6 +720,9 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(DexFile, getNonProfileGuidedCompilerFilter, "(Ljava/lang/String;)Ljava/lang/String;"), + NATIVE_METHOD(DexFile, + getSafeModeCompilerFilter, + "(Ljava/lang/String;)Ljava/lang/String;"), NATIVE_METHOD(DexFile, isBackedByOatFile, "(Ljava/lang/Object;)Z"), NATIVE_METHOD(DexFile, getDexFileStatus, "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"), diff --git a/runtime/oat.h b/runtime/oat.h index 9b2227bc0c..924f77c65b 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,8 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - // Revert concurrent graying for immune spaces. - static constexpr uint8_t kOatVersion[] = { '1', '2', '2', '\0' }; + static constexpr uint8_t kOatVersion[] = { '1', '2', '3', '\0' }; // ARM Baker link-time thunks. static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc index eafa77f1a2..603bbbf8bd 100644 --- a/runtime/oat_file_assistant.cc +++ b/runtime/oat_file_assistant.cc @@ -68,19 +68,34 @@ std::ostream& operator << (std::ostream& stream, const OatFileAssistant::OatStat OatFileAssistant::OatFileAssistant(const char* dex_location, const InstructionSet isa, bool load_executable) - : OatFileAssistant(dex_location, nullptr, isa, load_executable) -{ } - -OatFileAssistant::OatFileAssistant(const char* dex_location, - const char* oat_location, - const InstructionSet isa, - bool load_executable) : isa_(isa), load_executable_(load_executable), odex_(this, /*is_oat_location*/ false), oat_(this, /*is_oat_location*/ true) { CHECK(dex_location != nullptr) << "OatFileAssistant: null dex location"; - dex_location_.assign(dex_location); + + // Try to get the realpath for the dex location. + // + // This is OK with respect to the dalvik cache naming scheme because we never + // generate oat files starting from symlinks which go into dalvik cache. + // (recall that the oat files in dalvik cache are encoded by replacing '/' + // with '@' in the path). + // The boot image oat files (which are symlinked in dalvik-cache) are not + // loaded via the oat file assistant. + // + // The only case when the dex location may resolve to a different path + // is for secondary dex files (e.g. /data/user/0 symlinks to /data/data and + // the app is free to create its own internal layout). Relatedly, note that + // installd resolves the secondary dex location + // before calling dex2oat. + UniqueCPtr<const char[]> dex_location_real(realpath(dex_location, nullptr)); + if (dex_location_real != nullptr) { + dex_location_.assign(dex_location_real.get()); + } else { + // If we cannot get the realpath of the location, there is no point in continuing. + PLOG(ERROR) << "Could not get the realpath of dex_location " << dex_location; + return; + } if (load_executable_ && isa != kRuntimeISA) { LOG(WARNING) << "OatFileAssistant: Load executable specified, " @@ -98,15 +113,27 @@ OatFileAssistant::OatFileAssistant(const char* dex_location, } // Get the oat filename.
- if (oat_location != nullptr) { - oat_.Reset(oat_location); + std::string oat_file_name; + if (DexLocationToOatFilename(dex_location_, isa_, &oat_file_name, &error_msg)) { + oat_.Reset(oat_file_name); } else { - std::string oat_file_name; - if (DexLocationToOatFilename(dex_location_, isa_, &oat_file_name, &error_msg)) { - oat_.Reset(oat_file_name); - } else { - LOG(WARNING) << "Failed to determine oat file name for dex location " + LOG(WARNING) << "Failed to determine oat file name for dex location " << dex_location_ << ": " << error_msg; + } + + // Check if the dex directory is writable. + // This will be needed in most uses of OatFileAssistant and so it's OK to + // compute it eagerly. (The only use which does not need it is + // OatFileAssistant::GetStatusDump().) + size_t pos = dex_location_.rfind('/'); + if (pos == std::string::npos) { + LOG(WARNING) << "Failed to determine dex file parent directory: " << dex_location_; + } else { + std::string parent = dex_location_.substr(0, pos); + if (access(parent.c_str(), W_OK) == 0) { + dex_parent_writable_ = true; + } else { + VLOG(oat) << "Dex parent of " << dex_location_ << " is not writable: " << strerror(errno); } } } @@ -139,12 +166,17 @@ bool OatFileAssistant::Lock(std::string* error_msg) { CHECK(error_msg != nullptr); CHECK(!flock_.HasFile()) << "OatFileAssistant::Lock already acquired"; - const std::string* oat_file_name = oat_.Filename(); - if (oat_file_name == nullptr) { - *error_msg = "Failed to determine lock file"; - return false; - } - std::string lock_file_name = *oat_file_name + ".flock"; + // Note the lock will only succeed for secondary dex files and in test + // environments. + // + // The lock *will fail* for all primary apks in a production environment. + // The app does not have permissions to create locks next to its dex location + // (be it the system, data or vendor partition). We also cannot use the odex or + // oat location for the same reason. + // + // This is best effort, and if it fails it's unlikely that we will be able + // to generate oat files anyway. + std::string lock_file_name = dex_location_ + "."
+ GetInstructionSetString(isa_) + ".flock"; if (!flock_.Init(lock_file_name.c_str(), error_msg)) { unlink(lock_file_name.c_str()); @@ -170,7 +202,7 @@ static bool GetRuntimeCompilerFilterOption(CompilerFilter::Filter* filter, CHECK(filter != nullptr); CHECK(error_msg != nullptr); - *filter = CompilerFilter::kDefaultCompilerFilter; + *filter = OatFileAssistant::kDefaultCompilerFilterForDexLoading; for (StringPiece option : Runtime::Current()->GetCompilerOptions()) { if (option.starts_with("--compiler-filter=")) { const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data(); @@ -207,7 +239,7 @@ OatFileAssistant::MakeUpToDate(bool profile_changed, std::string* error_msg) { case kDex2OatForBootImage: case kDex2OatForRelocation: case kDex2OatForFilter: - return GenerateOatFile(error_msg); + return GenerateOatFileNoChecks(info, error_msg); } UNREACHABLE(); } @@ -479,8 +511,110 @@ OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& return kOatUpToDate; } -OatFileAssistant::ResultOfAttemptToUpdate -OatFileAssistant::GenerateOatFile(std::string* error_msg) { +static bool DexLocationToOdexNames(const std::string& location, + InstructionSet isa, + std::string* odex_filename, + std::string* oat_dir, + std::string* isa_dir, + std::string* error_msg) { + CHECK(odex_filename != nullptr); + CHECK(error_msg != nullptr); + + // The odex file name is formed by replacing the dex_location extension with + // .odex and inserting an oat/<isa> directory. For example: + // location = /foo/bar/baz.jar + // odex_location = /foo/bar/oat/<isa>/baz.odex + + // Find the directory portion of the dex location and add the oat/<isa> + // directory. + size_t pos = location.rfind('/'); + if (pos == std::string::npos) { + *error_msg = "Dex location " + location + " has no directory."; + return false; + } + std::string dir = location.substr(0, pos+1); + // Add the oat directory. + dir += "oat"; + if (oat_dir != nullptr) { + *oat_dir = dir; + } + // Add the isa directory + dir += "/" + std::string(GetInstructionSetString(isa)); + if (isa_dir != nullptr) { + *isa_dir = dir; + } + + // Get the base part of the file without the extension. + std::string file = location.substr(pos+1); + pos = file.rfind('.'); + if (pos == std::string::npos) { + *error_msg = "Dex location " + location + " has no extension."; + return false; + } + std::string base = file.substr(0, pos); + + *odex_filename = dir + "/" + base + ".odex"; + return true; +} + +// Prepare a subcomponent of the odex directory. +// (i.e. create and set the expected permissions on the path `dir`). +static bool PrepareDirectory(const std::string& dir, std::string* error_msg) { + struct stat dir_stat; + if (TEMP_FAILURE_RETRY(stat(dir.c_str(), &dir_stat)) == 0) { + // The directory exists. Check if it is indeed a directory. + if (!S_ISDIR(dir_stat.st_mode)) { + *error_msg = dir + " is not a dir"; + return false; + } else { + // The dir is already on disk. + return true; + } + } + + // Failed to stat. We need to create the directory. 
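+ // (Any errno other than ENOENT, e.g. EACCES, is unexpected at this point and aborts the preparation; ENOENT simply means the directory does not exist yet.)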
+ if (errno != ENOENT) { + *error_msg = "Could not stat dir " + dir + ": " + strerror(errno); + return false; + } + + mode_t mode = S_IRWXU | S_IXGRP | S_IXOTH; + if (mkdir(dir.c_str(), mode) != 0) { + *error_msg = "Could not create dir " + dir + ": " + strerror(errno); + return false; + } + if (chmod(dir.c_str(), mode) != 0) { + *error_msg = "Could not set permissions on dir " + dir + ": " + strerror(errno); + return false; + } + return true; +} + +// Prepares the odex directory for the given dex location. +static bool PrepareOdexDirectories(const std::string& dex_location, + const std::string& expected_odex_location, + InstructionSet isa, + std::string* error_msg) { + std::string actual_odex_location; + std::string oat_dir; + std::string isa_dir; + if (!DexLocationToOdexNames( + dex_location, isa, &actual_odex_location, &oat_dir, &isa_dir, error_msg)) { + return false; + } + DCHECK_EQ(expected_odex_location, actual_odex_location); + + if (!PrepareDirectory(oat_dir, error_msg)) { + return false; + } + if (!PrepareDirectory(isa_dir, error_msg)) { + return false; + } + return true; +} + +OatFileAssistant::ResultOfAttemptToUpdate OatFileAssistant::GenerateOatFileNoChecks( OatFileAssistant::OatFileInfo& info, std::string* error_msg) { CHECK(error_msg != nullptr); Runtime* runtime = Runtime::Current(); @@ -490,22 +624,37 @@ OatFileAssistant::GenerateOatFile(std::string* error_msg) { return kUpdateNotAttempted; } - if (oat_.Filename() == nullptr) { + if (info.Filename() == nullptr) { *error_msg = "Generation of oat file for dex location " + dex_location_ + " not attempted because the oat file name could not be determined."; return kUpdateNotAttempted; } - const std::string& oat_file_name = *oat_.Filename(); + const std::string& oat_file_name = *info.Filename(); const std::string& vdex_file_name = ReplaceFileExtension(oat_file_name, "vdex"); // dex2oat ignores missing dex files and doesn't report an error. // Check explicitly here so we can detect the error properly. // TODO: Why does dex2oat behave that way? - if (!OS::FileExists(dex_location_.c_str())) { - *error_msg = "Dex location " + dex_location_ + " does not exists."; + struct stat dex_path_stat; + if (TEMP_FAILURE_RETRY(stat(dex_location_.c_str(), &dex_path_stat)) != 0) { + *error_msg = "Could not access dex location " + dex_location_ + ": " + strerror(errno); return kUpdateNotAttempted; } + // If this is the odex location, we need to create the odex file layout (../oat/isa/..) + if (!info.IsOatLocation()) { + if (!PrepareOdexDirectories(dex_location_, oat_file_name, isa_, error_msg)) { + return kUpdateNotAttempted; + } + } + + // Set the permissions for the oat and the vdex files. + // The user always gets read and write while the group and others inherit + // the read access of the original dex file.
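+ // For example, a world-readable dex file (0644) produces 0644 oat/vdex files, while an app-private dex file (0600) produces 0600 files.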
+ mode_t file_mode = S_IRUSR | S_IWUSR | + (dex_path_stat.st_mode & S_IRGRP) | + (dex_path_stat.st_mode & S_IROTH); + std::unique_ptr<File> vdex_file(OS::CreateEmptyFile(vdex_file_name.c_str())); if (vdex_file.get() == nullptr) { *error_msg = "Generation of oat file " + oat_file_name @@ -514,7 +663,7 @@ OatFileAssistant::GenerateOatFile(std::string* error_msg) { return kUpdateNotAttempted; } - if (fchmod(vdex_file->Fd(), 0644) != 0) { + if (fchmod(vdex_file->Fd(), file_mode) != 0) { *error_msg = "Generation of oat file " + oat_file_name + " not attempted because the vdex file " + vdex_file_name + " could not be made world readable."; @@ -528,7 +677,7 @@ OatFileAssistant::GenerateOatFile(std::string* error_msg) { return kUpdateNotAttempted; } - if (fchmod(oat_file->Fd(), 0644) != 0) { + if (fchmod(oat_file->Fd(), file_mode) != 0) { *error_msg = "Generation of oat file " + oat_file_name + " not attempted because the oat file could not be made world readable."; oat_file->Erase(); @@ -563,8 +712,8 @@ OatFileAssistant::GenerateOatFile(std::string* error_msg) { return kUpdateFailed; } - // Mark that the oat file has changed and we should try to reload. - oat_.Reset(); + // Mark that the odex file has changed and we should try to reload. + info.Reset(); return kUpdateSucceeded; } @@ -623,35 +772,7 @@ bool OatFileAssistant::DexLocationToOdexFilename(const std::string& location, InstructionSet isa, std::string* odex_filename, std::string* error_msg) { - CHECK(odex_filename != nullptr); - CHECK(error_msg != nullptr); - - // The odex file name is formed by replacing the dex_location extension with - // .odex and inserting an oat/<isa> directory. For example: - // location = /foo/bar/baz.jar - // odex_location = /foo/bar/oat/<isa>/baz.odex - - // Find the directory portion of the dex location and add the oat/<isa> - // directory. - size_t pos = location.rfind('/'); - if (pos == std::string::npos) { - *error_msg = "Dex location " + location + " has no directory."; - return false; - } - std::string dir = location.substr(0, pos+1); - dir += "oat/" + std::string(GetInstructionSetString(isa)); - - // Get the base part of the file without the extension. - std::string file = location.substr(pos+1); - pos = file.rfind('.'); - if (pos == std::string::npos) { - *error_msg = "Dex location " + location + " has no extension."; - return false; - } - std::string base = file.substr(0, pos); - - *odex_filename = dir + "/" + base + ".odex"; - return true; + return DexLocationToOdexNames(location, isa, odex_filename, nullptr, nullptr, error_msg); } bool OatFileAssistant::DexLocationToOatFilename(const std::string& location, @@ -752,8 +873,45 @@ const OatFileAssistant::ImageInfo* OatFileAssistant::GetImageInfo() { } OatFileAssistant::OatFileInfo& OatFileAssistant::GetBestInfo() { - bool use_oat = oat_.IsUseable() || odex_.Status() == kOatCannotOpen; - return use_oat ? oat_ : odex_; + // TODO(calin): Document the side effects of class loading when + // running dalvikvm command line. + if (dex_parent_writable_) { + // If the parent of the dex file is writable it means that we can + // create the odex file. In this case we unconditionally pick the odex + // as the best oat file. This corresponds to the regular use case when + // apps get installed or when they load private, secondary dex files. + // For apps on the system partition the odex location will not be + // writable and thus the oat location might be more up to date. + return odex_; + } + + // We cannot write to the odex location. This must be a system app.
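+ // The odex is a read-only prebuilt here, so choose between it and the oat file in the dalvik cache based on which one is usable and up to date: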
+ + // If the oat location is usable take it. + if (oat_.IsUseable()) { + return oat_; + } + + // The oat file is not usable but the odex file might be up to date. + // This is an indication that we are dealing with an up to date prebuilt + // (that doesn't need relocation). + if (odex_.Status() == kOatUpToDate) { + return odex_; + } + + // The oat file is not usable and the odex file is not up to date. + // However, we have access to the original dex file which means we can make + // the oat location up to date. + if (HasOriginalDexFiles()) { + return oat_; + } + + // We got into the worst situation here: + // - the oat location is not usable + // - the prebuilt odex location is not up to date + // - and we don't have the original dex file anymore (stripped). + // Pick the odex if it exists, or the oat if not. + return (odex_.Status() == kOatCannotOpen) ? oat_ : odex_; } std::unique_ptr<gc::space::ImageSpace> OatFileAssistant::OpenImageSpace(const OatFile* oat_file) { diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h index b84e711daa..7e2385ec6c 100644 --- a/runtime/oat_file_assistant.h +++ b/runtime/oat_file_assistant.h @@ -47,6 +47,11 @@ class ImageSpace; // dex location is in the boot class path. class OatFileAssistant { public: + // The default compiler filter to use when optimizing dex files at load time if they + // are out of date. + static const CompilerFilter::Filter kDefaultCompilerFilterForDexLoading = + CompilerFilter::kQuicken; + enum DexOptNeeded { // No dexopt should (or can) be done to update the apk/jar. // Matches Java: dalvik.system.DexFile.NO_DEXOPT_NEEDED = 0 @@ -117,13 +122,6 @@ class OatFileAssistant { const InstructionSet isa, bool load_executable); - // Constructs an OatFileAssistant, providing an explicit target oat_location - // to use instead of the standard oat location. - OatFileAssistant(const char* dex_location, - const char* oat_location, - const InstructionSet isa, - bool load_executable); - ~OatFileAssistant(); // Returns true if the dex location refers to an element of the boot class @@ -232,16 +230,6 @@ class OatFileAssistant { // Returns the status of the oat file for the dex location. OatStatus OatFileStatus(); - // Generate the oat file from the dex file using the current runtime - // compiler options. - // This does not check the current status before attempting to generate the - // oat file. - // - // If the result is not kUpdateSucceeded, the value of error_msg will be set - // to a string describing why there was a failure or the update was not - // attempted. error_msg must not be null. - ResultOfAttemptToUpdate GenerateOatFile(std::string* error_msg); - // Executes dex2oat using the current runtime configuration overridden with // the given arguments. This does not check to see if dex2oat is enabled in // the runtime configuration. @@ -377,6 +365,16 @@ class OatFileAssistant { bool file_released_ = false; }; + // Generate the oat file for the given info from the dex file using the + // current runtime compiler options. + // This does not check the current status before attempting to generate the + // oat file. + // + // If the result is not kUpdateSucceeded, the value of error_msg will be set + // to a string describing why there was a failure or the update was not + // attempted. error_msg must not be null. + ResultOfAttemptToUpdate GenerateOatFileNoChecks(OatFileInfo& info, std::string* error_msg); + // Return info for the best oat file.
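+ // The selection order is: a writable dex parent always picks the odex; otherwise prefer a usable oat file, then an up-to-date odex, then the oat location if the original dex files still exist.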
OatFileInfo& GetBestInfo(); @@ -422,6 +420,9 @@ class OatFileAssistant { std::string dex_location_; + // Whether or not the parent directory of the dex file is writable. + bool dex_parent_writable_ = false; + // In a properly constructed OatFileAssistant object, isa_ should be either // the 32 or 64 bit variant for the current device. const InstructionSet isa_ = kNone; @@ -446,6 +447,8 @@ class OatFileAssistant { bool image_info_load_attempted_ = false; std::unique_ptr<ImageInfo> cached_image_info_; + friend class OatFileAssistantTest; + DISALLOW_COPY_AND_ASSIGN(OatFileAssistant); }; diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc index 18924e9654..b2b86ee289 100644 --- a/runtime/oat_file_assistant_test.cc +++ b/runtime/oat_file_assistant_test.cc @@ -43,6 +43,38 @@ class OatFileAssistantNoDex2OatTest : public DexoptTest { } }; +class ScopedNonWritable { + public: + explicit ScopedNonWritable(const std::string& dex_location) { + is_valid_ = false; + size_t pos = dex_location.rfind('/'); + if (pos != std::string::npos) { + is_valid_ = true; + dex_parent_ = dex_location.substr(0, pos); + if (chmod(dex_parent_.c_str(), 0555) != 0) { + PLOG(ERROR) << "Could not change permissions on " << dex_parent_; + } + } + } + + bool IsSuccessful() { return is_valid_ && (access(dex_parent_.c_str(), W_OK) != 0); } + + ~ScopedNonWritable() { + if (is_valid_) { + if (chmod(dex_parent_.c_str(), 0777) != 0) { + PLOG(ERROR) << "Could not restore permissions on " << dex_parent_; + } + } + } + + private: + std::string dex_parent_; + bool is_valid_; +}; + +static bool IsExecutedAsRoot() { + return geteuid() == 0; +} // Case: We have a DEX file, but no OAT file for it. // Expect: The status is kDex2OatNeeded. @@ -87,13 +119,126 @@ TEST_F(OatFileAssistantTest, NoDexNoOat) { EXPECT_EQ(nullptr, oat_file.get()); } +// Case: We have a DEX file and a PIC ODEX file, but no OAT file. +// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation. +TEST_F(OatFileAssistantTest, OdexUpToDate) { + std::string dex_location = GetScratchDir() + "/OdexUpToDate.jar"; + std::string odex_location = GetOdexDir() + "/OdexUpToDate.odex"; + Copy(GetDexSrc1(), dex_location); + GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + + // The dex parent is writable here, so the odex location is the one that gets used. + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); + + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); + EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus()); + EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles()); +} + +// Case: We have a DEX file and a PIC ODEX file, but no OAT file. We load the dex +// file via a symlink. +// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation.
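+// The symlink specifically exercises the realpath() canonicalization in the OatFileAssistant constructor: the odex must be found exactly as if the real path had been passed in.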
+TEST_F(OatFileAssistantTest, OdexUpToDateSymLink) { + std::string scratch_dir = GetScratchDir(); + std::string dex_location = GetScratchDir() + "/OdexUpToDate.jar"; + std::string odex_location = GetOdexDir() + "/OdexUpToDate.odex"; + + Copy(GetDexSrc1(), dex_location); + GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + + // Now replace the dex location with a symlink. + std::string link = scratch_dir + "/link"; + ASSERT_EQ(0, symlink(scratch_dir.c_str(), link.c_str())); + dex_location = link + "/OdexUpToDate.jar"; + + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); + + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); + EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus()); + EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles()); +} + // Case: We have a DEX file and up-to-date OAT file for it. // Expect: The status is kNoDexOptNeeded. TEST_F(OatFileAssistantTest, OatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/OatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); + // Force the use of the oat location by making the dex parent not writable. + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); + + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); + EXPECT_EQ(OatFileAssistant::kDex2OatForFilter, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus()); + EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles()); +} + +// Case: We have a DEX file and up-to-date OAT file for it. We load the dex file +// via a symlink. +// Expect: The status is kNoDexOptNeeded. +TEST_F(OatFileAssistantTest, OatUpToDateSymLink) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545.
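+ // (root bypasses file permission checks entirely, so the chmod(0555) done by ScopedNonWritable cannot make a directory effectively non-writable for it.)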
+ LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + + std::string real = GetScratchDir() + "/real"; + ASSERT_EQ(0, mkdir(real.c_str(), 0700)); + std::string link = GetScratchDir() + "/link"; + ASSERT_EQ(0, symlink(real.c_str(), link.c_str())); + + std::string dex_location = real + "/OatUpToDate.jar"; + + Copy(GetDexSrc1(), dex_location); + GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); + + // Update the dex location to point to the symlink. + dex_location = link + "/OatUpToDate.jar"; + + // For the use of oat location by making the dex parent not writable. + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, @@ -120,19 +265,16 @@ TEST_F(OatFileAssistantTest, VdexUpToDateNoOdex) { } std::string dex_location = GetScratchDir() + "/VdexUpToDateNoOdex.jar"; - std::string oat_location = GetOdexDir() + "/VdexUpToDateNoOdex.oat"; + std::string odex_location = GetOdexDir() + "/VdexUpToDateNoOdex.oat"; Copy(GetDexSrc1(), dex_location); // Generating and deleting the oat file should have the side effect of // creating an up-to-date vdex file. - GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed); - ASSERT_EQ(0, unlink(oat_location.c_str())); + GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + ASSERT_EQ(0, unlink(odex_location.c_str())); - OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), - kRuntimeISA, - false); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); // Even though the vdex file is up to date, because we don't have the oat // file, we can't know that the vdex depends on the boot image and is up to @@ -169,6 +311,11 @@ TEST_F(OatFileAssistantTest, VdexUpToDateNoOat) { if (!kIsVdexEnabled) { return; } + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } std::string dex_location = GetScratchDir() + "/VdexUpToDateNoOat.jar"; std::string oat_location; @@ -180,6 +327,8 @@ TEST_F(OatFileAssistantTest, VdexUpToDateNoOat) { GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); ASSERT_EQ(0, unlink(oat_location.c_str())); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); // Even though the vdex file is up to date, because we don't have the oat @@ -195,10 +344,19 @@ TEST_F(OatFileAssistantTest, VdexUpToDateNoOat) { // Expect: The status is kNoDexOptNeeded if the profile hasn't changed, but // kDex2Oat if the profile has changed. TEST_F(OatFileAssistantTest, ProfileOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. 
+ LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/ProfileOatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, @@ -219,10 +377,19 @@ TEST_F(OatFileAssistantTest, ProfileOatUpToDate) { // Case: We have a MultiDEX file and up-to-date OAT file for it. // Expect: The status is kNoDexOptNeeded and we load all dex files. TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/MultiDexOatUpToDate.jar"; Copy(GetMultiDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false)); @@ -240,6 +407,12 @@ TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) { // Case: We have a MultiDEX file where the non-main multdex entry is out of date. // Expect: The status is kDex2OatNeeded. TEST_F(OatFileAssistantTest, MultiDexNonMainOutOfDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/MultiDexNonMainOutOfDate.jar"; // Compile code for GetMultiDexSrc1. @@ -250,6 +423,9 @@ TEST_F(OatFileAssistantTest, MultiDexNonMainOutOfDate) { // is out of date. Copy(GetMultiDexSrc2(), dex_location); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false)); @@ -287,12 +463,12 @@ TEST_F(OatFileAssistantTest, StrippedMultiDexNonMainOutOfDate) { EXPECT_EQ(OatFileAssistant::kOatDexOutOfDate, oat_file_assistant.OatFileStatus()); } -// Case: We have a MultiDEX file and up-to-date OAT file for it with relative +// Case: We have a MultiDEX file and up-to-date ODEX file for it with relative // encoded dex locations. // Expect: The oat file status is kNoDexOptNeeded. 
TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { std::string dex_location = GetScratchDir() + "/RelativeEncodedDexLocation.jar"; - std::string oat_location = GetOdexDir() + "/RelativeEncodedDexLocation.oat"; + std::string odex_location = GetOdexDir() + "/RelativeEncodedDexLocation.odex"; // Create the dex file Copy(GetMultiDexSrc1(), dex_location); @@ -301,16 +477,15 @@ TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { std::vector<std::string> args; args.push_back("--dex-file=" + dex_location); args.push_back("--dex-location=" + std::string("RelativeEncodedDexLocation.jar")); - args.push_back("--oat-file=" + oat_location); + args.push_back("--oat-file=" + odex_location); args.push_back("--compiler-filter=speed"); std::string error_msg; ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg; // Verify we can load both dex files. - OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), - kRuntimeISA, true); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); + std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); ASSERT_TRUE(oat_file.get() != nullptr); EXPECT_TRUE(oat_file->IsExecutable()); @@ -322,6 +497,12 @@ TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { // Case: We have a DEX file and an OAT file out of date with respect to the // dex checksum. TEST_F(OatFileAssistantTest, OatDexOutOfDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/OatDexOutOfDate.jar"; // We create a dex, generate an oat for it, then overwrite the dex with a @@ -330,6 +511,9 @@ TEST_F(OatFileAssistantTest, OatDexOutOfDate) { GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); Copy(GetDexSrc2(), dex_location); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); @@ -351,17 +535,14 @@ TEST_F(OatFileAssistantTest, VdexDexOutOfDate) { } std::string dex_location = GetScratchDir() + "/VdexDexOutOfDate.jar"; - std::string oat_location = GetOdexDir() + "/VdexDexOutOfDate.oat"; + std::string odex_location = GetOdexDir() + "/VdexDexOutOfDate.oat"; Copy(GetDexSrc1(), dex_location); - GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed); - ASSERT_EQ(0, unlink(oat_location.c_str())); + GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + ASSERT_EQ(0, unlink(odex_location.c_str())); Copy(GetDexSrc2(), dex_location); - OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), - kRuntimeISA, - false); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); @@ -376,17 +557,14 @@ TEST_F(OatFileAssistantTest, VdexMultiDexNonMainOutOfDate) { } std::string dex_location = GetScratchDir() + "/VdexMultiDexNonMainOutOfDate.jar"; - std::string oat_location = GetOdexDir() + "/VdexMultiDexNonMainOutOfDate.oat"; + std::string odex_location = GetOdexDir() + "/VdexMultiDexNonMainOutOfDate.odex"; Copy(GetMultiDexSrc1(), dex_location); - GenerateOdexForTest(dex_location, oat_location, 
CompilerFilter::kSpeed); - ASSERT_EQ(0, unlink(oat_location.c_str())); + GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + ASSERT_EQ(0, unlink(odex_location.c_str())); Copy(GetMultiDexSrc2(), dex_location); - OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), - kRuntimeISA, - false); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); @@ -395,6 +573,12 @@ TEST_F(OatFileAssistantTest, VdexMultiDexNonMainOutOfDate) { // Case: We have a DEX file and an OAT file out of date with respect to the // boot image. TEST_F(OatFileAssistantTest, OatImageOutOfDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/OatImageOutOfDate.jar"; Copy(GetDexSrc1(), dex_location); @@ -404,6 +588,9 @@ TEST_F(OatFileAssistantTest, OatImageOutOfDate) { /*pic*/false, /*with_alternate_image*/true); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatForBootImage, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); @@ -423,6 +610,12 @@ TEST_F(OatFileAssistantTest, OatImageOutOfDate) { // It shouldn't matter that the OAT file is out of date, because it is // verify-at-runtime. TEST_F(OatFileAssistantTest, OatVerifyAtRuntimeImageOutOfDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/OatVerifyAtRuntimeImageOutOfDate.jar"; Copy(GetDexSrc1(), dex_location); @@ -432,6 +625,9 @@ TEST_F(OatFileAssistantTest, OatVerifyAtRuntimeImageOutOfDate) { /*pic*/false, /*with_alternate_image*/true); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); @@ -586,24 +782,23 @@ TEST_F(OatFileAssistantTest, ResourceOnlyDex) { TEST_F(OatFileAssistantTest, OdexOatOverlap) { std::string dex_location = GetScratchDir() + "/OdexOatOverlap.jar"; std::string odex_location = GetOdexDir() + "/OdexOatOverlap.odex"; - std::string oat_location = GetOdexDir() + "/OdexOatOverlap.oat"; - // Create the dex and odex files + // Create the dex, the odex and the oat files. Copy(GetDexSrc1(), dex_location); GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); - - // Create the oat file by copying the odex so they are located in the same - // place in memory. - Copy(odex_location, oat_location); + GenerateOatForTest(dex_location.c_str(), + CompilerFilter::kSpeed, + /*relocate*/false, + /*pic*/false, + /*with_alternate_image*/false); // Verify things don't go bad. 
- OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), kRuntimeISA, true); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); - // kDex2OatForRelocation is expected rather than -kDex2OatForRelocation - // based on the assumption that the oat location is more up-to-date than the odex + // -kDex2OatForRelocation is expected rather than kDex2OatForRelocation + // based on the assumption that the odex location is more up-to-date than the oat // location, even if they both need relocation. - EXPECT_EQ(OatFileAssistant::kDex2OatForRelocation, + EXPECT_EQ(-OatFileAssistant::kDex2OatForRelocation, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); @@ -621,30 +816,6 @@ TEST_F(OatFileAssistantTest, OdexOatOverlap) { EXPECT_EQ(1u, dex_files.size()); } -// Case: We have a DEX file and a PIC ODEX file, but no OAT file. -// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation. -TEST_F(OatFileAssistantTest, DexPicOdexNoOat) { - std::string dex_location = GetScratchDir() + "/DexPicOdexNoOat.jar"; - std::string odex_location = GetOdexDir() + "/DexPicOdexNoOat.odex"; - - // Create the dex and odex files - Copy(GetDexSrc1(), dex_location); - GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); - - // Verify the status. - OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - - EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); - EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); - - EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); - EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus()); - EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus()); - EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles()); -} - // Case: We have a DEX file and a VerifyAtRuntime ODEX file, but no OAT file. // Expect: The status is kNoDexOptNeeded, because VerifyAtRuntime contains no code. TEST_F(OatFileAssistantTest, DexVerifyAtRuntimeOdexNoOat) { @@ -672,11 +843,20 @@ TEST_F(OatFileAssistantTest, DexVerifyAtRuntimeOdexNoOat) { // Case: We have a DEX file and up-to-date OAT file for it. // Expect: We should load an executable dex file. TEST_F(OatFileAssistantTest, LoadOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/LoadOatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + // Load the oat using an oat file assistant. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); @@ -691,11 +871,20 @@ TEST_F(OatFileAssistantTest, LoadOatUpToDate) { // Case: We have a DEX file and up-to-date quicken OAT file for it. // Expect: We should still load the oat file as executable. TEST_F(OatFileAssistantTest, LoadExecInterpretOnlyOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. 
+ LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/LoadExecInterpretOnlyOatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kQuicken); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + // Load the oat using an oat file assistant. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); @@ -710,9 +899,19 @@ TEST_F(OatFileAssistantTest, LoadExecInterpretOnlyOatUpToDate) { // Case: We have a DEX file and up-to-date OAT file for it. // Expect: Loading non-executable should load the oat non-executable. TEST_F(OatFileAssistantTest, LoadNoExecOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/LoadNoExecOatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); + + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); // Load the oat using an oat file assistant. @@ -726,70 +925,33 @@ TEST_F(OatFileAssistantTest, LoadNoExecOatUpToDate) { EXPECT_EQ(1u, dex_files.size()); } -// Case: We have a DEX file. -// Expect: We should load an executable dex file from an alternative oat -// location. -TEST_F(OatFileAssistantTest, LoadDexNoAlternateOat) { - std::string dex_location = GetScratchDir() + "/LoadDexNoAlternateOat.jar"; - std::string oat_location = GetScratchDir() + "/LoadDexNoAlternateOat.oat"; +// Case: We don't have a DEX file and can't write the oat file. +// Expect: We should fail to generate the oat file without crashing. +TEST_F(OatFileAssistantTest, GenNoDex) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + + std::string dex_location = GetScratchDir() + "/GenNoDex.jar"; - Copy(GetDexSrc1(), dex_location); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); - OatFileAssistant oat_file_assistant( - dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); std::string error_msg; Runtime::Current()->AddCompilerOption("--compiler-filter=speed"); + // We should get kUpdateSucceeded from MakeUpToDate since there's nothing + // that can be done in this situation. ASSERT_EQ(OatFileAssistant::kUpdateSucceeded, - oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg; - - std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); - ASSERT_TRUE(oat_file.get() != nullptr); - EXPECT_TRUE(oat_file->IsExecutable()); - std::vector<std::unique_ptr<const DexFile>> dex_files; - dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str()); - EXPECT_EQ(1u, dex_files.size()); - - EXPECT_TRUE(OS::FileExists(oat_location.c_str())); + oat_file_assistant.MakeUpToDate(false, &error_msg)); - // Verify it didn't create an oat in the default location. + // Verify it didn't create an oat in the default location (dalvik-cache). 
OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OatFileStatus()); -} - -// Case: We have a DEX file but can't write the oat file. -// Expect: We should fail to make the oat file up to date. -TEST_F(OatFileAssistantTest, LoadDexUnwriteableAlternateOat) { - std::string dex_location = GetScratchDir() + "/LoadDexUnwriteableAlternateOat.jar"; - - // Make the oat location unwritable by inserting some non-existent - // intermediate directories. - std::string oat_location = GetScratchDir() + "/foo/bar/LoadDexUnwriteableAlternateOat.oat"; - - Copy(GetDexSrc1(), dex_location); - - OatFileAssistant oat_file_assistant( - dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true); - std::string error_msg; - Runtime::Current()->AddCompilerOption("--compiler-filter=speed"); - ASSERT_EQ(OatFileAssistant::kUpdateNotAttempted, - oat_file_assistant.MakeUpToDate(false, &error_msg)); - - std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); - ASSERT_TRUE(oat_file.get() == nullptr); -} - -// Case: We don't have a DEX file and can't write the oat file. -// Expect: We should fail to generate the oat file without crashing. -TEST_F(OatFileAssistantTest, GenNoDex) { - std::string dex_location = GetScratchDir() + "/GenNoDex.jar"; - std::string oat_location = GetScratchDir() + "/GenNoDex.oat"; - - OatFileAssistant oat_file_assistant( - dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true); - std::string error_msg; - Runtime::Current()->AddCompilerOption("--compiler-filter=speed"); - EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted, - oat_file_assistant.GenerateOatFile(&error_msg)); + // Verify it didn't create the odex file in the default location (../oat/isa/...odex) + EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OdexFileStatus()); } // Turn an absolute path into a path relative to the current working @@ -1006,9 +1168,9 @@ TEST_F(OatFileAssistantTest, RuntimeCompilerFilterOptionUsed) { Runtime::Current()->AddCompilerOption("--compiler-filter=quicken"); EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg; - EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); - EXPECT_EQ(OatFileAssistant::kDex2OatForFilter, + EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); Runtime::Current()->AddCompilerOption("--compiler-filter=speed"); diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc index 932d5edbef..c1cf800e5d 100644 --- a/runtime/oat_file_manager.cc +++ b/runtime/oat_file_manager.cc @@ -615,9 +615,7 @@ std::vector<std::unique_ptr<const DexFile>> OatFileManager::OpenDexFilesFromOat( Locks::mutator_lock_->AssertNotHeld(self); Runtime* const runtime = Runtime::Current(); - // TODO(calin): remove the explicit oat_location for OatFileAssistant OatFileAssistant oat_file_assistant(dex_location, - /*oat_location*/ nullptr, kRuntimeISA, !runtime->IsAotCompiler()); diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc index c3a94b93a0..4c00317d8e 100644 --- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc +++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc @@ -1078,9 +1078,156 @@ class JvmtiFunctions { jint* extension_count_ptr, jvmtiExtensionFunctionInfo** extensions) { ENSURE_VALID_ENV(env); - // We do not have any extension functions. 
- *extension_count_ptr = 0; - *extensions = nullptr; + ENSURE_NON_NULL(extension_count_ptr); + ENSURE_NON_NULL(extensions); + + std::vector<jvmtiExtensionFunctionInfo> ext_vector; + + // Holders for allocated values. + std::vector<JvmtiUniquePtr<char[]>> char_buffers; + std::vector<JvmtiUniquePtr<jvmtiParamInfo[]>> param_buffers; + std::vector<JvmtiUniquePtr<jvmtiError[]>> error_buffers; + + // Add a helper struct that takes an arbitrary const char*. add_extension will use Allocate + // appropriately. + struct CParamInfo { + const char* name; + jvmtiParamKind kind; + jvmtiParamTypes base_type; + jboolean null_ok; + }; + + auto add_extension = [&](jvmtiExtensionFunction func, + const char* id, + const char* short_description, + jint param_count, + const std::vector<CParamInfo>& params, + jint error_count, + const std::vector<jvmtiError>& errors) { + jvmtiExtensionFunctionInfo func_info; + jvmtiError error; + + func_info.func = func; + + JvmtiUniquePtr<char[]> id_ptr = CopyString(env, id, &error); + if (id_ptr == nullptr) { + return error; + } + func_info.id = id_ptr.get(); + char_buffers.push_back(std::move(id_ptr)); + + JvmtiUniquePtr<char[]> descr = CopyString(env, short_description, &error); + if (descr == nullptr) { + return error; + } + func_info.short_description = descr.get(); + char_buffers.push_back(std::move(descr)); + + func_info.param_count = param_count; + if (param_count > 0) { + JvmtiUniquePtr<jvmtiParamInfo[]> params_ptr = + AllocJvmtiUniquePtr<jvmtiParamInfo[]>(env, param_count, &error); + if (params_ptr == nullptr) { + return error; + } + func_info.params = params_ptr.get(); + param_buffers.push_back(std::move(params_ptr)); + + for (jint i = 0; i != param_count; ++i) { + JvmtiUniquePtr<char[]> param_name = CopyString(env, params[i].name, &error); + if (param_name == nullptr) { + return error; + } + func_info.params[i].name = param_name.get(); + char_buffers.push_back(std::move(param_name)); + + func_info.params[i].kind = params[i].kind; + func_info.params[i].base_type = params[i].base_type; + func_info.params[i].null_ok = params[i].null_ok; + } + } else { + func_info.params = nullptr; + } + + func_info.error_count = error_count; + if (error_count > 0) { + JvmtiUniquePtr<jvmtiError[]> errors_ptr = + AllocJvmtiUniquePtr<jvmtiError[]>(env, error_count, &error); + if (errors_ptr == nullptr) { + return error; + } + func_info.errors = errors_ptr.get(); + error_buffers.push_back(std::move(errors_ptr)); + + for (jint i = 0; i != error_count; ++i) { + func_info.errors[i] = errors[i]; + } + } else { + func_info.errors = nullptr; + } + + ext_vector.push_back(func_info); + + return ERR(NONE); + }; + + jvmtiError error; + + // Heap extensions. + error = add_extension( + reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::GetObjectHeapId), + "com.android.art.heap.get_object_heap_id", + "Retrieve the heap id of the object tagged with the given argument. 
An " + "arbitrary object is chosen if multiple objects exist with the same tag.", + 2, + { // NOLINT [whitespace/braces] [4] + { "tag", JVMTI_KIND_IN, JVMTI_TYPE_JLONG, false}, + { "heap_id", JVMTI_KIND_OUT, JVMTI_TYPE_JINT, false} + }, + 1, + { JVMTI_ERROR_NOT_FOUND }); + if (error != ERR(NONE)) { + return error; + } + + error = add_extension( + reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::GetHeapName), + "com.android.art.heap.get_heap_name", + "Retrieve the name of the heap with the given id.", + 2, + { // NOLINT [whitespace/braces] [4] + { "heap_id", JVMTI_KIND_IN, JVMTI_TYPE_JINT, false}, + { "heap_name", JVMTI_KIND_ALLOC_BUF, JVMTI_TYPE_CCHAR, false} + }, + 1, + { JVMTI_ERROR_ILLEGAL_ARGUMENT }); + if (error != ERR(NONE)) { + return error; + } + + // Copy into output buffer. + + *extension_count_ptr = ext_vector.size(); + JvmtiUniquePtr<jvmtiExtensionFunctionInfo[]> out_data = + AllocJvmtiUniquePtr<jvmtiExtensionFunctionInfo[]>(env, ext_vector.size(), &error); + if (out_data == nullptr) { + return error; + } + memcpy(out_data.get(), + ext_vector.data(), + ext_vector.size() * sizeof(jvmtiExtensionFunctionInfo)); + *extensions = out_data.release(); + + // Release all the buffer holders, we're OK now. + for (auto& holder : char_buffers) { + holder.release(); + } + for (auto& holder : param_buffers) { + holder.release(); + } + for (auto& holder : error_buffers) { + holder.release(); + } return ERR(NONE); } diff --git a/runtime/openjdkjvmti/jvmti_weak_table-inl.h b/runtime/openjdkjvmti/jvmti_weak_table-inl.h index f67fffccbb..64ab3e7b2e 100644 --- a/runtime/openjdkjvmti/jvmti_weak_table-inl.h +++ b/runtime/openjdkjvmti/jvmti_weak_table-inl.h @@ -384,6 +384,23 @@ jvmtiError JvmtiWeakTable<T>::GetTaggedObjects(jvmtiEnv* jvmti_env, return ERR(NONE); } +template <typename T> +art::mirror::Object* JvmtiWeakTable<T>::Find(T tag) { + art::Thread* self = art::Thread::Current(); + art::MutexLock mu(self, allow_disallow_lock_); + Wait(self); + + for (auto& pair : tagged_objects_) { + if (tag == pair.second) { + art::mirror::Object* obj = pair.first.template Read<art::kWithReadBarrier>(); + if (obj != nullptr) { + return obj; + } + } + } + return nullptr; +} + } // namespace openjdkjvmti #endif // ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_ diff --git a/runtime/openjdkjvmti/jvmti_weak_table.h b/runtime/openjdkjvmti/jvmti_weak_table.h index eeea75aa9d..a6fd247c51 100644 --- a/runtime/openjdkjvmti/jvmti_weak_table.h +++ b/runtime/openjdkjvmti/jvmti_weak_table.h @@ -116,6 +116,10 @@ class JvmtiWeakTable : public art::gc::SystemWeakHolder { void Unlock() RELEASE(allow_disallow_lock_); void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_); + art::mirror::Object* Find(T tag) + REQUIRES_SHARED(art::Locks::mutator_lock_) + REQUIRES(!allow_disallow_lock_); + protected: // Should HandleNullSweep be called when Sweep detects the release of an object? virtual bool DoesHandleNullOnSweep() { diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc index 7fc5104bce..9b4dcaa9d0 100644 --- a/runtime/openjdkjvmti/ti_heap.cc +++ b/runtime/openjdkjvmti/ti_heap.cc @@ -1400,4 +1400,95 @@ jvmtiError HeapUtil::ForceGarbageCollection(jvmtiEnv* env ATTRIBUTE_UNUSED) { return ERR(NONE); } + +static constexpr jint kHeapIdDefault = 0; +static constexpr jint kHeapIdImage = 1; +static constexpr jint kHeapIdZygote = 2; +static constexpr jint kHeapIdApp = 3; + +jvmtiError HeapExtensions::GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...) 
{ + if (heap_id == nullptr) { + return ERR(NULL_POINTER); + } + + art::Thread* self = art::Thread::Current(); + + auto work = [&]() REQUIRES_SHARED(art::Locks::mutator_lock_) { + ObjectTagTable* tag_table = ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get(); + art::ObjPtr<art::mirror::Object> obj = tag_table->Find(tag); + if (obj == nullptr) { + return ERR(NOT_FOUND); + } + + art::gc::Heap* const heap = art::Runtime::Current()->GetHeap(); + const art::gc::space::ContinuousSpace* const space = + heap->FindContinuousSpaceFromObject(obj, true); + jint heap_type = kHeapIdApp; + if (space != nullptr) { + if (space->IsZygoteSpace()) { + heap_type = kHeapIdZygote; + } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) { + // Only count objects in the boot image as HPROF_HEAP_IMAGE, this leaves app image objects + // as HPROF_HEAP_APP. b/35762934 + heap_type = kHeapIdImage; + } + } else { + const auto* los = heap->GetLargeObjectsSpace(); + if (los->Contains(obj.Ptr()) && los->IsZygoteLargeObject(self, obj.Ptr())) { + heap_type = kHeapIdZygote; + } + } + *heap_id = heap_type; + return ERR(NONE); + }; + + if (!art::Locks::mutator_lock_->IsSharedHeld(self)) { + if (!self->IsThreadSuspensionAllowable()) { + return ERR(INTERNAL); + } + art::ScopedObjectAccess soa(self); + return work(); + } else { + // We cannot use SOA in this case. We might be holding the lock, but may not be in the + // runnable state (e.g., during GC). + art::Locks::mutator_lock_->AssertSharedHeld(self); + // TODO: Investigate why ASSERT_SHARED_CAPABILITY doesn't work. + auto annotalysis_workaround = [&]() NO_THREAD_SAFETY_ANALYSIS { + return work(); + }; + return annotalysis_workaround(); + } +} + +static jvmtiError CopyStringAndReturn(jvmtiEnv* env, const char* in, char** out) { + jvmtiError error; + JvmtiUniquePtr<char[]> param_name = CopyString(env, in, &error); + if (param_name == nullptr) { + return error; + } + *out = param_name.release(); + return ERR(NONE); +} + +static constexpr const char* kHeapIdDefaultName = "default"; +static constexpr const char* kHeapIdImageName = "image"; +static constexpr const char* kHeapIdZygoteName = "zygote"; +static constexpr const char* kHeapIdAppName = "app"; + +jvmtiError HeapExtensions::GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_name, ...) 
{ + switch (heap_id) { + case kHeapIdDefault: + return CopyStringAndReturn(env, kHeapIdDefaultName, heap_name); + case kHeapIdImage: + return CopyStringAndReturn(env, kHeapIdImageName, heap_name); + case kHeapIdZygote: + return CopyStringAndReturn(env, kHeapIdZygoteName, heap_name); + case kHeapIdApp: + return CopyStringAndReturn(env, kHeapIdAppName, heap_name); + + default: + return ERR(ILLEGAL_ARGUMENT); + } +} + } // namespace openjdkjvmti diff --git a/runtime/openjdkjvmti/ti_heap.h b/runtime/openjdkjvmti/ti_heap.h index dccecb4aa3..b4b71ba88e 100644 --- a/runtime/openjdkjvmti/ti_heap.h +++ b/runtime/openjdkjvmti/ti_heap.h @@ -56,6 +56,12 @@ class HeapUtil { ObjectTagTable* tags_; }; +class HeapExtensions { + public: + static jvmtiError JNICALL GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...); + static jvmtiError JNICALL GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_name, ...); +}; + } // namespace openjdkjvmti #endif // ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_ diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt index 702b247819..b128d1cb70 100644 --- a/test/913-heaps/expected.txt +++ b/test/913-heaps/expected.txt @@ -385,3 +385,10 @@ root@root --(thread)--> 1@1000 [size=16, length=-1] 5@1002 --(field@10)--> 1@1000 [size=16, length=-1] 5@1002 --(field@9)--> 6@1000 [size=16, length=-1] --- + +default +image +zygote +app + +3 diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc index e319f7d98c..f39c5f16d7 100644 --- a/test/913-heaps/heaps.cc +++ b/test/913-heaps/heaps.cc @@ -817,5 +817,192 @@ extern "C" JNIEXPORT jint JNICALL Java_art_Test913_getGcFinishes(JNIEnv* env ATT return result; } +using GetObjectHeapId = jvmtiError(*)(jvmtiEnv*, jlong, jint*, ...); +static GetObjectHeapId gGetObjectHeapIdFn = nullptr; + +using GetHeapName = jvmtiError(*)(jvmtiEnv*, jint, char**, ...); +static GetHeapName gGetHeapNameFn = nullptr; + +static void FreeExtensionFunctionInfo(jvmtiExtensionFunctionInfo* extensions, jint count) { + for (size_t i = 0; i != static_cast<size_t>(count); ++i) { + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].id)); + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].short_description)); + for (size_t j = 0; j != static_cast<size_t>(extensions[i].param_count); ++j) { + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].params[j].name)); + } + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].params)); + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].errors)); + } +} + +extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkForExtensionApis( + JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) { + jint extension_count; + jvmtiExtensionFunctionInfo* extensions; + jvmtiError result = jvmti_env->GetExtensionFunctions(&extension_count, &extensions); + if (JvmtiErrorToException(env, jvmti_env, result)) { + return; + } + + for (size_t i = 0; i != static_cast<size_t>(extension_count); ++i) { + if (strcmp("com.android.art.heap.get_object_heap_id", extensions[i].id) == 0) { + CHECK(gGetObjectHeapIdFn == nullptr); + gGetObjectHeapIdFn = reinterpret_cast<GetObjectHeapId>(extensions[i].func); + + CHECK_EQ(extensions[i].param_count, 2); + + CHECK_EQ(strcmp("tag", extensions[i].params[0].name), 0); + CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JLONG); + CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN); + + CHECK_EQ(strcmp("heap_id", extensions[i].params[1].name), 0); + CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_JINT); + 
diff --git a/runtime/openjdkjvmti/ti_heap.h b/runtime/openjdkjvmti/ti_heap.h
index dccecb4aa3..b4b71ba88e 100644
--- a/runtime/openjdkjvmti/ti_heap.h
+++ b/runtime/openjdkjvmti/ti_heap.h
@@ -56,6 +56,12 @@ class HeapUtil {
   ObjectTagTable* tags_;
 };
 
+class HeapExtensions {
+ public:
+  static jvmtiError JNICALL GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...);
+  static jvmtiError JNICALL GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_name, ...);
+};
+
 }  // namespace openjdkjvmti
 
 #endif  // ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_
diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt
index 702b247819..b128d1cb70 100644
--- a/test/913-heaps/expected.txt
+++ b/test/913-heaps/expected.txt
@@ -385,3 +385,10 @@ root@root --(thread)--> 1@1000 [size=16, length=-1]
 5@1002 --(field@10)--> 1@1000 [size=16, length=-1]
 5@1002 --(field@9)--> 6@1000 [size=16, length=-1]
 ---
+
+default
+image
+zygote
+app
+
+3
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index e319f7d98c..f39c5f16d7 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -817,5 +817,192 @@ extern "C" JNIEXPORT jint JNICALL Java_art_Test913_getGcFinishes(JNIEnv* env ATT
   return result;
 }
 
+using GetObjectHeapId = jvmtiError(*)(jvmtiEnv*, jlong, jint*, ...);
+static GetObjectHeapId gGetObjectHeapIdFn = nullptr;
+
+using GetHeapName = jvmtiError(*)(jvmtiEnv*, jint, char**, ...);
+static GetHeapName gGetHeapNameFn = nullptr;
+
+static void FreeExtensionFunctionInfo(jvmtiExtensionFunctionInfo* extensions, jint count) {
+  for (size_t i = 0; i != static_cast<size_t>(count); ++i) {
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].id));
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].short_description));
+    for (size_t j = 0; j != static_cast<size_t>(extensions[i].param_count); ++j) {
+      jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].params[j].name));
+    }
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].params));
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].errors));
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkForExtensionApis(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) {
+  jint extension_count;
+  jvmtiExtensionFunctionInfo* extensions;
+  jvmtiError result = jvmti_env->GetExtensionFunctions(&extension_count, &extensions);
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
+    return;
+  }
+
+  for (size_t i = 0; i != static_cast<size_t>(extension_count); ++i) {
+    if (strcmp("com.android.art.heap.get_object_heap_id", extensions[i].id) == 0) {
+      CHECK(gGetObjectHeapIdFn == nullptr);
+      gGetObjectHeapIdFn = reinterpret_cast<GetObjectHeapId>(extensions[i].func);
+
+      CHECK_EQ(extensions[i].param_count, 2);
+
+      CHECK_EQ(strcmp("tag", extensions[i].params[0].name), 0);
+      CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JLONG);
+      CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN);
+
+      CHECK_EQ(strcmp("heap_id", extensions[i].params[1].name), 0);
+      CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_JINT);
+      CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_OUT);
+      CHECK_EQ(extensions[i].params[1].null_ok, false);
+
+      CHECK_EQ(extensions[i].error_count, 1);
+      CHECK(extensions[i].errors != nullptr);
+      CHECK(extensions[i].errors[0] == JVMTI_ERROR_NOT_FOUND);
+
+      continue;
+    }
+
+    if (strcmp("com.android.art.heap.get_heap_name", extensions[i].id) == 0) {
+      CHECK(gGetHeapNameFn == nullptr);
+      gGetHeapNameFn = reinterpret_cast<GetHeapName>(extensions[i].func);
+
+      CHECK_EQ(extensions[i].param_count, 2);
+
+      CHECK_EQ(strcmp("heap_id", extensions[i].params[0].name), 0);
+      CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JINT);
+      CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN);
+
+      CHECK_EQ(strcmp("heap_name", extensions[i].params[1].name), 0);
+      CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_CCHAR);
+      CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_ALLOC_BUF);
+      CHECK_EQ(extensions[i].params[1].null_ok, false);
+
+      CHECK_EQ(extensions[i].error_count, 1);
+      CHECK(extensions[i].errors != nullptr);
+      CHECK(extensions[i].errors[0] == JVMTI_ERROR_ILLEGAL_ARGUMENT);
+    }
+  }
+
+  CHECK(gGetObjectHeapIdFn != nullptr);
+  CHECK(gGetHeapNameFn != nullptr);
+
+  FreeExtensionFunctionInfo(extensions, extension_count);
+}
+
+extern "C" JNIEXPORT jint JNICALL Java_art_Test913_getObjectHeapId(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag) {
+  CHECK(gGetObjectHeapIdFn != nullptr);
+  jint heap_id = -1;  // Initialize so a failed call does not return an indeterminate value.
+  jvmtiError result = gGetObjectHeapIdFn(jvmti_env, tag, &heap_id);
+  JvmtiErrorToException(env, jvmti_env, result);
+  return heap_id;
+}
+
+extern "C" JNIEXPORT jstring JNICALL Java_art_Test913_getHeapName(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jint heap_id) {
+  CHECK(gGetHeapNameFn != nullptr);
+  char* heap_name;
+  jvmtiError result = gGetHeapNameFn(jvmti_env, heap_id, &heap_name);
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
+    return nullptr;
+  }
+  jstring ret = env->NewStringUTF(heap_name);
+  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(heap_name));
+  return ret;
+}
+
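Note the ownership convention in the getHeapName wrapper above: heap_name is a JVMTI_KIND_ALLOC_BUF out-parameter, so the string is allocated by the jvmtiEnv and the caller must release it with Deallocate once it has been copied out. In isolation, assuming gGetHeapNameFn is resolved as above:

    // Minimal consumption pattern for the ALLOC_BUF result.
    char* name = nullptr;
    if (gGetHeapNameFn(jvmti_env, /* heap_id */ 0, &name) == JVMTI_ERROR_NONE) {
      // ... use name ...
      jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(name));
    }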
+extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkGetObjectHeapIdInCallback(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag, jint heap_id) {
+  CHECK(gGetObjectHeapIdFn != nullptr);
+
+  {
+    struct GetObjectHeapIdCallbacks {
+      static jint JNICALL FollowReferencesCallback(
+          jvmtiHeapReferenceKind reference_kind ATTRIBUTE_UNUSED,
+          const jvmtiHeapReferenceInfo* reference_info ATTRIBUTE_UNUSED,
+          jlong class_tag ATTRIBUTE_UNUSED,
+          jlong referrer_class_tag ATTRIBUTE_UNUSED,
+          jlong size ATTRIBUTE_UNUSED,
+          jlong* tag_ptr,
+          jlong* referrer_tag_ptr ATTRIBUTE_UNUSED,
+          jint length ATTRIBUTE_UNUSED,
+          void* user_data) {
+        if (*tag_ptr != 0) {
+          GetObjectHeapIdCallbacks* p = reinterpret_cast<GetObjectHeapIdCallbacks*>(user_data);
+          if (*tag_ptr == p->check_callback_tag) {
+            jint tag_heap_id;
+            jvmtiError result = gGetObjectHeapIdFn(jvmti_env, *tag_ptr, &tag_heap_id);
+            CHECK_EQ(result, JVMTI_ERROR_NONE);
+            CHECK_EQ(tag_heap_id, p->check_callback_id);
+            return JVMTI_VISIT_ABORT;
+          }
+        }
+
+        return JVMTI_VISIT_OBJECTS;  // Continue visiting.
+      }
+
+      jlong check_callback_tag;
+      jint check_callback_id;
+    };
+
+    jvmtiHeapCallbacks callbacks;
+    memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+    callbacks.heap_reference_callback = GetObjectHeapIdCallbacks::FollowReferencesCallback;
+
+    GetObjectHeapIdCallbacks ffc;
+    ffc.check_callback_tag = tag;
+    ffc.check_callback_id = heap_id;
+
+    jvmtiError ret = jvmti_env->FollowReferences(0, nullptr, nullptr, &callbacks, &ffc);
+    if (JvmtiErrorToException(env, jvmti_env, ret)) {
+      return;
+    }
+  }
+
+  {
+    struct GetObjectHeapIdCallbacks {
+      static jint JNICALL HeapIterationCallback(jlong class_tag ATTRIBUTE_UNUSED,
+                                                jlong size ATTRIBUTE_UNUSED,
+                                                jlong* tag_ptr,
+                                                jint length ATTRIBUTE_UNUSED,
+                                                void* user_data) {
+        if (*tag_ptr != 0) {
+          GetObjectHeapIdCallbacks* p = reinterpret_cast<GetObjectHeapIdCallbacks*>(user_data);
+          if (*tag_ptr == p->check_callback_tag) {
+            jint tag_heap_id;
+            jvmtiError result = gGetObjectHeapIdFn(jvmti_env, *tag_ptr, &tag_heap_id);
+            CHECK_EQ(result, JVMTI_ERROR_NONE);
+            CHECK_EQ(tag_heap_id, p->check_callback_id);
+            return JVMTI_VISIT_ABORT;
+          }
+        }
+
+        return 0;  // Continue visiting.
+      }
+
+      jlong check_callback_tag;
+      jint check_callback_id;
+    };
+
+    jvmtiHeapCallbacks callbacks;
+    memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+    callbacks.heap_iteration_callback = GetObjectHeapIdCallbacks::HeapIterationCallback;
+
+    GetObjectHeapIdCallbacks ffc;
+    ffc.check_callback_tag = tag;
+    ffc.check_callback_id = heap_id;
+
+    jvmtiError ret = jvmti_env->IterateThroughHeap(0, nullptr, &callbacks, &ffc);
+    if (JvmtiErrorToException(env, jvmti_env, ret)) {
+      return;
+    }
+  }
+}
+
 }  // namespace Test913Heaps
 }  // namespace art
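Both callbacks above rely on the JVMTI visit-control convention: a return value with the JVMTI_VISIT_ABORT bit set ends the traversal as soon as the tagged object has been verified, while JVMTI_VISIT_OBJECTS (or plain 0 for a heap iteration callback, which only honors the abort bit) keeps it going. Reduced to its core, with kInterestingTag as an illustrative constant:

    // Shape of a jvmtiHeapIterationCallback.
    static jint JNICALL Visit(jlong class_tag, jlong size, jlong* tag_ptr,
                              jint length, void* user_data) {
      if (*tag_ptr == kInterestingTag) {
        return JVMTI_VISIT_ABORT;  // Stop the heap walk early.
      }
      return 0;                    // Keep visiting.
    }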
diff --git a/test/913-heaps/src/art/Test913.java b/test/913-heaps/src/art/Test913.java
index 8800b1a4d7..6694aad868 100644
--- a/test/913-heaps/src/art/Test913.java
+++ b/test/913-heaps/src/art/Test913.java
@@ -16,6 +16,9 @@
 
 package art;
 
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -44,6 +47,8 @@ public class Test913 {
     };
     t.start();
     cdl1.await();
+
+    doExtensionTests();
   }
 
   public static void runFollowReferences() throws Exception {
@@ -215,6 +220,50 @@
     System.out.println(getTag(floatObject));
   }
 
+  static ArrayList<Object> extensionTestHolder;
+
+  private static void doExtensionTests() {
+    checkForExtensionApis();
+
+    extensionTestHolder = new ArrayList<>();
+    System.out.println();
+
+    try {
+      getHeapName(-1);
+      System.out.println("Expected failure for -1");
+    } catch (Exception e) {
+    }
+    System.out.println(getHeapName(0));
+    System.out.println(getHeapName(1));
+    System.out.println(getHeapName(2));
+    System.out.println(getHeapName(3));
+    try {
+      getHeapName(4);
+      System.out.println("Expected failure for 4");
+    } catch (Exception e) {
+    }
+
+    System.out.println();
+
+    setTag(Object.class, 100000);
+    int objectClassHeapId = getObjectHeapId(100000);
+    int objClassExpectedHeapId = hasImage() ? 1 : 3;
+    if (objectClassHeapId != objClassExpectedHeapId) {
+      throw new RuntimeException("Expected object class in heap " + objClassExpectedHeapId
+          + " but received " + objectClassHeapId);
+    }
+
+    A a = new A();
+    extensionTestHolder.add(a);
+    setTag(a, 100001);
+    System.out.println(getObjectHeapId(100001));
+
+    checkGetObjectHeapIdInCallback(100000, objClassExpectedHeapId);
+    checkGetObjectHeapIdInCallback(100001, 3);
+
+    extensionTestHolder = null;
+  }
+
   private static void runGc() {
     clearStats();
     forceGarbageCollection();
@@ -233,6 +282,24 @@
     System.out.println((s > 0) + " " + (f > 0));
   }
 
+  private static boolean hasImage() {
+    try {
+      int pid = Integer.parseInt(new File("/proc/self").getCanonicalFile().getName());
+      BufferedReader reader = new BufferedReader(new FileReader("/proc/" + pid + "/maps"));
+      String line;
+      while ((line = reader.readLine()) != null) {
+        if (line.endsWith(".art")) {
+          reader.close();
+          return true;
+        }
+      }
+      reader.close();
+      return false;
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
   private static class TestConfig {
     private Class<?> klass = null;
     private int heapFilter = 0;
@@ -642,6 +709,11 @@
   private static native int getGcFinishes();
   private static native void forceGarbageCollection();
 
+  private static native void checkForExtensionApis();
+  private static native int getObjectHeapId(long tag);
+  private static native String getHeapName(int heapId);
+  private static native void checkGetObjectHeapIdInCallback(long tag, int heapId);
+
   public static native String[] followReferences(int heapFilter, Class<?> klassFilter,
       Object initialObject, int stopAfter, int followSet, Object jniRef);
   public static native String[] followReferencesString(Object initialObject);
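Reading the expected output together with the Java test pins down the numeric values behind the heap-id constants: getHeapName(0) through getHeapName(3) print "default", "image", "zygote" and "app", and a freshly allocated object reports heap 3. As an inferred summary (the authoritative constants live in the runtime, not in this excerpt):

    // Inferred heap-id mapping; not copied from a header in this patch.
    enum HeapId : jint {
      kHeapIdDefault = 0,  // "default"
      kHeapIdImage   = 1,  // "image": boot image objects only (b/35762934).
      kHeapIdZygote  = 2,  // "zygote"
      kHeapIdApp     = 3,  // "app": new allocations, e.g. the A instance tagged 100001.
    };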