36 files changed, 938 insertions, 1411 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7aaa7bf65e..15e3d274a5 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -93,16 +93,6 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; // the offset explicitly. constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB; -// Some instructions have special requirements for a temporary, for example -// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require -// temp that's not an R0 (to avoid an extra move) and Baker read barrier field -// loads with large offsets need a fixed register to limit the number of link-time -// thunks we generate. For these and similar cases, we want to reserve a specific -// register that's neither callee-save nor an argument register. We choose x15. -inline Location FixedTempLocation() { - return Location::RegisterLocation(x15.GetCode()); -} - inline Condition ARM64Condition(IfCondition cond) { switch (cond) { case kCondEQ: return eq; @@ -609,459 +599,6 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } -// Abstract base class for read barrier slow paths marking a reference -// `ref`. -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 { - protected: - ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint) - : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; } - - // Generate assembly code calling the read barrier marking runtime - // entry point (ReadBarrierMarkRegX). - void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) { - // No need to save live registers; it's taken care of by the - // entrypoint. Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - DCHECK_NE(ref_.reg(), LR); - DCHECK_NE(ref_.reg(), WSP); - DCHECK_NE(ref_.reg(), WZR); - // IP0 is used internally by the ReadBarrierMarkRegX entry point - // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(ref_.reg(), IP0); - DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); - // "Compact" slow path, saving two moves. - // - // Instead of using the standard runtime calling convention (input - // and output in W0): - // - // W0 <- ref - // W0 <- ReadBarrierMark(W0) - // ref <- W0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: - // - // rX <- ReadBarrierMarkRegX(rX) - // - if (entrypoint_.IsValid()) { - arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - __ Blr(XRegisterFrom(entrypoint_)); - } else { - // Entrypoint is not already loaded, load from the thread. - int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); - // This runtime call does not require a stack map. 
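For reference, a minimal standalone sketch of the "compact" per-register marking convention used by the slow path above (the thread layout, type names and table size here are assumptions for illustration, not ART's real definitions): there is one marking entrypoint per core register, indexed by the number of the register holding the reference, so the value never has to be shuffled through W0.

#include <cstddef>
#include <cstdint>

using MarkFn = uintptr_t (*)(uintptr_t ref);  // rX in, marked rX out

struct ThreadSketch {
  // Indexed by the number of the register holding the reference.
  MarkFn read_barrier_mark_reg[30];
};

// Analogous to Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(reg):
// the slow path loads the function pointer at this thread-local offset and
// calls it with BLR, leaving every other register untouched.
constexpr size_t MarkEntryPointOffset(size_t reg) {
  return offsetof(ThreadSketch, read_barrier_mark_reg) + reg * sizeof(MarkFn);
}

This is also why the DCHECKs above exclude IP0 from holding `ref`: the entrypoint uses IP0 internally as a temporary.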
- arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); - } - } - - // The location (register) of the marked object reference. - const Location ref_; - - // The location of the entrypoint if it is already loaded. - const Location entrypoint_; - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). The field `obj.field` in the object `obj` holding -// this reference does not get updated by this slow path after marking -// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 -// below for that). -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 { - public: - LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire, - Register temp, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - use_load_acquire_(use_load_acquire), - temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierSlowPathARM64"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - DCHECK(locations->CanCall()); - DCHECK(ref_.IsRegister()) << ref_; - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(obj_.IsW()); - DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg()); - DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsStaticFieldGet() || - instruction_->IsArrayGet() || - instruction_->IsArraySet() || - instruction_->IsInstanceOf() || - instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || - (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - // The read barrier instrumentation of object ArrayGet - // instructions does not support the HIntermediateAddress - // instruction. 
- DCHECK(!(instruction_->IsArrayGet() && - instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); - - // Temporary register `temp_`, used to store the lock word, must - // not be IP0 nor IP1, as we may use them to emit the reference - // load (in the call to GenerateRawReferenceLoad below), and we - // need the lock word to still be in `temp_` after the reference - // load. - DCHECK_NE(LocationFrom(temp_).reg(), IP0); - DCHECK_NE(LocationFrom(temp_).reg(), IP1); - - __ Bind(GetEntryLabel()); - - // When using MaybeGenerateReadBarrierSlow, the read barrier call is - // inserted after the original load. However, in fast path based - // Baker's read barriers, we need to perform the load of - // mirror::Object::monitor_ *before* the original reference load. - // This load-load ordering is required by the read barrier. - // The slow path (for Baker's algorithm) should look like: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // - // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as it performs additional checks that we do - // not do here for performance reasons. - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - __ Ldr(temp_, HeapOperand(obj_, monitor_offset)); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - arm64_codegen->GenerateRawReferenceLoad(instruction_, - ref_, - obj_, - offset_, - index_, - scale_factor_, - /* needs_null_check */ false, - use_load_acquire_); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel()); - GenerateReadBarrierMarkRuntimeCall(codegen); - - __ B(GetExitLabel()); - } - - private: - // The register containing the object holding the marked object reference field. - Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - size_t scale_factor_; - // Is a null check required? - bool needs_null_check_; - // Should this reference load use Load-Acquire semantics? 
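A standalone sketch of the two tricks in the slow path above (the bit position below is an illustrative stand-in for LockWord::kReadBarrierStateShift): the gray check only needs the low bit of the read-barrier state, hence the single TBZ, and adding the upper half of the zero-extended 32-bit lock word to `obj` is value-preserving but makes the reference load's address depend on the lock-word load, ordering the two loads without a memory barrier. Plain C++ cannot express that hardware ordering guarantee; the code only mirrors the arithmetic of the emitted instructions.

#include <cstdint>

constexpr uint32_t kRbStateShiftSketch = 28;  // assumed stand-in for LockWord::kReadBarrierStateShift

// __ Tbz(temp, LockWord::kReadBarrierStateShift, exit): skip marking unless gray.
inline bool IsGray(uint32_t lock_word) {
  return ((lock_word >> kRbStateShiftSketch) & 1u) != 0u;
}

// __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32)): the lock word was
// zero-extended from 32 bits, so the addend is 0 and `obj` is unchanged.
inline uint64_t AddLockWordDependency(uint64_t obj, uint64_t zero_extended_lock_word) {
  return obj + (zero_extended_lock_word >> 32);
}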
- bool use_load_acquire_; - // A temporary register used to hold the lock word of `obj_`. - Register temp_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). If needed, this slow path also atomically updates -// the field `obj.field` in the object `obj` holding this reference -// after marking (contrary to -// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never -// tries to update `obj.field`). -// -// This means that after the execution of this slow path, both `ref` -// and `obj.field` will be up-to-date; i.e., after the flip, both will -// hold the same to-space reference (unless another thread installed -// another object reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 - : public ReadBarrierMarkSlowPathBaseARM64 { - public: - LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( - HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire, - Register temp, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - use_load_acquire_(use_load_acquire), - temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - Register ref_reg = WRegisterFrom(ref_); - DCHECK(locations->CanCall()); - DCHECK(ref_.IsRegister()) << ref_; - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(obj_.IsW()); - DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg()); - - // This slow path is only used by the UnsafeCASObject intrinsic at the moment. - DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking and field updating slow path: " - << instruction_->DebugName(); - DCHECK(instruction_->GetLocations()->Intrinsified()); - DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); - DCHECK_EQ(offset_, 0u); - DCHECK_EQ(scale_factor_, 0u); - DCHECK_EQ(use_load_acquire_, false); - // The location of the offset of the marked reference field within `obj_`. - Location field_offset = index_; - DCHECK(field_offset.IsRegister()) << field_offset; - - // Temporary register `temp_`, used to store the lock word, must - // not be IP0 nor IP1, as we may use them to emit the reference - // load (in the call to GenerateRawReferenceLoad below), and we - // need the lock word to still be in `temp_` after the reference - // load. 
- DCHECK_NE(LocationFrom(temp_).reg(), IP0); - DCHECK_NE(LocationFrom(temp_).reg(), IP1); - - __ Bind(GetEntryLabel()); - - // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // old_ref = ref; - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // compareAndSwapObject(obj, field_offset, old_ref, ref); - // } - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - __ Ldr(temp_, HeapOperand(obj_, monitor_offset)); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - arm64_codegen->GenerateRawReferenceLoad(instruction_, - ref_, - obj_, - offset_, - index_, - scale_factor_, - /* needs_null_check */ false, - use_load_acquire_); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel()); - - // Save the old value of the reference before marking it. - // Note that we cannot use IP to save the old reference, as IP is - // used internally by the ReadBarrierMarkRegX entry point, and we - // need the old reference after the call to that entry point. - DCHECK_NE(LocationFrom(temp_).reg(), IP0); - __ Mov(temp_.W(), ref_reg); - - GenerateReadBarrierMarkRuntimeCall(codegen); - - // If the new reference is different from the old reference, - // update the field in the holder (`*(obj_ + field_offset)`). - // - // Note that this field could also hold a different object, if - // another thread had concurrently changed it. In that case, the - // LDXR/CMP/BNE sequence of instructions in the compare-and-set - // (CAS) operation below would abort the CAS, leaving the field - // as-is. - __ Cmp(temp_.W(), ref_reg); - __ B(eq, GetExitLabel()); - - // Update the the holder's field atomically. This may fail if - // mutator updates before us, but it's OK. This is achieved - // using a strong compare-and-set (CAS) operation with relaxed - // memory synchronization ordering, where the expected value is - // the old reference and the desired value is the new reference. 
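The exclusive-monitor loop emitted next is, semantically, the strong relaxed compare-and-set described in the comment above; a standalone sketch with std::atomic (a 32-bit compressed heap reference slot is assumed):

#include <atomic>
#include <cstdint>

inline bool StoreMarkedReference(std::atomic<uint32_t>* field,  // obj + field_offset
                                 uint32_t old_ref,              // expected
                                 uint32_t marked_ref) {         // desired
  // compare_exchange_strong retries internally until the store-exclusive
  // succeeds or the loaded value differs from `old_ref`, matching the
  // CBNZ-back-to-loop_head retry in the generated LDXR/CMP/BNE/STXR sequence.
  return field->compare_exchange_strong(old_ref, marked_ref,
                                        std::memory_order_relaxed);
}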
- - MacroAssembler* masm = arm64_codegen->GetVIXLAssembler(); - UseScratchRegisterScope temps(masm); - - // Convenience aliases. - Register base = obj_.W(); - Register offset = XRegisterFrom(field_offset); - Register expected = temp_.W(); - Register value = ref_reg; - Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. - Register tmp_value = temps.AcquireW(); // Value in memory. - - __ Add(tmp_ptr, base.X(), Operand(offset)); - - if (kPoisonHeapReferences) { - arm64_codegen->GetAssembler()->PoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not poison `value`, as it is the same register as - // `expected`, which has just been poisoned. - } else { - arm64_codegen->GetAssembler()->PoisonHeapReference(value); - } - } - - // do { - // tmp_value = [tmp_ptr] - expected; - // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); - - vixl::aarch64::Label loop_head, comparison_failed, exit_loop; - __ Bind(&loop_head); - __ Ldxr(tmp_value, MemOperand(tmp_ptr)); - __ Cmp(tmp_value, expected); - __ B(&comparison_failed, ne); - __ Stxr(tmp_value, value, MemOperand(tmp_ptr)); - __ Cbnz(tmp_value, &loop_head); - __ B(&exit_loop); - __ Bind(&comparison_failed); - __ Clrex(); - __ Bind(&exit_loop); - - if (kPoisonHeapReferences) { - arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not unpoison `value`, as it is the same register as - // `expected`, which has just been unpoisoned. - } else { - arm64_codegen->GetAssembler()->UnpoisonHeapReference(value); - } - } - - __ B(GetExitLabel()); - } - - private: - // The register containing the object holding the marked object reference field. - const Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - size_t scale_factor_; - // Is a null check required? - bool needs_null_check_; - // Should this reference load use Load-Acquire semantics? - bool use_load_acquire_; - // A temporary register used to hold the lock word of `obj_`; and - // also to hold the original reference value, when the reference is - // marked. - const Register temp_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64); -}; - // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { public: @@ -1447,9 +984,12 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { case BakerReadBarrierKind::kGcRoot: { DCHECK_GE(literal_offset, 4u); uint32_t prev_insn = GetInsn(literal_offset - 4u); - // LDR (immediate) with correct root_reg. const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); + // Usually LDR (immediate) with correct root_reg but + // we may have a "MOV marked, old_value" for UnsafeCASObject. + if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) { // MOV? + CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); // LDR? + } break; } default: @@ -1634,8 +1174,24 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_ if (value_can_be_null) { __ Cbz(value, &done); } + // Load the address of the card table into `card`. __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value())); + // Calculate the offset (in the card table) of the card corresponding to + // `object`. 
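A standalone sketch of the relaxed patch-site check added to Finalize() above, with the masks and opcode patterns copied from the hunk: the instruction 8 bytes before the thunk return address may now be either the usual 32-bit LDR (immediate) or, for the UnsafeCASObject path, a MOV (register), i.e. the ORR Wroot, WZR, Wold alias; both encodings keep the root register in bits [4:0], which is all the introspection thunk needs.

#include <cstdint>

inline bool IsValidGcRootInsn(uint32_t prev_insn, uint32_t root_reg) {
  bool is_mov = (prev_insn & 0xffe0ffffu) == (0x2a0003e0u | root_reg);  // MOV (register)
  bool is_ldr = (prev_insn & 0xffc0001fu) == (0xb9400000u | root_reg);  // LDR (imm., 32-bit)
  return is_mov || is_ldr;
}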
__ Lsr(temp, object, gc::accounting::CardTable::kCardShift); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the STRB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ Strb(card, MemOperand(card, temp.X())); if (value_can_be_null) { __ Bind(&done); @@ -4710,7 +4266,7 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch( } void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) { - ExactAssemblyScope guard(GetVIXLAssembler(), 1 * vixl::aarch64::kInstructionSize); + DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope. if (Runtime::Current()->UseJitCompilation()) { auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data); vixl::aarch64::Label* slow_path_entry = &it->second.label; @@ -6256,7 +5812,7 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad( __ bind(fixup_label); } static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, - "GC root LDR must be 2 instruction (8B) before the return address label."); + "GC root LDR must be 2 instructions (8B) before the return address label."); __ ldr(root_reg, MemOperand(obj.X(), offset)); EmitBakerReadBarrierCbnz(custom_data); __ bind(&return_address); @@ -6286,11 +5842,29 @@ void CodeGeneratorARM64::GenerateGcRootFieldLoad( MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } +void CodeGeneratorARM64::GenerateUnsafeCasOldValueMovWithBakerReadBarrier( + vixl::aarch64::Register marked, + vixl::aarch64::Register old_value) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR. 
+ uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked.GetCode()); + + ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, + "GC root LDR must be 2 instructions (8B) before the return address label."); + __ mov(marked, old_value); + EmitBakerReadBarrierCbnz(custom_data); + __ bind(&return_address); +} + void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, - Register obj, - uint32_t offset, - Location maybe_temp, + vixl::aarch64::Register obj, + const vixl::aarch64::MemOperand& src, bool needs_null_check, bool use_load_acquire) { DCHECK(kEmitCompilerReadBarrier); @@ -6317,27 +5891,16 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins // HeapReference<mirror::Object> reference = *(obj+offset); // gray_return_address: - DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); - Register base = obj; - if (use_load_acquire) { - DCHECK(maybe_temp.IsRegister()); - base = WRegisterFrom(maybe_temp); - __ Add(base, obj, offset); - offset = 0u; - } else if (offset >= kReferenceLoadMinFarOffset) { - DCHECK(maybe_temp.IsRegister()); - base = WRegisterFrom(maybe_temp); - static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); - __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); - offset &= (kReferenceLoadMinFarOffset - 1u); - } + DCHECK(src.GetAddrMode() == vixl::aarch64::Offset); + DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>)); + UseScratchRegisterScope temps(GetVIXLAssembler()); DCHECK(temps.IsAvailable(ip0)); DCHECK(temps.IsAvailable(ip1)); temps.Exclude(ip0, ip1); uint32_t custom_data = use_load_acquire - ? EncodeBakerReadBarrierAcquireData(base.GetCode(), obj.GetCode()) - : EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode()); + ? 
EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode()) + : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode()); { ExactAssemblyScope guard(GetVIXLAssembler(), @@ -6350,10 +5913,10 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins " 2 instructions (8B) for heap poisoning."); Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); if (use_load_acquire) { - DCHECK_EQ(offset, 0u); - __ ldar(ref_reg, MemOperand(base.X())); + DCHECK_EQ(src.GetOffset(), 0); + __ ldar(ref_reg, src); } else { - __ ldr(ref_reg, MemOperand(base.X(), offset)); + __ ldr(ref_reg, src); } if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); @@ -6368,6 +5931,32 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); } +void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location maybe_temp, + bool needs_null_check, + bool use_load_acquire) { + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + Register base = obj; + if (use_load_acquire) { + DCHECK(maybe_temp.IsRegister()); + base = WRegisterFrom(maybe_temp); + __ Add(base, obj, offset); + offset = 0u; + } else if (offset >= kReferenceLoadMinFarOffset) { + DCHECK(maybe_temp.IsRegister()); + base = WRegisterFrom(maybe_temp); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); + offset &= (kReferenceLoadMinFarOffset - 1u); + } + MemOperand src(base.X(), offset); + GenerateFieldLoadWithBakerReadBarrier( + instruction, ref, obj, src, needs_null_check, use_load_acquire); +} + void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(Location ref, Register obj, uint32_t data_offset, @@ -6435,198 +6024,6 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(Location ref, MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); } -void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - Register temp, - bool needs_null_check, - bool use_load_acquire) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - // If we are emitting an array load, we should not be using a - // Load Acquire instruction. In other words: - // `instruction->IsArrayGet()` => `!use_load_acquire`. - DCHECK(!instruction->IsArrayGet() || !use_load_acquire); - - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to mark the reference. Then, in the slow path, check the - // gray bit in the lock word of the reference's holder (`obj`) to - // decide whether to mark `ref` or not. - // - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. 
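A standalone sketch of the offset split done by the wrapping overload above (the 16 KB threshold mirrors kReferenceLoadMinFarOffset at the top of this file; the struct name is illustrative): far field offsets are rebased onto the temp so the LDR immediate stays small and the number of distinct link-time introspection thunks stays bounded.

#include <cstdint>

constexpr uint32_t kReferenceLoadMinFarOffsetSketch = 16 * 1024;
static_assert((kReferenceLoadMinFarOffsetSketch &
               (kReferenceLoadMinFarOffsetSketch - 1u)) == 0u,
              "Expecting a power of 2.");

struct RebasedAccess {
  uint32_t base_add;    // ADD temp, obj, #base_add
  uint32_t ldr_offset;  // LDR ref, [temp, #ldr_offset]
};

inline RebasedAccess SplitFarOffset(uint32_t offset) {
  return { offset & ~(kReferenceLoadMinFarOffsetSketch - 1u),
           offset & (kReferenceLoadMinFarOffsetSketch - 1u) };
}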
- // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // } - - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will be loaded by the slow path code. - SlowPathCodeARM64* slow_path = - new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARM64( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - use_load_acquire, - temp); - AddSlowPath(slow_path); - - __ Cbnz(mr, slow_path->GetEntryLabel()); - // Fast path: the GC is not marking: just load the reference. - GenerateRawReferenceLoad( - instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); - __ Bind(slow_path->GetExitLabel()); - MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); -} - -void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - Location field_offset, - Register temp, - bool needs_null_check, - bool use_load_acquire) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - // If we are emitting an array load, we should not be using a - // Load Acquire instruction. In other words: - // `instruction->IsArrayGet()` => `!use_load_acquire`. - DCHECK(!instruction->IsArrayGet() || !use_load_acquire); - - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to update the reference field within `obj`. Then, in the - // slow path, check the gray bit in the lock word of the reference's - // holder (`obj`) to decide whether to mark `ref` and update the - // field or not. - // - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // old_ref = ref; - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // compareAndSwapObject(obj, field_offset, old_ref, ref); - // } - // } - - // Slow path updating the object reference at address `obj + field_offset` - // when the GC is marking. The entrypoint will be loaded by the slow path code. - SlowPathCodeARM64* slow_path = - new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( - instruction, - ref, - obj, - /* offset */ 0u, - /* index */ field_offset, - /* scale_factor */ 0u /* "times 1" */, - needs_null_check, - use_load_acquire, - temp); - AddSlowPath(slow_path); - - __ Cbnz(mr, slow_path->GetEntryLabel()); - // Fast path: the GC is not marking: nothing to do (the field is - // up-to-date, and we don't need to load the reference). 
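A standalone sketch of the gate used by the removed helpers (function and parameter names are assumed): the Marking Register caches Thread::Current()->GetIsGcMarking(), so a single CBNZ decides between the plain reference load and the lock-word-inspecting slow path; for the field-updating variant the fast path does nothing at all, since the field is already up to date when the GC is not marking.

#include <cstdint>

// mr != 0  <=>  Thread::Current()->GetIsGcMarking().
template <typename SlowPath, typename FastLoad>
inline uint32_t BakerGate(uint32_t mr, SlowPath slow, FastLoad fast) {
  // __ Cbnz(mr, slow_path->GetEntryLabel()); ... __ Bind(slow_path->GetExitLabel());
  return (mr != 0u) ? slow() : fast();
}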
- __ Bind(slow_path->GetExitLabel()); - MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); -} - -void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire) { - DCHECK(obj.IsW()); - DataType::Type type = DataType::Type::kReference; - Register ref_reg = RegisterFrom(ref, type); - - // If needed, vixl::EmissionCheckScope guards are used to ensure - // that no pools are emitted between the load (macro) instruction - // and MaybeRecordImplicitNullCheck. - - if (index.IsValid()) { - // Load types involving an "index": ArrayGet, - // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject - // intrinsics. - if (use_load_acquire) { - // UnsafeGetObjectVolatile intrinsic case. - // Register `index` is not an index in an object array, but an - // offset to an object reference field within object `obj`. - DCHECK(instruction->IsInvoke()) << instruction->DebugName(); - DCHECK(instruction->GetLocations()->Intrinsified()); - DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) - << instruction->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, 0u); - DCHECK_EQ(needs_null_check, false); - // /* HeapReference<mirror::Object> */ ref = *(obj + index) - MemOperand field = HeapOperand(obj, XRegisterFrom(index)); - LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); - } else { - // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases. - // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor)) - if (index.IsConstant()) { - uint32_t computed_offset = offset + (Int64FromLocation(index) << scale_factor); - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - Load(type, ref_reg, HeapOperand(obj, computed_offset)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } else { - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = temps.AcquireW(); - __ Add(temp, obj, offset); - { - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } - } - } - } else { - // /* HeapReference<mirror::Object> */ ref = *(obj + offset) - MemOperand field = HeapOperand(obj, offset); - if (use_load_acquire) { - // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire. - LoadAcquire(instruction, ref_reg, field, needs_null_check); - } else { - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - Load(type, ref_reg, field); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } - } - - // Object* ref = ref_addr->AsMirrorPtr() - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); -} - void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { // The following condition is a compile-time one, so it does not have a run-time cost. 
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 6a358a4306..4f6a44fe4d 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -92,6 +92,16 @@ const vixl::aarch64::CPURegList runtime_reserved_core_registers = ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? mr : vixl::aarch64::NoCPUReg), vixl::aarch64::lr); +// Some instructions have special requirements for a temporary, for example +// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require +// temp that's not an R0 (to avoid an extra move) and Baker read barrier field +// loads with large offsets need a fixed register to limit the number of link-time +// thunks we generate. For these and similar cases, we want to reserve a specific +// register that's neither callee-save nor an argument register. We choose x15. +inline Location FixedTempLocation() { + return Location::RegisterLocation(vixl::aarch64::x15.GetCode()); +} + // Callee-save registers AAPCS64, without x19 (Thread Register) (nor // x20 (Marking Register) when emitting Baker read barriers). const vixl::aarch64::CPURegList callee_saved_core_registers( @@ -661,6 +671,18 @@ class CodeGeneratorARM64 : public CodeGenerator { uint32_t offset, vixl::aarch64::Label* fixup_label, ReadBarrierOption read_barrier_option); + // Generate MOV for the `old_value` in UnsafeCASObject and mark it with Baker read barrier. + void GenerateUnsafeCasOldValueMovWithBakerReadBarrier(vixl::aarch64::Register marked, + vixl::aarch64::Register old_value); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + // Overload suitable for Unsafe.getObject/-Volatile() intrinsic. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch64::Register obj, + const vixl::aarch64::MemOperand& src, + bool needs_null_check, + bool use_load_acquire); // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -678,51 +700,6 @@ class CodeGeneratorARM64 : public CodeGenerator { Location index, vixl::aarch64::Register temp, bool needs_null_check); - // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, - // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. - // - // Load the object reference located at the address - // `obj + offset + (index << scale_factor)`, held by object `obj`, into - // `ref`, and mark it if needed. - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::aarch64::Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - vixl::aarch64::Register temp, - bool needs_null_check, - bool use_load_acquire); - - // Generate code checking whether the the reference field at the - // address `obj + field_offset`, held by object `obj`, needs to be - // marked, and if so, marking it and updating the field within `obj` - // with the marked value. - // - // This routine is used for the implementation of the - // UnsafeCASObject intrinsic with Baker read barriers. - // - // This method has a structure similar to - // GenerateReferenceLoadWithBakerReadBarrier, but note that argument - // `ref` is only as a temporary here, and thus its value should not - // be used afterwards. 
- void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::aarch64::Register obj, - Location field_offset, - vixl::aarch64::Register temp, - bool needs_null_check, - bool use_load_acquire); - - // Generate a heap reference load (with no read barrier). - void GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - vixl::aarch64::Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire); // Emit code checking the status of the Marking Register, and // aborting the program if MR does not match the value stored in the diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 3e63c2674c..f62421645e 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -786,160 +786,9 @@ class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL { // Slow path loading `obj`'s lock word, loading a reference from // object `*(obj + offset + (index << scale_factor))` into `ref`, and // marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). The field `obj.field` in the object `obj` holding -// this reference does not get updated by this slow path after marking -// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL -// below for that). -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. 
-class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL { - public: - LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction, - Location ref, - vixl32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check, - vixl32::Register temp, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierSlowPathARMVIXL"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - vixl32::Register ref_reg = RegisterFrom(ref_); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg; - DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsStaticFieldGet() || - instruction_->IsArrayGet() || - instruction_->IsArraySet() || - instruction_->IsInstanceOf() || - instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || - (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - // The read barrier instrumentation of object ArrayGet - // instructions does not support the HIntermediateAddress - // instruction. - DCHECK(!(instruction_->IsArrayGet() && - instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); - - // Temporary register `temp_`, used to store the lock word, must - // not be IP, as we may use it to emit the reference load (in the - // call to GenerateRawReferenceLoad below), and we need the lock - // word to still be in `temp_` after the reference load. - DCHECK(!temp_.Is(ip)); - - __ Bind(GetEntryLabel()); - - // When using MaybeGenerateReadBarrierSlow, the read barrier call is - // inserted after the original load. However, in fast path based - // Baker's read barriers, we need to perform the load of - // mirror::Object::monitor_ *before* the original reference load. - // This load-load ordering is required by the read barrier. - // The slow path (for Baker's algorithm) should look like: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // - // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as it performs additional checks that we do - // not do here for performance reasons. 
- - CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including the rb_state, - // which shall prevent load-load reordering without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_, obj_, Operand(temp_, ShiftType::LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - arm_codegen->GenerateRawReferenceLoad( - instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with LSRS - // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1); - __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS. - GenerateReadBarrierMarkRuntimeCall(codegen); - - __ B(GetExitLabel()); - } - - private: - // The register containing the object holding the marked object reference field. - vixl32::Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - ScaleFactor scale_factor_; - // Is a null check required? - bool needs_null_check_; - // A temporary register used to hold the lock word of `obj_`. - vixl32::Register temp_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARMVIXL); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker // read barrier). If needed, this slow path also atomically updates // the field `obj.field` in the object `obj` holding this reference -// after marking (contrary to -// LoadReferenceWithBakerReadBarrierSlowPathARMVIXL above, which never -// tries to update `obj.field`). +// after marking. 
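A standalone sketch of the LSRS test used in the ARM slow paths above (the shift amount is a stand-in for LockWord::kReadBarrierStateShift): shifting the lock word right by one more than the state's bit position drops that bit into the carry flag, so a 16-bit LSRS plus a B.cc replaces a wider TST-with-immediate sequence.

#include <cstdint>

// Carry after `LSRS temp, lock_word, #(shift + 1)` is the last bit shifted
// out, i.e. bit `shift` of the lock word; B.cc (carry clear) then skips the
// marking call for non-gray objects.
inline bool GrayBitIntoCarry(uint32_t lock_word, uint32_t rb_state_shift) {
  return ((lock_word >> rb_state_shift) & 1u) != 0u;
}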
// // This means that after the execution of this slow path, both `ref` // and `obj.field` will be up-to-date; i.e., after the flip, both will @@ -1006,7 +855,7 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL __ Bind(GetEntryLabel()); - // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's: + // The implementation is: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -6995,9 +6844,25 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, if (can_be_null) { __ CompareAndBranchIfZero(value, &is_null); } + // Load the address of the card table into `card`. GetAssembler()->LoadFromOffset( kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value()); + // Calculate the offset (in the card table) of the card corresponding to + // `object`. __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift)); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the STRB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ Strb(card, MemOperand(card, temp)); if (can_be_null) { __ Bind(&is_null); @@ -8796,8 +8661,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl32::Register obj, - uint32_t offset, - Location temp, + const vixl32::MemOperand& src, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -8823,23 +8687,15 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i // HeapReference<mirror::Object> reference = *(obj+offset); // gray_return_address: - DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + DCHECK(src.GetAddrMode() == vixl32::Offset); + DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>)); vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); - bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset); - vixl32::Register base = obj; - if (offset >= kReferenceLoadMinFarOffset) { - base = RegisterFrom(temp); - static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); - __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); - offset &= (kReferenceLoadMinFarOffset - 1u); - // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large - // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely - // increase the overall code size when taking the generated thunks into account. 
- DCHECK(!narrow); - } + bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate()); + UseScratchRegisterScope temps(GetVIXLAssembler()); temps.Exclude(ip); - uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow); + uint32_t custom_data = + EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow); { vixl::EmissionCheckScope guard( @@ -8850,7 +8706,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i __ cmp(mr, Operand(0)); EmitBakerReadBarrierBne(custom_data); ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); - __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); + __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, src); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } @@ -8871,6 +8727,24 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip)); } +void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl32::Register obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + vixl32::Register base = obj; + if (offset >= kReferenceLoadMinFarOffset) { + base = RegisterFrom(temp); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); + offset &= (kReferenceLoadMinFarOffset - 1u); + } + GenerateFieldLoadWithBakerReadBarrier( + instruction, ref, obj, MemOperand(base, offset), needs_null_check); +} + void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref, vixl32::Register obj, uint32_t data_offset, @@ -8938,53 +8812,6 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref, MaybeGenerateMarkingRegisterCheck(/* code */ 21, /* temp_loc */ LocationFrom(ip)); } -void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - Location temp, - bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to mark the reference. Then, in the slow path, check the - // gray bit in the lock word of the reference's holder (`obj`) to - // decide whether to mark `ref` or not. - // - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // } - - vixl32::Register temp_reg = RegisterFrom(temp); - - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will be loaded by the slow path code. 
- SlowPathCodeARMVIXL* slow_path = - new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( - instruction, ref, obj, offset, index, scale_factor, needs_null_check, temp_reg); - AddSlowPath(slow_path); - - __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel()); - // Fast path: the GC is not marking: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); - __ Bind(slow_path->GetExitLabel()); - MaybeGenerateMarkingRegisterCheck(/* code */ 22); -} - void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl32::Register obj, diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 0106236b17..2fd18cab47 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -624,6 +624,14 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ReadBarrierOption read_barrier_option); // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. + // Overload suitable for Unsafe.getObject/-Volatile() intrinsic. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch32::Register obj, + const vixl::aarch32::MemOperand& src, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl::aarch32::Register obj, @@ -638,20 +646,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator { Location index, Location temp, bool needs_null_check); - // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, - // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. - // - // Load the object reference located at the address - // `obj + offset + (index << scale_factor)`, held by object `obj`, into - // `ref`, and mark it if needed. - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::aarch32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - Location temp, - bool needs_null_check); // Generate code checking whether the the reference field at the // address `obj + field_offset`, held by object `obj`, needs to be diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 0ed5756b53..476e8ab944 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1868,12 +1868,27 @@ void CodeGeneratorMIPS::MarkGCCard(Register object, if (value_can_be_null) { __ Beqz(value, &done); } + // Load the address of the card table into `card`. __ LoadFromOffset(kLoadWord, card, TR, Thread::CardTableOffset<kMipsPointerSize>().Int32Value()); + // Calculate the address of the card corresponding to `object`. __ Srl(temp, object, gc::accounting::CardTable::kCardShift); __ Addu(temp, card, temp); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). 
Therefore the SB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ Sb(card, temp, 0); if (value_can_be_null) { __ Bind(&done); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 2b6928eee2..c05f62722c 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1490,12 +1490,27 @@ void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, if (value_can_be_null) { __ Beqzc(value, &done); } + // Load the address of the card table into `card`. __ LoadFromOffset(kLoadDoubleword, card, TR, Thread::CardTableOffset<kMips64PointerSize>().Int32Value()); + // Calculate the address of the card corresponding to `object`. __ Dsrl(temp, object, gc::accounting::CardTable::kCardShift); __ Daddu(temp, card, temp); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the SB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ Sb(card, temp, 0); if (value_can_be_null) { __ Bind(&done); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index a835aed6b0..63bd8413eb 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -5104,9 +5104,25 @@ void CodeGeneratorX86::MarkGCCard(Register temp, __ testl(value, value); __ j(kEqual, &is_null); } + // Load the address of the card table into `card`. __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value())); + // Calculate the offset (in the card table) of the card corresponding to + // `object`. __ movl(temp, object); __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). 
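A standalone sketch of the dual use of `card` that the comment above describes (the shift value is an illustrative stand-in for CardTable::kCardShift): because the table base is biased at creation so that its least-significant byte equals kCardDirty, one register provides both the store's address base and the byte that gets stored.

#include <cstdint>

constexpr uintptr_t kCardShiftSketch = 10;  // assumed card size for illustration

inline void MarkCard(uint8_t* biased_card_table, uintptr_t object_address) {
  // STRB/SB/MOVB card, [card + (object >> kCardShift)]: the value written is
  // the low byte of the biased base, which equals kCardDirty by construction.
  biased_card_table[object_address >> kCardShiftSketch] =
      static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_card_table));
}

The same reasoning applies to each architecture's card-marking sequence touched by this change.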
__ movb(Address(temp, card, TIMES_1, 0), X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); if (value_can_be_null) { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index dee891b8de..0bd7319677 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -5436,10 +5436,26 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, __ testl(value, value); __ j(kEqual, &is_null); } + // Load the address of the card table into `card`. __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(), /* no_rip */ true)); + // Calculate the offset (in the card table) of the card corresponding to + // `object`. __ movq(temp, object); __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift)); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ movb(Address(temp, card, TIMES_1, 0), card); if (value_can_be_null) { __ Bind(&is_null); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 4b2bcc8ca8..74d4a8f63b 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -745,15 +745,15 @@ static void GenUnsafeGet(HInvoke* invoke, if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. Register temp = WRegisterFrom(locations->GetTemp(0)); - codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, - trg_loc, - base, - /* offset */ 0u, - /* index */ offset_loc, - /* scale_factor */ 0u, - temp, - /* needs_null_check */ false, - is_volatile); + MacroAssembler* masm = codegen->GetVIXLAssembler(); + // Piggy-back on the field load path using introspection for the Baker read barrier. + __ Add(temp, base, offset.W()); // Offset should not exceed 32 bits. + codegen->GenerateFieldLoadWithBakerReadBarrier(invoke, + trg_loc, + base, + MemOperand(temp.X()), + /* needs_null_check */ false, + is_volatile); } else { // Other cases. MemOperand mem_op(base.X(), offset); @@ -782,9 +782,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* in kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + // We need a temporary register for the read barrier load in order to use + // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(). + locations->AddTemp(FixedTempLocation()); } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
locations->SetInAt(1, Location::RequiresRegister()); @@ -984,106 +984,155 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall, kIntrinsified); + if (can_call) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. Likewise when - // emitting a (Baker) read barrier, which may call. - Location::OutputOverlap overlaps = - ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call) - ? Location::kOutputOverlap - : Location::kNoOutputOverlap; - locations->SetOut(Location::RequiresRegister(), overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // Temporary register for (Baker) read barrier. + // We need two non-scratch temporary registers for (Baker) read barrier. + locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); } } +class BakerReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 { + public: + explicit BakerReadBarrierCasSlowPathARM64(HInvoke* invoke) + : SlowPathCodeARM64(invoke) {} + + const char* GetDescription() const OVERRIDE { return "BakerReadBarrierCasSlowPathARM64"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + Arm64Assembler* assembler = arm64_codegen->GetAssembler(); + MacroAssembler* masm = assembler->GetVIXLAssembler(); + __ Bind(GetEntryLabel()); + + // Get the locations. + LocationSummary* locations = instruction_->GetLocations(); + Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. + Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. + Register expected = WRegisterFrom(locations->InAt(3)); // Expected. + Register value = WRegisterFrom(locations->InAt(4)); // Value. + + Register old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path. + Register marked = WRegisterFrom(locations->GetTemp(1)); // The marked old value. + + // Mark the `old_value` from the main path and compare with `expected`. This clobbers the + // `tmp_ptr` scratch register but we do not want to allocate another non-scratch temporary. + arm64_codegen->GenerateUnsafeCasOldValueMovWithBakerReadBarrier(marked, old_value); + __ Cmp(marked, expected); + __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure. + + // The `old_value` we have read did not match `expected` (which is always a to-space reference) + // but after the read barrier in GenerateUnsafeCasOldValueMovWithBakerReadBarrier() the marked + // to-space value matched, so the `old_value` must be a from-space reference to the same + // object. Do the same CAS loop as the main path but check for both `expected` and the unmarked + // old value representing the to-space and from-space references for the same object. 
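// Conceptual sketch of the slow-path logic implemented below, not the generated
// code: `ReadBarrierMark` is a stand-in for the marking entrypoint reached via
// GenerateUnsafeCasOldValueMovWithBakerReadBarrier(), and all other names here
// are illustrative only.
#include <atomic>

struct Object;
Object* ReadBarrierMark(Object* ref);  // Hypothetical: returns the to-space reference.

inline bool CasObjectSlowPath(std::atomic<Object*>& field,
                              Object* expected,     // Always a to-space reference.
                              Object* new_value,
                              Object* old_value) {  // Value seen by the failed fast-path CAS.
  if (ReadBarrierMark(old_value) != expected) {
    return false;  // A genuinely different reference is stored; the CAS fails.
  }
  // `old_value` is the from-space address of the object `expected` points to,
  // so retry the CAS accepting either address of that same object.
  while (true) {
    Object* current = field.load(std::memory_order_acquire);
    if (current != expected && current != old_value) {
      return false;
    }
    if (field.compare_exchange_weak(current, new_value,
                                    std::memory_order_acq_rel,
                                    std::memory_order_acquire)) {
      return true;
    }
  }
}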
+ + UseScratchRegisterScope temps(masm); + Register tmp_ptr = temps.AcquireX(); + Register tmp = temps.AcquireSameSizeAs(value); + + // Recalculate the `tmp_ptr` clobbered above. + __ Add(tmp_ptr, base.X(), Operand(offset)); + + // do { + // tmp_value = [tmp_ptr]; + // } while ((tmp_value == expected || tmp == old_value) && failure([tmp_ptr] <- r_new_value)); + // result = (tmp_value == expected || tmp == old_value); + + vixl::aarch64::Label loop_head; + __ Bind(&loop_head); + __ Ldaxr(tmp, MemOperand(tmp_ptr)); + assembler->MaybeUnpoisonHeapReference(tmp); + __ Cmp(tmp, expected); + __ Ccmp(tmp, old_value, ZFlag, ne); + __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure. + assembler->MaybePoisonHeapReference(value); + __ Stlxr(tmp.W(), value, MemOperand(tmp_ptr)); + assembler->MaybeUnpoisonHeapReference(value); + __ Cbnz(tmp.W(), &loop_head); + + // Z=true from the above CMP+CCMP indicates success. + __ B(GetExitLabel()); + } +}; + static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) { - MacroAssembler* masm = codegen->GetVIXLAssembler(); + Arm64Assembler* assembler = codegen->GetAssembler(); + MacroAssembler* masm = assembler->GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); - Location out_loc = locations->Out(); - Register out = WRegisterFrom(out_loc); // Boolean result. - - Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. - Location offset_loc = locations->InAt(2); - Register offset = XRegisterFrom(offset_loc); // Long offset. - Register expected = RegisterFrom(locations->InAt(3), type); // Expected. - Register value = RegisterFrom(locations->InAt(4), type); // Value. + Register out = WRegisterFrom(locations->Out()); // Boolean result. + Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. + Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. + Register expected = RegisterFrom(locations->InAt(3), type); // Expected. + Register value = RegisterFrom(locations->InAt(4), type); // Value. // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps. if (type == DataType::Type::kReference) { // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(base, value, value_can_be_null); - - // The only read barrier implementation supporting the - // UnsafeCASObject intrinsic is the Baker-style read barriers. - DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Register temp = WRegisterFrom(locations->GetTemp(0)); - // Need to make sure the reference stored in the field is a to-space - // one before attempting the CAS or the CAS could fail incorrectly. - codegen->UpdateReferenceFieldWithBakerReadBarrier( - invoke, - out_loc, // Unused, used only as a "temporary" within the read barrier. - base, - /* field_offset */ offset_loc, - temp, - /* needs_null_check */ false, - /* use_load_acquire */ false); - } } UseScratchRegisterScope temps(masm); Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. - Register tmp_value = temps.AcquireSameSizeAs(value); // Value in memory. + Register old_value; // Value in memory. 
- Register tmp_32 = tmp_value.W(); + vixl::aarch64::Label exit_loop_label; + vixl::aarch64::Label* exit_loop = &exit_loop_label; + vixl::aarch64::Label* failure = &exit_loop_label; - __ Add(tmp_ptr, base.X(), Operand(offset)); + if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(kUseBakerReadBarrier); - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - codegen->GetAssembler()->PoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not poison `value`, as it is the same register as - // `expected`, which has just been poisoned. - } else { - codegen->GetAssembler()->PoisonHeapReference(value); - } + BakerReadBarrierCasSlowPathARM64* slow_path = + new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARM64(invoke); + codegen->AddSlowPath(slow_path); + exit_loop = slow_path->GetExitLabel(); + failure = slow_path->GetEntryLabel(); + // We need to store the `old_value` in a non-scratch register to make sure + // the Baker read barrier in the slow path does not clobber it. + old_value = WRegisterFrom(locations->GetTemp(0)); + } else { + old_value = temps.AcquireSameSizeAs(value); } + __ Add(tmp_ptr, base.X(), Operand(offset)); + // do { - // tmp_value = [tmp_ptr] - expected; - // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); - // result = tmp_value != 0; + // tmp_value = [tmp_ptr]; + // } while (tmp_value == expected && failure([tmp_ptr] <- r_new_value)); + // result = tmp_value == expected; - vixl::aarch64::Label loop_head, exit_loop; + vixl::aarch64::Label loop_head; __ Bind(&loop_head); - __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); - __ Cmp(tmp_value, expected); - __ B(&exit_loop, ne); - __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); - __ Cbnz(tmp_32, &loop_head); - __ Bind(&exit_loop); - __ Cset(out, eq); - - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - codegen->GetAssembler()->UnpoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not unpoison `value`, as it is the same register as - // `expected`, which has just been unpoisoned. - } else { - codegen->GetAssembler()->UnpoisonHeapReference(value); - } + __ Ldaxr(old_value, MemOperand(tmp_ptr)); + if (type == DataType::Type::kReference) { + assembler->MaybeUnpoisonHeapReference(old_value); } + __ Cmp(old_value, expected); + __ B(failure, ne); + if (type == DataType::Type::kReference) { + assembler->MaybePoisonHeapReference(value); + } + __ Stlxr(old_value.W(), value, MemOperand(tmp_ptr)); // Reuse `old_value` for STLXR result. + if (type == DataType::Type::kReference) { + assembler->MaybeUnpoisonHeapReference(value); + } + __ Cbnz(old_value.W(), &loop_head); + __ Bind(exit_loop); + __ Cset(out, eq); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 2963308da8..b92075053e 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -638,8 +638,11 @@ static void GenUnsafeGet(HInvoke* invoke, if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { Location temp = locations->GetTemp(0); - codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false); + // Piggy-back on the field load path using introspection for the Baker read barrier. 
+ __ Add(RegisterFrom(temp), base, Operand(offset)); + MemOperand src(RegisterFrom(temp), 0); + codegen->GenerateFieldLoadWithBakerReadBarrier( + invoke, trg_loc, base, src, /* needs_null_check */ false); if (is_volatile) { __ Dmb(vixl32::ISH); } diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 3918b65a62..60ca61c133 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -296,10 +296,10 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() { ScopedArenaVector<uint8_t> buffer(allocator_->Adapter(kArenaAllocStackMapStream)); BitMemoryWriter<ScopedArenaVector<uint8_t>> out(&buffer); - EncodeVarintBits(out, packed_frame_size_); - EncodeVarintBits(out, core_spill_mask_); - EncodeVarintBits(out, fp_spill_mask_); - EncodeVarintBits(out, num_dex_registers_); + out.WriteVarint(packed_frame_size_); + out.WriteVarint(core_spill_mask_); + out.WriteVarint(fp_spill_mask_); + out.WriteVarint(num_dex_registers_); EncodeTable(out, stack_maps_); EncodeTable(out, register_masks_); EncodeTable(out, stack_masks_); diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index df11709f03..01c6bf9e0e 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -109,7 +109,7 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { BitTableBuilder<RegisterMask> register_masks_; BitmapTableBuilder stack_masks_; BitmapTableBuilder dex_register_masks_; - BitTableBuilder<MaskInfo> dex_register_maps_; + BitTableBuilder<DexRegisterMapInfo> dex_register_maps_; BitTableBuilder<DexRegisterInfo> dex_register_catalog_; ScopedArenaVector<BitVector*> lazy_stack_masks_; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index a281bb30f4..d28f09fbba 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -750,9 +750,9 @@ TEST(StackMapTest, TestDedupeBitTables) { ScopedArenaVector<uint8_t> memory = stream.Encode(); std::vector<uint8_t> out; - CodeInfo::DedupeMap dedupe_map; - size_t deduped1 = CodeInfo::Dedupe(&out, memory.data(), &dedupe_map); - size_t deduped2 = CodeInfo::Dedupe(&out, memory.data(), &dedupe_map); + CodeInfo::Deduper deduper(&out); + size_t deduped1 = deduper.Dedupe(memory.data()); + size_t deduped2 = deduper.Dedupe(memory.data()); for (size_t deduped : { deduped1, deduped2 }) { CodeInfo code_info(out.data() + deduped); diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc index 28942dad95..8bac7206c6 100644 --- a/dex2oat/linker/oat_writer.cc +++ b/dex2oat/linker/oat_writer.cc @@ -1438,10 +1438,10 @@ class OatWriter::LayoutReserveOffsetCodeMethodVisitor : public OrderedMethodVisi class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { public: - static constexpr bool kDebugVerifyDedupedCodeInfo = false; - InitMapMethodVisitor(OatWriter* writer, size_t offset) - : OatDexMethodVisitor(writer, offset) {} + : OatDexMethodVisitor(writer, offset), + dedupe_bit_table_(&writer_->code_info_data_) { + } bool VisitMethod(size_t class_def_method_index, const ClassAccessor::Method& method ATTRIBUTE_UNUSED) @@ -1455,21 +1455,9 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { ArrayRef<const uint8_t> map = compiled_method->GetVmapTable(); if (map.size() != 0u) { - // Deduplicate the inner bittables within the CodeInfo. 
- std::vector<uint8_t>* data = &writer_->code_info_data_; size_t offset = dedupe_code_info_.GetOrCreate(map.data(), [=]() { - size_t deduped_offset = CodeInfo::Dedupe(data, map.data(), &dedupe_bit_table_); - if (kDebugVerifyDedupedCodeInfo) { - InstructionSet isa = writer_->GetCompilerOptions().GetInstructionSet(); - std::stringstream old_code_info; - VariableIndentationOutputStream old_vios(&old_code_info); - std::stringstream new_code_info; - VariableIndentationOutputStream new_vios(&new_code_info); - CodeInfo(map.data()).Dump(&old_vios, 0, true, isa); - CodeInfo(data->data() + deduped_offset).Dump(&new_vios, 0, true, isa); - DCHECK_EQ(old_code_info.str(), new_code_info.str()); - } - return offset_ + deduped_offset; + // Deduplicate the inner BitTable<>s within the CodeInfo. + return offset_ + dedupe_bit_table_.Dedupe(map.data()); }); // Code offset is not initialized yet, so set the map offset to 0u-offset. DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u); @@ -1487,8 +1475,8 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { // The compiler already deduplicated the pointers but it did not dedupe the tables. SafeMap<const uint8_t*, size_t> dedupe_code_info_; - // Deduplicate at BitTable level. The value is bit offset within code_info_data_. - std::map<BitMemoryRegion, uint32_t, BitMemoryRegion::Less> dedupe_bit_table_; + // Deduplicate at BitTable level. + CodeInfo::Deduper dedupe_bit_table_; }; class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { @@ -2082,6 +2070,7 @@ size_t OatWriter::InitOatMaps(size_t offset) { InitMapMethodVisitor visitor(this, offset); bool success = VisitDexMethods(&visitor); DCHECK(success); + code_info_data_.shrink_to_fit(); offset += code_info_data_.size(); } return offset; diff --git a/libartbase/base/bit_memory_region.h b/libartbase/base/bit_memory_region.h index 7d8de399b9..5668b6cd79 100644 --- a/libartbase/base/bit_memory_region.h +++ b/libartbase/base/bit_memory_region.h @@ -29,47 +29,43 @@ namespace art { class BitMemoryRegion FINAL : public ValueObject { public: struct Less { - constexpr bool operator()(const BitMemoryRegion& lhs, const BitMemoryRegion& rhs) const { - if (lhs.size_in_bits() != rhs.size_in_bits()) { - return lhs.size_in_bits() < rhs.size_in_bits(); - } - size_t bit = 0; - constexpr size_t kNumBits = BitSizeOf<uint32_t>(); - for (; bit + kNumBits <= lhs.size_in_bits(); bit += kNumBits) { - uint32_t lhs_bits = lhs.LoadBits(bit, kNumBits); - uint32_t rhs_bits = rhs.LoadBits(bit, kNumBits); - if (lhs_bits != rhs_bits) { - return lhs_bits < rhs_bits; - } - } - size_t num_bits = lhs.size_in_bits() - bit; - return lhs.LoadBits(bit, num_bits) < rhs.LoadBits(bit, num_bits); + bool operator()(const BitMemoryRegion& lhs, const BitMemoryRegion& rhs) const { + return Compare(lhs, rhs) < 0; } }; BitMemoryRegion() = default; - ALWAYS_INLINE BitMemoryRegion(void* data, size_t bit_start, size_t bit_size) - : data_(reinterpret_cast<uintptr_t*>(AlignDown(data, sizeof(uintptr_t)))), - bit_start_(bit_start + 8 * (reinterpret_cast<uintptr_t>(data) % sizeof(uintptr_t))), - bit_size_(bit_size) { + ALWAYS_INLINE BitMemoryRegion(uint8_t* data, ssize_t bit_start, size_t bit_size) { + // Normalize the data pointer. Note that bit_start may be negative. 
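// Worked example (addresses assumed, 64-bit build): with data = 0x1010 and
// bit_start = -5, the arithmetic shift below gives bit_start >> kBitsPerByteLog2
// == -1, so aligned_data = AlignDown(0x100f, 8) = 0x1008 and
// bit_start_ = -5 + 8 * (0x1010 - 0x1008) = 59, satisfying 0 <= bit_start_ < 64.
// The region still denotes absolute bit 0x1010 * 8 - 5; it is merely expressed
// against a word-aligned base.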
+ uint8_t* aligned_data = AlignDown(data + (bit_start >> kBitsPerByteLog2), sizeof(uintptr_t)); + data_ = reinterpret_cast<uintptr_t*>(aligned_data); + bit_start_ = bit_start + kBitsPerByte * (data - aligned_data); + bit_size_ = bit_size; + DCHECK_LT(bit_start_, static_cast<size_t>(kBitsPerIntPtrT)); } ALWAYS_INLINE explicit BitMemoryRegion(MemoryRegion region) : BitMemoryRegion(region.begin(), /* bit_start */ 0, region.size_in_bits()) { } ALWAYS_INLINE BitMemoryRegion(MemoryRegion region, size_t bit_offset, size_t bit_length) : BitMemoryRegion(region) { - DCHECK_LE(bit_offset, bit_size_); - DCHECK_LE(bit_length, bit_size_ - bit_offset); - bit_start_ += bit_offset; - bit_size_ = bit_length; + *this = Subregion(bit_offset, bit_length); } ALWAYS_INLINE bool IsValid() const { return data_ != nullptr; } + const uint8_t* data() const { + DCHECK_ALIGNED(bit_start_, kBitsPerByte); + return reinterpret_cast<const uint8_t*>(data_) + bit_start_ / kBitsPerByte; + } + size_t size_in_bits() const { return bit_size_; } + void Resize(size_t bit_size) { + bit_size_ = bit_size; + } + ALWAYS_INLINE BitMemoryRegion Subregion(size_t bit_offset, size_t bit_length) const { DCHECK_LE(bit_offset, bit_size_); DCHECK_LE(bit_length, bit_size_ - bit_offset); @@ -79,12 +75,11 @@ class BitMemoryRegion FINAL : public ValueObject { return result; } - // Increase the size of the region and return the newly added range (starting at the old end). - ALWAYS_INLINE BitMemoryRegion Extend(size_t bit_length) { + ALWAYS_INLINE BitMemoryRegion Subregion(size_t bit_offset) const { + DCHECK_LE(bit_offset, bit_size_); BitMemoryRegion result = *this; - result.bit_start_ += result.bit_size_; - result.bit_size_ = bit_length; - bit_size_ += bit_length; + result.bit_start_ += bit_offset; + result.bit_size_ -= bit_offset; return result; } @@ -183,10 +178,26 @@ class BitMemoryRegion FINAL : public ValueObject { return count; } - ALWAYS_INLINE bool Equals(const BitMemoryRegion& other) const { - return data_ == other.data_ && - bit_start_ == other.bit_start_ && - bit_size_ == other.bit_size_; + static int Compare(const BitMemoryRegion& lhs, const BitMemoryRegion& rhs) { + if (lhs.size_in_bits() != rhs.size_in_bits()) { + return (lhs.size_in_bits() < rhs.size_in_bits()) ? -1 : 1; + } + size_t bit = 0; + constexpr size_t kNumBits = BitSizeOf<uint32_t>(); + for (; bit + kNumBits <= lhs.size_in_bits(); bit += kNumBits) { + uint32_t lhs_bits = lhs.LoadBits(bit, kNumBits); + uint32_t rhs_bits = rhs.LoadBits(bit, kNumBits); + if (lhs_bits != rhs_bits) { + return (lhs_bits < rhs_bits) ? -1 : 1; + } + } + size_t num_bits = lhs.size_in_bits() - bit; + uint32_t lhs_bits = lhs.LoadBits(bit, num_bits); + uint32_t rhs_bits = rhs.LoadBits(bit, num_bits); + if (lhs_bits != rhs_bits) { + return (lhs_bits < rhs_bits) ? -1 : 1; + } + return 0; } private: @@ -196,30 +207,49 @@ class BitMemoryRegion FINAL : public ValueObject { size_t bit_size_ = 0; }; +constexpr uint32_t kVarintHeaderBits = 4; +constexpr uint32_t kVarintSmallValue = 11; // Maximum value which is stored as-is. 
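// For illustration, encodings implied by these constants and by the
// ReadVarint()/WriteVarint() definitions below:
//   value 11     -> header 11            ->  4 bits total
//   value 12     -> header 12 + 1 byte   -> 12 bits total
//   value 0x1234 -> header 13 + 2 bytes  -> 20 bits total
//   value ~0u    -> header 15 + 4 bytes  -> 36 bits total
// A minimal round-trip, mirroring the BitMemoryRegion.TestVarint test added by
// this change (std::vector<uint8_t> assumed as the backing storage):
//   std::vector<uint8_t> buffer;
//   BitMemoryWriter<std::vector<uint8_t>> writer(&buffer);
//   writer.WriteVarint(0x1234u);            // Writes 20 bits.
//   BitMemoryReader reader(buffer.data());
//   uint32_t value = reader.ReadVarint();   // value == 0x1234u.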
+ class BitMemoryReader { public: - explicit BitMemoryReader(const uint8_t* data, size_t bit_offset = 0) - : finished_region_(const_cast<uint8_t*>(data), /* bit_start */ 0, bit_offset) { - DCHECK_EQ(GetBitOffset(), bit_offset); + BitMemoryReader(BitMemoryReader&&) = default; + explicit BitMemoryReader(BitMemoryRegion data) + : finished_region_(data.Subregion(0, 0) /* set the length to zero */ ) { + } + explicit BitMemoryReader(const uint8_t* data, ssize_t bit_offset = 0) + : finished_region_(const_cast<uint8_t*>(data), bit_offset, /* bit_length */ 0) { } - size_t GetBitOffset() const { return finished_region_.size_in_bits(); } + const uint8_t* data() const { return finished_region_.data(); } - ALWAYS_INLINE BitMemoryRegion Skip(size_t bit_length) { - return finished_region_.Extend(bit_length); - } + BitMemoryRegion GetReadRegion() const { return finished_region_; } + + size_t NumberOfReadBits() const { return finished_region_.size_in_bits(); } - // Get the most recently read bits. - ALWAYS_INLINE BitMemoryRegion Tail(size_t bit_length) { - return finished_region_.Subregion(finished_region_.size_in_bits() - bit_length, bit_length); + ALWAYS_INLINE BitMemoryRegion ReadRegion(size_t bit_length) { + size_t bit_offset = finished_region_.size_in_bits(); + finished_region_.Resize(bit_offset + bit_length); + return finished_region_.Subregion(bit_offset, bit_length); } ALWAYS_INLINE uint32_t ReadBits(size_t bit_length) { - return finished_region_.Extend(bit_length).LoadBits(0, bit_length); + return ReadRegion(bit_length).LoadBits(/* bit_offset */ 0, bit_length); } ALWAYS_INLINE bool ReadBit() { - return finished_region_.Extend(1).LoadBit(0); + return ReadRegion(/* bit_length */ 1).LoadBit(/* bit_offset */ 0); + } + + // Read variable-length bit-packed integer. + // The first four bits determine the variable length of the encoded integer: + // Values 0..11 represent the result as-is, with no further following bits. + // Values 12..15 mean the result is in the next 8/16/24/32-bits respectively. 
+ ALWAYS_INLINE uint32_t ReadVarint() { + uint32_t x = ReadBits(kVarintHeaderBits); + if (x > kVarintSmallValue) { + x = ReadBits((x - kVarintSmallValue) * kBitsPerByte); + } + return x; } private: @@ -234,36 +264,58 @@ template<typename Vector> class BitMemoryWriter { public: explicit BitMemoryWriter(Vector* out, size_t bit_offset = 0) - : out_(out), bit_offset_(bit_offset) { - DCHECK_EQ(GetBitOffset(), bit_offset); + : out_(out), bit_start_(bit_offset), bit_offset_(bit_offset) { + DCHECK_EQ(NumberOfWrittenBits(), 0u); + } + + BitMemoryRegion GetWrittenRegion() const { + return BitMemoryRegion(out_->data(), bit_start_, bit_offset_ - bit_start_); } const uint8_t* data() const { return out_->data(); } - size_t GetBitOffset() const { return bit_offset_; } + size_t NumberOfWrittenBits() const { return bit_offset_ - bit_start_; } ALWAYS_INLINE BitMemoryRegion Allocate(size_t bit_length) { out_->resize(BitsToBytesRoundUp(bit_offset_ + bit_length)); - BitMemoryRegion region(MemoryRegion(out_->data(), out_->size()), bit_offset_, bit_length); + BitMemoryRegion region(out_->data(), bit_offset_, bit_length); DCHECK_LE(bit_length, std::numeric_limits<size_t>::max() - bit_offset_) << "Overflow"; bit_offset_ += bit_length; return region; } + ALWAYS_INLINE void WriteRegion(const BitMemoryRegion& region) { + Allocate(region.size_in_bits()).StoreBits(/* bit_offset */ 0, region, region.size_in_bits()); + } + ALWAYS_INLINE void WriteBits(uint32_t value, size_t bit_length) { - Allocate(bit_length).StoreBits(0, value, bit_length); + Allocate(bit_length).StoreBits(/* bit_offset */ 0, value, bit_length); } ALWAYS_INLINE void WriteBit(bool value) { - Allocate(1).StoreBit(0, value); + Allocate(1).StoreBit(/* bit_offset */ 0, value); } - ALWAYS_INLINE void WriteRegion(const BitMemoryRegion& region) { - Allocate(region.size_in_bits()).StoreBits(0, region, region.size_in_bits()); + // Write variable-length bit-packed integer. 
+ ALWAYS_INLINE void WriteVarint(uint32_t value) { + if (value <= kVarintSmallValue) { + WriteBits(value, kVarintHeaderBits); + } else { + uint32_t num_bits = RoundUp(MinimumBitsToStore(value), kBitsPerByte); + uint32_t header = kVarintSmallValue + num_bits / kBitsPerByte; + WriteBits(header, kVarintHeaderBits); + WriteBits(value, num_bits); + } + } + + ALWAYS_INLINE void ByteAlign() { + size_t end = bit_start_ + bit_offset_; + bit_offset_ += RoundUp(end, kBitsPerByte) - end; } private: Vector* out_; + size_t bit_start_; size_t bit_offset_; DISALLOW_COPY_AND_ASSIGN(BitMemoryWriter); diff --git a/libartbase/base/bit_memory_region_test.cc b/libartbase/base/bit_memory_region_test.cc index b7546985a9..02623bf040 100644 --- a/libartbase/base/bit_memory_region_test.cc +++ b/libartbase/base/bit_memory_region_test.cc @@ -33,6 +33,24 @@ static void CheckBits(uint8_t* data, } } +TEST(BitMemoryRegion, TestVarint) { + for (size_t start_bit_offset = 0; start_bit_offset <= 32; start_bit_offset++) { + uint32_t values[] = { 0, 1, 11, 12, 15, 16, 255, 256, 1u << 16, 1u << 24, ~1u, ~0u }; + for (uint32_t value : values) { + std::vector<uint8_t> buffer; + BitMemoryWriter<std::vector<uint8_t>> writer(&buffer, start_bit_offset); + writer.WriteVarint(value); + + BitMemoryReader reader(buffer.data(), start_bit_offset); + uint32_t result = reader.ReadVarint(); + uint32_t upper_bound = RoundUp(MinimumBitsToStore(value), kBitsPerByte) + kVarintHeaderBits; + EXPECT_EQ(writer.NumberOfWrittenBits(), reader.NumberOfReadBits()); + EXPECT_EQ(value, result); + EXPECT_GE(upper_bound, writer.NumberOfWrittenBits()); + } + } +} + TEST(BitMemoryRegion, TestBit) { uint8_t data[sizeof(uint32_t) * 2]; for (size_t bit_offset = 0; bit_offset < 2 * sizeof(uint32_t) * kBitsPerByte; ++bit_offset) { diff --git a/libartbase/base/bit_table.h b/libartbase/base/bit_table.h index 1c7614b695..54e88618cb 100644 --- a/libartbase/base/bit_table.h +++ b/libartbase/base/bit_table.h @@ -33,34 +33,6 @@ namespace art { -constexpr uint32_t kVarintHeaderBits = 4; -constexpr uint32_t kVarintSmallValue = 11; // Maximum value which is stored as-is. - -// Load variable-length bit-packed integer from `data` starting at `bit_offset`. -// The first four bits determine the variable length of the encoded integer: -// Values 0..11 represent the result as-is, with no further following bits. -// Values 12..15 mean the result is in the next 8/16/24/32-bits respectively. -ALWAYS_INLINE static inline uint32_t DecodeVarintBits(BitMemoryReader& reader) { - uint32_t x = reader.ReadBits(kVarintHeaderBits); - if (x > kVarintSmallValue) { - x = reader.ReadBits((x - kVarintSmallValue) * kBitsPerByte); - } - return x; -} - -// Store variable-length bit-packed integer from `data` starting at `bit_offset`. -template<typename Vector> -ALWAYS_INLINE static inline void EncodeVarintBits(BitMemoryWriter<Vector>& out, uint32_t value) { - if (value <= kVarintSmallValue) { - out.WriteBits(value, kVarintHeaderBits); - } else { - uint32_t num_bits = RoundUp(MinimumBitsToStore(value), kBitsPerByte); - uint32_t header = kVarintSmallValue + num_bits / kBitsPerByte; - out.WriteBits(header, kVarintHeaderBits); - out.WriteBits(value, num_bits); - } -} - // Generic purpose table of uint32_t values, which are tightly packed at bit level. // It has its own header with the number of rows and the bit-widths of all columns. // The values are accessible by (row, column). The value -1 is stored efficiently. 
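// Decoding-side usage sketch (mirroring the bit_table_test.cc changes below):
// a table is read straight out of a BitMemoryReader and indexed by (row, column).
// The encoded bytes are assumed to come from one of the builders further down;
// the function name is illustrative only.
inline uint32_t ReadFirstRowThirdColumnExample(const uint8_t* encoded) {
  BitMemoryReader reader(encoded);
  BitTableBase<4> table(reader);  // Decodes the header and the table data region.
  return table.Get(/* row */ 0, /* column */ 2);
}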
@@ -77,23 +49,21 @@ class BitTableBase { ALWAYS_INLINE void Decode(BitMemoryReader& reader) { // Decode row count and column sizes from the table header. - size_t initial_bit_offset = reader.GetBitOffset(); - num_rows_ = DecodeVarintBits(reader); + num_rows_ = reader.ReadVarint(); if (num_rows_ != 0) { column_offset_[0] = 0; for (uint32_t i = 0; i < kNumColumns; i++) { - size_t column_end = column_offset_[i] + DecodeVarintBits(reader); + size_t column_end = column_offset_[i] + reader.ReadVarint(); column_offset_[i + 1] = dchecked_integral_cast<uint16_t>(column_end); } } - header_bit_size_ = reader.GetBitOffset() - initial_bit_offset; // Record the region which contains the table data and skip past it. - table_data_ = reader.Skip(num_rows_ * NumRowBits()); + table_data_ = reader.ReadRegion(num_rows_ * NumRowBits()); } ALWAYS_INLINE uint32_t Get(uint32_t row, uint32_t column = 0) const { - DCHECK_NE(header_bit_size_, 0u) << "Table has not been loaded"; + DCHECK(table_data_.IsValid()) << "Table has not been loaded"; DCHECK_LT(row, num_rows_); DCHECK_LT(column, kNumColumns); size_t offset = row * NumRowBits() + column_offset_[column]; @@ -101,7 +71,7 @@ class BitTableBase { } ALWAYS_INLINE BitMemoryRegion GetBitMemoryRegion(uint32_t row, uint32_t column = 0) const { - DCHECK_NE(header_bit_size_, 0u) << "Table has not been loaded"; + DCHECK(table_data_.IsValid()) << "Table has not been loaded"; DCHECK_LT(row, num_rows_); DCHECK_LT(column, kNumColumns); size_t offset = row * NumRowBits() + column_offset_[column]; @@ -118,16 +88,20 @@ class BitTableBase { return column_offset_[column + 1] - column_offset_[column]; } - size_t HeaderBitSize() const { return header_bit_size_; } + size_t DataBitSize() const { return table_data_.size_in_bits(); } - size_t BitSize() const { return header_bit_size_ + table_data_.size_in_bits(); } + bool Equals(const BitTableBase& other) const { + return num_rows_ == other.num_rows_ && + std::equal(column_offset_, column_offset_ + kNumColumns, other.column_offset_) && + BitMemoryRegion::Compare(table_data_, other.table_data_) == 0; + } protected: BitMemoryRegion table_data_; size_t num_rows_ = 0; - uint16_t column_offset_[kNumColumns + 1] = {}; - uint16_t header_bit_size_ = 0; + + DISALLOW_COPY_AND_ASSIGN(BitTableBase); }; // Helper class which can be used to create BitTable accessors with named getters. @@ -151,9 +125,10 @@ class BitTableAccessor { } // Helper macro to create constructors and per-table utilities in derived class. -#define BIT_TABLE_HEADER() \ +#define BIT_TABLE_HEADER(NAME) \ using BitTableAccessor<kNumColumns>::BitTableAccessor; /* inherit constructors */ \ template<int COLUMN, int UNUSED /*needed to compile*/> struct ColumnName; \ + static constexpr const char* kTableName = #NAME; \ // Helper macro to create named column accessors in derived class. #define BIT_TABLE_COLUMN(COLUMN, NAME) \ @@ -176,12 +151,6 @@ static const char* const* GetBitTableColumnNamesImpl(std::index_sequence<Columns return names; } -// Returns the names of all columns in the given accessor. -template<typename Accessor> -static const char* const* GetBitTableColumnNames() { - return GetBitTableColumnNamesImpl<Accessor>(std::make_index_sequence<Accessor::kNumColumns>()); -} - // Wrapper which makes it easier to use named accessors for the individual rows. 
template<typename Accessor> class BitTable : public BitTableBase<Accessor::kNumColumns> { @@ -239,6 +208,14 @@ class BitTable : public BitTableBase<Accessor::kNumColumns> { ALWAYS_INLINE Accessor GetInvalidRow() const { return Accessor(this, static_cast<uint32_t>(-1)); } + + const char* GetName() const { + return Accessor::kTableName; + } + + const char* const* GetColumnNames() const { + return GetBitTableColumnNamesImpl<Accessor>(std::make_index_sequence<Accessor::kNumColumns>()); + } }; template<typename Accessor> @@ -376,15 +353,15 @@ class BitTableBuilderBase { // Encode the stored data into a BitTable. template<typename Vector> void Encode(BitMemoryWriter<Vector>& out) const { - size_t initial_bit_offset = out.GetBitOffset(); + size_t initial_bit_offset = out.NumberOfWrittenBits(); std::array<uint32_t, kNumColumns> column_bits; Measure(&column_bits); - EncodeVarintBits(out, size()); + out.WriteVarint(size()); if (size() != 0) { // Write table header. for (uint32_t c = 0; c < kNumColumns; c++) { - EncodeVarintBits(out, column_bits[c]); + out.WriteVarint(column_bits[c]); } // Write table data. @@ -398,7 +375,7 @@ class BitTableBuilderBase { // Verify the written data. if (kIsDebugBuild) { BitTableBase<kNumColumns> table; - BitMemoryReader reader(out.data(), initial_bit_offset); + BitMemoryReader reader(out.GetWrittenRegion().Subregion(initial_bit_offset)); table.Decode(reader); DCHECK_EQ(size(), table.NumRows()); for (uint32_t c = 0; c < kNumColumns; c++) { @@ -467,11 +444,11 @@ class BitmapTableBuilder { // Encode the stored data into a BitTable. template<typename Vector> void Encode(BitMemoryWriter<Vector>& out) const { - size_t initial_bit_offset = out.GetBitOffset(); + size_t initial_bit_offset = out.NumberOfWrittenBits(); - EncodeVarintBits(out, size()); + out.WriteVarint(size()); if (size() != 0) { - EncodeVarintBits(out, max_num_bits_); + out.WriteVarint(max_num_bits_); // Write table data. for (MemoryRegion row : rows_) { @@ -484,7 +461,7 @@ class BitmapTableBuilder { // Verify the written data. 
if (kIsDebugBuild) { BitTableBase<1> table; - BitMemoryReader reader(out.data(), initial_bit_offset); + BitMemoryReader reader(out.GetWrittenRegion().Subregion(initial_bit_offset)); table.Decode(reader); DCHECK_EQ(size(), table.NumRows()); DCHECK_EQ(max_num_bits_, table.NumColumnBits(0)); diff --git a/libartbase/base/bit_table_test.cc b/libartbase/base/bit_table_test.cc index 2fd9052516..bf32dc6e00 100644 --- a/libartbase/base/bit_table_test.cc +++ b/libartbase/base/bit_table_test.cc @@ -26,22 +26,6 @@ namespace art { -TEST(BitTableTest, TestVarint) { - for (size_t start_bit_offset = 0; start_bit_offset <= 32; start_bit_offset++) { - uint32_t values[] = { 0, 1, 11, 12, 15, 16, 255, 256, ~1u, ~0u }; - for (uint32_t value : values) { - std::vector<uint8_t> buffer; - BitMemoryWriter<std::vector<uint8_t>> writer(&buffer, start_bit_offset); - EncodeVarintBits(writer, value); - - BitMemoryReader reader(buffer.data(), start_bit_offset); - uint32_t result = DecodeVarintBits(reader); - EXPECT_EQ(writer.GetBitOffset(), reader.GetBitOffset()); - EXPECT_EQ(value, result); - } - } -} - TEST(BitTableTest, TestEmptyTable) { MallocArenaPool pool; ArenaStack arena_stack(&pool); @@ -54,7 +38,7 @@ TEST(BitTableTest, TestEmptyTable) { BitMemoryReader reader(buffer.data()); BitTableBase<1> table(reader); - EXPECT_EQ(writer.GetBitOffset(), reader.GetBitOffset()); + EXPECT_EQ(writer.NumberOfWrittenBits(), reader.NumberOfReadBits()); EXPECT_EQ(0u, table.NumRows()); } @@ -75,7 +59,7 @@ TEST(BitTableTest, TestSingleColumnTable) { BitMemoryReader reader(buffer.data()); BitTableBase<1> table(reader); - EXPECT_EQ(writer.GetBitOffset(), reader.GetBitOffset()); + EXPECT_EQ(writer.NumberOfWrittenBits(), reader.NumberOfReadBits()); EXPECT_EQ(4u, table.NumRows()); EXPECT_EQ(42u, table.Get(0)); EXPECT_EQ(kNoValue, table.Get(1)); @@ -98,7 +82,7 @@ TEST(BitTableTest, TestUnalignedTable) { BitMemoryReader reader(buffer.data(), start_bit_offset); BitTableBase<1> table(reader); - EXPECT_EQ(writer.GetBitOffset(), reader.GetBitOffset()); + EXPECT_EQ(writer.NumberOfWrittenBits(), reader.NumberOfReadBits()); EXPECT_EQ(1u, table.NumRows()); EXPECT_EQ(42u, table.Get(0)); } @@ -119,7 +103,7 @@ TEST(BitTableTest, TestBigTable) { BitMemoryReader reader(buffer.data()); BitTableBase<4> table(reader); - EXPECT_EQ(writer.GetBitOffset(), reader.GetBitOffset()); + EXPECT_EQ(writer.NumberOfWrittenBits(), reader.NumberOfReadBits()); EXPECT_EQ(2u, table.NumRows()); EXPECT_EQ(42u, table.Get(0, 0)); EXPECT_EQ(kNoValue, table.Get(0, 1)); @@ -169,7 +153,7 @@ TEST(BitTableTest, TestBitmapTable) { BitMemoryReader reader(buffer.data()); BitTableBase<1> table(reader); - EXPECT_EQ(writer.GetBitOffset(), reader.GetBitOffset()); + EXPECT_EQ(writer.NumberOfWrittenBits(), reader.NumberOfReadBits()); for (auto it : indicies) { uint64_t expected = it.first; BitMemoryRegion actual = table.GetBitMemoryRegion(it.second); diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 9d73879979..c04c50e027 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -1578,7 +1578,7 @@ class OatDumper { // The optimizing compiler outputs its CodeInfo data in the vmap table. 
StackMapsHelper helper(oat_method.GetVmapTable(), instruction_set_); if (AddStatsObject(oat_method.GetVmapTable())) { - helper.GetCodeInfo().AddSizeStats(&stats_); + helper.GetCodeInfo().CollectSizeStats(oat_method.GetVmapTable(), &stats_); } const uint8_t* quick_native_pc = reinterpret_cast<const uint8_t*>(quick_code); size_t offset = 0; diff --git a/oatdump/oatdump_test.h b/oatdump/oatdump_test.h index 2c28f06b2e..4ee510130b 100644 --- a/oatdump/oatdump_test.h +++ b/oatdump/oatdump_test.h @@ -171,7 +171,7 @@ class OatDumpTest : public CommonRuntimeTest { // Code and dex code do not show up if list only. expected_prefixes.push_back("DEX CODE:"); expected_prefixes.push_back("CODE:"); - expected_prefixes.push_back("InlineInfos"); + expected_prefixes.push_back("InlineInfo"); } if (mode == kModeArt) { exec_argv.push_back("--image=" + core_art_location_); diff --git a/openjdkjvmti/ti_redefine.cc b/openjdkjvmti/ti_redefine.cc index 1476880f45..dd0428dfcf 100644 --- a/openjdkjvmti/ti_redefine.cc +++ b/openjdkjvmti/ti_redefine.cc @@ -368,17 +368,6 @@ jvmtiError Redefiner::RedefineClasses(ArtJvmTiEnv* env, if (res != OK) { return res; } - // We make a copy of the class_bytes to pass into the retransformation. - // This makes cleanup easier (since we unambiguously own the bytes) and also is useful since we - // will need to keep the original bytes around unaltered for subsequent RetransformClasses calls - // to get the passed in bytes. - unsigned char* class_bytes_copy = nullptr; - res = env->Allocate(definitions[i].class_byte_count, &class_bytes_copy); - if (res != OK) { - return res; - } - memcpy(class_bytes_copy, definitions[i].class_bytes, definitions[i].class_byte_count); - ArtClassDefinition def; res = def.Init(self, definitions[i]); if (res != OK) { diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 8b77453f30..40a8dbc008 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -2785,6 +2785,8 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29 * the root register to IP0 and jumps to the customized entrypoint, * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also * performs all the fast-path checks, so we need just the slow path. + * The UnsafeCASObject intrinsic is also using the GC root entrypoint with + * MOV instead of LDR, the destination register is in the same bits. * * The code structure is * art_quick_read_barrier_mark_introspection: diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc index c7f936fb11..22104a30fe 100644 --- a/runtime/gc/accounting/card_table.cc +++ b/runtime/gc/accounting/card_table.cc @@ -41,10 +41,10 @@ constexpr uint8_t CardTable::kCardDirty; * non-null values to heap addresses should go through an entry in * WriteBarrier, and from there to here. * - * The heap is divided into "cards" of GC_CARD_SIZE bytes, as - * determined by GC_CARD_SHIFT. The card table contains one byte of + * The heap is divided into "cards" of `kCardSize` bytes, as + * determined by `kCardShift`. The card table contains one byte of * data per card, to be used by the GC. The value of the byte will be - * one of GC_CARD_CLEAN or GC_CARD_DIRTY. + * one of `kCardClean` or `kCardDirty`. * * After any store of a non-null object pointer into a heap object, * code is obliged to mark the card dirty. 
The setters in @@ -53,9 +53,9 @@ constexpr uint8_t CardTable::kCardDirty; * * The card table's base [the "biased card table"] gets set to a * rather strange value. In order to keep the JIT from having to - * fabricate or load GC_DIRTY_CARD to store into the card table, + * fabricate or load `kCardDirty` to store into the card table, * biased base is within the mmap allocation at a point where its low - * byte is equal to GC_DIRTY_CARD. See CardTable::Create for details. + * byte is equal to `kCardDirty`. See CardTable::Create for details. */ CardTable* CardTable::Create(const uint8_t* heap_begin, size_t heap_capacity) { @@ -75,8 +75,8 @@ CardTable* CardTable::Create(const uint8_t* heap_begin, size_t heap_capacity) { uint8_t* cardtable_begin = mem_map->Begin(); CHECK(cardtable_begin != nullptr); - // We allocated up to a bytes worth of extra space to allow biased_begin's byte value to equal - // kCardDirty, compute a offset value to make this the case + // We allocated up to a bytes worth of extra space to allow `biased_begin`'s byte value to equal + // `kCardDirty`, compute a offset value to make this the case size_t offset = 0; uint8_t* biased_begin = reinterpret_cast<uint8_t*>(reinterpret_cast<uintptr_t>(cardtable_begin) - (reinterpret_cast<uintptr_t>(heap_begin) >> kCardShift)); diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h index f3548f7ce5..b8520b7dc0 100644 --- a/runtime/gc/accounting/card_table.h +++ b/runtime/gc/accounting/card_table.h @@ -56,7 +56,7 @@ class CardTable { static CardTable* Create(const uint8_t* heap_begin, size_t heap_capacity); ~CardTable(); - // Set the card associated with the given address to GC_CARD_DIRTY. + // Set the card associated with the given address to `kCardDirty`. ALWAYS_INLINE void MarkCard(const void *addr) { *CardFromAddr(addr) = kCardDirty; } @@ -84,8 +84,8 @@ class CardTable { } } - // Returns a value that when added to a heap address >> GC_CARD_SHIFT will address the appropriate - // card table byte. For convenience this value is cached in every Thread + // Returns a value that when added to a heap address >> `kCardShift` will address the appropriate + // card table byte. For convenience this value is cached in every Thread. uint8_t* GetBiasedBegin() const { return biased_begin_; } @@ -148,7 +148,7 @@ class CardTable { // Value used to compute card table addresses from object addresses, see GetBiasedBegin uint8_t* const biased_begin_; // Card table doesn't begin at the beginning of the mem_map_, instead it is displaced by offset - // to allow the byte value of biased_begin_ to equal GC_CARD_DIRTY + // to allow the byte value of `biased_begin_` to equal `kCardDirty`. const size_t offset_; DISALLOW_IMPLICIT_CONSTRUCTORS(CardTable); diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc index 0701330e81..0569092bcd 100644 --- a/runtime/gc/space/region_space.cc +++ b/runtime/gc/space/region_space.cc @@ -16,6 +16,7 @@ #include "bump_pointer_space-inl.h" #include "bump_pointer_space.h" +#include "base/dumpable.h" #include "gc/accounting/read_barrier_table.h" #include "mirror/class-inl.h" #include "mirror/object-inl.h" @@ -42,6 +43,9 @@ static constexpr bool kPoisonDeadObjectsInUnevacuatedRegions = true; // points to a valid, non-protected memory area. static constexpr uint32_t kPoisonDeadObject = 0xBADDB01D; // "BADDROID" +// Whether we check a region's live bytes count against the region bitmap. 
+static constexpr bool kCheckLiveBytesAgainstRegionBitmap = kIsDebugBuild; + MemMap* RegionSpace::CreateMemMap(const std::string& name, size_t capacity, uint8_t* requested_begin) { CHECK_ALIGNED(capacity, kRegionSize); @@ -316,6 +320,9 @@ void RegionSpace::ClearFromSpace(/* out */ uint64_t* cleared_bytes, }; for (size_t i = 0; i < std::min(num_regions_, non_free_region_index_limit_); ++i) { Region* r = ®ions_[i]; + if (kCheckLiveBytesAgainstRegionBitmap) { + CheckLiveBytesAgainstRegionBitmap(r); + } if (r->IsInFromSpace()) { *cleared_bytes += r->BytesAllocated(); *cleared_objects += r->ObjectsAllocated(); @@ -404,6 +411,42 @@ void RegionSpace::ClearFromSpace(/* out */ uint64_t* cleared_bytes, num_evac_regions_ = 0; } +void RegionSpace::CheckLiveBytesAgainstRegionBitmap(Region* r) { + if (r->LiveBytes() == static_cast<size_t>(-1)) { + // Live bytes count is undefined for `r`; nothing to check here. + return; + } + + // Functor walking the region space bitmap for the range corresponding + // to region `r` and calculating the sum of live bytes. + size_t live_bytes_recount = 0u; + auto recount_live_bytes = + [&r, &live_bytes_recount](mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK_ALIGNED(obj, kAlignment); + if (r->IsLarge()) { + // If `r` is a large region, then it contains at most one + // object, which must start at the beginning of the + // region. The live byte count in that case is equal to the + // allocated regions (large region + large tails regions). + DCHECK_EQ(reinterpret_cast<uint8_t*>(obj), r->Begin()); + DCHECK_EQ(live_bytes_recount, 0u); + live_bytes_recount = r->Top() - r->Begin(); + } else { + DCHECK(r->IsAllocated()) + << "r->State()=" << r->State() << " r->LiveBytes()=" << r->LiveBytes(); + size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>(); + size_t alloc_size = RoundUp(obj_size, space::RegionSpace::kAlignment); + live_bytes_recount += alloc_size; + } + }; + // Visit live objects in `r` and recount the live bytes. + GetLiveBitmap()->VisitMarkedRange(reinterpret_cast<uintptr_t>(r->Begin()), + reinterpret_cast<uintptr_t>(r->Top()), + recount_live_bytes); + // Check that this recount matches the region's current live bytes count. + DCHECK_EQ(live_bytes_recount, r->LiveBytes()); +} + // Poison the memory area in range [`begin`, `end`) with value `kPoisonDeadObject`. static void PoisonUnevacuatedRange(uint8_t* begin, uint8_t* end) { static constexpr size_t kPoisonDeadObjectSize = sizeof(kPoisonDeadObject); @@ -423,7 +466,8 @@ void RegionSpace::PoisonDeadObjectsInUnevacuatedRegion(Region* r) { // The live byte count of `r` should be different from -1, as this // region should neither be a newly allocated region nor an // evacuated region. - DCHECK_NE(r->LiveBytes(), static_cast<size_t>(-1)); + DCHECK_NE(r->LiveBytes(), static_cast<size_t>(-1)) + << "Unexpected live bytes count of -1 in " << Dumpable<Region>(*r); // Past-the-end address of the previously visited (live) object (or // the beginning of the region, if `maybe_poison` has not run yet). 
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h index fa33a8a21b..90f1f1dd2a 100644 --- a/runtime/gc/space/region_space.h +++ b/runtime/gc/space/region_space.h @@ -602,6 +602,11 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { /* out */ size_t* bytes_tl_bulk_allocated, /* out */ size_t* next_region = nullptr) REQUIRES(region_lock_); + // Check that the value of `r->LiveBytes()` matches the number of + // (allocated) bytes used by live objects according to the live bits + // in the region space bitmap range corresponding to region `r`. + void CheckLiveBytesAgainstRegionBitmap(Region* r); + // Poison memory areas used by dead objects within unevacuated // region `r`. This is meant to detect dangling references to dead // objects earlier in debug mode. diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc index d1000c5375..62dec15c57 100644 --- a/runtime/stack_map.cc +++ b/runtime/stack_map.cc @@ -31,83 +31,70 @@ CodeInfo::CodeInfo(const OatQuickMethodHeader* header, DecodeFlags flags) : CodeInfo(header->GetOptimizedCodeInfoPtr(), flags) { } +// Returns true if the decoded table was deduped. template<typename Accessor> -ALWAYS_INLINE static void DecodeTable(BitTable<Accessor>& table, - BitMemoryReader& reader, - const uint8_t* data) { +ALWAYS_INLINE static bool DecodeTable(BitTable<Accessor>& table, BitMemoryReader& reader) { bool is_deduped = reader.ReadBit(); if (is_deduped) { - // 'data' points to the start of the reader's data. - uint32_t current_bit_offset = reader.GetBitOffset(); - uint32_t bit_offset_backwards = DecodeVarintBits(reader) - current_bit_offset; - uint32_t byte_offset_backwards = BitsToBytesRoundUp(bit_offset_backwards); - BitMemoryReader reader2(data - byte_offset_backwards, - byte_offset_backwards * kBitsPerByte - bit_offset_backwards); + ssize_t bit_offset = reader.NumberOfReadBits() - reader.ReadVarint(); + BitMemoryReader reader2(reader.data(), bit_offset); // The offset is negative. 
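// `bit_offset` points back at the start of a previously encoded, identical
// table: the varint read above stores the distance, in bits, from its own
// position back to that table (see CodeInfo::Deduper::Dedupe below). The
// original copy typically lives in an earlier CodeInfo within the same buffer,
// hence the negative offset relative to `reader.data()`.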
table.Decode(reader2); } else { table.Decode(reader); } + return is_deduped; } void CodeInfo::Decode(const uint8_t* data, DecodeFlags flags) { BitMemoryReader reader(data); - packed_frame_size_ = DecodeVarintBits(reader); - core_spill_mask_ = DecodeVarintBits(reader); - fp_spill_mask_ = DecodeVarintBits(reader); - number_of_dex_registers_ = DecodeVarintBits(reader); - DecodeTable(stack_maps_, reader, data); - DecodeTable(register_masks_, reader, data); - DecodeTable(stack_masks_, reader, data); - if (flags & DecodeFlags::GcMasksOnly) { - return; - } - DecodeTable(inline_infos_, reader, data); - DecodeTable(method_infos_, reader, data); - if (flags & DecodeFlags::InlineInfoOnly) { - return; - } - DecodeTable(dex_register_masks_, reader, data); - DecodeTable(dex_register_maps_, reader, data); - DecodeTable(dex_register_catalog_, reader, data); - size_in_bits_ = reader.GetBitOffset(); + ForEachHeaderField([this, &reader](auto member_pointer) { + this->*member_pointer = reader.ReadVarint(); + }); + ForEachBitTableField([this, &reader](auto member_pointer) { + DecodeTable(this->*member_pointer, reader); + }, flags); + size_in_bits_ = reader.NumberOfReadBits(); } -template<typename Accessor> -ALWAYS_INLINE static void DedupeTable(BitMemoryWriter<std::vector<uint8_t>>& writer, - BitMemoryReader& reader, - CodeInfo::DedupeMap* dedupe_map) { - bool is_deduped = reader.ReadBit(); - DCHECK(!is_deduped); - BitTable<Accessor> bit_table(reader); - BitMemoryRegion region = reader.Tail(bit_table.BitSize()); - auto it = dedupe_map->insert(std::make_pair(region, writer.GetBitOffset() + 1 /* dedupe bit */)); - if (it.second /* new bit table */ || region.size_in_bits() < 32) { - writer.WriteBit(false); // Is not deduped. - writer.WriteRegion(region); - } else { - writer.WriteBit(true); // Is deduped. - EncodeVarintBits(writer, writer.GetBitOffset() - it.first->second); +size_t CodeInfo::Deduper::Dedupe(const uint8_t* code_info_data) { + writer_.ByteAlign(); + size_t deduped_offset = writer_.NumberOfWrittenBits() / kBitsPerByte; + BitMemoryReader reader(code_info_data); + CodeInfo code_info; // Temporary storage for decoded data. + ForEachHeaderField([this, &reader, &code_info](auto member_pointer) { + code_info.*member_pointer = reader.ReadVarint(); + writer_.WriteVarint(code_info.*member_pointer); + }); + ForEachBitTableField([this, &reader, &code_info](auto member_pointer) { + bool is_deduped = reader.ReadBit(); + DCHECK(!is_deduped); + size_t bit_table_start = reader.NumberOfReadBits(); + (code_info.*member_pointer).Decode(reader); + BitMemoryRegion region = reader.GetReadRegion().Subregion(bit_table_start); + auto it = dedupe_map_.insert(std::make_pair(region, /* placeholder */ 0)); + if (it.second /* new bit table */ || region.size_in_bits() < 32) { + writer_.WriteBit(false); // Is not deduped. + it.first->second = writer_.NumberOfWrittenBits(); + writer_.WriteRegion(region); + } else { + writer_.WriteBit(true); // Is deduped. 
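// Record how far back, in bits, the previously written copy of this table
// starts, measured from the current write position; DecodeTable() above
// follows the same distance when decoding.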
+ size_t bit_offset = writer_.NumberOfWrittenBits(); + writer_.WriteVarint(bit_offset - it.first->second); + } + }); + + if (kIsDebugBuild) { + CodeInfo old_code_info(code_info_data); + CodeInfo new_code_info(writer_.data() + deduped_offset); + ForEachHeaderField([&old_code_info, &new_code_info](auto member_pointer) { + DCHECK_EQ(old_code_info.*member_pointer, new_code_info.*member_pointer); + }); + ForEachBitTableField([&old_code_info, &new_code_info](auto member_pointer) { + DCHECK((old_code_info.*member_pointer).Equals(new_code_info.*member_pointer)); + }); } -} -size_t CodeInfo::Dedupe(std::vector<uint8_t>* out, const uint8_t* in, DedupeMap* dedupe_map) { - // Remember the current offset in the output buffer so that we can return it later. - const size_t result = out->size(); - BitMemoryReader reader(in); - BitMemoryWriter<std::vector<uint8_t>> writer(out, /* bit_offset */ out->size() * kBitsPerByte); - EncodeVarintBits(writer, DecodeVarintBits(reader)); // packed_frame_size_. - EncodeVarintBits(writer, DecodeVarintBits(reader)); // core_spill_mask_. - EncodeVarintBits(writer, DecodeVarintBits(reader)); // fp_spill_mask_. - EncodeVarintBits(writer, DecodeVarintBits(reader)); // number_of_dex_registers_. - DedupeTable<StackMap>(writer, reader, dedupe_map); - DedupeTable<RegisterMask>(writer, reader, dedupe_map); - DedupeTable<MaskInfo>(writer, reader, dedupe_map); - DedupeTable<InlineInfo>(writer, reader, dedupe_map); - DedupeTable<MethodInfo>(writer, reader, dedupe_map); - DedupeTable<MaskInfo>(writer, reader, dedupe_map); - DedupeTable<DexRegisterMapInfo>(writer, reader, dedupe_map); - DedupeTable<DexRegisterInfo>(writer, reader, dedupe_map); - return result; + return deduped_offset; } BitTable<StackMap>::const_iterator CodeInfo::BinarySearchNativePc(uint32_t packed_pc) const { @@ -194,33 +181,32 @@ void CodeInfo::DecodeDexRegisterMap(uint32_t stack_map_index, } } -template<typename Accessor> -static void AddTableSizeStats(const char* table_name, - const BitTable<Accessor>& table, - /*out*/ Stats* parent) { - Stats* table_stats = parent->Child(table_name); - table_stats->AddBits(table.BitSize()); - table_stats->Child("Header")->AddBits(table.HeaderBitSize()); - const char* const* column_names = GetBitTableColumnNames<Accessor>(); - for (size_t c = 0; c < table.NumColumns(); c++) { - if (table.NumColumnBits(c) > 0) { - Stats* column_stats = table_stats->Child(column_names[c]); - column_stats->AddBits(table.NumRows() * table.NumColumnBits(c), table.NumRows()); +// Decode the CodeInfo while collecting size statistics. +void CodeInfo::CollectSizeStats(const uint8_t* code_info_data, /*out*/ Stats* parent) { + Stats* codeinfo_stats = parent->Child("CodeInfo"); + BitMemoryReader reader(code_info_data); + ForEachHeaderField([&reader](auto) { reader.ReadVarint(); }); + codeinfo_stats->Child("Header")->AddBits(reader.NumberOfReadBits()); + CodeInfo code_info; // Temporary storage for decoded tables. 
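// For each table, either account the dedupe back-reference under
// "DedupeOffset" or attribute the decoded bits to the table itself and to
// each of its non-empty columns.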
+ ForEachBitTableField([codeinfo_stats, &reader, &code_info](auto member_pointer) { + auto& table = code_info.*member_pointer; + size_t bit_offset = reader.NumberOfReadBits(); + bool deduped = DecodeTable(table, reader); + if (deduped) { + codeinfo_stats->Child("DedupeOffset")->AddBits(reader.NumberOfReadBits() - bit_offset); + } else { + Stats* table_stats = codeinfo_stats->Child(table.GetName()); + table_stats->AddBits(reader.NumberOfReadBits() - bit_offset); + const char* const* column_names = table.GetColumnNames(); + for (size_t c = 0; c < table.NumColumns(); c++) { + if (table.NumColumnBits(c) > 0) { + Stats* column_stats = table_stats->Child(column_names[c]); + column_stats->AddBits(table.NumRows() * table.NumColumnBits(c), table.NumRows()); + } + } } - } -} - -void CodeInfo::AddSizeStats(/*out*/ Stats* parent) const { - Stats* stats = parent->Child("CodeInfo"); - stats->AddBytes(Size()); - AddTableSizeStats<StackMap>("StackMaps", stack_maps_, stats); - AddTableSizeStats<RegisterMask>("RegisterMasks", register_masks_, stats); - AddTableSizeStats<MaskInfo>("StackMasks", stack_masks_, stats); - AddTableSizeStats<InlineInfo>("InlineInfos", inline_infos_, stats); - AddTableSizeStats<MethodInfo>("MethodInfo", method_infos_, stats); - AddTableSizeStats<MaskInfo>("DexRegisterMasks", dex_register_masks_, stats); - AddTableSizeStats<DexRegisterMapInfo>("DexRegisterMaps", dex_register_maps_, stats); - AddTableSizeStats<DexRegisterInfo>("DexRegisterCatalog", dex_register_catalog_, stats); + }); + codeinfo_stats->AddBytes(BitsToBytesRoundUp(reader.NumberOfReadBits())); } void DexRegisterMap::Dump(VariableIndentationOutputStream* vios) const { @@ -236,56 +222,49 @@ void DexRegisterMap::Dump(VariableIndentationOutputStream* vios) const { } } -template<typename Accessor> -static void DumpTable(VariableIndentationOutputStream* vios, - const char* table_name, - const BitTable<Accessor>& table, - bool verbose, - bool is_mask = false) { - if (table.NumRows() != 0) { - vios->Stream() << table_name << " BitSize=" << table.BitSize(); - vios->Stream() << " Rows=" << table.NumRows() << " Bits={"; - const char* const* column_names = GetBitTableColumnNames<Accessor>(); - for (size_t c = 0; c < table.NumColumns(); c++) { - vios->Stream() << (c != 0 ? " " : ""); - vios->Stream() << column_names[c] << "=" << table.NumColumnBits(c); - } - vios->Stream() << "}\n"; - if (verbose) { - ScopedIndentation indent1(vios); - for (size_t r = 0; r < table.NumRows(); r++) { - vios->Stream() << "[" << std::right << std::setw(3) << r << "]={"; - for (size_t c = 0; c < table.NumColumns(); c++) { - vios->Stream() << (c != 0 ? 
" " : ""); - if (is_mask) { - BitMemoryRegion bits = table.GetBitMemoryRegion(r, c); - for (size_t b = 0, e = bits.size_in_bits(); b < e; b++) { - vios->Stream() << bits.LoadBit(e - b - 1); - } - } else { - vios->Stream() << std::right << std::setw(8) << static_cast<int32_t>(table.Get(r, c)); - } - } - vios->Stream() << "}\n"; - } - } - } -} - void CodeInfo::Dump(VariableIndentationOutputStream* vios, uint32_t code_offset, bool verbose, InstructionSet instruction_set) const { - vios->Stream() << "CodeInfo\n"; + vios->Stream() << "CodeInfo BitSize=" << size_in_bits_ + << " FrameSize:" << packed_frame_size_ * kStackAlignment + << " CoreSpillMask:" << std::hex << core_spill_mask_ + << " FpSpillMask:" << std::hex << fp_spill_mask_ + << " NumberOfDexRegisters:" << std::dec << number_of_dex_registers_ + << "\n"; ScopedIndentation indent1(vios); - DumpTable<StackMap>(vios, "StackMaps", stack_maps_, verbose); - DumpTable<RegisterMask>(vios, "RegisterMasks", register_masks_, verbose); - DumpTable<MaskInfo>(vios, "StackMasks", stack_masks_, verbose, true /* is_mask */); - DumpTable<InlineInfo>(vios, "InlineInfos", inline_infos_, verbose); - DumpTable<MethodInfo>(vios, "MethodInfo", method_infos_, verbose); - DumpTable<MaskInfo>(vios, "DexRegisterMasks", dex_register_masks_, verbose, true /* is_mask */); - DumpTable<DexRegisterMapInfo>(vios, "DexRegisterMaps", dex_register_maps_, verbose); - DumpTable<DexRegisterInfo>(vios, "DexRegisterCatalog", dex_register_catalog_, verbose); + ForEachBitTableField([this, &vios, verbose](auto member_pointer) { + const auto& table = this->*member_pointer; + if (table.NumRows() != 0) { + vios->Stream() << table.GetName() << " BitSize=" << table.DataBitSize(); + vios->Stream() << " Rows=" << table.NumRows() << " Bits={"; + const char* const* column_names = table.GetColumnNames(); + for (size_t c = 0; c < table.NumColumns(); c++) { + vios->Stream() << (c != 0 ? " " : ""); + vios->Stream() << column_names[c] << "=" << table.NumColumnBits(c); + } + vios->Stream() << "}\n"; + if (verbose) { + ScopedIndentation indent1(vios); + for (size_t r = 0; r < table.NumRows(); r++) { + vios->Stream() << "[" << std::right << std::setw(3) << r << "]={"; + for (size_t c = 0; c < table.NumColumns(); c++) { + vios->Stream() << (c != 0 ? " " : ""); + if (&table == static_cast<const void*>(&stack_masks_) || + &table == static_cast<const void*>(&dex_register_masks_)) { + BitMemoryRegion bits = table.GetBitMemoryRegion(r, c); + for (size_t b = 0, e = bits.size_in_bits(); b < e; b++) { + vios->Stream() << bits.LoadBit(e - b - 1); + } + } else { + vios->Stream() << std::right << std::setw(8) << static_cast<int32_t>(table.Get(r, c)); + } + } + vios->Stream() << "}\n"; + } + } + } + }); // Display stack maps along with (live) Dex register maps. if (verbose) { diff --git a/runtime/stack_map.h b/runtime/stack_map.h index d6db05a3b8..5f44286089 100644 --- a/runtime/stack_map.h +++ b/runtime/stack_map.h @@ -128,7 +128,7 @@ class StackMap : public BitTableAccessor<8> { OSR = 1, Debug = 2, }; - BIT_TABLE_HEADER() + BIT_TABLE_HEADER(StackMap) BIT_TABLE_COLUMN(0, Kind) BIT_TABLE_COLUMN(1, PackedNativePc) BIT_TABLE_COLUMN(2, DexPc) @@ -174,7 +174,7 @@ class StackMap : public BitTableAccessor<8> { */ class InlineInfo : public BitTableAccessor<6> { public: - BIT_TABLE_HEADER() + BIT_TABLE_HEADER(InlineInfo) BIT_TABLE_COLUMN(0, IsLast) // Determines if there are further rows for further depths. 
BIT_TABLE_COLUMN(1, DexPc) BIT_TABLE_COLUMN(2, MethodInfoIndex) @@ -201,21 +201,27 @@ class InlineInfo : public BitTableAccessor<6> { const StackMap& stack_map) const; }; -class MaskInfo : public BitTableAccessor<1> { +class StackMask : public BitTableAccessor<1> { public: - BIT_TABLE_HEADER() + BIT_TABLE_HEADER(StackMask) + BIT_TABLE_COLUMN(0, Mask) +}; + +class DexRegisterMask : public BitTableAccessor<1> { + public: + BIT_TABLE_HEADER(DexRegisterMask) BIT_TABLE_COLUMN(0, Mask) }; class DexRegisterMapInfo : public BitTableAccessor<1> { public: - BIT_TABLE_HEADER() + BIT_TABLE_HEADER(DexRegisterMapInfo) BIT_TABLE_COLUMN(0, CatalogueIndex) }; class DexRegisterInfo : public BitTableAccessor<2> { public: - BIT_TABLE_HEADER() + BIT_TABLE_HEADER(DexRegisterInfo) BIT_TABLE_COLUMN(0, Kind) BIT_TABLE_COLUMN(1, PackedValue) @@ -246,7 +252,7 @@ class DexRegisterInfo : public BitTableAccessor<2> { // therefore it is worth encoding the mask as value+shift. class RegisterMask : public BitTableAccessor<2> { public: - BIT_TABLE_HEADER() + BIT_TABLE_HEADER(RegisterMask) BIT_TABLE_COLUMN(0, Value) BIT_TABLE_COLUMN(1, Shift) @@ -259,7 +265,7 @@ class RegisterMask : public BitTableAccessor<2> { // Separating them greatly improves dedup efficiency of the other tables. class MethodInfo : public BitTableAccessor<1> { public: - BIT_TABLE_HEADER() + BIT_TABLE_HEADER(MethodInfo) BIT_TABLE_COLUMN(0, MethodIndex) }; @@ -269,8 +275,25 @@ class MethodInfo : public BitTableAccessor<1> { */ class CodeInfo { public: + class Deduper { + public: + explicit Deduper(std::vector<uint8_t>* output) : writer_(output) { + DCHECK_EQ(output->size(), 0u); + } + + // Copy CodeInfo into output while de-duplicating the internal bit tables. + // It returns the byte offset of the copied CodeInfo within the output. + size_t Dedupe(const uint8_t* code_info); + + private: + BitMemoryWriter<std::vector<uint8_t>> writer_; + + // Deduplicate at BitTable level. The value is bit offset within the output. + std::map<BitMemoryRegion, uint32_t, BitMemoryRegion::Less> dedupe_map_; + }; + enum DecodeFlags { - Default = 0, + AllTables = 0, // Limits the decoding only to the data needed by GC. GcMasksOnly = 1, // Limits the decoding only to the main stack map table and inline info table. @@ -278,11 +301,11 @@ class CodeInfo { InlineInfoOnly = 2, }; - explicit CodeInfo(const uint8_t* data, DecodeFlags flags = DecodeFlags::Default) { + explicit CodeInfo(const uint8_t* data, DecodeFlags flags = AllTables) { Decode(reinterpret_cast<const uint8_t*>(data), flags); } - explicit CodeInfo(const OatQuickMethodHeader* header, DecodeFlags flags = DecodeFlags::Default); + explicit CodeInfo(const OatQuickMethodHeader* header, DecodeFlags flags = AllTables); size_t Size() const { return BitsToBytesRoundUp(size_in_bits_); @@ -411,27 +434,19 @@ class CodeInfo { InstructionSet instruction_set) const; // Accumulate code info size statistics into the given Stats tree. - void AddSizeStats(/*out*/ Stats* parent) const; + static void CollectSizeStats(const uint8_t* code_info, /*out*/ Stats* parent); ALWAYS_INLINE static QuickMethodFrameInfo DecodeFrameInfo(const uint8_t* data) { BitMemoryReader reader(data); return QuickMethodFrameInfo( - DecodeVarintBits(reader) * kStackAlignment, // Decode packed_frame_size_ and unpack. - DecodeVarintBits(reader), // core_spill_mask_. - DecodeVarintBits(reader)); // fp_spill_mask_. + reader.ReadVarint() * kStackAlignment, // Decode packed_frame_size_ and unpack. + reader.ReadVarint(), // core_spill_mask_. 
+ reader.ReadVarint()); // fp_spill_mask_. } - typedef std::map<BitMemoryRegion, uint32_t, BitMemoryRegion::Less> DedupeMap; - - // Copy CodeInfo data while de-duplicating the internal bit tables. - // The 'out' vector must be reused between Dedupe calls (it does not have to be empty). - // The 'dedupe_map' stores the bit offsets of bit tables within the 'out' vector. - // It returns the byte offset of the copied CodeInfo within the 'out' vector. - static size_t Dedupe(std::vector<uint8_t>* out, - const uint8_t* in, - /*inout*/ DedupeMap* dedupe_map); - private: + CodeInfo() {} + // Returns lower bound (fist stack map which has pc greater or equal than the desired one). // It ignores catch stack maps at the end (it is the same as if they had maximum pc value). BitTable<StackMap>::const_iterator BinarySearchNativePc(uint32_t packed_pc) const; @@ -443,16 +458,44 @@ class CodeInfo { void Decode(const uint8_t* data, DecodeFlags flags); + // Invokes the callback with member pointer of each header field. + template<typename Callback> + ALWAYS_INLINE static void ForEachHeaderField(Callback callback) { + callback(&CodeInfo::packed_frame_size_); + callback(&CodeInfo::core_spill_mask_); + callback(&CodeInfo::fp_spill_mask_); + callback(&CodeInfo::number_of_dex_registers_); + } + + // Invokes the callback with member pointer of each BitTable field. + template<typename Callback> + ALWAYS_INLINE static void ForEachBitTableField(Callback callback, DecodeFlags flags = AllTables) { + callback(&CodeInfo::stack_maps_); + callback(&CodeInfo::register_masks_); + callback(&CodeInfo::stack_masks_); + if (flags & DecodeFlags::GcMasksOnly) { + return; + } + callback(&CodeInfo::inline_infos_); + callback(&CodeInfo::method_infos_); + if (flags & DecodeFlags::InlineInfoOnly) { + return; + } + callback(&CodeInfo::dex_register_masks_); + callback(&CodeInfo::dex_register_maps_); + callback(&CodeInfo::dex_register_catalog_); + } + uint32_t packed_frame_size_; // Frame size in kStackAlignment units. uint32_t core_spill_mask_; uint32_t fp_spill_mask_; uint32_t number_of_dex_registers_; BitTable<StackMap> stack_maps_; BitTable<RegisterMask> register_masks_; - BitTable<MaskInfo> stack_masks_; + BitTable<StackMask> stack_masks_; BitTable<InlineInfo> inline_infos_; BitTable<MethodInfo> method_infos_; - BitTable<MaskInfo> dex_register_masks_; + BitTable<DexRegisterMask> dex_register_masks_; BitTable<DexRegisterMapInfo> dex_register_maps_; BitTable<DexRegisterInfo> dex_register_catalog_; uint32_t size_in_bits_ = 0; diff --git a/runtime/thread.cc b/runtime/thread.cc index 0703a074d5..df7f19d118 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -3605,7 +3605,7 @@ class ReferenceMapVisitor : public StackVisitor { reinterpret_cast<uintptr_t>(cur_quick_frame)); uintptr_t native_pc_offset = method_header->NativeQuickPcOffset(GetCurrentQuickFramePc()); CodeInfo code_info(method_header, kPrecise - ? CodeInfo::DecodeFlags::Default // We will need dex register maps. + ? CodeInfo::DecodeFlags::AllTables // We will need dex register maps. : CodeInfo::DecodeFlags::GcMasksOnly); StackMap map = code_info.GetStackMapForNativePcOffset(native_pc_offset); DCHECK(map.IsValid()); diff --git a/test/160-read-barrier-stress/src/Main.java b/test/160-read-barrier-stress/src/Main.java index 7e130cef94..04880c5c4b 100644 --- a/test/160-read-barrier-stress/src/Main.java +++ b/test/160-read-barrier-stress/src/Main.java @@ -14,8 +14,11 @@ * limitations under the License. 
*/ +import java.lang.reflect.Field; +import sun.misc.Unsafe; + public class Main { - public static void main(String[] args) { + public static void main(String[] args) throws Exception { // Initialize local variables for comparison. Object f0000 = manyFields.testField0000; Object f1024 = manyFields.testField1024; @@ -44,6 +47,16 @@ public class Main { testString2 = "testString2"; testString3 = "testString3"; } + // Initialize Unsafe. + Unsafe unsafe = getUnsafe(); + long f0000Offset = + unsafe.objectFieldOffset(ManyFields.class.getField("testField0000")); + long f1024Offset = + unsafe.objectFieldOffset(ManyFields.class.getField("testField1024")); + long f4444Offset = + unsafe.objectFieldOffset(ManyFields.class.getField("testField4444")); + long f4999Offset = + unsafe.objectFieldOffset(ManyFields.class.getField("testField4999")); // Continually check reads from `manyFields` and `largeArray` while allocating // over 64MiB memory (with heap size limited to 16MiB), ensuring we run GC and @@ -74,7 +87,17 @@ public class Main { assertSameObject(testString2, "testString2"); assertSameObject(testString3, "testString3"); } - // TODO: Stress GC roots (const-string/const-class, kBssEntry/kReferrersClass). + // TODO: Stress GC roots (const-class, kBssEntry/kReferrersClass). + // Test Unsafe.getObject(). + assertSameObject(f0000, unsafe.getObject(mf, f0000Offset)); + assertSameObject(f1024, unsafe.getObject(mf, f1024Offset)); + assertSameObject(f4444, unsafe.getObject(mf, f4444Offset)); + assertSameObject(f4999, unsafe.getObject(mf, f4999Offset)); + // Test Unsafe.compareAndSwapObject(). + assertEqual(false, unsafe.compareAndSwapObject(mf, f4444Offset, f1024, f4444)); + assertEqual(true, unsafe.compareAndSwapObject(mf, f1024Offset, f1024, f4444)); + assertEqual(true, unsafe.compareAndSwapObject(mf, f1024Offset, f4444, f1024)); + assertEqual(false, unsafe.compareAndSwapObject(mf, f1024Offset, f4444, f1024)); } } @@ -84,6 +107,19 @@ public class Main { } } + public static void assertEqual(boolean expected, boolean actual) { + if (expected != actual) { + throw new Error("Expected " + expected +", got " + actual); + } + } + + public static Unsafe getUnsafe() throws Exception { + Class<?> unsafeClass = Class.forName("sun.misc.Unsafe"); + Field f = unsafeClass.getDeclaredField("theUnsafe"); + f.setAccessible(true); + return (Unsafe) f.get(null); + } + public static void allocateAtLeast1KiB() { // Give GC more work by allocating Object arrays. 
memory[allocationIndex] = new Object[1024 / 4]; @@ -359,7 +359,7 @@ if [ "$ANDROID_DATA" = "/data" ] || [ "$ANDROID_DATA" = "" ]; then fi if [ "$PERF" != "" ]; then - LAUNCH_WRAPPER="perf record -g -o $ANDROID_DATA/perf.data -e cycles:u $LAUNCH_WRAPPER" + LAUNCH_WRAPPER="perf record -g --call-graph dwarf -F 10000 -o $ANDROID_DATA/perf.data -e cycles:u $LAUNCH_WRAPPER" EXTRA_OPTIONS+=(-Xcompiler-option --generate-debug-info) fi diff --git a/tools/class2greylist/Android.bp b/tools/class2greylist/Android.bp index 7b1233bb85..1e3cdff59c 100644 --- a/tools/class2greylist/Android.bp +++ b/tools/class2greylist/Android.bp @@ -20,6 +20,7 @@ java_library_host { static_libs: [ "commons-cli-1.2", "apache-bcel", + "guava", ], } diff --git a/tools/class2greylist/src/com/android/class2greylist/AnnotationVisitor.java b/tools/class2greylist/src/com/android/class2greylist/AnnotationVisitor.java index 6b9ef161eb..c5c8ef02d7 100644 --- a/tools/class2greylist/src/com/android/class2greylist/AnnotationVisitor.java +++ b/tools/class2greylist/src/com/android/class2greylist/AnnotationVisitor.java @@ -16,6 +16,8 @@ package com.android.class2greylist; +import com.google.common.annotations.VisibleForTesting; + import org.apache.bcel.Const; import org.apache.bcel.classfile.AnnotationEntry; import org.apache.bcel.classfile.DescendingVisitor; @@ -27,6 +29,8 @@ import org.apache.bcel.classfile.JavaClass; import org.apache.bcel.classfile.Method; import java.util.Locale; +import java.util.Set; +import java.util.function.Predicate; /** * Visits a JavaClass instance and pulls out all members annotated with a @@ -44,13 +48,46 @@ public class AnnotationVisitor extends EmptyVisitor { private final JavaClass mClass; private final String mAnnotationType; + private final Predicate<Member> mMemberFilter; private final Status mStatus; private final DescendingVisitor mDescendingVisitor; - public AnnotationVisitor(JavaClass clazz, String annotation, Status d) { + /** + * Represents a member of a class file (a field or method). + */ + @VisibleForTesting + public static class Member { + + /** + * Signature of this member. + */ + public final String signature; + /** + * Indicates if this is a synthetic bridge method. 
+ */ + public final boolean bridge; + + public Member(String signature, boolean bridge) { + this.signature = signature; + this.bridge = bridge; + } + } + + public AnnotationVisitor( + JavaClass clazz, String annotation, Set<String> publicApis, Status status) { + this(clazz, + annotation, + member -> !(member.bridge && publicApis.contains(member.signature)), + status); + } + + @VisibleForTesting + public AnnotationVisitor( + JavaClass clazz, String annotation, Predicate<Member> memberFilter, Status status) { mClass = clazz; mAnnotationType = annotation; - mStatus = d; + mMemberFilter = memberFilter; + mStatus = status; mDescendingVisitor = new DescendingVisitor(clazz, this); } @@ -102,7 +139,9 @@ public class AnnotationVisitor extends EmptyVisitor { break; } } - mStatus.greylistEntry(signature); + if (mMemberFilter.test(new Member(signature, bridge))) { + mStatus.greylistEntry(signature); + } } } } diff --git a/tools/class2greylist/src/com/android/class2greylist/Class2Greylist.java b/tools/class2greylist/src/com/android/class2greylist/Class2Greylist.java index 3e9e320b5b..abc9421e65 100644 --- a/tools/class2greylist/src/com/android/class2greylist/Class2Greylist.java +++ b/tools/class2greylist/src/com/android/class2greylist/Class2Greylist.java @@ -16,6 +16,9 @@ package com.android.class2greylist; +import com.google.common.collect.Sets; +import com.google.common.io.Files; + import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.GnuParser; @@ -23,9 +26,12 @@ import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.OptionBuilder; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; -import org.apache.commons.cli.PatternOptionBuilder; +import java.io.File; import java.io.IOException; +import java.nio.charset.Charset; +import java.util.Collections; +import java.util.Set; /** * Build time tool for extracting a list of members from jar files that have the @UsedByApps @@ -38,6 +44,11 @@ public class Class2Greylist { public static void main(String[] args) { Options options = new Options(); options.addOption(OptionBuilder + .withLongOpt("public-api-list") + .hasArgs(1) + .withDescription("Public API list file. 
Used to de-dupe bridge methods.") + .create("p")); + options.addOption(OptionBuilder .withLongOpt("debug") .hasArgs(0) .withDescription("Enable debug") @@ -61,6 +72,7 @@ public class Class2Greylist { if (cmd.hasOption('h')) { help(options); } + String publicApiFilename = cmd.getOptionValue('p', null); String[] jarFiles = cmd.getArgs(); if (jarFiles.length == 0) { @@ -70,12 +82,26 @@ public class Class2Greylist { Status status = new Status(cmd.hasOption('d')); + Set<String> publicApis; + if (publicApiFilename != null) { + try { + publicApis = Sets.newHashSet( + Files.readLines(new File(publicApiFilename), Charset.forName("UTF-8"))); + } catch (IOException e) { + status.error(e); + System.exit(1); + return; + } + } else { + publicApis = Collections.emptySet(); + } + for (String jarFile : jarFiles) { status.debug("Processing jar file %s", jarFile); try { JarReader reader = new JarReader(status, jarFile); - reader.stream().forEach(clazz -> new AnnotationVisitor( - clazz, ANNOTATION_TYPE, status).visit()); + reader.stream().forEach(clazz -> new AnnotationVisitor(clazz, ANNOTATION_TYPE, + publicApis, status).visit()); reader.close(); } catch (IOException e) { status.error(e); diff --git a/tools/class2greylist/test/src/com/android/javac/AnnotationVisitorTest.java b/tools/class2greylist/test/src/com/android/javac/AnnotationVisitorTest.java index a4ad20c605..ff9c265a25 100644 --- a/tools/class2greylist/test/src/com/android/javac/AnnotationVisitorTest.java +++ b/tools/class2greylist/test/src/com/android/javac/AnnotationVisitorTest.java @@ -29,6 +29,7 @@ import com.android.class2greylist.AnnotationVisitor; import com.android.class2greylist.Status; import com.google.common.base.Joiner; +import com.google.common.collect.Sets; import org.junit.Before; import org.junit.Rule; @@ -37,6 +38,7 @@ import org.junit.rules.TestName; import org.mockito.ArgumentCaptor; import java.io.IOException; +import java.util.Set; public class AnnotationVisitorTest { @@ -81,7 +83,7 @@ public class AnnotationVisitorTest { "}")); assertThat(mJavac.compile()).isTrue(); - new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, mStatus) + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, x -> true, mStatus) .visit(); assertNoErrors(); @@ -101,7 +103,7 @@ public class AnnotationVisitorTest { "}")); assertThat(mJavac.compile()).isTrue(); - new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, mStatus) + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, x -> true, mStatus) .visit(); assertNoErrors(); @@ -121,7 +123,7 @@ public class AnnotationVisitorTest { "}")); assertThat(mJavac.compile()).isTrue(); - new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, mStatus) + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, x -> true, mStatus) .visit(); assertNoErrors(); @@ -141,7 +143,7 @@ public class AnnotationVisitorTest { "}")); assertThat(mJavac.compile()).isTrue(); - new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, mStatus) + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, x -> true, mStatus) .visit(); assertNoErrors(); @@ -161,7 +163,7 @@ public class AnnotationVisitorTest { "}")); assertThat(mJavac.compile()).isTrue(); - new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, mStatus) + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, x -> true, mStatus) .visit(); verify(mStatus, times(1)).error(any(String.class)); @@ -180,7 +182,7 
@@ public class AnnotationVisitorTest { "}")); assertThat(mJavac.compile()).isTrue(); - new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class$Inner"), ANNOTATION, + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class$Inner"), ANNOTATION, x -> true, mStatus).visit(); assertNoErrors(); @@ -198,7 +200,7 @@ public class AnnotationVisitorTest { "}")); assertThat(mJavac.compile()).isTrue(); - new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, mStatus) + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, x -> true, mStatus) .visit(); assertNoErrors(); @@ -216,7 +218,7 @@ public class AnnotationVisitorTest { "}")); assertThat(mJavac.compile()).isTrue(); - new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, mStatus) + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, x -> true, mStatus) .visit(); assertNoErrors(); @@ -243,9 +245,9 @@ public class AnnotationVisitorTest { "}")); assertThat(mJavac.compile()).isTrue(); - new AnnotationVisitor(mJavac.getCompiledClass("a.b.Base"), ANNOTATION, mStatus) + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Base"), ANNOTATION, x -> true, mStatus) .visit(); - new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, mStatus) + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, x -> true, mStatus) .visit(); assertNoErrors(); @@ -275,9 +277,9 @@ public class AnnotationVisitorTest { "}")); assertThat(mJavac.compile()).isTrue(); - new AnnotationVisitor(mJavac.getCompiledClass("a.b.Base"), ANNOTATION, mStatus) + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Base"), ANNOTATION, x -> true, mStatus) .visit(); - new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, mStatus) + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, x -> true, mStatus) .visit(); assertNoErrors(); @@ -312,11 +314,11 @@ public class AnnotationVisitorTest { assertThat(mJavac.compile()).isTrue(); new AnnotationVisitor( - mJavac.getCompiledClass("a.b.Interface"), ANNOTATION, mStatus).visit(); + mJavac.getCompiledClass("a.b.Interface"), ANNOTATION, x -> true, mStatus).visit(); new AnnotationVisitor( - mJavac.getCompiledClass("a.b.Base"), ANNOTATION, mStatus).visit(); + mJavac.getCompiledClass("a.b.Base"), ANNOTATION, x -> true, mStatus).visit(); new AnnotationVisitor( - mJavac.getCompiledClass("a.b.Class"), ANNOTATION, mStatus).visit(); + mJavac.getCompiledClass("a.b.Class"), ANNOTATION, x -> true, mStatus).visit(); assertNoErrors(); ArgumentCaptor<String> greylist = ArgumentCaptor.forClass(String.class); @@ -326,4 +328,37 @@ public class AnnotationVisitorTest { "La/b/Class;->method(Ljava/lang/Object;)V", "La/b/Base;->method(Ljava/lang/Object;)V"); } + + @Test + public void testPublicBridgeExcluded() throws IOException { + mJavac.addSource("a.b.Base", Joiner.on('\n').join( + "package a.b;", + "public abstract class Base<T> {", + " public void method(T arg) {}", + "}")); + + mJavac.addSource("a.b.Class", Joiner.on('\n').join( + "package a.b;", + "import annotation.Anno;", + "public class Class<T extends String> extends Base<T> {", + " @Override", + " @Anno", + " public void method(T arg) {}", + "}")); + assertThat(mJavac.compile()).isTrue(); + + Set<String> publicApis = Sets.newHashSet( + "La/b/Base;->method(Ljava/lang/Object;)V", + "La/b/Class;->method(Ljava/lang/Object;)V"); + new AnnotationVisitor(mJavac.getCompiledClass("a.b.Base"), ANNOTATION, publicApis, + mStatus).visit(); + new 
AnnotationVisitor(mJavac.getCompiledClass("a.b.Class"), ANNOTATION, publicApis, + mStatus).visit(); + + assertNoErrors(); + ArgumentCaptor<String> greylist = ArgumentCaptor.forClass(String.class); + // The bridge method generated for the above is a public API, so it should be excluded. + verify(mStatus, times(1)).greylistEntry(greylist.capture()); + assertThat(greylist.getValue()).isEqualTo("La/b/Class;->method(Ljava/lang/String;)V"); + } } diff --git a/tools/teardown-buildbot-device.sh b/tools/teardown-buildbot-device.sh index be68b9f490..6634fb4bb7 100755 --- a/tools/teardown-buildbot-device.sh +++ b/tools/teardown-buildbot-device.sh @@ -42,15 +42,14 @@ if [[ -n "$ART_TEST_CHROOT" ]]; then # that started this process. for_all_chroot_process() { local action=$1 - for link in $(adb shell ls -d "/proc/*/root"); do - local root=$(adb shell readlink "$link") - if [[ "x$root" = "x$ART_TEST_CHROOT" ]]; then - local dir=$(dirname "$link") - local pid=$(basename "$dir") - local cmdline=$(adb shell cat "$dir"/cmdline | tr '\000' ' ') - $action "$pid" "$cmdline" - fi - done + adb shell ls -ld "/proc/*/root" \ | sed -n -e "s,^.* \\(/proc/.*/root\\) -> $ART_TEST_CHROOT\$,\\1,p" \ | while read link; do + local dir=$(dirname "$link") + local pid=$(basename "$dir") + local cmdline=$(adb shell cat "$dir"/cmdline | tr '\000' ' ') + $action "$pid" "$cmdline" + done } # display_process PID CMDLINE |
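
Note on the CodeInfo refactoring above: Decode(), Deduper::Dedupe(), CollectSizeStats() and Dump() all walk the header fields and bit tables through ForEachHeaderField()/ForEachBitTableField(), which hand pointers-to-members to a generic lambda so the field list is written exactly once. A self-contained sketch of that idiom follows; the struct and field names here are illustrative, not ART's.

#include <cstdint>
#include <iostream>

struct Header {
  uint32_t packed_frame_size;
  uint32_t core_spill_mask;
  uint32_t fp_spill_mask;

  // Single source of truth for the field list; callers supply the per-field action.
  template <typename Callback>
  static void ForEachField(Callback callback) {
    callback(&Header::packed_frame_size);
    callback(&Header::core_spill_mask);
    callback(&Header::fp_spill_mask);
  }
};

int main() {
  Header header{8, 0x1f, 0x3};
  uint32_t total = 0;
  // The generic lambda receives a pointer-to-member and reads it with the .* operator,
  // mirroring `this->*member_pointer` in CodeInfo::Decode above.
  Header::ForEachField([&](auto member) { total += header.*member; });
  std::cout << total << "\n";  // prints 42 (8 + 31 + 3)
  return 0;
}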
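
The new CodeInfo::Deduper replaces the old static CodeInfo::Dedupe(out, in, dedupe_map) entry point with a stateful object that owns both the output writer and the dedupe map, so callers no longer thread a DedupeMap through every call. A minimal usage sketch, not taken from the patch: the wrapper function and buffer names are hypothetical, only the Deduper constructor and Dedupe() call come from the header above.

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>
#include "stack_map.h"  // assumption: ART-internal header declaring art::CodeInfo

// Hypothetical wrapper: deduplicate two encoded CodeInfo blobs into one buffer.
std::pair<size_t, size_t> DedupeTwo(const uint8_t* info_a,
                                    const uint8_t* info_b,
                                    /*out*/ std::vector<uint8_t>* buffer) {
  art::CodeInfo::Deduper deduper(buffer);     // the constructor DCHECKs that `buffer` starts empty
  size_t offset_a = deduper.Dedupe(info_a);   // byte offset of the copied CodeInfo within `buffer`
  size_t offset_b = deduper.Dedupe(info_b);   // already-seen bit tables become back-references
  return {offset_a, offset_b};                // each offset decodes via art::CodeInfo(buffer->data() + offset)
}

Repeated bit tables are emitted once; a later occurrence is written as a dedupe bit plus a varint back-reference, and the size_in_bits() < 32 check in Dedupe() keeps tiny tables inline where a back-reference would not pay off.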
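
On the DecodeFlags rename (Default -> AllTables): partial decoding works as before, because ForEachBitTableField() returns early for GcMasksOnly and InlineInfoOnly. A hedged sketch of a caller choosing the cheaper mode, assuming only a raw code-info pointer is at hand (this mirrors the ReferenceMapVisitor change in runtime/thread.cc):

#include <cstdint>
#include "stack_map.h"  // assumption: ART-internal header declaring art::CodeInfo

art::StackMap FindStackMap(const uint8_t* code_info_data,
                           uint32_t native_pc_offset,
                           bool precise) {
  // Dex register maps are only decoded when needed; GC-only callers skip most tables.
  art::CodeInfo code_info(code_info_data,
                          precise ? art::CodeInfo::DecodeFlags::AllTables
                                  : art::CodeInfo::DecodeFlags::GcMasksOnly);
  return code_info.GetStackMapForNativePcOffset(native_pc_offset);
}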