diff options
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 475 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 484 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.cc | 472 |
3 files changed, 419 insertions, 1012 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 2560c9fedd..710ca7ad45 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -636,75 +636,10 @@ class ArraySetSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; -// Abstract base class for read barrier slow paths marking a reference -// `ref`. -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. -class ReadBarrierMarkSlowPathBaseARM : public SlowPathCodeARM { - protected: - ReadBarrierMarkSlowPathBaseARM(HInstruction* instruction, Location ref, Location entrypoint) - : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM"; } - - // Generate assembly code calling the read barrier marking runtime - // entry point (ReadBarrierMarkRegX). - void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) { - Register ref_reg = ref_.AsRegister<Register>(); - - // No need to save live registers; it's taken care of by the - // entrypoint. Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - DCHECK_NE(ref_reg, SP); - DCHECK_NE(ref_reg, LR); - DCHECK_NE(ref_reg, PC); - // IP is used internally by the ReadBarrierMarkRegX entry point - // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(ref_reg, IP); - DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; - // "Compact" slow path, saving two moves. - // - // Instead of using the standard runtime calling convention (input - // and output in R0): - // - // R0 <- ref - // R0 <- ReadBarrierMark(R0) - // ref <- R0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: - // - // rX <- ReadBarrierMarkRegX(rX) - // - if (entrypoint_.IsValid()) { - arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - __ blx(entrypoint_.AsRegister<Register>()); - } else { - // Entrypoint is not already loaded, load from the thread. - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); - // This runtime call does not require a stack map. - arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); - } - } - - // The location (register) of the marked object reference. - const Location ref_; - - // The location of the entrypoint if it is already loaded. - const Location entrypoint_; - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM); -}; - // Slow path marking an object reference `ref` during a read // barrier. The field `obj.field` in the object `obj` holding this -// reference does not get updated by this slow path after marking. +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that). // // This means that after the execution of this slow path, `ref` will // always be up-to-date, but `obj.field` may not; i.e., after the @@ -716,12 +651,12 @@ class ReadBarrierMarkSlowPathBaseARM : public SlowPathCodeARM { // If `entrypoint` is a valid location it is assumed to already be // holding the entrypoint. The case where the entrypoint is passed in // is when the decision to mark is based on whether the GC is marking. -class ReadBarrierMarkSlowPathARM : public ReadBarrierMarkSlowPathBaseARM { +class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { public: ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location ref, Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint) { + : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) { DCHECK(kEmitCompilerReadBarrier); } @@ -729,77 +664,15 @@ class ReadBarrierMarkSlowPathARM : public ReadBarrierMarkSlowPathBaseARM { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - DCHECK(locations->CanCall()); - if (kIsDebugBuild) { - Register ref_reg = ref_.AsRegister<Register>(); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; - } - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - GenerateReadBarrierMarkRuntimeCall(codegen); - __ b(GetExitLabel()); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). The field `obj.field` in the object `obj` holding -// this reference does not get updated by this slow path after marking -// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM -// below for that). -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. -class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowPathBaseARM { - public: - LoadReferenceWithBakerReadBarrierSlowPathARM(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check, - Register temp, - Location entrypoint) - : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierSlowPathARM"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; - DCHECK_NE(ref_reg, temp_); DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || instruction_->IsArraySet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || @@ -813,202 +686,158 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowP instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); - - // When using MaybeGenerateReadBarrierSlow, the read barrier call is - // inserted after the original load. However, in fast path based - // Baker's read barriers, we need to perform the load of - // mirror::Object::monitor_ *before* the original reference load. - // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + DCHECK_NE(ref_reg, SP); + DCHECK_NE(ref_reg, LR); + DCHECK_NE(ref_reg, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_reg, IP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; + // "Compact" slow path, saving two moves. // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } + // Instead of using the standard runtime calling convention (input + // and output in R0): // - // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as it performs additional checks that we do - // not do here for performance reasons. - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - __ LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including the rb_state, - // which shall prevent load-load reordering without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ add(obj_, obj_, ShifterOperand(temp_, LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - arm_codegen->GenerateRawReferenceLoad( - instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false); - - // Mark the object `ref` when `obj` is gray. + // R0 <- ref + // R0 <- ReadBarrierMark(R0) + // ref <- R0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); + // rX <- ReadBarrierMarkRegX(rX) // - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with LSRS - // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1); - __ b(GetExitLabel(), CC); // Carry flag is the last bit shifted out by LSRS. - GenerateReadBarrierMarkRuntimeCall(codegen); - + if (entrypoint_.IsValid()) { + arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + __ blx(entrypoint_.AsRegister<Register>()); + } else { + // Entrypoint is not already loaded, load from the thread. + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + } __ b(GetExitLabel()); } private: - // The register containing the object holding the marked object reference field. - Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - ScaleFactor scale_factor_; - // Is a null check required? - bool needs_null_check_; - // A temporary register used to hold the lock word of `obj_`. - Register temp_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM); + // The location (register) of the marked object reference. + const Location ref_; + + // The location of the entrypoint if already loaded. + const Location entrypoint_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); }; -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). If needed, this slow path also atomically updates -// the field `obj.field` in the object `obj` holding this reference -// after marking (contrary to -// LoadReferenceWithBakerReadBarrierSlowPathARM above, which never -// tries to update `obj.field`). +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathARM above, which never tries to update +// `obj.field`). // // This means that after the execution of this slow path, both `ref` // and `obj.field` will be up-to-date; i.e., after the flip, both will // hold the same to-space reference (unless another thread installed // another object reference (different from `ref`) in `obj.field`). // -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. -class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM - : public ReadBarrierMarkSlowPathBaseARM { +// If `entrypoint` is a valid location it is assumed to already be +// holding the entrypoint. The case where the entrypoint is passed in +// is when the decision to mark is based on whether the GC is marking. +class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM { public: - LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check, - Register temp1, - Register temp2, - Location entrypoint) - : ReadBarrierMarkSlowPathBaseARM(instruction, ref, entrypoint), + ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp1, + Register temp2, + Location entrypoint = Location::NoLocation()) + : SlowPathCodeARM(instruction), + ref_(ref), obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), + field_offset_(field_offset), temp1_(temp1), - temp2_(temp2) { + temp2_(temp2), + entrypoint_(entrypoint) { DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); } - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM"; - } + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; - DCHECK_NE(ref_reg, temp1_); - - // This slow path is only used by the UnsafeCASObject intrinsic at the moment. + // This slow path is only used by the UnsafeCASObject intrinsic. DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking and field updating slow path: " << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); - DCHECK_EQ(offset_, 0u); - DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1); - // The location of the offset of the marked reference field within `obj_`. - Location field_offset = index_; - DCHECK(field_offset.IsRegisterPair()) << field_offset; + DCHECK(field_offset_.IsRegisterPair()) << field_offset_; __ Bind(GetEntryLabel()); - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including the rb_state, - // which shall prevent load-load reordering without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp1`. - __ add(obj_, obj_, ShifterOperand(temp1_, LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - arm_codegen->GenerateRawReferenceLoad( - instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with LSRS - // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1); - __ b(GetExitLabel(), CC); // Carry flag is the last bit shifted out by LSRS. - - // Save the old value of the reference before marking it. + // Save the old reference. // Note that we cannot use IP to save the old reference, as IP is // used internally by the ReadBarrierMarkRegX entry point, and we // need the old reference after the call to that entry point. DCHECK_NE(temp1_, IP); __ Mov(temp1_, ref_reg); - GenerateReadBarrierMarkRuntimeCall(codegen); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + DCHECK_NE(ref_reg, SP); + DCHECK_NE(ref_reg, LR); + DCHECK_NE(ref_reg, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_reg, IP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in R0): + // + // R0 <- ref + // R0 <- ReadBarrierMark(R0) + // ref <- R0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + if (entrypoint_.IsValid()) { + arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + __ blx(entrypoint_.AsRegister<Register>()); + } else { + // Entrypoint is not already loaded, load from the thread. + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + } // If the new reference is different from the old reference, - // update the field in the holder (`*(obj_ + field_offset)`). + // update the field in the holder (`*(obj_ + field_offset_)`). // // Note that this field could also hold a different object, if // another thread had concurrently changed it. In that case, the // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set // (CAS) operation below would abort the CAS, leaving the field // as-is. + Label done; __ cmp(temp1_, ShifterOperand(ref_reg)); - __ b(GetExitLabel(), EQ); + __ b(&done, EQ); // Update the the holder's field atomically. This may fail if // mutator updates before us, but it's OK. This is achieved @@ -1021,7 +850,7 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM // The UnsafeCASObject intrinsic uses a register pair as field // offset ("long offset"), of which only the low part contains // data. - Register offset = field_offset.AsRegisterPairLow<Register>(); + Register offset = field_offset_.AsRegisterPairLow<Register>(); Register expected = temp1_; Register value = ref_reg; Register tmp_ptr = IP; // Pointer to actual memory. @@ -1071,27 +900,25 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM } } + __ Bind(&done); __ b(GetExitLabel()); } private: + // The location (register) of the marked object reference. + const Location ref_; // The register containing the object holding the marked object reference field. const Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - ScaleFactor scale_factor_; - // Is a null check required? - bool needs_null_check_; - // A temporary register used to hold the lock word of `obj_`; and - // also to hold the original reference value, when the reference is - // marked. + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + const Register temp1_; - // A temporary register used in the implementation of the CAS, to - // update the object's reference field. const Register temp2_; - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM); + // The location of the entrypoint if already loaded. + const Location entrypoint_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM); }; // Slow path generating a read barrier for a heap reference. @@ -7483,11 +7310,13 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - // Query `art::Thread::Current()->GetIsGcMarking()` to decide - // whether we need to enter the slow path to mark the reference. - // Then, in the slow path, check the gray bit in the lock word of - // the reference's holder (`obj`) to decide whether to mark `ref` or - // not. + // After loading the reference from `obj.field` into `ref`, query + // `art::Thread::Current()->GetIsGcMarking()` to decide whether we + // need to enter the slow path to mark the reference. This + // optimistic strategy (we expect the GC to not be marking most of + // the time) does not check `obj`'s lock word (to see if it is a + // gray object or not), so may sometimes mark an already marked + // object. // // Note that we do not actually check the value of `GetIsGcMarking()`; // instead, we load into `temp3` the read barrier mark entry point @@ -7495,19 +7324,14 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // that `GetIsGcMarking()` is false, and vice versa. // // temp3 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // HeapReference<mirror::Object> ref = *src; // Original reference load. // if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() // // Slow path. - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. + // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. // } + // TODO: This temp register is only necessary when + // `always_update_field` is true; make it optional (like `temp2`). Register temp_reg = temp.AsRegister<Register>(); // Slow path marking the object `ref` when the GC is marking. The @@ -7516,37 +7340,18 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i SlowPathCodeARM* slow_path; if (always_update_field) { DCHECK(temp2 != nullptr); - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only - // supports address of the form `obj + field_offset`, where `obj` - // is a register and `field_offset` is a register pair (of which - // only the lower half is used). Thus `offset` and `scale_factor` - // above are expected to be null in this code path. + // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address + // of the form `obj + field_offset`, where `obj` is a register and + // `field_offset` is a register pair (of which only the lower half + // is used). Thus `offset` and `scale_factor` above are expected + // to be null in this code path. DCHECK_EQ(offset, 0u); DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - temp_reg, - *temp2, - /* entrypoint */ temp3); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM( + instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2, /* entrypoint */ temp3); } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - temp_reg, - /* entrypoint */ temp3); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( + instruction, ref, /* entrypoint */ temp3); } AddSlowPath(slow_path); @@ -7556,11 +7361,11 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. __ LoadFromOffset(kLoadWord, temp3.AsRegister<Register>(), TR, entry_point_offset); + // The reference load. + GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel()); - // Fast path: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7b6c97cd23..5bdaac2e4a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -633,73 +633,10 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } -// Abstract base class for read barrier slow paths marking a reference -// `ref`. -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. -class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 { - protected: - ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint) - : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; } - - // Generate assembly code calling the read barrier marking runtime - // entry point (ReadBarrierMarkRegX). - void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) { - // No need to save live registers; it's taken care of by the - // entrypoint. Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - DCHECK_NE(ref_.reg(), LR); - DCHECK_NE(ref_.reg(), WSP); - DCHECK_NE(ref_.reg(), WZR); - // IP0 is used internally by the ReadBarrierMarkRegX entry point - // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(ref_.reg(), IP0); - DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); - // "Compact" slow path, saving two moves. - // - // Instead of using the standard runtime calling convention (input - // and output in W0): - // - // W0 <- ref - // W0 <- ReadBarrierMark(W0) - // ref <- W0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: - // - // rX <- ReadBarrierMarkRegX(rX) - // - if (entrypoint_.IsValid()) { - arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - __ Blr(XRegisterFrom(entrypoint_)); - } else { - // Entrypoint is not already loaded, load from the thread. - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); - // This runtime call does not require a stack map. - arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); - } - } - - // The location (register) of the marked object reference. - const Location ref_; - - // The location of the entrypoint if it is already loaded. - const Location entrypoint_; - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64); -}; - // Slow path marking an object reference `ref` during a read // barrier. The field `obj.field` in the object `obj` holding this -// reference does not get updated by this slow path after marking. +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathARM64 below for that). // // This means that after the execution of this slow path, `ref` will // always be up-to-date, but `obj.field` may not; i.e., after the @@ -711,12 +648,14 @@ class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 { // If `entrypoint` is a valid location it is assumed to already be // holding the entrypoint. The case where the entrypoint is passed in // is when the decision to mark is based on whether the GC is marking. -class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 { +class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location ref, Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) { + : SlowPathCodeARM64(instruction), + ref_(ref), + entrypoint_(entrypoint) { DCHECK(kEmitCompilerReadBarrier); } @@ -727,75 +666,12 @@ class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 { DCHECK(locations->CanCall()); DCHECK(ref_.IsRegister()) << ref_; DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - GenerateReadBarrierMarkRuntimeCall(codegen); - __ B(GetExitLabel()); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). The field `obj.field` in the object `obj` holding -// this reference does not get updated by this slow path after marking -// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 -// below for that). -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. -class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 { - public: - LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire, - Register temp, - Location entrypoint) - : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - use_load_acquire_(use_load_acquire), - temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierSlowPathARM64"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - DCHECK(locations->CanCall()); - DCHECK(ref_.IsRegister()) << ref_; - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(obj_.IsW()); - DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg()); DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || instruction_->IsArraySet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || @@ -809,129 +685,87 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlo instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); - - // When using MaybeGenerateReadBarrierSlow, the read barrier call is - // inserted after the original load. However, in fast path based - // Baker's read barriers, we need to perform the load of - // mirror::Object::monitor_ *before* the original reference load. - // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + DCHECK_NE(ref_.reg(), LR); + DCHECK_NE(ref_.reg(), WSP); + DCHECK_NE(ref_.reg(), WZR); + // IP0 is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_.reg(), IP0); + DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); + // "Compact" slow path, saving two moves. // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } + // Instead of using the standard runtime calling convention (input + // and output in W0): // - // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as it performs additional checks that we do - // not do here for performance reasons. - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - __ Ldr(temp_, HeapOperand(obj_, monitor_offset)); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - arm64_codegen->GenerateRawReferenceLoad(instruction_, - ref_, - obj_, - offset_, - index_, - scale_factor_, - /* needs_null_check */ false, - use_load_acquire_); - - // Mark the object `ref` when `obj` is gray. + // W0 <- ref + // W0 <- ReadBarrierMark(W0) + // ref <- W0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); + // rX <- ReadBarrierMarkRegX(rX) // - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel()); - GenerateReadBarrierMarkRuntimeCall(codegen); - + if (entrypoint_.IsValid()) { + arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + __ Blr(XRegisterFrom(entrypoint_)); + } else { + // Entrypoint is not already loaded, load from the thread. + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); + // This runtime call does not require a stack map. + arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + } __ B(GetExitLabel()); } private: - // The register containing the object holding the marked object reference field. - Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - size_t scale_factor_; - // Is a null check required? - bool needs_null_check_; - // Should this reference load use Load-Acquire semantics? - bool use_load_acquire_; - // A temporary register used to hold the lock word of `obj_`. - Register temp_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64); + // The location (register) of the marked object reference. + const Location ref_; + + // The location of the entrypoint if it is already loaded. + const Location entrypoint_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64); }; -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). If needed, this slow path also atomically updates -// the field `obj.field` in the object `obj` holding this reference -// after marking (contrary to -// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never -// tries to update `obj.field`). +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathARM64 above, which never tries to update +// `obj.field`). // // This means that after the execution of this slow path, both `ref` // and `obj.field` will be up-to-date; i.e., after the flip, both will // hold the same to-space reference (unless another thread installed // another object reference (different from `ref`) in `obj.field`). // -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. -class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 - : public ReadBarrierMarkSlowPathBaseARM64 { +// If `entrypoint` is a valid location it is assumed to already be +// holding the entrypoint. The case where the entrypoint is passed in +// is when the decision to mark is based on whether the GC is marking. +class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 { public: - LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire, - Register temp, - Location entrypoint) - : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint), + ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp, + Location entrypoint = Location::NoLocation()) + : SlowPathCodeARM64(instruction), + ref_(ref), obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - use_load_acquire_(use_load_acquire), - temp_(temp) { + field_offset_(field_offset), + temp_(temp), + entrypoint_(entrypoint) { DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); } const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64"; + return "ReadBarrierMarkAndUpdateFieldSlowPathARM64"; } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { @@ -940,82 +774,70 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 DCHECK(locations->CanCall()); DCHECK(ref_.IsRegister()) << ref_; DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(obj_.IsW()); - DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg()); - - // This slow path is only used by the UnsafeCASObject intrinsic at the moment. + // This slow path is only used by the UnsafeCASObject intrinsic. DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking and field updating slow path: " << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); - DCHECK_EQ(offset_, 0u); - DCHECK_EQ(scale_factor_, 0u); - DCHECK_EQ(use_load_acquire_, false); - // The location of the offset of the marked reference field within `obj_`. - Location field_offset = index_; - DCHECK(field_offset.IsRegister()) << field_offset; + DCHECK(field_offset_.IsRegister()) << field_offset_; __ Bind(GetEntryLabel()); - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - __ Ldr(temp_, HeapOperand(obj_, monitor_offset)); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - arm64_codegen->GenerateRawReferenceLoad(instruction_, - ref_, - obj_, - offset_, - index_, - scale_factor_, - /* needs_null_check */ false, - use_load_acquire_); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel()); - - // Save the old value of the reference before marking it. + // Save the old reference. // Note that we cannot use IP to save the old reference, as IP is // used internally by the ReadBarrierMarkRegX entry point, and we // need the old reference after the call to that entry point. DCHECK_NE(LocationFrom(temp_).reg(), IP0); __ Mov(temp_.W(), ref_reg); - GenerateReadBarrierMarkRuntimeCall(codegen); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + DCHECK_NE(ref_.reg(), LR); + DCHECK_NE(ref_.reg(), WSP); + DCHECK_NE(ref_.reg(), WZR); + // IP0 is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_.reg(), IP0); + DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in W0): + // + // W0 <- ref + // W0 <- ReadBarrierMark(W0) + // ref <- W0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + if (entrypoint_.IsValid()) { + arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + __ Blr(XRegisterFrom(entrypoint_)); + } else { + // Entrypoint is not already loaded, load from the thread. + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); + // This runtime call does not require a stack map. + arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + } // If the new reference is different from the old reference, - // update the field in the holder (`*(obj_ + field_offset)`). + // update the field in the holder (`*(obj_ + field_offset_)`). // // Note that this field could also hold a different object, if // another thread had concurrently changed it. In that case, the // LDXR/CMP/BNE sequence of instructions in the compare-and-set // (CAS) operation below would abort the CAS, leaving the field // as-is. + vixl::aarch64::Label done; __ Cmp(temp_.W(), ref_reg); - __ B(eq, GetExitLabel()); + __ B(eq, &done); // Update the the holder's field atomically. This may fail if // mutator updates before us, but it's OK. This is achieved @@ -1028,7 +850,7 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 // Convenience aliases. Register base = obj_.W(); - Register offset = XRegisterFrom(field_offset); + Register offset = XRegisterFrom(field_offset_); Register expected = temp_.W(); Register value = ref_reg; Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. @@ -1072,26 +894,24 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 } } + __ Bind(&done); __ B(GetExitLabel()); } private: + // The location (register) of the marked object reference. + const Location ref_; // The register containing the object holding the marked object reference field. const Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - size_t scale_factor_; - // Is a null check required? - bool needs_null_check_; - // Should this reference load use Load-Acquire semantics? - bool use_load_acquire_; - // A temporary register used to hold the lock word of `obj_`; and - // also to hold the original reference value, when the reference is - // marked. + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + const Register temp_; - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64); + // The location of the entrypoint if it is already loaded. + const Location entrypoint_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64); }; // Slow path generating a read barrier for a heap reference. @@ -5953,11 +5773,13 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // `instruction->IsArrayGet()` => `!use_load_acquire`. DCHECK(!instruction->IsArrayGet() || !use_load_acquire); - // Query `art::Thread::Current()->GetIsGcMarking()` to decide - // whether we need to enter the slow path to mark the reference. - // Then, in the slow path, check the gray bit in the lock word of - // the reference's holder (`obj`) to decide whether to mark `ref` or - // not. + // After loading the reference from `obj.field` into `ref`, query + // `art::Thread::Current()->GetIsGcMarking()` to decide whether we + // need to enter the slow path to mark the reference. This + // optimistic strategy (we expect the GC to not be marking most of + // the time) does not check `obj`'s lock word (to see if it is a + // gray object or not), so may sometimes mark an already marked + // object. // // Note that we do not actually check the value of `GetIsGcMarking()`; // instead, we load into `temp2` the read barrier mark entry point @@ -5965,17 +5787,10 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // that `GetIsGcMarking()` is false, and vice versa. // // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // HeapReference<mirror::Object> ref = *src; // Original reference load. // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() // // Slow path. - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. + // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. // } // Slow path marking the object `ref` when the GC is marking. The @@ -5984,38 +5799,17 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* Location temp2_loc = LocationFrom(temp2); SlowPathCodeARM64* slow_path; if (always_update_field) { - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 - // only supports address of the form `obj + field_offset`, where - // `obj` is a register and `field_offset` is a register. Thus - // `offset` and `scale_factor` above are expected to be null in - // this code path. + // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports + // address of the form `obj + field_offset`, where `obj` is a + // register and `field_offset` is a register. Thus `offset` and + // `scale_factor` above are expected to be null in this code path. DCHECK_EQ(offset, 0u); DCHECK_EQ(scale_factor, 0u); /* "times 1" */ - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - use_load_acquire, - temp, - /* entrypoint */ temp2_loc); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64( + instruction, ref, obj, /* field_offset */ index, temp, /* entrypoint */ temp2_loc); } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - use_load_acquire, - temp, - /* entrypoint */ temp2_loc); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64( + instruction, ref, /* entrypoint */ temp2_loc); } AddSlowPath(slow_path); @@ -6025,12 +5819,12 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. __ Ldr(temp2, MemOperand(tr, entry_point_offset)); + // The reference load. + GenerateRawReferenceLoad( + instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ Cbnz(temp2, slow_path->GetEntryLabel()); - // Fast path: just load the reference. - GenerateRawReferenceLoad( - instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 0239ac9c45..c92a056f32 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -657,25 +657,56 @@ class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL); }; -// Abstract base class for read barrier slow paths marking a reference -// `ref`. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). // -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. -class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL { - protected: - ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint) +// If `entrypoint` is a valid location it is assumed to already be +// holding the entrypoint. The case where the entrypoint is passed in +// when the decision to mark is based on whether the GC is marking. +class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction, + Location ref, + Location entrypoint = Location::NoLocation()) : SlowPathCodeARMVIXL(instruction), ref_(ref), entrypoint_(entrypoint) { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARMVIXL"; } + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; } - // Generate assembly code calling the read barrier marking runtime - // entry point (ReadBarrierMarkRegX). - void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) { + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); vixl32::Register ref_reg = RegisterFrom(ref_); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg; + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsArraySet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + // The read barrier instrumentation of object ArrayGet + // instructions does not support the HIntermediateAddress + // instruction. + DCHECK(!(instruction_->IsArrayGet() && + instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); + __ Bind(GetEntryLabel()); // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. @@ -711,246 +742,54 @@ class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL { // This runtime call does not require a stack map. arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); } + __ B(GetExitLabel()); } + private: // The location (register) of the marked object reference. const Location ref_; // The location of the entrypoint if already loaded. const Location entrypoint_; - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL); -}; - -// Slow path marking an object reference `ref` during a read -// barrier. The field `obj.field` in the object `obj` holding this -// reference does not get updated by this slow path after marking. -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// If `entrypoint` is a valid location it is assumed to already be -// holding the entrypoint. The case where the entrypoint is passed in -// is when the decision to mark is based on whether the GC is marking. -class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL { - public: - ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction, - Location ref, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - DCHECK(locations->CanCall()); - DCHECK(ref_.IsRegister()) << ref_; - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - GenerateReadBarrierMarkRuntimeCall(codegen); - __ B(GetExitLabel()); - } - - private: DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL); }; -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). The field `obj.field` in the object `obj` holding -// this reference does not get updated by this slow path after marking -// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL -// below for that). -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. -class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL { - public: - LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction, - Location ref, - vixl32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check, - vixl32::Register temp, - Location entrypoint) - : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierSlowPathARMVIXL"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - vixl32::Register ref_reg = RegisterFrom(ref_); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg; - DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsStaticFieldGet() || - instruction_->IsArrayGet() || - instruction_->IsArraySet() || - instruction_->IsInstanceOf() || - instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || - (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - // The read barrier instrumentation of object ArrayGet - // instructions does not support the HIntermediateAddress - // instruction. - DCHECK(!(instruction_->IsArrayGet() && - instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); - - __ Bind(GetEntryLabel()); - - // When using MaybeGenerateReadBarrierSlow, the read barrier call is - // inserted after the original load. However, in fast path based - // Baker's read barriers, we need to perform the load of - // mirror::Object::monitor_ *before* the original reference load. - // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // - // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as it performs additional checks that we do - // not do here for performance reasons. - - CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including the rb_state, - // which shall prevent load-load reordering without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_, obj_, Operand(temp_, ShiftType::LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - arm_codegen->GenerateRawReferenceLoad( - instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with LSRS - // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1); - __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS. - GenerateReadBarrierMarkRuntimeCall(codegen); - - __ B(GetExitLabel()); - } - - private: - // The register containing the object holding the marked object reference field. - vixl32::Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - ScaleFactor scale_factor_; - // Is a null check required? - bool needs_null_check_; - // A temporary register used to hold the lock word of `obj_`. - vixl32::Register temp_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARMVIXL); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). If needed, this slow path also atomically updates -// the field `obj.field` in the object `obj` holding this reference -// after marking (contrary to -// LoadReferenceWithBakerReadBarrierSlowPathARMVIXL above, which never -// tries to update `obj.field`). +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathARM above, which never tries to update +// `obj.field`). // // This means that after the execution of this slow path, both `ref` // and `obj.field` will be up-to-date; i.e., after the flip, both will // hold the same to-space reference (unless another thread installed // another object reference (different from `ref`) in `obj.field`). // -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked. -class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL - : public ReadBarrierMarkSlowPathBaseARMVIXL { +// If `entrypoint` is a valid location it is assumed to already be +// holding the entrypoint. The case where the entrypoint is passed in +// when the decision to mark is based on whether the GC is marking. +class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: - LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction, - Location ref, - vixl32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check, - vixl32::Register temp1, - vixl32::Register temp2, - Location entrypoint) - : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint), + ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction, + Location ref, + vixl32::Register obj, + Location field_offset, + vixl32::Register temp1, + vixl32::Register temp2, + Location entrypoint = Location::NoLocation()) + : SlowPathCodeARMVIXL(instruction), + ref_(ref), obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), + field_offset_(field_offset), temp1_(temp1), - temp2_(temp2) { + temp2_(temp2), + entrypoint_(entrypoint) { DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); } const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL"; + return "ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL"; } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { @@ -958,77 +797,70 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL vixl32::Register ref_reg = RegisterFrom(ref_); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg; - DCHECK_NE(ref_.reg(), LocationFrom(temp1_).reg()); - - // This slow path is only used by the UnsafeCASObject intrinsic at the moment. + // This slow path is only used by the UnsafeCASObject intrinsic. DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking and field updating slow path: " << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); - DCHECK_EQ(offset_, 0u); - DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1); - Location field_offset = index_; - DCHECK(field_offset.IsRegisterPair()) << field_offset; + DCHECK(field_offset_.IsRegisterPair()) << field_offset_; __ Bind(GetEntryLabel()); - CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including the rb_state, - // which shall prevent load-load reordering without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_, obj_, Operand(temp1_, ShiftType::LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - arm_codegen->GenerateRawReferenceLoad( - instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with LSRS - // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1); - __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS. - - // Save the old value of the reference before marking it. + // Save the old reference. // Note that we cannot use IP to save the old reference, as IP is // used internally by the ReadBarrierMarkRegX entry point, and we // need the old reference after the call to that entry point. DCHECK(!temp1_.Is(ip)); __ Mov(temp1_, ref_reg); - GenerateReadBarrierMarkRuntimeCall(codegen); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + DCHECK(!ref_reg.Is(sp)); + DCHECK(!ref_reg.Is(lr)); + DCHECK(!ref_reg.Is(pc)); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK(!ref_reg.Is(ip)); + DCHECK(ref_reg.IsRegister()) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in R0): + // + // R0 <- ref + // R0 <- ReadBarrierMark(R0) + // ref <- R0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + if (entrypoint_.IsValid()) { + arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + __ Blx(RegisterFrom(entrypoint_)); + } else { + // Entrypoint is not already loaded, load from the thread. + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode()); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + } // If the new reference is different from the old reference, - // update the field in the holder (`*(obj_ + field_offset)`). + // update the field in the holder (`*(obj_ + field_offset_)`). // // Note that this field could also hold a different object, if // another thread had concurrently changed it. In that case, the // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set // (CAS) operation below would abort the CAS, leaving the field // as-is. + vixl32::Label done; __ Cmp(temp1_, ref_reg); - __ B(eq, GetExitLabel()); + __ B(eq, &done, /* far_target */ false); // Update the the holder's field atomically. This may fail if // mutator updates before us, but it's OK. This is achieved @@ -1042,7 +874,7 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL // The UnsafeCASObject intrinsic uses a register pair as field // offset ("long offset"), of which only the low part contains // data. - vixl32::Register offset = LowRegisterFrom(field_offset); + vixl32::Register offset = LowRegisterFrom(field_offset_); vixl32::Register expected = temp1_; vixl32::Register value = ref_reg; vixl32::Register tmp_ptr = temps.Acquire(); // Pointer to actual memory. @@ -1098,27 +930,25 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL } } + __ Bind(&done); __ B(GetExitLabel()); } private: + // The location (register) of the marked object reference. + const Location ref_; // The register containing the object holding the marked object reference field. const vixl32::Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - ScaleFactor scale_factor_; - // Is a null check required? - bool needs_null_check_; - // A temporary register used to hold the lock word of `obj_`; and - // also to hold the original reference value, when the reference is - // marked. + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + const vixl32::Register temp1_; - // A temporary register used in the implementation of the CAS, to - // update the object's reference field. const vixl32::Register temp2_; - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL); + // The location of the entrypoint if already loaded. + const Location entrypoint_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL); }; // Slow path generating a read barrier for a heap reference. @@ -7562,11 +7392,13 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - // Query `art::Thread::Current()->GetIsGcMarking()` to decide - // whether we need to enter the slow path to mark the reference. - // Then, in the slow path, check the gray bit in the lock word of - // the reference's holder (`obj`) to decide whether to mark `ref` or - // not. + // After loading the reference from `obj.field` into `ref`, query + // `art::Thread::Current()->GetIsGcMarking()` to decide whether we + // need to enter the slow path to mark the reference. This + // optimistic strategy (we expect the GC to not be marking most of + // the time) does not check `obj`'s lock word (to see if it is a + // gray object or not), so may sometimes mark an already marked + // object. // // Note that we do not actually check the value of `GetIsGcMarking()`; // instead, we load into `temp3` the read barrier mark entry point @@ -7574,19 +7406,14 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // that `GetIsGcMarking()` is false, and vice versa. // // temp3 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // HeapReference<mirror::Object> ref = *src; // Original reference load. // if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() // // Slow path. - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. + // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. // } + // TODO: This temp register is only necessary when + // `always_update_field` is true; make it optional (like `temp2`). vixl32::Register temp_reg = RegisterFrom(temp); // Slow path marking the object `ref` when the GC is marking. The @@ -7595,37 +7422,18 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio SlowPathCodeARMVIXL* slow_path; if (always_update_field) { DCHECK(temp2 != nullptr); - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL - // only supports address of the form `obj + field_offset`, where - // `obj` is a register and `field_offset` is a register pair (of - // which only the lower half is used). Thus `offset` and - // `scale_factor` above are expected to be null in this code path. + // ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL only supports address + // of the form `obj + field_offset`, where `obj` is a register and + // `field_offset` is a register pair (of which only the lower half + // is used). Thus `offset` and `scale_factor` above are expected + // to be null in this code path. DCHECK_EQ(offset, 0u); DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - temp_reg, - *temp2, - /* entrypoint */ temp3); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL( + instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2, /* entrypoint */ temp3); } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - temp_reg, - /* entrypoint */ temp3); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL( + instruction, ref, /* entrypoint */ temp3); } AddSlowPath(slow_path); @@ -7635,11 +7443,11 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset); + // The reference load. + GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel()); - // Fast path: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); __ Bind(slow_path->GetExitLabel()); } |