 -rw-r--r--  compiler/optimizing/code_generator_arm.cc      | 203
 -rw-r--r--  compiler/optimizing/code_generator_arm.h       |  12
 -rw-r--r--  compiler/optimizing/code_generator_arm64.cc    | 236
 -rw-r--r--  compiler/optimizing/code_generator_arm64.h     |  14
 -rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 226
 -rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h  |  14
 -rw-r--r--  compiler/optimizing/intrinsics_arm.cc          |   2
 -rw-r--r--  compiler/optimizing/intrinsics_arm64.cc        |   2
 -rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc     |   2
 -rw-r--r--  runtime/lock_word.h                            |   3
 -rw-r--r--  runtime/type_lookup_table.h                    |   2
 11 files changed, 287 insertions(+), 429 deletions(-)
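The hunks below replace the entrypoint-null ("is the GC marking") strategy for field and array loads with the lock-word-based Baker fast path described in the comments they add, while GC-root loads keep the entrypoint-based check. A rough, self-contained C++ sketch of the restored sequence follows; every name in it (Obj, MarkRuntimeEntrypoint, the shift and state constants) is an illustrative assumption, not an ART declaration.

// Sketch only: models the fast path the generated code implements, not ART source.
#include <atomic>
#include <cstdint>

struct Obj {
  std::atomic<uint32_t> monitor_;  // Lock word; one of its bits is the read barrier state.
  Obj* field_;                     // Reference loaded through the barrier.
};

constexpr uint32_t kRbStateShift = 28;  // Assumed position of LockWord::kReadBarrierStateShift.
constexpr uint32_t kGrayState = 1;      // Matches the static_asserts: white == 0, gray == 1.

Obj* MarkRuntimeEntrypoint(Obj* ref) { return ref; }  // Stand-in for ReadBarrierMarkRegX.

Obj* BakerFieldLoad(Obj* obj) {
  // Load the lock word *before* the reference; this load-load order is what the barrier needs.
  uint32_t lock_word = obj->monitor_.load(std::memory_order_relaxed);
  // The generated code orders the two loads with an address dependency on the lock word
  // (add obj, obj, lock_word LSR #32); plain C++ needs a fence to express the same ordering.
  std::atomic_thread_fence(std::memory_order_acquire);
  Obj* ref = obj->field_;  // Original reference load.
  bool is_gray = ((lock_word >> kRbStateShift) & 1u) == kGrayState;
  if (is_gray) {
    ref = MarkRuntimeEntrypoint(ref);  // Slow path: ReadBarrier::Mark(ref).
  }
  return ref;
}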
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 710ca7ad45..7b84ef83cd 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -650,7 +650,7 @@ class ArraySetSlowPathARM : public SlowPathCodeARM { // // If `entrypoint` is a valid location it is assumed to already be // holding the entrypoint. The case where the entrypoint is passed in -// is when the decision to mark is based on whether the GC is marking. +// is for the GcRoot read barrier. class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { public: ReadBarrierMarkSlowPathARM(HInstruction* instruction, @@ -715,7 +715,6 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); __ blx(entrypoint_.AsRegister<Register>()); } else { - // Entrypoint is not already loaded, load from the thread. int32_t entry_point_offset = CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); // This runtime call does not require a stack map. @@ -744,10 +743,6 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { // and `obj.field` will be up-to-date; i.e., after the flip, both will // hold the same to-space reference (unless another thread installed // another object reference (different from `ref`) in `obj.field`). -// -// If `entrypoint` is a valid location it is assumed to already be -// holding the entrypoint. The case where the entrypoint is passed in -// is when the decision to mark is based on whether the GC is marking. class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM { public: ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction, @@ -755,15 +750,13 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM { Register obj, Location field_offset, Register temp1, - Register temp2, - Location entrypoint = Location::NoLocation()) + Register temp2) : SlowPathCodeARM(instruction), ref_(ref), obj_(obj), field_offset_(field_offset), temp1_(temp1), - temp2_(temp2), - entrypoint_(entrypoint) { + temp2_(temp2) { DCHECK(kEmitCompilerReadBarrier); } @@ -816,16 +809,10 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM { // // rX <- ReadBarrierMarkRegX(rX) // - if (entrypoint_.IsValid()) { - arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - __ blx(entrypoint_.AsRegister<Register>()); - } else { - // Entrypoint is not already loaded, load from the thread. - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); - // This runtime call does not require a stack map. - arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); - } + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); // If the new reference is different from the old reference, // update the field in the holder (`*(obj_ + field_offset_)`). @@ -915,9 +902,6 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM { const Register temp1_; const Register temp2_; - // The location of the entrypoint if already loaded. 
- const Location entrypoint_; - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM); }; @@ -7201,35 +7185,14 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when - // Baker's read barrier are used. - // - // Note that we do not actually check the value of - // `GetIsGcMarking()` to decide whether to mark the loaded GC - // root or not. Instead, we load into `temp` the read barrier - // mark entry point corresponding to register `root`. If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. + // Baker's read barrier are used: // + // root = obj.field; // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // if (temp != null) { + // root = temp(root) // } - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. - Location temp = Location::RegisterLocation(LR); - SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( - instruction, root, /* entrypoint */ temp); - codegen_->AddSlowPath(slow_path); - - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) __ LoadFromOffset(kLoadWord, root_reg, obj, offset); static_assert( @@ -7240,6 +7203,21 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); + // Slow path marking the GC root `root`. + Location temp = Location::RegisterLocation(LR); + SlowPathCodeARM* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( + instruction, + root, + /*entrypoint*/ temp); + codegen_->AddSlowPath(slow_path); + + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); @@ -7310,79 +7288,51 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - // After loading the reference from `obj.field` into `ref`, query - // `art::Thread::Current()->GetIsGcMarking()` to decide whether we - // need to enter the slow path to mark the reference. 
This - // optimistic strategy (we expect the GC to not be marking most of - // the time) does not check `obj`'s lock word (to see if it is a - // gray object or not), so may sometimes mark an already marked - // object. + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: // - // Note that we do not actually check the value of `GetIsGcMarking()`; - // instead, we load into `temp3` the read barrier mark entry point - // corresponding to register `ref`. If `temp3` is null, it means - // that `GetIsGcMarking()` is false, and vice versa. - // - // temp3 = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as it performs additional checks that we do + // not do here for performance reasons. - // TODO: This temp register is only necessary when - // `always_update_field` is true; make it optional (like `temp2`). + Register ref_reg = ref.AsRegister<Register>(); Register temp_reg = temp.AsRegister<Register>(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will already be loaded in `temp3`. - Location temp3 = Location::RegisterLocation(LR); - SlowPathCodeARM* slow_path; - if (always_update_field) { - DCHECK(temp2 != nullptr); - // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address - // of the form `obj + field_offset`, where `obj` is a register and - // `field_offset` is a register pair (of which only the lower half - // is used). Thus `offset` and `scale_factor` above are expected - // to be null in this code path. - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM( - instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2, /* entrypoint */ temp3); - } else { - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( - instruction, ref, /* entrypoint */ temp3); + // /* int32_t */ monitor = obj->monitor_ + __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); } - AddSlowPath(slow_path); - - // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. 
- __ LoadFromOffset(kLoadWord, temp3.AsRegister<Register>(), TR, entry_point_offset); - // The reference load. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); -} + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); -void CodeGeneratorARM::GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check) { - Register ref_reg = ref.AsRegister<Register>(); + // Introduce a dependency on the lock_word including the rb_state, + // which shall prevent load-load reordering without using + // a memory barrier (which would be more expensive). + // `obj` is unchanged by this operation, but its value now depends + // on `temp_reg`. + __ add(obj, obj, ShifterOperand(temp_reg, LSR, 32)); + // The actual reference load. if (index.IsValid()) { // Load types involving an "index": ArrayGet, // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject // intrinsics. - // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor)) + // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) if (index.IsConstant()) { size_t computed_offset = (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset; @@ -7399,16 +7349,41 @@ void CodeGeneratorARM::GenerateRawReferenceLoad(HInstruction* instruction, __ LoadFromOffset(kLoadWord, ref_reg, IP, offset); } } else { - // /* HeapReference<mirror::Object> */ ref = *(obj + offset) + // /* HeapReference<Object> */ ref = *(obj + offset) __ LoadFromOffset(kLoadWord, ref_reg, obj, offset); } - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - // Object* ref = ref_addr->AsMirrorPtr() __ MaybeUnpoisonHeapReference(ref_reg); + + // Slow path marking the object `ref` when it is gray. + SlowPathCodeARM* slow_path; + if (always_update_field) { + DCHECK(temp2 != nullptr); + // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address + // of the form `obj + field_offset`, where `obj` is a register and + // `field_offset` is a register pair (of which only the lower half + // is used). Thus `offset` and `scale_factor` above are expected + // to be null in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM( + instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref); + } + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::GrayState()) + // ref = ReadBarrier::Mark(ref); + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with LSRS + // which can be a 16-bit instruction unlike the TST immediate. 
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1); + __ b(slow_path->GetEntryLabel(), CS); // Carry flag is the last bit shifted out by LSRS. + __ Bind(slow_path->GetExitLabel()); } void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction, diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 1f68777f88..df2dbc74ab 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -520,6 +520,9 @@ class CodeGeneratorARM : public CodeGenerator { Location index, Location temp, bool needs_null_check); + // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. // @@ -542,15 +545,6 @@ class CodeGeneratorARM : public CodeGenerator { bool always_update_field = false, Register* temp2 = nullptr); - // Generate a heap reference load (with no read barrier). - void GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check); - // Generate a read barrier for a heap reference within `instruction` // using a slow path. // diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 5bdaac2e4a..edccbd4904 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -647,7 +647,7 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { // // If `entrypoint` is a valid location it is assumed to already be // holding the entrypoint. The case where the entrypoint is passed in -// is when the decision to mark is based on whether the GC is marking. +// is for the GcRoot read barrier. class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierMarkSlowPathARM64(HInstruction* instruction, @@ -743,24 +743,18 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { // and `obj.field` will be up-to-date; i.e., after the flip, both will // hold the same to-space reference (unless another thread installed // another object reference (different from `ref`) in `obj.field`). -// -// If `entrypoint` is a valid location it is assumed to already be -// holding the entrypoint. The case where the entrypoint is passed in -// is when the decision to mark is based on whether the GC is marking. 
class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction, Location ref, Register obj, Location field_offset, - Register temp, - Location entrypoint = Location::NoLocation()) + Register temp) : SlowPathCodeARM64(instruction), ref_(ref), obj_(obj), field_offset_(field_offset), - temp_(temp), - entrypoint_(entrypoint) { + temp_(temp) { DCHECK(kEmitCompilerReadBarrier); } @@ -816,16 +810,10 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 { // // rX <- ReadBarrierMarkRegX(rX) // - if (entrypoint_.IsValid()) { - arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - __ Blr(XRegisterFrom(entrypoint_)); - } else { - // Entrypoint is not already loaded, load from the thread. - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); - // This runtime call does not require a stack map. - arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); - } + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); + // This runtime call does not require a stack map. + arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); // If the new reference is different from the old reference, // update the field in the holder (`*(obj_ + field_offset_)`). @@ -908,9 +896,6 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 { const Register temp_; - // The location of the entrypoint if it is already loaded. - const Location entrypoint_; - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64); }; @@ -5629,35 +5614,14 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when - // Baker's read barrier are used. - // - // Note that we do not actually check the value of - // `GetIsGcMarking()` to decide whether to mark the loaded GC - // root or not. Instead, we load into `temp` the read barrier - // mark entry point corresponding to register `root`. If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. + // Baker's read barrier are used: // + // root = obj.field; // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // if (temp != null) { + // root = temp(root) // } - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. - Register temp = lr; - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64( - instruction, root, /* entrypoint */ LocationFrom(temp)); - codegen_->AddSlowPath(slow_path); - - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. 
- __ Ldr(temp, MemOperand(tr, entry_point_offset)); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); @@ -5672,6 +5636,20 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); + Register temp = lr; + + // Slow path marking the GC root `root`. The entrypoint will alrady be loaded in temp. + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, + root, + LocationFrom(temp)); + codegen_->AddSlowPath(slow_path); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg()); + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ Ldr(temp, MemOperand(tr, entry_point_offset)); // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ Cbnz(temp, slow_path->GetEntryLabel()); @@ -5773,77 +5751,54 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // `instruction->IsArrayGet()` => `!use_load_acquire`. DCHECK(!instruction->IsArrayGet() || !use_load_acquire); - // After loading the reference from `obj.field` into `ref`, query - // `art::Thread::Current()->GetIsGcMarking()` to decide whether we - // need to enter the slow path to mark the reference. This - // optimistic strategy (we expect the GC to not be marking most of - // the time) does not check `obj`'s lock word (to see if it is a - // gray object or not), so may sometimes mark an already marked - // object. - // - // Note that we do not actually check the value of `GetIsGcMarking()`; - // instead, we load into `temp2` the read barrier mark entry point - // corresponding to register `ref`. If `temp2` is null, it means - // that `GetIsGcMarking()` is false, and vice versa. + MacroAssembler* masm = GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: // - // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as it performs additional checks that we do + // not do here for performance reasons. - // Slow path marking the object `ref` when the GC is marking. 
The - // entrypoint will already be loaded in `temp2`. - Register temp2 = lr; - Location temp2_loc = LocationFrom(temp2); - SlowPathCodeARM64* slow_path; - if (always_update_field) { - // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports - // address of the form `obj + field_offset`, where `obj` is a - // register and `field_offset` is a register. Thus `offset` and - // `scale_factor` above are expected to be null in this code path. - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, 0u); /* "times 1" */ - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64( - instruction, ref, obj, /* field_offset */ index, temp, /* entrypoint */ temp2_loc); - } else { - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64( - instruction, ref, /* entrypoint */ temp2_loc); - } - AddSlowPath(slow_path); - - // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ Ldr(temp2, MemOperand(tr, entry_point_offset)); - // The reference load. - GenerateRawReferenceLoad( - instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ Cbnz(temp2, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); -} - -void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire) { - DCHECK(obj.IsW()); Primitive::Type type = Primitive::kPrimNot; Register ref_reg = RegisterFrom(ref, type); + DCHECK(obj.IsW()); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + { + // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); + // /* int32_t */ monitor = obj->monitor_ + __ Ldr(temp, HeapOperand(obj, monitor_offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); - // If needed, vixl::EmissionCheckScope guards are used to ensure - // that no pools are emitted between the load (macro) instruction - // and MaybeRecordImplicitNullCheck. + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + // `obj` is unchanged by this operation, but its value now depends + // on `temp`. + __ Add(obj.X(), obj.X(), Operand(temp.X(), LSR, 32)); + // The actual reference load. 
if (index.IsValid()) { // Load types involving an "index": ArrayGet, // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject @@ -5858,50 +5813,59 @@ void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, << instruction->AsInvoke()->GetIntrinsic(); DCHECK_EQ(offset, 0u); DCHECK_EQ(scale_factor, 0u); - DCHECK_EQ(needs_null_check, false); - // /* HeapReference<mirror::Object> */ ref = *(obj + index) + DCHECK_EQ(needs_null_check, 0u); + // /* HeapReference<Object> */ ref = *(obj + index) MemOperand field = HeapOperand(obj, XRegisterFrom(index)); LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); } else { - // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases. - // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor)) + // ArrayGet and UnsafeGetObject intrinsics cases. + // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) if (index.IsConstant()) { uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor); - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); Load(type, ref_reg, HeapOperand(obj, computed_offset)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } } else { - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = temps.AcquireW(); - __ Add(temp, obj, offset); - { - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } + Register temp3 = temps.AcquireW(); + __ Add(temp3, obj, offset); + Load(type, ref_reg, HeapOperand(temp3, XRegisterFrom(index), LSL, scale_factor)); + temps.Release(temp3); } } } else { - // /* HeapReference<mirror::Object> */ ref = *(obj + offset) + // /* HeapReference<Object> */ ref = *(obj + offset) MemOperand field = HeapOperand(obj, offset); if (use_load_acquire) { - // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire. - LoadAcquire(instruction, ref_reg, field, needs_null_check); + LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); } else { - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); Load(type, ref_reg, field); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } } } // Object* ref = ref_addr->AsMirrorPtr() GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + + // Slow path marking the object `ref` when it is gray. + SlowPathCodeARM64* slow_path; + if (always_update_field) { + // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports + // address of the form `obj + field_offset`, where `obj` is a + // register and `field_offset` is a register. Thus `offset` and + // `scale_factor` above are expected to be null in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, 0u); /* "times 1" */ + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64( + instruction, ref, obj, /* field_offset */ index, temp); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref); + } + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::GrayState()) + // ref = ReadBarrier::Mark(ref); + // Given the numeric representation, it's enough to check the low bit of the rb_state. 
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); } void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction, diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 231fb057c8..5faf29a90f 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -616,8 +616,8 @@ class CodeGeneratorARM64 : public CodeGenerator { Location index, vixl::aarch64::Register temp, bool needs_null_check); - // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, - // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. // // Load the object reference located at the address // `obj + offset + (index << scale_factor)`, held by object `obj`, into @@ -636,16 +636,6 @@ class CodeGeneratorARM64 : public CodeGenerator { bool use_load_acquire, bool always_update_field = false); - // Generate a heap reference load (with no read barrier). - void GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - vixl::aarch64::Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire); - // Generate a read barrier for a heap reference within `instruction` // using a slow path. // diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index c92a056f32..6bfbe4a9c9 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -668,10 +668,6 @@ class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL { // probably still be a from-space reference (unless it gets updated by // another thread, or if another thread installed another object // reference (different from `ref`) in `obj.field`). -// -// If `entrypoint` is a valid location it is assumed to already be -// holding the entrypoint. The case where the entrypoint is passed in -// when the decision to mark is based on whether the GC is marking. class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction, @@ -736,7 +732,6 @@ class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL { arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); __ Blx(RegisterFrom(entrypoint_)); } else { - // Entrypoint is not already loaded, load from the thread. int32_t entry_point_offset = CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode()); // This runtime call does not require a stack map. @@ -765,10 +760,6 @@ class ReadBarrierMarkSlowPathARMVIXL : public SlowPathCodeARMVIXL { // and `obj.field` will be up-to-date; i.e., after the flip, both will // hold the same to-space reference (unless another thread installed // another object reference (different from `ref`) in `obj.field`). -// -// If `entrypoint` is a valid location it is assumed to already be -// holding the entrypoint. The case where the entrypoint is passed in -// when the decision to mark is based on whether the GC is marking. 
class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL(HInstruction* instruction, @@ -776,15 +767,13 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL vixl32::Register obj, Location field_offset, vixl32::Register temp1, - vixl32::Register temp2, - Location entrypoint = Location::NoLocation()) + vixl32::Register temp2) : SlowPathCodeARMVIXL(instruction), ref_(ref), obj_(obj), field_offset_(field_offset), temp1_(temp1), - temp2_(temp2), - entrypoint_(entrypoint) { + temp2_(temp2) { DCHECK(kEmitCompilerReadBarrier); } @@ -839,16 +828,10 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL // // rX <- ReadBarrierMarkRegX(rX) // - if (entrypoint_.IsValid()) { - arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - __ Blx(RegisterFrom(entrypoint_)); - } else { - // Entrypoint is not already loaded, load from the thread. - int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode()); - // This runtime call does not require a stack map. - arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); - } + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode()); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); // If the new reference is different from the old reference, // update the field in the holder (`*(obj_ + field_offset_)`). @@ -945,9 +928,6 @@ class ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL : public SlowPathCodeARMVIXL const vixl32::Register temp1_; const vixl32::Register temp2_; - // The location of the entrypoint if already loaded. - const Location entrypoint_; - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL); }; @@ -7283,35 +7263,14 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when - // Baker's read barrier are used. - // - // Note that we do not actually check the value of - // `GetIsGcMarking()` to decide whether to mark the loaded GC - // root or not. Instead, we load into `temp` the read barrier - // mark entry point corresponding to register `root`. If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. + // Baker's read barrier are used: // + // root = obj.field; // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // if (temp != null) { + // root = temp(root) // } - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. 
- Location temp = LocationFrom(lr); - SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL( - instruction, root, /* entrypoint */ temp); - codegen_->AddSlowPath(slow_path); - - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); static_assert( @@ -7322,6 +7281,21 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( "art::mirror::CompressedReference<mirror::Object> and int32_t " "have different sizes."); + // Slow path marking the GC root `root`. + Location temp = LocationFrom(lr); + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL( + instruction, + root, + /*entrypoint*/ temp); + codegen_->AddSlowPath(slow_path); + + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset); // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel()); @@ -7392,92 +7366,55 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - // After loading the reference from `obj.field` into `ref`, query - // `art::Thread::Current()->GetIsGcMarking()` to decide whether we - // need to enter the slow path to mark the reference. This - // optimistic strategy (we expect the GC to not be marking most of - // the time) does not check `obj`'s lock word (to see if it is a - // gray object or not), so may sometimes mark an already marked - // object. + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: // - // Note that we do not actually check the value of `GetIsGcMarking()`; - // instead, we load into `temp3` the read barrier mark entry point - // corresponding to register `ref`. If `temp3` is null, it means - // that `GetIsGcMarking()` is false, and vice versa. - // - // temp3 = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // if (temp3 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. 
+ // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as it performs additional checks that we do + // not do here for performance reasons. - // TODO: This temp register is only necessary when - // `always_update_field` is true; make it optional (like `temp2`). + vixl32::Register ref_reg = RegisterFrom(ref); vixl32::Register temp_reg = RegisterFrom(temp); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will already be loaded in `temp3`. - Location temp3 = LocationFrom(lr); - SlowPathCodeARMVIXL* slow_path; - if (always_update_field) { - DCHECK(temp2 != nullptr); - // ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL only supports address - // of the form `obj + field_offset`, where `obj` is a register and - // `field_offset` is a register pair (of which only the lower half - // is used). Thus `offset` and `scale_factor` above are expected - // to be null in this code path. - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL( - instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2, /* entrypoint */ temp3); - } else { - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL( - instruction, ref, /* entrypoint */ temp3); + // /* int32_t */ monitor = obj->monitor_ + GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); } - AddSlowPath(slow_path); - - // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp3), tr, entry_point_offset); - // The reference load. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); -} - -void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - vixl::aarch32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check) { - Primitive::Type type = Primitive::kPrimNot; - vixl32::Register ref_reg = RegisterFrom(ref, type); + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); - // If needed, vixl::EmissionCheckScope guards are used to ensure - // that no pools are emitted between the load (macro) instruction - // and MaybeRecordImplicitNullCheck. 
+ // Introduce a dependency on the lock_word including the rb_state, + // which shall prevent load-load reordering without using + // a memory barrier (which would be more expensive). + // `obj` is unchanged by this operation, but its value now depends + // on `temp_reg`. + __ Add(obj, obj, Operand(temp_reg, ShiftType::LSR, 32)); + // The actual reference load. if (index.IsValid()) { // Load types involving an "index": ArrayGet, // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject // intrinsics. - // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor)) + // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) if (index.IsConstant()) { size_t computed_offset = (Int32ConstantFrom(index) << scale_factor) + offset; - vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } } else { // Handle the special case of the // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject @@ -7487,27 +7424,46 @@ void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction, ? LowRegisterFrom(index) : RegisterFrom(index); UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - __ Add(temp, obj, Operand(index_reg, ShiftType::LSL, scale_factor)); - { - vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp, offset); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } + const vixl32::Register temp3 = temps.Acquire(); + __ Add(temp3, obj, Operand(index_reg, ShiftType::LSL, scale_factor)); + GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp3, offset); } } else { - // /* HeapReference<mirror::Object> */ ref = *(obj + offset) - vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); + // /* HeapReference<Object> */ ref = *(obj + offset) GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, offset); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } } // Object* ref = ref_addr->AsMirrorPtr() GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + + // Slow path marking the object `ref` when it is gray. + SlowPathCodeARMVIXL* slow_path; + if (always_update_field) { + DCHECK(temp2 != nullptr); + // ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL only supports address + // of the form `obj + field_offset`, where `obj` is a register and + // `field_offset` is a register pair (of which only the lower half + // is used). Thus `offset` and `scale_factor` above are expected + // to be null in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARMVIXL( + instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(instruction, ref); + } + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::GrayState()) + // ref = ReadBarrier::Mark(ref); + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with LSRS + // which can be a 16-bit instruction unlike the TST immediate. 
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1); + __ B(cs, slow_path->GetEntryLabel()); // Carry flag is the last bit shifted out by LSRS. + __ Bind(slow_path->GetExitLabel()); } void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction, diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 2a636dbd99..3f52c72bd4 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -45,11 +45,6 @@ static constexpr bool kArmUseVIXL32 = true; namespace art { namespace arm { -// This constant is used as an approximate margin when emission of veneer and literal pools -// must be blocked. -static constexpr int kMaxMacroInstructionSizeInBytes = - 15 * vixl::aarch32::kMaxInstructionSizeInBytes; - static const vixl::aarch32::Register kParameterCoreRegistersVIXL[] = { vixl::aarch32::r1, vixl::aarch32::r2, @@ -630,15 +625,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator { bool always_update_field = false, vixl::aarch32::Register* temp2 = nullptr); - // Generate a heap reference load (with no read barrier). - void GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - vixl::aarch32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check); - // Generate a read barrier for a heap reference within `instruction` // using a slow path. // diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 751623c177..c262cf983d 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1946,8 +1946,6 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { } if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // TODO: Also convert this intrinsic to the IsGcMarking strategy? - // The base destination address is computed later, as `temp2` is // used for intermediate computations. diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index f38642242d..86e54294ae 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2732,8 +2732,6 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // TODO: Also convert this intrinsic to the IsGcMarking strategy? - // SystemArrayCopy implementation for Baker read barriers (see // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): // diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index cc4889b26a..70a3d38c13 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -2264,8 +2264,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // TODO: Also convert this intrinsic to the IsGcMarking strategy? - // The base destination address is computed later, as `temp2` is // used for intermediate computations. 
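The ARM and ARM64 hunks above order the two loads with `add obj, obj, lock_word LSR #32` instead of a memory barrier: the lock word was loaded as a 32-bit value, so the shift always yields zero and `obj` is arithmetically unchanged, yet the add makes the address of the reference load depend on the lock word register. The C++ below only checks that arithmetic identity; the actual ordering guarantee comes from the hardware address dependency, which C++ cannot express this way.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t obj_addr = 0x700012345678ULL;  // Any object address (illustrative).
  uint64_t lock_word = 0xDEADBEEFULL;     // 32-bit monitor_ value, zero-extended to 64 bits.
  // LSR #32 of a zero-extended 32-bit value is always 0, so the add is a no-op arithmetically,
  // while still making the next load's address depend on the lock word register.
  uint64_t dependent_addr = obj_addr + (lock_word >> 32);
  assert(dependent_addr == obj_addr);
  return 0;
}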
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index edc64f35a1..2f2565b9d0 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -57,8 +57,7 @@ class Monitor;
  * |10|9|87654321098765432109876543210|
  * |11|0| ForwardingAddress           |
  *
- * The `r` bit stores the read barrier state.
- * The `m` bit stores the mark state.
+ * The rb bits store the read barrier state.
  */
 class LockWord {
  public:
diff --git a/runtime/type_lookup_table.h b/runtime/type_lookup_table.h
index fd68deb71c..3f6f76f510 100644
--- a/runtime/type_lookup_table.h
+++ b/runtime/type_lookup_table.h
@@ -148,7 +148,7 @@ class TypeLookupTable {
     return mask_;
   }
 
-  // Attempt to set an entry on its hash's slot. If there is already something there, return false.
+  // Attempt to set an entry on it's hash' slot. If there is alrady something there, return false.
   // Otherwise return true.
   bool SetOnInitialPos(const Entry& entry, uint32_t hash);
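For GC-root loads, the GenerateGcRootFieldLoad hunks above keep the entrypoint-based check: the per-register mark entrypoint is null while the GC is not marking, so the fast path costs one load and one compare-and-branch after the root load. A minimal C++ sketch of that shape, with ThreadSketch and its entrypoint array as illustrative stand-ins for the art::Thread pReadBarrierMarkRegNN slots:

// Sketch only: illustrative names, not ART source.
#include <cstdint>

struct MirrorObject;
using MarkEntrypoint = MirrorObject* (*)(MirrorObject*);

struct ThreadSketch {
  // Null when the GC is not marking; the runtime installs the real entrypoints while marking.
  MarkEntrypoint read_barrier_mark_reg_[32] = {};
};

MirrorObject* LoadGcRoot(ThreadSketch* self, MirrorObject** root_slot, int root_reg) {
  MirrorObject* root = *root_slot;                        // Original reference load.
  MarkEntrypoint mark = self->read_barrier_mark_reg_[root_reg];
  if (mark != nullptr) {                                  // <=> the GC is marking.
    root = mark(root);                                    // Slow path: ReadBarrier::Mark(root).
  }
  return root;
}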