diff options
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 145 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm.h | 29 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 137 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 26 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.cc | 146 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.h | 29 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_arm.cc | 9 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 9 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 9 |
9 files changed, 370 insertions, 169 deletions
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index d7cc577580..29f238b552 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -819,7 +819,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowP // Baker's read barriers, we need to perform the load of // mirror::Object::monitor_ *before* the original reference load. // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // The slow path (for Baker's algorithm) should look like: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -954,6 +954,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM __ Bind(GetEntryLabel()); + // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM's: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + // /* int32_t */ monitor = obj->monitor_ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset); @@ -8016,9 +8028,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field, - Register* temp2) { + bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -8029,6 +8039,73 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // not. // // Note that we do not actually check the value of `GetIsGcMarking()`; + // instead, we load into `temp2` the read barrier mark entry point + // corresponding to register `ref`. If `temp2` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // } + // } else { + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // } + + Register temp_reg = temp.AsRegister<Register>(); + + // Slow path marking the object `ref` when the GC is marking. The + // entrypoint will already be loaded in `temp2`. + Location temp2 = Location::RegisterLocation(LR); + SlowPathCodeARM* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + temp_reg, + /* entrypoint */ temp2); + AddSlowPath(slow_path); + + // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp2.AsRegister<Register>(), TR, entry_point_offset); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(temp2.AsRegister<Register>(), slow_path->GetEntryLabel()); + // Fast path: the GC is not marking: just load the reference. + GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + Register temp2) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to update the reference + // field within `obj`. Then, in the slow path, check the gray bit + // in the lock word of the reference's holder (`obj`) to decide + // whether to mark `ref` and update the field or not. + // + // Note that we do not actually check the value of `GetIsGcMarking()`; // instead, we load into `temp3` the read barrier mark entry point // corresponding to register `ref`. If `temp3` is null, it means // that `GetIsGcMarking()` is false, and vice versa. @@ -8041,52 +8118,30 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // HeapReference<mirror::Object> ref = *src; // Original reference load. // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { + // old_ref = ref; // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. // } Register temp_reg = temp.AsRegister<Register>(); - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will already be loaded in `temp3`. + // Slow path updating the object reference at address `obj + + // field_offset` when the GC is marking. The entrypoint will already + // be loaded in `temp3`. Location temp3 = Location::RegisterLocation(LR); - SlowPathCodeARM* slow_path; - if (always_update_field) { - DCHECK(temp2 != nullptr); - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only - // supports address of the form `obj + field_offset`, where `obj` - // is a register and `field_offset` is a register pair (of which - // only the lower half is used). Thus `offset` and `scale_factor` - // above are expected to be null in this code path. - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - temp_reg, - *temp2, - /* entrypoint */ temp3); - } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - temp_reg, - /* entrypoint */ temp3); - } + SlowPathCodeARM* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM( + instruction, + ref, + obj, + /* offset */ 0u, + /* index */ field_offset, + /* scale_factor */ ScaleFactor::TIMES_1, + needs_null_check, + temp_reg, + temp2, + /* entrypoint */ temp3); AddSlowPath(slow_path); // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() @@ -8098,8 +8153,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel()); - // Fast path: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + // Fast path: the GC is not marking: nothing to do (the field is + // up-to-date, and we don't need to load the reference). __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 86f2f21df7..54db137e4e 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -526,11 +526,6 @@ class CodeGeneratorARM : public CodeGenerator { // Load the object reference located at the address // `obj + offset + (index << scale_factor)`, held by object `obj`, into // `ref`, and mark it if needed. - // - // If `always_update_field` is true, the value of the reference is - // atomically updated in the holder (`obj`). This operation - // requires an extra temporary register, which must be provided as a - // non-null pointer (`temp2`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, @@ -538,9 +533,27 @@ class CodeGeneratorARM : public CodeGenerator { Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field = false, - Register* temp2 = nullptr); + bool needs_null_check); + + // Generate code checking whether the the reference field at the + // address `obj + field_offset`, held by object `obj`, needs to be + // marked, and if so, marking it and updating the field within `obj` + // with the marked value. + // + // This routine is used for the implementation of the + // UnsafeCASObject intrinsic with Baker read barriers. + // + // This method has a structure similar to + // GenerateReferenceLoadWithBakerReadBarrier, but note that argument + // `ref` is only as a temporary here, and thus its value should not + // be used afterwards. + void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + Register temp2); // Generate a heap reference load (with no read barrier). void GenerateRawReferenceLoad(HInstruction* instruction, diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index eee832a732..2d95a2ed87 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -851,7 +851,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlo // Baker's read barriers, we need to perform the load of // mirror::Object::monitor_ *before* the original reference load. // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // The slow path (for Baker's algorithm) should look like: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -1002,6 +1002,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 __ Bind(GetEntryLabel()); + // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + // /* int32_t */ monitor = obj->monitor_ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); __ Ldr(temp_, HeapOperand(obj_, monitor_offset)); @@ -6230,8 +6242,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* size_t scale_factor, Register temp, bool needs_null_check, - bool use_load_acquire, - bool always_update_field) { + bool use_load_acquire) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // If we are emitting an array load, we should not be using a @@ -6268,41 +6279,18 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // entrypoint will already be loaded in `temp2`. Register temp2 = lr; Location temp2_loc = LocationFrom(temp2); - SlowPathCodeARM64* slow_path; - if (always_update_field) { - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 - // only supports address of the form `obj + field_offset`, where - // `obj` is a register and `field_offset` is a register. Thus - // `offset` and `scale_factor` above are expected to be null in - // this code path. - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, 0u); /* "times 1" */ - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - use_load_acquire, - temp, - /* entrypoint */ temp2_loc); - } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - use_load_acquire, - temp, - /* entrypoint */ temp2_loc); - } + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + use_load_acquire, + temp, + /* entrypoint */ temp2_loc); AddSlowPath(slow_path); // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() @@ -6314,12 +6302,83 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ Cbnz(temp2, slow_path->GetEntryLabel()); - // Fast path: just load the reference. + // Fast path: the GC is not marking: just load the reference. GenerateRawReferenceLoad( instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); __ Bind(slow_path->GetExitLabel()); } +void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp, + bool needs_null_check, + bool use_load_acquire) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + // If we are emitting an array load, we should not be using a + // Load Acquire instruction. In other words: + // `instruction->IsArrayGet()` => `!use_load_acquire`. + DCHECK(!instruction->IsArrayGet() || !use_load_acquire); + + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to update the reference + // field within `obj`. Then, in the slow path, check the gray bit + // in the lock word of the reference's holder (`obj`) to decide + // whether to mark `ref` and update the field or not. + // + // Note that we do not actually check the value of `GetIsGcMarking()`; + // instead, we load into `temp2` the read barrier mark entry point + // corresponding to register `ref`. If `temp2` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + // } + + // Slow path updating the object reference at address `obj + field_offset` + // when the GC is marking. The entrypoint will already be loaded in `temp2`. + Register temp2 = lr; + Location temp2_loc = LocationFrom(temp2); + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( + instruction, + ref, + obj, + /* offset */ 0u, + /* index */ field_offset, + /* scale_factor */ 0u /* "times 1" */, + needs_null_check, + use_load_acquire, + temp, + /* entrypoint */ temp2_loc); + AddSlowPath(slow_path); + + // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ Ldr(temp2, MemOperand(tr, entry_point_offset)); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ Cbnz(temp2, slow_path->GetEntryLabel()); + // Fast path: the GC is not marking: nothing to do (the field is + // up-to-date, and we don't need to load the reference). + __ Bind(slow_path->GetExitLabel()); +} + void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, Location ref, Register obj, diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 3ded3e4412..f16f625b6c 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -635,9 +635,6 @@ class CodeGeneratorARM64 : public CodeGenerator { // Load the object reference located at the address // `obj + offset + (index << scale_factor)`, held by object `obj`, into // `ref`, and mark it if needed. - // - // If `always_update_field` is true, the value of the reference is - // atomically updated in the holder (`obj`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl::aarch64::Register obj, @@ -646,8 +643,27 @@ class CodeGeneratorARM64 : public CodeGenerator { size_t scale_factor, vixl::aarch64::Register temp, bool needs_null_check, - bool use_load_acquire, - bool always_update_field = false); + bool use_load_acquire); + + // Generate code checking whether the the reference field at the + // address `obj + field_offset`, held by object `obj`, needs to be + // marked, and if so, marking it and updating the field within `obj` + // with the marked value. + // + // This routine is used for the implementation of the + // UnsafeCASObject intrinsic with Baker read barriers. + // + // This method has a structure similar to + // GenerateReferenceLoadWithBakerReadBarrier, but note that argument + // `ref` is only as a temporary here, and thus its value should not + // be used afterwards. + void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch64::Register obj, + Location field_offset, + vixl::aarch64::Register temp, + bool needs_null_check, + bool use_load_acquire); // Generate a heap reference load (with no read barrier). void GenerateRawReferenceLoad(HInstruction* instruction, diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index b6678b03ef..e943db733a 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -845,7 +845,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkS // Baker's read barriers, we need to perform the load of // mirror::Object::monitor_ *before* the original reference load. // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // The slow path (for Baker's algorithm) should look like: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -987,6 +987,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL __ Bind(GetEntryLabel()); + // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); // /* int32_t */ monitor = obj->monitor_ @@ -8091,9 +8103,7 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field, - vixl32::Register* temp2) { + bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -8104,6 +8114,73 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // not. // // Note that we do not actually check the value of `GetIsGcMarking()`; + // instead, we load into `temp2` the read barrier mark entry point + // corresponding to register `ref`. If `temp2` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // } + // } else { + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // } + + vixl32::Register temp_reg = RegisterFrom(temp); + + // Slow path marking the object `ref` when the GC is marking. The + // entrypoint will already be loaded in `temp2`. + Location temp2 = LocationFrom(lr); + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + temp_reg, + /* entrypoint */ temp2); + AddSlowPath(slow_path); + + // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(RegisterFrom(temp2), slow_path->GetEntryLabel()); + // Fast path: the GC is not marking: just load the reference. + GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl32::Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + vixl32::Register temp2) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to update the reference + // field within `obj`. Then, in the slow path, check the gray bit + // in the lock word of the reference's holder (`obj`) to decide + // whether to mark `ref` and update the field or not. + // + // Note that we do not actually check the value of `GetIsGcMarking()`; // instead, we load into `temp3` the read barrier mark entry point // corresponding to register `ref`. If `temp3` is null, it means // that `GetIsGcMarking()` is false, and vice versa. @@ -8113,55 +8190,32 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // // Slow path. // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. + // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { + // old_ref = ref; // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. // } vixl32::Register temp_reg = RegisterFrom(temp); - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will already be loaded in `temp3`. + // Slow path updating the object reference at address `obj + field_offset` + // when the GC is marking. The entrypoint will already be loaded in `temp3`. Location temp3 = LocationFrom(lr); - SlowPathCodeARMVIXL* slow_path; - if (always_update_field) { - DCHECK(temp2 != nullptr); - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL - // only supports address of the form `obj + field_offset`, where - // `obj` is a register and `field_offset` is a register pair (of - // which only the lower half is used). Thus `offset` and - // `scale_factor` above are expected to be null in this code path. - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - temp_reg, - *temp2, - /* entrypoint */ temp3); - } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - temp_reg, - /* entrypoint */ temp3); - } + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( + instruction, + ref, + obj, + /* offset */ 0u, + /* index */ field_offset, + /* scale_factor */ ScaleFactor::TIMES_1, + needs_null_check, + temp_reg, + temp2, + /* entrypoint */ temp3); AddSlowPath(slow_path); // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() @@ -8173,8 +8227,8 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel()); - // Fast path: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + // Fast path: the GC is not marking: nothing to do (the field is + // up-to-date, and we don't need to load the reference). __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 1e9669dc38..28fcbb8ca7 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -612,11 +612,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Load the object reference located at the address // `obj + offset + (index << scale_factor)`, held by object `obj`, into // `ref`, and mark it if needed. - // - // If `always_update_field` is true, the value of the reference is - // atomically updated in the holder (`obj`). This operation - // requires an extra temporary register, which must be provided as a - // non-null pointer (`temp2`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl::aarch32::Register obj, @@ -624,9 +619,27 @@ class CodeGeneratorARMVIXL : public CodeGenerator { Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field = false, - vixl::aarch32::Register* temp2 = nullptr); + bool needs_null_check); + + // Generate code checking whether the the reference field at the + // address `obj + field_offset`, held by object `obj`, needs to be + // marked, and if so, marking it and updating the field within `obj` + // with the marked value. + // + // This routine is used for the implementation of the + // UnsafeCASObject intrinsic with Baker read barriers. + // + // This method has a structure similar to + // GenerateReferenceLoadWithBakerReadBarrier, but note that argument + // `ref` is only as a temporary here, and thus its value should not + // be used afterwards. + void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch32::Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + vixl::aarch32::Register temp2); // Generate a heap reference load (with no read barrier). void GenerateRawReferenceLoad(HInstruction* instruction, diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 750f9cc213..e067b01f45 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1010,17 +1010,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* code if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( + codegen->UpdateReferenceFieldWithBakerReadBarrier( invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, - ScaleFactor::TIMES_1, + /* field_offset */ offset_loc, tmp_ptr_loc, /* needs_null_check */ false, - /* always_update_field */ true, - &tmp); + tmp); } } diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 4d360158a2..65a82229e9 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1154,17 +1154,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* co Register temp = WRegisterFrom(locations->GetTemp(0)); // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( + codegen->UpdateReferenceFieldWithBakerReadBarrier( invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, - /* scale_factor */ 0u, + /* field_offset */ offset_loc, temp, /* needs_null_check */ false, - /* use_load_acquire */ false, - /* always_update_field */ true); + /* use_load_acquire */ false); } } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index fd8a37ae05..01a7c9de04 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -1347,17 +1347,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( + codegen->UpdateReferenceFieldWithBakerReadBarrier( invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, - ScaleFactor::TIMES_1, + /* field_offset */ offset_loc, tmp_ptr_loc, /* needs_null_check */ false, - /* always_update_field */ true, - &tmp); + tmp); } } |