Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/intrinsics_arm.cc | 193
-rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 216
-rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 205
3 files changed, 322 insertions, 292 deletions
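The change below restructures SystemArrayCopy loop emission the same way in all three back ends: no copy loop is emitted when `length` is a constant zero, a single zero check on `length` replaces the old comparison of the base and end source addresses, and, in the Baker read-barrier path, the source addresses are computed only after the lock-word load so that they inherit its artificial data dependency. A condensed, self-contained sketch of that shape (not ART code; every name below is a hypothetical stand-in for the real codegen interfaces):

    #include <cstdint>

    // Hypothetical stand-in for an ART register-allocator Location.
    struct Location {
      bool is_constant;
      int32_t constant_value;
      bool IsConstant() const { return is_constant; }
      bool IsRegister() const { return !is_constant; }
    };

    constexpr bool kEmitCompilerReadBarrier = true;
    constexpr bool kUseBakerReadBarrier = true;

    // No-op stubs standing in for the real code-emission helpers.
    void EmitBranchIfZeroToDone() {}
    void EmitLockWordLoadAndDependency() {}
    void EmitAddressComputation() {}
    void EmitGrayBitCheckAndSlowPath() {}
    void EmitCopyLoop() {}
    void BindDone() {}

    void EmitSystemArrayCopyLoop(const Location& length) {
      if (length.IsConstant() && length.constant_value == 0) {
        // Constant zero length: emit no loop code at all.
        return;
      }
      if (length.IsRegister()) {
        // One zero check on `length` itself replaces the old base-vs-end
        // source address comparison; a non-zero constant needs no check.
        EmitBranchIfZeroToDone();
      }
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        EmitLockWordLoadAndDependency();  // Load src->monitor_; make `src` depend on it.
        EmitAddressComputation();         // Addresses now inherit that dependency.
        EmitGrayBitCheckAndSlowPath();    // Branch to the marking slow path if gray.
      } else {
        EmitAddressComputation();
      }
      EmitCopyLoop();
      BindDone();
    }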
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 987a2af64a..98b80f5d3c 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1967,101 +1967,108 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
   }

-  const Primitive::Type type = Primitive::kPrimNot;
-  const int32_t element_size = Primitive::ComponentSize(type);
-
-  // Compute the base source address in `temp1`.
-  GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
-  // Compute the end source address in `temp3`.
-  GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
-  // The base destination address is computed later, as `temp2` is
-  // used for intermediate computations.
+  if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
+    // Constant zero length: no need to emit the loop code at all.
+  } else {
+    Label done;
+    const Primitive::Type type = Primitive::kPrimNot;
+    const int32_t element_size = Primitive::ComponentSize(type);

-  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-    // TODO: Also convert this intrinsic to the IsGcMarking strategy?
-
-    // SystemArrayCopy implementation for Baker read barriers (see
-    // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
-    //
-    //   if (src_ptr != end_ptr) {
-    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
-    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
-    //     if (is_gray) {
-    //       // Slow-path copy.
-    //       do {
-    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
-    //       } while (src_ptr != end_ptr)
-    //     } else {
-    //       // Fast-path copy.
-    //       do {
-    //         *dest_ptr++ = *src_ptr++;
-    //       } while (src_ptr != end_ptr)
-    //     }
-    //   }
-
-    Label loop, done;
-
-    // Don't enter copy loop if `length == 0`.
-    __ cmp(temp1, ShifterOperand(temp3));
-    __ b(&done, EQ);
-
-    // /* int32_t */ monitor = src->monitor_
-    __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
-    // /* LockWord */ lock_word = LockWord(monitor)
-    static_assert(sizeof(LockWord) == sizeof(int32_t),
-                  "art::LockWord and int32_t have different sizes.");
-
-    // Introduce a dependency on the lock_word including the rb_state,
-    // which shall prevent load-load reordering without using
-    // a memory barrier (which would be more expensive).
-    // `src` is unchanged by this operation, but its value now depends
-    // on `temp2`.
-    __ add(src, src, ShifterOperand(temp2, LSR, 32));
-
-    // Slow path used to copy array when `src` is gray.
-    // Note that the base destination address is computed in `temp2`
-    // by the slow path code.
-    SlowPathCode* read_barrier_slow_path =
-        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
-    codegen_->AddSlowPath(read_barrier_slow_path);
-
-    // Given the numeric representation, it's enough to check the low bit of the
-    // rb_state. We do that by shifting the bit out of the lock word with LSRS
-    // which can be a 16-bit instruction unlike the TST immediate.
-    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
-    __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
-    // Carry flag is the last bit shifted out by LSRS.
-    __ b(read_barrier_slow_path->GetEntryLabel(), CS);
-
-    // Fast-path copy.
-    // Compute the base destination address in `temp2`.
-    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
-    // Iterate over the arrays and do a raw copy of the objects. We don't need to
-    // poison/unpoison.
-    __ Bind(&loop);
-    __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
-    __ str(IP, Address(temp2, element_size, Address::PostIndex));
-    __ cmp(temp1, ShifterOperand(temp3));
-    __ b(&loop, NE);
+    if (length.IsRegister()) {
+      // Don't enter the copy loop if the length is zero.
+      __ CompareAndBranchIfZero(length.AsRegister<Register>(), &done);
+    }

-    __ Bind(read_barrier_slow_path->GetExitLabel());
-    __ Bind(&done);
-  } else {
-    // Non read barrier code.
-    // Compute the base destination address in `temp2`.
-    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
-    // Iterate over the arrays and do a raw copy of the objects. We don't need to
-    // poison/unpoison.
-    Label loop, done;
-    __ cmp(temp1, ShifterOperand(temp3));
-    __ b(&done, EQ);
-    __ Bind(&loop);
-    __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
-    __ str(IP, Address(temp2, element_size, Address::PostIndex));
-    __ cmp(temp1, ShifterOperand(temp3));
-    __ b(&loop, NE);
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
+      // SystemArrayCopy implementation for Baker read barriers (see
+      // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+      //
+      //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+      //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+      //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+      //   if (is_gray) {
+      //     // Slow-path copy.
+      //     do {
+      //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+      //     } while (src_ptr != end_ptr)
+      //   } else {
+      //     // Fast-path copy.
+      //     do {
+      //       *dest_ptr++ = *src_ptr++;
+      //     } while (src_ptr != end_ptr)
+      //   }
+
+      // /* int32_t */ monitor = src->monitor_
+      __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
+      // /* LockWord */ lock_word = LockWord(monitor)
+      static_assert(sizeof(LockWord) == sizeof(int32_t),
+                    "art::LockWord and int32_t have different sizes.");
+
+      // Introduce a dependency on the lock_word including the rb_state,
+      // which shall prevent load-load reordering without using
+      // a memory barrier (which would be more expensive).
+      // `src` is unchanged by this operation, but its value now depends
+      // on `temp2`.
+      __ add(src, src, ShifterOperand(temp2, LSR, 32));
+
+      // Compute the base source address in `temp1`.
+      // Note that `temp1` (the base source address) is computed from
+      // `src` (and `src_pos`) here, and thus honors the artificial
+      // dependency of `src` on `temp2`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
+      // Compute the end source address in `temp3`.
+      GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
+      // The base destination address is computed later, as `temp2` is
+      // used for intermediate computations.
+
+      // Slow path used to copy array when `src` is gray.
+      // Note that the base destination address is computed in `temp2`
+      // by the slow path code.
+      SlowPathCode* read_barrier_slow_path =
+          new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
+      codegen_->AddSlowPath(read_barrier_slow_path);
+
+      // Given the numeric representation, it's enough to check the low bit of the
+      // rb_state. We do that by shifting the bit out of the lock word with LSRS
+      // which can be a 16-bit instruction unlike the TST immediate.
+      static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+      static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+      __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+      // Carry flag is the last bit shifted out by LSRS.
+      __ b(read_barrier_slow_path->GetEntryLabel(), CS);
+
+      // Fast-path copy.
+      // Compute the base destination address in `temp2`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      Label loop;
+      __ Bind(&loop);
+      __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+      __ str(IP, Address(temp2, element_size, Address::PostIndex));
+      __ cmp(temp1, ShifterOperand(temp3));
+      __ b(&loop, NE);
+
+      __ Bind(read_barrier_slow_path->GetExitLabel());
+    } else {
+      // Non read barrier code.
+      // Compute the base source address in `temp1`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
+      // Compute the base destination address in `temp2`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
+      // Compute the end source address in `temp3`.
+      GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      Label loop;
+      __ Bind(&loop);
+      __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+      __ str(IP, Address(temp2, element_size, Address::PostIndex));
+      __ cmp(temp1, ShifterOperand(temp3));
+      __ b(&loop, NE);
+    }
     __ Bind(&done);
   }
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index b012608e75..423fd3c6ae 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2716,111 +2716,127 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
     __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
   }

-  const Primitive::Type type = Primitive::kPrimNot;
-  const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
-
-  Register src_curr_addr = temp1.X();
-  Register dst_curr_addr = temp2.X();
-  Register src_stop_addr = temp3.X();
-
-  // Compute base source address, base destination address, and end
-  // source address in `src_curr_addr`, `dst_curr_addr` and
-  // `src_stop_addr` respectively.
-  GenSystemArrayCopyAddresses(masm,
-                              type,
-                              src,
-                              src_pos,
-                              dest,
-                              dest_pos,
-                              length,
-                              src_curr_addr,
-                              dst_curr_addr,
-                              src_stop_addr);
-
-  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-    // TODO: Also convert this intrinsic to the IsGcMarking strategy?
-
-    // SystemArrayCopy implementation for Baker read barriers (see
-    // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
-    //
-    //   if (src_ptr != end_ptr) {
-    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
-    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
-    //     if (is_gray) {
-    //       // Slow-path copy.
-    //       do {
-    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
-    //       } while (src_ptr != end_ptr)
-    //     } else {
-    //       // Fast-path copy.
-    //       do {
-    //         *dest_ptr++ = *src_ptr++;
-    //       } while (src_ptr != end_ptr)
-    //     }
-    //   }
-
-    vixl::aarch64::Label loop, done;
-
-    // Don't enter copy loop if `length == 0`.
-    __ Cmp(src_curr_addr, src_stop_addr);
-    __ B(&done, eq);
-
-    // Make sure `tmp` is not IP0, as it is clobbered by
-    // ReadBarrierMarkRegX entry points in
-    // ReadBarrierSystemArrayCopySlowPathARM64.
-    temps.Exclude(ip0);
-    Register tmp = temps.AcquireW();
-    DCHECK_NE(LocationFrom(tmp).reg(), IP0);
-
-    // /* int32_t */ monitor = src->monitor_
-    __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
-    // /* LockWord */ lock_word = LockWord(monitor)
-    static_assert(sizeof(LockWord) == sizeof(int32_t),
-                  "art::LockWord and int32_t have different sizes.");
-
-    // Introduce a dependency on the lock_word including rb_state,
-    // to prevent load-load reordering, and without using
-    // a memory barrier (which would be more expensive).
-    // `src` is unchanged by this operation, but its value now depends
-    // on `tmp`.
-    __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
-
-    // Slow path used to copy array when `src` is gray.
-    SlowPathCodeARM64* read_barrier_slow_path =
-        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
-    codegen_->AddSlowPath(read_barrier_slow_path);
-
-    // Given the numeric representation, it's enough to check the low bit of the rb_state.
-    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
-    __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
-
-    // Fast-path copy.
-    // Iterate over the arrays and do a raw copy of the objects. We don't need to
-    // poison/unpoison.
-    __ Bind(&loop);
-    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
-    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
-    __ Cmp(src_curr_addr, src_stop_addr);
-    __ B(&loop, ne);
-
-    __ Bind(read_barrier_slow_path->GetExitLabel());
-    __ Bind(&done);
+  if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
+    // Constant zero length: no need to emit the loop code at all.
   } else {
-    // Non read barrier code.
-    // Iterate over the arrays and do a raw copy of the objects. We don't need to
-    // poison/unpoison.
-    vixl::aarch64::Label loop, done;
-    __ Bind(&loop);
-    __ Cmp(src_curr_addr, src_stop_addr);
-    __ B(&done, eq);
-    {
+    Register src_curr_addr = temp1.X();
+    Register dst_curr_addr = temp2.X();
+    Register src_stop_addr = temp3.X();
+    vixl::aarch64::Label done;
+    const Primitive::Type type = Primitive::kPrimNot;
+    const int32_t element_size = Primitive::ComponentSize(type);
+
+    if (length.IsRegister()) {
+      // Don't enter the copy loop if the length is zero.
+      __ Cbz(WRegisterFrom(length), &done);
+    }
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
+      // SystemArrayCopy implementation for Baker read barriers (see
+      // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+      //
+      //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+      //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+      //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+      //   if (is_gray) {
+      //     // Slow-path copy.
+      //     do {
+      //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+      //     } while (src_ptr != end_ptr)
+      //   } else {
+      //     // Fast-path copy.
+      //     do {
+      //       *dest_ptr++ = *src_ptr++;
+      //     } while (src_ptr != end_ptr)
+      //   }
+
+      // Make sure `tmp` is not IP0, as it is clobbered by
+      // ReadBarrierMarkRegX entry points in
+      // ReadBarrierSystemArrayCopySlowPathARM64.
+      temps.Exclude(ip0);
      Register tmp = temps.AcquireW();
+      DCHECK_NE(LocationFrom(tmp).reg(), IP0);
+
+      // /* int32_t */ monitor = src->monitor_
+      __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
+      // /* LockWord */ lock_word = LockWord(monitor)
+      static_assert(sizeof(LockWord) == sizeof(int32_t),
+                    "art::LockWord and int32_t have different sizes.");
+
+      // Introduce a dependency on the lock_word including rb_state,
+      // to prevent load-load reordering, and without using
+      // a memory barrier (which would be more expensive).
+      // `src` is unchanged by this operation, but its value now depends
+      // on `tmp`.
+      __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
+
+      // Compute base source address, base destination address, and end
+      // source address in `src_curr_addr`, `dst_curr_addr` and
+      // `src_stop_addr` respectively.
+      // Note that `src_curr_addr` is computed from `src` (and
+      // `src_pos`) here, and thus honors the artificial dependency
+      // of `src` on `tmp`.
+      GenSystemArrayCopyAddresses(masm,
+                                  type,
+                                  src,
+                                  src_pos,
+                                  dest,
+                                  dest_pos,
+                                  length,
+                                  src_curr_addr,
+                                  dst_curr_addr,
+                                  src_stop_addr);
+
+      // Slow path used to copy array when `src` is gray.
+      SlowPathCodeARM64* read_barrier_slow_path =
+          new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
+      codegen_->AddSlowPath(read_barrier_slow_path);
+
+      // Given the numeric representation, it's enough to check the low bit of the rb_state.
+      static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+      static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+      __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
+
+      // Fast-path copy.
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      vixl::aarch64::Label loop;
+      __ Bind(&loop);
      __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
      __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+      __ Cmp(src_curr_addr, src_stop_addr);
+      __ B(&loop, ne);
+
+      __ Bind(read_barrier_slow_path->GetExitLabel());
+    } else {
+      // Non read barrier code.
+      // Compute base source address, base destination address, and end
+      // source address in `src_curr_addr`, `dst_curr_addr` and
+      // `src_stop_addr` respectively.
+      GenSystemArrayCopyAddresses(masm,
+                                  type,
+                                  src,
+                                  src_pos,
+                                  dest,
+                                  dest_pos,
+                                  length,
+                                  src_curr_addr,
+                                  dst_curr_addr,
+                                  src_stop_addr);
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      vixl::aarch64::Label loop;
+      __ Bind(&loop);
+      {
+        Register tmp = temps.AcquireW();
+        __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+        __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+      }
+      __ Cmp(src_curr_addr, src_stop_addr);
+      __ B(&loop, ne);
    }
-    __ B(&loop);
     __ Bind(&done);
   }
 }
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 0167891b34..19ff49c6ce 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2280,109 +2280,116 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
   }

-  const Primitive::Type type = Primitive::kPrimNot;
-  const int32_t element_size = Primitive::ComponentSize(type);
-
-  // Compute the base source address in `temp1`.
-  GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
-  // Compute the end source address in `temp3`.
-  GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
-  // The base destination address is computed later, as `temp2` is
-  // used for intermediate computations.
+  if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
+    // Constant zero length: no need to emit the loop code at all.
+  } else {
+    vixl32::Label done;
+    const Primitive::Type type = Primitive::kPrimNot;
+    const int32_t element_size = Primitive::ComponentSize(type);

-  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-    // TODO: Also convert this intrinsic to the IsGcMarking strategy?
-
-    // SystemArrayCopy implementation for Baker read barriers (see
-    // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
-    //
-    //   if (src_ptr != end_ptr) {
-    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
-    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
-    //     if (is_gray) {
-    //       // Slow-path copy.
-    //       do {
-    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
-    //       } while (src_ptr != end_ptr)
-    //     } else {
-    //       // Fast-path copy.
-    //       do {
-    //         *dest_ptr++ = *src_ptr++;
-    //       } while (src_ptr != end_ptr)
-    //     }
-    //   }
-
-    vixl32::Label loop, done;
-
-    // Don't enter copy loop if `length == 0`.
-    __ Cmp(temp1, temp3);
-    __ B(eq, &done, /* far_target */ false);
-
-    // /* int32_t */ monitor = src->monitor_
-    __ Ldr(temp2, MemOperand(src, monitor_offset));
-    // /* LockWord */ lock_word = LockWord(monitor)
-    static_assert(sizeof(LockWord) == sizeof(int32_t),
-                  "art::LockWord and int32_t have different sizes.");
-
-    // Introduce a dependency on the lock_word including the rb_state,
-    // which shall prevent load-load reordering without using
-    // a memory barrier (which would be more expensive).
-    // `src` is unchanged by this operation, but its value now depends
-    // on `temp2`.
-    __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
-
-    // Slow path used to copy array when `src` is gray.
-    // Note that the base destination address is computed in `temp2`
-    // by the slow path code.
-    SlowPathCodeARMVIXL* read_barrier_slow_path =
-        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
-    codegen_->AddSlowPath(read_barrier_slow_path);
-
-    // Given the numeric representation, it's enough to check the low bit of the
-    // rb_state. We do that by shifting the bit out of the lock word with LSRS
-    // which can be a 16-bit instruction unlike the TST immediate.
-    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
-    __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
-    // Carry flag is the last bit shifted out by LSRS.
-    __ B(cs, read_barrier_slow_path->GetEntryLabel());
-
-    // Fast-path copy.
-    // Compute the base destination address in `temp2`.
-    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
-    // Iterate over the arrays and do a raw copy of the objects. We don't need to
-    // poison/unpoison.
-    __ Bind(&loop);
-    {
-      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
-      const vixl32::Register temp_reg = temps.Acquire();
-      __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
-      __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+    if (length.IsRegister()) {
+      // Don't enter the copy loop if the length is zero.
+      __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false);
    }
-    __ Cmp(temp1, temp3);
-    __ B(ne, &loop, /* far_target */ false);
-    __ Bind(read_barrier_slow_path->GetExitLabel());
-    __ Bind(&done);
-  } else {
-    // Non read barrier code.
-    // Compute the base destination address in `temp2`.
-    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
-    // Iterate over the arrays and do a raw copy of the objects. We don't need to
-    // poison/unpoison.
-    vixl32::Label loop, done;
-    __ Cmp(temp1, temp3);
-    __ B(eq, &done, /* far_target */ false);
-    __ Bind(&loop);
-    {
-      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
-      const vixl32::Register temp_reg = temps.Acquire();
-      __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
-      __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+
+      // SystemArrayCopy implementation for Baker read barriers (see
+      // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+      //
+      //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+      //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+      //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+      //   if (is_gray) {
+      //     // Slow-path copy.
+      //     do {
+      //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+      //     } while (src_ptr != end_ptr)
+      //   } else {
+      //     // Fast-path copy.
+      //     do {
+      //       *dest_ptr++ = *src_ptr++;
+      //     } while (src_ptr != end_ptr)
+      //   }
+
+      // /* int32_t */ monitor = src->monitor_
+      __ Ldr(temp2, MemOperand(src, monitor_offset));
+      // /* LockWord */ lock_word = LockWord(monitor)
+      static_assert(sizeof(LockWord) == sizeof(int32_t),
+                    "art::LockWord and int32_t have different sizes.");
+
+      // Introduce a dependency on the lock_word including the rb_state,
+      // which shall prevent load-load reordering without using
+      // a memory barrier (which would be more expensive).
+      // `src` is unchanged by this operation, but its value now depends
+      // on `temp2`.
+      __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
+
+      // Compute the base source address in `temp1`.
+      // Note that `temp1` (the base source address) is computed from
+      // `src` (and `src_pos`) here, and thus honors the artificial
+      // dependency of `src` on `temp2`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
+      // Compute the end source address in `temp3`.
+      GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
+      // The base destination address is computed later, as `temp2` is
+      // used for intermediate computations.
+
+      // Slow path used to copy array when `src` is gray.
+      // Note that the base destination address is computed in `temp2`
+      // by the slow path code.
+      SlowPathCodeARMVIXL* read_barrier_slow_path =
+          new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
+      codegen_->AddSlowPath(read_barrier_slow_path);
+
+      // Given the numeric representation, it's enough to check the low bit of the
+      // rb_state. We do that by shifting the bit out of the lock word with LSRS
+      // which can be a 16-bit instruction unlike the TST immediate.
+      static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+      static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+      __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+      // Carry flag is the last bit shifted out by LSRS.
+      __ B(cs, read_barrier_slow_path->GetEntryLabel());
+
+      // Fast-path copy.
+      // Compute the base destination address in `temp2`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      vixl32::Label loop;
+      __ Bind(&loop);
+      {
+        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+        const vixl32::Register temp_reg = temps.Acquire();
+        __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
+        __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+      }
+      __ Cmp(temp1, temp3);
+      __ B(ne, &loop, /* far_target */ false);
+
+      __ Bind(read_barrier_slow_path->GetExitLabel());
+    } else {
+      // Non read barrier code.
+      // Compute the base source address in `temp1`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
+      // Compute the base destination address in `temp2`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
+      // Compute the end source address in `temp3`.
+      GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      vixl32::Label loop;
+      __ Bind(&loop);
+      {
+        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+        const vixl32::Register temp_reg = temps.Acquire();
+        __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
+        __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+      }
+      __ Cmp(temp1, temp3);
+      __ B(ne, &loop, /* far_target */ false);
    }
-    __ Cmp(temp1, temp3);
-    __ B(ne, &loop, /* far_target */ false);
     __ Bind(&done);
   }
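In all three back ends, the `add src, src, <lock_word>, LSR #32` instruction is the "artificial dependency" the comments describe: the lock word is a zero-extended 32-bit value, so the shifted operand is always zero and `src` is unchanged, yet the base source address subsequently computed from `src` becomes data-dependent on the lock-word load, which the ARM memory model orders before the element loads without a (more expensive) load fence. A C++ analogue of the idiom, for illustration only (the names are hypothetical, and a C++ compiler would be free to fold the shift to zero and break the dependency, which is why ART emits the `add` directly as machine code):

    #include <atomic>
    #include <cstdint>

    // `monitor` stands in for the object's lock word (src->monitor_);
    // `data_offset` for the offset of the first array element.
    uintptr_t DependentSourceBase(uintptr_t src,
                                  const std::atomic<uint32_t>& monitor,
                                  uintptr_t data_offset) {
      // Load the lock word (which holds the read barrier state) without a fence.
      uint64_t lock_word = monitor.load(std::memory_order_relaxed);
      // The lock word is zero-extended to 64 bits, so (lock_word >> 32) == 0:
      // the addition leaves `src` unchanged, but the resulting address now
      // carries a data dependency on the lock-word load, so the CPU cannot
      // reorder the subsequent array-element loads before it.
      return src + static_cast<uintptr_t>(lock_word >> 32) + data_offset;
    }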