ARM64: Link-time generated thunks for ArrayGet Baker CC read barrier.
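
For reference, the fast path now emitted at an object ArrayGet site looks roughly as
follows (register names are illustrative; the authoritative sequence is emitted by
CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier and the CBNZ target is filled
in by the Arm64RelativePatcher at link time):

    ldr   ip1, [tr, #<pReadBarrierMarkReg16 offset>]  // Introspection entrypoint; null
                                                      // when the GC is not marking.
    add   xTemp, xObj, #data_offset                   // xTemp = address of the array data.
    adr   lr, gray_return_address
    cbnz  ip1, <array thunk for xTemp>                // Patched at link time.
  not_gray_return_address:
    ldr   wRef, [xTemp, xIndex, lsl #2]               // Original reference load (followed
                                                      // by unpoisoning with heap poisoning).
  gray_return_address:
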
Test: Added a test to relative_patcher_arm64
Test: m test-art-target-gtest on Nexus 6P.
Test: Nexus 6P boots.
Test: testrunner.py --target on Nexus 6P.
Test: Nexus 6P boots with heap poisoning.
Test: testrunner.py --target on Nexus 6P with heap poisoning.
Bug: 29516974
Bug: 30126666
Bug: 36141117
Change-Id: Id0f23089c55cbb53b84305c11bb4b03718561ade
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4629c54..eee832a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -91,6 +91,7 @@
// Flags controlling the use of link-time generated thunks for Baker read barriers.
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
// Some instructions have special requirements for a temporary, for example
@@ -2759,6 +2760,7 @@
// Object ArrayGet with Baker's read barrier case.
-// Note that a potential implicit null check is handled in the
-// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
+// Note that ArrayGet with Baker read barriers never performs an implicit
+// null check (HArrayGet::CanDoImplicitNullCheckOn() returns false), so the
+// loads below are generated with needs_null_check = false.
+DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
if (index.IsConstant()) {
// Array load with a constant index can be treated as a field load.
offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type);
@@ -2769,12 +2771,12 @@
obj.W(),
offset,
maybe_temp,
- /* needs_null_check */ true,
+ /* needs_null_check */ false,
/* use_load_acquire */ false);
} else {
Register temp = WRegisterFrom(locations->GetTemp(0));
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true);
+ instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
}
} else {
// General case.
@@ -5928,9 +5930,9 @@
!Runtime::Current()->UseJitCompilation()) {
// Note that we do not actually check the value of `GetIsGcMarking()`
// to decide whether to mark the loaded GC root or not. Instead, we
- // load into `temp` the read barrier mark introspection entrypoint.
- // If `temp` is null, it means that `GetIsGcMarking()` is false, and
- // vice versa.
+ // load into `temp` (actually IP1) the read barrier mark introspection
+ // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+ // false, and vice versa.
//
// We use link-time generated thunks for the slow path. That thunk
// checks the reference and jumps to the entrypoint if needed.
@@ -6054,24 +6056,24 @@
!use_load_acquire &&
!Runtime::Current()->UseJitCompilation()) {
// Note that we do not actually check the value of `GetIsGcMarking()`
- // to decide whether to mark the loaded GC root or not. Instead, we
- // load into `temp` the read barrier mark introspection entrypoint.
- // If `temp` is null, it means that `GetIsGcMarking()` is false, and
- // vice versa.
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually IP1) the read barrier mark introspection
+ // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+ // false, and vice versa.
//
// We use link-time generated thunks for the slow path. That thunk checks
// the holder and jumps to the entrypoint if needed. If the holder is not
// gray, it creates a fake dependency and returns to the LDR instruction.
//
// temp = Thread::Current()->pReadBarrierMarkIntrospection
- // lr = &return_address;
+ // lr = &gray_return_address;
// if (temp != nullptr) {
// goto field_thunk<holder_reg, base_reg>(lr)
// }
// not_gray_return_address:
// // Original reference load. If the offset is too large to fit
// // into LDR, we use an adjusted base register here.
- // GcRoot<mirror::Object> root = *(obj+offset);
+ // HeapReference<mirror::Object> reference = *(obj+offset);
// gray_return_address:
DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
@@ -6141,16 +6143,74 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+
+ if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually IP1) the read barrier mark introspection
+ // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is
+ // false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto array_thunk<base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. The base register (`temp`) already points
+ // // to the array data, so no LDR offset adjustment is needed here.
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
+
+ DCHECK(index.IsValid());
+ Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
+ Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+ uint32_t custom_data =
+ linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
+ vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
+
+ // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip0.GetCode(), 16u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+ __ Ldr(ip1, MemOperand(tr, entry_point_offset));
+ __ Add(temp.X(), obj.X(), Operand(data_offset));
+ EmissionCheckScope guard(GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ __ Bind(cbnz_label);
+ __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Array LDR must be 1 instruction (4B) before the return address label; "
+ " 2 instructions (8B) for heap poisoning.");
+ __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ return;
+ }
+
// Array cells are never volatile variables, therefore array loads
// never use Load-Acquire instructions on ARM64.
const bool use_load_acquire = false;
- static_assert(
- sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot);
GenerateReferenceLoadWithBakerReadBarrier(instruction,
ref,
obj,
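
For context, the array thunk branched to above is not part of this file: it is generated by
the Arm64RelativePatcher from the custom data produced by EncodeBakerReadBarrierArrayData(),
and `base_reg` below stands for the register encoded there. Assuming it mirrors the existing
field thunk (a sketch of the expected shape, not code taken from this change), it tests the
gray bit in the holder's lock word, which sits at a negative offset from the data pointer it
receives, and either returns to the LDR with a fake address dependency or hands over to the
introspection entrypoint:

    const int32_t data_offset =
        mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
    MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
    __ Ldr(ip0.W(), lock_word);                                      // Holder's lock word.
    __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, &slow_path);  // Gray: take the slow path.
    __ Add(lr, lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);       // Return to the LDR itself.
    __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));               // Fake dependency; adds zero.
    __ Br(lr);
    __ Bind(&slow_path);
    // Recover the index register from the LDR at lr + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET,
    // move the data pointer to IP0 and branch to the entrypoint in IP1 (details elided).
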
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index c2b1374..c4a5226 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -247,6 +247,7 @@
access->GetType() == Primitive::kPrimNot) {
// For object arrays, the read barrier instrumentation requires
// the original array pointer.
+ // TODO: This can be relaxed for Baker CC.
return false;
}
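
For context, the rewrite that the early return above blocks for object arrays hoists
`array + data_offset` into a shared HIntermediateAddress so that several accesses to the
same array reuse one address computation. A hedged sketch of that rewrite (simplified from
the rest of TryExtractArrayAccessAddress; exact helper names may differ):

    HGraph* graph = access->GetBlock()->GetGraph();
    HIntConstant* offset = graph->GetIntConstant(data_offset);
    HIntermediateAddress* address =
        new (graph->GetArena()) HIntermediateAddress(array, offset, access->GetDexPc());
    access->GetBlock()->InsertInstructionBefore(address, access);
    access->ReplaceInput(address, 0);  // The access now sees the data pointer, not the holder.

With the link-time array thunks above, the holder can presumably be recovered from the data
pointer by subtracting data_offset, which is what the new TODO hints at.
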
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 6be237e..f9eb343 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -5376,10 +5376,16 @@
}
bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
// TODO: We can be smarter here.
- // Currently, the array access is always preceded by an ArrayLength or a NullCheck
- // which generates the implicit null check. There are cases when these can be removed
- // to produce better code. If we ever add optimizations to do so we should allow an
- // implicit check here (as long as the address falls in the first page).
+ // Currently, unless the array is the result of NewArray, the array access is always
+ // preceded by some form of null check necessary for the bounds check, usually an
+ // implicit null check on the ArrayLength input to BoundsCheck, or a Deoptimize for
+ // dynamic BCE. There are cases when these could be removed to produce better code.
+ // If we ever add optimizations to do so we should allow an implicit check here
+ // (as long as the address falls in the first page).
+ //
+ // As an example of such an optimization, we could eliminate the BoundsCheck for
+ // a = cond ? new int[1] : null;
+ // a[0]; // The Phi does not need a bounds check for either input.
return false;
}
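
To illustrate the scenario described in the comment (HIR sketch only, not produced by any
current optimization): once the BoundsCheck on the Phi is eliminated, the ArrayGet becomes
the first instruction to dereference the possibly-null array, so it would have to carry the
(implicit) null check itself:

    // a1 = NewArray(1)        // always non-null, length 1
    // a2 = NullConstant
    // a  = Phi(a1, a2)
    // v  = ArrayGet(a, 0)     // no NullCheck or BoundsCheck in front of it; the load
    //                         // would have to fault when `a` is null, which only works
    //                         // while data_offset + (index << 2) stays in the first page.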