ARM: Link-time generated thunks for Baker CC read barrier.
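
Instead of calling a per-register pReadBarrierMarkRegXX entrypoint from a
compiler-generated slow path, the fast path now loads the mark introspection
entrypoint into the reserved register R4 and branches to a link-time generated
thunk via a patched BNE. A rough sketch of the field fast path (without heap
poisoning; register and label names are illustrative, the exact sequence is
described by the pseudo-code comments in GenerateFieldLoadWithBakerReadBarrier):

    ldr   r4, [tr, #pReadBarrierMarkReg12_offset]  @ introspection entrypoint
    adr   lr, gray_return_address
    cmp   r4, #0                     @ null iff !Thread::GetIsGcMarking()
    bne   <field_thunk>              @ placeholder, patched at link time
  not_gray_return_address:
    ldr   <ref>, [<base>, #offset]   @ original reference load
  gray_return_address:
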
Remaining work for follow-up CLs:
- use implicit null check in field thunk,
- use 16-bit LDRs for fields and GC roots.
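
For GC roots the original load is emitted before the BNE, so the thunk finds it
at BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET (-8) from the return address,
while field and array loads sit after the BNE at -4 (-8 with heap poisoning).
All of these loads are currently forced to 32-bit encodings, hence the 16-bit
LDR follow-up above. A sketch of the GC root sequence, with illustrative names
as before:

    ldr   r4, [tr, #pReadBarrierMarkReg12_offset]  @ introspection entrypoint
    adr   lr, return_address
    cmp   r4, #0
    ldr   <root>, [<obj>, #offset]   @ original GC root load
    bne   <gc_root_thunk>            @ placeholder, patched at link time
  return_address:
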
Test: m test-art-target-gtest
Test: testrunner.py --target on Nexus 6P.
Test: testrunner.py --target on Nexus 6P with heap poisoning enabled.
Test: Repeat the above tests with ART_USE_OLD_ARM_BACKEND=true.
Bug: 29516974
Bug: 30126666
Bug: 36141117
Change-Id: Iad5addab72d790a9d61879f61f2e75b246bcdf5a
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index ebd578c..3c6e277 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm.h"
+#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_utils.h"
@@ -25,6 +26,7 @@
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_arm.h"
+#include "linker/arm/relative_patcher_thumb2.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "thread.h"
@@ -60,10 +62,41 @@
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+// Reference loads (except object array loads) use LDR Rt, [Rn, #offset], which can handle
+// offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-time generated thunks we need to
+// split the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// The reserved entrypoint register for link-time generated thunks.
+const Register kBakerCcEntrypointRegister = R4;
+
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
+static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instruction) {
+ DCHECK_EQ(static_cast<uint32_t>(kBakerCcEntrypointRegister),
+ linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
+ DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
+ DCHECK_EQ(kBakerCcEntrypointRegister,
+ instruction->GetLocations()->GetTemp(
+ instruction->GetLocations()->GetTempCount() - 1u).AsRegister<Register>());
+}
+
+static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) {
+ DCHECK(down_cast<Thumb2Assembler*>(codegen->GetAssembler())->IsForced32Bit());
+ __ BindTrackedLabel(bne_label);
+ Label placeholder_label;
+ __ b(&placeholder_label, NE); // Placeholder, patched at link-time.
+ __ Bind(&placeholder_label);
+}
+
static constexpr int kRegListThreshold = 4;
// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
@@ -1962,6 +1995,7 @@
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -5281,7 +5315,18 @@
} else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // loads we need a temporary only if the offset is too big.
+ if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -5747,11 +5792,35 @@
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
- // Also need for String compression feature.
- if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
- || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation() &&
+ instruction->GetIndex()->IsConstant()) {
+ // Array loads with constant index are treated as field loads.
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // constant index loads we need a temporary only if the offset is too big.
+ uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
+ uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
+ offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ if (offset >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation() &&
+ !instruction->GetIndex()->IsConstant()) {
+ // We need a non-scratch temporary for the array data pointer.
+ locations->AddTemp(Location::RequiresRegister());
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    // Also need a temporary for the String compression feature.
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -5863,8 +5932,20 @@
Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call.
- codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ data_offset += helpers::Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ locations->GetTemp(0),
+ /* needs_null_check */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+ }
} else {
Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
@@ -6701,6 +6782,13 @@
// For non-Baker read barrier we have a temp-clobbering call.
}
}
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+ !Runtime::Current()->UseJitCompilation())) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -6880,6 +6968,9 @@
// TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
// that the the kPrimNot result register is the same as the first argument register.
locations->SetCustomSlowPathCallerSaves(caller_saves);
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -7050,6 +7141,9 @@
// Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
+ }
}
void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7923,48 +8017,93 @@
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
- //
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`. If `temp`
- // is null, it means that `GetIsGcMarking()` is false, and vice
- // versa.
- //
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
- // }
+ if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk
+ // checks the reference and jumps to the entrypoint if needed.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &return_address;
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) {
+ // goto gc_root_thunk<root_reg>(lr)
+ // }
+ // return_address:
- // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
- Location temp = Location::RegisterLocation(LR);
- SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
- instruction, root, /* entrypoint */ temp);
- codegen_->AddSlowPath(slow_path);
+ CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg);
+ Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(IP, 12);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
+ Label return_address;
+ __ AdrCode(LR, &return_address);
+ __ CmpConstant(kBakerCcEntrypointRegister, 0);
+ static_assert(
+ BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
+ "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
+ // Currently the offset is always within range. If that changes,
+ // we shall have to split the load the same way as for fields.
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ EmitPlaceholderBne(codegen_, bne_label);
+ __ Bind(&return_address);
+ } else {
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
+ // }
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Location temp = Location::RegisterLocation(LR);
+ SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
+ instruction, root, /* entrypoint */ temp);
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
@@ -7982,6 +8121,16 @@
}
}
+void CodeGeneratorARM::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
+ if (!Runtime::Current()->UseJitCompilation()) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
+ }
+}
+
void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
Register obj,
@@ -7991,6 +8140,69 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // GcRoot<mirror::Object> reference = *(obj+offset);
+ // gray_return_address:
+
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ base = temp.AsRegister<Register>();
+ DCHECK_NE(base, kBakerCcEntrypointRegister);
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ }
+ CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj);
+ Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(IP, 12);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+
+ Label return_address;
+ __ AdrCode(LR, &return_address);
+ __ CmpConstant(kBakerCcEntrypointRegister, 0);
+ ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ EmitPlaceholderBne(this, bne_label);
+ static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
+ " 2 32-bit instructions (8B) for heap poisoning.");
+ Register ref_reg = ref.AsRegister<Register>();
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ __ LoadFromOffset(kLoadWord, ref_reg, base, offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ return;
+ }
+
// /* HeapReference<Object> */ ref = *(obj + offset)
Location no_index = Location::NoLocation();
ScaleFactor no_scale_factor = TIMES_1;
@@ -8011,9 +8223,67 @@
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ ScaleFactor scale_factor = TIMES_4;
+
+ if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+  //     goto array_thunk<base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // GcRoot<mirror::Object> reference = data[index];
+ // gray_return_address:
+
+ DCHECK(index.IsValid());
+ Register index_reg = index.AsRegister<Register>();
+ Register ref_reg = ref.AsRegister<Register>();
+ Register data_reg = temp.AsRegister<Register>();
+ DCHECK_NE(data_reg, kBakerCcEntrypointRegister);
+
+ CheckLastTempIsBakerCcEntrypointRegister(instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg);
+ Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+  //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(IP, 12);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP);
+ __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset);
+ __ AddConstant(data_reg, obj, data_offset);
+
+ Label return_address;
+ __ AdrCode(LR, &return_address);
+ __ CmpConstant(kBakerCcEntrypointRegister, 0);
+ ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler()));
+ EmitPlaceholderBne(this, bne_label);
+ static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
+ " 2 32-bit instructions (8B) for heap poisoning.");
+ __ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ return;
+ }
+
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- ScaleFactor scale_factor = TIMES_4;
GenerateReferenceLoadWithBakerReadBarrier(
instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
}
@@ -8379,6 +8649,11 @@
return &patches->back();
}
+Label* CodeGeneratorARM::NewBakerReadBarrierPatch(uint32_t custom_data) {
+ baker_read_barrier_patches_.emplace_back(custom_data);
+ return &baker_read_barrier_patches_.back().label;
+}
+
Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index) {
return boot_image_string_patches_.GetOrCreate(
@@ -8445,7 +8720,8 @@
/* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
boot_image_type_patches_.size() +
/* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
- /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
+ /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+ baker_read_barrier_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
@@ -8479,6 +8755,10 @@
target_type.dex_file,
target_type.type_index.index_));
}
+ for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+ linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.Position(),
+ info.custom_data));
+ }
DCHECK_EQ(size, linker_patches->size());
}
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 86f2f21..6f007e1 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -488,6 +488,11 @@
PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
uint32_t element_offset);
+
+  // Add a new Baker read barrier patch and return the label to be bound
+  // before the BNE instruction.
+ Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index);
Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
@@ -503,6 +508,10 @@
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+  // Maybe add the reserved entrypoint register as a temporary for field loads. This temp
+  // is added only for AOT compilation when link-time generated thunks for fields are enabled.
+ void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -616,6 +625,13 @@
Literal*,
TypeReferenceValueComparator>;
+ struct BakerReadBarrierPatchInfo {
+ explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+ Label label;
+ uint32_t custom_data;
+ };
+
Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
@@ -648,6 +664,8 @@
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // Baker read barrier patch info.
+ ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
// Patches for string literals in JIT compiled code.
StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 8744cc8..86f4cd2 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm_vixl.h"
+#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_utils.h"
@@ -24,6 +25,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "intrinsics_arm_vixl.h"
+#include "linker/arm/relative_patcher_thumb2.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "thread.h"
@@ -77,6 +79,20 @@
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+// Reference loads (except object array loads) use LDR Rt, [Rn, #offset], which can handle
+// offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-time generated thunks we need to
+// split the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// The reserved entrypoint register for link-time generated thunks.
+const vixl32::Register kBakerCcEntrypointRegister = r4;
+
#ifdef __
#error "ARM Codegen VIXL macro-assembler macro already defined."
#endif
@@ -88,6 +104,56 @@
// Marker that code is yet to be, and must, be implemented.
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
+static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps,
+ HInstruction* instruction) {
+ DCHECK(temps->IsAvailable(ip));
+ temps->Exclude(ip);
+ DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister));
+ DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(),
+ linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
+ DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
+ DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp(
+ instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister));
+}
+
+static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) {
+ ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes);
+ __ bind(patch_label);
+ vixl32::Label placeholder_label;
+ __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time.
+ __ bind(&placeholder_label);
+}
+
+class EmitAdrCode {
+ public:
+ EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
+ : assembler_(assembler), rd_(rd), label_(label) {
+ ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes);
+ adr_location_ = assembler->GetCursorOffset();
+ assembler->adr(EncodingSize(Wide), rd, label);
+ }
+
+ ~EmitAdrCode() {
+ DCHECK(label_->IsBound());
+ // The ADR emitted by the assembler does not set the Thumb mode bit we need.
+ // TODO: Maybe extend VIXL to allow ADR for return address?
+ uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_);
+ // Expecting ADR encoding T3 with `(offset & 1) == 0`.
+ DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u); // Check bits 24-31, except 26.
+ DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu); // Check bits 16-23.
+ DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode()); // Check bits 8-11 and 15.
+ DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u); // Check bit 0, i.e. the `offset & 1`.
+ // Add the Thumb mode bit.
+ raw_adr[2] |= 0x01u;
+ }
+
+ private:
+ ArmVIXLMacroAssembler* const assembler_;
+ vixl32::Register rd_;
+ vixl32::Label* const label_;
+ int32_t adr_location_;
+};
+
// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
// for each live D registers they treat two corresponding S registers as live ones.
//
@@ -2012,6 +2078,7 @@
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -5289,7 +5356,18 @@
} else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
// We need a temporary register for the read barrier marking slow
// path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // loads we need a temporary only if the offset is too big.
+ if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
}
@@ -5756,11 +5834,35 @@
Location::RequiresRegister(),
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
- // Also need for String compression feature.
- if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
- || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation() &&
+ instruction->GetIndex()->IsConstant()) {
+ // Array loads with constant index are treated as field loads.
+ // If link-time thunks for the Baker read barrier are enabled, for AOT
+ // constant index loads we need a temporary only if the offset is too big.
+ uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
+ uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
+ offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot);
+ if (offset >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation() &&
+ !instruction->GetIndex()->IsConstant()) {
+ // We need a non-scratch temporary for the array data pointer.
+ locations->AddTemp(Location::RequiresRegister());
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+    // Also need a temporary for the String compression feature.
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -5871,8 +5973,20 @@
Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
- codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ data_offset += Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ locations->GetTemp(0),
+ /* needs_null_check */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+ }
} else {
vixl32::Register out = OutputRegister(instruction);
if (index.IsConstant()) {
@@ -6762,6 +6876,13 @@
// For non-Baker read barrier we have a temp-clobbering call.
}
}
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+ !Runtime::Current()->UseJitCompilation())) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -6938,6 +7059,9 @@
// TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
// that the the kPrimNot result register is the same as the first argument register.
locations->SetCustomSlowPathCallerSaves(caller_saves);
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ }
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -7100,6 +7224,9 @@
// Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
+ }
}
void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7998,48 +8125,96 @@
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
- //
- // Note that we do not actually check the value of
- // `GetIsGcMarking()` to decide whether to mark the loaded GC
- // root or not. Instead, we load into `temp` the read barrier
- // mark entry point corresponding to register `root`. If `temp`
- // is null, it means that `GetIsGcMarking()` is false, and vice
- // versa.
- //
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
- // }
+ if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded GC root or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk
+ // checks the reference and jumps to the entrypoint if needed.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &return_address;
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) {
+ // goto gc_root_thunk<root_reg>(lr)
+ // }
+ // return_address:
- // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
- Location temp = LocationFrom(lr);
- SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
- instruction, root, /* entrypoint */ temp);
- codegen_->AddSlowPath(slow_path);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+ vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
- // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- const int32_t entry_point_offset =
- CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
- // Loading the entrypoint does not require a load acquire since it is only changed when
- // threads are suspended or running a checkpoint.
- GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
+ vixl::EmissionCheckScope guard(GetVIXLAssembler(),
+ 4 * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ static_assert(
+ BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
+ "GC root LDR must be 2 32-bit instructions (8B) before the return address label.");
+ // Currently the offset is always within range. If that changes,
+ // we shall have to split the load the same way as for fields.
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ __ ldr(EncodingSize(Wide), root_reg, MemOperand(obj, offset));
+ EmitPlaceholderBne(codegen_, bne_label);
+ __ Bind(&return_address);
+ } else {
+ // Note that we do not actually check the value of
+ // `GetIsGcMarking()` to decide whether to mark the loaded GC
+ // root or not. Instead, we load into `temp` the read barrier
+ // mark entry point corresponding to register `root`. If `temp`
+ // is null, it means that `GetIsGcMarking()` is false, and vice
+ // versa.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking()
+ // // Slow path.
+ // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call.
+ // }
- // The entrypoint is null when the GC is not marking, this prevents one load compared to
- // checking GetIsGcMarking.
- __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`.
+ Location temp = LocationFrom(lr);
+ SlowPathCodeARMVIXL* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(
+ instruction, root, /* entrypoint */ temp);
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset);
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // The entrypoint is null when the GC is not marking, this prevents one load compared to
+ // checking GetIsGcMarking.
+ __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ }
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
@@ -8057,6 +8232,16 @@
}
}
+void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
+ if (!Runtime::Current()->UseJitCompilation()) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
+ }
+ }
+}
+
void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl32::Register obj,
@@ -8066,6 +8251,75 @@
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
+ if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // GcRoot<mirror::Object> reference = *(obj+offset);
+ // gray_return_address:
+
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ vixl32::Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ base = RegisterFrom(temp);
+ DCHECK(!base.Is(kBakerCcEntrypointRegister));
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ }
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
+ base.GetCode(),
+ obj.GetCode());
+ vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+ // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+
+ vixl::EmissionCheckScope guard(
+ GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ EmitPlaceholderBne(this, bne_label);
+ static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Field LDR must be 1 32-bit instruction (4B) before the return address label; "
+ " 2 32-bit instructions (8B) for heap poisoning.");
+ vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ __ ldr(EncodingSize(Wide), ref_reg, MemOperand(base, offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // Note: We need a Wide NEG for the unpoisoning.
+ if (kPoisonHeapReferences) {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
+ __ Bind(&return_address);
+ return;
+ }
+
// /* HeapReference<Object> */ ref = *(obj + offset)
Location no_index = Location::NoLocation();
ScaleFactor no_scale_factor = TIMES_1;
@@ -8086,9 +8340,73 @@
static_assert(
sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ ScaleFactor scale_factor = TIMES_4;
+
+ if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation()) {
+ // Note that we do not actually check the value of `GetIsGcMarking()`
+ // to decide whether to mark the loaded reference or not. Instead, we
+ // load into `temp` (actually kBakerCcEntrypointRegister) the read
+ // barrier mark introspection entrypoint. If `temp` is null, it means
+ // that `GetIsGcMarking()` is false, and vice versa.
+ //
+ // We use link-time generated thunks for the slow path. That thunk checks
+ // the holder and jumps to the entrypoint if needed. If the holder is not
+ // gray, it creates a fake dependency and returns to the LDR instruction.
+ //
+ // temp = Thread::Current()->pReadBarrierMarkIntrospection
+ // lr = &gray_return_address;
+ // if (temp != nullptr) {
+  //     goto array_thunk<base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // GcRoot<mirror::Object> reference = data[index];
+ // gray_return_address:
+
+ DCHECK(index.IsValid());
+ vixl32::Register index_reg = RegisterFrom(index, Primitive::kPrimInt);
+ vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ vixl32::Register data_reg = RegisterFrom(temp, Primitive::kPrimInt); // Raw pointer.
+ DCHECK(!data_reg.Is(kBakerCcEntrypointRegister));
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ uint32_t custom_data =
+ linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode());
+ vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+
+ // entrypoint_reg =
+  //     Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset));
+ __ Add(data_reg, obj, Operand(data_offset));
+
+ vixl::EmissionCheckScope guard(
+ GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(kBakerCcEntrypointRegister, Operand(0));
+ EmitPlaceholderBne(this, bne_label);
+ static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Array LDR must be 1 32-bit instruction (4B) before the return address label; "
+ " 2 32-bit instructions (8B) for heap poisoning.");
+ __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ // Note: We need a Wide NEG for the unpoisoning.
+ if (kPoisonHeapReferences) {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
+ __ Bind(&return_address);
+ return;
+ }
+
// /* HeapReference<Object> */ ref =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
- ScaleFactor scale_factor = TIMES_4;
GenerateReferenceLoadWithBakerReadBarrier(
instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
}
@@ -8497,6 +8815,11 @@
return &patches->back();
}
+vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) {
+ baker_read_barrier_patches_.emplace_back(custom_data);
+ return &baker_read_barrier_patches_.back().label;
+}
+
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageStringLiteral(
const DexFile& dex_file,
dex::StringIndex string_index) {
@@ -8578,7 +8901,8 @@
/* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
boot_image_type_patches_.size() +
/* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
- /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
+ /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+ baker_read_barrier_patches_.size();
linker_patches->reserve(size);
EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
linker_patches);
@@ -8612,6 +8936,10 @@
target_type.dex_file,
target_type.type_index.index_));
}
+ for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+ linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
+ info.custom_data));
+ }
DCHECK_EQ(size, linker_patches->size());
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 1e9669d..9d56cc3 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -572,6 +572,11 @@
PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
uint32_t element_offset);
+
+  // Add a new Baker read barrier patch and return the label to be bound
+  // before the BNE instruction.
+ vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+
VIXLUInt32Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index);
VIXLUInt32Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file,
@@ -589,6 +594,10 @@
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+  // Maybe add the reserved entrypoint register as a temporary for field loads. This temp
+  // is added only for AOT compilation when link-time generated thunks for fields are enabled.
+ void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -713,6 +722,13 @@
VIXLUInt32Literal*,
TypeReferenceValueComparator>;
+ struct BakerReadBarrierPatchInfo {
+ explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { }
+
+ vixl::aarch32::Label label;
+ uint32_t custom_data;
+ };
+
VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map);
VIXLUInt32Literal* DeduplicateMethodLiteral(MethodReference target_method,
MethodToLiteralMap* map);
@@ -750,6 +766,8 @@
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+ // Baker read barrier patch info.
+ ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
// Patches for string literals in JIT compiled code.
StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 750f9cc..c784171 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1648,6 +1648,8 @@
// is clobbered by ReadBarrierMarkRegX entry points). Get an extra
// temporary register from the register allocator.
locations->AddTemp(Location::RequiresRegister());
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen_);
+ arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
}
}
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index fd8a37a..77d870b 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -2026,6 +2026,8 @@
// is clobbered by ReadBarrierMarkRegX entry points). Get an extra
// temporary register from the register allocator.
locations->AddTemp(Location::RequiresRegister());
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
+ arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
}
}