ARM: Use rMR for Baker RB introspection marking.
The marking register (r8 on ARM) is known to be 1 when
entering the introspection marking entrypoint, so we can
clobber it, use it as a temporary register (instead of r4)
in the runtime entrypoint, and reload the 1 before
returning. The immediate benefits are minor, see below,
but this shall allow further improvements, for example we
could try to change rMR to r4 which would reduce code size
of every marking register check by 2 bytes.
ARM boot image (boot*.oat) size in aosp_taimen-userdebug:
- before: 17861724
- after: 17858088 (-3636)
Test: Pixel 2 XL boots.
Test: m test-art-host-gtest
Test: testrunner.py --target --optimizing --32
Test: Repeat the above tests with heap poisoning enabled.
Bug: 36141117
Change-Id: I0f625dec3a6b3ee1786f7e5f4377be42b9bc37d3
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 7350b14..58ce9aa 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -107,16 +107,6 @@
// Marker that code is yet to be, and must, be implemented.
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
-static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps,
- HInstruction* instruction) {
- DCHECK(temps->IsAvailable(ip));
- temps->Exclude(ip);
- DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister));
- DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
- DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp(
- instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister));
-}
-
static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) {
ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes);
__ bind(patch_label);
@@ -5973,8 +5963,6 @@
if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
locations->AddTemp(Location::RequiresRegister());
}
- // And we always need the reserved entrypoint register.
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
} else {
locations->AddTemp(Location::RequiresRegister());
}
@@ -6087,11 +6075,11 @@
case DataType::Type::kReference: {
// /* HeapReference<Object> */ out = *(base + offset)
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp_loc = locations->GetTemp(0);
+ Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+ instruction, out, base, offset, maybe_temp, /* needs_null_check */ true);
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -6390,8 +6378,6 @@
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
!Runtime::Current()->UseJitCompilation() &&
instruction->GetIndex()->IsConstant()) {
@@ -6404,16 +6390,10 @@
if (offset >= kReferenceLoadMinFarOffset) {
locations->AddTemp(Location::RequiresRegister());
}
- // And we always need the reserved entrypoint register.
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
- !Runtime::Current()->UseJitCompilation() &&
- !instruction->GetIndex()->IsConstant()) {
- // We need a non-scratch temporary for the array data pointer.
- locations->AddTemp(Location::RequiresRegister());
- // And we always need the reserved entrypoint register.
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
} else {
+ // If using introspection, we need a non-scratch temporary for the array data pointer.
+ // Otherwise, we need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
locations->AddTemp(Location::RequiresRegister());
}
} else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
@@ -6526,20 +6506,22 @@
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
if (index.IsConstant()) {
// Array load with a constant index can be treated as a field load.
+ Location maybe_temp =
+ (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
out_loc,
obj,
data_offset,
- locations->GetTemp(0),
+ maybe_temp,
/* needs_null_check */ false);
} else {
+ Location temp = locations->GetTemp(0);
codegen_->GenerateArrayLoadWithBakerReadBarrier(
instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
}
@@ -7447,13 +7429,6 @@
// For non-Baker read barrier we have a temp-clobbering call.
}
}
- if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
- if (load_kind == HLoadClass::LoadKind::kBssEntry ||
- (load_kind == HLoadClass::LoadKind::kReferrersClass &&
- !Runtime::Current()->UseJitCompilation())) {
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- }
- }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -7687,9 +7662,6 @@
// TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
// that the the kPrimNot result register is the same as the first argument register.
locations->SetCustomSlowPathCallerSaves(caller_saves);
- if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- }
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -7866,9 +7838,6 @@
// Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
- }
}
void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -8829,7 +8798,7 @@
// return_address:
UseScratchRegisterScope temps(GetVIXLAssembler());
- ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ temps.Exclude(ip);
bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
@@ -8897,16 +8866,6 @@
MaybeGenerateMarkingRegisterCheck(/* code */ 18);
}
-void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
- if (!Runtime::Current()->UseJitCompilation()) {
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- }
- }
-}
-
void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl32::Register obj,
@@ -8944,7 +8903,6 @@
vixl32::Register base = obj;
if (offset >= kReferenceLoadMinFarOffset) {
base = RegisterFrom(temp);
- DCHECK(!base.Is(kBakerCcEntrypointRegister));
static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
__ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
offset &= (kReferenceLoadMinFarOffset - 1u);
@@ -8954,7 +8912,7 @@
DCHECK(!narrow);
}
UseScratchRegisterScope temps(GetVIXLAssembler());
- ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ temps.Exclude(ip);
uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow);
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
@@ -9037,10 +8995,9 @@
vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer.
- DCHECK(!data_reg.Is(kBakerCcEntrypointRegister));
UseScratchRegisterScope temps(GetVIXLAssembler());
- ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ temps.Exclude(ip);
uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
@@ -9927,16 +9884,16 @@
}
// Load the read barrier introspection entrypoint in register `entrypoint`
-static void LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler,
- vixl32::Register entrypoint) {
+static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) {
// The register where the read barrier introspection entrypoint is loaded
- // is fixed: `kBakerCcEntrypointRegister` (R4).
- DCHECK(entrypoint.Is(kBakerCcEntrypointRegister));
+ // is the marking register. We clobber it here and the entrypoint restores it to 1.
+ vixl32::Register entrypoint = mr;
// entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
DCHECK_EQ(ip.GetCode(), 12u);
const int32_t entry_point_offset =
Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
__ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
+ return entrypoint;
}
void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
@@ -9975,8 +9932,7 @@
__ Bind(&slow_path);
const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
raw_ldr_offset;
- vixl32::Register ep_reg(kBakerCcEntrypointRegister);
- LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+ vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
if (width == BakerReadBarrierWidth::kWide) {
MemOperand ldr_half_address(lr, ldr_offset + 2);
__ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
@@ -10016,8 +9972,7 @@
MemOperand ldr_address(lr, ldr_offset + 2);
__ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
// i.e. Rm+32 because the scale in imm2 is 2.
- vixl32::Register ep_reg(kBakerCcEntrypointRegister);
- LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+ vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
__ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create
// a switch case target based on the index register.
__ Mov(ip, base_reg); // Move the base register to ip0.
@@ -10050,8 +10005,7 @@
" the highest bits and the 'forwarding address' state to have all bits set");
__ Cmp(ip, Operand(0xc0000000));
__ B(hs, &forwarding_address);
- vixl32::Register ep_reg(kBakerCcEntrypointRegister);
- LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+ vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
// Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
// to art_quick_read_barrier_mark_introspection_gc_roots.
int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)