-rw-r--r--   compiler/optimizing/code_generator_arm_vixl.cc        84
-rw-r--r--   compiler/optimizing/code_generator_arm_vixl.h           9
-rw-r--r--   compiler/optimizing/intrinsics_arm_vixl.cc              2
-rw-r--r--   dex2oat/linker/arm/relative_patcher_thumb2_test.cc     57
-rw-r--r--   runtime/arch/arm/asm_support_arm.h                      4
-rw-r--r--   runtime/arch/arm/quick_entrypoints_arm.S              195
-rw-r--r--   runtime/oat.h                                           4
7 files changed, 161 insertions(+), 194 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 7350b146f9..58ce9aa9f0 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -107,16 +107,6 @@ constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
// Marker that code is yet to be, and must, be implemented.
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
-static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps,
- HInstruction* instruction) {
- DCHECK(temps->IsAvailable(ip));
- temps->Exclude(ip);
- DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister));
- DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
- DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp(
- instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister));
-}
-
static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) {
ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes);
__ bind(patch_label);
@@ -5973,8 +5963,6 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
locations->AddTemp(Location::RequiresRegister());
}
- // And we always need the reserved entrypoint register.
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
} else {
locations->AddTemp(Location::RequiresRegister());
}
@@ -6087,11 +6075,11 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
case DataType::Type::kReference: {
// /* HeapReference<Object> */ out = *(base + offset)
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp_loc = locations->GetTemp(0);
+ Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+ instruction, out, base, offset, maybe_temp, /* needs_null_check */ true);
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -6390,8 +6378,6 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
!Runtime::Current()->UseJitCompilation() &&
instruction->GetIndex()->IsConstant()) {
@@ -6404,16 +6390,10 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
if (offset >= kReferenceLoadMinFarOffset) {
locations->AddTemp(Location::RequiresRegister());
}
- // And we always need the reserved entrypoint register.
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
- !Runtime::Current()->UseJitCompilation() &&
- !instruction->GetIndex()->IsConstant()) {
- // We need a non-scratch temporary for the array data pointer.
- locations->AddTemp(Location::RequiresRegister());
- // And we always need the reserved entrypoint register.
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
} else {
+ // If using introspection, we need a non-scratch temporary for the array data pointer.
+ // Otherwise, we need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
locations->AddTemp(Location::RequiresRegister());
}
} else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
@@ -6526,20 +6506,22 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
if (index.IsConstant()) {
// Array load with a constant index can be treated as a field load.
+ Location maybe_temp =
+ (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
out_loc,
obj,
data_offset,
- locations->GetTemp(0),
+ maybe_temp,
/* needs_null_check */ false);
} else {
+ Location temp = locations->GetTemp(0);
codegen_->GenerateArrayLoadWithBakerReadBarrier(
instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
}
@@ -7447,13 +7429,6 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
// For non-Baker read barrier we have a temp-clobbering call.
}
}
- if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
- if (load_kind == HLoadClass::LoadKind::kBssEntry ||
- (load_kind == HLoadClass::LoadKind::kReferrersClass &&
- !Runtime::Current()->UseJitCompilation())) {
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- }
- }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -7687,9 +7662,6 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
// TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
// that the kPrimNot result register is the same as the first argument register.
locations->SetCustomSlowPathCallerSaves(caller_saves);
- if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- }
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -7866,9 +7838,6 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
// Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
- }
}
void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -8829,7 +8798,7 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
// return_address:
UseScratchRegisterScope temps(GetVIXLAssembler());
- ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ temps.Exclude(ip);
bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
@@ -8897,16 +8866,6 @@ void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
MaybeGenerateMarkingRegisterCheck(/* code */ 18);
}
-void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
- if (!Runtime::Current()->UseJitCompilation()) {
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- }
- }
-}
-
void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl32::Register obj,
@@ -8944,7 +8903,6 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
vixl32::Register base = obj;
if (offset >= kReferenceLoadMinFarOffset) {
base = RegisterFrom(temp);
- DCHECK(!base.Is(kBakerCcEntrypointRegister));
static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
__ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
offset &= (kReferenceLoadMinFarOffset - 1u);
@@ -8954,7 +8912,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
DCHECK(!narrow);
}
UseScratchRegisterScope temps(GetVIXLAssembler());
- ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ temps.Exclude(ip);
uint32_t custom_data = EncodeBakerReadBarrierFieldData(base.GetCode(), obj.GetCode(), narrow);
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
@@ -9037,10 +8995,9 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer.
- DCHECK(!data_reg.Is(kBakerCcEntrypointRegister));
UseScratchRegisterScope temps(GetVIXLAssembler());
- ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
+ temps.Exclude(ip);
uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
@@ -9927,16 +9884,16 @@ static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
}
// Load the read barrier introspection entrypoint in register `entrypoint`
-static void LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler,
- vixl32::Register entrypoint) {
+static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) {
// The register where the read barrier introspection entrypoint is loaded
- // is fixed: `kBakerCcEntrypointRegister` (R4).
- DCHECK(entrypoint.Is(kBakerCcEntrypointRegister));
+ // is the marking register. We clobber it here and the entrypoint restores it to 1.
+ vixl32::Register entrypoint = mr;
// entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
DCHECK_EQ(ip.GetCode(), 12u);
const int32_t entry_point_offset =
Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
__ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
+ return entrypoint;
}
void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
@@ -9975,8 +9932,7 @@ void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assemb
__ Bind(&slow_path);
const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
raw_ldr_offset;
- vixl32::Register ep_reg(kBakerCcEntrypointRegister);
- LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+ vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
if (width == BakerReadBarrierWidth::kWide) {
MemOperand ldr_half_address(lr, ldr_offset + 2);
__ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
@@ -10016,8 +9972,7 @@ void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assemb
MemOperand ldr_address(lr, ldr_offset + 2);
__ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
// i.e. Rm+32 because the scale in imm2 is 2.
- vixl32::Register ep_reg(kBakerCcEntrypointRegister);
- LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+ vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
__ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create
// a switch case target based on the index register.
__ Mov(ip, base_reg); // Move the base register to ip0.
@@ -10050,8 +10005,7 @@ void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assemb
" the highest bits and the 'forwarding address' state to have all bits set");
__ Cmp(ip, Operand(0xc0000000));
__ B(hs, &forwarding_address);
- vixl32::Register ep_reg(kBakerCcEntrypointRegister);
- LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg);
+ vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
// Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
// to art_quick_read_barrier_mark_introspection_gc_roots.
int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide)
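Note on the array case above: CompileBakerReadBarrierThunk() relies on the 512-byte alignment of the main entrypoint so that the BFI of the loaded LDR byte turns the entrypoint pointer into the address of the per-register array switch case. A minimal sketch of that arithmetic, under the offsets defined in asm_support_arm.h below (the helper name is hypothetical and the Thumb bit is ignored):

    #include <cstdint>

    // Illustrative only (not ART code): effect of the BFI above on the 512-byte-aligned
    // main entrypoint address. The loaded LDR byte is "00 | imm2 | Rm", i.e. index_reg + 32
    // because imm2 encodes the scale 2, so inserting it at bit 3 yields
    // main + 0x100 + 8 * index_reg -- the switch case for index_reg
    // (0x100 is BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET, 16 cases of 8 bytes each).
    uint32_t ArraySwitchCaseAddress(uint32_t main_entrypoint, uint32_t index_reg) {
      uint32_t ldr_byte = (2u << 4) | index_reg;  // "00 | imm2 | Rm" with imm2 == 2
      return main_entrypoint | (ldr_byte << 3);   // BFI into bits [3, 9) of the entrypoint
    }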
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 6b9919ab15..d5b739bd7c 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -113,9 +113,6 @@ static const vixl::aarch32::SRegister kRuntimeParameterFpuRegistersVIXL[] = {
static const size_t kRuntimeParameterFpuRegistersLengthVIXL =
arraysize(kRuntimeParameterFpuRegistersVIXL);
-// The reserved entrypoint register for link-time generated thunks.
-const vixl::aarch32::Register kBakerCcEntrypointRegister = vixl32::r4;
-
class LoadClassSlowPathARMVIXL;
class CodeGeneratorARMVIXL;
@@ -611,10 +608,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
- // Maybe add the reserved entrypoint register as a temporary for field load. This temp
- // is added only for AOT compilation if link-time generated thunks for fields are enabled.
- void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
-
// Generate a GC root reference load:
//
// root <- *(obj + offset)
@@ -816,7 +809,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
kBitsForBakerReadBarrierWidth>;
static void CheckValidReg(uint32_t reg) {
- DCHECK(reg < vixl::aarch32::ip.GetCode() && reg != kBakerCcEntrypointRegister.GetCode()) << reg;
+ DCHECK(reg < vixl::aarch32::ip.GetCode() && reg != mr.GetCode()) << reg;
}
static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg,
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 29aecbc097..5287b4b2fa 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -1802,8 +1802,6 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// is clobbered by ReadBarrierMarkRegX entry points). Get an extra
// temporary register from the register allocator.
locations->AddTemp(Location::RequiresRegister());
- CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
- arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
}
}
diff --git a/dex2oat/linker/arm/relative_patcher_thumb2_test.cc b/dex2oat/linker/arm/relative_patcher_thumb2_test.cc
index e7b11bd16b..3fe97e146c 100644
--- a/dex2oat/linker/arm/relative_patcher_thumb2_test.cc
+++ b/dex2oat/linker/arm/relative_patcher_thumb2_test.cc
@@ -625,18 +625,23 @@ TEST_F(Thumb2RelativePatcherTest, StringReference4) {
ASSERT_LT(GetMethodOffset(1u), 0xfcu);
}
+const uint32_t kBakerValidRegs[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 9, 10, 11, // r8 (rMR), IP, SP, LR and PC are reserved.
+};
+
+const uint32_t kBakerValidRegsNarrow[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+};
+
void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) {
- uint32_t valid_regs[] = {
- 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
- 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
- };
DCHECK_ALIGNED(offset, 4u);
DCHECK_LT(offset, 4 * KB);
constexpr size_t kMethodCodeSize = 8u;
constexpr size_t kLiteralOffset = 0u;
uint32_t method_idx = 0u;
- for (uint32_t base_reg : valid_regs) {
- for (uint32_t holder_reg : valid_regs) {
+ for (uint32_t base_reg : kBakerValidRegs) {
+ for (uint32_t holder_reg : kBakerValidRegs) {
uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12);
const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
ASSERT_EQ(kMethodCodeSize, raw_code.size());
@@ -655,8 +660,8 @@ void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref
// All thunks are at the end.
uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
method_idx = 0u;
- for (uint32_t base_reg : valid_regs) {
- for (uint32_t holder_reg : valid_regs) {
+ for (uint32_t base_reg : kBakerValidRegs) {
+ for (uint32_t holder_reg : kBakerValidRegs) {
++method_idx;
uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12);
@@ -725,20 +730,16 @@ void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref
}
void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) {
- uint32_t valid_regs[] = {
- 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
- 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
- };
DCHECK_ALIGNED(offset, 4u);
DCHECK_LT(offset, 32u);
constexpr size_t kMethodCodeSize = 6u;
constexpr size_t kLiteralOffset = 0u;
uint32_t method_idx = 0u;
- for (uint32_t base_reg : valid_regs) {
+ for (uint32_t base_reg : kBakerValidRegs) {
if (base_reg >= 8u) {
continue;
}
- for (uint32_t holder_reg : valid_regs) {
+ for (uint32_t holder_reg : kBakerValidRegs) {
uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr});
ASSERT_EQ(kMethodCodeSize, raw_code.size());
@@ -757,11 +758,11 @@ void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t r
// All thunks are at the end.
uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
method_idx = 0u;
- for (uint32_t base_reg : valid_regs) {
+ for (uint32_t base_reg : kBakerValidRegs) {
if (base_reg >= 8u) {
continue;
}
- for (uint32_t holder_reg : valid_regs) {
+ for (uint32_t holder_reg : kBakerValidRegs) {
++method_idx;
uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg;
@@ -1021,10 +1022,6 @@ TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast
}
TEST_F(Thumb2RelativePatcherTest, BakerArray) {
- uint32_t valid_regs[] = {
- 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
- 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
- };
auto ldr = [](uint32_t base_reg) {
uint32_t index_reg = (base_reg == 0u) ? 1u : 0u;
uint32_t ref_reg = (base_reg == 2) ? 3u : 2u;
@@ -1033,7 +1030,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerArray) {
constexpr size_t kMethodCodeSize = 8u;
constexpr size_t kLiteralOffset = 0u;
uint32_t method_idx = 0u;
- for (uint32_t base_reg : valid_regs) {
+ for (uint32_t base_reg : kBakerValidRegs) {
++method_idx;
const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)});
ASSERT_EQ(kMethodCodeSize, raw_code.size());
@@ -1049,7 +1046,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerArray) {
// All thunks are at the end.
uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
method_idx = 0u;
- for (uint32_t base_reg : valid_regs) {
+ for (uint32_t base_reg : kBakerValidRegs) {
++method_idx;
uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)});
@@ -1106,14 +1103,10 @@ TEST_F(Thumb2RelativePatcherTest, BakerArray) {
}
TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) {
- uint32_t valid_regs[] = {
- 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
- 8, 9, 10, 11, // IP, SP, LR and PC are reserved.
- };
constexpr size_t kMethodCodeSize = 8u;
constexpr size_t kLiteralOffset = 4u;
uint32_t method_idx = 0u;
- for (uint32_t root_reg : valid_regs) {
+ for (uint32_t root_reg : kBakerValidRegs) {
++method_idx;
uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
@@ -1130,7 +1123,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) {
// All thunks are at the end.
uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
method_idx = 0u;
- for (uint32_t root_reg : valid_regs) {
+ for (uint32_t root_reg : kBakerValidRegs) {
++method_idx;
uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12);
@@ -1165,14 +1158,10 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) {
}
TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) {
- uint32_t valid_regs[] = {
- 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address.
- // Not appplicable to high registers.
- };
constexpr size_t kMethodCodeSize = 6u;
constexpr size_t kLiteralOffset = 2u;
uint32_t method_idx = 0u;
- for (uint32_t root_reg : valid_regs) {
+ for (uint32_t root_reg : kBakerValidRegsNarrow) {
++method_idx;
uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0});
@@ -1189,7 +1178,7 @@ TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) {
// All thunks are at the end.
uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment);
method_idx = 0u;
- for (uint32_t root_reg : valid_regs) {
+ for (uint32_t root_reg : kBakerValidRegsNarrow) {
++method_idx;
uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset);
uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg;
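The hoisted kBakerValidRegs/kBakerValidRegsNarrow arrays encode which registers the thunks may reference. A hedged sketch of the same predicate, mirroring CheckValidReg() from the code_generator_arm_vixl.h hunk above (IsValidBakerThunkReg is an illustrative name, not part of the patch):

    #include <cstdint>

    // Illustrative predicate matching both kBakerValidRegs above and CheckValidReg():
    // any register below IP (r12) except the marking register rMR (r8) may be
    // encoded in the Baker thunk data.
    bool IsValidBakerThunkReg(uint32_t reg) {
      return reg < 12u && reg != 8u;  // 12 == IP, 8 == rMR
    }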
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index ac17303cf9..7123ae73b4 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -32,8 +32,8 @@
#define BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET 0x20
// The offsets from art_quick_read_barrier_mark_introspection to the GC root entrypoints,
// i.e. art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}.
-#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET 0x80
-#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET 0xc0
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET 0xc0
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET 0xe0
// The offset from art_quick_read_barrier_mark_introspection to the array switch cases,
// i.e. art_quick_read_barrier_mark_introspection_arrays.
#define BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET 0x100
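These offsets are what generated code adds to the main introspection entrypoint to reach the GC root entrypoints. A minimal sketch of that derivation, assuming the constants above (the helper itself is hypothetical, not ART code):

    #include <cstdint>

    // The GC root entrypoints sit at fixed offsets from the main introspection
    // entrypoint, so retargeting the entrypoint pointer is a single add.
    uintptr_t GcRootIntrospectionEntrypoint(uintptr_t main_entrypoint, bool wide) {
      return main_entrypoint + (wide ? 0xc0u   // ..._GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
                                     : 0xe0u); // ..._GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
    }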
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 0fd239a244..526960b79d 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -2362,23 +2362,19 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
// Helper macros for Baker CC read barrier mark introspection (BRBMI).
-.macro BRBMI_FOR_12_REGISTERS macro_for_register, macro_for_reserved_register
+.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
\macro_for_register r0
\macro_for_register r1
\macro_for_register r2
\macro_for_register r3
- \macro_for_reserved_register // R4 is reserved for the entrypoint address.
+ \macro_for_register r4
\macro_for_register r5
\macro_for_register r6
\macro_for_register r7
- \macro_for_register r8
+ \macro_for_reserved_register // r8 (rMR) is the marking register.
\macro_for_register r9
\macro_for_register r10
\macro_for_register r11
-.endm
-
-.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
- BRBMI_FOR_12_REGISTERS \macro_for_register, \macro_for_reserved_register
\macro_for_reserved_register // IP is reserved.
\macro_for_reserved_register // SP is reserved.
\macro_for_reserved_register // LR is reserved.
@@ -2386,16 +2382,13 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
.endm
.macro BRBMI_RETURN_SWITCH_CASE reg
+ .balign 8
.Lmark_introspection_return_switch_case_\reg:
+ mov rMR, #1
mov \reg, ip
bx lr
.endm
-.macro BRBMI_BAD_RETURN_SWITCH_CASE
-.Lmark_introspection_return_switch_case_bad:
- BRBMI_BKPT_FILL_4B
-.endm
-
.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg
.byte (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2
.endm
@@ -2458,9 +2451,9 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
// If reference is null, just return it in the right register.
cmp ip, #0
beq .Lmark_introspection_return\label_suffix
- // Use R4 as temp and check the mark bit of the reference.
- ldr r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
- tst r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+ // Use rMR as temp and check the mark bit of the reference.
+ ldr rMR, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ tst rMR, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
beq .Lmark_introspection_unmarked\label_suffix
.Lmark_introspection_return\label_suffix:
.endm
@@ -2473,7 +2466,7 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
// the highest bits and the "forwarding address" state to have all bits set.
#error "Unexpected lock word state shift or forwarding address state value."
#endif
- cmp r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
+ cmp rMR, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
bhs .Lmark_introspection_forwarding_address\label_suffix
.endm
@@ -2483,41 +2476,50 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
// Shift left by the forwarding address shift. This clears out the state bits since they are
// in the top 2 bits of the lock word.
- lsl ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+ lsl ip, rMR, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
b .Lmark_introspection_return\label_suffix
.endm
.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
// Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
- ldrh r4, [lr, #(-1 + \ldr_offset + 2)]
+ ldrh rMR, [lr, #(-1 + \ldr_offset + 2)]
.endm
.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
// Load the 16-bit instruction. Adjust for the thumb state in LR.
- ldrh r4, [lr, #(-1 + \ldr_offset)]
+ ldrh rMR, [lr, #(-1 + \ldr_offset)]
.endm
-.macro BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH gc_root_ldr_offset, label_suffix
- .balign 64
+.macro BRBMI_EXTRACT_RETURN_REG_wide
+ lsr rMR, rMR, #12 // Extract `ref_reg`.
+.endm
+
+.macro BRBMI_EXTRACT_RETURN_REG_narrow
+ and rMR, rMR, #7 // Extract `ref_reg`.
+.endm
+
+.macro BRBMI_LOAD_AND_EXTRACT_RETURN_REG ldr_offset, label_suffix
+ BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \ldr_offset
+ BRBMI_EXTRACT_RETURN_REG\label_suffix
+.endm
+
+.macro BRBMI_GC_ROOT gc_root_ldr_offset, label_suffix
+ .balign 32
.thumb_func
.type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
.hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
.global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
- BRBMI_RUNTIME_CALL
- // Load the LDR (or the half of it) that contains Rt.
- BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \gc_root_ldr_offset
- b .Lmark_introspection_extract_register_and_return\label_suffix
- // We've used 28 bytes since the "gc_roots" entrypoint (22 bytes for
- // BRBMI_RUNTIME_CALL, 4 bytes for LDRH and 2 bytes for the branch). Squeeze
- // the 6 byte forwarding address extraction here across the 32-byte boundary.
- BRBMI_EXTRACT_FORWARDING_ADDRESS \label_suffix
- // And the slow path taking exactly 30 bytes (6 bytes for the forwarding
- // address check, 22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near
- // branch) shall take the rest of the 32-byte section (within a cache line).
+ BRBMI_LOAD_AND_EXTRACT_RETURN_REG \gc_root_ldr_offset, \label_suffix
+.endm
+
+.macro BRBMI_FIELD_SLOW_PATH ldr_offset, label_suffix
+ .balign 16
+.Lmark_introspection_unmarked\label_suffix:
+ // Note: Generates exactly 16 bytes of code.
BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
- BRBMI_RUNTIME_CALL
- b .Lmark_introspection_return\label_suffix
+ BRBMI_LOAD_AND_EXTRACT_RETURN_REG \ldr_offset, \label_suffix
+ b .Lmark_introspection_runtime_call
.endm
/*
@@ -2540,9 +2542,12 @@ art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
* not do the gray bit check.
*
* For field accesses and array loads with a constant index the thunk loads
- * the reference into IP using introspection and calls the main entrypoint,
- * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
- * the passed reference is poisoned.
+ * the reference into IP using introspection and calls the main entrypoint
+ * ("wide", for 32-bit LDR) art_quick_read_barrier_mark_introspection or
+ * the "narrow" entrypoint (for 16-bit LDR). The latter is at a known
+ * offset (BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)
+ * from the main entrypoint and the thunk adjusts the entrypoint pointer.
+ * With heap poisoning enabled, the passed reference is poisoned.
*
* For array accesses with non-constant index, the thunk inserts the bits
* 0-5 of the LDR instruction to the entrypoint address, effectively
@@ -2560,53 +2565,61 @@ art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
* (And even with heap poisoning enabled, GC roots are not poisoned.)
* To re-use the same entrypoint pointer in generated code, we make sure
* that the gc root entrypoint (a copy of the entrypoint with a different
- * offset for introspection loads) is located at a known offset (128 bytes,
- * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
- * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
- * the root register to IP and jumps to the customized entrypoint,
- * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
- * performs all the fast-path checks, so we need just the slow path.
+ * offset for introspection loads) is located at a known offset (0xc0/0xe0
+ * bytes, or BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET/
+ * BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET) from the
+ * main entrypoint and the GC root thunk adjusts the entrypoint pointer,
+ * moves the root register to IP and jumps to the customized entrypoint,
+ * art_quick_read_barrier_mark_introspection_gc_roots_{wide,narrow}.
+ * The thunk also performs all the fast-path checks, so we need just the
+ * slow path.
*
* The code structure is
- * art_quick_read_barrier_mark_introspection:
+ * art_quick_read_barrier_mark_introspection: // @0x00
* Up to 32 bytes code for main entrypoint fast-path code for fields
* (and array elements with constant offset) with LDR encoding T3;
* jumps to the switch in the "narrow" entrypoint.
- * Padding to 32 bytes if needed.
- * art_quick_read_barrier_mark_introspection_narrow:
+ * art_quick_read_barrier_mark_introspection_narrow: // @0x20
* Up to 48 bytes code for fast path code for fields (and array
* elements with constant offset) with LDR encoding T1, ending in the
* return switch instruction TBB and the table with switch offsets.
- * Padding to 80 bytes if needed.
- * .Lmark_introspection_return_switch_case_r0:
- * Exactly 48 bytes of code for the return switch cases (12 cases,
- * including BKPT for the reserved registers).
- * Ends at 128 bytes total.
- * art_quick_read_barrier_mark_introspection_gc_roots_wide:
- * GC root entrypoint code for LDR encoding T3 (28 bytes).
- * Forwarding address extraction for LDR encoding T3 (6 bytes).
- * Slow path for main entrypoint for LDR encoding T3 (30 bytes).
- * Ends at 192 bytes total.
- * art_quick_read_barrier_mark_introspection_gc_roots_narrow:
- * GC root entrypoint code for LDR encoding T1 (28 bytes).
- * Forwarding address extraction for LDR encoding T1 (6 bytes).
- * Slow path for main entrypoint for LDR encoding T1 (30 bytes).
- * Ends at 256 bytes total.
- * art_quick_read_barrier_mark_introspection_arrays:
+ * .Lmark_introspection_return_switch_case_r0: // @0x50
+ * Exactly 88 bytes of code for the return switch cases (8 bytes per
+ * case, 11 cases; no code for reserved registers).
+ * .Lmark_introspection_forwarding_address_narrow: // @0xa8
+ * Exactly 6 bytes to extract the forwarding address and jump to the
+ * "narrow" entrypoint fast path.
+ * .Lmark_introspection_return_switch_case_bad: // @0xae
+ * Exactly 2 bytes, bkpt for unexpected return register.
+ * .Lmark_introspection_unmarked_narrow: // @0xb0
+ * Exactly 16 bytes for "narrow" entrypoint slow path.
+ * art_quick_read_barrier_mark_introspection_gc_roots_wide: // @0xc0
+ * GC root entrypoint code for LDR encoding T3 (10 bytes); loads and
+ * extracts the return register and jumps to the runtime call.
+ * .Lmark_introspection_forwarding_address_wide: // @0xca
+ * Exactly 6 bytes to extract the forwarding address and jump to the
+ * "wide" entrypoint fast path.
+ * .Lmark_introspection_unmarked_wide: // @0xd0
+ * Exactly 16 bytes for "wide" entrypoint slow path.
+ * art_quick_read_barrier_mark_introspection_gc_roots_narrow: // @0xe0
+ * GC root entrypoint code for LDR encoding T1 (8 bytes); loads and
+ * extracts the return register and falls through to the runtime call.
+ * .Lmark_introspection_runtime_call: // @0xe8
+ * Exactly 24 bytes for the runtime call to MarkReg() and jump to the
+ * return switch.
+ * art_quick_read_barrier_mark_introspection_arrays: // @0x100
* Exactly 128 bytes for array load switch cases (16x2 instructions).
*/
.balign 512
ENTRY art_quick_read_barrier_mark_introspection
- // At this point, IP contains the reference, R4 can be freely used.
- // (R4 is reserved for the entrypoint address.)
+ // At this point, IP contains the reference, rMR is clobbered by the thunk
+ // and can be freely used as it will be set back to 1 before returning.
// For heap poisoning, the reference is poisoned, so unpoison it first.
UNPOISON_HEAP_REF ip
- // Check for null or marked, lock word is loaded into IP.
+ // Check for null or marked, lock word is loaded into rMR.
BRBMI_CHECK_NULL_AND_MARKED _wide
- // Load the half of the instruction that contains Rt.
- BRBMI_LOAD_RETURN_REG_FROM_CODE_wide BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
-.Lmark_introspection_extract_register_and_return_wide:
- lsr r4, r4, #12 // Extract `ref_reg`.
+ // Load and extract the return register from the instruction.
+ BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide
b .Lmark_introspection_return_switch
.balign 32
@@ -2615,25 +2628,45 @@ ENTRY art_quick_read_barrier_mark_introspection
.hidden art_quick_read_barrier_mark_introspection_narrow
.global art_quick_read_barrier_mark_introspection_narrow
art_quick_read_barrier_mark_introspection_narrow:
- // At this point, IP contains the reference, R4 can be freely used.
- // (R4 is reserved for the entrypoint address.)
+ // At this point, IP contains the reference, rMR is clobbered by the thunk
+ // and can be freely used as it will be set back to 1 before returning.
// For heap poisoning, the reference is poisoned, so unpoison it first.
UNPOISON_HEAP_REF ip
- // Check for null or marked, lock word is loaded into R4.
+ // Check for null or marked, lock word is loaded into rMR.
BRBMI_CHECK_NULL_AND_MARKED _narrow
- // Load the 16-bit instruction.
- BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
-.Lmark_introspection_extract_register_and_return_narrow:
- and r4, r4, #7 // Extract `ref_reg`.
+ // Load and extract the return register from the instruction.
+ BRBMI_LOAD_AND_EXTRACT_RETURN_REG BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow
.Lmark_introspection_return_switch:
- tbb [pc, r4] // Jump to the switch case.
+ tbb [pc, rMR] // Jump to the switch case.
.Lmark_introspection_return_table:
BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
- .balign 16
- BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE
+ BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE, /* no code */
+
+ .balign 8
+ BRBMI_EXTRACT_FORWARDING_ADDRESS _narrow // 6 bytes
+.Lmark_introspection_return_switch_case_bad:
+ bkpt // 2 bytes
+
+ BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET, _narrow
+
+ // 8 bytes for the loading and extracting of the return register.
+ BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
+ // 2 bytes for near branch to the runtime call.
+ b .Lmark_introspection_runtime_call
+
+ BRBMI_EXTRACT_FORWARDING_ADDRESS _wide // Not even 4-byte aligned.
+
+ BRBMI_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET, _wide
+
+ // 8 bytes for the loading and extracting of the return register.
+ BRBMI_GC_ROOT BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow
+ // The runtime call and the branch back to the switch take exactly 24 bytes
+ // (22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near branch)
+ // and fill the rest of the 32-byte section (within a cache line).
+.Lmark_introspection_runtime_call:
+ BRBMI_RUNTIME_CALL
+ b .Lmark_introspection_return_switch
- BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
- BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow
.balign 256
.thumb_func
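For the return switch in the hunk above: the byte table at .Lmark_introspection_return_table immediately follows the 4-byte TBB, so the PC operand equals the table base and each entry stores half the distance to its 8-byte case. A hedged sketch of the dispatch (illustrative only, not ART code):

    #include <cstdint>

    // TBB branches to PC + 2 * table[index]; with the table placed right after the
    // 4-byte TBB, PC equals the table base, and each byte entry was emitted as
    // (case - table) / 2, so the target is exactly the 8-byte case that sets rMR
    // back to 1, moves the marked reference from IP to ref_reg and returns.
    uint32_t TbbReturnSwitchTarget(uint32_t table_base, const uint8_t* table, uint32_t ref_reg) {
      return table_base + 2u * table[ref_reg];
    }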
diff --git a/runtime/oat.h b/runtime/oat.h
index 01d391401d..0318606f87 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@ class InstructionSetFeatures;
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- // Last oat version changed reason: Retrieve Class* and String* from .data.bimg.rel.ro .
- static constexpr uint8_t kOatVersion[] = { '1', '4', '0', '\0' };
+ // Last oat version changed reason: Use rMR as temp in Baker RB introspection marking.
+ static constexpr uint8_t kOatVersion[] = { '1', '4', '1', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";