author     2024-09-06 15:14:28 +0100
committer  2024-09-10 10:05:09 +0000
commit     e736081033cf19eab3e0d7e868a9c205a8894c51 (patch)
tree       cb18ef4356f63803b125a7306f14d97e19cd5cf7 /compiler/optimizing
parent     c9e493db2a94d4fd99036897cd021224e9506f43 (diff)
Improve Unsafe.get* code generation on arm64.
Test: 858-checker-unsafe
Bug: 364963560
Change-Id: I9f7014be462e70931a650b58169b784238344546
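The 16 KiB threshold that this change is built around comes from the AArch64 encoding of LDR with an unsigned scaled immediate: the 32-bit form folds offsets 0..16380 (multiples of 4) directly into the instruction, so a small constant offset needs no separate ADD. A minimal standalone sketch of that encoding check (illustrative only, not ART code):

#include <cstdint>

// Illustrative, not ART code: LDR Wt, [Xn, #imm] uses a 12-bit immediate
// scaled by the 4-byte access size, encoding offsets 0..16380 in steps of
// 4 -- hence the "offset < 16KiB" comment and the
// kReferenceLoadMinFarOffset = 16 * KB threshold in this change.
constexpr bool FitsLdrWImmediate(uint64_t offset) {
  return offset < 16 * 1024 && offset % 4 == 0;
}

static_assert(FitsLdrWImmediate(12), "typical field offset folds into the LDR");
static_assert(!FitsLdrWImmediate(16 * 1024), "offsets >= 16 KiB need extra instructions");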
Diffstat (limited to 'compiler/optimizing')
compiler/optimizing/code_generator_arm64.cc |  6
compiler/optimizing/code_generator_arm64.h  |  6
compiler/optimizing/intrinsics_arm64.cc     | 68
3 files changed, 54 insertions, 26 deletions
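The first two files in the stat are a paired move: kReferenceLoadMinFarOffset leaves code_generator_arm64.cc for code_generator_arm64.h so that intrinsics_arm64.cc can consult it. A minimal sketch of the visibility point (hypothetical file names, not the real headers):

// shared.h (hypothetical): a static constexpr defined in a header is
// visible to every translation unit that includes it; a constexpr defined
// inside one .cc file is not visible to any other .cc file.
#include <cstdint>
static constexpr uint32_t kThreshold = 16 * 1024;

// user.cc (hypothetical):
//   #include "shared.h"
//   bool IsFar(uint32_t offset) { return offset >= kThreshold; }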
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 5b7f880589..6ffd1aa686 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -103,12 +103,6 @@ uint16_t SYS_CNTVCT_EL0 = SystemRegisterEncoder<1, 3, 14, 0, 2>::value;
 // generates less code/data with a small num_entries.
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
-// Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle
-// offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions.
-// For the Baker read barrier implementation using link-time generated thunks we need to split
-// the offset explicitly.
-constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
-
 inline Condition ARM64Condition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return eq;
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 82c9abbef4..9f226e9e63 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -56,6 +56,12 @@ static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize);
 // must be blocked.
 static constexpr int kMaxMacroInstructionSizeInBytes = 15 * vixl::aarch64::kInstructionSize;
 
+// Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle
+// offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions.
+// For the Baker read barrier implementation using link-time generated thunks we need to split
+// the offset explicitly.
+static constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
+
 static const vixl::aarch64::Register kParameterCoreRegisters[] = {
   vixl::aarch64::x1,
   vixl::aarch64::x2,
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 80ec1b629a..71ef84e1aa 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -700,6 +700,13 @@ void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) {
                  MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value()));
 }
 
+static bool ReadBarrierNeedsTemp(bool is_volatile, HInvoke* invoke) {
+  return is_volatile ||
+      !invoke->InputAt(2)->IsLongConstant() ||
+      invoke->InputAt(2)->AsLongConstant()->GetValue() >= kReferenceLoadMinFarOffset;
+}
+
+
 static void GenUnsafeGet(HInvoke* invoke,
                          DataType::Type type,
                          bool is_volatile,
@@ -712,7 +719,6 @@ static void GenUnsafeGet(HInvoke* invoke,
   Location base_loc = locations->InAt(1);
   Register base = WRegisterFrom(base_loc);      // Object pointer.
   Location offset_loc = locations->InAt(2);
-  Register offset = XRegisterFrom(offset_loc);  // Long offset.
   Location trg_loc = locations->Out();
   Register trg = RegisterFrom(trg_loc, type);
 
@@ -721,16 +727,35 @@ static void GenUnsafeGet(HInvoke* invoke,
     Register temp = WRegisterFrom(locations->GetTemp(0));
     MacroAssembler* masm = codegen->GetVIXLAssembler();
     // Piggy-back on the field load path using introspection for the Baker read barrier.
-    __ Add(temp, base, offset.W());  // Offset should not exceed 32 bits.
-    codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
-                                                   trg_loc,
-                                                   base,
-                                                   MemOperand(temp.X()),
-                                                   /* needs_null_check= */ false,
-                                                   is_volatile);
+    if (offset_loc.IsConstant()) {
+      uint32_t offset = Int64FromLocation(offset_loc);
+      Location maybe_temp = ReadBarrierNeedsTemp(is_volatile, invoke)
+          ? locations->GetTemp(0) : Location::NoLocation();
+      DCHECK_EQ(locations->GetTempCount(), ReadBarrierNeedsTemp(is_volatile, invoke));
+      codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                     trg_loc,
+                                                     base.W(),
+                                                     offset,
+                                                     maybe_temp,
+                                                     /* needs_null_check= */ false,
+                                                     is_volatile);
+    } else {
+      __ Add(temp, base, WRegisterFrom(offset_loc));  // Offset should not exceed 32 bits.
+      codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
+                                                     trg_loc,
+                                                     base,
+                                                     MemOperand(temp.X()),
+                                                     /* needs_null_check= */ false,
+                                                     is_volatile);
+    }
   } else {
     // Other cases.
-    MemOperand mem_op(base.X(), offset);
+    MemOperand mem_op;
+    if (offset_loc.IsConstant()) {
+      mem_op = MemOperand(base.X(), Int64FromLocation(offset_loc));
+    } else {
+      mem_op = MemOperand(base.X(), XRegisterFrom(offset_loc));
+    }
     if (is_volatile) {
       codegen->LoadAcquire(invoke, type, trg, mem_op, /* needs_null_check= */ true);
     } else {
@@ -746,7 +771,8 @@ static void GenUnsafeGet(HInvoke* invoke,
 
 static void CreateUnsafeGetLocations(ArenaAllocator* allocator,
                                      HInvoke* invoke,
-                                     CodeGeneratorARM64* codegen) {
+                                     CodeGeneratorARM64* codegen,
+                                     bool is_volatile = false) {
   bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
   LocationSummary* locations =
       new (allocator) LocationSummary(invoke,
@@ -756,13 +782,15 @@ static void CreateUnsafeGetLocations(ArenaAllocator* allocator,
                                       kIntrinsified);
   if (can_call && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
-    // We need a temporary register for the read barrier load in order to use
-    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
-    locations->AddTemp(FixedTempLocation());
+    if (ReadBarrierNeedsTemp(is_volatile, invoke)) {
+      // We need a temporary register for the read barrier load in order to use
+      // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
+      locations->AddTemp(FixedTempLocation());
+    }
   }
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
   locations->SetOut(Location::RequiresRegister(),
                     (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
 }
@@ -793,28 +821,28 @@ void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGet(HInvoke* invoke) {
   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
 }
 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
-  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+  CreateUnsafeGetLocations(allocator_, invoke, codegen_, /* is_volatile= */ true);
 }
 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
-  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+  CreateUnsafeGetLocations(allocator_, invoke, codegen_, /* is_volatile= */ true);
 }
 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
 }
 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+  CreateUnsafeGetLocations(allocator_, invoke, codegen_, /* is_volatile= */ true);
 }
 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
-  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+  CreateUnsafeGetLocations(allocator_, invoke, codegen_, /* is_volatile= */ true);
 }
 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
 }
 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
-  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+  CreateUnsafeGetLocations(allocator_, invoke, codegen_, /* is_volatile= */ true);
 }
 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
-  CreateUnsafeGetLocations(allocator_, invoke, codegen_);
+  CreateUnsafeGetLocations(allocator_, invoke, codegen_, /* is_volatile= */ true);
 }
 void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
   CreateUnsafeGetLocations(allocator_, invoke, codegen_);
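Taken together: the locations now accept a constant offset input, and the new ReadBarrierNeedsTemp() decides whether the Baker read-barrier path still needs the fixed temp register for the explicit ADD. A condensed, self-contained restatement of that predicate (the boolean parameters are illustrative; the real function inspects the invoke's offset input):

#include <cstdint>

// Condensed restatement of ReadBarrierNeedsTemp() in the diff above
// (illustrative): the temp and the explicit ADD are kept for volatile
// accesses, for non-constant offsets, and for constant offsets at or
// beyond the single-instruction LDR immediate range.
constexpr uint32_t kReferenceLoadMinFarOffset = 16 * 1024;  // 16 KiB, as in the diff.

constexpr bool NeedsTemp(bool is_volatile, bool offset_is_constant, int64_t offset) {
  return is_volatile || !offset_is_constant || offset >= kReferenceLoadMinFarOffset;
}

static_assert(!NeedsTemp(false, true, 12));        // Small constant offset: direct LDR, no temp.
static_assert(NeedsTemp(true, true, 12));          // Volatile access: keep the temp.
static_assert(NeedsTemp(false, false, 0));         // Register offset: keep the temp.
static_assert(NeedsTemp(false, true, 16 * 1024));  // Far constant offset: keep the temp.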