diff options
author | 2023-01-03 16:20:50 +0000 | |
---|---|---|
committer | 2023-01-04 16:00:48 +0000 | |
commit | d4229601e0fb46b0a013b52370aeda3887aea8e9 (patch) | |
tree | b11bf51ee8d9554bbad440ea4157c81e4eb860e7 /compiler | |
parent | 5497f749b4d35c7b0767d21cb487e2fee293cd07 (diff) |
Add a write barrier elimination pass
We can eliminate redundant write barriers, as we don't need more than
one for the same receiver. For example:
```
MyObject o;
o.inner_obj = io;
o.inner_obj2 = io2;
o.inner_obj3 = io3;
```
We can keep the write barrier for `inner_obj` and remove the other
two.
Note that we cannot perform this optimization across
invokes, suspend checks, or instructions that can throw.
Local improvements (Pixel 5, speed compile):
* System server: -280KB (-0.56%)
* SystemUIGoogle: -330KB (-1.16%)
* AGSA: -3876KB (-1.19%)
Bug: 260843353
Fixes: 260843353
Change-Id: Ibf98efbe891ee00e46125853c3e97ae30aa3ff30
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/Android.bp | 1 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 34 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 5 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.cc | 58 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.h | 9 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 82 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86.h | 17 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 60 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 8 | ||||
-rw-r--r-- | compiler/optimizing/graph_visualizer.cc | 6 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 17 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 23 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 73 | ||||
-rw-r--r-- | compiler/optimizing/optimization.cc | 6 | ||||
-rw-r--r-- | compiler/optimizing/optimization.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 7 | ||||
-rw-r--r-- | compiler/optimizing/optimizing_compiler_stats.h | 2 | ||||
-rw-r--r-- | compiler/optimizing/write_barrier_elimination.cc | 161 | ||||
-rw-r--r-- | compiler/optimizing/write_barrier_elimination.h | 56 |
19 files changed, 519 insertions, 107 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index 117e8dc6b0..cab7fbeb2f 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -91,6 +91,7 @@ art_cc_defaults { "optimizing/ssa_phi_elimination.cc", "optimizing/stack_map_stream.cc", "optimizing/superblock_cloner.cc", + "optimizing/write_barrier_elimination.cc", "trampolines/trampoline_compiler.cc", "utils/assembler.cc", "utils/jni_macro_assembler.cc", diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7fb3b2408b..5a5d36d879 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -43,6 +43,7 @@ #include "mirror/var_handle.h" #include "offsets.h" #include "optimizing/common_arm64.h" +#include "optimizing/nodes.h" #include "thread.h" #include "utils/arm64/assembler_arm64.h" #include "utils/assembler.h" @@ -1426,12 +1427,12 @@ void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* l } } -void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) { +void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool emit_null_check) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register card = temps.AcquireX(); Register temp = temps.AcquireW(); // Index within the CardTable - 32bit. vixl::aarch64::Label done; - if (value_can_be_null) { + if (emit_null_check) { __ Cbz(value, &done); } // Load the address of the card table into `card`. @@ -1453,7 +1454,7 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_ // of the card to mark; and 2. to load the `kCardDirty` value) saves a load // (no need to explicitly load `kCardDirty` as an immediate value). 
__ Strb(card, MemOperand(card, temp.X())); - if (value_can_be_null) { + if (emit_null_check) { __ Bind(&done); } } @@ -2229,7 +2230,8 @@ void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) { void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); bool is_predicated = instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet(); @@ -2269,8 +2271,12 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, } } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - codegen_->MarkGCCard(obj, Register(value), value_can_be_null); + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)) && + write_barrier_kind != WriteBarrierKind::kDontEmit) { + codegen_->MarkGCCard( + obj, + Register(value), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } if (is_predicated) { @@ -2935,7 +2941,11 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { } } - codegen_->MarkGCCard(array, value.W(), /* value_can_be_null= */ false); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + codegen_->MarkGCCard(array, value.W(), /* emit_null_check= */ false); + } if (can_value_be_null) { DCHECK(do_store.IsLinked()); @@ -3957,7 +3967,10 @@ void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction } void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + 
HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } // Temp is used for read barrier. @@ -6220,7 +6233,10 @@ void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { } void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 434263104f..deba88b860 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -329,7 +329,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void HandleCondition(HCondition* instruction); @@ -617,7 +618,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // Emit a write barrier. 
void MarkGCCard(vixl::aarch64::Register object, vixl::aarch64::Register value, - bool value_can_be_null); + bool emit_null_check); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 002ca79802..51d6a46ddb 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -5803,8 +5803,9 @@ void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register a __ CompareAndBranchIfNonZero(temp1, &fail); } -void LocationsBuilderARMVIXL::HandleFieldSet( - HInstruction* instruction, const FieldInfo& field_info) { +void LocationsBuilderARMVIXL::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = @@ -5827,8 +5828,12 @@ void LocationsBuilderARMVIXL::HandleFieldSet( // Temporary registers for the write barrier. // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. if (needs_write_barrier) { - locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 
- locations->AddTemp(Location::RequiresRegister()); + if (write_barrier_kind != WriteBarrierKind::kDontEmit) { + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else if (kPoisonHeapReferences) { + locations->AddTemp(Location::RequiresRegister()); + } } else if (generate_volatile) { // ARM encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive @@ -5849,7 +5854,8 @@ void LocationsBuilderARMVIXL::HandleFieldSet( void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = instruction->GetLocations(); @@ -5965,10 +5971,16 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)) && + write_barrier_kind != WriteBarrierKind::kDontEmit) { vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); vixl32::Register card = RegisterFrom(locations->GetTemp(1)); - codegen_->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null); + codegen_->MarkGCCard( + temp, + card, + base, + RegisterFrom(value), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } if (is_volatile) { @@ -6241,11 +6253,14 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, } void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind()); } void 
InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { @@ -6278,11 +6293,14 @@ void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instr } void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind()); } void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderARMVIXL::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { @@ -6764,8 +6782,10 @@ void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) { locations->SetInAt(2, Location::RequiresRegister()); } if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. + // Temporary registers for the write barrier or register poisoning. + // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of + // InstructionCodeGeneratorARMVIXL::VisitArraySet. 
+ locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); } } @@ -6917,7 +6937,11 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { } } - codegen_->MarkGCCard(temp1, temp2, array, value, /* value_can_be_null= */ false); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + codegen_->MarkGCCard(temp1, temp2, array, value, /* emit_null_check= */ false); + } if (can_value_be_null) { DCHECK(do_store.IsReferenced()); @@ -7148,9 +7172,9 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, vixl32::Register card, vixl32::Register object, vixl32::Register value, - bool value_can_be_null) { + bool emit_null_check) { vixl32::Label is_null; - if (value_can_be_null) { + if (emit_null_check) { __ CompareAndBranchIfZero(value, &is_null, /* is_far_target=*/ false); } // Load the address of the card table into `card`. @@ -7173,7 +7197,7 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, // of the card to mark; and 2. to load the `kCardDirty` value) saves a load // (no need to explicitly load `kCardDirty` as an immediate value). 
__ Strb(card, MemOperand(card, temp)); - if (value_can_be_null) { + if (emit_null_check) { __ Bind(&is_null); } } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 20fb7708fb..872a17b285 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -310,7 +310,9 @@ class LocationsBuilderARMVIXL : public HGraphVisitor { void HandleIntegerRotate(LocationSummary* locations); void HandleLongRotate(LocationSummary* locations); void HandleShift(HBinaryOperation* operation); - void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); Location ArithmeticZeroOrFpuRegister(HInstruction* input); @@ -379,7 +381,8 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void GenerateMinMaxInt(LocationSummary* locations, bool is_min); @@ -543,7 +546,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Register card, vixl::aarch32::Register object, vixl::aarch32::Register value, - bool value_can_be_null); + bool emit_null_check); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 5daa73ea35..58cb56d243 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -35,6 +35,7 @@ #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "mirror/var_handle.h" +#include "optimizing/nodes.h" #include 
"scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/assembler.h" @@ -5741,13 +5742,10 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke DCHECK_EQ(size, linker_patches->size()); } -void CodeGeneratorX86::MarkGCCard(Register temp, - Register card, - Register object, - Register value, - bool value_can_be_null) { +void CodeGeneratorX86::MarkGCCard( + Register temp, Register card, Register object, Register value, bool emit_null_check) { NearLabel is_null; - if (value_can_be_null) { + if (emit_null_check) { __ testl(value, value); __ j(kEqual, &is_null); } @@ -5772,7 +5770,7 @@ void CodeGeneratorX86::MarkGCCard(Register temp, // (no need to explicitly load `kCardDirty` as an immediate value). __ movb(Address(temp, card, TIMES_1, 0), X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); - if (value_can_be_null) { + if (emit_null_check) { __ Bind(&is_null); } } @@ -5876,7 +5874,9 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, } } -void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { +void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = @@ -5913,10 +5913,13 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too. - // Ensure the card is in a byte register. 
- locations->AddTemp(Location::RegisterLocation(ECX)); + if (write_barrier_kind != WriteBarrierKind::kDontEmit) { + locations->AddTemp(Location::RequiresRegister()); + // Ensure the card is in a byte register. + locations->AddTemp(Location::RegisterLocation(ECX)); + } else if (kPoisonHeapReferences) { + locations->AddTemp(Location::RequiresRegister()); + } } } } @@ -5927,7 +5930,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, Address field_addr, Register base, bool is_volatile, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { LocationSummary* locations = instruction->GetLocations(); Location value = locations->InAt(value_index); bool needs_write_barrier = @@ -6040,10 +6044,15 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, codegen_->MaybeRecordImplicitNullCheck(instruction); } - if (needs_write_barrier) { + if (needs_write_barrier && write_barrier_kind != WriteBarrierKind::kDontEmit) { Register temp = locations->GetTemp(0).AsRegister<Register>(); Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard(temp, card, base, value.AsRegister<Register>(), value_can_be_null); + codegen_->MarkGCCard( + temp, + card, + base, + value.AsRegister<Register>(), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } if (is_volatile) { @@ -6053,7 +6062,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = instruction->GetLocations(); @@ -6078,7 +6088,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, field_addr, base, is_volatile, - value_can_be_null); + 
value_can_be_null, + write_barrier_kind); if (is_predicated) { __ Bind(&pred_is_null); @@ -6094,19 +6105,25 @@ void InstructionCodeGeneratorX86::VisitStaticFieldGet(HStaticFieldGet* instructi } void LocationsBuilderX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind()); } void InstructionCodeGeneratorX86::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo()); + HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind()); } void InstructionCodeGeneratorX86::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderX86::VisitPredicatedInstanceFieldGet( @@ -6367,10 +6384,12 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); } if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); + // Used by reference poisoning or emitting write barrier. 
+ locations->AddTemp(Location::RequiresRegister()); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + // Only used when emitting a write barrier. Ensure the card is in a byte register. + locations->AddTemp(Location::RegisterLocation(ECX)); + } } } @@ -6487,9 +6506,16 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { } } - Register card = locations->GetTemp(1).AsRegister<Register>(); - codegen_->MarkGCCard( - temp, card, array, value.AsRegister<Register>(), /* value_can_be_null= */ false); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen_->MarkGCCard(temp, + card, + array, + value.AsRegister<Register>(), + /* emit_null_check= */ false); + } if (can_value_be_null) { DCHECK(do_store.IsLinked()); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 02b967fdd4..9f09e171fe 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -197,7 +197,9 @@ class LocationsBuilderX86 : public HGraphVisitor { void HandleInvoke(HInvoke* invoke); void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* instruction); - void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); + void HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); bool CpuHasAvxFeatureFlag(); bool CpuHasAvx2FeatureFlag(); @@ -250,7 +252,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { Address field_addr, Register base, bool is_volatile, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); 
private: // Generate code for the given suspend check. If not null, `successor` @@ -280,7 +283,8 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); // Generate a heap reference load using one register `out`: @@ -520,11 +524,8 @@ class CodeGeneratorX86 : public CodeGenerator { void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; // Emit a write barrier. - void MarkGCCard(Register temp, - Register card, - Register object, - Register value, - bool value_can_be_null); + void MarkGCCard( + Register temp, Register card, Register object, Register value, bool emit_null_check); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index d7867110db..2d7dc441ea 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -36,6 +36,7 @@ #include "mirror/class-inl.h" #include "mirror/object_reference.h" #include "mirror/var_handle.h" +#include "optimizing/nodes.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/assembler.h" @@ -5173,6 +5174,9 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); } } + + // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of + // InstructionCodeGeneratorX86_64::HandleFieldSet. if (needs_write_barrier) { // Temporary registers for the write barrier. 
locations->AddTemp(Location::RequiresRegister()); @@ -5234,7 +5238,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, bool is_volatile, bool is_atomic, bool value_can_be_null, - bool byte_swap) { + bool byte_swap, + WriteBarrierKind write_barrier_kind) { LocationSummary* locations = instruction->GetLocations(); Location value = locations->InAt(value_index); @@ -5352,10 +5357,16 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, codegen_->MaybeRecordImplicitNullCheck(instruction); } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index))) { + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index)) && + write_barrier_kind != WriteBarrierKind::kDontEmit) { CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>(); - codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null); + codegen_->MarkGCCard( + temp, + card, + base, + value.AsRegister<CpuRegister>(), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); } if (is_volatile) { @@ -5365,7 +5376,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null) { + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = instruction->GetLocations(); @@ -5390,7 +5402,9 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, base, is_volatile, /*is_atomic=*/ false, - value_can_be_null); + value_can_be_null, + /*byte_swap=*/ false, + write_barrier_kind); if (is_predicated) { __ Bind(&pred_is_null); @@ -5402,7 +5416,10 @@ void 
LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instructio } void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderX86_64::VisitPredicatedInstanceFieldGet( @@ -5442,7 +5459,10 @@ void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { } void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { - HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); } void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { @@ -5673,9 +5693,12 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { } if (needs_write_barrier) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. + // Used by reference poisoning or emitting write barrier. locations->AddTemp(Location::RequiresRegister()); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + // Only used when emitting a write barrier. 
+ locations->AddTemp(Location::RequiresRegister()); + } } } @@ -5793,9 +5816,16 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { } } - CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); - codegen_->MarkGCCard( - temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false); + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->MarkGCCard(temp, + card, + array, + value.AsRegister<CpuRegister>(), + /* emit_null_check= */ false); + } if (can_value_be_null) { DCHECK(do_store.IsLinked()); @@ -5994,9 +6024,9 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object, CpuRegister value, - bool value_can_be_null) { + bool emit_null_check) { NearLabel is_null; - if (value_can_be_null) { + if (emit_null_check) { __ testl(value, value); __ j(kEqual, &is_null); } @@ -6021,7 +6051,7 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, // of the card to mark; and 2. to load the `kCardDirty` value) saves a load // (no need to explicitly load `kCardDirty` as an immediate value). 
__ movb(Address(temp, card, TIMES_1, 0), card); - if (value_can_be_null) { + if (emit_null_check) { __ Bind(&is_null); } } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index ffc2641dbc..1fac62fa34 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -251,7 +251,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { bool is_volatile, bool is_atomic, bool value_can_be_null, - bool byte_swap = false); + bool byte_swap, + WriteBarrierKind write_barrier_kind); void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr); @@ -274,7 +275,8 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, - bool value_can_be_null); + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); @@ -436,7 +438,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { CpuRegister card, CpuRegister object, CpuRegister value, - bool value_can_be_null); + bool emit_null_check); void GenerateMemoryBarrier(MemBarrierKind kind); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index f6076525d8..96eaa61209 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -483,6 +483,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("can_trigger_gc") << std::boolalpha << array_set->GetSideEffects().Includes(SideEffects::CanTriggerGC()) << std::noboolalpha; + StartAttributeStream("write_barrier_kind") << array_set->GetWriteBarrierKind(); } void VisitCompare(HCompare* compare) override { @@ -552,7 +553,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { 
iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << iset->GetFieldType(); - StartAttributeStream("predicated") << std::boolalpha << iset->GetIsPredicatedSet(); + StartAttributeStream("predicated") + << std::boolalpha << iset->GetIsPredicatedSet() << std::noboolalpha; + StartAttributeStream("write_barrier_kind") << iset->GetWriteBarrierKind(); } void VisitStaticFieldGet(HStaticFieldGet* sget) override { @@ -567,6 +570,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { sset->GetFieldInfo().GetDexFile().PrettyField(sset->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << sset->GetFieldType(); + StartAttributeStream("write_barrier_kind") << sset->GetWriteBarrierKind(); } void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) override { diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 88c125d4a9..e319634fbe 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -4041,13 +4041,16 @@ static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) { InstructionCodeGeneratorX86* instr_codegen = down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor()); // Store the value to the field - instr_codegen->HandleFieldSet(invoke, - value_index, - value_type, - Address(reference, offset, TIMES_1, 0), - reference, - is_volatile, - /* value_can_be_null */ true); + instr_codegen->HandleFieldSet( + invoke, + value_index, + value_type, + Address(reference, offset, TIMES_1, 0), + reference, + is_volatile, + /* value_can_be_null */ true, + // Value can be null, and this write barrier is not being relied on for other sets. 
+ WriteBarrierKind::kEmitWithNullCheck); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 98702e64b9..be15c07aed 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -3985,16 +3985,19 @@ static void GenerateVarHandleSet(HInvoke* invoke, Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0); // Store the value to the field. - codegen->GetInstructionCodegen()->HandleFieldSet(invoke, - value_index, - last_temp_index, - value_type, - dst, - CpuRegister(target.object), - is_volatile, - is_atomic, - /*value_can_be_null=*/ true, - byte_swap); + codegen->GetInstructionCodegen()->HandleFieldSet( + invoke, + value_index, + last_temp_index, + value_type, + dst, + CpuRegister(target.object), + is_volatile, + is_atomic, + /*value_can_be_null=*/true, + byte_swap, + // Value can be null, and this write barrier is not being relied on for other sets. + WriteBarrierKind::kEmitWithNullCheck); // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that. diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index cbb55918cf..f33b0d8e96 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -6403,6 +6403,27 @@ class HPredicatedInstanceFieldGet final : public HExpression<2> { const FieldInfo field_info_; }; +enum class WriteBarrierKind { + // Emit the write barrier, with a runtime optimization which checks if the value that is being + // set is null. + kEmitWithNullCheck, + // Emit the write barrier, without the runtime null check optimization. This could be set because: + // A) It is a write barrier for an ArraySet (which does the optimization with the type check, so + // it never does the optimization at the write barrier stage) + // B) We know that the input can't be null + // C) This write barrier is actually several write barriers coalesced into one.
Potentially we + // could ask if every value is null for a runtime optimization at the cost of compile time / code + // size. At the time of writing it was deemed not worth the effort. + kEmitNoNullCheck, + // Skip emitting the write barrier. This could be set because: + // A) The write barrier is not needed (e.g. it is not a reference, or the value is the null + // constant) + // B) This write barrier was coalesced into another one so there's no need to emit it. + kDontEmit, + kLast = kDontEmit +}; +std::ostream& operator<<(std::ostream& os, WriteBarrierKind rhs); + class HInstanceFieldSet final : public HExpression<2> { public: HInstanceFieldSet(HInstruction* object, @@ -6427,6 +6448,7 @@ class HInstanceFieldSet final : public HExpression<2> { dex_file) { SetPackedFlag<kFlagValueCanBeNull>(true); SetPackedFlag<kFlagIsPredicatedSet>(false); + SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitWithNullCheck); SetRawInputAt(0, object); SetRawInputAt(1, value); } @@ -6447,6 +6469,12 @@ class HInstanceFieldSet final : public HExpression<2> { void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); } bool GetIsPredicatedSet() const { return GetPackedFlag<kFlagIsPredicatedSet>(); } void SetIsPredicatedSet(bool value = true) { SetPackedFlag<kFlagIsPredicatedSet>(value); } + WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); } + void SetWriteBarrierKind(WriteBarrierKind kind) { + DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck) + << "We shouldn't go back to the original value."; + SetPackedField<WriteBarrierKindField>(kind); + } DECLARE_INSTRUCTION(InstanceFieldSet); @@ -6456,11 +6484,17 @@ class HInstanceFieldSet final : public HExpression<2> { private: static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits; static constexpr size_t kFlagIsPredicatedSet = kFlagValueCanBeNull + 1; - static constexpr size_t kNumberOfInstanceFieldSetPackedBits = kFlagIsPredicatedSet + 1; + static constexpr 
size_t kWriteBarrierKind = kFlagIsPredicatedSet + 1; + static constexpr size_t kWriteBarrierKindSize = + MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast)); + static constexpr size_t kNumberOfInstanceFieldSetPackedBits = + kWriteBarrierKind + kWriteBarrierKindSize; static_assert(kNumberOfInstanceFieldSetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); const FieldInfo field_info_; + using WriteBarrierKindField = + BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>; }; class HArrayGet final : public HExpression<2> { @@ -6581,6 +6615,8 @@ class HArraySet final : public HExpression<3> { SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == DataType::Type::kReference); SetPackedFlag<kFlagValueCanBeNull>(true); SetPackedFlag<kFlagStaticTypeOfArrayIsObjectArray>(false); + // ArraySets never do the null check optimization at the write barrier stage. + SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitNoNullCheck); SetRawInputAt(0, array); SetRawInputAt(1, index); SetRawInputAt(2, value); @@ -6653,6 +6689,16 @@ class HArraySet final : public HExpression<3> { : SideEffects::None(); } + WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); } + + void SetWriteBarrierKind(WriteBarrierKind kind) { + DCHECK(kind != WriteBarrierKind::kEmitNoNullCheck) + << "We shouldn't go back to the original value."; + DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck) + << "We never do the null check optimization for ArraySets."; + SetPackedField<WriteBarrierKindField>(kind); + } + DECLARE_INSTRUCTION(ArraySet); protected: @@ -6668,11 +6714,16 @@ class HArraySet final : public HExpression<3> { // Cached information for the reference_type_info_ so that codegen // does not need to inspect the static type. 
static constexpr size_t kFlagStaticTypeOfArrayIsObjectArray = kFlagValueCanBeNull + 1; - static constexpr size_t kNumberOfArraySetPackedBits = - kFlagStaticTypeOfArrayIsObjectArray + 1; + static constexpr size_t kWriteBarrierKind = kFlagStaticTypeOfArrayIsObjectArray + 1; + static constexpr size_t kWriteBarrierKindSize = + MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast)); + static constexpr size_t kNumberOfArraySetPackedBits = kWriteBarrierKind + kWriteBarrierKindSize; static_assert(kNumberOfArraySetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using ExpectedComponentTypeField = BitField<DataType::Type, kFieldExpectedComponentType, kFieldExpectedComponentTypeSize>; + + using WriteBarrierKindField = + BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>; }; class HArrayLength final : public HExpression<1> { @@ -7470,6 +7521,7 @@ class HStaticFieldSet final : public HExpression<2> { declaring_class_def_index, dex_file) { SetPackedFlag<kFlagValueCanBeNull>(true); + SetPackedField<WriteBarrierKindField>(WriteBarrierKind::kEmitWithNullCheck); SetRawInputAt(0, cls); SetRawInputAt(1, value); } @@ -7485,6 +7537,13 @@ class HStaticFieldSet final : public HExpression<2> { bool GetValueCanBeNull() const { return GetPackedFlag<kFlagValueCanBeNull>(); } void ClearValueCanBeNull() { SetPackedFlag<kFlagValueCanBeNull>(false); } + WriteBarrierKind GetWriteBarrierKind() { return GetPackedField<WriteBarrierKindField>(); } + void SetWriteBarrierKind(WriteBarrierKind kind) { + DCHECK(kind != WriteBarrierKind::kEmitWithNullCheck) + << "We shouldn't go back to the original value."; + SetPackedField<WriteBarrierKindField>(kind); + } + DECLARE_INSTRUCTION(StaticFieldSet); protected: @@ -7492,11 +7551,17 @@ class HStaticFieldSet final : public HExpression<2> { private: static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits; - static constexpr size_t kNumberOfStaticFieldSetPackedBits = kFlagValueCanBeNull + 1; + static 
constexpr size_t kWriteBarrierKind = kFlagValueCanBeNull + 1; + static constexpr size_t kWriteBarrierKindSize = + MinimumBitsToStore(static_cast<size_t>(WriteBarrierKind::kLast)); + static constexpr size_t kNumberOfStaticFieldSetPackedBits = + kWriteBarrierKind + kWriteBarrierKindSize; static_assert(kNumberOfStaticFieldSetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); const FieldInfo field_info_; + using WriteBarrierKindField = + BitField<WriteBarrierKind, kWriteBarrierKind, kWriteBarrierKindSize>; }; class HStringBuilderAppend final : public HVariableInputSizeInstruction { diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index e0c5933657..73a47517bf 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -55,6 +55,7 @@ #include "select_generator.h" #include "sharpening.h" #include "side_effects_analysis.h" +#include "write_barrier_elimination.h" // Decide between default or alternative pass name. 
@@ -95,6 +96,8 @@ const char* OptimizationPassName(OptimizationPass pass) { return ConstructorFenceRedundancyElimination::kCFREPassName; case OptimizationPass::kScheduling: return HInstructionScheduling::kInstructionSchedulingPassName; + case OptimizationPass::kWriteBarrierElimination: + return WriteBarrierElimination::kWBEPassName; #ifdef ART_ENABLE_CODEGEN_arm case OptimizationPass::kInstructionSimplifierArm: return arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName; @@ -268,6 +271,9 @@ ArenaVector<HOptimization*> ConstructOptimizations( case OptimizationPass::kLoadStoreElimination: opt = new (allocator) LoadStoreElimination(graph, stats, pass_name); break; + case OptimizationPass::kWriteBarrierElimination: + opt = new (allocator) WriteBarrierElimination(graph, stats, pass_name); + break; case OptimizationPass::kScheduling: opt = new (allocator) HInstructionScheduling( graph, codegen->GetCompilerOptions().GetInstructionSet(), codegen, pass_name); diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index 76960bf314..c3ba17563e 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -84,6 +84,7 @@ enum class OptimizationPass { kScheduling, kSelectGenerator, kSideEffectsAnalysis, + kWriteBarrierElimination, #ifdef ART_ENABLE_CODEGEN_arm kInstructionSimplifierArm, kCriticalNativeAbiFixupArm, diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 807c78e62a..dbf247cd64 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -51,6 +51,7 @@ #include "linker/linker_patch.h" #include "nodes.h" #include "oat_quick_method_header.h" +#include "optimizing/write_barrier_elimination.h" #include "prepare_for_register_allocation.h" #include "reference_type_propagation.h" #include "register_allocator_linear_scan.h" @@ -899,6 +900,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* 
allocator, RunBaselineOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer); } else { RunOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer); + PassScope scope(WriteBarrierElimination::kWBEPassName, &pass_observer); + WriteBarrierElimination(graph, compilation_stats_.get()).Run(); } RegisterAllocator::Strategy regalloc_strategy = @@ -992,6 +995,10 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( optimizations); RunArchOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer); + { + PassScope scope(WriteBarrierElimination::kWBEPassName, &pass_observer); + WriteBarrierElimination(graph, compilation_stats_.get()).Run(); + } AllocateRegisters(graph, codegen.get(), diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 100441aaff..698a1471c3 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -118,6 +118,8 @@ enum class MethodCompilationStat { kConstructorFenceRemovedLSE, kConstructorFenceRemovedPFRA, kConstructorFenceRemovedCFRE, + kPossibleWriteBarrier, + kRemovedWriteBarrier, kBitstringTypeCheck, kJitOutOfMemoryForCommit, kFullLSEAllocationRemoved, diff --git a/compiler/optimizing/write_barrier_elimination.cc b/compiler/optimizing/write_barrier_elimination.cc new file mode 100644 index 0000000000..9023cc24b7 --- /dev/null +++ b/compiler/optimizing/write_barrier_elimination.cc @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "write_barrier_elimination.h" + +#include "base/arena_allocator.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" +#include "optimizing/nodes.h" + +namespace art HIDDEN { + +class WBEVisitor : public HGraphVisitor { + public: + WBEVisitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), + scoped_allocator_(graph->GetArenaStack()), + current_write_barriers_(scoped_allocator_.Adapter(kArenaAllocWBE)), + stats_(stats) {} + + void VisitBasicBlock(HBasicBlock* block) override { + // We clear the map to perform this optimization only in the same block. Doing it across blocks + // would entail non-trivial merging of states. 
+ current_write_barriers_.clear(); + HGraphVisitor::VisitBasicBlock(block); + } + + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override { + DCHECK(!instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())); + + if (instruction->GetFieldType() != DataType::Type::kReference || + instruction->GetValue()->IsNullConstant()) { + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + return; + } + + MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier); + HInstruction* obj = HuntForOriginalReference(instruction->InputAt(0)); + auto it = current_write_barriers_.find(obj); + if (it != current_write_barriers_.end()) { + DCHECK(it->second->IsInstanceFieldSet()); + DCHECK(it->second->AsInstanceFieldSet()->GetWriteBarrierKind() != + WriteBarrierKind::kDontEmit); + DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock()); + it->second->AsInstanceFieldSet()->SetWriteBarrierKind(WriteBarrierKind::kEmitNoNullCheck); + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier); + } else { + const bool inserted = current_write_barriers_.insert({obj, instruction}).second; + DCHECK(inserted); + DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + } + } + + void VisitStaticFieldSet(HStaticFieldSet* instruction) override { + DCHECK(!instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())); + + if (instruction->GetFieldType() != DataType::Type::kReference || + instruction->GetValue()->IsNullConstant()) { + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + return; + } + + MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier); + HInstruction* cls = HuntForOriginalReference(instruction->InputAt(0)); + auto it = current_write_barriers_.find(cls); + if (it != current_write_barriers_.end()) { + DCHECK(it->second->IsStaticFieldSet()); + DCHECK(it->second->AsStaticFieldSet()->GetWriteBarrierKind() != 
WriteBarrierKind::kDontEmit); + DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock()); + it->second->AsStaticFieldSet()->SetWriteBarrierKind(WriteBarrierKind::kEmitNoNullCheck); + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier); + } else { + const bool inserted = current_write_barriers_.insert({cls, instruction}).second; + DCHECK(inserted); + DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + } + } + + void VisitArraySet(HArraySet* instruction) override { + if (instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) { + ClearCurrentValues(); + } + + if (instruction->GetComponentType() != DataType::Type::kReference || + instruction->GetValue()->IsNullConstant()) { + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + return; + } + + HInstruction* arr = HuntForOriginalReference(instruction->InputAt(0)); + MaybeRecordStat(stats_, MethodCompilationStat::kPossibleWriteBarrier); + auto it = current_write_barriers_.find(arr); + if (it != current_write_barriers_.end()) { + DCHECK(it->second->IsArraySet()); + DCHECK(it->second->AsArraySet()->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + DCHECK_EQ(it->second->GetBlock(), instruction->GetBlock()); + // ArraySets never use the null check optimization, so the kind is already kEmitNoNullCheck.
+ DCHECK(it->second->AsArraySet()->GetWriteBarrierKind() == WriteBarrierKind::kEmitNoNullCheck); + instruction->SetWriteBarrierKind(WriteBarrierKind::kDontEmit); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedWriteBarrier); + } else { + const bool inserted = current_write_barriers_.insert({arr, instruction}).second; + DCHECK(inserted); + DCHECK(instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit); + } + } + + void VisitInstruction(HInstruction* instruction) override { + if (instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC())) { + ClearCurrentValues(); + } + } + + private: + void ClearCurrentValues() { current_write_barriers_.clear(); } + + HInstruction* HuntForOriginalReference(HInstruction* ref) const { + // An original reference can be transformed by instructions like: + // i0 NewArray + // i1 HInstruction(i0) <-- NullCheck, BoundType, IntermediateAddress. + // i2 ArraySet(i1, index, value) + DCHECK(ref != nullptr); + while (ref->IsNullCheck() || ref->IsBoundType() || ref->IsIntermediateAddress()) { + ref = ref->InputAt(0); + } + return ref; + } + + ScopedArenaAllocator scoped_allocator_; + + // Stores a map of <Receiver, InstructionWhereTheWriteBarrierIs>. + // `InstructionWhereTheWriteBarrierIs` is used for DCHECKs only. 
+ ScopedArenaHashMap<HInstruction*, HInstruction*> current_write_barriers_; + + OptimizingCompilerStats* const stats_; + + DISALLOW_COPY_AND_ASSIGN(WBEVisitor); +}; + +bool WriteBarrierElimination::Run() { + WBEVisitor wbe_visitor(graph_, stats_); + wbe_visitor.VisitReversePostOrder(); + return true; +} + +} // namespace art diff --git a/compiler/optimizing/write_barrier_elimination.h b/compiler/optimizing/write_barrier_elimination.h new file mode 100644 index 0000000000..a3769e7421 --- /dev/null +++ b/compiler/optimizing/write_barrier_elimination.h @@ -0,0 +1,56 @@ +/* + * Copyright (C) 2022 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_ +#define ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_ + +#include "base/macros.h" +#include "optimization.h" + +namespace art HIDDEN { + +// Eliminates unnecessary write barriers from InstanceFieldSet, StaticFieldSet, and ArraySet. +// +// We can eliminate redundant write barriers as we don't need several for the same receiver. For +// example: +// MyObject o; +// o.inner_obj = io; +// o.inner_obj2 = io2; +// o.inner_obj3 = io3; +// We can keep the write barrier for `inner_obj` and remove the other two. +// +// In order to do this, we set the WriteBarrierKind of the instruction. 
The instruction's kind is +// set to kEmitNoNullCheck (if this write barrier coalesced other write barriers, we don't want to +// perform the null check optimization), or to kDontEmit (if the write barrier as a whole is not +// needed). +class WriteBarrierElimination : public HOptimization { + public: + WriteBarrierElimination(HGraph* graph, + OptimizingCompilerStats* stats, + const char* name = kWBEPassName) + : HOptimization(graph, name, stats) {} + + bool Run() override; + + static constexpr const char* kWBEPassName = "write_barrier_elimination"; + + private: + DISALLOW_COPY_AND_ASSIGN(WriteBarrierElimination); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_WRITE_BARRIER_ELIMINATION_H_ |