Diffstat (limited to 'compiler/optimizing')
39 files changed, 1290 insertions, 482 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 529fc9e261..d2357a5d05 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1845,8 +1845,8 @@ void BoundsCheckElimination::Run() { // that value dominated by that instruction fits in that range. Range of that // value can be narrowed further down in the dominator tree. BCEVisitor visitor(graph_, side_effects_, induction_analysis_); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); + for (size_t i = 0, size = graph_->GetReversePostOrder().size(); i != size; ++i) { + HBasicBlock* current = graph_->GetReversePostOrder()[i]; if (visitor.IsAddedBlock(current)) { // Skip added blocks. Their effects are already taken care of. continue; @@ -1855,8 +1855,11 @@ void BoundsCheckElimination::Run() { // Skip forward to the current block in case new basic blocks were inserted // (which always appear earlier in reverse post order) to avoid visiting the // same basic block twice. - for ( ; !it.Done() && it.Current() != current; it.Advance()) { - } + size_t new_size = graph_->GetReversePostOrder().size(); + DCHECK_GE(new_size, size); + i += new_size - size; + DCHECK_EQ(current, graph_->GetReversePostOrder()[i]); + size = new_size; } // Perform cleanup. diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 0f8cdbb19b..8b450e11dc 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -671,9 +671,9 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, return; } ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc)); - for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) { - if (it.Current()->IsLoopHeader()) { - HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck(); + for (HBasicBlock* block : graph.GetReversePostOrder()) { + if (block->IsLoopHeader()) { + HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { loop_headers.push_back(suspend_check); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 9f92b20929..be65f89ef1 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -601,11 +601,21 @@ class ArraySetSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). 
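The bounds_check_elimination.cc hunk at the top of this diff replaces the old HReversePostOrderIterator loop with an index-based walk, so that basic blocks inserted while visiting (which always land earlier in reverse post order) can be skipped by advancing the index. A minimal sketch of that pattern, using illustrative names rather than the actual ART classes:

    // Sketch only; `Graph`, `Block` and `VisitBlock` stand in for the real ART types.
    void RunInReversePostOrder(Graph* graph) {
      for (size_t i = 0, size = graph->GetReversePostOrder().size(); i != size; ++i) {
        Block* current = graph->GetReversePostOrder()[i];
        VisitBlock(current);  // May insert new blocks, all earlier in reverse post order.
        size_t new_size = graph->GetReversePostOrder().size();
        // Skip the freshly inserted blocks: `i` keeps pointing at `current`, and the
        // loop increment then moves past it, so no block is visited twice.
        i += new_size - size;
        size = new_size;
      }
    }

The ARM read-barrier slow paths documented by the comment just above follow.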
class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { public: - ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location obj) - : SlowPathCodeARM(instruction), obj_(obj) { + ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location ref) + : SlowPathCodeARM(instruction), ref_(ref) { DCHECK(kEmitCompilerReadBarrier); } @@ -613,9 +623,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Register reg = obj_.AsRegister<Register>(); + Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg)); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || @@ -634,40 +644,213 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM { // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - DCHECK_NE(reg, SP); - DCHECK_NE(reg, LR); - DCHECK_NE(reg, PC); + DCHECK_NE(ref_reg, SP); + DCHECK_NE(ref_reg, LR); + DCHECK_NE(ref_reg, PC); // IP is used internally by the ReadBarrierMarkRegX entry point // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(reg, IP); - DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg; + DCHECK_NE(ref_reg, IP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in R0): // - // R0 <- obj + // R0 <- ref // R0 <- ReadBarrierMark(R0) - // obj <- R0 + // ref <- R0 // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(reg); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); // This runtime call does not require a stack map. arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ b(GetExitLabel()); } private: - const Location obj_; + // The location (register) of the marked object reference. + const Location ref_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathARM above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). 
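The comments above spell out the contract of the two ARM slow paths; the following is only a rough, self-contained model of that contract (the actual implementation is the hand-written assembly in the classes below, and `Mark` stands in for the per-register ReadBarrierMarkRegX entry point):

    #include <atomic>

    using Ref = void*;   // Stand-in for a heap reference.
    Ref Mark(Ref ref);   // Assumed: returns the to-space reference for `ref`.

    // ReadBarrierMarkSlowPathARM: `ref` becomes a to-space reference, but the
    // holder's field is left untouched (possibly still pointing to from-space).
    Ref MarkOnly(Ref ref) {
      return Mark(ref);
    }

    // ReadBarrierMarkAndUpdateFieldSlowPathARM: mark, then try to install the new
    // reference into the holder's field with a strong CAS and relaxed ordering.
    Ref MarkAndUpdateField(Ref ref, std::atomic<Ref>* field) {
      Ref old_ref = ref;
      ref = Mark(ref);
      if (ref != old_ref) {
        // If another thread concurrently stored a different reference in the
        // field, the CAS fails and the field is left as-is.
        field->compare_exchange_strong(old_ref, ref, std::memory_order_relaxed);
      }
      return ref;
    }

The ARM64, x86 and x86-64 variants further down follow the same contract.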
+class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM { + public: + ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp1, + Register temp2) + : SlowPathCodeARM(instruction), + ref_(ref), + obj_(obj), + field_offset_(field_offset), + temp1_(temp1), + temp2_(temp2) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = ref_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + DCHECK(field_offset_.IsRegisterPair()) << field_offset_; + + __ Bind(GetEntryLabel()); + + // Save the old reference. + // Note that we cannot use IP to save the old reference, as IP is + // used internally by the ReadBarrierMarkRegX entry point, and we + // need the old reference after the call to that entry point. + DCHECK_NE(temp1_, IP); + __ Mov(temp1_, ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + DCHECK_NE(ref_reg, SP); + DCHECK_NE(ref_reg, LR); + DCHECK_NE(ref_reg, PC); + // IP is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_reg, IP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in R0): + // + // R0 <- ref + // R0 <- ReadBarrierMark(R0) + // ref <- R0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg); + // This runtime call does not require a stack map. + arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*(obj_ + field_offset_)`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set + // (CAS) operation below would abort the CAS, leaving the field + // as-is. + Label done; + __ cmp(temp1_, ShifterOperand(ref_reg)); + __ b(&done, EQ); + + // Update the the holder's field atomically. This may fail if + // mutator updates before us, but it's OK. 
This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + + // Convenience aliases. + Register base = obj_; + // The UnsafeCASObject intrinsic uses a register pair as field + // offset ("long offset"), of which only the low part contains + // data. + Register offset = field_offset_.AsRegisterPairLow<Register>(); + Register expected = temp1_; + Register value = ref_reg; + Register tmp_ptr = IP; // Pointer to actual memory. + Register tmp = temp2_; // Value in memory. + + __ add(tmp_ptr, base, ShifterOperand(offset)); + + if (kPoisonHeapReferences) { + __ PoisonHeapReference(expected); + if (value == expected) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. + } else { + __ PoisonHeapReference(value); + } + } + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + + Label loop_head, exit_loop; + __ Bind(&loop_head); + + __ ldrex(tmp, tmp_ptr); + + __ subs(tmp, tmp, ShifterOperand(expected)); + + __ it(NE); + __ clrex(NE); + + __ b(&exit_loop, NE); + + __ strex(tmp, value, tmp_ptr); + __ cmp(tmp, ShifterOperand(1)); + __ b(&loop_head, EQ); + + __ Bind(&exit_loop); + + if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(expected); + if (value == expected) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. + } else { + __ UnpoisonHeapReference(value); + } + } + + __ Bind(&done); + __ b(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + + const Register temp1_; + const Register temp2_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM { public: @@ -5773,7 +5956,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { __ movt(temp, /* placeholder */ 0u); __ BindTrackedLabel(&labels->add_pc_label); __ add(temp, temp, ShifterOperand(PC)); - GenerateGcRootFieldLoad(load, out_loc, temp, 0); + GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kEmitCompilerReadBarrier); SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); codegen_->AddSlowPath(slow_path); __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); @@ -6644,7 +6827,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check) { + bool needs_null_check, + bool always_update_field, + Register* temp2) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6689,8 +6874,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // The actual reference load. if (index.IsValid()) { - // Load types involving an "index": ArrayGet and - // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. + // Load types involving an "index": ArrayGet, + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics. 
// /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) if (index.IsConstant()) { size_t computed_offset = @@ -6698,9 +6884,9 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); } else { // Handle the special case of the - // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use - // a register pair as index ("long offset"), of which only the low - // part contains data. + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics, which use a register pair as index ("long + // offset"), of which only the low part contains data. Register index_reg = index.IsRegisterPair() ? index.AsRegisterPairLow<Register>() : index.AsRegister<Register>(); @@ -6716,8 +6902,21 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i __ MaybeUnpoisonHeapReference(ref_reg); // Slow path marking the object `ref` when it is gray. - SlowPathCodeARM* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref); + SlowPathCodeARM* slow_path; + if (always_update_field) { + DCHECK(temp2 != nullptr); + // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address + // of the form `obj + field_offset`, where `obj` is a register and + // `field_offset` is a register pair (of which only the lower half + // is used). Thus `offset` and `scale_factor` above are expected + // to be null in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM( + instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref); + } AddSlowPath(slow_path); // if (rb_state == ReadBarrier::gray_ptr_) diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 4d59b47861..3d46aab31f 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -283,12 +283,12 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { // // root <- *(obj + offset) // - // while honoring read barriers if requires_read_barrier is true. + // while honoring read barriers if `requires_read_barrier` is true. void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, Register obj, uint32_t offset, - bool requires_read_barrier = kEmitCompilerReadBarrier); + bool requires_read_barrier); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, Label* true_target, @@ -508,6 +508,18 @@ class CodeGeneratorARM : public CodeGenerator { bool needs_null_check); // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. + + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at the address + // `obj + offset + (index << scale_factor)`, held by object `obj`, into + // `ref`, and mark it if needed. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires an extra temporary register, which must be provided as a + // non-null pointer (`temp2`). 
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, @@ -515,7 +527,9 @@ class CodeGeneratorARM : public CodeGenerator { Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check); + bool needs_null_check, + bool always_update_field = false, + Register* temp2 = nullptr); // Generate a read barrier for a heap reference within `instruction` // using a slow path. diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 9e59d8cc38..b53750966d 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -589,11 +589,21 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathARM64 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { public: - ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location obj) - : SlowPathCodeARM64(instruction), obj_(obj) { + ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location ref) + : SlowPathCodeARM64(instruction), ref_(ref) { DCHECK(kEmitCompilerReadBarrier); } @@ -602,7 +612,8 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(obj_.reg())); + DCHECK(ref_.IsRegister()) << ref_; + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || @@ -621,40 +632,207 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - DCHECK_NE(obj_.reg(), LR); - DCHECK_NE(obj_.reg(), WSP); - DCHECK_NE(obj_.reg(), WZR); + DCHECK_NE(ref_.reg(), LR); + DCHECK_NE(ref_.reg(), WSP); + DCHECK_NE(ref_.reg(), WZR); // IP0 is used internally by the ReadBarrierMarkRegX entry point // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(obj_.reg(), IP0); - DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg(); + DCHECK_NE(ref_.reg(), IP0); + DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); // "Compact" slow path, saving two moves. 
// // Instead of using the standard runtime calling convention (input // and output in W0): // - // W0 <- obj + // W0 <- ref // W0 <- ReadBarrierMark(W0) - // obj <- W0 + // ref <- W0 // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(obj_.reg()); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); // This runtime call does not require a stack map. arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ B(GetExitLabel()); } private: - const Location obj_; + // The location (register) of the marked object reference. + const Location ref_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathARM64 above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). +class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 { + public: + ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp) + : SlowPathCodeARM64(instruction), + ref_(ref), + obj_(obj), + field_offset_(field_offset), + temp_(temp) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierMarkAndUpdateFieldSlowPathARM64"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = WRegisterFrom(ref_); + DCHECK(locations->CanCall()); + DCHECK(ref_.IsRegister()) << ref_; + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + DCHECK(field_offset_.IsRegister()) << field_offset_; + + __ Bind(GetEntryLabel()); + + // Save the old reference. + // Note that we cannot use IP to save the old reference, as IP is + // used internally by the ReadBarrierMarkRegX entry point, and we + // need the old reference after the call to that entry point. + DCHECK_NE(LocationFrom(temp_).reg(), IP0); + __ Mov(temp_.W(), ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. 
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + DCHECK_NE(ref_.reg(), LR); + DCHECK_NE(ref_.reg(), WSP); + DCHECK_NE(ref_.reg(), WZR); + // IP0 is used internally by the ReadBarrierMarkRegX entry point + // as a temporary, it cannot be the entry point's input/output. + DCHECK_NE(ref_.reg(), IP0); + DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in W0): + // + // W0 <- ref + // W0 <- ReadBarrierMark(W0) + // ref <- W0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); + // This runtime call does not require a stack map. + arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*(obj_ + field_offset_)`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LDXR/CMP/BNE sequence of instructions in the compare-and-set + // (CAS) operation below would abort the CAS, leaving the field + // as-is. + vixl::aarch64::Label done; + __ Cmp(temp_.W(), ref_reg); + __ B(eq, &done); + + // Update the the holder's field atomically. This may fail if + // mutator updates before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + + MacroAssembler* masm = arm64_codegen->GetVIXLAssembler(); + UseScratchRegisterScope temps(masm); + + // Convenience aliases. + Register base = obj_.W(); + Register offset = XRegisterFrom(field_offset_); + Register expected = temp_.W(); + Register value = ref_reg; + Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. + Register tmp_value = temps.AcquireW(); // Value in memory. + + __ Add(tmp_ptr, base.X(), Operand(offset)); + + if (kPoisonHeapReferences) { + arm64_codegen->GetAssembler()->PoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. + } else { + arm64_codegen->GetAssembler()->PoisonHeapReference(value); + } + } + + // do { + // tmp_value = [tmp_ptr] - expected; + // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); + + vixl::aarch64::Label loop_head, comparison_failed, exit_loop; + __ Bind(&loop_head); + __ Ldxr(tmp_value, MemOperand(tmp_ptr)); + __ Cmp(tmp_value, expected); + __ B(&comparison_failed, ne); + __ Stxr(tmp_value, value, MemOperand(tmp_ptr)); + __ Cbnz(tmp_value, &loop_head); + __ B(&exit_loop); + __ Bind(&comparison_failed); + __ Clrex(); + __ Bind(&exit_loop); + + if (kPoisonHeapReferences) { + arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. + } else { + arm64_codegen->GetAssembler()->UnpoisonHeapReference(value); + } + } + + __ Bind(&done); + __ B(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. 
+ const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + + const Register temp_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { public: @@ -768,7 +946,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) << instruction_->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset_, 0U); + DCHECK_EQ(offset_, 0u); DCHECK(index_.IsRegister()); } } @@ -4098,7 +4276,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value(), - /*fixup_label*/ nullptr, + /* fixup_label */ nullptr, requires_read_barrier); break; } @@ -4143,7 +4321,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { out_loc, out.X(), offset, - /*fixup_label*/ nullptr, + /* fixup_label */ nullptr, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; @@ -4180,7 +4358,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { out_loc, out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex()), - /*fixup_label*/ nullptr, + /* fixup_label */ nullptr, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; @@ -4319,8 +4497,9 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { GenerateGcRootFieldLoad(load, load->GetLocations()->Out(), temp, - /* placeholder */ 0u, - ldr_label); + /* offset placeholder */ 0u, + ldr_label, + kEmitCompilerReadBarrier); SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label); codegen_->AddSlowPath(slow_path); @@ -5174,7 +5353,7 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); - size_t no_scale_factor = 0U; + size_t no_scale_factor = 0u; GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, @@ -5225,7 +5404,8 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* size_t scale_factor, Register temp, bool needs_null_check, - bool use_load_acquire) { + bool use_load_acquire, + bool always_update_field) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // If we are emitting an array load, we should not be using a @@ -5278,7 +5458,9 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // The actual reference load. if (index.IsValid()) { - // Load types involving an "index". + // Load types involving an "index": ArrayGet, + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics. if (use_load_acquire) { // UnsafeGetObjectVolatile intrinsic case. 
// Register `index` is not an index in an object array, but an @@ -5287,9 +5469,9 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* DCHECK(instruction->GetLocations()->Intrinsified()); DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) << instruction->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset, 0U); - DCHECK_EQ(scale_factor, 0U); - DCHECK_EQ(needs_null_check, 0U); + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, 0u); + DCHECK_EQ(needs_null_check, 0u); // /* HeapReference<Object> */ ref = *(obj + index) MemOperand field = HeapOperand(obj, XRegisterFrom(index)); LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); @@ -5300,10 +5482,10 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor); Load(type, ref_reg, HeapOperand(obj, computed_offset)); } else { - Register temp2 = temps.AcquireW(); - __ Add(temp2, obj, offset); - Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor)); - temps.Release(temp2); + Register temp3 = temps.AcquireW(); + __ Add(temp3, obj, offset); + Load(type, ref_reg, HeapOperand(temp3, XRegisterFrom(index), LSL, scale_factor)); + temps.Release(temp3); } } } else { @@ -5320,8 +5502,19 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); // Slow path marking the object `ref` when it is gray. - SlowPathCodeARM64* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref); + SlowPathCodeARM64* slow_path; + if (always_update_field) { + // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports + // address of the form `obj + field_offset`, where `obj` is a + // register and `field_offset` is a register. Thus `offset` and + // `scale_factor` above are expected to be null in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, 0u); /* "times 1" */ + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64( + instruction, ref, obj, /* field_offset */ index, temp); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref); + } AddSlowPath(slow_path); // if (rb_state == ReadBarrier::gray_ptr_) diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index eb28ecb427..7f54b4b6b2 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -289,13 +289,13 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { // // root <- *(obj + offset) // - // while honoring read barriers (if any). + // while honoring read barriers if `requires_read_barrier` is true. void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, vixl::aarch64::Register obj, uint32_t offset, - vixl::aarch64::Label* fixup_label = nullptr, - bool requires_read_barrier = kEmitCompilerReadBarrier); + vixl::aarch64::Label* fixup_label, + bool requires_read_barrier); // Generate a floating-point comparison. void GenerateFcmp(HInstruction* instruction); @@ -594,6 +594,13 @@ class CodeGeneratorARM64 : public CodeGenerator { bool needs_null_check); // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. 
+ // + // Load the object reference located at the address + // `obj + offset + (index << scale_factor)`, held by object `obj`, into + // `ref`, and mark it if needed. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl::aarch64::Register obj, @@ -602,7 +609,8 @@ class CodeGeneratorARM64 : public CodeGenerator { size_t scale_factor, vixl::aarch64::Register temp, bool needs_null_check, - bool use_load_acquire); + bool use_load_acquire, + bool always_update_field = false); // Generate a read barrier for a heap reference within `instruction` // using a slow path. diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index cac0543da3..b9e049ae48 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -3357,7 +3357,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) { GenerateGcRootFieldLoad(cls, out_loc, current_method, - ArtMethod::DeclaringClassOffset().Int32Value()); + ArtMethod::DeclaringClassOffset().Int32Value(), + kEmitCompilerReadBarrier); break; } case HLoadClass::LoadKind::kDexCacheViaMethod: { @@ -3369,7 +3370,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) { GetAssembler()->LoadFromOffset(kLoadWord, out, current_method, resolved_types_offset); // /* GcRoot<mirror::Class> */ out = out[type_index] size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - GenerateGcRootFieldLoad(cls, out_loc, out, offset); + GenerateGcRootFieldLoad(cls, out_loc, out, offset, kEmitCompilerReadBarrier); generate_null_check = !cls->IsInDexCache(); break; } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 1cd6184dd4..b0fa03899b 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -339,7 +339,7 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { Location root, vixl::aarch32::Register obj, uint32_t offset, - bool requires_read_barrier = kEmitCompilerReadBarrier); + bool requires_read_barrier); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, vixl::aarch32::Label* true_target, diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 02c1c3b69f..efd33c7025 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -426,11 +426,25 @@ class ArraySetSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); }; -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). 
class ReadBarrierMarkSlowPathX86 : public SlowPathCode { public: - ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj, bool unpoison) - : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) { + ReadBarrierMarkSlowPathX86(HInstruction* instruction, + Location ref, + bool unpoison_ref_before_marking) + : SlowPathCode(instruction), + ref_(ref), + unpoison_ref_before_marking_(unpoison_ref_before_marking) { DCHECK(kEmitCompilerReadBarrier); } @@ -438,9 +452,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Register reg = obj_.AsRegister<Register>(); + Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg)); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || @@ -455,44 +469,211 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - if (unpoison_) { + if (unpoison_ref_before_marking_) { // Object* ref = ref_addr->AsMirrorPtr() - __ MaybeUnpoisonHeapReference(reg); + __ MaybeUnpoisonHeapReference(ref_reg); } // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); - DCHECK_NE(reg, ESP); - DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg; + DCHECK_NE(ref_reg, ESP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in EAX): // - // EAX <- obj + // EAX <- ref // EAX <- ReadBarrierMark(EAX) - // obj <- EAX + // ref <- EAX // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(reg); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg); // This runtime call does not require a stack map. x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ jmp(GetExitLabel()); } private: - const Location obj_; - const bool unpoison_; + // The location (register) of the marked object reference. + const Location ref_; + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathX86 above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). 
+class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { + public: + ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction, + Location ref, + Register obj, + const Address& field_addr, + bool unpoison_ref_before_marking, + Register temp) + : SlowPathCode(instruction), + ref_(ref), + obj_(obj), + field_addr_(field_addr), + unpoison_ref_before_marking_(unpoison_ref_before_marking), + temp_(temp) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = ref_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + + __ Bind(GetEntryLabel()); + if (unpoison_ref_before_marking_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + } + + // Save the old (unpoisoned) reference. + __ movl(temp_, ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + DCHECK_NE(ref_reg, ESP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in EAX): + // + // EAX <- ref + // EAX <- ReadBarrierMark(EAX) + // ref <- EAX + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg); + // This runtime call does not require a stack map. + x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*field_addr`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LOCK CMPXCHGL instruction in the compare-and-set (CAS) + // operation below would abort the CAS, leaving the field as-is. + NearLabel done; + __ cmpl(temp_, ref_reg); + __ j(kEqual, &done); + + // Update the the holder's field atomically. This may fail if + // mutator updates before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + // This operation is implemented with a 32-bit LOCK CMPXLCHG + // instruction, which requires the expected value (the old + // reference) to be in EAX. Save EAX beforehand, and move the + // expected value (stored in `temp_`) into EAX. 
+ __ pushl(EAX); + __ movl(EAX, temp_); + + // Convenience aliases. + Register base = obj_; + Register expected = EAX; + Register value = ref_reg; + + bool base_equals_value = (base == value); + if (kPoisonHeapReferences) { + if (base_equals_value) { + // If `base` and `value` are the same register location, move + // `value` to a temporary register. This way, poisoning + // `value` won't invalidate `base`. + value = temp_; + __ movl(value, base); + } + + // Check that the register allocator did not assign the location + // of `expected` (EAX) to `value` nor to `base`, so that heap + // poisoning (when enabled) works as intended below. + // - If `value` were equal to `expected`, both references would + // be poisoned twice, meaning they would not be poisoned at + // all, as heap poisoning uses address negation. + // - If `base` were equal to `expected`, poisoning `expected` + // would invalidate `base`. + DCHECK_NE(value, expected); + DCHECK_NE(base, expected); + + __ PoisonHeapReference(expected); + __ PoisonHeapReference(value); + } + + __ LockCmpxchgl(field_addr_, value); + + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. + if (kPoisonHeapReferences) { + if (base_equals_value) { + // `value` has been moved to a temporary register, no need + // to unpoison it. + } else { + __ UnpoisonHeapReference(value); + } + // No need to unpoison `expected` (EAX), as it is be overwritten below. + } + + // Restore EAX. + __ popl(EAX); + + __ Bind(&done); + __ jmp(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The address of the marked reference field. The base of this address must be `obj_`. + const Address field_addr_; + + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; + + const Register temp_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86); +}; + // Slow path generating a read barrier for a heap reference. 
class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { public: @@ -5897,7 +6078,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), - /*fixup_label*/ nullptr, + /* fixup_label */ nullptr, requires_read_barrier); break; } @@ -5929,7 +6110,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { GenerateGcRootFieldLoad(cls, out_loc, Address::Absolute(address), - /*fixup_label*/ nullptr, + /* fixup_label */ nullptr, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; @@ -5957,7 +6138,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { GenerateGcRootFieldLoad(cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())), - /*fixup_label*/ nullptr, + /* fixup_label */ nullptr, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; @@ -6099,7 +6280,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier); SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); codegen_->AddSlowPath(slow_path); __ testl(out, out); @@ -6831,7 +7012,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruct // Slow path marking the GC root `root`. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( - instruction, root, /* unpoison */ false); + instruction, root, /* unpoison_ref_before_marking */ false); codegen_->AddSlowPath(slow_path); __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()), @@ -6896,7 +7077,9 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i Location ref, Register obj, const Address& src, - bool needs_null_check) { + bool needs_null_check, + bool always_update_field, + Register* temp) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6953,8 +7136,15 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. // Slow path marking the object `ref` when it is gray. - SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( - instruction, ref, /* unpoison */ true); + SlowPathCode* slow_path; + if (always_update_field) { + DCHECK(temp != nullptr); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86( + instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( + instruction, ref, /* unpoison_ref_before_marking */ true); + } AddSlowPath(slow_path); // We have done the "if" of the gray bit check above, now branch based on the flags. 
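The register-aliasing DCHECKs in the x86 mark-and-update slow path above rest on the fact that heap-reference poisoning is address negation, so poisoning the same register twice silently undoes itself. A tiny self-contained illustration of that property (`Poison` is only a model of the assembler's PoisonHeapReference helper):

    #include <cassert>
    #include <cstdint>

    // Assumption for illustration: poisoning a 32-bit heap reference negates it.
    constexpr uint32_t Poison(uint32_t ref) { return 0u - ref; }

    int main() {
      uint32_t ref = 0x12345678u;
      // Poisoning twice yields the original value, i.e. the reference ends up not
      // poisoned at all; hence the checks that neither `value` nor `base` aliases
      // `expected` before they are poisoned for the LOCK CMPXCHG.
      assert(Poison(Poison(ref)) == ref);
      return 0;
    }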
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index e7d9a43f58..1b51999546 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -259,12 +259,12 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { // // root <- *address // - // while honoring read barriers (if any). + // while honoring read barriers if `requires_read_barrier` is true. void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, const Address& address, - Label* fixup_label = nullptr, - bool requires_read_barrier = kEmitCompilerReadBarrier); + Label* fixup_label, + bool requires_read_barrier); // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not. // `is_wide` specifies whether it is long/double or not. @@ -499,13 +499,24 @@ class CodeGeneratorX86 : public CodeGenerator { uint32_t data_offset, Location index, bool needs_null_check); - // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at address `src`, held by + // object `obj`, into `ref`, and mark it if needed. The base of + // address `src` must be `obj`. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires a temporary register, which must be provided as a + // non-null pointer (`temp`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, const Address& src, - bool needs_null_check); + bool needs_null_check, + bool always_update_field = false, + Register* temp = nullptr); // Generate a read barrier for a heap reference within `instruction` // using a slow path. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 4b64c1b6ff..fcabeeae5d 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -445,11 +445,25 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); }; -// Slow path marking an object during a read barrier. +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). 
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { public: - ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj, bool unpoison) - : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) { + ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, + Location ref, + bool unpoison_ref_before_marking) + : SlowPathCode(instruction), + ref_(ref), + unpoison_ref_before_marking_(unpoison_ref_before_marking) { DCHECK(kEmitCompilerReadBarrier); } @@ -457,10 +471,10 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - CpuRegister cpu_reg = obj_.AsRegister<CpuRegister>(); - Register reg = cpu_reg.AsRegister(); + CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); + Register ref_reg = ref_cpu_reg.AsRegister(); DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg)); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || @@ -475,44 +489,218 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { << instruction_->DebugName(); __ Bind(GetEntryLabel()); - if (unpoison_) { + if (unpoison_ref_before_marking_) { // Object* ref = ref_addr->AsMirrorPtr() - __ MaybeUnpoisonHeapReference(cpu_reg); + __ MaybeUnpoisonHeapReference(ref_cpu_reg); } // No need to save live registers; it's taken care of by the // entrypoint. Also, there is no need to update the stack mask, // as this runtime call will not trigger a garbage collection. CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); - DCHECK_NE(reg, RSP); - DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg; + DCHECK_NE(ref_reg, RSP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; // "Compact" slow path, saving two moves. // // Instead of using the standard runtime calling convention (input // and output in R0): // - // RDI <- obj + // RDI <- ref // RAX <- ReadBarrierMark(RDI) - // obj <- RAX + // ref <- RAX // - // we just use rX (the register holding `obj`) as input and output + // we just use rX (the register containing `ref`) as input and output // of a dedicated entrypoint: // // rX <- ReadBarrierMarkRegX(rX) // int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(reg); + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg); // This runtime call does not require a stack map. x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); __ jmp(GetExitLabel()); } private: - const Location obj_; - const bool unpoison_; + // The location (register) of the marked object reference. + const Location ref_; + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64); }; +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathX86_64 above, which never tries to update +// `obj.field`). 
+// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). +class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { + public: + ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction, + Location ref, + CpuRegister obj, + const Address& field_addr, + bool unpoison_ref_before_marking, + CpuRegister temp1, + CpuRegister temp2) + : SlowPathCode(instruction), + ref_(ref), + obj_(obj), + field_addr_(field_addr), + unpoison_ref_before_marking_(unpoison_ref_before_marking), + temp1_(temp1), + temp2_(temp2) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); + Register ref_reg = ref_cpu_reg.AsRegister(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + + __ Bind(GetEntryLabel()); + if (unpoison_ref_before_marking_) { + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_cpu_reg); + } + + // Save the old (unpoisoned) reference. + __ movl(temp1_, ref_cpu_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + DCHECK_NE(ref_reg, RSP); + DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in R0): + // + // RDI <- ref + // RAX <- ReadBarrierMark(RDI) + // ref <- RAX + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg); + // This runtime call does not require a stack map. + x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*field_addr`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // LOCK CMPXCHGL instruction in the compare-and-set (CAS) + // operation below would abort the CAS, leaving the field as-is. + NearLabel done; + __ cmpl(temp1_, ref_cpu_reg); + __ j(kEqual, &done); + + // Update the the holder's field atomically. This may fail if + // mutator updates before us, but it's OK. 
This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + // This operation is implemented with a 32-bit LOCK CMPXCHG + // instruction, which requires the expected value (the old + // reference) to be in EAX. Save RAX beforehand, and move the + // expected value (stored in `temp1_`) into EAX. + __ movq(temp2_, CpuRegister(RAX)); + __ movl(CpuRegister(RAX), temp1_); + + // Convenience aliases. + CpuRegister base = obj_; + CpuRegister expected = CpuRegister(RAX); + CpuRegister value = ref_cpu_reg; + + bool base_equals_value = (base.AsRegister() == value.AsRegister()); + Register value_reg = ref_reg; + if (kPoisonHeapReferences) { + if (base_equals_value) { + // If `base` and `value` are the same register location, move + // `value_reg` to a temporary register. This way, poisoning + // `value_reg` won't invalidate `base`. + value_reg = temp1_.AsRegister(); + __ movl(CpuRegister(value_reg), base); + } + + // Check that the register allocator did not assign the location + // of `expected` (RAX) to `value` nor to `base`, so that heap + // poisoning (when enabled) works as intended below. + // - If `value` were equal to `expected`, both references would + // be poisoned twice, meaning they would not be poisoned at + // all, as heap poisoning uses address negation. + // - If `base` were equal to `expected`, poisoning `expected` + // would invalidate `base`. + DCHECK_NE(value_reg, expected.AsRegister()); + DCHECK_NE(base.AsRegister(), expected.AsRegister()); + + __ PoisonHeapReference(expected); + __ PoisonHeapReference(CpuRegister(value_reg)); + } + + __ LockCmpxchgl(field_addr_, CpuRegister(value_reg)); + + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. + if (kPoisonHeapReferences) { + if (base_equals_value) { + // `value_reg` has been moved to a temporary register, no need + // to unpoison it. + } else { + __ UnpoisonHeapReference(CpuRegister(value_reg)); + } + // No need to unpoison `expected` (RAX), as it is overwritten below. + } + + // Restore RAX. + __ movq(CpuRegister(RAX), temp2_); + + __ Bind(&done); + __ jmp(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const CpuRegister obj_; + // The address of the marked reference field. The base of this address must be `obj_`. + const Address field_addr_; + + // Should the reference in `ref_` be unpoisoned prior to marking it? + const bool unpoison_ref_before_marking_; + + const CpuRegister temp1_; + const CpuRegister temp2_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { public: @@ -4122,7 +4310,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, // /* HeapReference<Object> */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this - // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. + // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier( instruction, out, base, offset, /* needs_null_check */ true); if (is_volatile) { @@ -4569,7 +4757,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this - // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. + // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); } else { @@ -5318,7 +5506,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), - /*fixup_label*/nullptr, + /* fixup_label */ nullptr, requires_read_barrier); break; } @@ -5343,7 +5531,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { GenerateGcRootFieldLoad(cls, out_loc, address, - /*fixup_label*/nullptr, + /* fixup_label */ nullptr, requires_read_barrier); } else { // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address). @@ -5351,7 +5539,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { GenerateGcRootFieldLoad(cls, out_loc, Address(out, 0), - /*fixup_label*/nullptr, + /* fixup_label */ nullptr, requires_read_barrier); } generate_null_check = !cls->IsInDexCache(); @@ -5379,7 +5567,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { cls, out_loc, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())), - /*fixup_label*/nullptr, + /* fixup_label */ nullptr, requires_read_barrier); generate_null_check = !cls->IsInDexCache(); break; @@ -5496,7 +5684,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { /* no_rip */ false); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kEmitCompilerReadBarrier); SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); codegen_->AddSlowPath(slow_path); __ testl(out, out); @@ -6264,7 +6452,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instr // Slow path marking the GC root `root`. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( - instruction, root, /* unpoison */ false); + instruction, root, /* unpoison_ref_before_marking */ false); codegen_->AddSlowPath(slow_path); __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(), @@ -6330,7 +6518,10 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction Location ref, CpuRegister obj, const Address& src, - bool needs_null_check) { + bool needs_null_check, + bool always_update_field, + CpuRegister* temp1, + CpuRegister* temp2) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6387,8 +6578,16 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. // Slow path marking the object `ref` when it is gray. 
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( - instruction, ref, /* unpoison */ true); + SlowPathCode* slow_path; + if (always_update_field) { + DCHECK(temp1 != nullptr); + DCHECK(temp2 != nullptr); + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64( + instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( + instruction, ref, /* unpoison_ref_before_marking */ true); + } AddSlowPath(slow_path); // We have done the "if" of the gray bit check above, now branch based on the flags. diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 57ef83f621..8b19dad0d0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -253,12 +253,12 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { // // root <- *address // - // while honoring read barriers (if any). + // while honoring read barriers if `requires_read_barrier` is true. void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, const Address& address, - Label* fixup_label = nullptr, - bool requires_read_barrier = kEmitCompilerReadBarrier); + Label* fixup_label, + bool requires_read_barrier); void PushOntoFPStack(Location source, uint32_t temp_offset, uint32_t stack_adjustment, bool is_float); @@ -434,13 +434,25 @@ class CodeGeneratorX86_64 : public CodeGenerator { uint32_t data_offset, Location index, bool needs_null_check); - // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at address `src`, held by + // object `obj`, into `ref`, and mark it if needed. The base of + // address `src` must be `obj`. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). This operation + // requires two temporary registers, which must be provided as + // non-null pointers (`temp1` and `temp2`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, CpuRegister obj, const Address& src, - bool needs_null_check); + bool needs_null_check, + bool always_update_field = false, + CpuRegister* temp1 = nullptr, + CpuRegister* temp2 = nullptr); // Generate a read barrier for a heap reference within `instruction` // using a slow path. 
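The update step performed by the new ReadBarrierMarkAndUpdateFieldSlowPathX86_64, and requested through the `always_update_field` parameter of GenerateReferenceLoadWithBakerReadBarrier, amounts to a strong compare-and-set with relaxed ordering: the expected value is the old (pre-marking) reference, the desired value is the reference returned by the mark entrypoint, and a failed CAS is deliberately ignored because it means another thread already installed a different reference. A minimal, self-contained C++ sketch of that contract (std::atomic stands in for the LOCK CMPXCHG sequence; MarkReference and the other names below are illustrative, not ART code):

#include <atomic>
#include <cstdint>

// Illustrative 32-bit heap reference slot (the `*field_addr` of the slow path).
using HeapRef = uint32_t;

// Stand-in for the ReadBarrierMarkRegX entrypoint: a real implementation
// would return the to-space address of `ref`.
static HeapRef MarkReference(HeapRef ref) { return ref; }

static void MarkAndUpdateField(std::atomic<HeapRef>* field, HeapRef old_ref) {
  HeapRef new_ref = MarkReference(old_ref);
  if (new_ref == old_ref) {
    return;  // The reference did not move; nothing to write back.
  }
  // Strong CAS with relaxed ordering: install `new_ref` only if the field
  // still holds `old_ref`. If another thread has already stored a different
  // reference, the CAS fails and the field is left as-is on purpose.
  HeapRef expected = old_ref;
  field->compare_exchange_strong(expected, new_ref, std::memory_order_relaxed);
}

The ARM and ARM64 GenCas bodies shown below express their compare-and-set with exclusive load/store loops (e.g. ldrex/strex); the closest portable analogue of those loops would be compare_exchange_weak retried in a loop, since a store-conditional may fail spuriously.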
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index adfe09ba9f..9de521ad8d 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -18,6 +18,7 @@ #include "base/array_ref.h" #include "base/bit_vector-inl.h" +#include "base/stl_util.h" #include "ssa_phi_elimination.h" namespace art { @@ -168,8 +169,7 @@ bool HDeadCodeElimination::SimplifyIfs() { bool simplified_one_or_more_ifs = false; bool rerun_dominance_and_loop_analysis = false; - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { HInstruction* last = block->GetLastInstruction(); HInstruction* first = block->GetFirstInstruction(); if (last->IsIf() && @@ -271,20 +271,22 @@ bool HDeadCodeElimination::SimplifyIfs() { } void HDeadCodeElimination::ConnectSuccessiveBlocks() { - // Order does not matter. - for (HReversePostOrderIterator it(*graph_); !it.Done();) { - HBasicBlock* block = it.Current(); - if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) { - it.Advance(); - continue; - } - HBasicBlock* successor = block->GetSingleSuccessor(); - if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) { - it.Advance(); - continue; + // Order does not matter. Skip the entry block by starting at index 1 in reverse post order. + for (size_t i = 1u, size = graph_->GetReversePostOrder().size(); i != size; ++i) { + HBasicBlock* block = graph_->GetReversePostOrder()[i]; + DCHECK(!block->IsEntryBlock()); + while (block->GetLastInstruction()->IsGoto()) { + HBasicBlock* successor = block->GetSingleSuccessor(); + if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) { + break; + } + DCHECK_LT(i, IndexOfElement(graph_->GetReversePostOrder(), successor)); + block->MergeWith(successor); + --size; + DCHECK_EQ(size, graph_->GetReversePostOrder().size()); + DCHECK_EQ(block, graph_->GetReversePostOrder()[i]); + // Reiterate on this block in case it can be merged with its new successor. } - block->MergeWith(successor); - // Reiterate on this block in case it can be merged with its new successor. } } @@ -300,8 +302,7 @@ bool HDeadCodeElimination::RemoveDeadBlocks() { // Remove all dead blocks. Iterate in post order because removal needs the // block's chain of dominators and nested loops need to be updated from the // inside out. - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { int id = block->GetBlockId(); if (!live_blocks.IsBitSet(id)) { MaybeRecordDeadBlock(block); @@ -332,8 +333,7 @@ bool HDeadCodeElimination::RemoveDeadBlocks() { void HDeadCodeElimination::RemoveDeadInstructions() { // Process basic blocks in post-order in the dominator tree, so that // a dead instruction depending on another dead instruction is removed. - for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) { - HBasicBlock* block = b.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { // Traverse this block's instructions in backward order and remove // the unused ones. 
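The rewritten ConnectSuccessiveBlocks above walks the reverse post order by index while HBasicBlock::MergeWith removes each merged successor from that same vector, so the cached `size` has to shrink in lockstep and the current index is re-tested after every merge. A small self-contained C++ sketch of that iteration pattern (the merge rule here is a made-up placeholder, not ART's Goto/single-predecessor test):

#include <cassert>
#include <cstddef>
#include <vector>

// Placeholder "block": mergeable with its successor when the demo rule holds.
struct Block { int id; };

// Demo stand-in for "ends with a Goto and the successor has one predecessor":
// here we simply merge consecutive duplicates.
static bool CanMergeWithSuccessor(const std::vector<Block>& order, size_t i) {
  return i + 1 < order.size() && order[i + 1].id == order[i].id;
}

static void ConnectSuccessiveElements(std::vector<Block>* order) {
  // Start at index 1, mirroring the skipped entry block.
  for (size_t i = 1, size = order->size(); i < size; ++i) {
    while (CanMergeWithSuccessor(*order, i)) {
      // "MergeWith": the successor disappears from the order...
      order->erase(order->begin() + i + 1);
      // ...so the cached bound must shrink with the container, and the same
      // index is re-tested in case the block can merge with its new successor.
      --size;
      assert(size == order->size());
    }
  }
}

Caching the bound and explicitly decrementing it, with an assertion tying it back to the container, is what makes a plain index loop safe even though elements are erased mid-iteration.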
HBackwardInstructionIterator i(block->GetInstructions()); diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 1e86b75075..f5931a2f81 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -411,8 +411,8 @@ void GlobalValueNumberer::Run() { // Use the reverse post order to ensure the non back-edge predecessors of a block are // visited before the block itself. - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + VisitBasicBlock(block); } } diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 1d1921a246..f2602fbf8c 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -103,8 +103,7 @@ void HInductionVarAnalysis::Run() { // Detects sequence variables (generalized induction variables) during an outer to inner // traversal of all loops using Gerlek's algorithm. The order is important to enable // range analysis on outer loop while visiting inner loops. - for (HReversePostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) { - HBasicBlock* graph_block = it_graph.Current(); + for (HBasicBlock* graph_block : graph_->GetReversePostOrder()) { // Don't analyze irreducible loops. if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) { VisitLoop(graph_block->GetLoopInformation()); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 9faa98a388..cc420b3260 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -1219,16 +1219,13 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, return false; } - HReversePostOrderIterator it(*callee_graph); - it.Advance(); // Past the entry block, it does not contain instructions that prevent inlining. size_t number_of_instructions = 0; bool can_inline_environment = total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters; - for (; !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - + // Skip the entry block, it does not contain instructions that prevent inlining. + for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) { if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) { // Don't inline methods with irreducible loops, they could prevent some // optimizations to run. diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 613e00843f..c8c4ca76fd 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -81,8 +81,7 @@ void HInstructionBuilder::InitializeBlockLocals() { // locals (guaranteed by HGraphBuilder) and that all try blocks have been // visited already (from HTryBoundary scoping and reverse post order). 
bool catch_block_visited = false; - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); + for (HBasicBlock* current : graph_->GetReversePostOrder()) { if (current == current_block_) { catch_block_visited = true; } else if (current->IsTryBlock()) { @@ -276,8 +275,8 @@ bool HInstructionBuilder::Build() { FindNativeDebugInfoLocations(native_debug_info_locations); } - for (HReversePostOrderIterator block_it(*graph_); !block_it.Done(); block_it.Advance()) { - current_block_ = block_it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + current_block_ = block; uint32_t block_dex_pc = current_block_->GetDexPc(); InitializeBlockLocals(); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 3bb1c1dc21..e4d280f26d 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -124,20 +124,16 @@ void InstructionSimplifier::Run() { void InstructionSimplifierVisitor::Run() { // Iterate in reverse post order to open up more simplifications to users // of instructions that got simplified. - for (HReversePostOrderIterator it(*GetGraph()); !it.Done();) { + for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { // The simplification of an instruction to another instruction may yield // possibilities for other simplifications. So although we perform a reverse // post order visit, we sometimes need to revisit an instruction index. - simplification_occurred_ = false; - VisitBasicBlock(it.Current()); - if (simplification_occurred_ && - (simplifications_at_current_position_ < kMaxSamePositionSimplifications)) { - // New simplifications may be applicable to the instruction at the - // current index, so don't advance the iterator. - continue; - } + do { + simplification_occurred_ = false; + VisitBasicBlock(block); + } while (simplification_occurred_ && + (simplifications_at_current_position_ < kMaxSamePositionSimplifications)); simplifications_at_current_position_ = 0; - it.Advance(); } } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 8327a4c244..fc6ff7b197 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -133,8 +133,7 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) { void IntrinsicsRecognizer::Run() { ScopedObjectAccess soa(Thread::Current()); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { HInstruction* inst = inst_it.Current(); diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 96a6ecbee9..8790c1e4f1 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -652,9 +652,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. @@ -663,7 +663,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier. @@ -891,8 +891,13 @@ void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke, Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -901,36 +906,65 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, locations->SetInAt(4, Location::RequiresRegister()); // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. - Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot) + // operations to potentially clobber the output. Likewise when + // emitting a (Baker) read barrier, which may call. + Location::OutputOverlap overlaps = + ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) ? Location::kOutputOverlap : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), overlaps); + // Temporary registers used in CAS. In the object case + // (UnsafeCASObject intrinsic), these are also used for + // card-marking, and possibly for (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Pointer. locations->AddTemp(Location::RequiresRegister()); // Temp 1. } -static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) { +static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* codegen) { DCHECK_NE(type, Primitive::kPrimLong); ArmAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); // Boolean result. + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); // Boolean result. - Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. - Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Offset (discard high 4B). - Register expected_lo = locations->InAt(3).AsRegister<Register>(); // Expected. - Register value_lo = locations->InAt(4).AsRegister<Register>(); // Value. + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Location offset_loc = locations->InAt(2); + Register offset = offset_loc.AsRegisterPairLow<Register>(); // Offset (discard high 4B). 
+ Register expected = locations->InAt(3).AsRegister<Register>(); // Expected. + Register value = locations->InAt(4).AsRegister<Register>(); // Value. - Register tmp_ptr = locations->GetTemp(0).AsRegister<Register>(); // Pointer to actual memory. - Register tmp_lo = locations->GetTemp(1).AsRegister<Register>(); // Value in memory. + Location tmp_ptr_loc = locations->GetTemp(0); + Register tmp_ptr = tmp_ptr_loc.AsRegister<Register>(); // Pointer to actual memory. + Register tmp = locations->GetTemp(1).AsRegister<Register>(); // Value in memory. if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + // Mark card for object assuming new value is stored. Worst case we will mark an unchanged // object and scan the receiver at the next GC for nothing. bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo, value_can_be_null); + codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + /* offset */ 0u, + /* index */ offset_loc, + ScaleFactor::TIMES_1, + tmp_ptr_loc, + /* needs_null_check */ false, + /* always_update_field */ true, + &tmp); + } } // Prevent reordering with prior memory operations. @@ -942,12 +976,12 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ add(tmp_ptr, base, ShifterOperand(offset)); if (kPoisonHeapReferences && type == Primitive::kPrimNot) { - codegen->GetAssembler()->PoisonHeapReference(expected_lo); - if (value_lo == expected_lo) { - // Do not poison `value_lo`, as it is the same register as - // `expected_lo`, which has just been poisoned. + __ PoisonHeapReference(expected); + if (value == expected) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. } else { - codegen->GetAssembler()->PoisonHeapReference(value_lo); + __ PoisonHeapReference(value); } } @@ -959,37 +993,29 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat Label loop_head; __ Bind(&loop_head); - // TODO: When `type == Primitive::kPrimNot`, add a read barrier for - // the reference stored in the object before attempting the CAS, - // similar to the one in the art::Unsafe_compareAndSwapObject JNI - // implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject). 
- DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); - __ ldrex(tmp_lo, tmp_ptr); + __ ldrex(tmp, tmp_ptr); - __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); + __ subs(tmp, tmp, ShifterOperand(expected)); __ it(EQ, ItState::kItT); - __ strex(tmp_lo, value_lo, tmp_ptr, EQ); - __ cmp(tmp_lo, ShifterOperand(1), EQ); + __ strex(tmp, value, tmp_ptr, EQ); + __ cmp(tmp, ShifterOperand(1), EQ); __ b(&loop_head, EQ); __ dmb(ISH); - __ rsbs(out, tmp_lo, ShifterOperand(1)); + __ rsbs(out, tmp, ShifterOperand(1)); __ it(CC); __ mov(out, ShifterOperand(0), CC); if (kPoisonHeapReferences && type == Primitive::kPrimNot) { - codegen->GetAssembler()->UnpoisonHeapReference(expected_lo); - if (value_lo == expected_lo) { - // Do not unpoison `value_lo`, as it is the same register as - // `expected_lo`, which has just been unpoisoned. + __ UnpoisonHeapReference(expected); + if (value == expected) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. } else { - codegen->GetAssembler()->UnpoisonHeapReference(value_lo); + __ UnpoisonHeapReference(value); } } } @@ -998,33 +1024,23 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) { CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt); } void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); + GenCas(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. 
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); + GenCas(invoke, Primitive::kPrimNot, codegen_); } void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index e2c1802fdc..db1c022868 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -863,9 +863,9 @@ static void GenUnsafeGet(HInvoke* invoke, codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, trg_loc, base, - /* offset */ 0U, + /* offset */ 0u, /* index */ offset_loc, - /* scale_factor */ 0U, + /* scale_factor */ 0u, temp, /* needs_null_check */ false, is_volatile); @@ -880,7 +880,7 @@ static void GenUnsafeGet(HInvoke* invoke, if (type == Primitive::kPrimNot) { DCHECK(trg.IsW()); - codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc); } } } @@ -890,9 +890,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. @@ -901,7 +901,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) { @@ -1086,8 +1086,13 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke, Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -1096,20 +1101,29 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, locations->SetInAt(4, Location::RequiresRegister()); // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. - Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot) + // operations to potentially clobber the output. Likewise when + // emitting a (Baker) read barrier, which may call. + Location::OutputOverlap overlaps = + ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) ? 
Location::kOutputOverlap : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), overlaps); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Temporary register for (Baker) read barrier. + locations->AddTemp(Location::RequiresRegister()); + } } -static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) { +static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* codegen) { MacroAssembler* masm = codegen->GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); - Register out = WRegisterFrom(locations->Out()); // Boolean result. + Location out_loc = locations->Out(); + Register out = WRegisterFrom(out_loc); // Boolean result. Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. - Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. + Location offset_loc = locations->InAt(2); + Register offset = XRegisterFrom(offset_loc); // Long offset. Register expected = RegisterFrom(locations->InAt(3), type); // Expected. Register value = RegisterFrom(locations->InAt(4), type); // Value. @@ -1118,6 +1132,27 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(base, value, value_can_be_null); + + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Register temp = WRegisterFrom(locations->GetTemp(0)); + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + /* offset */ 0u, + /* index */ offset_loc, + /* scale_factor */ 0u, + temp, + /* needs_null_check */ false, + /* use_load_acquire */ false, + /* always_update_field */ true); + } } UseScratchRegisterScope temps(masm); @@ -1145,14 +1180,6 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat vixl::aarch64::Label loop_head, exit_loop; __ Bind(&loop_head); - // TODO: When `type == Primitive::kPrimNot`, add a read barrier for - // the reference stored in the object before attempting the CAS, - // similar to the one in the art::Unsafe_compareAndSwapObject JNI - // implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). - DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); @@ -1179,14 +1206,9 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) { CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. 
- if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -1194,22 +1216,17 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); + GenCas(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_); + GenCas(invoke, Primitive::kPrimLong, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); + GenCas(invoke, Primitive::kPrimNot, codegen_); } void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index f41e4d95b5..aae3899847 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2056,9 +2056,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. @@ -2076,7 +2076,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, } } else { locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } } @@ -2255,10 +2255,16 @@ void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, + Primitive::Type type, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
locations->SetInAt(1, Location::RequiresRegister()); @@ -2278,7 +2284,8 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type ty // Force a byte register for the output. locations->SetOut(Location::RegisterLocation(EAX)); if (type == Primitive::kPrimNot) { - // Need temp registers for card-marking. + // Need temporary registers for card-marking, and possibly for + // (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. // Need a byte register for marking. locations->AddTemp(Location::RegisterLocation(ECX)); @@ -2294,14 +2301,9 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2317,7 +2319,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code Location out = locations->Out(); DCHECK_EQ(out.AsRegister<Register>(), EAX); + // The address of the field within the holding object. + Address field_addr(base, offset, ScaleFactor::TIMES_1, 0); + if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + Location temp1_loc = locations->GetTemp(0); + Register temp1 = temp1_loc.AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Register expected = locations->InAt(3).AsRegister<Register>(); // Ensure `expected` is in EAX (required by the CMPXCHG instruction). DCHECK_EQ(expected, EAX); @@ -2325,11 +2338,20 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), - locations->GetTemp(1).AsRegister<Register>(), - base, - value, - value_can_be_null); + codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + temp1_loc, // Unused, used only as a "temporary" within the read barrier. + base, + field_addr, + /* needs_null_check */ false, + /* always_update_field */ true, + &temp2); + } bool base_equals_value = (base == value); if (kPoisonHeapReferences) { @@ -2337,7 +2359,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // If `base` and `value` are the same register location, move // `value` to a temporary register. This way, poisoning // `value` won't invalidate `base`. 
- value = locations->GetTemp(0).AsRegister<Register>(); + value = temp1; __ movl(value, base); } @@ -2356,19 +2378,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ PoisonHeapReference(value); } - // TODO: Add a read barrier for the reference stored in the object - // before attempting the CAS, similar to the one in the - // art::Unsafe_compareAndSwapObject JNI implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject). - DCHECK(!kEmitCompilerReadBarrier); - __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); + __ LockCmpxchgl(field_addr, value); // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); @@ -2392,8 +2407,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // Ensure the expected value is in EAX (required by the CMPXCHG // instruction). DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); - __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), - locations->InAt(4).AsRegister<Register>()); + __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>()); } else if (type == Primitive::kPrimLong) { // Ensure the expected value is in EAX:EDX and that the new // value is in EBX:ECX (required by the CMPXCHG8B instruction). @@ -2401,7 +2415,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX); DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX); DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX); - __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0)); + __ LockCmpxchg8b(field_addr); } else { LOG(FATAL) << "Unexpected CAS type " << type; } @@ -2409,7 +2423,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we // don't need scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); } @@ -2424,14 +2438,9 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. 
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); GenCAS(Primitive::kPrimNot, invoke, codegen_); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 4b0afca122..cdef22f6de 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2172,9 +2172,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. @@ -2183,7 +2183,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), - can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { @@ -2333,10 +2333,16 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type, +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, + Primitive::Type type, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -2347,7 +2353,8 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type ty locations->SetOut(Location::RequiresRegister()); if (type == Primitive::kPrimNot) { - // Need temp registers for card-marking. + // Need temporary registers for card-marking, and possibly for + // (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. locations->AddTemp(Location::RequiresRegister()); } @@ -2362,14 +2369,9 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - if (kEmitCompilerReadBarrier) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. 
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { return; } @@ -2386,16 +2388,37 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c // Ensure `expected` is in RAX (required by the CMPXCHG instruction). DCHECK_EQ(expected.AsRegister(), RAX); CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + Location out_loc = locations->Out(); + CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (type == Primitive::kPrimNot) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>(); + // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? - codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), - locations->GetTemp(1).AsRegister<CpuRegister>(), - base, - value, - value_can_be_null); + codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null); + + // The address of the field within the holding object. + Address field_addr(base, offset, ScaleFactor::TIMES_1, 0); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + field_addr, + /* needs_null_check */ false, + /* always_update_field */ true, + &temp1, + &temp2); + } bool base_equals_value = (base.AsRegister() == value.AsRegister()); Register value_reg = value.AsRegister(); @@ -2404,7 +2427,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c // If `base` and `value` are the same register location, move // `value_reg` to a temporary register. This way, poisoning // `value_reg` won't invalidate `base`. - value_reg = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister(); + value_reg = temp1.AsRegister(); __ movl(CpuRegister(value_reg), base); } @@ -2423,19 +2446,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ PoisonHeapReference(CpuRegister(value_reg)); } - // TODO: Add a read barrier for the reference stored in the object - // before attempting the CAS, similar to the one in the - // art::Unsafe_compareAndSwapObject JNI implementation. - // - // Note that this code is not (yet) used when read barriers are - // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject). - DCHECK(!kEmitCompilerReadBarrier); - __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); + __ LockCmpxchgl(field_addr, CpuRegister(value_reg)); // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. __ setcc(kZero, out); __ movzxb(out, out); @@ -2468,7 +2484,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c // LOCK CMPXCHG has full barrier semantics, and we don't need // scheduling barriers at this time. - // Convert ZF into the boolean result. + // Convert ZF into the Boolean result. 
__ setcc(kZero, out); __ movzxb(out, out); } @@ -2483,14 +2499,9 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic is missing a read barrier, and - // therefore sometimes does not work as expected (b/25883050). - // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS). - // - // TODO(rpl): Implement read barrier support in GenCAS and re-enable - // this intrinsic. - DCHECK(!kEmitCompilerReadBarrier); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); GenCAS(Primitive::kPrimNot, invoke, codegen_); } diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index a0ded74d6d..eb2d18dd88 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -15,6 +15,7 @@ */ #include "licm.h" + #include "side_effects_analysis.h" namespace art { @@ -90,8 +91,7 @@ void LICM::Run() { } // Post order visit to visit inner loops before outer loops. - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { if (!block->IsLoopHeader()) { // Only visit the loop when we reach the header. continue; diff --git a/compiler/optimizing/linear_order.cc b/compiler/optimizing/linear_order.cc index 3af212fa48..80cecd41dc 100644 --- a/compiler/optimizing/linear_order.cc +++ b/compiler/optimizing/linear_order.cc @@ -94,8 +94,7 @@ void LinearizeGraph(const HGraph* graph, // for it. ArenaVector<uint32_t> forward_predecessors(graph->GetBlocks().size(), allocator->Adapter(kArenaAllocLinearOrder)); - for (HReversePostOrderIterator it(*graph); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph->GetReversePostOrder()) { size_t number_of_forward_predecessors = block->GetPredecessors().size(); if (block->IsLoopHeader()) { number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges(); diff --git a/compiler/optimizing/linear_order.h b/compiler/optimizing/linear_order.h index cdbdd0714b..7122d67be9 100644 --- a/compiler/optimizing/linear_order.h +++ b/compiler/optimizing/linear_order.h @@ -30,16 +30,12 @@ namespace art { // // for (HBasicBlock* block : linear_order) // linear order // -// for (HBasicBlock* block : LinearPostOrder(linear_order)) // linear post order +// for (HBasicBlock* block : ReverseRange(linear_order)) // linear post order // void LinearizeGraph(const HGraph* graph, ArenaAllocator* allocator, ArenaVector<HBasicBlock*>* linear_order); -inline auto LinearPostOrder(const ArenaVector<HBasicBlock*>& linear_order) { - return MakeIterationRange(linear_order.rbegin(), linear_order.rend()); -} - } // namespace art #endif // ART_COMPILER_OPTIMIZING_LINEAR_ORDER_H_ diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index bd74368e17..37b58ded59 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -56,8 +56,7 @@ static void TestCode(const uint16_t* data, const char* expected) { liveness.Analyze(); std::ostringstream buffer; - for (HInsertionOrderIterator it(*graph); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph->GetBlocks()) { buffer << "Block " << block->GetBlockId() << std::endl; size_t 
ssa_values = liveness.GetNumberOfSsaValues(); BitVector* live_in = liveness.GetLiveInSet(*block); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 820fa29597..b91e9e6868 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -1046,8 +1046,8 @@ void LoadStoreElimination::Run() { return; } HeapLocationCollector heap_location_collector(graph_); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - heap_location_collector.VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + heap_location_collector.VisitBasicBlock(block); } if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) { // Bail out if there are too many heap locations to deal with. @@ -1065,8 +1065,8 @@ void LoadStoreElimination::Run() { } heap_location_collector.BuildAliasingMatrix(); LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - lse_visitor.VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + lse_visitor.VisitBasicBlock(block); } lse_visitor.RemoveInstructions(); } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 59cc0091bf..45c7eb1a46 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -179,16 +179,16 @@ GraphAnalysisResult HGraph::BuildDominatorTree() { } void HGraph::ClearDominanceInformation() { - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - it.Current()->ClearDominanceInformation(); + for (HBasicBlock* block : GetReversePostOrder()) { + block->ClearDominanceInformation(); } reverse_post_order_.clear(); } void HGraph::ClearLoopInformation() { SetHasIrreducibleLoops(false); - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - it.Current()->SetLoopInformation(nullptr); + for (HBasicBlock* block : GetReversePostOrder()) { + block->SetLoopInformation(nullptr); } } @@ -275,8 +275,7 @@ void HGraph::ComputeDominanceInformation() { bool update_occurred = true; while (update_occurred) { update_occurred = false; - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetReversePostOrder()) { for (HBasicBlock* successor : block->GetSuccessors()) { update_occurred |= UpdateDominatorOfSuccessor(block, successor); } @@ -287,8 +286,7 @@ void HGraph::ComputeDominanceInformation() { // Make sure that there are no remaining blocks whose dominator information // needs to be updated. if (kIsDebugBuild) { - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetReversePostOrder()) { for (HBasicBlock* successor : block->GetSuccessors()) { DCHECK(!UpdateDominatorOfSuccessor(block, successor)); } @@ -297,8 +295,7 @@ void HGraph::ComputeDominanceInformation() { // Populate `dominated_blocks_` information after computing all dominators. // The potential presence of irreducible loops requires to do it after. 
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetReversePostOrder()) { if (!block->IsEntryBlock()) { block->GetDominator()->AddDominatedBlock(block); } @@ -375,8 +372,7 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { void HGraph::ComputeTryBlockInformation() { // Iterate in reverse post order to propagate try membership information from // predecessors to their successors. - for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetReversePostOrder()) { if (block->IsEntryBlock() || block->IsCatchBlock()) { // Catch blocks after simplification have only exceptional predecessors // and hence are never in tries. @@ -446,8 +442,7 @@ GraphAnalysisResult HGraph::AnalyzeLoops() const { // We iterate post order to ensure we visit inner loops before outer loops. // `PopulateRecursive` needs this guarantee to know whether a natural loop // contains an irreducible loop. - for (HPostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetPostOrder()) { if (block->IsLoopHeader()) { if (block->IsCatchBlock()) { // TODO: Dealing with exceptional back edges could be tricky because @@ -1134,8 +1129,8 @@ void HGraphVisitor::VisitInsertionOrder() { } void HGraphVisitor::VisitReversePostOrder() { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + VisitBasicBlock(block); } } @@ -1986,10 +1981,8 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Update the environments in this graph to have the invoke's environment // as parent. { - HReversePostOrderIterator it(*this); - it.Advance(); // Skip the entry block, we do not need to update the entry's suspend check. - for (; !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + // Skip the entry block, we do not need to update the entry's suspend check. + for (HBasicBlock* block : GetReversePostOrderSkipEntryBlock()) { for (HInstructionIterator instr_it(block->GetInstructions()); !instr_it.Done(); instr_it.Advance()) { @@ -2070,8 +2063,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // Do a reverse post order of the blocks in the callee and do (1), (2), (3) // and (4) to the blocks that apply. 
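In the InlineInto() hunk above, the old pattern of constructing an iterator and advancing it once past the entry block is replaced by walking GetReversePostOrderSkipEntryBlock(), i.e. a view of the reverse post order that simply starts at element 1. A standalone sketch of that sub-view idea, using a simplified ArrayView type standing in for ART's ArrayRef (invented name, reduced interface):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Simplified non-owning view over a contiguous sequence (analogue of an
// ArrayRef-style type; the real ArrayRef has a much richer interface).
template <typename T>
class ArrayView {
 public:
  ArrayView(const T* data, size_t size) : data_(data), size_(size) {}
  explicit ArrayView(const std::vector<T>& v) : data_(v.data()), size_(v.size()) {}

  const T* begin() const { return data_; }
  const T* end() const { return data_ + size_; }

  // View of the same sequence starting at `pos` -- no copy involved.
  ArrayView SubArray(size_t pos) const {
    return ArrayView(data_ + pos, size_ - pos);
  }

 private:
  const T* data_;
  size_t size_;
};

int main() {
  std::vector<std::string> reverse_post_order = {"entry", "B1", "B2", "exit"};

  // Instead of skipping the first element by hand inside the loop, iterate
  // a view that starts one element later.
  ArrayView<std::string> all(reverse_post_order);
  for (const std::string& block : all.SubArray(1)) {
    std::cout << block << '\n';  // B1, B2, exit -- entry block skipped.
  }
  return 0;
}

The view carries only a pointer and a length, so skipping the entry block costs nothing and the loop body itself stays unchanged.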
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { - HBasicBlock* current = it.Current(); + for (HBasicBlock* current : GetReversePostOrder()) { if (current != exit_block_ && current != entry_block_ && current != first) { DCHECK(current->GetTryCatchInformation() == nullptr); DCHECK(current->GetGraph() == this); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 257ccea799..6a45149509 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -25,6 +25,7 @@ #include "base/arena_containers.h" #include "base/arena_object.h" #include "base/array_ref.h" +#include "base/iteration_range.h" #include "base/stl_util.h" #include "base/transform_array_ref.h" #include "dex_file.h" @@ -460,10 +461,23 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return reverse_post_order_; } + ArrayRef<HBasicBlock* const> GetReversePostOrderSkipEntryBlock() { + DCHECK(GetReversePostOrder()[0] == entry_block_); + return ArrayRef<HBasicBlock* const>(GetReversePostOrder()).SubArray(1); + } + + IterationRange<ArenaVector<HBasicBlock*>::const_reverse_iterator> GetPostOrder() const { + return ReverseRange(GetReversePostOrder()); + } + const ArenaVector<HBasicBlock*>& GetLinearOrder() const { return linear_order_; } + IterationRange<ArenaVector<HBasicBlock*>::const_reverse_iterator> GetLinearPostOrder() const { + return ReverseRange(GetLinearOrder()); + } + bool HasBoundsChecks() const { return has_bounds_checks_; } @@ -6618,58 +6632,6 @@ class HGraphDelegateVisitor : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(HGraphDelegateVisitor); }; -class HInsertionOrderIterator : public ValueObject { - public: - explicit HInsertionOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {} - - bool Done() const { return index_ == graph_.GetBlocks().size(); } - HBasicBlock* Current() const { return graph_.GetBlocks()[index_]; } - void Advance() { ++index_; } - - private: - const HGraph& graph_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HInsertionOrderIterator); -}; - -class HReversePostOrderIterator : public ValueObject { - public: - explicit HReversePostOrderIterator(const HGraph& graph) : graph_(graph), index_(0) { - // Check that reverse post order of the graph has been built. - DCHECK(!graph.GetReversePostOrder().empty()); - } - - bool Done() const { return index_ == graph_.GetReversePostOrder().size(); } - HBasicBlock* Current() const { return graph_.GetReversePostOrder()[index_]; } - void Advance() { ++index_; } - - private: - const HGraph& graph_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HReversePostOrderIterator); -}; - -class HPostOrderIterator : public ValueObject { - public: - explicit HPostOrderIterator(const HGraph& graph) - : graph_(graph), index_(graph_.GetReversePostOrder().size()) { - // Check that reverse post order of the graph has been built. - DCHECK(!graph.GetReversePostOrder().empty()); - } - - bool Done() const { return index_ == 0; } - HBasicBlock* Current() const { return graph_.GetReversePostOrder()[index_ - 1u]; } - void Advance() { --index_; } - - private: - const HGraph& graph_; - size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HPostOrderIterator); -}; - // Iterator over the blocks that art part of the loop. Includes blocks part // of an inner loop. The order in which the blocks are iterated is on their // block id. 
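The new GetReversePostOrderSkipEntryBlock() accessor DCHECKs that GetReversePostOrder()[0] is the entry block. That holds because a reverse post order is obtained by reversing the finish order of a depth-first traversal started at the entry block, which therefore finishes last. A self-contained sketch of computing a reverse post order this way (plain integer blocks and adjacency lists, not HGraph):

#include <algorithm>
#include <cassert>
#include <functional>
#include <iostream>
#include <vector>

// Blocks are identified by index; block 0 is the entry block.
using Graph = std::vector<std::vector<int>>;  // successor lists per block

std::vector<int> ComputeReversePostOrder(const Graph& graph) {
  std::vector<bool> visited(graph.size(), false);
  std::vector<int> post_order;

  std::function<void(int)> dfs = [&](int block) {
    visited[block] = true;
    for (int succ : graph[block]) {
      if (!visited[succ]) {
        dfs(succ);
      }
    }
    post_order.push_back(block);  // appended only after all successors finish
  };
  dfs(0);

  std::reverse(post_order.begin(), post_order.end());
  return post_order;  // the entry block (0) is now at index 0
}

int main() {
  // 0 -> {1, 2}, 1 -> 3, 2 -> 3 (a simple diamond).
  Graph graph = {{1, 2}, {3}, {3}, {}};
  std::vector<int> rpo = ComputeReversePostOrder(graph);
  assert(rpo.front() == 0);  // the property the new DCHECK relies on
  for (int block : rpo) {
    std::cout << block << ' ';  // prints "0 2 1 3" with this successor order
  }
  std::cout << '\n';
  return 0;
}

Because the orders are just stored vectors with these properties, GetPostOrder() and GetLinearPostOrder() can be plain ReverseRange() views, and the three dedicated iterator classes removed above become unnecessary.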
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 7b66ef3627..0db60882db 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -20,8 +20,7 @@ namespace art { void PrepareForRegisterAllocation::Run() { // Order does not matter. - for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { // No need to visit the phis. for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index d93c9ddc39..d588deaace 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -123,8 +123,7 @@ void ReferenceTypePropagation::ValidateTypes() { // TODO: move this to the graph checker. if (kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) { HInstruction* instr = iti.Current(); if (instr->GetType() == Primitive::kPrimNot) { @@ -158,8 +157,8 @@ void ReferenceTypePropagation::Run() { // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. // We take advantage of this order in `VisitBasicBlock`. - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + VisitBasicBlock(block); } ProcessWorklist(); diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 961077419e..aa0d3710fa 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -758,7 +758,7 @@ bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) { } void RegisterAllocatorGraphColor::ProcessInstructions() { - for (HBasicBlock* block : LinearPostOrder(codegen_->GetGraph()->GetLinearOrder())) { + for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) { // Note that we currently depend on this ordering, since some helper // code is designed for linear scan register allocation. for (HBackwardInstructionIterator instr_it(block->GetInstructions()); diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index 4e69bc8999..1a391ce9bb 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -163,7 +163,7 @@ void RegisterAllocatorLinearScan::BlockRegisters(size_t start, size_t end, bool void RegisterAllocatorLinearScan::AllocateRegistersInternal() { // Iterate post-order, to ensure the list is sorted, and the last added interval // is the one with the lowest start position. 
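The comment kept at the end of the hunk above -- iterate post order "to ensure the list is sorted, and the last added interval is the one with the lowest start position" -- follows from lifetime positions being numbered along the linear order. A small standalone illustration with made-up positions (the numbering here is invented for the example, not ART's exact lifetime numbering):

#include <cassert>
#include <iostream>
#include <vector>

struct Interval {
  int start;  // lifetime position at which the value is defined
};

int main() {
  // Lifetime positions grouped by block, in linear order; positions grow
  // monotonically along that order.
  std::vector<std::vector<int>> blocks_in_linear_order = {
      {2, 4, 6},     // block 0
      {8, 10},       // block 1
      {12, 14, 16},  // block 2
  };

  std::vector<Interval> unhandled;
  // Blocks in linear *post* order, instructions backwards within each block:
  // positions are visited in strictly decreasing order.
  for (auto block = blocks_in_linear_order.rbegin();
       block != blocks_in_linear_order.rend();
       ++block) {
    for (auto pos = block->rbegin(); pos != block->rend(); ++pos) {
      unhandled.push_back(Interval{*pos});
    }
  }

  // The list ends up sorted by decreasing start position, so the last added
  // interval has the lowest start -- the property linear scan relies on.
  for (size_t i = 1; i < unhandled.size(); ++i) {
    assert(unhandled[i - 1].start > unhandled[i].start);
  }
  std::cout << "lowest start: " << unhandled.back().start << '\n';  // 2
  return 0;
}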
- for (HBasicBlock* block : LinearPostOrder(codegen_->GetGraph()->GetLinearOrder())) { + for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) { for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done(); back_it.Advance()) { ProcessInstruction(back_it.Current()); diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index e409035d9d..46d0d0eb65 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -76,8 +76,7 @@ void HSelectGenerator::Run() { // Iterate in post order in the unlikely case that removing one occurrence of // the selection pattern empties a branch block of another occurrence. // Otherwise the order does not matter. - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { if (!block->EndsWithIf()) continue; // Find elements of the diamond pattern. diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc index 1dc69867b4..6d82e8e06d 100644 --- a/compiler/optimizing/side_effects_analysis.cc +++ b/compiler/optimizing/side_effects_analysis.cc @@ -26,8 +26,7 @@ void SideEffectsAnalysis::Run() { // In DEBUG mode, ensure side effects are properly initialized to empty. if (kIsDebugBuild) { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { SideEffects effects = GetBlockEffects(block); DCHECK(effects.DoesNothing()); if (block->IsLoopHeader()) { @@ -38,9 +37,7 @@ void SideEffectsAnalysis::Run() { } // Do a post order visit to ensure we visit a loop header after its loop body. - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - + for (HBasicBlock* block : graph_->GetPostOrder()) { SideEffects effects = SideEffects::None(); // Update `effects` with the side effects of all instructions in this block. for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 03807ba1ee..ae1e369999 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -25,8 +25,8 @@ namespace art { void SsaBuilder::FixNullConstantType() { // The order doesn't matter here. - for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) { - for (HInstructionIterator it(itb.Current()->GetInstructions()); !it.Done(); it.Advance()) { + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* equality_instr = it.Current(); if (!equality_instr->IsEqual() && !equality_instr->IsNotEqual()) { continue; @@ -57,8 +57,8 @@ void SsaBuilder::FixNullConstantType() { void SsaBuilder::EquivalentPhisCleanup() { // The order doesn't matter here. 
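side_effects_analysis.cc above keeps its post order visit so that a loop header is reached only after every block of its loop body has been processed; by then the effects gathered for that loop are complete. A rough standalone analogue of that aggregation, with effects modelled as plain bitmasks (the loop_effects table and the effect values are invented for illustration; ART stores SideEffects per block and per loop header):

#include <cstdint>
#include <iostream>
#include <vector>

// Simplified model: each block has a bitmask of side effects and may belong
// to one loop, identified by its header block id (-1 means "not in a loop").
struct Block {
  int id;
  int loop_header;       // header block id, or -1
  uint32_t own_effects;  // effects of the block's own instructions
};

int main() {
  // Post order of a single-loop graph: body blocks (3, 2) come before their
  // header (1), which comes before the entry (0).
  std::vector<Block> post_order = {
      {3, 1, 0x4},   // body block, e.g. writes a field
      {2, 1, 0x2},   // body block, e.g. reads an array
      {1, 1, 0x1},   // loop header, its own effects
      {0, -1, 0x0},  // entry
  };

  std::vector<uint32_t> loop_effects(4, 0u);
  for (const Block& block : post_order) {
    if (block.loop_header >= 0) {
      // Because body blocks are visited before their header, the header's
      // accumulated loop effects are complete once the header is processed.
      loop_effects[block.loop_header] |= block.own_effects;
    }
  }
  std::cout << std::hex << "loop effects at header 1: 0x"
            << loop_effects[1] << '\n';  // 0x7
  return 0;
}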
- for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) { - for (HInstructionIterator it(itb.Current()->GetPhis()); !it.Done(); it.Advance()) { + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); HPhi* next = phi->GetNextEquivalentPhiWithSameType(); if (next != nullptr) { @@ -79,8 +79,7 @@ void SsaBuilder::EquivalentPhisCleanup() { } void SsaBuilder::FixEnvironmentPhis() { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) { HPhi* phi = it_phis.Current()->AsPhi(); // If the phi is not dead, or has no environment uses, there is nothing to do. @@ -228,8 +227,7 @@ bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) { void SsaBuilder::RunPrimitiveTypePropagation() { ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter(kArenaAllocGraphBuilder)); - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { if (block->IsLoopHeader()) { for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { HPhi* phi = phi_it.Current()->AsPhi(); diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 76cf8fe1ae..e8e12e1a55 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -139,7 +139,7 @@ static void RecursivelyProcessInputs(HInstruction* current, void SsaLivenessAnalysis::ComputeLiveRanges() { // Do a post order visit, adding inputs of instructions live in the block where // that instruction is defined, and killing instructions that are being visited. - for (HBasicBlock* block : LinearPostOrder(graph_->GetLinearOrder())) { + for (HBasicBlock* block : ReverseRange(graph_->GetLinearOrder())) { BitVector* kill = GetKillSet(*block); BitVector* live_in = GetLiveInSet(*block); @@ -256,15 +256,13 @@ void SsaLivenessAnalysis::ComputeLiveInAndLiveOutSets() { do { changed = false; - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - const HBasicBlock& block = *it.Current(); - + for (const HBasicBlock* block : graph_->GetPostOrder()) { // The live_in set depends on the kill set (which does not // change in this loop), and the live_out set. If the live_out // set does not change, there is no need to update the live_in set. - if (UpdateLiveOut(block) && UpdateLiveIn(block)) { + if (UpdateLiveOut(*block) && UpdateLiveIn(*block)) { if (kIsDebugBuild) { - CheckNoLiveInIrreducibleLoop(block); + CheckNoLiveInIrreducibleLoop(*block); } changed = true; } diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index b1ec99ab8e..aec7a3c555 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -34,8 +34,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() { ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination)); // Add to the worklist phis referenced by non-phi instructions. 
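The live-in/live-out fixed point above iterates in post order because live_out of a block is the union of its successors' live_in sets: visiting successors first lets most of the information propagate within a single pass, and the surrounding do/while only repeats for facts that flow around back edges. A self-contained backward-liveness sketch of the same fixed point, with bitsets as plain integers instead of ART's BitVector (the CFG and variable assignment are invented for the example):

#include <cstdint>
#include <iostream>
#include <vector>

// Backward liveness over a tiny CFG. Variables are bits: v0 = 0x1, v1 = 0x2.
struct Block {
  std::vector<int> successors;
  uint32_t gen;   // variables used before any redefinition in the block
  uint32_t kill;  // variables defined in the block
};

int main() {
  // CFG: 0 -> 1, 1 -> {2, 3}, 2 -> 1 (back edge), 3 is the exit.
  std::vector<Block> blocks = {
      {{1}, 0x0, 0x3},     // B0: defines v0 and v1
      {{2, 3}, 0x1, 0x0},  // B1: uses v0
      {{1}, 0x2, 0x2},     // B2: uses then redefines v1
      {{}, 0x1, 0x0},      // B3: uses v0
  };
  // One valid post order for this CFG, computed by hand.
  const int post_order[] = {2, 3, 1, 0};

  std::vector<uint32_t> live_in(blocks.size(), 0u);
  std::vector<uint32_t> live_out(blocks.size(), 0u);

  bool changed;
  do {
    changed = false;
    for (int b : post_order) {
      uint32_t out = 0u;
      for (int succ : blocks[b].successors) {
        out |= live_in[succ];  // live_out is the union of successors' live_in
      }
      uint32_t in = blocks[b].gen | (out & ~blocks[b].kill);
      if (out != live_out[b] || in != live_in[b]) {
        live_out[b] = out;
        live_in[b] = in;
        changed = true;
      }
    }
  } while (changed);  // extra passes only for facts flowing around back edges

  for (size_t b = 0; b != blocks.size(); ++b) {
    std::cout << "B" << b << ": live_in=0x" << std::hex << live_in[b]
              << " live_out=0x" << live_out[b] << std::dec << '\n';
  }
  return 0;
}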
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HPhi* phi = inst_it.Current()->AsPhi(); if (phi->IsDead()) { @@ -84,8 +83,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() { // Remove phis that are not live. Visit in post order so that phis // that are not inputs of loop phis can be removed when they have // no users left (dead phis might use dead phis). - for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetPostOrder()) { HInstruction* current = block->GetFirstPhi(); HInstruction* next = nullptr; HPhi* phi; @@ -119,8 +117,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() { void SsaRedundantPhiElimination::Run() { // Add all phis in the worklist. Order does not matter for correctness, and // neither will necessarily converge faster. - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { worklist_.push_back(inst_it.Current()->AsPhi()); } |
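SsaDeadPhiElimination above seeds its worklist with phis that have at least one non-phi user and then propagates liveness into their phi inputs; anything never reached is dead and is removed afterwards in post order, so that dead phis which use other dead phis can be removed once they have no users left. A minimal standalone sketch of that marking scheme (the Phi record and indices are invented for illustration, not ART's HPhi):

#include <cstddef>
#include <iostream>
#include <vector>

// A phi is live if a non-phi instruction uses it, or, transitively, if it is
// an input of a live phi.
struct Phi {
  std::vector<size_t> phi_inputs;  // indices of phis feeding this phi
  bool has_non_phi_use;            // used by a "real" instruction
  bool live;
};

int main() {
  // phi2 has a real user; phi1 feeds phi2; phi0 feeds phi1.
  // phi3 and phi4 only feed each other, so both are dead.
  std::vector<Phi> phis = {
      {{}, false, false},   // phi0
      {{0}, false, false},  // phi1
      {{1}, true, false},   // phi2
      {{4}, false, false},  // phi3
      {{3}, false, false},  // phi4
  };

  // Seed the worklist with phis referenced by non-phi instructions.
  std::vector<size_t> worklist;
  for (size_t i = 0; i != phis.size(); ++i) {
    if (phis[i].has_non_phi_use) {
      phis[i].live = true;
      worklist.push_back(i);
    }
  }

  // Propagate liveness from each live phi to its phi inputs.
  while (!worklist.empty()) {
    size_t index = worklist.back();
    worklist.pop_back();
    for (size_t input : phis[index].phi_inputs) {
      if (!phis[input].live) {
        phis[input].live = true;
        worklist.push_back(input);
      }
    }
  }

  for (size_t i = 0; i != phis.size(); ++i) {
    std::cout << "phi" << i << (phis[i].live ? " live" : " dead") << '\n';
  }
  // phi0..phi2 end up live; phi3 and phi4 stay dead and would be removed by
  // the elimination phase, visiting blocks in post order as above.
  return 0;
}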