Diffstat (limited to 'compiler')
22 files changed, 255 insertions, 56 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 51fbaea519..08670a0d82 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1305,4 +1305,18 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
   locations->AddTemp(Location::RequiresRegister());
 }
 
+uint32_t CodeGenerator::GetReferenceSlowFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetSlowPathFlagOffset().Uint32Value();
+}
+
+uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const {
+  ScopedObjectAccess soa(Thread::Current());
+  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+  DCHECK(klass->IsInitialized());
+  return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 6e75e3bb2e..82a54d2ed1 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -469,6 +469,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
 
   virtual void GenerateNop() = 0;
 
+  uint32_t GetReferenceSlowFlagOffset() const;
+  uint32_t GetReferenceDisableFlagOffset() const;
+
  protected:
   // Method patch info used for recording locations of required linker patches and
   // target methods. The target method can be used for various purposes, whether for
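Note: these two getters expose the offsets of two byte-sized flags stored on the java.lang.ref.Reference class object; the generated intrinsic code (intrinsics_x86.cc and intrinsics_x86_64.cc below) tests both flags before taking the fast path. The following standalone C++ model of that decision is a sketch only; ReferenceClassModel and UseFastPath are invented names, not ART code:

    #include <cstdint>
    #include <cstdio>

    // Invented stand-in for the flag bytes on the java.lang.ref.Reference
    // class object; the real offsets come from the two getters above.
    struct ReferenceClassModel {
      uint8_t disable_intrinsic_flag;  // non-zero: never use the intrinsic
      uint8_t slow_path_flag;          // non-zero: GC currently needs the slow path
    };

    // Mirrors the emitted check: fall back to the runtime whenever either flag is set.
    bool UseFastPath(const ReferenceClassModel& klass) {
      return klass.disable_intrinsic_flag == 0 && klass.slow_path_flag == 0;
    }

    int main() {
      ReferenceClassModel klass{0, 1};  // slow-path flag raised, e.g. during reference processing
      std::printf("fast path: %s\n", UseFastPath(klass) ? "yes" : "no");
      return 0;
    }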
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 50892a9d48..bdbafcdf6a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4308,16 +4308,18 @@ Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr
   // save one load. However, since this is just an intrinsic slow path we prefer this
   // simple and more robust approach rather that trying to determine if that's the case.
   SlowPathCode* slow_path = GetCurrentSlowPath();
-  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
-  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
-    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
-    __ movl(temp, Address(ESP, stack_offset));
-    return temp;
+  if (slow_path != nullptr) {
+    if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+      int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+      __ movl(temp, Address(ESP, stack_offset));
+      return temp;
+    }
   }
   return location.AsRegister<Register>();
 }
 
-void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                  Location temp) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
@@ -4366,6 +4368,11 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index fe7d3ed85c..98dc8ca280 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -398,6 +398,7 @@ class CodeGeneratorX86 : public CodeGenerator {
       MethodReference target_method) OVERRIDE;
 
   // Generate a call to a static or direct method.
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   // Generate a call to a virtual method.
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
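Note: this change splits GenerateStaticOrDirectCall in two so the ReferenceGetReferent intrinsic can materialize the callee ArtMethod* without emitting the call itself; the DCHECK on GetCurrentSlowPath() becomes a null check because the method-load code is now also reached outside a slow path. A rough standalone sketch of the refactoring shape; Emitter, MethodLoc, and LoadCalleeMethod are invented stand-ins, not the real codegen API:

    #include <cassert>

    struct MethodLoc { int reg; };

    struct Emitter {
      // Extracted step: materialize the callee method into `temp` and report
      // where it ended up (always `temp` except for recursive calls).
      MethodLoc LoadCalleeMethod(MethodLoc temp) {
        // ...would emit the method load here...
        return temp;
      }
      // Original entry point, now a thin wrapper over the extracted step.
      void StaticOrDirectCall(MethodLoc temp) {
        MethodLoc callee = LoadCalleeMethod(temp);
        assert(callee.reg == temp.reg);  // the intrinsic relies on this, see its DCHECK below
        // ...would emit the call through `callee` here...
      }
    };

    int main() {
      Emitter().StaticOrDirectCall(MethodLoc{1});
      return 0;
    }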
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 56c5b06945..30eca2c103 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -762,10 +762,9 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStati
   }
 }
 
-void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                     Location temp) {
+Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                                     Location temp) {
   // All registers are assumed to be correctly set up.
-
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
@@ -815,6 +814,13 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
       break;
     }
   }
+  return callee_method;
+}
+
+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                     Location temp) {
+  // All registers are assumed to be correctly set up.
+  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d9908bb961..7cf12459b0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -394,6 +394,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method) OVERRIDE;
 
+  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 568b8a8df6..2038c88e55 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -549,26 +549,19 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
       }
     }
 
-    if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
-        || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)
-        || IsPass(HDeadCodeElimination::kInitialDeadCodeEliminationPassName)
-        || IsPass(BoundsCheckElimination::kBoundsCheckEliminationPassName)
-        || IsPass(RegisterAllocator::kRegisterAllocatorPassName)
-        || IsPass(HGraphBuilder::kBuilderPassName)) {
-      HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
-      if (info == nullptr) {
-        StartAttributeStream("loop") << "none";
+    HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+    if (loop_info == nullptr) {
+      StartAttributeStream("loop") << "none";
+    } else {
+      StartAttributeStream("loop") << "B" << loop_info->GetHeader()->GetBlockId();
+      HLoopInformation* outer = loop_info->GetPreHeader()->GetLoopInformation();
+      if (outer != nullptr) {
+        StartAttributeStream("outer_loop") << "B" << outer->GetHeader()->GetBlockId();
       } else {
-        StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
-        HLoopInformation* outer = info->GetPreHeader()->GetLoopInformation();
-        if (outer != nullptr) {
-          StartAttributeStream("outer_loop") << "B" << outer->GetHeader()->GetBlockId();
-        } else {
-          StartAttributeStream("outer_loop") << "none";
-        }
-        StartAttributeStream("irreducible")
-            << std::boolalpha << info->IsIrreducible() << std::noboolalpha;
+        StartAttributeStream("outer_loop") << "none";
       }
+      StartAttributeStream("irreducible")
+          << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha;
     }
 
     if ((IsPass(HGraphBuilder::kBuilderPassName)
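Note: the visualizer previously printed loop attributes only for a whitelist of passes; they are now emitted for every pass dump. A small standalone model of the attribute lines this produces; LoopAttrsModel and its fields are invented for the example:

    #include <iostream>

    // outer_header < 0 models "no enclosing loop".
    struct LoopAttrsModel { bool in_loop; int header_block; int outer_header; bool irreducible; };

    // Produces the same three attributes the visitor now always emits.
    void PrintLoopAttrs(const LoopAttrsModel& a) {
      if (!a.in_loop) {
        std::cout << "loop:none\n";
        return;
      }
      std::cout << "loop:B" << a.header_block;
      if (a.outer_header >= 0) {
        std::cout << " outer_loop:B" << a.outer_header;
      } else {
        std::cout << " outer_loop:none";
      }
      std::cout << " irreducible:" << std::boolalpha << a.irreducible << "\n";
    }

    int main() {
      PrintLoopAttrs({true, 2, -1, false});  // prints "loop:B2 outer_loop:none irreducible:false"
      return 0;
    }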
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index d0d52bf6cc..1e86b75075 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -454,11 +454,16 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
   if (!set->IsEmpty()) {
     if (block->IsLoopHeader()) {
-      if (block->GetLoopInformation()->IsIrreducible()) {
+      if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
         // To satisfy our linear scan algorithm, no instruction should flow in an irreducible
-        // loop header.
+        // loop header. We clear the set at entry of irreducible loops and any loop containing
+        // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction
+        // across the irreducible loop.
+        // Note that, if we're not compiling OSR, we could still do GVN and introduce
+        // phis at irreducible loop headers. We decided it was not worth the complexity.
         set->Clear();
       } else {
+        DCHECK(!block->GetLoopInformation()->IsIrreducible());
         DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
         set->Kill(side_effects_.GetLoopEffects(block));
       }
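Note: GVN now wipes its value set not only at irreducible loop headers but at the header of any natural loop that contains one, since a value kept across such a loop could have its liveness stretched past blocks the linear-scan allocator cannot reason about. A tiny standalone model of the header decision; LoopInfoModel and AtLoopHeader are invented for the sketch:

    #include <set>

    struct LoopInfoModel { bool contains_irreducible_loop; };
    using ValueSet = std::set<int>;

    // Mirrors the branch in GlobalValueNumberer::VisitBasicBlock.
    void AtLoopHeader(ValueSet* set, const LoopInfoModel& loop, const ValueSet& killed_by_loop) {
      if (loop.contains_irreducible_loop) {
        set->clear();  // keeping any value could extend liveness across the irreducible loop
      } else {
        for (int value : killed_by_loop) set->erase(value);  // only side-effected values die
      }
    }

    int main() {
      ValueSet values = {1, 2, 3};
      AtLoopHeader(&values, LoopInfoModel{true}, {});
      return values.empty() ? 0 : 1;  // 0: everything cleared at the header
    }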
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index d0edecae22..5c4736109f 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2631,8 +2631,65 @@ void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke)
   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
+void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations = invoke->GetLocations();
+  X86Assembler* assembler = GetAssembler();
+
+  Register obj = locations->InAt(0).AsRegister<Register>();
+  Register out = locations->Out().AsRegister<Register>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0));
+  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
+  Register temp = temp_loc.AsRegister<Register>();
+
+  // Now get declaring class.
+  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags preventing us for using intrinsic.
+  if (slow_path_flag_offset == disable_flag_offset + 1) {
+    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  } else {
+    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Fast path.
+  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(X86, SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
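Note: when GetReferenceSlowFlagOffset() lands exactly one byte after GetReferenceDisableFlagOffset(), the code above folds the two byte tests into a single 16-bit cmpw at the lower offset: a two-byte load is zero exactly when both byte flags are zero, and for a zero test the byte order does not matter. A standalone demonstration of that property; the flag layout here is invented for the example:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint8_t flags[2] = {0, 0};  // [disable_intrinsic_flag, slow_path_flag], adjacent bytes
      uint16_t both;

      std::memcpy(&both, flags, sizeof(both));
      assert(both == 0);  // both clear: one cmpw proves the fast path is allowed

      flags[1] = 1;       // raise the slow-path flag
      std::memcpy(&both, flags, sizeof(both));
      assert(both != 0);  // the single 16-bit compare catches either flag
      return 0;
    }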
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 4ee2368ff5..a65e54cf28 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2719,7 +2719,64 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok
   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
-UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
+void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
+  if (kEmitCompilerReadBarrier) {
+    // Do not intrinsify this call with the read barrier configuration.
+    return;
+  }
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnSlowPath,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::SameAsFirstInput());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations = invoke->GetLocations();
+  X86_64Assembler* assembler = GetAssembler();
+
+  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  // Load ArtMethod first.
+  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+  DCHECK(invoke_direct != nullptr);
+  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+      invoke_direct, locations->GetTemp(0));
+  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
+  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+
+  // Now get declaring class.
+  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+  DCHECK_NE(slow_path_flag_offset, 0u);
+  DCHECK_NE(disable_flag_offset, 0u);
+  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+  // Check static flags preventing us for using intrinsic.
+  if (slow_path_flag_offset == disable_flag_offset + 1) {
+    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  } else {
+    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Fast path.
+  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 5a0b89c90a..7543cd6c54 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -101,16 +101,6 @@ void LICM::Run() {
     SideEffects loop_effects = side_effects_.GetLoopEffects(block);
     HBasicBlock* pre_header = loop_info->GetPreHeader();
 
-    bool contains_irreducible_loop = false;
-    if (graph_->HasIrreducibleLoops()) {
-      for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
-        if (it_loop.Current()->GetLoopInformation()->IsIrreducible()) {
-          contains_irreducible_loop = true;
-          break;
-        }
-      }
-    }
-
     for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
       HBasicBlock* inner = it_loop.Current();
       DCHECK(inner->IsInLoop());
@@ -123,11 +113,12 @@ void LICM::Run() {
       visited->SetBit(inner->GetBlockId());
     }
 
-    if (contains_irreducible_loop) {
+    if (loop_info->ContainsIrreducibleLoop()) {
       // We cannot licm in an irreducible loop, or in a natural loop containing an
       // irreducible loop.
      continue;
     }
+    DCHECK(!loop_info->IsIrreducible());
 
     // We can move an instruction that can throw only if it is the first
     // throwing instruction in the loop. Note that the first potentially
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 1e6bf07e42..60329ccff2 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -446,8 +446,10 @@ void HGraph::SimplifyCFG() {
 }
 
 GraphAnalysisResult HGraph::AnalyzeLoops() const {
-  // Order does not matter.
-  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+  // We iterate post order to ensure we visit inner loops before outer loops.
+  // `PopulateRecursive` needs this guarantee to know whether a natural loop
+  // contains an irreducible loop.
+  for (HPostOrderIterator it(*this); !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (block->IsLoopHeader()) {
       if (block->IsCatchBlock()) {
@@ -580,6 +582,14 @@ void HLoopInformation::PopulateRecursive(HBasicBlock* block) {
 
   blocks_.SetBit(block->GetBlockId());
   block->SetInLoop(this);
+  if (block->IsLoopHeader()) {
+    // We're visiting loops in post-order, so inner loops must have been
+    // populated already.
+    DCHECK(block->GetLoopInformation()->IsPopulated());
+    if (block->GetLoopInformation()->IsIrreducible()) {
+      contains_irreducible_loop_ = true;
+    }
+  }
   for (HBasicBlock* predecessor : block->GetPredecessors()) {
     PopulateRecursive(predecessor);
   }
@@ -683,6 +693,7 @@ void HLoopInformation::Populate() {
   }
   if (is_irreducible_loop) {
     irreducible_ = true;
+    contains_irreducible_loop_ = true;
     graph->SetHasIrreducibleLoops(true);
   }
 }
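Note: AnalyzeLoops now walks blocks in post order so that, when an outer loop's PopulateRecursive reaches an inner loop's header, the inner loop is already populated and its irreducibility flag is final; the outer loop can then simply inherit it. A standalone model of that propagation; Loop and Populate are invented names:

    #include <vector>

    struct Loop {
      bool irreducible = false;
      bool contains_irreducible = false;
      std::vector<Loop*> inner;  // populated before this loop, thanks to post order
    };

    void Populate(Loop* loop) {
      loop->contains_irreducible = loop->irreducible;
      for (Loop* in : loop->inner) {
        // Inner flags are final here; this is the guarantee the DCHECK above asserts.
        if (in->irreducible || in->contains_irreducible) {
          loop->contains_irreducible = true;
        }
      }
    }

    int main() {
      Loop inner;
      inner.irreducible = true;
      Populate(&inner);
      Loop outer;
      outer.inner.push_back(&inner);
      Populate(&outer);
      return outer.contains_irreducible ? 0 : 1;  // 0: the flag propagated outward
    }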
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 934d355e82..12ea059d3f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -650,6 +650,7 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
       : header_(header),
         suspend_check_(nullptr),
         irreducible_(false),
+        contains_irreducible_loop_(false),
         back_edges_(graph->GetArena()->Adapter(kArenaAllocLoopInfoBackEdges)),
         // Make bit vector growable, as the number of blocks may change.
         blocks_(graph->GetArena(), graph->GetBlocks().size(), true, kArenaAllocLoopInfoBackEdges) {
@@ -657,6 +658,7 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
   }
 
   bool IsIrreducible() const { return irreducible_; }
+  bool ContainsIrreducibleLoop() const { return contains_irreducible_loop_; }
 
   void Dump(std::ostream& os);
 
@@ -727,6 +729,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
 
   bool HasBackEdgeNotDominatedByHeader() const;
 
+  bool IsPopulated() const {
+    return blocks_.GetHighestBitSet() != -1;
+  }
+
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
@@ -735,6 +741,7 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
 
   HBasicBlock* header_;
   HSuspendCheck* suspend_check_;
   bool irreducible_;
+  bool contains_irreducible_loop_;
   ArenaVector<HBasicBlock*> back_edges_;
   ArenaBitVector blocks_;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index b1f9cbcdfa..4405b803e0 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1773,7 +1773,9 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
     // therefore will not have a location for that instruction for `to`.
     // Because the instruction is a constant or the ArtMethod, we don't need to
    // do anything: it will be materialized in the irreducible loop.
-    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+    DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
+        << defined_by->DebugName() << ":" << defined_by->GetId()
+        << " " << from->GetBlockId() << " -> " << to->GetBlockId();
     return;
   }
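Note: IsPopulated() relies on Populate setting at least one bit in blocks_ (the header's), so an all-clear bit vector means population has not happened yet. A standalone illustration, with std::vector<bool> standing in for ArenaBitVector:

    #include <vector>

    // Stand-in for ArenaBitVector::GetHighestBitSet(): -1 when no bit is set.
    int GetHighestBitSet(const std::vector<bool>& bits) {
      for (int i = static_cast<int>(bits.size()) - 1; i >= 0; --i) {
        if (bits[i]) return i;
      }
      return -1;
    }

    int main() {
      std::vector<bool> blocks(8, false);
      bool before = GetHighestBitSet(blocks) != -1;  // false: loop not populated yet
      blocks[3] = true;                              // Populate sets block bits first
      bool after = GetHighestBitSet(blocks) != -1;   // true: now considered populated
      return (!before && after) ? 0 : 1;
    }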
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 5534aeac29..36e0d993d1 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -309,17 +309,8 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
     }
 
     if (block->IsLoopHeader()) {
-      if (kIsDebugBuild && block->GetLoopInformation()->IsIrreducible()) {
-        // To satisfy our liveness algorithm, we need to ensure loop headers of
-        // irreducible loops do not have any live-in instructions, except constants
-        // and the current method, which can be trivially re-materialized.
-        for (uint32_t idx : live_in->Indexes()) {
-          HInstruction* instruction = GetInstructionFromSsaIndex(idx);
-          DCHECK(instruction->GetBlock()->IsEntryBlock()) << instruction->DebugName();
-          DCHECK(!instruction->IsParameterValue());
-          DCHECK(instruction->IsCurrentMethod() || instruction->IsConstant())
-              << instruction->DebugName();
-        }
+      if (kIsDebugBuild) {
+        CheckNoLiveInIrreducibleLoop(*block);
       }
       size_t last_position = block->GetLoopInformation()->GetLifetimeEnd();
       // For all live_in instructions at the loop header, we need to create a range
@@ -344,6 +335,9 @@ void SsaLivenessAnalysis::ComputeLiveInAndLiveOutSets() {
       // change in this loop), and the live_out set. If the live_out
      // set does not change, there is no need to update the live_in set.
       if (UpdateLiveOut(block) && UpdateLiveIn(block)) {
+        if (kIsDebugBuild) {
+          CheckNoLiveInIrreducibleLoop(block);
+        }
         changed = true;
       }
     }
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 1141fd1c76..1fcba8bc77 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -1260,6 +1260,23 @@ class SsaLivenessAnalysis : public ValueObject {
     return instruction->GetType() == Primitive::kPrimNot;
   }
 
+  void CheckNoLiveInIrreducibleLoop(const HBasicBlock& block) const {
+    if (!block.IsLoopHeader() || !block.GetLoopInformation()->IsIrreducible()) {
+      return;
+    }
+    BitVector* live_in = GetLiveInSet(block);
+    // To satisfy our liveness algorithm, we need to ensure loop headers of
+    // irreducible loops do not have any live-in instructions, except constants
+    // and the current method, which can be trivially re-materialized.
+    for (uint32_t idx : live_in->Indexes()) {
+      HInstruction* instruction = GetInstructionFromSsaIndex(idx);
+      DCHECK(instruction->GetBlock()->IsEntryBlock()) << instruction->DebugName();
+      DCHECK(!instruction->IsParameterValue());
+      DCHECK(instruction->IsCurrentMethod() || instruction->IsConstant())
+          << instruction->DebugName();
+    }
+  }
+
   HGraph* const graph_;
   CodeGenerator* const codegen_;
   ArenaVector<BlockInfo*> block_infos_;
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 2203646e77..84cdb7d4d3 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1030,6 +1030,14 @@ void X86Assembler::xchgl(Register reg, const Address& address) {
 }
 
 
+void X86Assembler::cmpb(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x80);
+  EmitOperand(7, address);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86Assembler::cmpw(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 8567ad2a17..bc46e9f7c9 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -479,6 +479,7 @@ class X86Assembler FINAL : public Assembler {
   void xchgl(Register dst, Register src);
   void xchgl(Register reg, const Address& address);
 
+  void cmpb(const Address& address, const Immediate& imm);
   void cmpw(const Address& address, const Immediate& imm);
 
   void cmpl(Register reg, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 1d1df6e447..28043c9380 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -389,4 +389,10 @@ TEST_F(AssemblerX86Test, NearLabel) {
   DriverStr(expected, "near_label");
 }
 
+TEST_F(AssemblerX86Test, Cmpb) {
+  GetAssembler()->cmpb(x86::Address(x86::EDI, 128), x86::Immediate(0));
+  const char* expected = "cmpb $0, 128(%EDI)\n";
+  DriverStr(expected, "cmpb");
+}
+
 }  // namespace art
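Note: the new 32-bit cmpb overload emits the group-1 opcode 0x80 with ModRM extension /7 and an 8-bit immediate; the test deliberately uses displacement 128, one past the disp8 range, so the disp32 form is exercised. A sketch that hand-assembles the expected bytes for cmpb $0, 128(%EDI); the buffer handling is invented, and the byte values follow the x86 encoding rules (a base of ESP would additionally need a SIB byte, ignored here):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<uint8_t> buf;
      buf.push_back(0x80);  // group-1 opcode: compare r/m8 with imm8
      buf.push_back(0xBF);  // ModRM: mod=10 (disp32), reg=7 (/7 selects CMP), rm=EDI
      buf.push_back(0x80);  // disp32 = 128, little-endian
      buf.push_back(0x00);
      buf.push_back(0x00);
      buf.push_back(0x00);
      buf.push_back(0x00);  // imm8 = 0
      for (uint8_t b : buf) std::printf("%02x ", b);
      std::printf("\n");  // prints: 80 bf 80 00 00 00 00
      return 0;
    }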
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 32eb4a37bf..5e7b587e40 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1224,6 +1224,16 @@ void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
 }
 
 
+void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
+  EmitOptionalRex32(address);
+  EmitUint8(0x80);
+  EmitOperand(7, address);
+  EmitUint8(imm.value() & 0xFF);
+}
+
+
 void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   CHECK(imm.is_int32());
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 92c7d0ab99..720a402b5f 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -506,6 +506,7 @@ class X86_64Assembler FINAL : public Assembler {
   void xchgq(CpuRegister dst, CpuRegister src);
   void xchgl(CpuRegister reg, const Address& address);
 
+  void cmpb(const Address& address, const Immediate& imm);
   void cmpw(const Address& address, const Immediate& imm);
 
   void cmpl(CpuRegister reg, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index afe9207eb1..9dccc9f21f 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1637,4 +1637,11 @@ TEST_F(AssemblerX86_64Test, Repecmpsq) {
   DriverStr(expected, "Repecmpsq");
 }
 
+TEST_F(AssemblerX86_64Test, Cmpb) {
+  GetAssembler()->cmpb(x86_64::Address(x86_64::CpuRegister(x86_64::RDI), 128),
+                       x86_64::Immediate(0));
+  const char* expected = "cmpb $0, 128(%RDI)\n";
+  DriverStr(expected, "cmpb");
+}
+
 }  // namespace art
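Note: the x86-64 overload differs from the 32-bit one only by the EmitOptionalRex32(address) call, which emits a REX prefix when the address uses an extended register. The test's 128(%RDI) needs no prefix, so its bytes match the 32-bit encoding; an R8 base would prepend 0x41 (REX.B). A sketch of that prefix decision; EncodeCmpbDisp32 is an invented helper, and bases RSP/R12, which require a SIB byte, are ignored:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Encodes cmpb $0, 128(%base) with a disp32, for a plain or extended base register.
    std::vector<uint8_t> EncodeCmpbDisp32(uint8_t base_low3, bool base_is_extended) {
      std::vector<uint8_t> buf;
      if (base_is_extended) buf.push_back(0x41);   // REX.B extends the ModRM rm field
      buf.push_back(0x80);                         // compare r/m8 with imm8
      buf.push_back(0x80 | (7 << 3) | base_low3);  // ModRM: mod=10, reg=/7, rm=base
      buf.push_back(0x80);                         // disp32 = 128, little-endian
      buf.push_back(0x00);
      buf.push_back(0x00);
      buf.push_back(0x00);
      buf.push_back(0x00);                         // imm8 = 0
      return buf;
    }

    int main() {
      for (uint8_t b : EncodeCmpbDisp32(7, false)) std::printf("%02x ", b);  // RDI: no REX
      std::printf("\n");
      for (uint8_t b : EncodeCmpbDisp32(0, true)) std::printf("%02x ", b);   // R8: 41 prefix
      std::printf("\n");
      return 0;
    }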