Diffstat (limited to 'compiler')
-rw-r--r--  compiler/optimizing/code_generator.cc          | 14
-rw-r--r--  compiler/optimizing/code_generator.h           |  3
-rw-r--r--  compiler/optimizing/code_generator_x86.cc      | 19
-rw-r--r--  compiler/optimizing/code_generator_x86.h       |  1
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc   | 12
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h    |  1
-rw-r--r--  compiler/optimizing/graph_visualizer.cc        | 29
-rw-r--r--  compiler/optimizing/gvn.cc                     |  9
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc          | 59
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc       | 59
-rw-r--r--  compiler/optimizing/licm.cc                    | 13
-rw-r--r--  compiler/optimizing/nodes.cc                   | 15
-rw-r--r--  compiler/optimizing/nodes.h                    |  7
-rw-r--r--  compiler/optimizing/register_allocator.cc      |  4
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc   | 16
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h    | 17
-rw-r--r--  compiler/utils/x86/assembler_x86.cc            |  8
-rw-r--r--  compiler/utils/x86/assembler_x86.h             |  1
-rw-r--r--  compiler/utils/x86/assembler_x86_test.cc       |  6
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc      | 10
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h       |  1
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc |  7
22 files changed, 255 insertions, 56 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 51fbaea519..08670a0d82 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1305,4 +1305,18 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
locations->AddTemp(Location::RequiresRegister());
}
+uint32_t CodeGenerator::GetReferenceSlowFlagOffset() const {
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+ DCHECK(klass->IsInitialized());
+ return klass->GetSlowPathFlagOffset().Uint32Value();
+}
+
+uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const {
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
+ DCHECK(klass->IsInitialized());
+ return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
+}
+
} // namespace art
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 6e75e3bb2e..82a54d2ed1 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -469,6 +469,9 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
virtual void GenerateNop() = 0;
+ uint32_t GetReferenceSlowFlagOffset() const;
+ uint32_t GetReferenceDisableFlagOffset() const;
+
protected:
// Method patch info used for recording locations of required linker patches and
// target methods. The target method can be used for various purposes, whether for
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 50892a9d48..bdbafcdf6a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4308,16 +4308,18 @@ Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr
// save one load. However, since this is just an intrinsic slow path we prefer this
// simple and more robust approach rather than trying to determine if that's the case.
SlowPathCode* slow_path = GetCurrentSlowPath();
- DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
- if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
- int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
- __ movl(temp, Address(ESP, stack_offset));
- return temp;
+ if (slow_path != nullptr) {
+ if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+ int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+ __ movl(temp, Address(ESP, stack_offset));
+ return temp;
+ }
}
return location.AsRegister<Register>();
}
-void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+ Location temp) {
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
@@ -4366,6 +4368,11 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
break;
}
}
+ return callee_method;
+}
+
+void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+ Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
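
The refactoring above splits callee-method materialization out of call emission so that a caller (here, an intrinsic) can obtain the ArtMethod* in a register without emitting the call itself. A minimal toy model of the pattern, with placeholder types standing in for the ART ones (not part of the patch):

    // Toy sketch of the split; names mirror the patch, types are stand-ins.
    struct Loc { int reg; };
    Loc GenerateCalleeMethod(Loc temp) {
      // Materialize the callee method pointer into `temp` (load-kind dispatch).
      return temp;
    }
    void EmitCall(Loc callee_method) { /* emit the actual call instruction */ }
    void GenerateCall(Loc temp) {
      // The original entry point becomes a thin wrapper over the two steps.
      EmitCall(GenerateCalleeMethod(temp));
    }

The ReferenceGetReferent intrinsics later in this diff use the first step alone to load the method and read its declaring class.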
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index fe7d3ed85c..98dc8ca280 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -398,6 +398,7 @@ class CodeGeneratorX86 : public CodeGenerator {
MethodReference target_method) OVERRIDE;
// Generate a call to a static or direct method.
+ Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
// Generate a call to a virtual method.
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 56c5b06945..30eca2c103 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -762,10 +762,9 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStati
}
}
-void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
- Location temp) {
+Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+ Location temp) {
// All registers are assumed to be correctly set up.
-
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
@@ -815,6 +814,13 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
break;
}
}
+ return callee_method;
+}
+
+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+ Location temp) {
+ // All registers are assumed to be correctly set up.
+ Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d9908bb961..7cf12459b0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -394,6 +394,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
MethodReference target_method) OVERRIDE;
+ Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 568b8a8df6..2038c88e55 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -549,26 +549,19 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
}
}
- if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
- || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)
- || IsPass(HDeadCodeElimination::kInitialDeadCodeEliminationPassName)
- || IsPass(BoundsCheckElimination::kBoundsCheckEliminationPassName)
- || IsPass(RegisterAllocator::kRegisterAllocatorPassName)
- || IsPass(HGraphBuilder::kBuilderPassName)) {
- HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
- if (info == nullptr) {
- StartAttributeStream("loop") << "none";
+ HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+ if (loop_info == nullptr) {
+ StartAttributeStream("loop") << "none";
+ } else {
+ StartAttributeStream("loop") << "B" << loop_info->GetHeader()->GetBlockId();
+ HLoopInformation* outer = loop_info->GetPreHeader()->GetLoopInformation();
+ if (outer != nullptr) {
+ StartAttributeStream("outer_loop") << "B" << outer->GetHeader()->GetBlockId();
} else {
- StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
- HLoopInformation* outer = info->GetPreHeader()->GetLoopInformation();
- if (outer != nullptr) {
- StartAttributeStream("outer_loop") << "B" << outer->GetHeader()->GetBlockId();
- } else {
- StartAttributeStream("outer_loop") << "none";
- }
- StartAttributeStream("irreducible")
- << std::boolalpha << info->IsIrreducible() << std::noboolalpha;
+ StartAttributeStream("outer_loop") << "none";
}
+ StartAttributeStream("irreducible")
+ << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha;
}
if ((IsPass(HGraphBuilder::kBuilderPassName)
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index d0d52bf6cc..1e86b75075 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -454,11 +454,16 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
if (!set->IsEmpty()) {
if (block->IsLoopHeader()) {
- if (block->GetLoopInformation()->IsIrreducible()) {
+ if (block->GetLoopInformation()->ContainsIrreducibleLoop()) {
// To satisfy our linear scan algorithm, no instruction should flow in an irreducible
- // loop header.
+ // loop header. We clear the set at entry of irreducible loops and any loop containing
+ // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction
+ // across the irreducible loop.
+ // Note that, if we're not compiling OSR, we could still do GVN and introduce
+ // phis at irreducible loop headers. We decided it was not worth the complexity.
set->Clear();
} else {
+ DCHECK(!block->GetLoopInformation()->IsIrreducible());
DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader());
set->Kill(side_effects_.GetLoopEffects(block));
}
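
For context, an irreducible loop is a cycle with more than one entry block, so no single header dominates the loop body. A hand-written standalone example (illustrative only, not from the patch or the ART test suite):

    // The cycle {l1, l2} can be entered at l1 (fall-through) or at l2 (the
    // initial goto), so neither block dominates the other.
    int Irreducible(int c, int n) {
      int x = 0;
      if (c) goto l2;  // second entry into the cycle
    l1:
      x += 1;
      if (x >= n) return x;
    l2:
      x += 2;
      goto l1;         // back edge targeting the other entry
    }

Clearing the GVN value set at such headers (and at headers of loops containing them) is what keeps GVN from extending instruction liveness across the irreducible region.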
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index d0edecae22..5c4736109f 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2631,8 +2631,65 @@ void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke)
GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}
+void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
+ if (kEmitCompilerReadBarrier) {
+ // Do not intrinsify this call with the read barrier configuration.
+ return;
+ }
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
+ DCHECK(!kEmitCompilerReadBarrier);
+ LocationSummary* locations = invoke->GetLocations();
+ X86Assembler* assembler = GetAssembler();
+
+ Register obj = locations->InAt(0).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+ codegen_->AddSlowPath(slow_path);
+
+ // Load ArtMethod first.
+ HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+ DCHECK(invoke_direct != nullptr);
+ Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+ invoke_direct, locations->GetTemp(0));
+ DCHECK(temp_loc.Equals(locations->GetTemp(0)));
+ Register temp = temp_loc.AsRegister<Register>();
+
+ // Now get declaring class.
+ __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+ uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+ uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+ DCHECK_NE(slow_path_flag_offset, 0u);
+ DCHECK_NE(disable_flag_offset, 0u);
+ DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+ // Check static flags that prevent us from using the intrinsic.
+ if (slow_path_flag_offset == disable_flag_offset + 1) {
+ __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ } else {
+ __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ }
+
+ // Fast path.
+ __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
+ __ Bind(slow_path->GetExitLabel());
+}
+
UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86, SystemArrayCopy)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
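
The flag check above exploits field layout: when the one-byte disable flag immediately precedes the one-byte slow-path flag, a single 16-bit cmpw against zero tests both flags in one memory access; otherwise two cmpb instructions are needed. A standalone C++ sketch of the equivalence (plain re-derivation, not ART code):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      // [disable_flag, slow_path_flag], adjacent as the cmpw branch assumes
      // (slow_path_flag_offset == disable_flag_offset + 1).
      uint8_t flags[2] = {0, 0};
      uint16_t word;
      std::memcpy(&word, flags, sizeof(word));    // the word cmpw would read
      std::printf("fast path: %d\n", word == 0);  // 1: both flags clear
      flags[1] = 1;                               // set the slow-path flag
      std::memcpy(&word, flags, sizeof(word));
      std::printf("fast path: %d\n", word == 0);  // 0: branch to slow path
      return 0;
    }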
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 4ee2368ff5..a65e54cf28 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2719,7 +2719,64 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok
GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}
-UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
+void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
+ if (kEmitCompilerReadBarrier) {
+ // Do not intrinsify this call with the read barrier configuration.
+ return;
+ }
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
+ DCHECK(!kEmitCompilerReadBarrier);
+ LocationSummary* locations = invoke->GetLocations();
+ X86_64Assembler* assembler = GetAssembler();
+
+ CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+ codegen_->AddSlowPath(slow_path);
+
+ // Load ArtMethod first.
+ HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
+ DCHECK(invoke_direct != nullptr);
+ Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
+ invoke_direct, locations->GetTemp(0));
+ DCHECK(temp_loc.Equals(locations->GetTemp(0)));
+ CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+
+ // Now get declaring class.
+ __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+ uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
+ uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
+ DCHECK_NE(slow_path_flag_offset, 0u);
+ DCHECK_NE(disable_flag_offset, 0u);
+ DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
+
+ // Check static flags that prevent us from using the intrinsic.
+ if (slow_path_flag_offset == disable_flag_offset + 1) {
+ __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ } else {
+ __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ }
+
+ // Fast path.
+ __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
+ __ Bind(slow_path->GetExitLabel());
+}
+
UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 5a0b89c90a..7543cd6c54 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -101,16 +101,6 @@ void LICM::Run() {
SideEffects loop_effects = side_effects_.GetLoopEffects(block);
HBasicBlock* pre_header = loop_info->GetPreHeader();
- bool contains_irreducible_loop = false;
- if (graph_->HasIrreducibleLoops()) {
- for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
- if (it_loop.Current()->GetLoopInformation()->IsIrreducible()) {
- contains_irreducible_loop = true;
- break;
- }
- }
- }
-
for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
HBasicBlock* inner = it_loop.Current();
DCHECK(inner->IsInLoop());
@@ -123,11 +113,12 @@ void LICM::Run() {
visited->SetBit(inner->GetBlockId());
}
- if (contains_irreducible_loop) {
+ if (loop_info->ContainsIrreducibleLoop()) {
// We cannot licm in an irreducible loop, or in a natural loop containing an
// irreducible loop.
continue;
}
+ DCHECK(!loop_info->IsIrreducible());
// We can move an instruction that can throw only if it is the first
// throwing instruction in the loop. Note that the first potentially
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 1e6bf07e42..60329ccff2 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -446,8 +446,10 @@ void HGraph::SimplifyCFG() {
}
GraphAnalysisResult HGraph::AnalyzeLoops() const {
- // Order does not matter.
- for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
+ // We iterate post order to ensure we visit inner loops before outer loops.
+ // `PopulateRecursive` needs this guarantee to know whether a natural loop
+ // contains an irreducible loop.
+ for (HPostOrderIterator it(*this); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
if (block->IsLoopHeader()) {
if (block->IsCatchBlock()) {
@@ -580,6 +582,14 @@ void HLoopInformation::PopulateRecursive(HBasicBlock* block) {
blocks_.SetBit(block->GetBlockId());
block->SetInLoop(this);
+ if (block->IsLoopHeader()) {
+ // We're visiting loops in post-order, so inner loops must have been
+ // populated already.
+ DCHECK(block->GetLoopInformation()->IsPopulated());
+ if (block->GetLoopInformation()->IsIrreducible()) {
+ contains_irreducible_loop_ = true;
+ }
+ }
for (HBasicBlock* predecessor : block->GetPredecessors()) {
PopulateRecursive(predecessor);
}
@@ -683,6 +693,7 @@ void HLoopInformation::Populate() {
}
if (is_irreducible_loop) {
irreducible_ = true;
+ contains_irreducible_loop_ = true;
graph->SetHasIrreducibleLoops(true);
}
}
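
The iteration-order change above matters because `PopulateRecursive` for an outer loop now reads flags of its inner loops, and post order finishes inner loops first. A toy standalone model of that dependency (assumed names, not ART code):

    #include <cstdio>
    #include <vector>

    struct Loop {
      bool irreducible = false;
      bool contains_irreducible = false;
      std::vector<Loop*> inner;
    };

    void Populate(Loop* loop) {
      for (Loop* child : loop->inner) {
        Populate(child);  // post order: the inner loop is finished first
        loop->contains_irreducible |=
            child->irreducible || child->contains_irreducible;
      }
      loop->contains_irreducible |= loop->irreducible;
    }

    int main() {
      Loop inner, outer;
      inner.irreducible = true;
      outer.inner.push_back(&inner);
      Populate(&outer);
      std::printf("%d\n", outer.contains_irreducible);  // prints 1
      return 0;
    }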
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 934d355e82..12ea059d3f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -650,6 +650,7 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
: header_(header),
suspend_check_(nullptr),
irreducible_(false),
+ contains_irreducible_loop_(false),
back_edges_(graph->GetArena()->Adapter(kArenaAllocLoopInfoBackEdges)),
// Make bit vector growable, as the number of blocks may change.
blocks_(graph->GetArena(), graph->GetBlocks().size(), true, kArenaAllocLoopInfoBackEdges) {
@@ -657,6 +658,7 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
}
bool IsIrreducible() const { return irreducible_; }
+ bool ContainsIrreducibleLoop() const { return contains_irreducible_loop_; }
void Dump(std::ostream& os);
@@ -727,6 +729,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
bool HasBackEdgeNotDominatedByHeader() const;
+ bool IsPopulated() const {
+ return blocks_.GetHighestBitSet() != -1;
+ }
+
private:
// Internal recursive implementation of `Populate`.
void PopulateRecursive(HBasicBlock* block);
@@ -735,6 +741,7 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> {
HBasicBlock* header_;
HSuspendCheck* suspend_check_;
bool irreducible_;
+ bool contains_irreducible_loop_;
ArenaVector<HBasicBlock*> back_edges_;
ArenaBitVector blocks_;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index b1f9cbcdfa..4405b803e0 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1773,7 +1773,9 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
// therefore will not have a location for that instruction for `to`.
// Because the instruction is a constant or the ArtMethod, we don't need to
// do anything: it will be materialized in the irreducible loop.
- DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by));
+ DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by))
+ << defined_by->DebugName() << ":" << defined_by->GetId()
+ << " " << from->GetBlockId() << " -> " << to->GetBlockId();
return;
}
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 5534aeac29..36e0d993d1 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -309,17 +309,8 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
}
if (block->IsLoopHeader()) {
- if (kIsDebugBuild && block->GetLoopInformation()->IsIrreducible()) {
- // To satisfy our liveness algorithm, we need to ensure loop headers of
- // irreducible loops do not have any live-in instructions, except constants
- // and the current method, which can be trivially re-materialized.
- for (uint32_t idx : live_in->Indexes()) {
- HInstruction* instruction = GetInstructionFromSsaIndex(idx);
- DCHECK(instruction->GetBlock()->IsEntryBlock()) << instruction->DebugName();
- DCHECK(!instruction->IsParameterValue());
- DCHECK(instruction->IsCurrentMethod() || instruction->IsConstant())
- << instruction->DebugName();
- }
+ if (kIsDebugBuild) {
+ CheckNoLiveInIrreducibleLoop(*block);
}
size_t last_position = block->GetLoopInformation()->GetLifetimeEnd();
// For all live_in instructions at the loop header, we need to create a range
@@ -344,6 +335,9 @@ void SsaLivenessAnalysis::ComputeLiveInAndLiveOutSets() {
// change in this loop), and the live_out set. If the live_out
// set does not change, there is no need to update the live_in set.
if (UpdateLiveOut(block) && UpdateLiveIn(block)) {
+ if (kIsDebugBuild) {
+ CheckNoLiveInIrreducibleLoop(block);
+ }
changed = true;
}
}
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 1141fd1c76..1fcba8bc77 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -1260,6 +1260,23 @@ class SsaLivenessAnalysis : public ValueObject {
return instruction->GetType() == Primitive::kPrimNot;
}
+ void CheckNoLiveInIrreducibleLoop(const HBasicBlock& block) const {
+ if (!block.IsLoopHeader() || !block.GetLoopInformation()->IsIrreducible()) {
+ return;
+ }
+ BitVector* live_in = GetLiveInSet(block);
+ // To satisfy our liveness algorithm, we need to ensure loop headers of
+ // irreducible loops do not have any live-in instructions, except constants
+ // and the current method, which can be trivially re-materialized.
+ for (uint32_t idx : live_in->Indexes()) {
+ HInstruction* instruction = GetInstructionFromSsaIndex(idx);
+ DCHECK(instruction->GetBlock()->IsEntryBlock()) << instruction->DebugName();
+ DCHECK(!instruction->IsParameterValue());
+ DCHECK(instruction->IsCurrentMethod() || instruction->IsConstant())
+ << instruction->DebugName();
+ }
+ }
+
HGraph* const graph_;
CodeGenerator* const codegen_;
ArenaVector<BlockInfo*> block_infos_;
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 2203646e77..84cdb7d4d3 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1030,6 +1030,14 @@ void X86Assembler::xchgl(Register reg, const Address& address) {
}
+void X86Assembler::cmpb(const Address& address, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x80);
+ EmitOperand(7, address);
+ EmitUint8(imm.value() & 0xFF);
+}
+
+
void X86Assembler::cmpw(const Address& address, const Immediate& imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
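
The new cmpb uses opcode 0x80 with ModRM reg field /7, the CMP r/m8, imm8 form. A standalone sketch re-deriving the bytes behind the new Cmpb test below; the displacement 128 does not fit a signed byte, so the disp32 form is used (hand-derived, not ART code):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<uint8_t> buf;
      buf.push_back(0x80);  // CMP r/m8, imm8
      buf.push_back(0xBF);  // ModRM: mod=10 (disp32), reg=7 (/7), rm=7 (EDI)
      for (int i = 0; i < 4; ++i) {
        buf.push_back((128u >> (8 * i)) & 0xFF);  // disp32 = 128, little-endian
      }
      buf.push_back(0x00);  // imm8 = 0
      for (uint8_t b : buf) std::printf("%02x ", b);
      std::printf("\n");  // 80 bf 80 00 00 00 00  ==  cmpb $0, 128(%edi)
      return 0;
    }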
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 8567ad2a17..bc46e9f7c9 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -479,6 +479,7 @@ class X86Assembler FINAL : public Assembler {
void xchgl(Register dst, Register src);
void xchgl(Register reg, const Address& address);
+ void cmpb(const Address& address, const Immediate& imm);
void cmpw(const Address& address, const Immediate& imm);
void cmpl(Register reg, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 1d1df6e447..28043c9380 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -389,4 +389,10 @@ TEST_F(AssemblerX86Test, NearLabel) {
DriverStr(expected, "near_label");
}
+TEST_F(AssemblerX86Test, Cmpb) {
+ GetAssembler()->cmpb(x86::Address(x86::EDI, 128), x86::Immediate(0));
+ const char* expected = "cmpb $0, 128(%EDI)\n";
+ DriverStr(expected, "cmpb");
+}
+
} // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 32eb4a37bf..5e7b587e40 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1224,6 +1224,16 @@ void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
}
+void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ CHECK(imm.is_int32());
+ EmitOptionalRex32(address);
+ EmitUint8(0x80);
+ EmitOperand(7, address);
+ EmitUint8(imm.value() & 0xFF);
+}
+
+
void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
CHECK(imm.is_int32());
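
The x86-64 version differs only in EmitOptionalRex32(address), which emits a REX prefix just when the address references an extended register. Hand-derived byte sequences under that assumption (illustrative, not from the ART tests):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // REX.B (0x41) marks the base as r8-r15; for rdi no prefix is needed.
      const uint8_t rdi[] = {0x80, 0xBF, 0x80, 0x00, 0x00, 0x00, 0x00};
      const uint8_t r15[] = {0x41, 0x80, 0xBF, 0x80, 0x00, 0x00, 0x00, 0x00};
      std::printf("cmpb $0, 128(%%rdi): %zu bytes\n", sizeof(rdi));  // 7
      std::printf("cmpb $0, 128(%%r15): %zu bytes\n", sizeof(r15));  // 8
      return 0;
    }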
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 92c7d0ab99..720a402b5f 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -506,6 +506,7 @@ class X86_64Assembler FINAL : public Assembler {
void xchgq(CpuRegister dst, CpuRegister src);
void xchgl(CpuRegister reg, const Address& address);
+ void cmpb(const Address& address, const Immediate& imm);
void cmpw(const Address& address, const Immediate& imm);
void cmpl(CpuRegister reg, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index afe9207eb1..9dccc9f21f 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1637,4 +1637,11 @@ TEST_F(AssemblerX86_64Test, Repecmpsq) {
DriverStr(expected, "Repecmpsq");
}
+TEST_F(AssemblerX86_64Test, Cmpb) {
+ GetAssembler()->cmpb(x86_64::Address(x86_64::CpuRegister(x86_64::RDI), 128),
+ x86_64::Immediate(0));
+ const char* expected = "cmpb $0, 128(%RDI)\n";
+ DriverStr(expected, "cmpb");
+}
+
} // namespace art