diff options
author | 2021-07-09 17:06:03 +0100 | |
---|---|---|
committer | 2022-06-08 14:06:51 +0000 | |
commit | c54cc7cb18262fe77ed8da6a56acbf00d5bc5a71 (patch) | |
tree | 1567e04711f45ddee7ab65c27a38fa9f7f17f7d5 /compiler/optimizing | |
parent | 4ea489a240cf6d241a7b0313c2981bf5f32f7d7f (diff) |
Revert^2 "ART: Removes SuspendCheck for plain
loops with a low trip count."
This change removes SuspendCheck for plain loops with a low trip count.
The SuspendCheck in the codegen makes sure that the thread can be
interrupted during execution for GC. Not being able to do so might
decrease the responsiveness of GC in the case when a very long loop
or a long recursion is being executed.
However, for plain loops with a small trip count, the removal of
SuspendCheck should not affect the GC's responsiveness by a large
margin. Consequently, since the thread won't be interrupted for
plain loops, it is assumed that the performance might increase
by removing SuspendCheck.
Also add explicit checks to existing code to ensure that SuspendCheck
is never null when it is used.
This reverts commit 8f6b99fba2d043265a84d599a967d52f66738ad6
Reason for revert: Included fix for CHAGuardVisitor::HoistGuard crash
by disabling codegen for optimized SuspendCheck nodes instead of
removing the SuspendCheck node itself.
Test: art/test.py -v -j12 --host --64 -t 2233-checker\
-remove-loop-suspend-check --run-test --optimizing
Change-Id: Id6296aded91e1cf49b8f3f339dc83613cbedf876
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/bounds_check_elimination.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/cha_guard_optimization.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/code_generator.cc | 2 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 7 | ||||
-rw-r--r-- | compiler/optimizing/graph_checker.cc | 11 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 58 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.h | 20 | ||||
-rw-r--r-- | compiler/optimizing/nodes.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 25 |
9 files changed, 112 insertions, 14 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index dad3c818fa..0b11a6fd1b 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1818,6 +1818,7 @@ class BCEVisitor : public HGraphVisitor { HInstruction* condition, bool is_null_check = false) { HInstruction* suspend = loop->GetSuspendCheck(); + DCHECK(suspend != nullptr); block->InsertInstructionBefore(condition, block->GetLastInstruction()); DeoptimizationKind kind = is_null_check ? DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE; diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc index c6232ef661..d231593792 100644 --- a/compiler/optimizing/cha_guard_optimization.cc +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -200,6 +200,7 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag, block->RemoveInstruction(deopt); HInstruction* suspend = loop_info->GetSuspendCheck(); + DCHECK(suspend != nullptr); // Need a new deoptimize instruction that copies the environment // of the suspend instruction for the loop. HDeoptimize* deoptimize = new (GetGraph()->GetAllocator()) HDeoptimize( diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 27eabafb8f..8bd4406332 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -1123,7 +1123,7 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, for (HBasicBlock* block : graph.GetReversePostOrder()) { if (block->IsLoopHeader()) { HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); - if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { + if (suspend_check != nullptr && !suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { loop_headers.push_back(suspend_check); } } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 2a0b481b2d..2f8c0b22e7 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1974,6 +1974,13 @@ bool CodeGeneratorARM64::CanUseImplicitSuspendCheck() const { void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { + if (instruction->IsNoOp()) { + if (successor != nullptr) { + __ B(codegen_->GetLabelOf(successor)); + } + return; + } + if (codegen_->CanUseImplicitSuspendCheck()) { __ Ldr(kImplicitSuspendCheckRegister, MemOperand(kImplicitSuspendCheckRegister)); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index d1769cea0d..eda6363dda 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -674,13 +674,14 @@ void GraphChecker::HandleLoop(HBasicBlock* loop_header) { loop_information->GetPreHeader()->GetSuccessors().size())); } - if (loop_information->GetSuspendCheck() == nullptr) { - AddError(StringPrintf( - "Loop with header %d does not have a suspend check.", - loop_header->GetBlockId())); + if (!GetGraph()->SuspendChecksAreAllowedToNoOp() && + loop_information->GetSuspendCheck() == nullptr) { + AddError(StringPrintf("Loop with header %d does not have a suspend check.", + loop_header->GetBlockId())); } - if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) { + if (!GetGraph()->SuspendChecksAreAllowedToNoOp() && + loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) { AddError(StringPrintf( "Loop header %d does not have the loop suspend check as the first instruction.", loop_header->GetBlockId())); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 2d7c20825c..604d3d2522 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -681,6 +681,50 @@ void HLoopOptimization::CalculateAndSetTryCatchKind(LoopNode* node) { } // +// This optimization applies to loops with plain simple operations +// (I.e. no calls to java code or runtime) with a known small trip_count * instr_count +// value. +// +bool HLoopOptimization::TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info, + bool generate_code) { + if (!graph_->SuspendChecksAreAllowedToNoOp()) { + return false; + } + + int64_t trip_count = analysis_info->GetTripCount(); + + if (trip_count == LoopAnalysisInfo::kUnknownTripCount) { + return false; + } + + int64_t instruction_count = analysis_info->GetNumberOfInstructions(); + int64_t total_instruction_count = trip_count * instruction_count; + + // The inclusion of the HasInstructionsPreventingScalarOpts() prevents this + // optimization from being applied to loops that have calls. + bool can_optimize = + total_instruction_count <= HLoopOptimization::kMaxTotalInstRemoveSuspendCheck && + !analysis_info->HasInstructionsPreventingScalarOpts(); + + if (!can_optimize) { + return false; + } + + // If we should do the optimization, disable codegen for the SuspendCheck. + if (generate_code) { + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + HBasicBlock* header = loop_info->GetHeader(); + HSuspendCheck* instruction = header->GetLoopInformation()->GetSuspendCheck(); + // As other optimizations depend on SuspendCheck + // (e.g: CHAGuardVisitor::HoistGuard), disable its codegen instead of + // removing the SuspendCheck instruction. + instruction->SetIsNoOp(true); + } + + return true; +} + +// // Optimization. // @@ -824,7 +868,7 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { } bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { - return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node); + return TryOptimizeInnerLoopFinite(node) || TryLoopScalarOpts(node); } // @@ -928,7 +972,7 @@ bool HLoopOptimization::TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool g return true; } -bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { +bool HLoopOptimization::TryLoopScalarOpts(LoopNode* node) { HLoopInformation* loop_info = node->loop_info; int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_); LoopAnalysisInfo analysis_info(loop_info); @@ -941,10 +985,16 @@ bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { if (!TryFullUnrolling(&analysis_info, /*generate_code*/ false) && !TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) && - !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) { + !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false) && + !TryToRemoveSuspendCheckFromLoopHeader(&analysis_info, /*generate_code*/ false)) { return false; } + // Try the suspend check removal even for non-clonable loops. Also this + // optimization doesn't interfere with other scalar loop optimizations so it can + // be done prior to them. + bool removed_suspend_check = TryToRemoveSuspendCheckFromLoopHeader(&analysis_info); + // Run 'IsLoopClonable' the last as it might be time-consuming. if (!LoopClonerHelper::IsLoopClonable(loop_info)) { return false; @@ -952,7 +1002,7 @@ bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { return TryFullUnrolling(&analysis_info) || TryPeelingForLoopInvariantExitsElimination(&analysis_info) || - TryUnrollingForBranchPenaltyReduction(&analysis_info); + TryUnrollingForBranchPenaltyReduction(&analysis_info) || removed_suspend_check; } // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index b17861648f..0535c74e91 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -47,6 +47,11 @@ class HLoopOptimization : public HOptimization { static constexpr const char* kLoopOptimizationPassName = "loop_optimization"; + // The maximum number of total instructions (trip_count * instruction_count), + // where the optimization of removing SuspendChecks from the loop header could + // be performed. + static constexpr int64_t kMaxTotalInstRemoveSuspendCheck = 128; + private: /** * A single loop inside the loop hierarchy representation. @@ -179,8 +184,19 @@ class HLoopOptimization : public HOptimization { // should be actually applied. bool TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code = true); - // Tries to apply scalar loop peeling and unrolling. - bool TryPeelingAndUnrolling(LoopNode* node); + // Tries to remove SuspendCheck for plain loops with a low trip count. The + // SuspendCheck in the codegen makes sure that the thread can be interrupted + // during execution for GC. Not being able to do so might decrease the + // responsiveness of GC when a very long loop or a long recursion is being + // executed. However, for plain loops with a small trip count, the removal of + // SuspendCheck should not affect the GC's responsiveness by a large margin. + // Consequently, since the thread won't be interrupted for plain loops, it is + // assumed that the performance might increase by removing SuspendCheck. + bool TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info, + bool generate_code = true); + + // Tries to apply scalar loop optimizations. + bool TryLoopScalarOpts(LoopNode* node); // // Vectorization analysis and synthesis. diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index d35ed1c543..90c8f748a5 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -3047,6 +3047,7 @@ HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, HSuspendCheck* suspend_check = new (allocator_) HSuspendCheck(header->GetDexPc()); new_header->AddInstruction(suspend_check); new_body->AddInstruction(new (allocator_) HGoto()); + DCHECK(loop->GetSuspendCheck() != nullptr); suspend_check->CopyEnvironmentFromWithLoopPhiAdjustment( loop->GetSuspendCheck()->GetEnvironment(), header); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 7a0059f616..0767bf5580 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -678,6 +678,13 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return cha_single_implementation_list_; } + // In case of OSR we intend to use SuspendChecks as an entry point to the + // function; for debuggable graphs we might deoptimize to interpreter from + // SuspendChecks. In these cases we should always generate code for them. + bool SuspendChecksAreAllowedToNoOp() const { + return !IsDebuggable() && !IsCompilingOsr(); + } + void AddCHASingleImplementationDependency(ArtMethod* method) { cha_single_implementation_list_.insert(method); } @@ -719,7 +726,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return ReferenceTypeInfo::Create(handle_cache_.GetObjectClassHandle(), /* is_exact= */ false); } - uint32_t GetNumberOfCHAGuards() { return number_of_cha_guards_; } + uint32_t GetNumberOfCHAGuards() const { return number_of_cha_guards_; } void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; } void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; } @@ -6714,9 +6721,10 @@ class HBoundsCheck final : public HExpression<2> { class HSuspendCheck final : public HExpression<0> { public: - explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc) + explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc, bool is_no_op = false) : HExpression(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) { + SetPackedFlag<kFlagIsNoOp>(is_no_op); } bool IsClonable() const override { return true; } @@ -6725,6 +6733,10 @@ class HSuspendCheck final : public HExpression<0> { return true; } + void SetIsNoOp(bool is_no_op) { SetPackedFlag<kFlagIsNoOp>(is_no_op); } + bool IsNoOp() const { return GetPackedFlag<kFlagIsNoOp>(); } + + void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; } SlowPathCode* GetSlowPath() const { return slow_path_; } @@ -6733,6 +6745,15 @@ class HSuspendCheck final : public HExpression<0> { protected: DEFAULT_COPY_CONSTRUCTOR(SuspendCheck); + // True if the HSuspendCheck should not emit any code during codegen. It is + // not possible to simply remove this instruction to disable codegen, as + // other optimizations (e.g: CHAGuardVisitor::HoistGuard) depend on + // HSuspendCheck being present in every loop. + static constexpr size_t kFlagIsNoOp = kNumberOfGenericPackedBits; + static constexpr size_t kNumberOfSuspendCheckPackedBits = kFlagIsNoOp + 1; + static_assert(kNumberOfSuspendCheckPackedBits <= HInstruction::kMaxNumberOfPackedBits, + "Too many packed fields."); + private: // Only used for code generation, in order to share the same slow path between back edges // of a same loop. |