diff options
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/bounds_check_elimination.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/cha_guard_optimization.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/code_generator.cc | 2 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 7 | ||||
-rw-r--r-- | compiler/optimizing/graph_checker.cc | 11 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 58 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.h | 20 | ||||
-rw-r--r-- | compiler/optimizing/nodes.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 25 |
9 files changed, 112 insertions, 14 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index dad3c818fa..0b11a6fd1b 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1818,6 +1818,7 @@ class BCEVisitor : public HGraphVisitor { HInstruction* condition, bool is_null_check = false) { HInstruction* suspend = loop->GetSuspendCheck(); + DCHECK(suspend != nullptr); block->InsertInstructionBefore(condition, block->GetLastInstruction()); DeoptimizationKind kind = is_null_check ? DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE; diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc index c6232ef661..d231593792 100644 --- a/compiler/optimizing/cha_guard_optimization.cc +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -200,6 +200,7 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag, block->RemoveInstruction(deopt); HInstruction* suspend = loop_info->GetSuspendCheck(); + DCHECK(suspend != nullptr); // Need a new deoptimize instruction that copies the environment // of the suspend instruction for the loop. HDeoptimize* deoptimize = new (GetGraph()->GetAllocator()) HDeoptimize( diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 27eabafb8f..8bd4406332 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -1123,7 +1123,7 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, for (HBasicBlock* block : graph.GetReversePostOrder()) { if (block->IsLoopHeader()) { HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); - if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { + if (suspend_check != nullptr && !suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { loop_headers.push_back(suspend_check); } } diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 2a0b481b2d..2f8c0b22e7 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1974,6 +1974,13 @@ bool CodeGeneratorARM64::CanUseImplicitSuspendCheck() const { void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { + if (instruction->IsNoOp()) { + if (successor != nullptr) { + __ B(codegen_->GetLabelOf(successor)); + } + return; + } + if (codegen_->CanUseImplicitSuspendCheck()) { __ Ldr(kImplicitSuspendCheckRegister, MemOperand(kImplicitSuspendCheckRegister)); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index d1769cea0d..eda6363dda 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -674,13 +674,14 @@ void GraphChecker::HandleLoop(HBasicBlock* loop_header) { loop_information->GetPreHeader()->GetSuccessors().size())); } - if (loop_information->GetSuspendCheck() == nullptr) { - AddError(StringPrintf( - "Loop with header %d does not have a suspend check.", - loop_header->GetBlockId())); + if (!GetGraph()->SuspendChecksAreAllowedToNoOp() && + loop_information->GetSuspendCheck() == nullptr) { + AddError(StringPrintf("Loop with header %d does not have a suspend check.", + loop_header->GetBlockId())); } - if (loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) { + if (!GetGraph()->SuspendChecksAreAllowedToNoOp() && + loop_information->GetSuspendCheck() != loop_header->GetFirstInstructionDisregardMoves()) { AddError(StringPrintf( "Loop header %d does not have the loop suspend check as the first instruction.", loop_header->GetBlockId())); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 2d7c20825c..604d3d2522 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -681,6 +681,50 @@ void HLoopOptimization::CalculateAndSetTryCatchKind(LoopNode* node) { } // +// This optimization applies to loops with plain simple operations +// (I.e. no calls to java code or runtime) with a known small trip_count * instr_count +// value. +// +bool HLoopOptimization::TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info, + bool generate_code) { + if (!graph_->SuspendChecksAreAllowedToNoOp()) { + return false; + } + + int64_t trip_count = analysis_info->GetTripCount(); + + if (trip_count == LoopAnalysisInfo::kUnknownTripCount) { + return false; + } + + int64_t instruction_count = analysis_info->GetNumberOfInstructions(); + int64_t total_instruction_count = trip_count * instruction_count; + + // The inclusion of the HasInstructionsPreventingScalarOpts() prevents this + // optimization from being applied to loops that have calls. + bool can_optimize = + total_instruction_count <= HLoopOptimization::kMaxTotalInstRemoveSuspendCheck && + !analysis_info->HasInstructionsPreventingScalarOpts(); + + if (!can_optimize) { + return false; + } + + // If we should do the optimization, disable codegen for the SuspendCheck. + if (generate_code) { + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + HBasicBlock* header = loop_info->GetHeader(); + HSuspendCheck* instruction = header->GetLoopInformation()->GetSuspendCheck(); + // As other optimizations depend on SuspendCheck + // (e.g: CHAGuardVisitor::HoistGuard), disable its codegen instead of + // removing the SuspendCheck instruction. + instruction->SetIsNoOp(true); + } + + return true; +} + +// // Optimization. // @@ -824,7 +868,7 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { } bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { - return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node); + return TryOptimizeInnerLoopFinite(node) || TryLoopScalarOpts(node); } // @@ -928,7 +972,7 @@ bool HLoopOptimization::TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool g return true; } -bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { +bool HLoopOptimization::TryLoopScalarOpts(LoopNode* node) { HLoopInformation* loop_info = node->loop_info; int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_); LoopAnalysisInfo analysis_info(loop_info); @@ -941,10 +985,16 @@ bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { if (!TryFullUnrolling(&analysis_info, /*generate_code*/ false) && !TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) && - !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) { + !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false) && + !TryToRemoveSuspendCheckFromLoopHeader(&analysis_info, /*generate_code*/ false)) { return false; } + // Try the suspend check removal even for non-clonable loops. Also this + // optimization doesn't interfere with other scalar loop optimizations so it can + // be done prior to them. + bool removed_suspend_check = TryToRemoveSuspendCheckFromLoopHeader(&analysis_info); + // Run 'IsLoopClonable' the last as it might be time-consuming. if (!LoopClonerHelper::IsLoopClonable(loop_info)) { return false; @@ -952,7 +1002,7 @@ bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { return TryFullUnrolling(&analysis_info) || TryPeelingForLoopInvariantExitsElimination(&analysis_info) || - TryUnrollingForBranchPenaltyReduction(&analysis_info); + TryUnrollingForBranchPenaltyReduction(&analysis_info) || removed_suspend_check; } // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index b17861648f..0535c74e91 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -47,6 +47,11 @@ class HLoopOptimization : public HOptimization { static constexpr const char* kLoopOptimizationPassName = "loop_optimization"; + // The maximum number of total instructions (trip_count * instruction_count), + // where the optimization of removing SuspendChecks from the loop header could + // be performed. + static constexpr int64_t kMaxTotalInstRemoveSuspendCheck = 128; + private: /** * A single loop inside the loop hierarchy representation. @@ -179,8 +184,19 @@ class HLoopOptimization : public HOptimization { // should be actually applied. bool TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code = true); - // Tries to apply scalar loop peeling and unrolling. - bool TryPeelingAndUnrolling(LoopNode* node); + // Tries to remove SuspendCheck for plain loops with a low trip count. The + // SuspendCheck in the codegen makes sure that the thread can be interrupted + // during execution for GC. Not being able to do so might decrease the + // responsiveness of GC when a very long loop or a long recursion is being + // executed. However, for plain loops with a small trip count, the removal of + // SuspendCheck should not affect the GC's responsiveness by a large margin. + // Consequently, since the thread won't be interrupted for plain loops, it is + // assumed that the performance might increase by removing SuspendCheck. + bool TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info, + bool generate_code = true); + + // Tries to apply scalar loop optimizations. + bool TryLoopScalarOpts(LoopNode* node); // // Vectorization analysis and synthesis. diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index d35ed1c543..90c8f748a5 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -3047,6 +3047,7 @@ HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, HSuspendCheck* suspend_check = new (allocator_) HSuspendCheck(header->GetDexPc()); new_header->AddInstruction(suspend_check); new_body->AddInstruction(new (allocator_) HGoto()); + DCHECK(loop->GetSuspendCheck() != nullptr); suspend_check->CopyEnvironmentFromWithLoopPhiAdjustment( loop->GetSuspendCheck()->GetEnvironment(), header); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 7a0059f616..0767bf5580 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -678,6 +678,13 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return cha_single_implementation_list_; } + // In case of OSR we intend to use SuspendChecks as an entry point to the + // function; for debuggable graphs we might deoptimize to interpreter from + // SuspendChecks. In these cases we should always generate code for them. + bool SuspendChecksAreAllowedToNoOp() const { + return !IsDebuggable() && !IsCompilingOsr(); + } + void AddCHASingleImplementationDependency(ArtMethod* method) { cha_single_implementation_list_.insert(method); } @@ -719,7 +726,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return ReferenceTypeInfo::Create(handle_cache_.GetObjectClassHandle(), /* is_exact= */ false); } - uint32_t GetNumberOfCHAGuards() { return number_of_cha_guards_; } + uint32_t GetNumberOfCHAGuards() const { return number_of_cha_guards_; } void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; } void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; } @@ -6714,9 +6721,10 @@ class HBoundsCheck final : public HExpression<2> { class HSuspendCheck final : public HExpression<0> { public: - explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc) + explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc, bool is_no_op = false) : HExpression(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) { + SetPackedFlag<kFlagIsNoOp>(is_no_op); } bool IsClonable() const override { return true; } @@ -6725,6 +6733,10 @@ class HSuspendCheck final : public HExpression<0> { return true; } + void SetIsNoOp(bool is_no_op) { SetPackedFlag<kFlagIsNoOp>(is_no_op); } + bool IsNoOp() const { return GetPackedFlag<kFlagIsNoOp>(); } + + void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; } SlowPathCode* GetSlowPath() const { return slow_path_; } @@ -6733,6 +6745,15 @@ class HSuspendCheck final : public HExpression<0> { protected: DEFAULT_COPY_CONSTRUCTOR(SuspendCheck); + // True if the HSuspendCheck should not emit any code during codegen. It is + // not possible to simply remove this instruction to disable codegen, as + // other optimizations (e.g: CHAGuardVisitor::HoistGuard) depend on + // HSuspendCheck being present in every loop. + static constexpr size_t kFlagIsNoOp = kNumberOfGenericPackedBits; + static constexpr size_t kNumberOfSuspendCheckPackedBits = kFlagIsNoOp + 1; + static_assert(kNumberOfSuspendCheckPackedBits <= HInstruction::kMaxNumberOfPackedBits, + "Too many packed fields."); + private: // Only used for code generation, in order to share the same slow path between back edges // of a same loop. |