diff options
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/debug/elf_debug_info_writer.h | 3 | ||||
| -rw-r--r-- | compiler/debug/elf_debug_line_writer.h | 4 | ||||
| -rw-r--r-- | compiler/optimizing/instruction_builder.cc | 32 | ||||
| -rw-r--r-- | compiler/optimizing/loop_analysis.cc | 56 | ||||
| -rw-r--r-- | compiler/optimizing/loop_analysis.h | 53 | ||||
| -rw-r--r-- | compiler/optimizing/loop_optimization.cc | 118 | ||||
| -rw-r--r-- | compiler/optimizing/loop_optimization.h | 15 | ||||
| -rw-r--r-- | compiler/optimizing/nodes.cc | 13 | ||||
| -rw-r--r-- | compiler/optimizing/nodes.h | 1 | ||||
| -rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 8 | ||||
| -rw-r--r-- | compiler/optimizing/optimizing_compiler_stats.h | 1 | ||||
| -rw-r--r-- | compiler/optimizing/optimizing_unit_test.h | 6 | ||||
| -rw-r--r-- | compiler/optimizing/ssa_builder.cc | 79 | ||||
| -rw-r--r-- | compiler/optimizing/ssa_builder.h | 9 | ||||
| -rw-r--r-- | compiler/optimizing/stack_map_stream.cc | 10 | ||||
| -rw-r--r-- | compiler/optimizing/stack_map_test.cc | 78 |
16 files changed, 292 insertions, 194 deletions
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h index f2a942f34a..bda7108c74 100644 --- a/compiler/debug/elf_debug_info_writer.h +++ b/compiler/debug/elf_debug_info_writer.h @@ -208,8 +208,7 @@ class ElfCompilationUnitWriter { std::vector<DexRegisterMap> dex_reg_maps; if (accessor.HasCodeItem() && mi->code_info != nullptr) { code_info.reset(new CodeInfo(mi->code_info)); - for (size_t s = 0; s < code_info->GetNumberOfStackMaps(); ++s) { - const StackMap stack_map = code_info->GetStackMapAt(s); + for (StackMap stack_map : code_info->GetStackMaps()) { dex_reg_maps.push_back(code_info->GetDexRegisterMapOf(stack_map)); } } diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h index a7adab5506..3d78943cd0 100644 --- a/compiler/debug/elf_debug_line_writer.h +++ b/compiler/debug/elf_debug_line_writer.h @@ -101,9 +101,7 @@ class ElfDebugLineWriter { // Use stack maps to create mapping table from pc to dex. const CodeInfo code_info(mi->code_info); pc2dex_map.reserve(code_info.GetNumberOfStackMaps()); - for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) { - StackMap stack_map = code_info.GetStackMapAt(s); - DCHECK(stack_map.IsValid()); + for (StackMap stack_map : code_info.GetStackMaps()) { const uint32_t pc = stack_map.GetNativePcOffset(isa); const int32_t dex = stack_map.GetDexPc(); pc2dex_map.push_back({pc, dex}); diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 7d918c47ca..731accd692 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -1308,29 +1308,25 @@ bool HInstructionBuilder::HandleStringInit(HInvoke* invoke, HInstruction* arg_this = LoadLocal(orig_this_reg, DataType::Type::kReference); // Replacing the NewInstance might render it redundant. Keep a list of these - // to be visited once it is clear whether it is has remaining uses. + // to be visited once it is clear whether it has remaining uses. if (arg_this->IsNewInstance()) { ssa_builder_->AddUninitializedString(arg_this->AsNewInstance()); + // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`. + for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) { + if ((*current_locals_)[vreg] == arg_this) { + (*current_locals_)[vreg] = invoke; + } + } } else { - // The only reason a HPhi can flow in a String.<init> is when there is an - // irreducible loop, which will create HPhi for all dex registers at loop entry. DCHECK(arg_this->IsPhi()); - // TODO(b/109666561): Re-enable. - // DCHECK(graph_->HasIrreducibleLoops()); - // Don't bother compiling a method in that situation. While we could look at all - // phis related to the HNewInstance, it's not worth the trouble. - MaybeRecordStat(compilation_stats_, - MethodCompilationStat::kNotCompiledIrreducibleAndStringInit); - return false; + // We can get a phi as input of a String.<init> if there is a loop between the + // allocation and the String.<init> call. As we don't know which other phis might alias + // with `arg_this`, we keep a record of these phis and will analyze their inputs and + // uses once the inputs and users are populated (in ssa_builder.cc). + // Note: we only do this for phis, as it is a somewhat more expensive operation than + // what we're doing above when the input is the `HNewInstance`. + ssa_builder_->AddUninitializedStringPhi(arg_this->AsPhi(), invoke); } - - // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`. - for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) { - if ((*current_locals_)[vreg] == arg_this) { - (*current_locals_)[vreg] = invoke; - } - } - return true; } diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc index a2124455e2..efb23e7d3e 100644 --- a/compiler/optimizing/loop_analysis.cc +++ b/compiler/optimizing/loop_analysis.cc @@ -17,19 +17,34 @@ #include "loop_analysis.h" #include "base/bit_vector-inl.h" +#include "induction_var_range.h" namespace art { void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, - LoopAnalysisInfo* analysis_results) { + LoopAnalysisInfo* analysis_results, + int64_t trip_count) { + analysis_results->trip_count_ = trip_count; + for (HBlocksInLoopIterator block_it(*loop_info); !block_it.Done(); block_it.Advance()) { HBasicBlock* block = block_it.Current(); + // Check whether one of the successor is loop exit. for (HBasicBlock* successor : block->GetSuccessors()) { if (!loop_info->Contains(*successor)) { analysis_results->exits_num_++; + + // We track number of invariant loop exits which correspond to HIf instruction and + // can be eliminated by loop peeling; other control flow instruction are ignored and will + // not cause loop peeling to happen as they either cannot be inside a loop, or by + // definition cannot be loop exits (unconditional instructions), or are not beneficial for + // the optimization. + HIf* hif = block->GetLastInstruction()->AsIf(); + if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) { + analysis_results->invariant_exits_num_++; + } } } @@ -48,20 +63,13 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, } } -bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) { - HGraph* graph = loop_info->GetHeader()->GetGraph(); - for (uint32_t block_id : loop_info->GetBlocks().Indexes()) { - HBasicBlock* block = graph->GetBlocks()[block_id]; - DCHECK(block != nullptr); - if (block->EndsWithIf()) { - HIf* hif = block->GetLastInstruction()->AsIf(); - HInstruction* input = hif->InputAt(0); - if (IsLoopExit(loop_info, hif) && !loop_info->Contains(*input->GetBlock())) { - return true; - } - } +int64_t LoopAnalysis::GetLoopTripCount(HLoopInformation* loop_info, + const InductionVarRange* induction_range) { + int64_t trip_count; + if (!induction_range->HasKnownTripCount(loop_info, &trip_count)) { + trip_count = LoopAnalysisInfo::kUnknownTripCount; } - return false; + return trip_count; } // Default implementation of loop helper; used for all targets unless a custom implementation @@ -77,18 +85,22 @@ class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper { // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled. static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6; - bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE { - return loop_analysis_info->HasLongTypeInstructions() || - IsLoopTooBig(loop_analysis_info, + bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const OVERRIDE { + return analysis_info->HasLongTypeInstructions() || + IsLoopTooBig(analysis_info, kScalarHeuristicMaxBodySizeInstr, kScalarHeuristicMaxBodySizeBlocks); } - uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED, - uint64_t trip_count) const OVERRIDE { + uint32_t GetScalarUnrollingFactor(const LoopAnalysisInfo* analysis_info) const OVERRIDE { + int64_t trip_count = analysis_info->GetTripCount(); + // Unroll only loops with known trip count. + if (trip_count == LoopAnalysisInfo::kUnknownTripCount) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor; if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) { - return kNoUnrollingFactor; + return LoopAnalysisInfo::kNoUnrollingFactor; } return desired_unrolling_factor; @@ -136,12 +148,12 @@ class Arm64LoopHelper : public ArchDefaultLoopHelper { // TODO: Unroll loops with unknown trip count. DCHECK_NE(vector_length, 0u); if (trip_count < (2 * vector_length + max_peel)) { - return kNoUnrollingFactor; + return LoopAnalysisInfo::kNoUnrollingFactor; } // Don't unroll for large loop body size. uint32_t instruction_count = block->GetInstructions().CountSize(); if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) { - return kNoUnrollingFactor; + return LoopAnalysisInfo::kNoUnrollingFactor; } // Find a beneficial unroll factor with the following restrictions: // - At least one iteration of the transformed loop should be executed. diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h index 7f321b73c8..bcb7b70494 100644 --- a/compiler/optimizing/loop_analysis.h +++ b/compiler/optimizing/loop_analysis.h @@ -21,26 +21,33 @@ namespace art { +class InductionVarRange; class LoopAnalysis; -// No loop unrolling factor (just one copy of the loop-body). -static constexpr uint32_t kNoUnrollingFactor = 1; - // Class to hold cached information on properties of the loop. class LoopAnalysisInfo : public ValueObject { public: + // No loop unrolling factor (just one copy of the loop-body). + static constexpr uint32_t kNoUnrollingFactor = 1; + // Used for unknown and non-constant trip counts (see InductionVarRange::HasKnownTripCount). + static constexpr int64_t kUnknownTripCount = -1; + explicit LoopAnalysisInfo(HLoopInformation* loop_info) - : bb_num_(0), + : trip_count_(kUnknownTripCount), + bb_num_(0), instr_num_(0), exits_num_(0), + invariant_exits_num_(0), has_instructions_preventing_scalar_peeling_(false), has_instructions_preventing_scalar_unrolling_(false), has_long_type_instructions_(false), loop_info_(loop_info) {} + int64_t GetTripCount() const { return trip_count_; } size_t GetNumberOfBasicBlocks() const { return bb_num_; } size_t GetNumberOfInstructions() const { return instr_num_; } size_t GetNumberOfExits() const { return exits_num_; } + size_t GetNumberOfInvariantExits() const { return invariant_exits_num_; } bool HasInstructionsPreventingScalarPeeling() const { return has_instructions_preventing_scalar_peeling_; @@ -50,19 +57,27 @@ class LoopAnalysisInfo : public ValueObject { return has_instructions_preventing_scalar_unrolling_; } + bool HasInstructionsPreventingScalarOpts() const { + return HasInstructionsPreventingScalarPeeling() || HasInstructionsPreventingScalarUnrolling(); + } + bool HasLongTypeInstructions() const { return has_long_type_instructions_; } - const HLoopInformation* GetLoopInfo() const { return loop_info_; } + HLoopInformation* GetLoopInfo() const { return loop_info_; } private: + // Trip count of the loop if known, kUnknownTripCount otherwise. + int64_t trip_count_; // Number of basic blocks in the loop body. size_t bb_num_; // Number of instructions in the loop body. size_t instr_num_; // Number of loop's exits. size_t exits_num_; + // Number of "if" loop exits (with HIf instruction) whose condition is loop-invariant. + size_t invariant_exits_num_; // Whether the loop has instructions which make scalar loop peeling non-beneficial. bool has_instructions_preventing_scalar_peeling_; // Whether the loop has instructions which make scalar loop unrolling non-beneficial. @@ -72,7 +87,7 @@ class LoopAnalysisInfo : public ValueObject { bool has_long_type_instructions_; // Corresponding HLoopInformation. - const HLoopInformation* loop_info_; + HLoopInformation* loop_info_; friend class LoopAnalysis; }; @@ -84,20 +99,12 @@ class LoopAnalysis : public ValueObject { // Calculates loops basic properties like body size, exits number, etc. and fills // 'analysis_results' with this information. static void CalculateLoopBasicProperties(HLoopInformation* loop_info, - LoopAnalysisInfo* analysis_results); + LoopAnalysisInfo* analysis_results, + int64_t trip_count); - // Returns whether the loop has at least one loop invariant exit. - static bool HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info); - - // Returns whether HIf's true or false successor is outside the specified loop. - // - // Prerequisite: HIf must be in the specified loop. - static bool IsLoopExit(HLoopInformation* loop_info, const HIf* hif) { - DCHECK(loop_info->Contains(*hif->GetBlock())); - HBasicBlock* true_succ = hif->IfTrueSuccessor(); - HBasicBlock* false_succ = hif->IfFalseSuccessor(); - return (!loop_info->Contains(*true_succ) || !loop_info->Contains(*false_succ)); - } + // Returns the trip count of the loop if it is known and kUnknownTripCount otherwise. + static int64_t GetLoopTripCount(HLoopInformation* loop_info, + const InductionVarRange* induction_range); private: // Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial. @@ -143,9 +150,9 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> { // Returns optimal scalar unrolling factor for the loop. // // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. - virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED, - uint64_t trip_count ATTRIBUTE_UNUSED) const { - return kNoUnrollingFactor; + virtual uint32_t GetScalarUnrollingFactor( + const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const { + return LoopAnalysisInfo::kNoUnrollingFactor; } // Returns whether scalar loop peeling is enabled, @@ -160,7 +167,7 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> { int64_t trip_count ATTRIBUTE_UNUSED, uint32_t max_peel ATTRIBUTE_UNUSED, uint32_t vector_length ATTRIBUTE_UNUSED) const { - return kNoUnrollingFactor; + return LoopAnalysisInfo::kNoUnrollingFactor; } }; diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 72aa25302e..440cd3351e 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -744,64 +744,74 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { } bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { - return TryOptimizeInnerLoopFinite(node) || - TryPeelingForLoopInvariantExitsElimination(node) || - TryUnrollingForBranchPenaltyReduction(node); + return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node); } // -// Loop unrolling: generic part methods. +// Scalar loop peeling and unrolling: generic part methods. // -bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) { - // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests) - // as InstructionSet is needed. - if (compiler_options_ == nullptr) { +bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info, + bool generate_code) { + if (analysis_info->GetNumberOfExits() > 1) { return false; } - HLoopInformation* loop_info = node->loop_info; - int64_t trip_count = 0; - // Only unroll loops with a known tripcount. - if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) { + uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(analysis_info); + if (unrolling_factor == LoopAnalysisInfo::kNoUnrollingFactor) { return false; } - uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count); - if (unrolling_factor == kNoUnrollingFactor) { - return false; - } + if (generate_code) { + // TODO: support other unrolling factors. + DCHECK_EQ(unrolling_factor, 2u); - LoopAnalysisInfo loop_analysis_info(loop_info); - LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info); + // Perform unrolling. + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + PeelUnrollSimpleHelper helper(loop_info); + helper.DoUnrolling(); - // Check "IsLoopClonable" last as it can be time-consuming. - if (loop_analysis_info.HasInstructionsPreventingScalarUnrolling() || - arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) || - (loop_analysis_info.GetNumberOfExits() > 1) || - !PeelUnrollHelper::IsLoopClonable(loop_info)) { - return false; + // Remove the redundant loop check after unrolling. + HIf* copy_hif = + helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf(); + int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0; + copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u); } + return true; +} - // TODO: support other unrolling factors. - DCHECK_EQ(unrolling_factor, 2u); +bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info, + bool generate_code) { + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + if (!arch_loop_helper_->IsLoopPeelingEnabled()) { + return false; + } - // Perform unrolling. - PeelUnrollSimpleHelper helper(loop_info); - helper.DoUnrolling(); + if (analysis_info->GetNumberOfInvariantExits() == 0) { + return false; + } - // Remove the redundant loop check after unrolling. - HIf* copy_hif = - helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf(); - int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0; - copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u); + if (generate_code) { + // Perform peeling. + PeelUnrollSimpleHelper helper(loop_info); + helper.DoPeeling(); + + // Statically evaluate loop check after peeling for loop invariant condition. + const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap(); + for (auto entry : *hir_map) { + HInstruction* copy = entry.second; + if (copy->IsIf()) { + TryToEvaluateIfCondition(copy->AsIf(), graph_); + } + } + } return true; } -bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) { +bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests) // as InstructionSet is needed. if (compiler_options_ == nullptr) { @@ -809,35 +819,27 @@ bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* nod } HLoopInformation* loop_info = node->loop_info; - // Check 'IsLoopClonable' the last as it might be time-consuming. - if (!arch_loop_helper_->IsLoopPeelingEnabled()) { + int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_); + LoopAnalysisInfo analysis_info(loop_info); + LoopAnalysis::CalculateLoopBasicProperties(loop_info, &analysis_info, trip_count); + + if (analysis_info.HasInstructionsPreventingScalarOpts() || + arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&analysis_info)) { return false; } - LoopAnalysisInfo loop_analysis_info(loop_info); - LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info); - - // Check "IsLoopClonable" last as it can be time-consuming. - if (loop_analysis_info.HasInstructionsPreventingScalarPeeling() || - arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) || - !LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) || - !PeelUnrollHelper::IsLoopClonable(loop_info)) { + if (!TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) && + !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) { return false; } - // Perform peeling. - PeelUnrollSimpleHelper helper(loop_info); - helper.DoPeeling(); - - const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap(); - for (auto entry : *hir_map) { - HInstruction* copy = entry.second; - if (copy->IsIf()) { - TryToEvaluateIfCondition(copy->AsIf(), graph_); - } + // Run 'IsLoopClonable' the last as it might be time-consuming. + if (!PeelUnrollHelper::IsLoopClonable(loop_info)) { + return false; } - return true; + return TryPeelingForLoopInvariantExitsElimination(&analysis_info) || + TryUnrollingForBranchPenaltyReduction(&analysis_info); } // @@ -1076,7 +1078,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_index_, ptc, graph_->GetConstant(induc_type, 1), - kNoUnrollingFactor); + LoopAnalysisInfo::kNoUnrollingFactor); } // Generate vector loop, possibly further unrolled: @@ -1103,7 +1105,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_index_, stc, graph_->GetConstant(induc_type, 1), - kNoUnrollingFactor); + LoopAnalysisInfo::kNoUnrollingFactor); } // Link reductions to their final uses. diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 9743b25259..bc4792458b 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -144,12 +144,19 @@ class HLoopOptimization : public HOptimization { bool OptimizeInnerLoop(LoopNode* node); // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling - // opportunities. Returns whether transformation happened. - bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node); + // opportunities. Returns whether transformation happened. 'generate_code' determines whether the + // optimization should be actually applied. + bool TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info, + bool generate_code = true); // Tries to apply loop peeling for loop invariant exits elimination. Returns whether - // transformation happened. - bool TryPeelingForLoopInvariantExitsElimination(LoopNode* loop_node); + // transformation happened. 'generate_code' determines whether the optimization should be + // actually applied. + bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info, + bool generate_code = true); + + // Tries to apply scalar loop peeling and unrolling. + bool TryPeelingAndUnrolling(LoopNode* node); // // Vectorization analysis and synthesis. diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 661f66a34c..d243331dbe 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1305,6 +1305,19 @@ void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* } } +void HInstruction::ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) { + const HUseList<HEnvironment*>& uses = GetEnvUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HEnvironment* user = it->GetUser(); + size_t index = it->GetIndex(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + if (dominator->StrictlyDominates(user->GetHolder())) { + user->ReplaceInput(replacement, index); + } + } +} + void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) { HUserRecord<HInstruction*> input_use = InputRecordAt(index); if (input_use.GetInstruction() == replacement) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 825779989c..cd8d07a17a 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -2217,6 +2217,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void ReplaceWith(HInstruction* instruction); void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); + void ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); void ReplaceInput(HInstruction* replacement, size_t index); // This is almost the same as doing `ReplaceWith()`. But in this helper, the diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index bb33ba3564..5352f26e46 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -848,23 +848,23 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, case kAnalysisSkipped: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledSkipped); - } break; + } case kAnalysisInvalidBytecode: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledInvalidBytecode); - } break; + } case kAnalysisFailThrowCatchLoop: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledThrowCatchLoop); - } break; + } case kAnalysisFailAmbiguousArrayOp: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledAmbiguousArrayOp); - } break; + } case kAnalysisSuccess: UNREACHABLE(); } diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index f246228074..9a26f2f6c4 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -50,7 +50,6 @@ enum class MethodCompilationStat { kNotCompiledThrowCatchLoop, kNotCompiledAmbiguousArrayOp, kNotCompiledHugeMethod, - kNotCompiledIrreducibleAndStringInit, kNotCompiledLargeMethodNoBranches, kNotCompiledMalformedOpcode, kNotCompiledNoCodegen, diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index e44d7b82e5..f903f82d50 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -130,10 +130,12 @@ class OptimizingUnitTestHelper { // Create the dex file based on the fake data. Call the constructor so that we can use virtual // functions. Don't use the arena for the StandardDexFile otherwise the dex location leaks. dex_files_.emplace_back(new StandardDexFile( - std::make_unique<NonOwningDexFileContainer>(dex_data, sizeof(StandardDexFile::Header)), + dex_data, + sizeof(StandardDexFile::Header), "no_location", /*location_checksum*/ 0, - /*oat_dex_file*/ nullptr)); + /*oat_dex_file*/ nullptr, + /*container*/ nullptr)); return new (allocator) HGraph( allocator, diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index dd54468217..dda29a1b4b 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -440,6 +440,62 @@ static bool HasAliasInEnvironments(HInstruction* instruction) { return false; } +void SsaBuilder::ReplaceUninitializedStringPhis() { + ScopedArenaHashSet<HInstruction*> seen_instructions( + local_allocator_->Adapter(kArenaAllocGraphBuilder)); + ScopedArenaVector<HInstruction*> worklist(local_allocator_->Adapter(kArenaAllocGraphBuilder)); + + // Iterate over all inputs and uses of the phi, recursively, until all related instructions + // have been visited. + for (const auto& pair : uninitialized_string_phis_) { + HPhi* string_phi = pair.first; + HInvoke* invoke = pair.second; + worklist.push_back(string_phi); + HNewInstance* found_instance = nullptr; + do { + HInstruction* current = worklist.back(); + worklist.pop_back(); + if (seen_instructions.find(current) != seen_instructions.end()) { + continue; + } + seen_instructions.insert(current); + if (current->IsNewInstance()) { + // If it is the first time we see the allocation, replace its uses. We don't register + // it through `RemoveRedundantUninitializedStrings`, as that method makes assumption about + // aliasing and environment uses that don't hold when the string escapes to phis. + // Note that this also means we will keep the (useless) allocation. + if (found_instance == nullptr) { + found_instance = current->AsNewInstance(); + } else { + DCHECK(found_instance == current); + } + } else if (current->IsPhi()) { + // Push all inputs to the worklist. Those should be Phis or NewInstance. + for (HInstruction* input : current->GetInputs()) { + DCHECK(input->IsPhi() || input->IsNewInstance()) << input->DebugName(); + worklist.push_back(input); + } + } else { + // The verifier prevents any other DEX uses of the uninitialized string. + DCHECK(current->IsEqual() || current->IsNotEqual()); + continue; + } + current->ReplaceUsesDominatedBy(invoke, invoke); + current->ReplaceEnvUsesDominatedBy(invoke, invoke); + // Push all users to the worklist. Now that we have replaced + // the uses dominated by the invokes, the remaining users should only + // be Phi, or Equal/NotEqual. + for (const HUseListNode<HInstruction*>& use : current->GetUses()) { + HInstruction* user = use.GetUser(); + DCHECK(user->IsPhi() || user->IsEqual() || user->IsNotEqual()) << user->DebugName(); + worklist.push_back(user); + } + } while (!worklist.empty()); + seen_instructions.clear(); + DCHECK(found_instance != nullptr); + } +} + void SsaBuilder::RemoveRedundantUninitializedStrings() { if (graph_->IsDebuggable()) { // Do not perform the optimization for consistency with the interpreter @@ -488,27 +544,32 @@ void SsaBuilder::RemoveRedundantUninitializedStrings() { GraphAnalysisResult SsaBuilder::BuildSsa() { DCHECK(!graph_->IsInSsaForm()); - // 1) Propagate types of phis. At this point, phis are typed void in the general + // Replace Phis that feed in a String.<init>, as well as their aliases, with + // the actual String allocation invocation. We do this first, as the phis stored in + // the data structure might get removed from the graph in later stages during `BuildSsa`. + ReplaceUninitializedStringPhis(); + + // Propagate types of phis. At this point, phis are typed void in the general // case, or float/double/reference if we created an equivalent phi. So we need // to propagate the types across phis to give them a correct type. If a type // conflict is detected in this stage, the phi is marked dead. RunPrimitiveTypePropagation(); - // 2) Now that the correct primitive types have been assigned, we can get rid + // Now that the correct primitive types have been assigned, we can get rid // of redundant phis. Note that we cannot do this phase before type propagation, // otherwise we could get rid of phi equivalents, whose presence is a requirement // for the type propagation phase. Note that this is to satisfy statement (a) // of the SsaBuilder (see ssa_builder.h). SsaRedundantPhiElimination(graph_).Run(); - // 3) Fix the type for null constants which are part of an equality comparison. + // Fix the type for null constants which are part of an equality comparison. // We need to do this after redundant phi elimination, to ensure the only cases // that we can see are reference comparison against 0. The redundant phi // elimination ensures we do not see a phi taking two 0 constants in a HEqual // or HNotEqual. FixNullConstantType(); - // 4) Compute type of reference type instructions. The pass assumes that + // Compute type of reference type instructions. The pass assumes that // NullConstant has been fixed up. ReferenceTypePropagation(graph_, class_loader_, @@ -516,7 +577,7 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { handles_, /* is_first_run */ true).Run(); - // 5) HInstructionBuilder duplicated ArrayGet instructions with ambiguous type + // HInstructionBuilder duplicated ArrayGet instructions with ambiguous type // (int/float or long/double) and marked ArraySets with ambiguous input type. // Now that RTP computed the type of the array input, the ambiguity can be // resolved and the correct equivalents kept. @@ -524,13 +585,13 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { return kAnalysisFailAmbiguousArrayOp; } - // 6) Mark dead phis. This will mark phis which are not used by instructions + // Mark dead phis. This will mark phis which are not used by instructions // or other live phis. If compiling as debuggable code, phis will also be kept // live if they have an environment use. SsaDeadPhiElimination dead_phi_elimimation(graph_); dead_phi_elimimation.MarkDeadPhis(); - // 7) Make sure environments use the right phi equivalent: a phi marked dead + // Make sure environments use the right phi equivalent: a phi marked dead // can have a phi equivalent that is not dead. In that case we have to replace // it with the live equivalent because deoptimization and try/catch rely on // environments containing values of all live vregs at that point. Note that @@ -539,14 +600,14 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { // environments to just reference one. FixEnvironmentPhis(); - // 8) Now that the right phis are used for the environments, we can eliminate + // Now that the right phis are used for the environments, we can eliminate // phis we do not need. Regardless of the debuggable status, this phase is /// necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well // as for the code generation, which does not deal with phis of conflicting // input types. dead_phi_elimimation.EliminateDeadPhis(); - // 9) HInstructionBuidler replaced uses of NewInstances of String with the + // HInstructionBuidler replaced uses of NewInstances of String with the // results of their corresponding StringFactory calls. Unless the String // objects are used before they are initialized, they can be replaced with // NullConstant. Note that this optimization is valid only if unsimplified diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 60831a9e6a..765544508e 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -61,7 +61,8 @@ class SsaBuilder : public ValueObject { local_allocator_(local_allocator), ambiguous_agets_(local_allocator->Adapter(kArenaAllocGraphBuilder)), ambiguous_asets_(local_allocator->Adapter(kArenaAllocGraphBuilder)), - uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)) { + uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)), + uninitialized_string_phis_(local_allocator->Adapter(kArenaAllocGraphBuilder)) { graph_->InitializeInexactObjectRTI(handles); } @@ -96,6 +97,10 @@ class SsaBuilder : public ValueObject { } } + void AddUninitializedStringPhi(HPhi* phi, HInvoke* invoke) { + uninitialized_string_phis_.push_back(std::make_pair(phi, invoke)); + } + private: void SetLoopHeaderPhiInputs(); void FixEnvironmentPhis(); @@ -118,6 +123,7 @@ class SsaBuilder : public ValueObject { HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget); void RemoveRedundantUninitializedStrings(); + void ReplaceUninitializedStringPhis(); HGraph* const graph_; Handle<mirror::ClassLoader> class_loader_; @@ -131,6 +137,7 @@ class SsaBuilder : public ValueObject { ScopedArenaVector<HArrayGet*> ambiguous_agets_; ScopedArenaVector<HArraySet*> ambiguous_asets_; ScopedArenaVector<HNewInstance*> uninitialized_strings_; + ScopedArenaVector<std::pair<HPhi*, HInvoke*>> uninitialized_string_phis_; DISALLOW_COPY_AND_ASSIGN(SsaBuilder); }; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 5d361953ba..3e1a36dc9b 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -151,7 +151,7 @@ void StackMapStream::EndStackMapEntry() { StackMap stack_map = code_info.GetStackMapAt(stack_map_index); CHECK_EQ(stack_map.HasDexRegisterMap(), (num_dex_registers != 0)); CHECK_EQ(stack_map.HasInlineInfo(), (inlining_depth != 0)); - CHECK_EQ(code_info.GetInlineDepthOf(stack_map), inlining_depth); + CHECK_EQ(code_info.GetInlineInfosOf(stack_map).size(), inlining_depth); }); } } @@ -209,7 +209,7 @@ void StackMapStream::BeginInlineInfoEntry(ArtMethod* method, size_t depth = current_inline_infos_.size() - 1; dchecks_.emplace_back([=](const CodeInfo& code_info) { StackMap stack_map = code_info.GetStackMapAt(stack_map_index); - InlineInfo inline_info = code_info.GetInlineInfoAtDepth(stack_map, depth); + InlineInfo inline_info = code_info.GetInlineInfosOf(stack_map)[depth]; CHECK_EQ(inline_info.GetDexPc(), dex_pc); bool encode_art_method = EncodeArtMethodInInlineInfo(method); CHECK_EQ(inline_info.EncodesArtMethod(), encode_art_method); @@ -275,7 +275,6 @@ void StackMapStream::CreateDexRegisterMap() { if (kVerifyStackMaps) { size_t stack_map_index = stack_maps_.size(); - uint32_t depth = current_inline_infos_.size(); // We need to make copy of the current registers for later (when the check is run). auto expected_dex_registers = std::make_shared<dchecked_vector<DexRegisterLocation>>( current_dex_registers_.begin(), current_dex_registers_.end()); @@ -285,8 +284,9 @@ void StackMapStream::CreateDexRegisterMap() { for (DexRegisterLocation reg : code_info.GetDexRegisterMapOf(stack_map)) { CHECK_EQ((*expected_dex_registers)[expected_reg++], reg); } - for (uint32_t d = 0; d < depth; d++) { - for (DexRegisterLocation reg : code_info.GetDexRegisterMapAtDepth(d, stack_map)) { + for (InlineInfo inline_info : code_info.GetInlineInfosOf(stack_map)) { + DexRegisterMap map = code_info.GetInlineDexRegisterMapOf(stack_map, inline_info); + for (DexRegisterLocation reg : map) { CHECK_EQ((*expected_dex_registers)[expected_reg++], reg); } } diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 6241e0c25a..9ed90a4839 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -193,13 +193,12 @@ TEST(StackMapTest, Test2) { ASSERT_EQ(-2, location1.GetValue()); ASSERT_TRUE(stack_map.HasInlineInfo()); - InlineInfo inline_info0 = code_info.GetInlineInfoAtDepth(stack_map, 0); - InlineInfo inline_info1 = code_info.GetInlineInfoAtDepth(stack_map, 1); - ASSERT_EQ(2u, code_info.GetInlineDepthOf(stack_map)); - ASSERT_EQ(3u, inline_info0.GetDexPc()); - ASSERT_EQ(2u, inline_info1.GetDexPc()); - ASSERT_TRUE(inline_info0.EncodesArtMethod()); - ASSERT_TRUE(inline_info1.EncodesArtMethod()); + auto inline_infos = code_info.GetInlineInfosOf(stack_map); + ASSERT_EQ(2u, inline_infos.size()); + ASSERT_EQ(3u, inline_infos[0].GetDexPc()); + ASSERT_EQ(2u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); } // Second stack map. @@ -614,19 +613,18 @@ TEST(StackMapTest, InlineTest) { ASSERT_EQ(0, dex_registers0[0].GetStackOffsetInBytes()); ASSERT_EQ(4, dex_registers0[1].GetConstant()); - InlineInfo if0_0 = ci.GetInlineInfoAtDepth(sm0, 0); - InlineInfo if0_1 = ci.GetInlineInfoAtDepth(sm0, 1); - ASSERT_EQ(2u, ci.GetInlineDepthOf(sm0)); - ASSERT_EQ(2u, if0_0.GetDexPc()); - ASSERT_TRUE(if0_0.EncodesArtMethod()); - ASSERT_EQ(3u, if0_1.GetDexPc()); - ASSERT_TRUE(if0_1.EncodesArtMethod()); + auto inline_infos = ci.GetInlineInfosOf(sm0); + ASSERT_EQ(2u, inline_infos.size()); + ASSERT_EQ(2u, inline_infos[0].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_EQ(3u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, sm0); + DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm0, inline_infos[0]); ASSERT_EQ(1u, dex_registers1.size()); ASSERT_EQ(8, dex_registers1[0].GetStackOffsetInBytes()); - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, sm0); + DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm0, inline_infos[1]); ASSERT_EQ(3u, dex_registers2.size()); ASSERT_EQ(16, dex_registers2[0].GetStackOffsetInBytes()); ASSERT_EQ(20, dex_registers2[1].GetConstant()); @@ -642,22 +640,20 @@ TEST(StackMapTest, InlineTest) { ASSERT_EQ(56, dex_registers0[0].GetStackOffsetInBytes()); ASSERT_EQ(0, dex_registers0[1].GetConstant()); - InlineInfo if1_0 = ci.GetInlineInfoAtDepth(sm1, 0); - InlineInfo if1_1 = ci.GetInlineInfoAtDepth(sm1, 1); - InlineInfo if1_2 = ci.GetInlineInfoAtDepth(sm1, 2); - ASSERT_EQ(3u, ci.GetInlineDepthOf(sm1)); - ASSERT_EQ(2u, if1_0.GetDexPc()); - ASSERT_TRUE(if1_0.EncodesArtMethod()); - ASSERT_EQ(3u, if1_1.GetDexPc()); - ASSERT_TRUE(if1_1.EncodesArtMethod()); - ASSERT_EQ(5u, if1_2.GetDexPc()); - ASSERT_TRUE(if1_2.EncodesArtMethod()); - - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, sm1); + auto inline_infos = ci.GetInlineInfosOf(sm1); + ASSERT_EQ(3u, inline_infos.size()); + ASSERT_EQ(2u, inline_infos[0].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_EQ(3u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); + ASSERT_EQ(5u, inline_infos[2].GetDexPc()); + ASSERT_TRUE(inline_infos[2].EncodesArtMethod()); + + DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm1, inline_infos[0]); ASSERT_EQ(1u, dex_registers1.size()); ASSERT_EQ(12, dex_registers1[0].GetStackOffsetInBytes()); - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, sm1); + DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm1, inline_infos[1]); ASSERT_EQ(3u, dex_registers2.size()); ASSERT_EQ(80, dex_registers2[0].GetStackOffsetInBytes()); ASSERT_EQ(10, dex_registers2[1].GetConstant()); @@ -684,22 +680,20 @@ TEST(StackMapTest, InlineTest) { ASSERT_EQ(56, dex_registers0[0].GetStackOffsetInBytes()); ASSERT_EQ(0, dex_registers0[1].GetConstant()); - InlineInfo if2_0 = ci.GetInlineInfoAtDepth(sm3, 0); - InlineInfo if2_1 = ci.GetInlineInfoAtDepth(sm3, 1); - InlineInfo if2_2 = ci.GetInlineInfoAtDepth(sm3, 2); - ASSERT_EQ(3u, ci.GetInlineDepthOf(sm3)); - ASSERT_EQ(2u, if2_0.GetDexPc()); - ASSERT_TRUE(if2_0.EncodesArtMethod()); - ASSERT_EQ(5u, if2_1.GetDexPc()); - ASSERT_TRUE(if2_1.EncodesArtMethod()); - ASSERT_EQ(10u, if2_2.GetDexPc()); - ASSERT_TRUE(if2_2.EncodesArtMethod()); - - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, sm3); + auto inline_infos = ci.GetInlineInfosOf(sm3); + ASSERT_EQ(3u, inline_infos.size()); + ASSERT_EQ(2u, inline_infos[0].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_EQ(5u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); + ASSERT_EQ(10u, inline_infos[2].GetDexPc()); + ASSERT_TRUE(inline_infos[2].EncodesArtMethod()); + + DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm3, inline_infos[1]); ASSERT_EQ(1u, dex_registers1.size()); ASSERT_EQ(2, dex_registers1[0].GetMachineRegister()); - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(2, sm3); + DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm3, inline_infos[2]); ASSERT_EQ(2u, dex_registers2.size()); ASSERT_FALSE(dex_registers2[0].IsLive()); ASSERT_EQ(3, dex_registers2[1].GetMachineRegister()); |