Diffstat (limited to 'compiler/optimizing')
-rw-r--r--   compiler/optimizing/builder.cc                           |  14
-rw-r--r--   compiler/optimizing/code_generator.cc                    |  70
-rw-r--r--   compiler/optimizing/code_generator.h                     |   7
-rw-r--r--   compiler/optimizing/inliner.cc                           |  26
-rw-r--r--   compiler/optimizing/inliner.h                            |   1
-rw-r--r--   compiler/optimizing/instruction_simplifier.h             |   3
-rw-r--r--   compiler/optimizing/nodes.h                              |   4
-rw-r--r--   compiler/optimizing/optimization.h                       |   3
-rw-r--r--   compiler/optimizing/optimizing_compiler.cc               |  80
-rw-r--r--   compiler/optimizing/prepare_for_register_allocation.cc   |  28
-rw-r--r--   compiler/optimizing/reference_type_propagation.cc        |  96
-rw-r--r--   compiler/optimizing/reference_type_propagation.h         |  14
-rw-r--r--   compiler/optimizing/ssa_builder.cc                       |  40
-rw-r--r--   compiler/optimizing/stack_map_stream.cc                  |  12
-rw-r--r--   compiler/optimizing/stack_map_stream.h                   |  13

15 files changed, 231 insertions(+), 180 deletions(-)
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index e4680ff2fa..1f9287cbfc 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -723,10 +723,16 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
       }
     }
 
-    invoke = new (arena_) HInvokeStaticOrDirect(
-        arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index,
-        is_recursive, string_init_offset, invoke_type, optimized_invoke_type,
-        clinit_check_requirement);
+    invoke = new (arena_) HInvokeStaticOrDirect(arena_,
+                                                number_of_arguments,
+                                                return_type,
+                                                dex_pc,
+                                                target_method.dex_method_index,
+                                                is_recursive,
+                                                string_init_offset,
+                                                invoke_type,
+                                                optimized_invoke_type,
+                                                clinit_check_requirement);
   }
 
   size_t start_index = 0;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 049b3e3a40..130f0e970f 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -508,19 +508,14 @@ void CodeGenerator::BuildNativeGCMap(
       dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap();
   verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
 
-  uint32_t max_native_offset = 0;
-  for (size_t i = 0; i < pc_infos_.Size(); i++) {
-    uint32_t native_offset = pc_infos_.Get(i).native_pc;
-    if (native_offset > max_native_offset) {
-      max_native_offset = native_offset;
-    }
-  }
-
-  GcMapBuilder builder(data, pc_infos_.Size(), max_native_offset, dex_gc_map.RegWidth());
-  for (size_t i = 0; i < pc_infos_.Size(); i++) {
-    struct PcInfo pc_info = pc_infos_.Get(i);
-    uint32_t native_offset = pc_info.native_pc;
-    uint32_t dex_pc = pc_info.dex_pc;
+  uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
+
+  size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
+  GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth());
+  for (size_t i = 0; i != num_stack_maps; ++i) {
+    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
+    uint32_t native_offset = stack_map_entry.native_pc_offset;
+    uint32_t dex_pc = stack_map_entry.dex_pc;
     const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false);
     CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc;
     builder.AddEntry(native_offset, references);
@@ -528,17 +523,17 @@
 }
 
 void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const {
-  for (size_t i = 0; i < pc_infos_.Size(); i++) {
-    struct PcInfo pc_info = pc_infos_.Get(i);
-    uint32_t pc2dex_offset = pc_info.native_pc;
-    int32_t pc2dex_dalvik_offset = pc_info.dex_pc;
+  for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) {
+    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
+    uint32_t pc2dex_offset = stack_map_entry.native_pc_offset;
+    int32_t pc2dex_dalvik_offset = stack_map_entry.dex_pc;
     src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset}));
   }
 }
 
 void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const {
   uint32_t pc2dex_data_size = 0u;
-  uint32_t pc2dex_entries = pc_infos_.Size();
+  uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps();
   uint32_t pc2dex_offset = 0u;
   int32_t pc2dex_dalvik_offset = 0;
   uint32_t dex2pc_data_size = 0u;
@@ -547,11 +542,11 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const {
   int32_t dex2pc_dalvik_offset = 0;
 
   for (size_t i = 0; i < pc2dex_entries; i++) {
-    struct PcInfo pc_info = pc_infos_.Get(i);
-    pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset);
-    pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = pc_info.native_pc;
-    pc2dex_dalvik_offset = pc_info.dex_pc;
+    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
+    pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset);
+    pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset);
+    pc2dex_offset = stack_map_entry.native_pc_offset;
+    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
   }
 
   // Walk over the blocks and find which ones correspond to catch block entries.
@@ -586,12 +581,12 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const {
   dex2pc_dalvik_offset = 0u;
 
   for (size_t i = 0; i < pc2dex_entries; i++) {
-    struct PcInfo pc_info = pc_infos_.Get(i);
-    DCHECK(pc2dex_offset <= pc_info.native_pc);
-    write_pos = EncodeUnsignedLeb128(write_pos, pc_info.native_pc - pc2dex_offset);
-    write_pos = EncodeSignedLeb128(write_pos, pc_info.dex_pc - pc2dex_dalvik_offset);
-    pc2dex_offset = pc_info.native_pc;
-    pc2dex_dalvik_offset = pc_info.dex_pc;
+    const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
+    DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset);
+    write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset);
+    write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset);
+    pc2dex_offset = stack_map_entry.native_pc_offset;
+    pc2dex_dalvik_offset = stack_map_entry.dex_pc;
   }
 
   for (size_t i = 0; i < graph_->GetBlocks().Size(); ++i) {
@@ -617,9 +612,9 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const {
     auto it = table.PcToDexBegin();
     auto it2 = table.DexToPcBegin();
     for (size_t i = 0; i < pc2dex_entries; i++) {
-      struct PcInfo pc_info = pc_infos_.Get(i);
-      CHECK_EQ(pc_info.native_pc, it.NativePcOffset());
-      CHECK_EQ(pc_info.dex_pc, it.DexPc());
+      const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i);
+      CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset());
+      CHECK_EQ(stack_map_entry.dex_pc, it.DexPc());
       ++it;
     }
     for (size_t i = 0; i < graph_->GetBlocks().Size(); ++i) {
@@ -695,14 +690,11 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
   }
 
   // Collect PC infos for the mapping table.
-  struct PcInfo pc_info;
-  pc_info.dex_pc = outer_dex_pc;
-  pc_info.native_pc = GetAssembler()->CodeSize();
-  pc_infos_.Add(pc_info);
+  uint32_t native_pc = GetAssembler()->CodeSize();
 
   if (instruction == nullptr) {
     // For stack overflow checks.
-    stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc, pc_info.native_pc, 0, 0, 0, 0);
+    stack_map_stream_.BeginStackMapEntry(outer_dex_pc, native_pc, 0, 0, 0, 0);
    stack_map_stream_.EndStackMapEntry();
     return;
   }
@@ -719,8 +711,8 @@
   }
   // The register mask must be a subset of callee-save registers.
   DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask);
-  stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc,
-                                       pc_info.native_pc,
+  stack_map_stream_.BeginStackMapEntry(outer_dex_pc,
+                                       native_pc,
                                        register_mask,
                                        locations->GetStackMask(),
                                        outer_environment_size,
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index c6ebf6dbd8..e6b1f7c6aa 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -64,11 +64,6 @@ class CodeAllocator {
   DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
 };
 
-struct PcInfo {
-  uint32_t dex_pc;
-  uintptr_t native_pc;
-};
-
 class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
  public:
   SlowPathCode() {
@@ -366,7 +361,6 @@ class CodeGenerator {
         is_baseline_(false),
         graph_(graph),
         compiler_options_(compiler_options),
-        pc_infos_(graph->GetArena(), 32),
         slow_paths_(graph->GetArena(), 8),
         block_order_(nullptr),
         current_block_index_(0),
@@ -455,7 +449,6 @@ class CodeGenerator {
   HGraph* const graph_;
   const CompilerOptions& compiler_options_;
 
-  GrowableArray<PcInfo> pc_infos_;
   GrowableArray<SlowPathCode*> slow_paths_;
 
   // The order to use for code generation.
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 5aeaad23c0..92ebf060eb 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -256,7 +256,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) con
     return false;
   }
 
-  if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, same_dex_file)) {
+  if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) {
     return false;
   }
 
@@ -267,11 +267,11 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) con
 
 bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
                                  HInvoke* invoke_instruction,
-                                 uint32_t method_index,
                                  bool same_dex_file) const {
   ScopedObjectAccess soa(Thread::Current());
   const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
-  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+  const DexFile& callee_dex_file = *resolved_method->GetDexFile();
+  uint32_t method_index = resolved_method->GetDexMethodIndex();
 
   DexCompilationUnit dex_compilation_unit(
     nullptr,
@@ -311,7 +311,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
   }
   HGraph* callee_graph = new (graph_->GetArena()) HGraph(
       graph_->GetArena(),
-      caller_dex_file,
+      callee_dex_file,
       method_index,
       requires_ctor_barrier,
       compiler_driver_->GetInstructionSet(),
@@ -328,7 +328,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
       &inline_stats);
 
   if (!builder.BuildGraph(*code_item)) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be built, so cannot be inlined";
     // There could be multiple reasons why the graph could not be built, including
     // unaccessible methods/fields due to using a different dex cache. We do not mark
@@ -338,14 +338,14 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
 
   if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
                                                   compiler_driver_->GetInstructionSet())) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " cannot be inlined because of the register allocator";
     resolved_method->SetShouldNotInline();
     return false;
   }
 
   if (!callee_graph->TryBuildingSsa()) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be transformed to SSA";
     resolved_method->SetShouldNotInline();
     return false;
@@ -385,7 +385,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
   // a throw predecessor.
   HBasicBlock* exit_block = callee_graph->GetExitBlock();
   if (exit_block == nullptr) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be inlined because it has an infinite loop";
     resolved_method->SetShouldNotInline();
     return false;
@@ -399,7 +399,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
     }
   }
   if (has_throw_predecessor) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+    VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be inlined because one branch always throws";
     resolved_method->SetShouldNotInline();
     return false;
@@ -410,7 +410,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
   for (; !it.Done(); it.Advance()) {
     HBasicBlock* block = it.Current();
     if (block->IsLoopHeader()) {
-      VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+      VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                      << " could not be inlined because it contains a loop";
       resolved_method->SetShouldNotInline();
       return false;
@@ -424,21 +424,21 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
       if (current->IsInvokeInterface()) {
         // Disable inlining of interface calls. The cost in case of entering the
         // resolution conflict is currently too high.
-        VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                        << " could not be inlined because it has an interface call.";
         resolved_method->SetShouldNotInline();
         return false;
       }
 
       if (!same_dex_file && current->NeedsEnvironment()) {
-        VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                        << " could not be inlined because " << current->DebugName()
                        << " needs an environment and is in a different dex file";
         return false;
       }
 
       if (!same_dex_file && current->NeedsDexCache()) {
-        VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
+        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                        << " could not be inlined because " << current->DebugName()
                        << " it is in a different dex file and requires access to the dex cache";
         // Do not flag the method as not-inlineable. A caller within the same
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 7465278f8c..24044b73a1 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -52,7 +52,6 @@ class HInliner : public HOptimization {
   bool TryInline(HInvoke* invoke_instruction, uint32_t method_index) const;
   bool TryBuildAndInline(ArtMethod* resolved_method,
                          HInvoke* invoke_instruction,
-                         uint32_t method_index,
                          bool same_dex_file) const;
 
   const DexCompilationUnit& outer_compilation_unit_;
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index 024462081f..668956a614 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -36,6 +36,9 @@ class InstructionSimplifier : public HOptimization {
   static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier";
 
   void Run() OVERRIDE;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier);
 };
 
 }  // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 126b3b9879..7ef69559de 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2483,7 +2483,7 @@ class HInvoke : public HInstruction {
     intrinsic_ = intrinsic;
   }
 
-  bool IsInlined() const {
+  bool IsFromInlinedInvoke() const {
     return GetEnvironment()->GetParent() != nullptr;
   }
 
@@ -3603,7 +3603,7 @@ class HLoadClass : public HExpression<1> {
   bool CanThrow() const OVERRIDE {
     // May call runtime and and therefore can throw.
     // TODO: finer grain decision.
-    return !is_referrers_class_;
+    return CanCallRuntime();
   }
 
   ReferenceTypeInfo GetLoadedClassRTI() {
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index ccf8de9f6a..2d1c0ba9f9 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_
 #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_
 
+#include "base/arena_object.h"
 #include "nodes.h"
 #include "optimizing_compiler_stats.h"
 
@@ -25,7 +26,7 @@ namespace art {
 /**
  * Abstraction to implement an optimization pass.
  */
-class HOptimization : public ValueObject {
+class HOptimization : public ArenaObject<kArenaAllocMisc> {
  public:
   HOptimization(HGraph* graph,
                 bool is_in_ssa_form,
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index bf0b9fac0f..303a7cb1fd 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -318,49 +318,55 @@ static void RunOptimizations(HGraph* graph,
                              const DexCompilationUnit& dex_compilation_unit,
                              PassInfoPrinter* pass_info_printer,
                              StackHandleScopeCollection* handles) {
-  HDeadCodeElimination dce1(graph, stats,
-                            HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
-  HDeadCodeElimination dce2(graph, stats,
-                            HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
-  HConstantFolding fold1(graph);
-  InstructionSimplifier simplify1(graph, stats);
-  HBooleanSimplifier boolean_simplify(graph);
-
-  HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
-
-  HConstantFolding fold2(graph, "constant_folding_after_inlining");
-  SideEffectsAnalysis side_effects(graph);
-  GVNOptimization gvn(graph, side_effects);
-  LICM licm(graph, side_effects);
-  BoundsCheckElimination bce(graph);
-  ReferenceTypePropagation type_propagation(graph, handles);
-  InstructionSimplifier simplify2(graph, stats, "instruction_simplifier_after_types");
-  InstructionSimplifier simplify3(graph, stats, "last_instruction_simplifier");
-  ReferenceTypePropagation type_propagation2(graph, handles);
-
-  IntrinsicsRecognizer intrinsics(graph, driver);
+  ArenaAllocator* arena = graph->GetArena();
+  HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
+      graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
+  HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
+      graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
+  HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
+  InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
+  HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph);
+
+  HInliner* inliner = new (arena) HInliner(
+      graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
+
+  HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
+  SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
+  GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
+  LICM* licm = new (arena) LICM(graph, *side_effects);
+  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph);
+  ReferenceTypePropagation* type_propagation =
+      new (arena) ReferenceTypePropagation(graph, handles);
+  InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
+      graph, stats, "instruction_simplifier_after_types");
+  InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
+      graph, stats, "last_instruction_simplifier");
+  ReferenceTypePropagation* type_propagation2 =
+      new (arena) ReferenceTypePropagation(graph, handles);
+
+  IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver);
 
   HOptimization* optimizations[] = {
-    &intrinsics,
-    &dce1,
-    &fold1,
-    &simplify1,
-    &type_propagation,
-    &simplify2,
-    &inliner,
+    intrinsics,
+    dce1,
+    fold1,
+    simplify1,
+    type_propagation,
+    simplify2,
+    inliner,
     // Run another type propagation phase: inlining will open up more opprotunities
     // to remove checkast/instanceof and null checks.
-    &type_propagation2,
+    type_propagation2,
     // BooleanSimplifier depends on the InstructionSimplifier removing redundant
     // suspend checks to recognize empty blocks.
-    &boolean_simplify,
-    &fold2,
-    &side_effects,
-    &gvn,
-    &licm,
-    &bce,
-    &simplify3,
-    &dce2,
+    boolean_simplify,
+    fold2,
+    side_effects,
+    gvn,
+    licm,
+    bce,
+    simplify3,
+    dce2,
  };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer);
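Reviewer note on the RunOptimizations() hunk above: the passes move from stack-allocated locals to arena allocations, which is why optimization.h rebases HOptimization from ValueObject onto ArenaObject<kArenaAllocMisc>. Below is a standalone sketch of the placement-new arena idiom; the Arena and Pass types here are simplified stand-ins, not ART's real ArenaAllocator API.

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Simplified stand-in for ART's ArenaAllocator: allocation is a pointer bump
// into one big buffer, and everything is reclaimed at once when the arena
// dies (no per-object delete, so destructors are never run).
class Arena {
 public:
  explicit Arena(std::size_t capacity) : buffer_(capacity), pos_(0) {}
  void* Alloc(std::size_t bytes) {
    std::size_t aligned = (bytes + 7) & ~std::size_t{7};  // 8-byte alignment
    // A real arena would grow or fail here; this sketch assumes capacity.
    void* result = buffer_.data() + pos_;
    pos_ += aligned;
    return result;
  }

 private:
  std::vector<std::uint8_t> buffer_;
  std::size_t pos_;
};

// Enables the `new (arena) Pass(...)` placement syntax used in the diff.
void* operator new(std::size_t bytes, Arena* arena) { return arena->Alloc(bytes); }

struct Pass {
  virtual ~Pass() {}
  virtual void Run() = 0;
};

struct ExamplePass : Pass {
  void Run() override { std::cout << "running pass\n"; }
};

int main() {
  Arena arena(1 << 16);
  // Arena-allocated passes can be collected into an array of base-class
  // pointers, which is what the reworked RunOptimizations() builds.
  Pass* passes[] = {new (&arena) ExamplePass()};
  for (Pass* pass : passes) pass->Run();
  // No delete: the arena's buffer goes away with `arena`.
}
```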
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index a249aa9711..ca928ae0f2 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -86,16 +86,6 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire
     DCHECK(last_input != nullptr)
         << "Last input is not HLoadClass. It is " << last_input->DebugName();
 
-    // The static call will initialize the class so there's no need for a clinit check if
-    // it's the first user.
-    // There is one special case where we still need the clinit check, when inlining. Because
-    // currently the callee is responsible for reporting parameters to the GC, the code
-    // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC.
-    // Therefore we cannot allocate any object in that code, including loading a new class.
-    if (last_input == invoke->GetPrevious() && !invoke->IsInlined()) {
-      last_input->SetMustGenerateClinitCheck(false);
-    }
-
     // Remove a load class instruction as last input of a static
     // invoke, which has been added (along with a clinit check,
     // removed by PrepareForRegisterAllocation::VisitClinitCheck
@@ -104,10 +94,20 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire
     // stage (i.e., after inlining has been performed).
     invoke->RemoveLoadClassAsLastInput();
 
-    // If the load class instruction is no longer used, remove it from
-    // the graph.
-    if (!last_input->HasUses() && !(last_input->MustGenerateClinitCheck() && invoke->IsInlined())) {
-      last_input->GetBlock()->RemoveInstruction(last_input);
+    // The static call will initialize the class so there's no need for a clinit check if
+    // it's the first user.
+    // There is one special case where we still need the clinit check, when inlining. Because
+    // currently the callee is responsible for reporting parameters to the GC, the code
+    // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC.
+    // Therefore we cannot allocate any object in that code, including loading a new class.
+    if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) {
+      last_input->SetMustGenerateClinitCheck(false);
+
+      // If the load class instruction is no longer used, remove it from
+      // the graph.
+      if (!last_input->HasUses()) {
+        last_input->GetBlock()->RemoveInstruction(last_input);
+      }
     }
   }
 }
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 3d81c20a16..a048c856c5 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -23,6 +23,30 @@
 
 namespace art {
 
+class RTPVisitor : public HGraphDelegateVisitor {
+ public:
+  RTPVisitor(HGraph* graph, StackHandleScopeCollection* handles)
+    : HGraphDelegateVisitor(graph),
+      handles_(handles) {}
+
+  void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
+  void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
+  void VisitNewArray(HNewArray* instr) OVERRIDE;
+  void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info);
+  void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact);
+  void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE;
+  void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE;
+  void VisitInvoke(HInvoke* instr) OVERRIDE;
+  void VisitArrayGet(HArrayGet* instr) OVERRIDE;
+  void UpdateReferenceTypeInfo(HInstruction* instr,
+                               uint16_t type_idx,
+                               const DexFile& dex_file,
+                               bool is_exact);
+
+ private:
+  StackHandleScopeCollection* handles_;
+};
+
 void ReferenceTypePropagation::Run() {
   // To properly propagate type info we need to visit in the dominator-based order.
   // Reverse post order guarantees a node's dominators are visited first.
@@ -35,23 +59,13 @@
 void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
   // TODO: handle other instructions that give type info
-  // (Call/array accesses)
+  // (array accesses)
 
+  RTPVisitor visitor(graph_, handles_);
   // Initialize exact types first for faster convergence.
   for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
     HInstruction* instr = it.Current();
-    // TODO: Make ReferenceTypePropagation a visitor or create a new one.
-    if (instr->IsNewInstance()) {
-      VisitNewInstance(instr->AsNewInstance());
-    } else if (instr->IsLoadClass()) {
-      VisitLoadClass(instr->AsLoadClass());
-    } else if (instr->IsNewArray()) {
-      VisitNewArray(instr->AsNewArray());
-    } else if (instr->IsInstanceFieldGet()) {
-      VisitInstanceFieldGet(instr->AsInstanceFieldGet());
-    } else if (instr->IsStaticFieldGet()) {
-      VisitStaticFieldGet(instr->AsStaticFieldGet());
-    }
+    instr->Accept(&visitor);
   }
 
   // Handle Phis.
@@ -166,9 +180,9 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) {
   }
 }
 
-void ReferenceTypePropagation::SetClassAsTypeInfo(HInstruction* instr,
-                                                  mirror::Class* klass,
-                                                  bool is_exact) {
+void RTPVisitor::SetClassAsTypeInfo(HInstruction* instr,
+                                    mirror::Class* klass,
+                                    bool is_exact) {
   if (klass != nullptr) {
     ScopedObjectAccess soa(Thread::Current());
     MutableHandle<mirror::Class> handle = handles_->NewHandle(klass);
@@ -177,10 +191,10 @@
   }
 }
 
-void ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr,
-                                                       uint16_t type_idx,
-                                                       const DexFile& dex_file,
-                                                       bool is_exact) {
+void RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr,
+                                         uint16_t type_idx,
+                                         const DexFile& dex_file,
+                                         bool is_exact) {
   DCHECK_EQ(instr->GetType(), Primitive::kPrimNot);
 
   ScopedObjectAccess soa(Thread::Current());
@@ -189,16 +203,16 @@
   SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact);
 }
 
-void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) {
+void RTPVisitor::VisitNewInstance(HNewInstance* instr) {
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
 
-void ReferenceTypePropagation::VisitNewArray(HNewArray* instr) {
+void RTPVisitor::VisitNewArray(HNewArray* instr) {
   UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true);
 }
 
-void ReferenceTypePropagation::UpdateFieldAccessTypeInfo(HInstruction* instr,
-                                                         const FieldInfo& info) {
+void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr,
+                                           const FieldInfo& info) {
   // The field index is unknown only during tests.
   if (instr->GetType() != Primitive::kPrimNot || info.GetFieldIndex() == kUnknownFieldIndex) {
     return;
@@ -213,15 +227,15 @@
   SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
 }
 
-void ReferenceTypePropagation::VisitInstanceFieldGet(HInstanceFieldGet* instr) {
+void RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) {
   UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo());
 }
 
-void ReferenceTypePropagation::VisitStaticFieldGet(HStaticFieldGet* instr) {
+void RTPVisitor::VisitStaticFieldGet(HStaticFieldGet* instr) {
   UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo());
 }
 
-void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) {
+void RTPVisitor::VisitLoadClass(HLoadClass* instr) {
   ScopedObjectAccess soa(Thread::Current());
   mirror::DexCache* dex_cache =
       Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile());
@@ -299,6 +313,34 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) {
   return !previous_rti.IsEqual(instr->GetReferenceTypeInfo());
 }
 
+void RTPVisitor::VisitInvoke(HInvoke* instr) {
+  if (instr->GetType() != Primitive::kPrimNot) {
+    return;
+  }
+
+  ScopedObjectAccess soa(Thread::Current());
+  ClassLinker* cl = Runtime::Current()->GetClassLinker();
+  mirror::DexCache* dex_cache = cl->FindDexCache(instr->GetDexFile());
+  ArtMethod* method = dex_cache->GetResolvedMethod(
+      instr->GetDexMethodIndex(), cl->GetImagePointerSize());
+  DCHECK(method != nullptr);
+  mirror::Class* klass = method->GetReturnType(false);
+  SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
+}
+
+void RTPVisitor::VisitArrayGet(HArrayGet* instr) {
+  if (instr->GetType() != Primitive::kPrimNot) {
+    return;
+  }
+
+  HInstruction* parent = instr->InputAt(0);
+  ScopedObjectAccess soa(Thread::Current());
+  Handle<mirror::Class> handle = parent->GetReferenceTypeInfo().GetTypeHandle();
+  if (handle.GetReference() != nullptr && handle->IsObjectArrayClass()) {
+    SetClassAsTypeInfo(instr, handle->GetComponentType(), /* is_exact */ false);
+  }
+}
+
 void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) {
   ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo();
   // Be sure that we don't go over the bounded type.
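Reviewer note on reference_type_propagation.cc: the new RTPVisitor replaces VisitBasicBlock's hand-written if/else dispatch with HGraphDelegateVisitor double dispatch — each instruction's Accept() call lands in the most specific Visit method the subclass overrides, and unhandled instructions fall through to a no-op. A minimal standalone model of that pattern follows (toy Instruction/Visitor hierarchy, not ART's classes):

```cpp
#include <iostream>
#include <vector>

class NewInstance;
class LoadClass;

// Base visitor: every Visit method defaults to a no-op, mirroring
// HGraphDelegateVisitor, where instructions without an override are skipped.
class Visitor {
 public:
  virtual ~Visitor() {}
  virtual void VisitNewInstance(NewInstance*) {}
  virtual void VisitLoadClass(LoadClass*) {}
};

class Instruction {
 public:
  virtual ~Instruction() {}
  virtual void Accept(Visitor* visitor) = 0;  // second leg of double dispatch
};

class NewInstance : public Instruction {
 public:
  void Accept(Visitor* visitor) override { visitor->VisitNewInstance(this); }
};

class LoadClass : public Instruction {
 public:
  void Accept(Visitor* visitor) override { visitor->VisitLoadClass(this); }
};

// Counterpart of RTPVisitor: overrides only the instructions it cares about.
class TypeInfoVisitor : public Visitor {
 public:
  void VisitNewInstance(NewInstance*) override { std::cout << "exact type from new-instance\n"; }
  void VisitLoadClass(LoadClass*) override { std::cout << "type from load-class\n"; }
};

int main() {
  NewInstance new_instance;
  LoadClass load_class;
  std::vector<Instruction*> block = {&new_instance, &load_class};
  TypeInfoVisitor visitor;
  // The replacement for the old if/else chain: one Accept call per instruction.
  for (Instruction* instr : block) instr->Accept(&visitor);
}
```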
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 0a1d4c496e..0d687d25cb 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -40,26 +40,12 @@ class ReferenceTypePropagation : public HOptimization {
   static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation";
 
  private:
-  void VisitNewInstance(HNewInstance* new_instance);
-  void VisitLoadClass(HLoadClass* load_class);
-  void VisitNewArray(HNewArray* instr);
   void VisitPhi(HPhi* phi);
   void VisitBasicBlock(HBasicBlock* block);
-  void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info);
-  void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact);
-
   void UpdateBoundType(HBoundType* bound_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   void UpdatePhi(HPhi* phi) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-
   void BoundTypeForIfNotNull(HBasicBlock* block);
   void BoundTypeForIfInstanceOf(HBasicBlock* block);
-  void UpdateReferenceTypeInfo(HInstruction* instr,
-                               uint16_t type_idx,
-                               const DexFile& dex_file,
-                               bool is_exact);
-  void VisitInstanceFieldGet(HInstanceFieldGet* instr);
-  void VisitStaticFieldGet(HStaticFieldGet* instr);
-
   void ProcessWorklist();
   void AddToWorklist(HInstruction* instr);
   void AddDependentInstructionsToWorklist(HInstruction* instr);
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index c4612af393..2a86e60e14 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -184,22 +184,24 @@ void SsaBuilder::FixNullConstantType() {
       }
       HInstruction* left = equality_instr->InputAt(0);
       HInstruction* right = equality_instr->InputAt(1);
-      HInstruction* null_instr = nullptr;
+      HInstruction* int_operand = nullptr;
 
-      if ((left->GetType() == Primitive::kPrimNot) && right->IsIntConstant()) {
-        null_instr = right;
-      } else if ((right->GetType() == Primitive::kPrimNot) && left->IsIntConstant()) {
-        null_instr = left;
+      if ((left->GetType() == Primitive::kPrimNot) && (right->GetType() == Primitive::kPrimInt)) {
+        int_operand = right;
+      } else if ((right->GetType() == Primitive::kPrimNot)
+                 && (left->GetType() == Primitive::kPrimInt)) {
+        int_operand = left;
       } else {
         continue;
       }
 
       // If we got here, we are comparing against a reference and the int constant
       // should be replaced with a null constant.
-      if (null_instr->IsIntConstant()) {
-        DCHECK_EQ(0, null_instr->AsIntConstant()->GetValue());
-        equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), null_instr == right ? 1 : 0);
-      }
+      // Both type propagation and redundant phi elimination ensure `int_operand`
+      // can only be the 0 constant.
+      DCHECK(int_operand->IsIntConstant());
+      DCHECK_EQ(0, int_operand->AsIntConstant()->GetValue());
+      equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), int_operand == right ? 1 : 0);
     }
   }
 }
@@ -255,21 +257,18 @@ void SsaBuilder::BuildSsa() {
   PrimitiveTypePropagation type_propagation(GetGraph());
   type_propagation.Run();
 
-  // 5) Fix the type for null constants which are part of an equality comparison.
-  FixNullConstantType();
-
-  // 6) When creating equivalent phis we copy the inputs of the original phi which
-  // may be improperly typed. This will be fixed during the type propagation but
+  // 5) When creating equivalent phis we copy the inputs of the original phi which
+  // may be improperly typed. This was fixed during the type propagation in 4) but
   // as a result we may end up with two equivalent phis with the same type for
   // the same dex register. This pass cleans them up.
   EquivalentPhisCleanup();
 
-  // 7) Mark dead phis again. Step 4) may have introduced new phis.
-  // Step 6) might enable the death of new phis.
+  // 6) Mark dead phis again. Step 4) may have introduced new phis.
+  // Step 5) might enable the death of new phis.
   SsaDeadPhiElimination dead_phis(GetGraph());
   dead_phis.MarkDeadPhis();
 
-  // 8) Now that the graph is correctly typed, we can get rid of redundant phis.
+  // 7) Now that the graph is correctly typed, we can get rid of redundant phis.
   // Note that we cannot do this phase before type propagation, otherwise
   // we could get rid of phi equivalents, whose presence is a requirement for the
   // type propagation phase. Note that this is to satisfy statement (a) of the
@@ -277,6 +276,13 @@ void SsaBuilder::BuildSsa() {
   SsaRedundantPhiElimination redundant_phi(GetGraph());
   redundant_phi.Run();
 
+  // 8) Fix the type for null constants which are part of an equality comparison.
+  // We need to do this after redundant phi elimination, to ensure the only cases
+  // that we can see are reference comparison against 0. The redundant phi
+  // elimination ensures we do not see a phi taking two 0 constants in a HEqual
+  // or HNotEqual.
+  FixNullConstantType();
+
   // 9) Make sure environments use the right phi "equivalent": a phi marked dead
   // can have a phi equivalent that is not dead. We must therefore update
   // all environment uses of the dead phi to use its equivalent. Note that there
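Context for the FixNullConstantType() reordering above: dex bytecode has a single untyped zero constant, so a Java `if (ref == null)` reaches the SSA builder as a reference compared against an int 0, and only after redundant phi elimination is the int side guaranteed to be that plain 0 constant. Below is a toy model of the replacement decision, using plain enums and structs in place of ART's HInstruction types:

```cpp
#include <cassert>
#include <iostream>

enum class Type { kInt, kReference };

struct Operand {
  Type type;
  int int_value;  // only meaningful for kInt operands
};

// Mirrors the rewritten check: once one side of the equality is a reference,
// the other side must be the untyped 0 constant, and it stands for null.
bool ShouldReplaceWithNull(const Operand& left, const Operand& right) {
  const Operand* int_operand = nullptr;
  if (left.type == Type::kReference && right.type == Type::kInt) {
    int_operand = &right;
  } else if (right.type == Type::kReference && left.type == Type::kInt) {
    int_operand = &left;
  } else {
    return false;  // int==int or ref==ref: nothing to fix
  }
  // As the diff's comment says: after type propagation and redundant phi
  // elimination the int side can only be the 0 constant.
  assert(int_operand->int_value == 0);
  return true;
}

int main() {
  Operand ref{Type::kReference, 0};
  Operand zero{Type::kInt, 0};
  std::cout << std::boolalpha << ShouldReplaceWithNull(ref, zero) << "\n";  // true
}
```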
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 42b9182d55..65610d54a6 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -49,7 +49,6 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
   }
 
   dex_pc_max_ = std::max(dex_pc_max_, dex_pc);
-  native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset);
   register_mask_max_ = std::max(register_mask_max_, register_mask);
   current_dex_register_ = 0;
 }
@@ -128,16 +127,25 @@ void StackMapStream::EndInlineInfoEntry() {
   current_inline_info_ = InlineInfoEntry();
 }
 
+uint32_t StackMapStream::ComputeMaxNativePcOffset() const {
+  uint32_t max_native_pc_offset = 0u;
+  for (size_t i = 0, size = stack_maps_.Size(); i != size; ++i) {
+    max_native_pc_offset = std::max(max_native_pc_offset, stack_maps_.Get(i).native_pc_offset);
+  }
+  return max_native_pc_offset;
+}
+
 size_t StackMapStream::PrepareForFillIn() {
   int stack_mask_number_of_bits = stack_mask_max_ + 1;  // Need room for max element too.
   stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte;
   inline_info_size_ = ComputeInlineInfoSize();
   dex_register_maps_size_ = ComputeDexRegisterMapsSize();
+  uint32_t max_native_pc_offset = ComputeMaxNativePcOffset();
   stack_map_encoding_ = StackMapEncoding::CreateFromSizes(stack_mask_size_,
                                                           inline_info_size_,
                                                           dex_register_maps_size_,
                                                           dex_pc_max_,
-                                                          native_pc_offset_max_,
+                                                          max_native_pc_offset,
                                                           register_mask_max_);
   stack_maps_size_ = stack_maps_.Size() * stack_map_encoding_.ComputeStackMapSize();
   dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 274d573350..bc3653d7ea 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -67,7 +67,6 @@ class StackMapStream : public ValueObject {
         inline_infos_(allocator, 2),
         stack_mask_max_(-1),
         dex_pc_max_(0),
-        native_pc_offset_max_(0),
         register_mask_max_(0),
         number_of_stack_maps_with_inline_info_(0),
         dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()),
@@ -126,6 +125,17 @@ class StackMapStream : public ValueObject {
                             uint32_t num_dex_registers);
   void EndInlineInfoEntry();
 
+  size_t GetNumberOfStackMaps() const {
+    return stack_maps_.Size();
+  }
+
+  const StackMapEntry& GetStackMap(size_t i) const {
+    DCHECK_LT(i, stack_maps_.Size());
+    return stack_maps_.GetRawStorage()[i];
+  }
+
+  uint32_t ComputeMaxNativePcOffset() const;
+
   // Prepares the stream to fill in a memory region. Must be called before FillIn.
   // Returns the size (in bytes) needed to store this stream.
   size_t PrepareForFillIn();
@@ -163,7 +173,6 @@ class StackMapStream : public ValueObject {
   GrowableArray<InlineInfoEntry> inline_infos_;
   int stack_mask_max_;
   uint32_t dex_pc_max_;
-  uint32_t native_pc_offset_max_;
   uint32_t register_mask_max_;
   size_t number_of_stack_maps_with_inline_info_;
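Taken together, the stack_map_stream changes make the stream's entries the single source of truth that code_generator.cc now iterates (the parallel PcInfo array is gone), and the maximum native pc offset is recomputed on demand instead of being maintained in a native_pc_offset_max_ field. Below is a standalone sketch of that scan, with a plain struct and std::vector standing in for StackMapEntry and GrowableArray:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for StackMapStream::StackMapEntry, keeping just the two fields the
// mapping-table builders in code_generator.cc read.
struct StackMapEntry {
  uint32_t dex_pc;
  uint32_t native_pc_offset;
};

// Counterpart of ComputeMaxNativePcOffset(): a linear scan at encode time
// replaces the incrementally maintained maximum.
uint32_t ComputeMaxNativePcOffset(const std::vector<StackMapEntry>& stack_maps) {
  uint32_t max_native_pc_offset = 0u;
  for (const StackMapEntry& entry : stack_maps) {
    max_native_pc_offset = std::max(max_native_pc_offset, entry.native_pc_offset);
  }
  return max_native_pc_offset;
}

int main() {
  std::vector<StackMapEntry> stack_maps = {{0, 4}, {3, 16}, {7, 28}};
  // What BuildSourceMap() now does: iterate the stream's entries directly.
  for (const StackMapEntry& entry : stack_maps) {
    std::cout << "native+0x" << std::hex << entry.native_pc_offset
              << " -> dex pc 0x" << entry.dex_pc << "\n";
  }
  std::cout << std::dec << "max native pc offset: "
            << ComputeMaxNativePcOffset(stack_maps) << "\n";
}
```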