Diffstat (limited to 'compiler/optimizing')
25 files changed, 531 insertions, 121 deletions
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 014353d615..7ddabdee78 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -18,8 +18,28 @@ namespace art { -// This visitor tries to simplify operations that yield a constant. For example -// `input * 0` is replaced by a null constant. +// This visitor tries to simplify instructions that can be evaluated +// as constants. +class HConstantFoldingVisitor : public HGraphDelegateVisitor { + public: + explicit HConstantFoldingVisitor(HGraph* graph) + : HGraphDelegateVisitor(graph) {} + + private: + void VisitBasicBlock(HBasicBlock* block) OVERRIDE; + + void VisitUnaryOperation(HUnaryOperation* inst) OVERRIDE; + void VisitBinaryOperation(HBinaryOperation* inst) OVERRIDE; + + void VisitTypeConversion(HTypeConversion* inst) OVERRIDE; + void VisitDivZeroCheck(HDivZeroCheck* inst) OVERRIDE; + + DISALLOW_COPY_AND_ASSIGN(HConstantFoldingVisitor); +}; + +// This visitor tries to simplify operations with an absorbing input, +// yielding a constant. For example `input * 0` is replaced by a +// null constant. class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { public: explicit InstructionWithAbsorbingInputSimplifier(HGraph* graph) : HGraphVisitor(graph) {} @@ -44,59 +64,69 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { void VisitXor(HXor* instruction) OVERRIDE; }; + void HConstantFolding::Run() { - InstructionWithAbsorbingInputSimplifier simplifier(graph_); + HConstantFoldingVisitor visitor(graph_); // Process basic blocks in reverse post-order in the dominator tree, // so that an instruction turned into a constant, used as input of // another instruction, may possibly be used to turn that second // instruction into a constant as well. - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - HBasicBlock* block = it.Current(); - // Traverse this block's instructions in (forward) order and - // replace the ones that can be statically evaluated by a - // compile-time counterpart. - for (HInstructionIterator inst_it(block->GetInstructions()); - !inst_it.Done(); inst_it.Advance()) { - HInstruction* inst = inst_it.Current(); - if (inst->IsBinaryOperation()) { - // Constant folding: replace `op(a, b)' with a constant at - // compile time if `a' and `b' are both constants. - HConstant* constant = inst->AsBinaryOperation()->TryStaticEvaluation(); - if (constant != nullptr) { - inst->ReplaceWith(constant); - inst->GetBlock()->RemoveInstruction(inst); - } else { - inst->Accept(&simplifier); - } - } else if (inst->IsUnaryOperation()) { - // Constant folding: replace `op(a)' with a constant at compile - // time if `a' is a constant. - HConstant* constant = inst->AsUnaryOperation()->TryStaticEvaluation(); - if (constant != nullptr) { - inst->ReplaceWith(constant); - inst->GetBlock()->RemoveInstruction(inst); - } - } else if (inst->IsTypeConversion()) { - // Constant folding: replace `TypeConversion(a)' with a constant at - // compile time if `a' is a constant. - HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation(); - if (constant != nullptr) { - inst->ReplaceWith(constant); - inst->GetBlock()->RemoveInstruction(inst); - } - } else if (inst->IsDivZeroCheck()) { - // We can safely remove the check if the input is a non-null constant. 
- HDivZeroCheck* check = inst->AsDivZeroCheck(); - HInstruction* check_input = check->InputAt(0); - if (check_input->IsConstant() && !check_input->AsConstant()->IsZero()) { - check->ReplaceWith(check_input); - check->GetBlock()->RemoveInstruction(check); - } - } - } + visitor.VisitReversePostOrder(); +} + + +void HConstantFoldingVisitor::VisitBasicBlock(HBasicBlock* block) { + // Traverse this block's instructions (phis don't need to be + // processed) in (forward) order and replace the ones that can be + // statically evaluated by a compile-time counterpart. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + it.Current()->Accept(this); } } +void HConstantFoldingVisitor::VisitUnaryOperation(HUnaryOperation* inst) { + // Constant folding: replace `op(a)' with a constant at compile + // time if `a' is a constant. + HConstant* constant = inst->TryStaticEvaluation(); + if (constant != nullptr) { + inst->ReplaceWith(constant); + inst->GetBlock()->RemoveInstruction(inst); + } +} + +void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) { + // Constant folding: replace `op(a, b)' with a constant at + // compile time if `a' and `b' are both constants. + HConstant* constant = inst->TryStaticEvaluation(); + if (constant != nullptr) { + inst->ReplaceWith(constant); + inst->GetBlock()->RemoveInstruction(inst); + } else { + InstructionWithAbsorbingInputSimplifier simplifier(GetGraph()); + inst->Accept(&simplifier); + } +} + +void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) { + // Constant folding: replace `TypeConversion(a)' with a constant at + // compile time if `a' is a constant. + HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation(); + if (constant != nullptr) { + inst->ReplaceWith(constant); + inst->GetBlock()->RemoveInstruction(inst); + } +} + +void HConstantFoldingVisitor::VisitDivZeroCheck(HDivZeroCheck* inst) { + // We can safely remove the check if the input is a non-null constant. + HInstruction* check_input = inst->InputAt(0); + if (check_input->IsConstant() && !check_input->AsConstant()->IsZero()) { + inst->ReplaceWith(check_input); + inst->GetBlock()->RemoveInstruction(inst); + } +} + + void InstructionWithAbsorbingInputSimplifier::VisitShift(HBinaryOperation* instruction) { DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); HInstruction* left = instruction->GetLeft(); diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h index 2698b2d690..e10b1d6b2e 100644 --- a/compiler/optimizing/constant_folding.h +++ b/compiler/optimizing/constant_folding.h @@ -26,13 +26,20 @@ namespace art { * Optimization pass performing a simple constant-expression * evaluation on the SSA form. * + * Note that graph simplifications producing a constant should be + * implemented in art::HConstantFolding, while graph simplifications + * not producing constants should be implemented in + * art::InstructionSimplifier. (This convention is a choice that was + * made during the development of these parts of the compiler and is + * not bound by any technical requirement.) + * * This class is named art::HConstantFolding to avoid name * clashes with the art::ConstantPropagation class defined in * compiler/dex/post_opt_passes.h. 
*/ class HConstantFolding : public HOptimization { public: - explicit HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName) + HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName) : HOptimization(graph, name) {} void Run() OVERRIDE; diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index e6e9177841..4a49c83611 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -593,8 +593,9 @@ void GraphChecker::HandleLoop(HBasicBlock* loop_header) { HBasicBlock* predecessor = loop_header->GetPredecessors()[i]; if (!loop_information->IsBackEdge(*predecessor)) { AddError(StringPrintf( - "Loop header %d has multiple incoming (non back edge) blocks.", - id)); + "Loop header %d has multiple incoming (non back edge) blocks: %d.", + id, + predecessor->GetBlockId())); } } } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index a5acab81ab..f39699e0a7 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -190,28 +190,34 @@ static uint32_t FindMethodIndexIn(ArtMethod* method, } } -static uint32_t FindClassIndexIn(mirror::Class* cls, const DexFile& dex_file) +static uint32_t FindClassIndexIn(mirror::Class* cls, + const DexFile& dex_file, + Handle<mirror::DexCache> dex_cache) SHARED_REQUIRES(Locks::mutator_lock_) { + uint32_t index = DexFile::kDexNoIndex; if (cls->GetDexCache() == nullptr) { - DCHECK(cls->IsArrayClass()); - // TODO: find the class in `dex_file`. - return DexFile::kDexNoIndex; + DCHECK(cls->IsArrayClass()) << PrettyClass(cls); + index = cls->FindTypeIndexInOtherDexFile(dex_file); } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) { + DCHECK(cls->IsProxyClass()) << PrettyClass(cls); // TODO: deal with proxy classes. - return DexFile::kDexNoIndex; } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) { + index = cls->GetDexTypeIndex(); + } else { + index = cls->FindTypeIndexInOtherDexFile(dex_file); + } + + if (index != DexFile::kDexNoIndex) { // Update the dex cache to ensure the class is in it. The generated code will // assume it is. We make this safe by updating the dex cache, as other // dex files might also load the class, and there is no guarantee the dex // cache of the class's own dex file will be updated. - if (cls->GetDexCache()->GetResolvedType(cls->GetDexTypeIndex()) == nullptr) { - cls->GetDexCache()->SetResolvedType(cls->GetDexTypeIndex(), cls); + if (dex_cache->GetResolvedType(index) == nullptr) { + dex_cache->SetResolvedType(index, cls); } - return cls->GetDexTypeIndex(); - } else { - // TODO: find the class in `dex_file`. - return DexFile::kDexNoIndex; } + + return index; } bool HInliner::TryInline(HInvoke* invoke_instruction) { @@ -303,7 +309,7 @@ HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker, uint32_t dex_pc) const { ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); - return new (graph_->GetArena()) HInstanceFieldGet( + HInstanceFieldGet* result = new (graph_->GetArena()) HInstanceFieldGet( receiver, Primitive::kPrimNot, field->GetOffset(), @@ -313,6 +319,9 @@ HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker, *field->GetDexFile(), handles_->NewHandle(field->GetDexCache()), dex_pc); + // The class of an object is effectively final, and does not have any memory dependencies.
+ result->SetSideEffects(SideEffects::None()); + return result; } bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, @@ -322,7 +331,8 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, << invoke_instruction->DebugName(); const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); - uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file); + uint32_t class_index = FindClassIndexIn( + ic.GetMonomorphicType(), caller_dex_file, caller_compilation_unit_.GetDexCache()); if (class_index == DexFile::kDexNoIndex) { VLOG(compiler) << "Call to " << PrettyMethod(resolved_method) << " from inline cache is not inlined because its class is not" @@ -350,11 +360,39 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, } // We successfully inlined, now add a guard. + bool is_referrer = + (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass()); + AddTypeGuard(receiver, + cursor, + bb_cursor, + class_index, + is_referrer, + invoke_instruction, + /* with_deoptimization */ true); + + // Run type propagation to get the guard typed, and eventually propagate the + // type of the receiver. + ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false); + rtp_fixup.Run(); + + MaybeRecordStat(kInlinedMonomorphicCall); + return true; +} + +HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, + HInstruction* cursor, + HBasicBlock* bb_cursor, + uint32_t class_index, + bool is_referrer, + HInstruction* invoke_instruction, + bool with_deoptimization) { + ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); HInstanceFieldGet* receiver_class = BuildGetReceiverClass( class_linker, receiver, invoke_instruction->GetDexPc()); - bool is_referrer = - (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass()); + const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + // Note that we will just compare the classes, so we don't need Java semantics access checks. + // Also, the caller of `AddTypeGuard` must have guaranteed that the class is in the dex cache. HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(), class_index, caller_dex_file, @@ -364,8 +402,6 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, /* is_in_dex_cache */ true); HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class); - HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( - compare, invoke_instruction->GetDexPc()); // TODO: Extend reference type propagation to understand the guard. if (cursor != nullptr) { bb_cursor->InsertInstructionAfter(receiver_class, cursor); @@ -374,16 +410,13 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, } bb_cursor->InsertInstructionAfter(load_class, receiver_class); bb_cursor->InsertInstructionAfter(compare, load_class); - bb_cursor->InsertInstructionAfter(deoptimize, compare); - deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); - - // Run type propagation to get the guard typed, and eventually propagate the - // type of the receiver. 
- ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false); - rtp_fixup.Run(); - - MaybeRecordStat(kInlinedMonomorphicCall); - return true; + if (with_deoptimization) { + HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( + compare, invoke_instruction->GetDexPc()); + bb_cursor->InsertInstructionAfter(deoptimize, compare); + deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); + } + return compare; } bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, @@ -391,6 +424,174 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, const InlineCache& ic) { DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface()) << invoke_instruction->DebugName(); + + if (TryInlinePolymorphicCallToSameTarget(invoke_instruction, resolved_method, ic)) { + return true; + } + + ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); + size_t pointer_size = class_linker->GetImagePointerSize(); + const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + + bool all_targets_inlined = true; + bool one_target_inlined = false; + for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) { + if (ic.GetTypeAt(i) == nullptr) { + break; + } + ArtMethod* method = nullptr; + if (invoke_instruction->IsInvokeInterface()) { + method = ic.GetTypeAt(i)->FindVirtualMethodForInterface( + resolved_method, pointer_size); + } else { + DCHECK(invoke_instruction->IsInvokeVirtual()); + method = ic.GetTypeAt(i)->FindVirtualMethodForVirtual( + resolved_method, pointer_size); + } + + HInstruction* receiver = invoke_instruction->InputAt(0); + HInstruction* cursor = invoke_instruction->GetPrevious(); + HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); + + uint32_t class_index = FindClassIndexIn( + ic.GetTypeAt(i), caller_dex_file, caller_compilation_unit_.GetDexCache()); + HInstruction* return_replacement = nullptr; + if (class_index == DexFile::kDexNoIndex || + !TryBuildAndInline(invoke_instruction, method, &return_replacement)) { + all_targets_inlined = false; + } else { + one_target_inlined = true; + bool is_referrer = (ic.GetTypeAt(i) == outermost_graph_->GetArtMethod()->GetDeclaringClass()); + + // If we have inlined all targets before, and this receiver is the last seen, + // we deoptimize instead of keeping the original invoke instruction. + bool deoptimize = all_targets_inlined && + (i != InlineCache::kIndividualCacheSize - 1) && + (ic.GetTypeAt(i + 1) == nullptr); + HInstruction* compare = AddTypeGuard( + receiver, cursor, bb_cursor, class_index, is_referrer, invoke_instruction, deoptimize); + if (deoptimize) { + if (return_replacement != nullptr) { + invoke_instruction->ReplaceWith(return_replacement); + } + invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction); + // Because the inline cache data can be populated concurrently, we force the end of the + // iteration. Otherwise, we could see a new receiver type. + break; + } else { + CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction); + } + } + } + + if (!one_target_inlined) { + VLOG(compiler) << "Call to " << PrettyMethod(resolved_method) + << " from inline cache is not inlined because none" + << " of its targets could be inlined"; + return false; + } + MaybeRecordStat(kInlinedPolymorphicCall); + + // Run type propagation to get the guards typed.
+ ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false); + rtp_fixup.Run(); + return true; +} + +void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare, + HInstruction* return_replacement, + HInstruction* invoke_instruction) { + uint32_t dex_pc = invoke_instruction->GetDexPc(); + HBasicBlock* cursor_block = compare->GetBlock(); + HBasicBlock* original_invoke_block = invoke_instruction->GetBlock(); + ArenaAllocator* allocator = graph_->GetArena(); + + // Split the block after the compare: `cursor_block` will now be the start of the diamond, + // and the returned block is the start of the then branch (that could contain multiple blocks). + HBasicBlock* then = cursor_block->SplitAfterForInlining(compare); + + // Split the block containing the invoke before and after the invoke. The returned block + // of the split before will contain the invoke and will be the otherwise branch of + // the diamond. The returned block of the split after will be the merge block + // of the diamond. + HBasicBlock* end_then = invoke_instruction->GetBlock(); + HBasicBlock* otherwise = end_then->SplitBeforeForInlining(invoke_instruction); + HBasicBlock* merge = otherwise->SplitAfterForInlining(invoke_instruction); + + // If the methods we are inlining return a value, we create a phi in the merge block + // that will have the `invoke_instruction` and the `return_replacement` as inputs. + if (return_replacement != nullptr) { + HPhi* phi = new (allocator) HPhi( + allocator, kNoRegNumber, 0, HPhi::ToPhiType(invoke_instruction->GetType()), dex_pc); + merge->AddPhi(phi); + invoke_instruction->ReplaceWith(phi); + phi->AddInput(return_replacement); + phi->AddInput(invoke_instruction); + } + + // Add the control flow instructions. + otherwise->AddInstruction(new (allocator) HGoto(dex_pc)); + end_then->AddInstruction(new (allocator) HGoto(dex_pc)); + cursor_block->AddInstruction(new (allocator) HIf(compare, dex_pc)); + + // Add the newly created blocks to the graph. + graph_->AddBlock(then); + graph_->AddBlock(otherwise); + graph_->AddBlock(merge); + + // Set up successor (and implicitly predecessor) relations. + cursor_block->AddSuccessor(otherwise); + cursor_block->AddSuccessor(then); + end_then->AddSuccessor(merge); + otherwise->AddSuccessor(merge); + + // Set up dominance information. + then->SetDominator(cursor_block); + cursor_block->AddDominatedBlock(then); + otherwise->SetDominator(cursor_block); + cursor_block->AddDominatedBlock(otherwise); + merge->SetDominator(cursor_block); + cursor_block->AddDominatedBlock(merge); + + // Update the reverse post order. + size_t index = IndexOfElement(graph_->reverse_post_order_, cursor_block); + MakeRoomFor(&graph_->reverse_post_order_, 1, index); + graph_->reverse_post_order_[++index] = then; + index = IndexOfElement(graph_->reverse_post_order_, end_then); + MakeRoomFor(&graph_->reverse_post_order_, 2, index); + graph_->reverse_post_order_[++index] = otherwise; + graph_->reverse_post_order_[++index] = merge; + + // Set the loop information of the newly created blocks.
+ HLoopInformation* loop_info = cursor_block->GetLoopInformation(); + if (loop_info != nullptr) { + then->SetLoopInformation(cursor_block->GetLoopInformation()); + merge->SetLoopInformation(cursor_block->GetLoopInformation()); + otherwise->SetLoopInformation(cursor_block->GetLoopInformation()); + for (HLoopInformationOutwardIterator loop_it(*cursor_block); + !loop_it.Done(); + loop_it.Advance()) { + loop_it.Current()->Add(then); + loop_it.Current()->Add(merge); + loop_it.Current()->Add(otherwise); + } + // In case the original invoke location was a back edge, we need to update + // the loop to now have the merge block as a back edge. + if (loop_info->IsBackEdge(*original_invoke_block)) { + loop_info->RemoveBackEdge(original_invoke_block); + loop_info->AddBackEdge(merge); + } + } + + // Set the try/catch information of the newly created blocks. + then->SetTryCatchInformation(cursor_block->GetTryCatchInformation()); + merge->SetTryCatchInformation(cursor_block->GetTryCatchInformation()); + otherwise->SetTryCatchInformation(cursor_block->GetTryCatchInformation()); +} + +bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction, + ArtMethod* resolved_method, + const InlineCache& ic) { // This optimization only works under JIT for now. DCHECK(Runtime::Current()->UseJit()); if (graph_->GetInstructionSet() == kMips64) { @@ -883,7 +1084,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, HConstantFolding fold(callee_graph); HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_); InstructionSimplifier simplify(callee_graph, stats_); - IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_); + IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_, stats_); HOptimization* optimizations[] = { &intrinsics, diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 9dd9bf5ad8..cdb2167082 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -101,12 +101,18 @@ class HInliner : public HOptimization { const InlineCache& ic) SHARED_REQUIRES(Locks::mutator_lock_); - // Try to inline targets of a polymorphic call. Currently unimplemented. + // Try to inline targets of a polymorphic call. bool TryInlinePolymorphicCall(HInvoke* invoke_instruction, ArtMethod* resolved_method, const InlineCache& ic) SHARED_REQUIRES(Locks::mutator_lock_); + bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction, + ArtMethod* resolved_method, + const InlineCache& ic) + SHARED_REQUIRES(Locks::mutator_lock_); + + HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker, HInstruction* receiver, uint32_t dex_pc) const @@ -118,6 +124,57 @@ class HInliner : public HOptimization { bool do_rtp) SHARED_REQUIRES(Locks::mutator_lock_); + // Add a type guard on the given `receiver`. This will add to the graph: + // i0 = HFieldGet(receiver, klass) + // i1 = HLoadClass(class_index, is_referrer) + // i2 = HNotEqual(i0, i1) + // + // And if `with_deoptimization` is true: + // HDeoptimize(i2) + // + // The method returns the `HNotEqual`, which will be used for polymorphic inlining.
+ HInstruction* AddTypeGuard(HInstruction* receiver, + HInstruction* cursor, + HBasicBlock* bb_cursor, + uint32_t class_index, + bool is_referrer, + HInstruction* invoke_instruction, + bool with_deoptimization) + SHARED_REQUIRES(Locks::mutator_lock_); + + /* + * Ad-hoc implementation of a diamond pattern in the graph for + * polymorphic inlining: + * 1) `compare` becomes the input of the new `HIf`. + * 2) Everything up until `invoke_instruction` is in the then branch (could + * contain multiple blocks). + * 3) `invoke_instruction` is moved to the otherwise block. + * 4) If `return_replacement` is not null, the merge block will have + * a phi whose inputs are `return_replacement` and `invoke_instruction`. + * + * Before: + * Block1 + * compare + * ... + * invoke_instruction + * + * After: + * Block1 + * compare + * if + * / \ + * / \ + * Then block Otherwise block + * ... invoke_instruction + * \ / + * \ / + * Merge block + * phi(return_replacement, invoke_instruction) + */ + void CreateDiamondPatternForPolymorphicInline(HInstruction* compare, + HInstruction* return_replacement, + HInstruction* invoke_instruction); + HGraph* const outermost_graph_; const DexCompilationUnit& outer_compilation_unit_; const DexCompilationUnit& caller_compilation_unit_; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index a48d06f3d0..13d3f752c3 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -92,6 +92,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifySystemArrayCopy(HInvoke* invoke); void SimplifyStringEquals(HInvoke* invoke); void SimplifyCompare(HInvoke* invoke, bool has_zero_op); + void SimplifyIsNaN(HInvoke* invoke); OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; @@ -1551,6 +1552,16 @@ void InstructionSimplifierVisitor::SimplifyCompare(HInvoke* invoke, bool is_sign invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, compare); } +void InstructionSimplifierVisitor::SimplifyIsNaN(HInvoke* invoke) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + uint32_t dex_pc = invoke->GetDexPc(); + // IsNaN(x) is the same as x != x. + HInstruction* x = invoke->InputAt(0); + HCondition* condition = new (GetGraph()->GetArena()) HNotEqual(x, x, dex_pc); + condition->SetBias(ComparisonBias::kLtBias); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, condition); +} + void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { if (instruction->GetIntrinsic() == Intrinsics::kStringEquals) { SimplifyStringEquals(instruction); @@ -1568,6 +1579,9 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { } else if (instruction->GetIntrinsic() == Intrinsics::kIntegerSignum || instruction->GetIntrinsic() == Intrinsics::kLongSignum) { SimplifyCompare(instruction, /* is_signum */ true); + } else if (instruction->GetIntrinsic() == Intrinsics::kFloatIsNaN || + instruction->GetIntrinsic() == Intrinsics::kDoubleIsNaN) { + SimplifyIsNaN(instruction); } } diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index cc4b6f6adc..7905104ed4 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -25,6 +25,13 @@ namespace art { /** * Implements optimizations specific to each instruction.
+ * + * Note that graph simplifications producing a constant should be + * implemented in art::HConstantFolding, while graph simplifications + * not producing constants should be implemented in + * art::InstructionSimplifier. (This convention is a choice that was + * made during the development of these parts of the compiler and is + * not bound by any technical requirement.) */ class InstructionSimplifier : public HOptimization { public: diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index db39bc8eec..316e86b4c9 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -570,6 +570,7 @@ void IntrinsicsRecognizer::Run() { NeedsEnvironmentOrCache(intrinsic), GetSideEffects(intrinsic), GetExceptions(intrinsic)); + MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized); } } } diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 3bf3f7ffae..2ab50bb436 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -33,8 +33,8 @@ static constexpr bool kRoundIsPlusPointFive = false; // Recognize intrinsics from HInvoke nodes. class IntrinsicsRecognizer : public HOptimization { public: - IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver) - : HOptimization(graph, kIntrinsicsRecognizerPassName), + IntrinsicsRecognizer(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats) + : HOptimization(graph, kIntrinsicsRecognizerPassName, stats), driver_(driver) {} void Run() OVERRIDE; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 00a158b10a..ea8669fa18 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1858,8 +1858,6 @@ UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite) -UNIMPLEMENTED_INTRINSIC(FloatIsNaN) -UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(LongHighestOneBit) @@ -1867,6 +1865,8 @@ UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit) UNIMPLEMENTED_INTRINSIC(LongLowestOneBit) // Handled as HIR instructions. +UNIMPLEMENTED_INTRINSIC(FloatIsNaN) +UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 4140d94e17..8741fd284f 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1618,8 +1618,6 @@ UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite) -UNIMPLEMENTED_INTRINSIC(FloatIsNaN) -UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(LongHighestOneBit) @@ -1627,6 +1625,8 @@ UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit) UNIMPLEMENTED_INTRINSIC(LongLowestOneBit) // Handled as HIR instructions. 
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN) +UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 5d6e8c280f..c8629644b6 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -1220,8 +1220,6 @@ UNIMPLEMENTED_INTRINSIC(MathTanh) UNIMPLEMENTED_INTRINSIC(FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite) -UNIMPLEMENTED_INTRINSIC(FloatIsNaN) -UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(LongHighestOneBit) @@ -1229,6 +1227,8 @@ UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit) UNIMPLEMENTED_INTRINSIC(LongLowestOneBit) // Handled as HIR instructions. +UNIMPLEMENTED_INTRINSIC(FloatIsNaN) +UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) UNIMPLEMENTED_INTRINSIC(IntegerCompare) UNIMPLEMENTED_INTRINSIC(LongCompare) UNIMPLEMENTED_INTRINSIC(IntegerSignum) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index ac2850342d..cf3a3657de 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1764,8 +1764,6 @@ UNIMPLEMENTED_INTRINSIC(MathTanh) UNIMPLEMENTED_INTRINSIC(FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite) -UNIMPLEMENTED_INTRINSIC(FloatIsNaN) -UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(LongHighestOneBit) @@ -1773,6 +1771,8 @@ UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit) UNIMPLEMENTED_INTRINSIC(LongLowestOneBit) // Handled as HIR instructions. +UNIMPLEMENTED_INTRINSIC(FloatIsNaN) +UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) UNIMPLEMENTED_INTRINSIC(IntegerCompare) UNIMPLEMENTED_INTRINSIC(LongCompare) UNIMPLEMENTED_INTRINSIC(IntegerSignum) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 22cefe8aa5..260a8773fb 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2635,8 +2635,6 @@ UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) UNIMPLEMENTED_INTRINSIC(FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite) -UNIMPLEMENTED_INTRINSIC(FloatIsNaN) -UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(LongHighestOneBit) @@ -2644,6 +2642,8 @@ UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit) UNIMPLEMENTED_INTRINSIC(LongLowestOneBit) // Handled as HIR instructions. +UNIMPLEMENTED_INTRINSIC(FloatIsNaN) +UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index c9a43442b3..93e8c00e5a 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2717,10 +2717,10 @@ UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(DoubleIsInfinite) -UNIMPLEMENTED_INTRINSIC(FloatIsNaN) -UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) // Handled as HIR instructions. 
+UNIMPLEMENTED_INTRINSIC(FloatIsNaN) +UNIMPLEMENTED_INTRINSIC(DoubleIsNaN) UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index a6b4078f46..33bb2e8f30 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -141,6 +141,7 @@ void LICM::Run() { DCHECK(!instruction->HasEnvironment()); } instruction->MoveBefore(pre_header->GetLastInstruction()); + MaybeRecordStat(MethodCompilationStat::kLoopInvariantMoved); } else if (instruction->CanThrow()) { // If `instruction` can throw, we cannot move further instructions // that can throw as well. diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h index 0b5a0f103b..bf56f53d46 100644 --- a/compiler/optimizing/licm.h +++ b/compiler/optimizing/licm.h @@ -26,8 +26,9 @@ class SideEffectsAnalysis; class LICM : public HOptimization { public: - LICM(HGraph* graph, const SideEffectsAnalysis& side_effects) - : HOptimization(graph, kLoopInvariantCodeMotionPassName), side_effects_(side_effects) {} + LICM(HGraph* graph, const SideEffectsAnalysis& side_effects, OptimizingCompilerStats* stats) + : HOptimization(graph, kLoopInvariantCodeMotionPassName, stats), + side_effects_(side_effects) {} void Run() OVERRIDE; diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index 9fb32f4001..d446539700 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -79,7 +79,7 @@ class LICMTest : public CommonCompilerTest { graph_->BuildDominatorTree(); SideEffectsAnalysis side_effects(graph_); side_effects.Run(); - LICM(graph_, side_effects).Run(); + LICM(graph_, side_effects, nullptr).Run(); } // General building fields. 
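A pattern recurs across the intrinsics.h, licm.h, and licm_test.cc hunks above (and the select_generator.h and optimizing_compiler.cc hunks below): each pass constructor gains an OptimizingCompilerStats* and forwards it to HOptimization, so the pass can call MaybeRecordStat when it fires. The sketch below is a minimal standalone model of that pattern; the type definitions are simplified stand-ins for illustration, not ART's real classes.

#include <cstdio>

// Simplified stand-ins for ART's stats plumbing (illustrative only).
enum MethodCompilationStat {
  kIntrinsicRecognized,
  kLoopInvariantMoved,
  kSelectGenerated,
  kLastStat
};

struct OptimizingCompilerStats {
  int counters[kLastStat] = {};
  void RecordStat(MethodCompilationStat stat) { ++counters[stat]; }
};

class HOptimization {
 public:
  explicit HOptimization(OptimizingCompilerStats* stats) : stats_(stats) {}
  // Recording is optional: a null stats pointer (as licm_test.cc passes
  // above) makes MaybeRecordStat a no-op.
  void MaybeRecordStat(MethodCompilationStat stat) {
    if (stats_ != nullptr) {
      stats_->RecordStat(stat);
    }
  }
 private:
  OptimizingCompilerStats* const stats_;
};

class LICM : public HOptimization {
 public:
  using HOptimization::HOptimization;
  void Run() {
    // A real pass would hoist a loop-invariant instruction here first.
    MaybeRecordStat(kLoopInvariantMoved);
  }
};

int main() {
  OptimizingCompilerStats stats;
  LICM(&stats).Run();
  std::printf("LoopInvariantMoved: %d\n", stats.counters[kLoopInvariantMoved]);
  return 0;
}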
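The intrinsics_arm.cc through intrinsics_x86_64.cc hunks above move FloatIsNaN and DoubleIsNaN under the "Handled as HIR instructions" group because the new SimplifyIsNaN in the instruction_simplifier.cc hunk rewrites Float.isNaN(x) and Double.isNaN(x) into the HIR condition `x != x` (the ComparisonBias it sets appears to mark the comparison as unordered-sensitive for later code generation). The IEEE-754 identity behind the rewrite is easy to check in isolation; the snippet below is a standalone demonstration, not ART code.

#include <cassert>
#include <cmath>
#include <limits>

// NaN is the only IEEE-754 value that compares unequal to itself,
// so isNaN(x) can be lowered to the comparison `x != x`.
int main() {
  const double nan = std::numeric_limits<double>::quiet_NaN();
  const double inf = std::numeric_limits<double>::infinity();
  assert((nan != nan) == std::isnan(nan));  // true only for NaN
  assert((1.5 != 1.5) == std::isnan(1.5));  // false for ordinary values
  assert((inf != inf) == std::isnan(inf));  // false even for infinities
  return 0;
}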
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index b26ce0aa13..f36dc6e2fd 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1420,7 +1420,38 @@ HBasicBlock* HBasicBlock::SplitCatchBlockAfterMoveException() { } } -HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) { +HBasicBlock* HBasicBlock::SplitBeforeForInlining(HInstruction* cursor) { + DCHECK_EQ(cursor->GetBlock(), this); + + HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), + cursor->GetDexPc()); + new_block->instructions_.first_instruction_ = cursor; + new_block->instructions_.last_instruction_ = instructions_.last_instruction_; + instructions_.last_instruction_ = cursor->previous_; + if (cursor->previous_ == nullptr) { + instructions_.first_instruction_ = nullptr; + } else { + cursor->previous_->next_ = nullptr; + cursor->previous_ = nullptr; + } + + new_block->instructions_.SetBlockOfInstructions(new_block); + + for (HBasicBlock* successor : GetSuccessors()) { + new_block->successors_.push_back(successor); + successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block; + } + successors_.clear(); + + for (HBasicBlock* dominated : GetDominatedBlocks()) { + dominated->dominator_ = new_block; + new_block->dominated_blocks_.push_back(dominated); + } + dominated_blocks_.clear(); + return new_block; +} + +HBasicBlock* HBasicBlock::SplitAfterForInlining(HInstruction* cursor) { DCHECK(!cursor->IsControlFlow()); DCHECK_NE(instructions_.last_instruction_, cursor); DCHECK_EQ(cursor->GetBlock(), this); @@ -1573,6 +1604,20 @@ void HInstructionList::AddAfter(HInstruction* cursor, const HInstructionList& in } } +void HInstructionList::AddBefore(HInstruction* cursor, const HInstructionList& instruction_list) { + DCHECK(Contains(cursor)); + if (!instruction_list.IsEmpty()) { + if (cursor == first_instruction_) { + first_instruction_ = instruction_list.first_instruction_; + } else { + cursor->previous_->next_ = instruction_list.first_instruction_; + } + instruction_list.last_instruction_->next_ = cursor; + instruction_list.first_instruction_->previous_ = cursor->previous_; + cursor->previous_ = instruction_list.last_instruction_; + } +} + void HInstructionList::Add(const HInstructionList& instruction_list) { if (IsEmpty()) { first_instruction_ = instruction_list.first_instruction_; @@ -1815,18 +1860,6 @@ void HBasicBlock::ReplaceWith(HBasicBlock* other) { graph_ = nullptr; } -// Create space in `blocks` for adding `number_of_new_blocks` entries -// starting at location `at`. Blocks after `at` are moved accordingly. -static void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks, - size_t number_of_new_blocks, - size_t after) { - DCHECK_LT(after, blocks->size()); - size_t old_size = blocks->size(); - size_t new_size = old_size + number_of_new_blocks; - blocks->resize(new_size); - std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end()); -} - void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) { DCHECK_EQ(block->GetGraph(), this); DCHECK(block->GetSuccessors().empty()); @@ -1880,7 +1913,8 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { DCHECK(!body->IsInLoop()); HInstruction* last = body->GetLastInstruction(); - invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions()); + // Note that we add instructions before the invoke only to simplify polymorphic inlining. 
+ invoke->GetBlock()->instructions_.AddBefore(invoke, body->GetInstructions()); body->GetInstructions().SetBlockOfInstructions(invoke->GetBlock()); // Replace the invoke with the return value of the inlined graph. @@ -1898,7 +1932,8 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // with the second half. ArenaAllocator* allocator = outer_graph->GetArena(); HBasicBlock* at = invoke->GetBlock(); - HBasicBlock* to = at->SplitAfter(invoke); + // Note that we split before the invoke only to simplify polymorphic inlining. + HBasicBlock* to = at->SplitBeforeForInlining(invoke); HBasicBlock* first = entry_block_->GetSuccessors()[0]; DCHECK(!first->IsInLoop()); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 01ba704610..399afabea6 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -131,6 +131,7 @@ class HInstructionList : public ValueObject { void SetBlockOfInstructions(HBasicBlock* block) const; void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list); + void AddBefore(HInstruction* cursor, const HInstructionList& instruction_list); void Add(const HInstructionList& instruction_list); // Return the number of instructions in the list. This is an expensive operation. @@ -618,6 +619,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { friend class SsaBuilder; // For caching constants. friend class SsaLivenessAnalysis; // For the linear order. + friend class HInliner; // For the reverse post order. ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); DISALLOW_COPY_AND_ASSIGN(HGraph); }; @@ -972,12 +974,15 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { // loop and try/catch information. HBasicBlock* SplitBefore(HInstruction* cursor); - // Split the block into two blocks just after `cursor`. Returns the newly + // Split the block into two blocks just before `cursor`. Returns the newly // created block. Note that this method just updates raw block information, // like predecessors, successors, dominators, and instruction list. It does not // update the graph, reverse post order, loop information, nor make sure the // blocks are consistent (for example ending with a control flow instruction). - HBasicBlock* SplitAfter(HInstruction* cursor); + HBasicBlock* SplitBeforeForInlining(HInstruction* cursor); + + // Similar to `SplitBeforeForInlining` but does it after `cursor`. + HBasicBlock* SplitAfterForInlining(HInstruction* cursor); // Split catch block into two blocks after the original move-exception bytecode // instruction, or at the beginning if not present. 
Returns the newly created, @@ -2063,6 +2068,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { } SideEffects GetSideEffects() const { return side_effects_; } + void SetSideEffects(SideEffects other) { side_effects_ = other; } void AddSideEffects(SideEffects other) { side_effects_.Add(other); } size_t GetLifetimePosition() const { return lifetime_position_; } @@ -2101,7 +2107,6 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { protected: virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0; virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0; - void SetSideEffects(SideEffects other) { side_effects_ = other; } private: void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); } @@ -6370,6 +6375,18 @@ class SwitchTable : public ValueObject { DISALLOW_COPY_AND_ASSIGN(SwitchTable); }; +// Create space in `blocks` for adding `number_of_new_blocks` entries +// starting at location `at`. Blocks after `at` are moved accordingly. +inline void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks, + size_t number_of_new_blocks, + size_t after) { + DCHECK_LT(after, blocks->size()); + size_t old_size = blocks->size(); + size_t new_size = old_size + number_of_new_blocks; + blocks->resize(new_size); + std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end()); +} + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 12b748b7b6..ae638dfd0b 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -505,12 +505,12 @@ static void RunOptimizations(HGraph* graph, graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName); HConstantFolding* fold1 = new (arena) HConstantFolding(graph); InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); - HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph); + HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats); HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining"); HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce"); SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects); - LICM* licm = new (arena) LICM(graph, *side_effects); + LICM* licm = new (arena) LICM(graph, *side_effects, stats); LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects); HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction); @@ -519,7 +519,7 @@ static void RunOptimizations(HGraph* graph, graph, stats, "instruction_simplifier_after_bce"); InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier_before_codegen"); - IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver); + IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver, stats); HOptimization* optimizations1[] = { intrinsics, diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 52a7b10cad..179004bd40 100644 --- 
a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -56,6 +56,10 @@ enum MethodCompilationStat { kMonomorphicCall, kPolymorphicCall, kMegamorphicCall, + kBooleanSimplified, + kIntrinsicRecognized, + kLoopInvariantMoved, + kSelectGenerated, kLastStat }; @@ -124,7 +128,11 @@ class OptimizingCompilerStats { case kInlinedPolymorphicCall: name = "InlinedPolymorphicCall"; break; case kMonomorphicCall: name = "MonomorphicCall"; break; case kPolymorphicCall: name = "PolymorphicCall"; break; - case kMegamorphicCall: name = "kMegamorphicCall"; break; + case kMegamorphicCall: name = "MegamorphicCall"; break; + case kBooleanSimplified : name = "BooleanSimplified"; break; + case kIntrinsicRecognized : name = "IntrinsicRecognized"; break; + case kLoopInvariantMoved : name = "LoopInvariantMoved"; break; + case kSelectGenerated : name = "SelectGenerated"; break; case kLastStat: LOG(FATAL) << "invalid stat " diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 105b30ae5d..e52476ea03 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -141,6 +141,8 @@ void HSelectGenerator::Run() { block->MergeWith(merge_block); } + MaybeRecordStat(MethodCompilationStat::kSelectGenerated); + // No need to update dominance information, as we are simplifying // a simple diamond shape, where the join block is merged with the // entry block. Any following blocks would have had the join block diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h index f9d6d4d8de..c6dca581cc 100644 --- a/compiler/optimizing/select_generator.h +++ b/compiler/optimizing/select_generator.h @@ -47,8 +47,8 @@ namespace art { class HSelectGenerator : public HOptimization { public: - explicit HSelectGenerator(HGraph* graph) - : HOptimization(graph, kSelectGeneratorPassName) {} + HSelectGenerator(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, kSelectGeneratorPassName, stats) {} void Run() OVERRIDE; diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 43f2499b24..09ca8b7b44 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -422,6 +422,34 @@ bool SsaBuilder::FixAmbiguousArrayOps() { return true; } +static bool HasAliasInEnvironments(HInstruction* instruction) { + for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses()); + !use_it.Done(); + use_it.Advance()) { + HEnvironment* use = use_it.Current()->GetUser(); + HUseListNode<HEnvironment*>* next = use_it.Current()->GetNext(); + if (next != nullptr && next->GetUser() == use) { + return true; + } + } + + if (kIsDebugBuild) { + // Do a quadratic search to ensure same environment uses are next + // to each other. + for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses()); + !use_it.Done(); + use_it.Advance()) { + HUseListNode<HEnvironment*>* current = use_it.Current(); + HUseListNode<HEnvironment*>* next = current->GetNext(); + while (next != nullptr) { + DCHECK(next->GetUser() != current->GetUser()); + next = next->GetNext(); + } + } + } + return false; +} + void SsaBuilder::RemoveRedundantUninitializedStrings() { if (GetGraph()->IsDebuggable()) { // Do not perform the optimization for consistency with the interpreter @@ -433,7 +461,7 @@ void SsaBuilder::RemoveRedundantUninitializedStrings() { // Replace NewInstance of String with NullConstant if not used prior to // calling StringFactory. 
In case of deoptimization, the interpreter is // expected to skip the null check on the `this` argument of the StringFactory call. - if (!new_instance->HasNonEnvironmentUses()) { + if (!new_instance->HasNonEnvironmentUses() && !HasAliasInEnvironments(new_instance)) { new_instance->ReplaceWith(GetGraph()->GetNullConstant()); new_instance->GetBlock()->RemoveInstruction(new_instance);
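The ssa_builder.cc hunk above guards the removal of an unused uninitialized String allocation with the new HasAliasInEnvironments, which depends on the invariant that the kIsDebugBuild block re-verifies: uses of an instruction by the same HEnvironment sit next to each other in the environment-use list, so a single scan of adjacent entries is enough to detect a duplicate. Below is a minimal standalone model of that adjacency check; it uses plain pointers in place of ART's use-list nodes, and the names are illustrative.

#include <cassert>
#include <vector>

// Stand-in for "which environment uses the instruction": only the
// identity of the user matters for the alias check.
using EnvUser = const void*;

// Returns true if some environment appears twice in a row, i.e. the
// instruction is referenced more than once by the same environment.
bool HasAliasInEnvironments(const std::vector<EnvUser>& env_uses) {
  for (size_t i = 0; i + 1 < env_uses.size(); ++i) {
    if (env_uses[i] == env_uses[i + 1]) {
      return true;
    }
  }
  return false;
}

int main() {
  int env_a = 0;
  int env_b = 0;
  // Two distinct environments: no alias, removal stays legal.
  assert(!HasAliasInEnvironments({&env_a, &env_b}));
  // The same environment twice: alias detected, keep the instruction.
  assert(HasAliasInEnvironments({&env_a, &env_a, &env_b}));
  return 0;
}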