diff options
| author | 2016-02-18 11:12:31 +0000 | |
|---|---|---|
| committer | 2016-02-18 14:41:25 +0000 | |
| commit | 916cc1d504f10a24f43b384e035fdecbe6a74b4c (patch) | |
| tree | a68f7276cba6cd75bc0b200337edde8eafccccc7 /compiler | |
| parent | 442643920a6c539e98aad76594e99b932b5631ba (diff) | |
Implement polymorphic inlining.
For example, before:
HInvokeVirtual
After:
If (receiver == Foo) {
  // inlined code.
} else if (receiver == Bar) {
  // inlined code
} else {
  // HInvokeVirtual or HDeoptimize(receiver != Baz)
}
Change-Id: I5ce305aef8f39f8294bf2b2bcfe60e0dddcfdbec
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/optimizing/graph_checker.cc | 5 | ||||
| -rw-r--r-- | compiler/optimizing/inliner.cc | 218 | ||||
| -rw-r--r-- | compiler/optimizing/inliner.h | 59 | ||||
| -rw-r--r-- | compiler/optimizing/nodes.cc | 65 | ||||
| -rw-r--r-- | compiler/optimizing/nodes.h | 21 | 
5 files changed, 334 insertions, 34 deletions
| diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index e6e9177841..4a49c83611 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -593,8 +593,9 @@ void GraphChecker::HandleLoop(HBasicBlock* loop_header) {        HBasicBlock* predecessor = loop_header->GetPredecessors()[i];        if (!loop_information->IsBackEdge(*predecessor)) {          AddError(StringPrintf( -            "Loop header %d has multiple incoming (non back edge) blocks.", -            id)); +            "Loop header %d has multiple incoming (non back edge) blocks: %d.", +            id, +            predecessor->GetBlockId()));        }      }    } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index a5acab81ab..498739caf3 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -350,11 +350,39 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,    }    // We successfully inlined, now add a guard. +  bool is_referrer = +      (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass()); +  AddTypeGuard(receiver, +               cursor, +               bb_cursor, +               class_index, +               is_referrer, +               invoke_instruction, +               /* with_deoptimization */ true); + +  // Run type propagation to get the guard typed, and eventually propagate the +  // type of the receiver. +  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false); +  rtp_fixup.Run(); + +  MaybeRecordStat(kInlinedMonomorphicCall); +  return true; +} + +HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, +                                     HInstruction* cursor, +                                     HBasicBlock* bb_cursor, +                                     uint32_t class_index, +                                     bool is_referrer, +                                     HInstruction* invoke_instruction, +                                     bool with_deoptimization) { +  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();    HInstanceFieldGet* receiver_class = BuildGetReceiverClass(        class_linker, receiver, invoke_instruction->GetDexPc()); -  bool is_referrer = -      (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass()); +  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); +  // Note that we will just compare the classes, so we don't need Java semantics access checks. +  // Also, the caller of `AddTypeGuard` must have guaranteed that the class is in the dex cache.    HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(),                                                                 class_index,                                                                 caller_dex_file, @@ -364,8 +392,6 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,                                                                 /* is_in_dex_cache */ true);    HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class); -  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( -      compare, invoke_instruction->GetDexPc());    // TODO: Extend reference type propagation to understand the guard.    if (cursor != nullptr) {      bb_cursor->InsertInstructionAfter(receiver_class, cursor); @@ -374,16 +400,13 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,    }    bb_cursor->InsertInstructionAfter(load_class, receiver_class);    bb_cursor->InsertInstructionAfter(compare, load_class); -  bb_cursor->InsertInstructionAfter(deoptimize, compare); -  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); - -  // Run type propagation to get the guard typed, and eventually propagate the -  // type of the receiver. -  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false); -  rtp_fixup.Run(); - -  MaybeRecordStat(kInlinedMonomorphicCall); -  return true; +  if (with_deoptimization) { +    HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( +        compare, invoke_instruction->GetDexPc()); +    bb_cursor->InsertInstructionAfter(deoptimize, compare); +    deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); +  } +  return compare;  }  bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, @@ -391,6 +414,173 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,                                          const InlineCache& ic) {    DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface())        << invoke_instruction->DebugName(); + +  if (TryInlinePolymorphicCallToSameTarget(invoke_instruction, resolved_method, ic)) { +    return true; +  } + +  ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); +  size_t pointer_size = class_linker->GetImagePointerSize(); +  const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + +  bool all_targets_inlined = true; +  bool one_target_inlined = false; +  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) { +    if (ic.GetTypeAt(i) == nullptr) { +      break; +    } +    ArtMethod* method = nullptr; +    if (invoke_instruction->IsInvokeInterface()) { +      method = ic.GetTypeAt(i)->FindVirtualMethodForInterface( +          resolved_method, pointer_size); +    } else { +      DCHECK(invoke_instruction->IsInvokeVirtual()); +      method = ic.GetTypeAt(i)->FindVirtualMethodForVirtual( +          resolved_method, pointer_size); +    } + +    HInstruction* receiver = invoke_instruction->InputAt(0); +    HInstruction* cursor = invoke_instruction->GetPrevious(); +    HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); + +    uint32_t class_index = FindClassIndexIn(ic.GetTypeAt(i), caller_dex_file); +    HInstruction* return_replacement = nullptr; +    if (class_index == DexFile::kDexNoIndex || +        !TryBuildAndInline(invoke_instruction, method, &return_replacement)) { +      all_targets_inlined = false; +    } else { +      one_target_inlined = true; +      bool is_referrer = (ic.GetTypeAt(i) == outermost_graph_->GetArtMethod()->GetDeclaringClass()); + +      // If we have inlined all targets before, and this receiver is the last seen, +      // we deoptimize instead of keeping the original invoke instruction. +      bool deoptimize = all_targets_inlined && +          (i != InlineCache::kIndividualCacheSize - 1) && +          (ic.GetTypeAt(i + 1) == nullptr); +      HInstruction* compare = AddTypeGuard( +          receiver, cursor, bb_cursor, class_index, is_referrer, invoke_instruction, deoptimize); +      if (deoptimize) { +        if (return_replacement != nullptr) { +          invoke_instruction->ReplaceWith(return_replacement); +        } +        invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction); +        // Because the inline cache data can be populated concurrently, we force the end of the +        // iteration. Otherhwise, we could see a new receiver type. +        break; +      } else { +        CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction); +      } +    } +  } + +  if (!one_target_inlined) { +    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method) +                   << " from inline cache is not inlined because none" +                   << " of its targets could be inlined"; +    return false; +  } +  MaybeRecordStat(kInlinedPolymorphicCall); + +  // Run type propagation to get the guards typed. +  ReferenceTypePropagation rtp_fixup(graph_, handles_, /* is_first_run */ false); +  rtp_fixup.Run(); +  return true; +} + +void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare, +                                                        HInstruction* return_replacement, +                                                        HInstruction* invoke_instruction) { +  uint32_t dex_pc = invoke_instruction->GetDexPc(); +  HBasicBlock* cursor_block = compare->GetBlock(); +  HBasicBlock* original_invoke_block = invoke_instruction->GetBlock(); +  ArenaAllocator* allocator = graph_->GetArena(); + +  // Spit the block after the compare: `cursor_block` will now be the start of the diamond, +  // and the returned block is the start of the then branch (that could contain multiple blocks). +  HBasicBlock* then = cursor_block->SplitAfterForInlining(compare); + +  // Split the block containing the invoke before and after the invoke. The returned block +  // of the split before will contain the invoke and will be the otherwise branch of +  // the diamond. The returned block of the split after will be the merge block +  // of the diamond. +  HBasicBlock* end_then = invoke_instruction->GetBlock(); +  HBasicBlock* otherwise = end_then->SplitBeforeForInlining(invoke_instruction); +  HBasicBlock* merge = otherwise->SplitAfterForInlining(invoke_instruction); + +  // If the methods we are inlining return a value, we create a phi in the merge block +  // that will have the `invoke_instruction and the `return_replacement` as inputs. +  if (return_replacement != nullptr) { +    HPhi* phi = new (allocator) HPhi( +        allocator, kNoRegNumber, 0, HPhi::ToPhiType(invoke_instruction->GetType()), dex_pc); +    merge->AddPhi(phi); +    invoke_instruction->ReplaceWith(phi); +    phi->AddInput(return_replacement); +    phi->AddInput(invoke_instruction); +  } + +  // Add the control flow instructions. +  otherwise->AddInstruction(new (allocator) HGoto(dex_pc)); +  end_then->AddInstruction(new (allocator) HGoto(dex_pc)); +  cursor_block->AddInstruction(new (allocator) HIf(compare, dex_pc)); + +  // Add the newly created blocks to the graph. +  graph_->AddBlock(then); +  graph_->AddBlock(otherwise); +  graph_->AddBlock(merge); + +  // Set up successor (and implictly predecessor) relations. +  cursor_block->AddSuccessor(otherwise); +  cursor_block->AddSuccessor(then); +  end_then->AddSuccessor(merge); +  otherwise->AddSuccessor(merge); + +  // Set up dominance information. +  then->SetDominator(cursor_block); +  cursor_block->AddDominatedBlock(then); +  otherwise->SetDominator(cursor_block); +  cursor_block->AddDominatedBlock(otherwise); +  merge->SetDominator(cursor_block); +  cursor_block->AddDominatedBlock(merge); + +  // Update the revert post order. +  size_t index = IndexOfElement(graph_->reverse_post_order_, cursor_block); +  MakeRoomFor(&graph_->reverse_post_order_, 1, index); +  graph_->reverse_post_order_[++index] = then; +  index = IndexOfElement(graph_->reverse_post_order_, end_then); +  MakeRoomFor(&graph_->reverse_post_order_, 2, index); +  graph_->reverse_post_order_[++index] = otherwise; +  graph_->reverse_post_order_[++index] = merge; + +  // Set the loop information of the newly created blocks. +  HLoopInformation* loop_info = cursor_block->GetLoopInformation(); +  if (loop_info != nullptr) { +    then->SetLoopInformation(cursor_block->GetLoopInformation()); +    merge->SetLoopInformation(cursor_block->GetLoopInformation()); +    otherwise->SetLoopInformation(cursor_block->GetLoopInformation()); +    for (HLoopInformationOutwardIterator loop_it(*cursor_block); +         !loop_it.Done(); +         loop_it.Advance()) { +      loop_it.Current()->Add(then); +      loop_it.Current()->Add(merge); +      loop_it.Current()->Add(otherwise); +    } +    // In case the original invoke location was a back edge, we need to update +    // the loop to now have the merge block as a back edge. +    if (loop_info->IsBackEdge(*original_invoke_block)) { +      loop_info->RemoveBackEdge(original_invoke_block); +      loop_info->AddBackEdge(merge); +    } +  } + +  // Set the try/catch information of the newly created blocks. +  then->SetTryCatchInformation(cursor_block->GetTryCatchInformation()); +  merge->SetTryCatchInformation(cursor_block->GetTryCatchInformation()); +  otherwise->SetTryCatchInformation(cursor_block->GetTryCatchInformation()); +} + +bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction, +                                                    ArtMethod* resolved_method, +                                                    const InlineCache& ic) {    // This optimization only works under JIT for now.    DCHECK(Runtime::Current()->UseJit());    if (graph_->GetInstructionSet() == kMips64) { diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 9dd9bf5ad8..cdb2167082 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -101,12 +101,18 @@ class HInliner : public HOptimization {                                  const InlineCache& ic)      SHARED_REQUIRES(Locks::mutator_lock_); -  // Try to inline targets of a polymorphic call. Currently unimplemented. +  // Try to inline targets of a polymorphic call.    bool TryInlinePolymorphicCall(HInvoke* invoke_instruction,                                  ArtMethod* resolved_method,                                  const InlineCache& ic)      SHARED_REQUIRES(Locks::mutator_lock_); +  bool TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction, +                                            ArtMethod* resolved_method, +                                            const InlineCache& ic) +    SHARED_REQUIRES(Locks::mutator_lock_); + +    HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,                                             HInstruction* receiver,                                             uint32_t dex_pc) const @@ -118,6 +124,57 @@ class HInliner : public HOptimization {                                  bool do_rtp)      SHARED_REQUIRES(Locks::mutator_lock_); +  // Add a type guard on the given `receiver`. This will add to the graph: +  // i0 = HFieldGet(receiver, klass) +  // i1 = HLoadClass(class_index, is_referrer) +  // i2 = HNotEqual(i0, i1) +  // +  // And if `with_deoptimization` is true: +  // HDeoptimize(i2) +  // +  // The method returns the `HNotEqual`, that will be used for polymorphic inlining. +  HInstruction* AddTypeGuard(HInstruction* receiver, +                             HInstruction* cursor, +                             HBasicBlock* bb_cursor, +                             uint32_t class_index, +                             bool is_referrer, +                             HInstruction* invoke_instruction, +                             bool with_deoptimization) +    SHARED_REQUIRES(Locks::mutator_lock_); + +  /* +   * Ad-hoc implementation for implementing a diamond pattern in the graph for +   * polymorphic inlining: +   * 1) `compare` becomes the input of the new `HIf`. +   * 2) Everything up until `invoke_instruction` is in the then branch (could +   *    contain multiple blocks). +   * 3) `invoke_instruction` is moved to the otherwise block. +   * 4) If `return_replacement` is not null, the merge block will have +   *    a phi whose inputs are `return_replacement` and `invoke_instruction`. +   * +   * Before: +   *             Block1 +   *             compare +   *              ... +   *         invoke_instruction +   * +   * After: +   *            Block1 +   *            compare +   *              if +   *          /        \ +   *         /          \ +   *   Then block    Otherwise block +   *      ...       invoke_instruction +   *       \              / +   *        \            / +   *          Merge block +   *  phi(return_replacement, invoke_instruction) +   */ +  void CreateDiamondPatternForPolymorphicInline(HInstruction* compare, +                                                HInstruction* return_replacement, +                                                HInstruction* invoke_instruction); +    HGraph* const outermost_graph_;    const DexCompilationUnit& outer_compilation_unit_;    const DexCompilationUnit& caller_compilation_unit_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 27015c0ac6..95d0af539d 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1386,7 +1386,38 @@ HBasicBlock* HBasicBlock::SplitCatchBlockAfterMoveException() {    }  } -HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) { +HBasicBlock* HBasicBlock::SplitBeforeForInlining(HInstruction* cursor) { +  DCHECK_EQ(cursor->GetBlock(), this); + +  HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), +                                                                    cursor->GetDexPc()); +  new_block->instructions_.first_instruction_ = cursor; +  new_block->instructions_.last_instruction_ = instructions_.last_instruction_; +  instructions_.last_instruction_ = cursor->previous_; +  if (cursor->previous_ == nullptr) { +    instructions_.first_instruction_ = nullptr; +  } else { +    cursor->previous_->next_ = nullptr; +    cursor->previous_ = nullptr; +  } + +  new_block->instructions_.SetBlockOfInstructions(new_block); + +  for (HBasicBlock* successor : GetSuccessors()) { +    new_block->successors_.push_back(successor); +    successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block; +  } +  successors_.clear(); + +  for (HBasicBlock* dominated : GetDominatedBlocks()) { +    dominated->dominator_ = new_block; +    new_block->dominated_blocks_.push_back(dominated); +  } +  dominated_blocks_.clear(); +  return new_block; +} + +HBasicBlock* HBasicBlock::SplitAfterForInlining(HInstruction* cursor) {    DCHECK(!cursor->IsControlFlow());    DCHECK_NE(instructions_.last_instruction_, cursor);    DCHECK_EQ(cursor->GetBlock(), this); @@ -1539,6 +1570,20 @@ void HInstructionList::AddAfter(HInstruction* cursor, const HInstructionList& in    }  } +void HInstructionList::AddBefore(HInstruction* cursor, const HInstructionList& instruction_list) { +  DCHECK(Contains(cursor)); +  if (!instruction_list.IsEmpty()) { +    if (cursor == first_instruction_) { +      first_instruction_ = instruction_list.first_instruction_; +    } else { +      cursor->previous_->next_ = instruction_list.first_instruction_; +    } +    instruction_list.last_instruction_->next_ = cursor; +    instruction_list.first_instruction_->previous_ = cursor->previous_; +    cursor->previous_ = instruction_list.last_instruction_; +  } +} +  void HInstructionList::Add(const HInstructionList& instruction_list) {    if (IsEmpty()) {      first_instruction_ = instruction_list.first_instruction_; @@ -1781,18 +1826,6 @@ void HBasicBlock::ReplaceWith(HBasicBlock* other) {    graph_ = nullptr;  } -// Create space in `blocks` for adding `number_of_new_blocks` entries -// starting at location `at`. Blocks after `at` are moved accordingly. -static void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks, -                        size_t number_of_new_blocks, -                        size_t after) { -  DCHECK_LT(after, blocks->size()); -  size_t old_size = blocks->size(); -  size_t new_size = old_size + number_of_new_blocks; -  blocks->resize(new_size); -  std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end()); -} -  void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) {    DCHECK_EQ(block->GetGraph(), this);    DCHECK(block->GetSuccessors().empty()); @@ -1846,7 +1879,8 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {      DCHECK(!body->IsInLoop());      HInstruction* last = body->GetLastInstruction(); -    invoke->GetBlock()->instructions_.AddAfter(invoke, body->GetInstructions()); +    // Note that we add instructions before the invoke only to simplify polymorphic inlining. +    invoke->GetBlock()->instructions_.AddBefore(invoke, body->GetInstructions());      body->GetInstructions().SetBlockOfInstructions(invoke->GetBlock());      // Replace the invoke with the return value of the inlined graph. @@ -1864,7 +1898,8 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {      // with the second half.      ArenaAllocator* allocator = outer_graph->GetArena();      HBasicBlock* at = invoke->GetBlock(); -    HBasicBlock* to = at->SplitAfter(invoke); +    // Note that we split before the invoke only to simplify polymorphic inlining. +    HBasicBlock* to = at->SplitBeforeForInlining(invoke);      HBasicBlock* first = entry_block_->GetSuccessors()[0];      DCHECK(!first->IsInLoop()); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 854854f238..7567510006 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -131,6 +131,7 @@ class HInstructionList : public ValueObject {    void SetBlockOfInstructions(HBasicBlock* block) const;    void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list); +  void AddBefore(HInstruction* cursor, const HInstructionList& instruction_list);    void Add(const HInstructionList& instruction_list);    // Return the number of instructions in the list. This is an expensive operation. @@ -618,6 +619,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {    friend class SsaBuilder;           // For caching constants.    friend class SsaLivenessAnalysis;  // For the linear order. +  friend class HInliner;             // For the reverse post order.    ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);    DISALLOW_COPY_AND_ASSIGN(HGraph);  }; @@ -972,12 +974,15 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {    // loop and try/catch information.    HBasicBlock* SplitBefore(HInstruction* cursor); -  // Split the block into two blocks just after `cursor`. Returns the newly +  // Split the block into two blocks just before `cursor`. Returns the newly    // created block. Note that this method just updates raw block information,    // like predecessors, successors, dominators, and instruction list. It does not    // update the graph, reverse post order, loop information, nor make sure the    // blocks are consistent (for example ending with a control flow instruction). -  HBasicBlock* SplitAfter(HInstruction* cursor); +  HBasicBlock* SplitBeforeForInlining(HInstruction* cursor); + +  // Similar to `SplitBeforeForInlining` but does it after `cursor`. +  HBasicBlock* SplitAfterForInlining(HInstruction* cursor);    // Split catch block into two blocks after the original move-exception bytecode    // instruction, or at the beginning if not present. Returns the newly created, @@ -6099,6 +6104,18 @@ class SwitchTable : public ValueObject {    DISALLOW_COPY_AND_ASSIGN(SwitchTable);  }; +// Create space in `blocks` for adding `number_of_new_blocks` entries +// starting at location `at`. Blocks after `at` are moved accordingly. +inline void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks, +                        size_t number_of_new_blocks, +                        size_t after) { +  DCHECK_LT(after, blocks->size()); +  size_t old_size = blocks->size(); +  size_t new_size = old_size + number_of_new_blocks; +  blocks->resize(new_size); +  std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end()); +} +  }  // namespace art  #endif  // ART_COMPILER_OPTIMIZING_NODES_H_ |