diff options
author | 2016-11-17 09:04:53 -0800 | |
---|---|---|
committer | 2016-12-19 14:26:33 -0800 | |
commit | b0b051ad6c9fab511346882650d5d689f805a980 (patch) | |
tree | fe02f128018f1aa55be5c0425295ae0ef670de2c | |
parent | d54f43ca39dfa92f08c2d760123f185f0f65fb86 (diff) |
CHA guard optimization (elimination/hoisting).
Test: manual by checking the dump-cfg output.
Change-Id: I254e168b9a85d2d3d23e02eea7e129c1bc9ab920
-rw-r--r-- | compiler/Android.bp | 1 | ||||
-rw-r--r-- | compiler/optimizing/cha_guard_optimization.cc | 253 | ||||
-rw-r--r-- | compiler/optimizing/cha_guard_optimization.h | 42 | ||||
-rw-r--r-- | compiler/optimizing/inliner.cc | 16 | ||||
-rw-r--r-- | compiler/optimizing/nodes.cc | 4 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 44 | ||||
-rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 5 |
7 files changed, 341 insertions, 24 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index 2eb6fba674..46f3358af1 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -49,6 +49,7 @@ art_cc_defaults { "optimizing/block_builder.cc", "optimizing/bounds_check_elimination.cc", "optimizing/builder.cc", + "optimizing/cha_guard_optimization.cc", "optimizing/code_generator.cc", "optimizing/code_generator_utils.cc", "optimizing/constant_folding.cc", diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc new file mode 100644 index 0000000000..fe423012ca --- /dev/null +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "cha_guard_optimization.h" + +namespace art { + +// Note we can only do CHA guard elimination/motion in a single pass, since +// if a guard is not removed, another guard might be removed due to +// the existence of the first guard. The first guard should not be further +// removed in another pass. For example, due to further optimizations, +// a receiver of a guard might turn out to be a parameter value, or defined at +// a different site, which makes the guard removable as a result. However +// it's not safe to remove the guard in another pass since another guard might +// have been removed due to the existence of this guard. +// +// As a consequence, we decided not to rely on other passes to remove them +// (such as GVN or instruction simplifier). + +class CHAGuardVisitor : HGraphVisitor { + public: + explicit CHAGuardVisitor(HGraph* graph) + : HGraphVisitor(graph), + block_has_cha_guard_(GetGraph()->GetBlocks().size(), + 0, + graph->GetArena()->Adapter(kArenaAllocCHA)) { + number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards(); + DCHECK_NE(number_of_guards_to_visit_, 0u); + // Will recount number of guards during guard optimization. + GetGraph()->SetNumberOfCHAGuards(0); + } + + void VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) OVERRIDE; + + void VisitBasicBlock(HBasicBlock* block) OVERRIDE; + + private: + void RemoveGuard(HShouldDeoptimizeFlag* flag); + // Return true if `flag` is removed. + bool OptimizeForParameter(HShouldDeoptimizeFlag* flag, HInstruction* receiver); + // Return true if `flag` is removed. + bool OptimizeWithDominatingGuard(HShouldDeoptimizeFlag* flag, HInstruction* receiver); + // Return true if `flag` is hoisted. + bool HoistGuard(HShouldDeoptimizeFlag* flag, HInstruction* receiver); + + // Record if each block has any CHA guard. It's updated during the + // reverse post order visit. Use int instead of bool since ArenaVector + // does not support bool. + ArenaVector<int> block_has_cha_guard_; + + // The iterator that's being used for this visitor. Need it to manually + // advance the iterator due to removing/moving more than one instruction. + HInstructionIterator* instruction_iterator_; + + // Used to short-circuit the pass when there is no more guards left to visit. + uint32_t number_of_guards_to_visit_; + + DISALLOW_COPY_AND_ASSIGN(CHAGuardVisitor); +}; + +void CHAGuardVisitor::VisitBasicBlock(HBasicBlock* block) { + if (number_of_guards_to_visit_ == 0) { + return; + } + // Skip phis, just iterate through instructions. + HInstructionIterator it(block->GetInstructions()); + instruction_iterator_ = ⁢ + for (; !it.Done(); it.Advance()) { + DCHECK(it.Current()->IsInBlock()); + it.Current()->Accept(this); + } +} + +void CHAGuardVisitor::RemoveGuard(HShouldDeoptimizeFlag* flag) { + HBasicBlock* block = flag->GetBlock(); + HInstruction* compare = flag->GetNext(); + DCHECK(compare->IsNotEqual()); + HInstruction* deopt = compare->GetNext(); + DCHECK(deopt->IsDeoptimize()); + + // Advance instruction iterator first before we remove the guard. + // We need to do it twice since we remove three instructions and the + // visitor is responsible for advancing it once. + instruction_iterator_->Advance(); + instruction_iterator_->Advance(); + block->RemoveInstruction(deopt); + block->RemoveInstruction(compare); + block->RemoveInstruction(flag); +} + +bool CHAGuardVisitor::OptimizeForParameter(HShouldDeoptimizeFlag* flag, + HInstruction* receiver) { + // If some compiled code is invalidated by CHA due to class loading, the + // compiled code will not be entered anymore. So the very fact that the + // compiled code is invoked guarantees that a parameter receiver conforms + // to all the CHA devirtualization assumptions made by the compiled code, + // since all parameter receivers pre-exist any (potential) invalidation of + // the compiled code. + // + // TODO: allow more cases such as a phi whose inputs are all parameters. + if (receiver->IsParameterValue()) { + RemoveGuard(flag); + return true; + } + return false; +} + +bool CHAGuardVisitor::OptimizeWithDominatingGuard(HShouldDeoptimizeFlag* flag, + HInstruction* receiver) { + // If there is another guard that dominates the current guard, and + // that guard is dominated by receiver's definition, then the current + // guard can be eliminated, since receiver must pre-exist that other + // guard, and passing that guard guarantees that receiver conforms to + // all the CHA devirtualization assumptions. + HBasicBlock* dominator = flag->GetBlock(); + HBasicBlock* receiver_def_block = receiver->GetBlock(); + + // Complexity of the following algorithm: + // We potentially need to traverse the full dominator chain to receiver_def_block, + // plus a (partial) linear search within one block for each guard. + // So the worst case for each guard is bounded by the size of the + // biggest block plus the depth of the dominating tree. + + while (dominator != receiver_def_block) { + if (block_has_cha_guard_[dominator->GetBlockId()] == 1) { + RemoveGuard(flag); + return true; + } + dominator = dominator->GetDominator(); + } + + // At this point dominator is the block where receiver is defined. + // We do a linear search within dominator to see if there is a guard after + // receiver's definition. + HInstruction* instruction; + if (dominator == flag->GetBlock()) { + // Flag and receiver are defined in the same block. Search backward from + // the current guard. + instruction = flag->GetPrevious(); + } else { + // Search backward from the last instruction of that dominator. + instruction = dominator->GetLastInstruction(); + } + while (instruction != receiver) { + if (instruction == nullptr) { + // receiver must be defined in this block, we didn't find it + // in the instruction list, so it must be a Phi. + DCHECK(receiver->IsPhi()); + break; + } + if (instruction->IsShouldDeoptimizeFlag()) { + RemoveGuard(flag); + return true; + } + instruction = instruction->GetPrevious(); + } + return false; +} + +bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag, + HInstruction* receiver) { + // If receiver is loop invariant, we can hoist the guard out of the + // loop since passing a guard before entering the loop guarantees that + // receiver conforms to all the CHA devirtualization assumptions. + // We only hoist guards out of the inner loop since that offers most of the + // benefit and it might help remove other guards in the inner loop. + HBasicBlock* block = flag->GetBlock(); + HLoopInformation* loop_info = block->GetLoopInformation(); + if (loop_info != nullptr && + !loop_info->IsIrreducible() && + loop_info->IsDefinedOutOfTheLoop(receiver)) { + HInstruction* compare = flag->GetNext(); + DCHECK(compare->IsNotEqual()); + HInstruction* deopt = compare->GetNext(); + DCHECK(deopt->IsDeoptimize()); + + // Advance instruction iterator first before we move the guard. + // We need to do it twice since we move three instructions and the + // visitor is responsible for advancing it once. + instruction_iterator_->Advance(); + instruction_iterator_->Advance(); + + HBasicBlock* pre_header = loop_info->GetPreHeader(); + flag->MoveBefore(pre_header->GetLastInstruction()); + compare->MoveBefore(pre_header->GetLastInstruction()); + + block->RemoveInstruction(deopt); + HInstruction* suspend = loop_info->GetSuspendCheck(); + // Need a new deoptimize instruction that copies the environment + // of the suspend instruction for the loop. + HDeoptimize* deoptimize = + new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc()); + pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); + deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( + suspend->GetEnvironment(), loop_info->GetHeader()); + block_has_cha_guard_[pre_header->GetBlockId()] = 1; + GetGraph()->IncrementNumberOfCHAGuards(); + return true; + } + return false; +} + +void CHAGuardVisitor::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { + number_of_guards_to_visit_--; + HInstruction* receiver = flag->InputAt(0); + // Don't need the receiver anymore. + flag->RemoveInputAt(0); + if (receiver->IsNullCheck()) { + receiver = receiver->InputAt(0); + } + + if (OptimizeForParameter(flag, receiver)) { + DCHECK(!flag->IsInBlock()); + return; + } + if (OptimizeWithDominatingGuard(flag, receiver)) { + DCHECK(!flag->IsInBlock()); + return; + } + if (HoistGuard(flag, receiver)) { + DCHECK(flag->IsInBlock()); + return; + } + + // Need to keep the CHA guard in place. + block_has_cha_guard_[flag->GetBlock()->GetBlockId()] = 1; + GetGraph()->IncrementNumberOfCHAGuards(); +} + +void CHAGuardOptimization::Run() { + if (graph_->GetNumberOfCHAGuards() == 0) { + return; + } + CHAGuardVisitor visitor(graph_); + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + visitor.VisitBasicBlock(block); + } +} + +} // namespace art diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h new file mode 100644 index 0000000000..ba0cdb81fd --- /dev/null +++ b/compiler/optimizing/cha_guard_optimization.h @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ +#define ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ + +#include "optimization.h" + +namespace art { + +/** + * Optimize CHA guards by removing/moving them. + */ +class CHAGuardOptimization : public HOptimization { + public: + explicit CHAGuardOptimization(HGraph* graph) + : HOptimization(graph, kCHAGuardOptimizationPassName) {} + + void Run() OVERRIDE; + + static constexpr const char* kCHAGuardOptimizationPassName = "cha_guard_optimization"; + + private: + DISALLOW_COPY_AND_ASSIGN(CHAGuardOptimization); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_ diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index fe4662abb1..03b06bacf2 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -506,19 +506,25 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction, uint32_t dex_pc, HInstruction* cursor, HBasicBlock* bb_cursor) { - HInstruction* deopt_flag = new (graph_->GetArena()) HShouldDeoptimizeFlag(dex_pc); - HInstruction* should_deopt = new (graph_->GetArena()) HNotEqual( + HShouldDeoptimizeFlag* deopt_flag = new (graph_->GetArena()) + HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc); + HInstruction* compare = new (graph_->GetArena()) HNotEqual( deopt_flag, graph_->GetIntConstant(0, dex_pc)); - HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(should_deopt, dex_pc); + HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc); if (cursor != nullptr) { bb_cursor->InsertInstructionAfter(deopt_flag, cursor); } else { bb_cursor->InsertInstructionBefore(deopt_flag, bb_cursor->GetFirstInstruction()); } - bb_cursor->InsertInstructionAfter(should_deopt, deopt_flag); - bb_cursor->InsertInstructionAfter(deopt, should_deopt); + bb_cursor->InsertInstructionAfter(compare, deopt_flag); + bb_cursor->InsertInstructionAfter(deopt, compare); + + // Add receiver as input to aid CHA guard optimization later. + deopt_flag->AddInput(invoke_instruction->InputAt(0)); + DCHECK_EQ(deopt_flag->InputCount(), 1u); deopt->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); + outermost_graph_->IncrementNumberOfCHAGuards(); } HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cabc0782ca..b9e284f6f8 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1357,7 +1357,9 @@ std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& void HInstruction::MoveBefore(HInstruction* cursor) { DCHECK(!IsPhi()); DCHECK(!IsControlFlow()); - DCHECK(CanBeMoved()); + DCHECK(CanBeMoved() || + // HShouldDeoptimizeFlag can only be moved by CHAGuardOptimization. + IsShouldDeoptimizeFlag()); DCHECK(!cursor->IsPhi()); next_->previous_ = previous_; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 4a77bed44a..6cfaa86b38 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -330,6 +330,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { invoke_type_(invoke_type), in_ssa_form_(false), should_generate_constructor_barrier_(should_generate_constructor_barrier), + number_of_cha_guards_(0), instruction_set_(instruction_set), cached_null_constant_(nullptr), cached_int_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)), @@ -551,9 +552,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { } bool HasShouldDeoptimizeFlag() const { - // TODO: if all CHA guards can be eliminated, there is no need for the flag - // even if cha_single_implementation_list_ is not empty. - return !cha_single_implementation_list_.empty(); + return number_of_cha_guards_ != 0; } bool HasTryCatch() const { return has_try_catch_; } @@ -572,6 +571,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { ReferenceTypeInfo GetInexactObjectRti() const { return inexact_object_rti_; } + uint32_t GetNumberOfCHAGuards() { return number_of_cha_guards_; } + void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; } + void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; } + private: void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited); @@ -667,6 +670,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { const bool should_generate_constructor_barrier_; + // Number of CHA guards in the graph. Used to short-circuit the + // CHA guard optimization pass when there is no CHA guard left. + uint32_t number_of_cha_guards_; + const InstructionSet instruction_set_; // Cached constants. @@ -2349,6 +2356,11 @@ class HBackwardInstructionIterator : public ValueObject { class HVariableInputSizeInstruction : public HInstruction { public: + using HInstruction::GetInputRecords; // Keep the const version visible. + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE { + return ArrayRef<HUserRecord<HInstruction*>>(inputs_); + } + void AddInput(HInstruction* input); void InsertInputAt(size_t index, HInstruction* input); void RemoveInputAt(size_t index); @@ -2489,11 +2501,6 @@ class HPhi FINAL : public HVariableInputSizeInstruction { bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); } - using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL { - return ArrayRef<HUserRecord<HInstruction*>>(inputs_); - } - Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); } void SetType(Primitive::Type new_type) { // Make sure that only valid type changes occur. The following are allowed: @@ -2925,14 +2932,20 @@ class HDeoptimize FINAL : public HTemplateInstruction<1> { // if it's true, starts to do deoptimization. // It has a 4-byte slot on stack. // TODO: allocate a register for this flag. -class HShouldDeoptimizeFlag FINAL : public HExpression<0> { +class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { public: - // TODO: use SideEffects to aid eliminating some CHA guards. - explicit HShouldDeoptimizeFlag(uint32_t dex_pc) - : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) { + // CHA guards are only optimized in a separate pass and it has no side effects + // with regard to other passes. + HShouldDeoptimizeFlag(ArenaAllocator* arena, uint32_t dex_pc) + : HVariableInputSizeInstruction(SideEffects::None(), dex_pc, arena, 0, kArenaAllocCHA) { } - // We don't eliminate CHA guards yet. + Primitive::Type GetType() const OVERRIDE { return Primitive::kPrimInt; } + + // We do all CHA guard elimination/motion in a single pass, after which there is no + // further guard elimination/motion since a guard might have been used for justification + // of the elimination of another guard. Therefore, we pretend this guard cannot be moved + // to avoid other optimizations trying to move it. bool CanBeMoved() const OVERRIDE { return false; } DECLARE_INSTRUCTION(ShouldDeoptimizeFlag); @@ -3816,11 +3829,6 @@ class HInvoke : public HVariableInputSizeInstruction { public: bool NeedsEnvironment() const OVERRIDE; - using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE { - return ArrayRef<HUserRecord<HInstruction*>>(inputs_); - } - void SetArgumentAt(size_t index, HInstruction* argument) { SetRawInputAt(index, argument); } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 0d0f62a55c..4bf5b080a7 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -54,6 +54,7 @@ #include "base/timing_logger.h" #include "bounds_check_elimination.h" #include "builder.h" +#include "cha_guard_optimization.h" #include "code_generator.h" #include "compiled_method.h" #include "compiler.h" @@ -517,6 +518,8 @@ static HOptimization* BuildOptimization( return new (arena) SideEffectsAnalysis(graph); } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) { return new (arena) HLoopOptimization(graph, most_recent_induction); + } else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) { + return new (arena) CHAGuardOptimization(graph); #ifdef ART_ENABLE_CODEGEN_arm } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) { return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats); @@ -779,6 +782,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier$before_codegen"); IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats); + CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph); HOptimization* optimizations1[] = { intrinsics, @@ -807,6 +811,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, fold3, // evaluates code generated by dynamic bce simplify3, lse, + cha_guard, dce3, // The codegen has a few assumptions that only the instruction simplifier // can satisfy. For example, the code generator does not expect to see a |