CHA guard optimization (elimination/hoisting).
Test: manual by checking the dump-cfg output.
Change-Id: I254e168b9a85d2d3d23e02eea7e129c1bc9ab920
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 2eb6fba..46f3358 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -49,6 +49,7 @@
"optimizing/block_builder.cc",
"optimizing/bounds_check_elimination.cc",
"optimizing/builder.cc",
+ "optimizing/cha_guard_optimization.cc",
"optimizing/code_generator.cc",
"optimizing/code_generator_utils.cc",
"optimizing/constant_folding.cc",
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
new file mode 100644
index 0000000..fe42301
--- /dev/null
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cha_guard_optimization.h"
+
+namespace art {
+
+// Note we can only do CHA guard elimination/motion in a single pass, since
+// if a guard is not removed, another guard might be removed due to
+// the existence of the first guard. The first guard should not be further
+// removed in another pass. For example, due to further optimizations,
+// a receiver of a guard might turn out to be a parameter value, or defined at
+// a different site, which makes the guard removable as a result. However
+// it's not safe to remove the guard in another pass since another guard might
+// have been removed due to the existence of this guard.
+//
+// As a consequence, we decided not to rely on other passes to remove them
+// (such as GVN or instruction simplifier).
+
+class CHAGuardVisitor : HGraphVisitor {
+ public:
+ explicit CHAGuardVisitor(HGraph* graph)
+ : HGraphVisitor(graph),
+ block_has_cha_guard_(GetGraph()->GetBlocks().size(),
+ 0,
+ graph->GetArena()->Adapter(kArenaAllocCHA)) {
+ number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards();
+ DCHECK_NE(number_of_guards_to_visit_, 0u);
+ // Will recount number of guards during guard optimization.
+ GetGraph()->SetNumberOfCHAGuards(0);
+ }
+
+ void VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) OVERRIDE;
+
+ void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+
+ private:
+ void RemoveGuard(HShouldDeoptimizeFlag* flag);
+ // Return true if `flag` is removed.
+ bool OptimizeForParameter(HShouldDeoptimizeFlag* flag, HInstruction* receiver);
+ // Return true if `flag` is removed.
+ bool OptimizeWithDominatingGuard(HShouldDeoptimizeFlag* flag, HInstruction* receiver);
+ // Return true if `flag` is hoisted.
+ bool HoistGuard(HShouldDeoptimizeFlag* flag, HInstruction* receiver);
+
+ // Record if each block has any CHA guard. It's updated during the
+ // reverse post order visit. Use int instead of bool since ArenaVector
+ // does not support bool.
+ ArenaVector<int> block_has_cha_guard_;
+
+ // The iterator that's being used for this visitor. Need it to manually
+ // advance the iterator due to removing/moving more than one instruction.
+ HInstructionIterator* instruction_iterator_;
+
+ // Used to short-circuit the pass when there is no more guards left to visit.
+ uint32_t number_of_guards_to_visit_;
+
+ DISALLOW_COPY_AND_ASSIGN(CHAGuardVisitor);
+};
+
+void CHAGuardVisitor::VisitBasicBlock(HBasicBlock* block) {
+ if (number_of_guards_to_visit_ == 0) {
+ return;
+ }
+ // Skip phis, just iterate through instructions.
+ HInstructionIterator it(block->GetInstructions());
+ instruction_iterator_ = ⁢
+ for (; !it.Done(); it.Advance()) {
+ DCHECK(it.Current()->IsInBlock());
+ it.Current()->Accept(this);
+ }
+}
+
+void CHAGuardVisitor::RemoveGuard(HShouldDeoptimizeFlag* flag) {
+ HBasicBlock* block = flag->GetBlock();
+ HInstruction* compare = flag->GetNext();
+ DCHECK(compare->IsNotEqual());
+ HInstruction* deopt = compare->GetNext();
+ DCHECK(deopt->IsDeoptimize());
+
+ // Advance instruction iterator first before we remove the guard.
+ // We need to do it twice since we remove three instructions and the
+ // visitor is responsible for advancing it once.
+ instruction_iterator_->Advance();
+ instruction_iterator_->Advance();
+ block->RemoveInstruction(deopt);
+ block->RemoveInstruction(compare);
+ block->RemoveInstruction(flag);
+}
+
+bool CHAGuardVisitor::OptimizeForParameter(HShouldDeoptimizeFlag* flag,
+ HInstruction* receiver) {
+ // If some compiled code is invalidated by CHA due to class loading, the
+ // compiled code will not be entered anymore. So the very fact that the
+ // compiled code is invoked guarantees that a parameter receiver conforms
+ // to all the CHA devirtualization assumptions made by the compiled code,
+ // since all parameter receivers pre-exist any (potential) invalidation of
+ // the compiled code.
+ //
+ // TODO: allow more cases such as a phi whose inputs are all parameters.
+ if (receiver->IsParameterValue()) {
+ RemoveGuard(flag);
+ return true;
+ }
+ return false;
+}
+
+bool CHAGuardVisitor::OptimizeWithDominatingGuard(HShouldDeoptimizeFlag* flag,
+ HInstruction* receiver) {
+ // If there is another guard that dominates the current guard, and
+ // that guard is dominated by receiver's definition, then the current
+ // guard can be eliminated, since receiver must pre-exist that other
+ // guard, and passing that guard guarantees that receiver conforms to
+ // all the CHA devirtualization assumptions.
+ HBasicBlock* dominator = flag->GetBlock();
+ HBasicBlock* receiver_def_block = receiver->GetBlock();
+
+ // Complexity of the following algorithm:
+ // We potentially need to traverse the full dominator chain to receiver_def_block,
+ // plus a (partial) linear search within one block for each guard.
+ // So the worst case for each guard is bounded by the size of the
+ // biggest block plus the depth of the dominating tree.
+
+ while (dominator != receiver_def_block) {
+ if (block_has_cha_guard_[dominator->GetBlockId()] == 1) {
+ RemoveGuard(flag);
+ return true;
+ }
+ dominator = dominator->GetDominator();
+ }
+
+ // At this point dominator is the block where receiver is defined.
+ // We do a linear search within dominator to see if there is a guard after
+ // receiver's definition.
+ HInstruction* instruction;
+ if (dominator == flag->GetBlock()) {
+ // Flag and receiver are defined in the same block. Search backward from
+ // the current guard.
+ instruction = flag->GetPrevious();
+ } else {
+ // Search backward from the last instruction of that dominator.
+ instruction = dominator->GetLastInstruction();
+ }
+ while (instruction != receiver) {
+ if (instruction == nullptr) {
+ // receiver must be defined in this block, we didn't find it
+ // in the instruction list, so it must be a Phi.
+ DCHECK(receiver->IsPhi());
+ break;
+ }
+ if (instruction->IsShouldDeoptimizeFlag()) {
+ RemoveGuard(flag);
+ return true;
+ }
+ instruction = instruction->GetPrevious();
+ }
+ return false;
+}
+
+bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag,
+ HInstruction* receiver) {
+ // If receiver is loop invariant, we can hoist the guard out of the
+ // loop since passing a guard before entering the loop guarantees that
+ // receiver conforms to all the CHA devirtualization assumptions.
+ // We only hoist guards out of the inner loop since that offers most of the
+ // benefit and it might help remove other guards in the inner loop.
+ HBasicBlock* block = flag->GetBlock();
+ HLoopInformation* loop_info = block->GetLoopInformation();
+ if (loop_info != nullptr &&
+ !loop_info->IsIrreducible() &&
+ loop_info->IsDefinedOutOfTheLoop(receiver)) {
+ HInstruction* compare = flag->GetNext();
+ DCHECK(compare->IsNotEqual());
+ HInstruction* deopt = compare->GetNext();
+ DCHECK(deopt->IsDeoptimize());
+
+ // Advance instruction iterator first before we move the guard.
+ // We need to do it twice since we move three instructions and the
+ // visitor is responsible for advancing it once.
+ instruction_iterator_->Advance();
+ instruction_iterator_->Advance();
+
+ HBasicBlock* pre_header = loop_info->GetPreHeader();
+ flag->MoveBefore(pre_header->GetLastInstruction());
+ compare->MoveBefore(pre_header->GetLastInstruction());
+
+ block->RemoveInstruction(deopt);
+ HInstruction* suspend = loop_info->GetSuspendCheck();
+ // Need a new deoptimize instruction that copies the environment
+ // of the suspend instruction for the loop.
+ HDeoptimize* deoptimize =
+ new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc());
+ pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
+ deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+ suspend->GetEnvironment(), loop_info->GetHeader());
+ block_has_cha_guard_[pre_header->GetBlockId()] = 1;
+ GetGraph()->IncrementNumberOfCHAGuards();
+ return true;
+ }
+ return false;
+}
+
+void CHAGuardVisitor::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+ number_of_guards_to_visit_--;
+ HInstruction* receiver = flag->InputAt(0);
+ // Don't need the receiver anymore.
+ flag->RemoveInputAt(0);
+ if (receiver->IsNullCheck()) {
+ receiver = receiver->InputAt(0);
+ }
+
+ if (OptimizeForParameter(flag, receiver)) {
+ DCHECK(!flag->IsInBlock());
+ return;
+ }
+ if (OptimizeWithDominatingGuard(flag, receiver)) {
+ DCHECK(!flag->IsInBlock());
+ return;
+ }
+ if (HoistGuard(flag, receiver)) {
+ DCHECK(flag->IsInBlock());
+ return;
+ }
+
+ // Need to keep the CHA guard in place.
+ block_has_cha_guard_[flag->GetBlock()->GetBlockId()] = 1;
+ GetGraph()->IncrementNumberOfCHAGuards();
+}
+
+void CHAGuardOptimization::Run() {
+ if (graph_->GetNumberOfCHAGuards() == 0) {
+ return;
+ }
+ CHAGuardVisitor visitor(graph_);
+ for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+ visitor.VisitBasicBlock(block);
+ }
+}
+
+} // namespace art
diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h
new file mode 100644
index 0000000..ba0cdb8
--- /dev/null
+++ b/compiler/optimizing/cha_guard_optimization.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_
+#define ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_
+
+#include "optimization.h"
+
+namespace art {
+
+/**
+ * Optimize CHA guards by removing/moving them.
+ */
+class CHAGuardOptimization : public HOptimization {
+ public:
+ explicit CHAGuardOptimization(HGraph* graph)
+ : HOptimization(graph, kCHAGuardOptimizationPassName) {}
+
+ void Run() OVERRIDE;
+
+ static constexpr const char* kCHAGuardOptimizationPassName = "cha_guard_optimization";
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(CHAGuardOptimization);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_CHA_GUARD_OPTIMIZATION_H_
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index fe4662a..03b06ba 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -506,19 +506,25 @@
uint32_t dex_pc,
HInstruction* cursor,
HBasicBlock* bb_cursor) {
- HInstruction* deopt_flag = new (graph_->GetArena()) HShouldDeoptimizeFlag(dex_pc);
- HInstruction* should_deopt = new (graph_->GetArena()) HNotEqual(
+ HShouldDeoptimizeFlag* deopt_flag = new (graph_->GetArena())
+ HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc);
+ HInstruction* compare = new (graph_->GetArena()) HNotEqual(
deopt_flag, graph_->GetIntConstant(0, dex_pc));
- HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(should_deopt, dex_pc);
+ HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc);
if (cursor != nullptr) {
bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
} else {
bb_cursor->InsertInstructionBefore(deopt_flag, bb_cursor->GetFirstInstruction());
}
- bb_cursor->InsertInstructionAfter(should_deopt, deopt_flag);
- bb_cursor->InsertInstructionAfter(deopt, should_deopt);
+ bb_cursor->InsertInstructionAfter(compare, deopt_flag);
+ bb_cursor->InsertInstructionAfter(deopt, compare);
+
+ // Add receiver as input to aid CHA guard optimization later.
+ deopt_flag->AddInput(invoke_instruction->InputAt(0));
+ DCHECK_EQ(deopt_flag->InputCount(), 1u);
deopt->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+ outermost_graph_->IncrementNumberOfCHAGuards();
}
HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index cabc078..b9e284f 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1357,7 +1357,9 @@
void HInstruction::MoveBefore(HInstruction* cursor) {
DCHECK(!IsPhi());
DCHECK(!IsControlFlow());
- DCHECK(CanBeMoved());
+ DCHECK(CanBeMoved() ||
+ // HShouldDeoptimizeFlag can only be moved by CHAGuardOptimization.
+ IsShouldDeoptimizeFlag());
DCHECK(!cursor->IsPhi());
next_->previous_ = previous_;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 4a77bed..6cfaa86 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -330,6 +330,7 @@
invoke_type_(invoke_type),
in_ssa_form_(false),
should_generate_constructor_barrier_(should_generate_constructor_barrier),
+ number_of_cha_guards_(0),
instruction_set_(instruction_set),
cached_null_constant_(nullptr),
cached_int_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)),
@@ -551,9 +552,7 @@
}
bool HasShouldDeoptimizeFlag() const {
- // TODO: if all CHA guards can be eliminated, there is no need for the flag
- // even if cha_single_implementation_list_ is not empty.
- return !cha_single_implementation_list_.empty();
+ return number_of_cha_guards_ != 0;
}
bool HasTryCatch() const { return has_try_catch_; }
@@ -572,6 +571,10 @@
ReferenceTypeInfo GetInexactObjectRti() const { return inexact_object_rti_; }
+ uint32_t GetNumberOfCHAGuards() { return number_of_cha_guards_; }
+ void SetNumberOfCHAGuards(uint32_t num) { number_of_cha_guards_ = num; }
+ void IncrementNumberOfCHAGuards() { number_of_cha_guards_++; }
+
private:
void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
void RemoveDeadBlocks(const ArenaBitVector& visited);
@@ -667,6 +670,10 @@
const bool should_generate_constructor_barrier_;
+ // Number of CHA guards in the graph. Used to short-circuit the
+ // CHA guard optimization pass when there is no CHA guard left.
+ uint32_t number_of_cha_guards_;
+
const InstructionSet instruction_set_;
// Cached constants.
@@ -2349,6 +2356,11 @@
class HVariableInputSizeInstruction : public HInstruction {
public:
+ using HInstruction::GetInputRecords; // Keep the const version visible.
+ ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE {
+ return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
+ }
+
void AddInput(HInstruction* input);
void InsertInputAt(size_t index, HInstruction* input);
void RemoveInputAt(size_t index);
@@ -2489,11 +2501,6 @@
bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
- using HInstruction::GetInputRecords; // Keep the const version visible.
- ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
- return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
- }
-
Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); }
void SetType(Primitive::Type new_type) {
// Make sure that only valid type changes occur. The following are allowed:
@@ -2925,14 +2932,20 @@
// if it's true, starts to do deoptimization.
// It has a 4-byte slot on stack.
// TODO: allocate a register for this flag.
-class HShouldDeoptimizeFlag FINAL : public HExpression<0> {
+class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction {
public:
- // TODO: use SideEffects to aid eliminating some CHA guards.
- explicit HShouldDeoptimizeFlag(uint32_t dex_pc)
- : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+ // CHA guards are only optimized in a separate pass and it has no side effects
+ // with regard to other passes.
+ HShouldDeoptimizeFlag(ArenaAllocator* arena, uint32_t dex_pc)
+ : HVariableInputSizeInstruction(SideEffects::None(), dex_pc, arena, 0, kArenaAllocCHA) {
}
- // We don't eliminate CHA guards yet.
+ Primitive::Type GetType() const OVERRIDE { return Primitive::kPrimInt; }
+
+ // We do all CHA guard elimination/motion in a single pass, after which there is no
+ // further guard elimination/motion since a guard might have been used for justification
+ // of the elimination of another guard. Therefore, we pretend this guard cannot be moved
+ // to avoid other optimizations trying to move it.
bool CanBeMoved() const OVERRIDE { return false; }
DECLARE_INSTRUCTION(ShouldDeoptimizeFlag);
@@ -3816,11 +3829,6 @@
public:
bool NeedsEnvironment() const OVERRIDE;
- using HInstruction::GetInputRecords; // Keep the const version visible.
- ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE {
- return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
- }
-
void SetArgumentAt(size_t index, HInstruction* argument) {
SetRawInputAt(index, argument);
}
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 0d0f62a..4bf5b08 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -54,6 +54,7 @@
#include "base/timing_logger.h"
#include "bounds_check_elimination.h"
#include "builder.h"
+#include "cha_guard_optimization.h"
#include "code_generator.h"
#include "compiled_method.h"
#include "compiler.h"
@@ -517,6 +518,8 @@
return new (arena) SideEffectsAnalysis(graph);
} else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
return new (arena) HLoopOptimization(graph, most_recent_induction);
+ } else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) {
+ return new (arena) CHAGuardOptimization(graph);
#ifdef ART_ENABLE_CODEGEN_arm
} else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
@@ -779,6 +782,7 @@
InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
graph, stats, "instruction_simplifier$before_codegen");
IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
+ CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
HOptimization* optimizations1[] = {
intrinsics,
@@ -807,6 +811,7 @@
fold3, // evaluates code generated by dynamic bce
simplify3,
lse,
+ cha_guard,
dce3,
// The codegen has a few assumptions that only the instruction simplifier
// can satisfy. For example, the code generator does not expect to see a