Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/boolean_simplifier.cc | 121
-rw-r--r--  compiler/optimizing/boolean_simplifier.h | 17
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 465
-rw-r--r--  compiler/optimizing/bounds_check_elimination_test.cc | 18
-rw-r--r--  compiler/optimizing/builder.cc | 92
-rw-r--r--  compiler/optimizing/builder.h | 5
-rw-r--r--  compiler/optimizing/code_generator.cc | 27
-rw-r--r--  compiler/optimizing/code_generator.h | 44
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 101
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 23
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 157
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 27
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 218
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 28
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 167
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 30
-rw-r--r--  compiler/optimizing/codegen_test.cc | 10
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc | 27
-rw-r--r--  compiler/optimizing/dead_code_elimination.h | 6
-rw-r--r--  compiler/optimizing/dominator_test.cc | 2
-rw-r--r--  compiler/optimizing/find_loops_test.cc | 11
-rw-r--r--  compiler/optimizing/graph_checker.cc | 85
-rw-r--r--  compiler/optimizing/graph_checker_test.cc | 2
-rw-r--r--  compiler/optimizing/graph_test.cc | 14
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 27
-rw-r--r--  compiler/optimizing/gvn_test.cc | 8
-rw-r--r--  compiler/optimizing/inliner.cc | 27
-rw-r--r--  compiler/optimizing/inliner.h | 2
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 59
-rw-r--r--  compiler/optimizing/intrinsics.cc | 15
-rw-r--r--  compiler/optimizing/intrinsics.h | 34
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 210
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 211
-rw-r--r--  compiler/optimizing/intrinsics_list.h | 4
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 287
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 275
-rw-r--r--  compiler/optimizing/licm.cc | 21
-rw-r--r--  compiler/optimizing/linearize_test.cc | 2
-rw-r--r--  compiler/optimizing/live_ranges_test.cc | 2
-rw-r--r--  compiler/optimizing/liveness_test.cc | 60
-rw-r--r--  compiler/optimizing/locations.cc | 6
-rw-r--r--  compiler/optimizing/locations.h | 9
-rw-r--r--  compiler/optimizing/nodes.cc | 227
-rw-r--r--  compiler/optimizing/nodes.h | 325
-rw-r--r--  compiler/optimizing/nodes_test.cc | 57
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc | 7
-rw-r--r--  compiler/optimizing/optimizing_cfi_test_expected.inc | 47
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 13
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h | 44
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h | 8
-rw-r--r--  compiler/optimizing/parallel_move_resolver.h | 1
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 2
-rw-r--r--  compiler/optimizing/pretty_printer_test.cc | 2
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 2
-rw-r--r--  compiler/optimizing/register_allocator.cc | 74
-rw-r--r--  compiler/optimizing/register_allocator.h | 1
-rw-r--r--  compiler/optimizing/register_allocator_test.cc | 16
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 20
-rw-r--r--  compiler/optimizing/ssa_builder.h | 17
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc | 43
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h | 240
-rw-r--r--  compiler/optimizing/ssa_test.cc | 20
-rw-r--r--  compiler/optimizing/suspend_check_test.cc | 2
63 files changed, 3108 insertions(+), 1016 deletions(-)
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index 30c89f2d15..8100a29f32 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -18,6 +18,26 @@
namespace art {
+void HBooleanSimplifier::TryRemovingNegatedCondition(HBasicBlock* block) {
+ DCHECK(block->EndsWithIf());
+
+ // Check if the condition is a Boolean negation.
+ HIf* if_instruction = block->GetLastInstruction()->AsIf();
+ HInstruction* boolean_not = if_instruction->InputAt(0);
+ if (!boolean_not->IsBooleanNot()) {
+ return;
+ }
+
+ // Make BooleanNot's input the condition of the If and swap branches.
+ if_instruction->ReplaceInput(boolean_not->InputAt(0), 0);
+ block->SwapSuccessors();
+
+ // Remove the BooleanNot if it is now unused.
+ if (!boolean_not->HasUses()) {
+ boolean_not->GetBlock()->RemoveInstruction(boolean_not);
+ }
+}
+
// Returns true if 'block1' and 'block2' are empty, merge into the same single
// successor, and that successor can only be reached from them.
static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
@@ -78,58 +98,69 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) {
}
}
+void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
+ DCHECK(block->EndsWithIf());
+
+ // Find elements of the pattern.
+ HIf* if_instruction = block->GetLastInstruction()->AsIf();
+ HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
+ HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
+ if (!BlocksDoMergeTogether(true_block, false_block)) {
+ return;
+ }
+ HBasicBlock* merge_block = true_block->GetSuccessors().Get(0);
+ if (!merge_block->HasSinglePhi()) {
+ return;
+ }
+ HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
+ HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
+ HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
+
+ // Check if the selection negates/preserves the value of the condition and
+ // if so, generate a suitable replacement instruction.
+ HInstruction* if_condition = if_instruction->InputAt(0);
+ HInstruction* replacement;
+ if (NegatesCondition(true_value, false_value)) {
+ replacement = GetOppositeCondition(if_condition);
+ if (replacement->GetBlock() == nullptr) {
+ block->InsertInstructionBefore(replacement, if_instruction);
+ }
+ } else if (PreservesCondition(true_value, false_value)) {
+ replacement = if_condition;
+ } else {
+ return;
+ }
+
+ // Replace the selection outcome with the new instruction.
+ phi->ReplaceWith(replacement);
+ merge_block->RemovePhi(phi);
+
+ // Delete the true branch and merge the resulting chain of blocks
+ // 'block->false_block->merge_block' into one.
+ true_block->DisconnectAndDelete();
+ block->MergeWith(false_block);
+ block->MergeWith(merge_block);
+
+ // Remove the original condition if it is now unused.
+ if (!if_condition->HasUses()) {
+ if_condition->GetBlock()->RemoveInstructionOrPhi(if_condition);
+ }
+}
+
void HBooleanSimplifier::Run() {
// Iterate in post order in the unlikely case that removing one occurrence of
- // the pattern empties a branch block of another occurrence. Otherwise the
- // order does not matter.
+ // the selection pattern empties a branch block of another occurrence.
+ // Otherwise the order does not matter.
for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
if (!block->EndsWithIf()) continue;
- // Find elements of the pattern.
- HIf* if_instruction = block->GetLastInstruction()->AsIf();
- HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
- HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
- if (!BlocksDoMergeTogether(true_block, false_block)) {
- continue;
- }
- HBasicBlock* merge_block = true_block->GetSuccessors().Get(0);
- if (!merge_block->HasSinglePhi()) {
- continue;
- }
- HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
- HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
- HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
-
- // Check if the selection negates/preserves the value of the condition and
- // if so, generate a suitable replacement instruction.
- HInstruction* if_condition = if_instruction->InputAt(0);
- HInstruction* replacement;
- if (NegatesCondition(true_value, false_value)) {
- replacement = GetOppositeCondition(if_condition);
- if (replacement->GetBlock() == nullptr) {
- block->InsertInstructionBefore(replacement, if_instruction);
- }
- } else if (PreservesCondition(true_value, false_value)) {
- replacement = if_condition;
- } else {
- continue;
- }
+ // If the condition is negated, remove the negation and swap the branches.
+ TryRemovingNegatedCondition(block);
- // Replace the selection outcome with the new instruction.
- phi->ReplaceWith(replacement);
- merge_block->RemovePhi(phi);
-
- // Delete the true branch and merge the resulting chain of blocks
- // 'block->false_block->merge_block' into one.
- true_block->DisconnectAndDelete();
- block->MergeWith(false_block);
- block->MergeWith(merge_block);
-
- // Remove the original condition if it is now unused.
- if (!if_condition->HasUses()) {
- if_condition->GetBlock()->RemoveInstruction(if_condition);
- }
+ // If this is a boolean-selection diamond pattern, replace its result with
+ // the condition value (or its negation) and simplify the graph.
+ TryRemovingBooleanSelection(block);
}
}
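
For intuition, the negated-condition rewrite in TryRemovingNegatedCondition corresponds to the source-level transformation below. This is a hypothetical C++ analogue; the pass itself rewrites HIR, feeding the BooleanNot's input straight into the If and swapping the successors:

    void then_branch();
    void else_branch();

    // Before: the If consumes a BooleanNot.
    void before(bool cond) {
      if (!cond) then_branch(); else else_branch();
    }

    // After: the condition feeds the If directly, the successors are
    // swapped, and the now-unused BooleanNot is removed.
    void after(bool cond) {
      if (cond) else_branch(); else then_branch();
    }
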
diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h
index a88733e1af..733ebaac2c 100644
--- a/compiler/optimizing/boolean_simplifier.h
+++ b/compiler/optimizing/boolean_simplifier.h
@@ -14,11 +14,15 @@
* limitations under the License.
*/
-// This optimization recognizes a common pattern where a boolean value is
-// either cast to an integer or negated by selecting from zero/one integer
-// constants with an If statement. Because boolean values are internally
-// represented as zero/one, we can safely replace the pattern with a suitable
-// condition instruction.
+// This optimization recognizes two common patterns:
+// (a) Boolean selection: Casting a boolean to an integer or negating it is
+// carried out with an If statement selecting from zero/one integer
+// constants. Because Boolean values are represented as zero/one, the
+// pattern can be replaced with the condition instruction itself or its
+// negation, depending on the layout.
+// (b) Negated condition: The instruction simplifier may replace an If's
+// condition with a boolean value. If this value is the result of a Boolean
+// negation, the true/false branches can be swapped and the negation removed.
// Example: Negating a boolean value
// B1:
@@ -66,6 +70,9 @@ class HBooleanSimplifier : public HOptimization {
static constexpr const char* kBooleanSimplifierPassName = "boolean_simplifier";
private:
+ void TryRemovingNegatedCondition(HBasicBlock* block);
+ void TryRemovingBooleanSelection(HBasicBlock* block);
+
DISALLOW_COPY_AND_ASSIGN(HBooleanSimplifier);
};
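
Pattern (a) from the header comment, again as a hypothetical C++ analogue (the pass actually matches an HIR diamond whose merge block holds a single phi over the constants 0 and 1):

    // Before: negating a boolean by selecting integer constants
    // across an If diamond; the merge block computes phi(0, 1).
    int negate_with_select(bool cond) {
      int result;
      if (cond) {
        result = 0;
      } else {
        result = 1;
      }
      return result;
    }

    // After: the phi is replaced with the opposite condition and the
    // diamond collapses into one block.
    int negate_simplified(bool cond) {
      return !cond;
    }
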
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 3645f19f09..b2b54965b5 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -246,6 +246,148 @@ class ValueBound : public ValueObject {
int32_t constant_;
};
+// Collect array access data for a loop.
+// TODO: make it work for multiple arrays inside the loop.
+class ArrayAccessInsideLoopFinder : public ValueObject {
+ public:
+ explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable)
+ : induction_variable_(induction_variable),
+ found_array_length_(nullptr),
+ offset_low_(INT_MAX),
+ offset_high_(INT_MIN) {
+ Run();
+ }
+
+ HArrayLength* GetFoundArrayLength() const { return found_array_length_; }
+ bool HasFoundArrayLength() const { return found_array_length_ != nullptr; }
+ int32_t GetOffsetLow() const { return offset_low_; }
+ int32_t GetOffsetHigh() const { return offset_high_; }
+
+ // Returns whether `block`, which must be contained in loop_info, may exit
+ // the loop; the loop header of loop_info is excluded, since leaving the
+ // loop through the header is the normal exit.
+ static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) {
+ DCHECK(loop_info->Contains(*block));
+ if (block == loop_info->GetHeader()) {
+ // Loop header of loop_info. Exiting loop is normal.
+ return false;
+ }
+ const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors();
+ for (size_t i = 0; i < successors.Size(); i++) {
+ if (!loop_info->Contains(*successors.Get(i))) {
+ // One of the successors exits the loop.
+ return true;
+ }
+ }
+ return false;
+ }
+
+ static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) {
+ for (size_t i = 0, e = loop_info->GetBackEdges().Size(); i < e; ++i) {
+ HBasicBlock* back_edge = loop_info->GetBackEdges().Get(i);
+ if (!block->Dominates(back_edge)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ void Run() {
+ HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
+ for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) {
+ HBasicBlock* block = it_loop.Current();
+ DCHECK(block->IsInLoop());
+ if (!DominatesAllBackEdges(block, loop_info)) {
+ // In order not to trigger deoptimization unnecessarily, make sure
+ // that all array accesses collected are really executed in the loop.
+ // For array accesses in a branch inside the loop, don't collect the
+ // access. The bounds check in that branch might not be eliminated.
+ continue;
+ }
+ if (EarlyExit(block, loop_info)) {
+ // If the loop body can exit the loop early (break, return, etc.), it's not
+ // guaranteed that the loop will run through the full monotonic value range
+ // from initial_ to end_. Adding a deoptimization might then be too
+ // aggressive and trigger it unnecessarily even if the loop would never
+ // actually throw AIOOBE. Without such early exits, the induction variable
+ // covers the full monotonic value range from initial_ to end_, and
+ // deoptimizations are added iff the loop would throw AIOOBE.
+ found_array_length_ = nullptr;
+ return;
+ }
+ for (HInstruction* instruction = block->GetFirstInstruction();
+ instruction != nullptr;
+ instruction = instruction->GetNext()) {
+ if (!instruction->IsArrayGet() && !instruction->IsArraySet()) {
+ continue;
+ }
+ HInstruction* index = instruction->InputAt(1);
+ if (!index->IsBoundsCheck()) {
+ continue;
+ }
+
+ HArrayLength* array_length = index->InputAt(1)->AsArrayLength();
+ if (array_length == nullptr) {
+ DCHECK(index->InputAt(1)->IsIntConstant());
+ // TODO: may optimize for constant case.
+ continue;
+ }
+
+ HInstruction* array = array_length->InputAt(0);
+ if (array->IsNullCheck()) {
+ array = array->AsNullCheck()->InputAt(0);
+ }
+ if (loop_info->Contains(*array->GetBlock())) {
+ // Array is defined inside the loop. Skip.
+ continue;
+ }
+
+ if (found_array_length_ != nullptr && found_array_length_ != array_length) {
+ // There is already access for another array recorded for the loop.
+ // TODO: handle multiple arrays.
+ continue;
+ }
+
+ index = index->AsBoundsCheck()->InputAt(0);
+ HInstruction* left = index;
+ int32_t right = 0;
+ if (left == induction_variable_ ||
+ (ValueBound::IsAddOrSubAConstant(index, &left, &right) &&
+ left == induction_variable_)) {
+ // For patterns like array[i] or array[i + 2].
+ if (right < offset_low_) {
+ offset_low_ = right;
+ }
+ if (right > offset_high_) {
+ offset_high_ = right;
+ }
+ } else {
+ // Access is not of the form induction_variable_ or
+ // (induction_variable_ + constant). Skip.
+ continue;
+ }
+ // Record this array.
+ found_array_length_ = array_length;
+ }
+ }
+ }
+
+ private:
+ // The instruction that corresponds to a MonotonicValueRange.
+ HInstruction* induction_variable_;
+
+ // The array length of the array that's accessed inside the loop.
+ HArrayLength* found_array_length_;
+
+ // The lowest and highest constant offsets relative to the induction
+ // variable induction_variable_ across all array accesses.
+ // If the array accesses are array[i-1], array[i] and array[i+1],
+ // offset_low_ is -1 and offset_high_ is 1.
+ int32_t offset_low_;
+ int32_t offset_high_;
+
+ DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder);
+};
+
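
A standalone sketch of the offset bookkeeping done in Run() above, assuming the loop body accesses array[i-1], array[i] and array[i+2]; the offsets array below stands in for the `right` constants extracted from each index expression:

    #include <algorithm>
    #include <climits>
    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t offset_low = INT_MAX;
      int32_t offset_high = INT_MIN;
      // Constants added to the induction variable in each access.
      const int32_t offsets[] = {-1, 0, 2};
      for (int32_t right : offsets) {
        offset_low = std::min(offset_low, right);
        offset_high = std::max(offset_high, right);
      }
      // Prints "offset_low=-1 offset_high=2": the body touches
      // array[i-1] .. array[i+2], so i must stay in [1, length - 3].
      std::printf("offset_low=%d offset_high=%d\n", offset_low, offset_high);
      return 0;
    }
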
/**
* Represent a range of lower bound and upper bound, both being inclusive.
* Currently a ValueRange may be generated as a result of the following:
@@ -332,21 +474,31 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> {
class MonotonicValueRange : public ValueRange {
public:
MonotonicValueRange(ArenaAllocator* allocator,
+ HPhi* induction_variable,
HInstruction* initial,
int32_t increment,
ValueBound bound)
// To be conservative, give it full range [INT_MIN, INT_MAX] in case it's
// used as a regular value range, due to possible overflow/underflow.
: ValueRange(allocator, ValueBound::Min(), ValueBound::Max()),
+ induction_variable_(induction_variable),
initial_(initial),
+ end_(nullptr),
+ inclusive_(false),
increment_(increment),
bound_(bound) {}
virtual ~MonotonicValueRange() {}
+ HInstruction* GetInductionVariable() const { return induction_variable_; }
int32_t GetIncrement() const { return increment_; }
-
ValueBound GetBound() const { return bound_; }
+ void SetEnd(HInstruction* end) { end_ = end; }
+ void SetInclusive(bool inclusive) { inclusive_ = inclusive; }
+ HBasicBlock* GetLoopHead() const {
+ DCHECK(induction_variable_->GetBlock()->IsLoopHeader());
+ return induction_variable_->GetBlock();
+ }
MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; }
@@ -371,6 +523,10 @@ class MonotonicValueRange : public ValueRange {
if (increment_ > 0) {
// Monotonically increasing.
ValueBound lower = ValueBound::NarrowLowerBound(bound_, range->GetLower());
+ if (!lower.IsConstant() || lower.GetConstant() == INT_MIN) {
+ // Lower bound isn't useful. Leave it to deoptimization.
+ return this;
+ }
// We currently conservatively assume max array length is INT_MAX. If we can
// make assumptions about the max array length, e.g. due to the max heap size,
@@ -417,6 +573,11 @@ class MonotonicValueRange : public ValueRange {
DCHECK_NE(increment_, 0);
// Monotonically decreasing.
ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper());
+ if ((!upper.IsConstant() || upper.GetConstant() == INT_MAX) &&
+ !upper.IsRelatedToArrayLength()) {
+ // Upper bound isn't useful. Leave it to deoptimization.
+ return this;
+ }
// Need to take care of underflow. Try to prove underflow won't happen
// for common cases.
@@ -432,10 +593,217 @@ class MonotonicValueRange : public ValueRange {
}
}
+ // Returns true if adding a (value >= constant) check for deoptimization
+ // is allowed and will benefit compiled code.
+ bool CanAddDeoptimizationConstant(HInstruction* value,
+ int32_t constant,
+ bool* is_proven) {
+ *is_proven = false;
+ // See if we can prove the relationship first.
+ if (value->IsIntConstant()) {
+ if (value->AsIntConstant()->GetValue() >= constant) {
+ // Already true.
+ *is_proven = true;
+ return true;
+ } else {
+ // May throw exception. Don't add deoptimization.
+ // Keep bounds checks in the loops.
+ return false;
+ }
+ }
+ // Can benefit from deoptimization.
+ return true;
+ }
+
+ // Adds a check that (value >= constant), and HDeoptimize otherwise.
+ void AddDeoptimizationConstant(HInstruction* value,
+ int32_t constant) {
+ HBasicBlock* block = induction_variable_->GetBlock();
+ DCHECK(block->IsLoopHeader());
+ HGraph* graph = block->GetGraph();
+ HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+ HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
+ HIntConstant* const_instr = graph->GetIntConstant(constant);
+ HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr);
+ HDeoptimize* deoptimize = new (graph->GetArena())
+ HDeoptimize(cond, suspend_check->GetDexPc());
+ pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction());
+ pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
+ deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+ suspend_check->GetEnvironment(), block);
+ }
+
+ // Returns true if adding a (value <= array_length + offset) check for deoptimization
+ // is allowed and will benefit compiled code.
+ bool CanAddDeoptimizationArrayLength(HInstruction* value,
+ HArrayLength* array_length,
+ int32_t offset,
+ bool* is_proven) {
+ *is_proven = false;
+ if (offset > 0) {
+ // (array_length + offset) might overflow.
+ // TODO: handle this, possibly with some distance relationship between
+ // offset_low and offset_high, or using another deoptimization to make
+ // sure (array_length + offset) doesn't overflow.
+ return false;
+ }
+
+ // See if we can prove the relationship first.
+ if (value == array_length) {
+ if (offset >= 0) {
+ // Already true.
+ *is_proven = true;
+ return true;
+ } else {
+ // May throw exception. Don't add deoptimization.
+ // Keep bounds checks in the loops.
+ return false;
+ }
+ }
+ // Can benefit from deoptimization.
+ return true;
+ }
+
+ // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise.
+ void AddDeoptimizationArrayLength(HInstruction* value,
+ HArrayLength* array_length,
+ int32_t offset) {
+ HBasicBlock* block = induction_variable_->GetBlock();
+ DCHECK(block->IsLoopHeader());
+ HGraph* graph = block->GetGraph();
+ HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+ HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
+
+ // We may need to hoist the null check and array_length out of the loop first.
+ if (!array_length->GetBlock()->Dominates(pre_header)) {
+ HInstruction* array = array_length->InputAt(0);
+ HNullCheck* null_check = array->AsNullCheck();
+ if (null_check != nullptr) {
+ array = null_check->InputAt(0);
+ }
+ // We've already made sure the array is defined before the loop when
+ // collecting array accesses for the loop.
+ DCHECK(array->GetBlock()->Dominates(pre_header));
+ if (null_check != nullptr && !null_check->GetBlock()->Dominates(pre_header)) {
+ // Hoist null check out of loop with a deoptimization.
+ HNullConstant* null_constant = graph->GetNullConstant();
+ HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant);
+ // TODO: for one dex_pc, share the same deoptimization slow path.
+ HDeoptimize* null_check_deoptimize = new (graph->GetArena())
+ HDeoptimize(null_check_cond, suspend_check->GetDexPc());
+ pre_header->InsertInstructionBefore(null_check_cond, pre_header->GetLastInstruction());
+ pre_header->InsertInstructionBefore(
+ null_check_deoptimize, pre_header->GetLastInstruction());
+ // Eliminate null check in the loop.
+ null_check->ReplaceWith(array);
+ null_check->GetBlock()->RemoveInstruction(null_check);
+ null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+ suspend_check->GetEnvironment(), block);
+ }
+ // Hoist array_length out of loop.
+ array_length->MoveBefore(pre_header->GetLastInstruction());
+ }
+
+ HIntConstant* offset_instr = graph->GetIntConstant(offset);
+ HAdd* add = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr);
+ HCondition* cond = new (graph->GetArena()) HGreaterThan(value, add);
+ HDeoptimize* deoptimize = new (graph->GetArena())
+ HDeoptimize(cond, suspend_check->GetDexPc());
+ pre_header->InsertInstructionBefore(add, pre_header->GetLastInstruction());
+ pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction());
+ pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
+ deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+ suspend_check->GetEnvironment(), block);
+ }
+
+ // Adds deoptimizations in the loop pre-header, using the collected array
+ // access data, so that value ranges can be established in the loop body.
+ // Returns true if the deoptimizations were successfully added, or if it's
+ // proven they are not necessary.
+ bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) {
+ int32_t offset_low = finder.GetOffsetLow();
+ int32_t offset_high = finder.GetOffsetHigh();
+ HArrayLength* array_length = finder.GetFoundArrayLength();
+
+ HBasicBlock* pre_header =
+ induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader();
+ if (!initial_->GetBlock()->Dominates(pre_header) ||
+ !end_->GetBlock()->Dominates(pre_header)) {
+ // Can't move initial_ or end_ into pre_header for comparisons.
+ return false;
+ }
+
+ bool is_constant_proven, is_length_proven;
+ if (increment_ == 1) {
+ // Increasing from initial_ to end_.
+ int32_t offset = inclusive_ ? -offset_high - 1 : -offset_high;
+ if (CanAddDeoptimizationConstant(initial_, -offset_low, &is_constant_proven) &&
+ CanAddDeoptimizationArrayLength(end_, array_length, offset, &is_length_proven)) {
+ if (!is_constant_proven) {
+ AddDeoptimizationConstant(initial_, -offset_low);
+ }
+ if (!is_length_proven) {
+ AddDeoptimizationArrayLength(end_, array_length, offset);
+ }
+ return true;
+ }
+ } else if (increment_ == -1) {
+ // Decreasing from initial_ to end_.
+ int32_t constant = inclusive_ ? -offset_low : -offset_low - 1;
+ if (CanAddDeoptimizationConstant(end_, constant, &is_constant_proven) &&
+ CanAddDeoptimizationArrayLength(
+ initial_, array_length, -offset_high - 1, &is_length_proven)) {
+ if (!is_constant_proven) {
+ AddDeoptimizationConstant(end_, constant);
+ }
+ if (!is_length_proven) {
+ AddDeoptimizationArrayLength(initial_, array_length, -offset_high - 1);
+ }
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // Try to narrow this range by first adding HDeoptimize instructions in the
+ // loop pre-header.
+ ValueRange* NarrowWithDeoptimization() {
+ if (increment_ != 1 && increment_ != -1) {
+ // TODO: possibly handle overflow/underflow issues with deoptimization.
+ return this;
+ }
+
+ if (end_ == nullptr) {
+ // Not enough information to add a deoptimization.
+ return this;
+ }
+
+ ArrayAccessInsideLoopFinder finder(induction_variable_);
+
+ if (!finder.HasFoundArrayLength()) {
+ // No array access was found inside the loop that can benefit
+ // from deoptimization.
+ return this;
+ }
+
+ if (!AddDeoptimization(finder)) {
+ return this;
+ }
+
+ // After the deoptimizations are added, the induction variable fits in the
+ // range [-offset_low, array.length - 1 - offset_high], per the collected offsets.
+ ValueBound lower = ValueBound(0, -finder.GetOffsetLow());
+ ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh());
+ // We've narrowed the range by adding the deoptimizations.
+ return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper);
+ }
+
private:
- HInstruction* const initial_;
- const int32_t increment_;
- ValueBound bound_; // Additional value bound info for initial_;
+ HPhi* const induction_variable_; // Induction variable for this monotonic value range.
+ HInstruction* const initial_; // Initial value.
+ HInstruction* end_; // End value.
+ bool inclusive_; // Whether end value is inclusive.
+ const int32_t increment_; // Increment for each loop iteration.
+ const ValueBound bound_; // Additional value bound info for initial_.
DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange);
};
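
To make the guard arithmetic in AddDeoptimization concrete, here is a simplified scalar model of the increment_ == +1 case with an exclusive end (inclusive_ == false). This is not ART code; returning false stands in for taking the HDeoptimize:

    #include <cstdint>

    bool GuardsAllowBce(int32_t initial, int32_t end, int32_t array_length,
                        int32_t offset_low, int32_t offset_high) {
      // Guard 1: initial >= -offset_low, since the lowest access of the
      // first iteration is array[initial + offset_low].
      if (initial < -offset_low) return false;
      // Guard 2: end <= array_length - offset_high, since the highest
      // access of the last iteration is array[end - 1 + offset_high].
      if (end > array_length - offset_high) return false;
      // Both guards hold: the induction variable stays within
      // [-offset_low, array_length - 1 - offset_high], so the bounds
      // checks inside the loop can be eliminated.
      return true;
    }
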
@@ -598,6 +966,20 @@ class BCEVisitor : public HGraphVisitor {
// There should be no critical edge at this point.
DCHECK_EQ(false_successor->GetPredecessors().Size(), 1u);
+ ValueRange* left_range = LookupValueRange(left, block);
+ MonotonicValueRange* left_monotonic_range = nullptr;
+ if (left_range != nullptr) {
+ left_monotonic_range = left_range->AsMonotonicValueRange();
+ if (left_monotonic_range != nullptr) {
+ HBasicBlock* loop_head = left_monotonic_range->GetLoopHead();
+ if (instruction->GetBlock() != loop_head) {
+ // For a monotonic value range, don't handle `instruction`
+ // if it's not defined in the loop header.
+ return;
+ }
+ }
+ }
+
bool found;
ValueBound bound = ValueBound::DetectValueBoundFromValue(right, &found);
// Each comparison can establish a lower bound and an upper bound
@@ -610,7 +992,6 @@ class BCEVisitor : public HGraphVisitor {
ValueRange* right_range = LookupValueRange(right, block);
if (right_range != nullptr) {
if (right_range->IsMonotonicValueRange()) {
- ValueRange* left_range = LookupValueRange(left, block);
if (left_range != nullptr && left_range->IsMonotonicValueRange()) {
HandleIfBetweenTwoMonotonicValueRanges(instruction, left, right, cond,
left_range->AsMonotonicValueRange(),
@@ -628,6 +1009,17 @@ class BCEVisitor : public HGraphVisitor {
bool overflow, underflow;
if (cond == kCondLT || cond == kCondLE) {
+ if (left_monotonic_range != nullptr) {
+ // Update the info for the monotonic value range.
+ if (left_monotonic_range->GetInductionVariable() == left &&
+ left_monotonic_range->GetIncrement() < 0 &&
+ block == left_monotonic_range->GetLoopHead() &&
+ instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
+ left_monotonic_range->SetEnd(right);
+ left_monotonic_range->SetInclusive(cond == kCondLT);
+ }
+ }
+
if (!upper.Equals(ValueBound::Max())) {
int32_t compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive
ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
@@ -651,6 +1043,17 @@ class BCEVisitor : public HGraphVisitor {
ApplyRangeFromComparison(left, block, false_successor, new_range);
}
} else if (cond == kCondGT || cond == kCondGE) {
+ if (left_monotonic_range != nullptr) {
+ // Update the info for the monotonic value range.
+ if (left_monotonic_range->GetInductionVariable() == left &&
+ left_monotonic_range->GetIncrement() > 0 &&
+ block == left_monotonic_range->GetLoopHead() &&
+ instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
+ left_monotonic_range->SetEnd(right);
+ left_monotonic_range->SetInclusive(cond == kCondGT);
+ }
+ }
+
// array.length as a lower bound isn't considered useful.
if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
int32_t compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive
@@ -755,9 +1158,26 @@ class BCEVisitor : public HGraphVisitor {
bounds_check->GetBlock()->RemoveInstruction(bounds_check);
}
+ static bool HasSameInputAtBackEdges(HPhi* phi) {
+ DCHECK(phi->IsLoopHeaderPhi());
+ // Start with input 1. Input 0 is from the incoming block.
+ HInstruction* input1 = phi->InputAt(1);
+ DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
+ *phi->GetBlock()->GetPredecessors().Get(1)));
+ for (size_t i = 2, e = phi->InputCount(); i < e; ++i) {
+ DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge(
+ *phi->GetBlock()->GetPredecessors().Get(i)));
+ if (input1 != phi->InputAt(i)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
void VisitPhi(HPhi* phi) {
- if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) {
- DCHECK_EQ(phi->InputCount(), 2U);
+ if (phi->IsLoopHeaderPhi()
+ && (phi->GetType() == Primitive::kPrimInt)
+ && HasSameInputAtBackEdges(phi)) {
HInstruction* instruction = phi->InputAt(1);
HInstruction *left;
int32_t increment;
@@ -790,6 +1210,7 @@ class BCEVisitor : public HGraphVisitor {
}
range = new (GetGraph()->GetArena()) MonotonicValueRange(
GetGraph()->GetArena(),
+ phi,
initial_value,
increment,
bound);
@@ -809,6 +1230,36 @@ class BCEVisitor : public HGraphVisitor {
HInstruction* left = cond->GetLeft();
HInstruction* right = cond->GetRight();
HandleIf(instruction, left, right, cmp);
+
+ HBasicBlock* block = instruction->GetBlock();
+ ValueRange* left_range = LookupValueRange(left, block);
+ if (left_range == nullptr) {
+ return;
+ }
+
+ if (left_range->IsMonotonicValueRange() &&
+ block == left_range->AsMonotonicValueRange()->GetLoopHead()) {
+ // The comparison is for an induction variable in the loop header.
+ DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable());
+ HBasicBlock* loop_body_successor;
+ if (LIKELY(block->GetLoopInformation()->
+ Contains(*instruction->IfFalseSuccessor()))) {
+ loop_body_successor = instruction->IfFalseSuccessor();
+ } else {
+ loop_body_successor = instruction->IfTrueSuccessor();
+ }
+ ValueRange* new_left_range = LookupValueRange(left, loop_body_successor);
+ if (new_left_range == left_range) {
+ // We did not succeed in narrowing the monotonic value range to a
+ // regular value range. Try using deoptimization.
+ new_left_range = left_range->AsMonotonicValueRange()->
+ NarrowWithDeoptimization();
+ if (new_left_range != left_range) {
+ GetValueRangeMap(instruction->IfFalseSuccessor())->
+ Overwrite(left->GetId(), new_left_range);
+ }
+ }
+ }
}
}
}
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 97be778dbd..163458f75c 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -42,7 +42,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -147,7 +147,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -219,7 +219,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -291,7 +291,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
@@ -364,7 +364,7 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator,
int initial,
int increment,
IfCondition cond = kCondGE) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
@@ -501,7 +501,7 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator,
int initial,
int increment = -1,
IfCondition cond = kCondLE) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
@@ -632,7 +632,7 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator,
int initial,
int increment,
IfCondition cond) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
@@ -743,7 +743,7 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator,
HInstruction** bounds_check,
int initial,
IfCondition cond = kCondGE) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
@@ -868,7 +868,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
graph->SetHasBoundsChecks(true);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 96e08fd24c..58416ee93b 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -19,6 +19,7 @@
#include "art_field-inl.h"
#include "base/logging.h"
#include "class_linker.h"
+#include "dex/verified_method.h"
#include "dex_file-inl.h"
#include "dex_instruction-inl.h"
#include "dex/verified_method.h"
@@ -281,7 +282,10 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) {
// To avoid splitting blocks, we compute ahead of time the instructions that
// start a new block, and create these blocks.
- ComputeBranchTargets(code_ptr, code_end, &number_of_branches);
+ if (!ComputeBranchTargets(code_ptr, code_end, &number_of_branches)) {
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledBranchOutsideMethodCode);
+ return false;
+ }
// Note that the compiler driver is null when unit testing.
if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) {
@@ -348,7 +352,7 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) {
current_block_ = block;
}
-void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
+bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
const uint16_t* code_end,
size_t* number_of_branches) {
branch_targets_.SetSize(code_end - code_ptr);
@@ -373,7 +377,14 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
}
dex_pc += instruction.SizeInCodeUnits();
code_ptr += instruction.SizeInCodeUnits();
- if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
+
+ if (code_ptr >= code_end) {
+ if (instruction.CanFlowThrough()) {
+ // We should never hit this in the normal case, but someone can artificially forge a dex
+ // file to fall through past the end of the method code. In that case we bail out of compilation.
+ return false;
+ }
+ } else if (FindBlockStartingAt(dex_pc) == nullptr) {
block = new (arena_) HBasicBlock(graph_, dex_pc);
branch_targets_.Put(dex_pc, block);
}
@@ -405,7 +416,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
// Fall-through. Add a block if there is more code afterwards.
dex_pc += instruction.SizeInCodeUnits();
code_ptr += instruction.SizeInCodeUnits();
- if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) {
+ if (code_ptr >= code_end) {
+ // We should never hit this in the normal case, but someone can artificially forge a dex
+ // file to fall through past the end of the method code. In that case we bail out of compilation.
+ // (A switch can fall through, so we don't need to check CanFlowThrough().)
+ return false;
+ } else if (FindBlockStartingAt(dex_pc) == nullptr) {
block = new (arena_) HBasicBlock(graph_, dex_pc);
branch_targets_.Put(dex_pc, block);
}
@@ -414,6 +430,7 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr,
dex_pc += instruction.SizeInCodeUnits();
}
}
+ return true;
}
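
A minimal model of the new bail-out rule, under the assumption of a simplified one-slot instruction stream (in ART the step is instruction.SizeInCodeUnits() and the predicate is Instruction::CanFlowThrough()):

    struct Op { bool can_flow_through; };

    // Returns false for a forged method whose last instruction could
    // fall through past the end of the code item.
    bool BranchTargetsInBounds(const Op* code_ptr, const Op* code_end) {
      while (code_ptr < code_end) {
        const Op insn = *code_ptr;
        ++code_ptr;  // advance by the instruction's size (one slot here)
        if (code_ptr >= code_end && insn.can_flow_through) {
          return false;  // would flow out of the method code: bail out
        }
      }
      return true;
    }
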
HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const {
@@ -612,6 +629,16 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
// Potential class initialization check, in the case of a static method call.
HClinitCheck* clinit_check = nullptr;
+ // Replace calls to String.<init> with StringFactory.
+ int32_t string_init_offset = 0;
+ bool is_string_init = compiler_driver_->IsStringInit(method_idx, dex_file_, &string_init_offset);
+ if (is_string_init) {
+ return_type = Primitive::kPrimNot;
+ is_instance_call = false;
+ number_of_arguments--;
+ invoke_type = kStatic;
+ optimized_invoke_type = kStatic;
+ }
HInvoke* invoke = nullptr;
@@ -638,9 +665,8 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
*dex_compilation_unit_->GetDexFile())));
Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
- mirror::ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
- soa, dex_cache, class_loader, dex_compilation_unit_, method_idx,
- optimized_invoke_type);
+ ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
+ soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, optimized_invoke_type);
if (resolved_method == nullptr) {
MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedMethod);
@@ -691,14 +717,14 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
current_block_->AddInstruction(load_class);
clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
current_block_->AddInstruction(clinit_check);
- ++number_of_arguments;
}
}
}
invoke = new (arena_) HInvokeStaticOrDirect(
arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index,
- is_recursive, invoke_type, optimized_invoke_type, clinit_check_requirement);
+ is_recursive, string_init_offset, invoke_type, optimized_invoke_type,
+ clinit_check_requirement);
}
size_t start_index = 0;
@@ -714,6 +740,9 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
uint32_t descriptor_index = 1;
uint32_t argument_index = start_index;
+ if (is_string_init) {
+ start_index = 1;
+ }
for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) {
Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]);
bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble);
@@ -730,16 +759,38 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
i++;
}
}
+ DCHECK_EQ(argument_index, number_of_arguments);
if (clinit_check_requirement == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit) {
// Add the class initialization check as last input of `invoke`.
DCHECK(clinit_check != nullptr);
- invoke->SetArgumentAt(argument_index++, clinit_check);
+ invoke->SetArgumentAt(argument_index, clinit_check);
}
- DCHECK_EQ(argument_index, number_of_arguments);
current_block_->AddInstruction(invoke);
latest_result_ = invoke;
+
+ // Add move-result for StringFactory method.
+ if (is_string_init) {
+ uint32_t orig_this_reg = is_range ? register_index : args[0];
+ const VerifiedMethod* verified_method =
+ compiler_driver_->GetVerifiedMethod(dex_file_, dex_compilation_unit_->GetDexMethodIndex());
+ if (verified_method == nullptr) {
+ LOG(WARNING) << "No verified method for method calling String.<init>: "
+ << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_);
+ return false;
+ }
+ const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map =
+ verified_method->GetStringInitPcRegMap();
+ auto map_it = string_init_map.find(dex_pc);
+ if (map_it != string_init_map.end()) {
+ std::set<uint32_t> reg_set = map_it->second;
+ for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) {
+ UpdateLocal(*set_it, invoke);
+ }
+ }
+ UpdateLocal(orig_this_reg, invoke);
+ }
return true;
}
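
A sketch of the alias redirection above, assuming a simplified locals map; string_init_map mirrors VerifiedMethod::GetStringInitPcRegMap(), which records, per dex pc of a String.<init> call, the registers the verifier saw aliasing the uninitialized receiver:

    #include <cstdint>
    #include <map>
    #include <set>

    struct HInstructionStub {};  // stands in for the StringFactory invoke

    void RedirectStringAliases(
        uint32_t dex_pc, uint32_t orig_this_reg,
        const std::map<uint32_t, std::set<uint32_t>>& string_init_map,
        std::map<uint32_t, HInstructionStub*>* locals,
        HInstructionStub* invoke) {
      auto it = string_init_map.find(dex_pc);
      if (it != string_init_map.end()) {
        for (uint32_t reg : it->second) {
          (*locals)[reg] = invoke;  // each alias now sees the factory result
        }
      }
      (*locals)[orig_this_reg] = invoke;  // the original receiver register
    }
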
@@ -1916,12 +1967,19 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32
case Instruction::NEW_INSTANCE: {
uint16_t type_index = instruction.VRegB_21c();
- QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
- ? kQuickAllocObjectWithAccessCheck
- : kQuickAllocObject;
-
- current_block_->AddInstruction(new (arena_) HNewInstance(dex_pc, type_index, entrypoint));
- UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+ if (compiler_driver_->IsStringTypeIndex(type_index, dex_file_)) {
+ // Turn new-instance of String into a null constant; the StringFactory
+ // call that replaces String.<init> will produce the actual object.
+ int32_t register_index = instruction.VRegA();
+ HNullConstant* constant = graph_->GetNullConstant();
+ UpdateLocal(register_index, constant);
+ } else {
+ QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
+ ? kQuickAllocObjectWithAccessCheck
+ : kQuickAllocObject;
+
+ current_block_->AddInstruction(new (arena_) HNewInstance(dex_pc, type_index, entrypoint));
+ UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction());
+ }
break;
}
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index dc6d97eb0c..36503ce43a 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -88,7 +88,10 @@ class HGraphBuilder : public ValueObject {
// the newly created blocks.
// As a side effect, also compute the number of dex instructions, blocks, and
// branches.
- void ComputeBranchTargets(const uint16_t* start,
+ // Returns true if all branches land inside the method code, false otherwise.
+ // (In normal cases this should always return true, but someone can artificially
+ // create a code unit whose execution can fall through past its end.)
+ bool ComputeBranchTargets(const uint16_t* start,
const uint16_t* end,
size_t* number_of_branches);
void MaybeUpdateCurrentBlock(size_t index);
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 5163395cac..a5d5305836 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -100,11 +100,11 @@ static bool CheckTypeConsistency(HInstruction* instruction) {
for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) {
if (environment->GetInstructionAt(i) != nullptr) {
Primitive::Type type = environment->GetInstructionAt(i)->GetType();
- DCHECK(CheckType(type, locations->GetEnvironmentAt(i)))
- << type << " " << locations->GetEnvironmentAt(i);
+ DCHECK(CheckType(type, environment->GetLocationAt(i)))
+ << type << " " << environment->GetLocationAt(i);
} else {
- DCHECK(locations->GetEnvironmentAt(i).IsInvalid())
- << locations->GetEnvironmentAt(i);
+ DCHECK(environment->GetLocationAt(i).IsInvalid())
+ << environment->GetLocationAt(i);
}
}
return true;
@@ -114,18 +114,24 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) {
return mirror::ObjectArray<mirror::Object>::OffsetOfElement(index).SizeValue();
}
+size_t CodeGenerator::GetCachePointerOffset(uint32_t index) {
+ auto pointer_size = InstructionSetPointerSize(GetInstructionSet());
+ return mirror::Array::DataOffset(pointer_size).Uint32Value() + pointer_size * index;
+}
+
void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
Initialize();
if (!is_leaf) {
MarkNotLeaf();
}
+ const bool is_64_bit = Is64BitInstructionSet(GetInstructionSet());
InitializeCodeGeneration(GetGraph()->GetNumberOfLocalVRegs()
+ GetGraph()->GetTemporariesVRegSlots()
+ 1 /* filler */,
0, /* the baseline compiler does not have live registers at slow path */
0, /* the baseline compiler does not have live registers at slow path */
GetGraph()->GetMaximumNumberOfOutVRegs()
- + 1 /* current method */,
+ + (is_64_bit ? 2 : 1) /* current method */,
GetGraph()->GetBlocks());
CompileInternal(allocator, /* is_baseline */ true);
}
@@ -153,6 +159,7 @@ HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const {
}
void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
+ is_baseline_ = is_baseline;
HGraphVisitor* instruction_visitor = GetInstructionVisitor();
DCHECK_EQ(current_block_index_, 0u);
GenerateFrameEntry();
@@ -269,7 +276,8 @@ int32_t CodeGenerator::GetStackSlot(HLocal* local) const {
uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs();
if (reg_number >= number_of_locals) {
// Local is a parameter of the method. It is stored in the caller's frame.
- return GetFrameSize() + kVRegSize // ART method
+ // TODO: Share this logic with StackVisitor::GetVRegOffsetFromQuickCode.
+ return GetFrameSize() + InstructionSetPointerSize(GetInstructionSet()) // ART method
+ (reg_number - number_of_locals) * kVRegSize;
} else {
// Local is a temporary in this method. It is stored in this method's frame.
@@ -679,6 +687,11 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
locations->GetStackMask(),
environment_size,
inlining_depth);
+ if (environment != nullptr) {
+ // TODO: Handle parent environment.
+ DCHECK(environment->GetParent() == nullptr);
+ DCHECK_EQ(environment->GetDexPc(), dex_pc);
+ }
// Walk over the environment, and record the location of dex registers.
for (size_t i = 0; i < environment_size; ++i) {
@@ -688,7 +701,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
continue;
}
- Location location = locations->GetEnvironmentAt(i);
+ Location location = environment->GetLocationAt(i);
switch (location.GetKind()) {
case Location::kConstant: {
DCHECK_EQ(current, location.GetConstant());
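
A standalone model of the new GetCachePointerOffset, assuming mirror::Array's header is the 12 bytes of class, monitor and length words aligned up to the component size (which is what mirror::Array::DataOffset computes):

    #include <cstddef>
    #include <cstdint>

    constexpr size_t DataOffsetModel(size_t component_size) {
      // 12-byte header, rounded up to the component alignment.
      return (12 + component_size - 1) & ~(component_size - 1);
    }

    constexpr size_t CachePointerOffsetModel(uint32_t index, size_t pointer_size) {
      return DataOffsetModel(pointer_size) + pointer_size * index;
    }

    static_assert(CachePointerOffsetModel(0, 8) == 16, "64-bit: data starts at 16");
    static_assert(CachePointerOffsetModel(2, 4) == 20, "32-bit: 12 + 4 * 2");
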
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index e536b2d0ee..c6317f18d3 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -105,6 +105,25 @@ class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> {
DISALLOW_COPY_AND_ASSIGN(SlowPathCode);
};
+class InvokeDexCallingConventionVisitor {
+ public:
+ virtual Location GetNextLocation(Primitive::Type type) = 0;
+
+ protected:
+ InvokeDexCallingConventionVisitor() {}
+ virtual ~InvokeDexCallingConventionVisitor() {}
+
+ // The current index for core registers.
+ uint32_t gp_index_ = 0u;
+ // The current index for floating-point registers.
+ uint32_t float_index_ = 0u;
+ // The current stack index.
+ uint32_t stack_index_ = 0u;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+};
+
class CodeGenerator {
public:
// Compiles the graph to executable instructions. Returns whether the compilation
@@ -126,7 +145,7 @@ class CodeGenerator {
size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
// Note that this follows the current calling convention.
return GetFrameSize()
- + kVRegSize // Art method
+ + InstructionSetPointerSize(GetInstructionSet()) // Art method
+ parameter->GetIndex() * kVRegSize;
}
@@ -212,6 +231,10 @@ class CodeGenerator {
std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
void BuildStackMaps(std::vector<uint8_t>* vector);
+ bool IsBaseline() const {
+ return is_baseline_;
+ }
+
bool IsLeafMethod() const {
return is_leaf_;
}
@@ -243,6 +266,8 @@ class CodeGenerator {
// Note: this method assumes we always have the same pointer size, regardless
// of the architecture.
static size_t GetCacheOffset(uint32_t index);
+ // Pointer variant for ArtMethod and ArtField arrays.
+ size_t GetCachePointerOffset(uint32_t index);
void EmitParallelMoves(Location from1,
Location to1,
@@ -304,6 +329,7 @@ class CodeGenerator {
return GetFpuSpillSize() + GetCoreSpillSize();
}
+ virtual ParallelMoveResolver* GetMoveResolver() = 0;
protected:
CodeGenerator(HGraph* graph,
@@ -325,6 +351,7 @@ class CodeGenerator {
number_of_register_pairs_(number_of_register_pairs),
core_callee_save_mask_(core_callee_save_mask),
fpu_callee_save_mask_(fpu_callee_save_mask),
+ is_baseline_(false),
graph_(graph),
compiler_options_(compiler_options),
pc_infos_(graph->GetArena(), 32),
@@ -346,7 +373,6 @@ class CodeGenerator {
virtual Location GetStackLocation(HLoadLocal* load) const = 0;
- virtual ParallelMoveResolver* GetMoveResolver() = 0;
virtual HGraphVisitor* GetLocationBuilder() = 0;
virtual HGraphVisitor* GetInstructionVisitor() = 0;
@@ -404,6 +430,9 @@ class CodeGenerator {
const uint32_t core_callee_save_mask_;
const uint32_t fpu_callee_save_mask_;
+ // Whether we are using baseline.
+ bool is_baseline_;
+
private:
void InitLocationsBaseline(HInstruction* instruction);
size_t GetStackOffsetOfSavedRegister(size_t index);
@@ -442,11 +471,13 @@ class CallingConvention {
CallingConvention(const C* registers,
size_t number_of_registers,
const F* fpu_registers,
- size_t number_of_fpu_registers)
+ size_t number_of_fpu_registers,
+ size_t pointer_size)
: registers_(registers),
number_of_registers_(number_of_registers),
fpu_registers_(fpu_registers),
- number_of_fpu_registers_(number_of_fpu_registers) {}
+ number_of_fpu_registers_(number_of_fpu_registers),
+ pointer_size_(pointer_size) {}
size_t GetNumberOfRegisters() const { return number_of_registers_; }
size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; }
@@ -463,8 +494,8 @@ class CallingConvention {
size_t GetStackOffsetOf(size_t index) const {
// We still reserve the space for parameters passed by registers.
- // Add one for the method pointer.
- return (index + 1) * kVRegSize;
+ // Add space for the method pointer.
+ return pointer_size_ + index * kVRegSize;
}
private:
@@ -472,6 +503,7 @@ class CallingConvention {
const size_t number_of_registers_;
const F* fpu_registers_;
const size_t number_of_fpu_registers_;
+ const size_t pointer_size_;
DISALLOW_COPY_AND_ASSIGN(CallingConvention);
};
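
The pointer_size_ change to GetStackOffsetOf, restated as a checked example; kVRegSize is ART's 32-bit vreg slot size, repeated here as an assumption so the snippet is self-contained:

    #include <cstddef>

    constexpr size_t kVRegSize = 4;

    constexpr size_t StackOffsetOfModel(size_t index, size_t pointer_size) {
      // Space for the method pointer is pointer-sized, no longer the
      // single vreg slot implied by the old (index + 1) * kVRegSize.
      return pointer_size + index * kVRegSize;
    }

    static_assert(StackOffsetOfModel(0, 4) == 4, "32-bit targets: unchanged");
    static_assert(StackOffsetOfModel(0, 8) == 8, "64-bit: ArtMethod* takes 8 bytes");
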
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 01748a9f5c..2b1131d65f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -17,13 +17,13 @@
#include "code_generator_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
+#include "art_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_arm.h"
#include "mirror/array-inl.h"
-#include "mirror/art_method.h"
-#include "mirror/class.h"
+#include "mirror/class-inl.h"
#include "thread.h"
#include "utils/arm/assembler_arm.h"
#include "utils/arm/managed_register_arm.h"
@@ -112,6 +112,10 @@ class SuspendCheckSlowPathARM : public SlowPathCodeARM {
return &return_label_;
}
+ HBasicBlock* GetSuccessor() const {
+ return successor_;
+ }
+
private:
HSuspendCheck* const instruction_;
// If not null, the block to branch to after the suspend check.
@@ -489,11 +493,6 @@ InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGene
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
-static uint32_t LeastSignificantBit(uint32_t mask) {
- // ffs starts at 1.
- return ffs(mask) - 1;
-}
-
void CodeGeneratorARM::ComputeSpillMask() {
core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
// Save one extra register for baseline. Note that on thumb2, there is no easy
@@ -605,7 +604,7 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const {
UNREACHABLE();
}
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type type) {
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -680,7 +679,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
return Location();
}
-Location InvokeDexCallingConventionVisitor::GetReturnLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) {
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -1241,13 +1240,9 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) {
}
void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // Explicit clinit checks triggered by static invokes must have been
- // pruned by art::PrepareForRegisterAllocation, but this step is not
- // run in baseline. So we remove them manually here if we find them.
- // TODO: Instead of this local workaround, address this properly.
- if (invoke->IsStaticWithExplicitClinitCheck()) {
- invoke->RemoveClinitCheckOrLoadClassAsLastInput();
- }
+ // When we do not run baseline, explicit clinit checks triggered by static
+ // invokes must have been pruned by art::PrepareForRegisterAllocation.
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
codegen_->GetInstructionSetFeatures());
@@ -1273,9 +1268,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen)
}
void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // Explicit clinit checks triggered by static invokes must have been
- // pruned by art::PrepareForRegisterAllocation.
- DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+ // When we do not run baseline, explicit clinit checks triggered by static
+ // invokes must have been pruned by art::PrepareForRegisterAllocation.
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
@@ -1292,8 +1287,8 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) {
new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
locations->AddTemp(Location::RegisterLocation(R0));
- InvokeDexCallingConventionVisitor calling_convention_visitor;
- for (size_t i = 0; i < invoke->InputCount(); i++) {
+ InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+ for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
HInstruction* input = invoke->InputAt(i);
locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
}
@@ -1317,8 +1312,8 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
}
Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
- uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() +
- invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
+ uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+ invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
LocationSummary* locations = invoke->GetLocations();
Location receiver = locations->InAt(0);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -1331,7 +1326,7 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetMethodAt(method_offset);
- uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kArmWordSize).Int32Value();
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
// LR = temp->GetEntryPoint();
@@ -1351,8 +1346,8 @@ void LocationsBuilderARM::VisitInvokeInterface(HInvokeInterface* invoke) {
void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
- uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() +
- (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry);
+ uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+ invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
LocationSummary* locations = invoke->GetLocations();
Location receiver = locations->InAt(0);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
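Both lookups above now ask mirror::Class for the slot offset directly. A sketch of what the two helpers compute, assuming the embedded tables keep their old starting offsets and that entries are now pointer-sized ArtMethod* slots rather than the removed VTableEntry/ImTableEntry wrappers:

// Sketches, not the authoritative declarations in mirror/class.h:
static MemberOffset EmbeddedVTableEntryOffset(uint32_t index, size_t pointer_size) {
  return MemberOffset(EmbeddedVTableOffset().Uint32Value() + index * pointer_size);
}
static MemberOffset EmbeddedImTableEntryOffset(uint32_t index, size_t pointer_size) {
  return MemberOffset(EmbeddedImTableOffset().Uint32Value() + index * pointer_size);
}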
@@ -1370,7 +1365,7 @@ void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke)
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetImtEntryAt(method_offset);
- uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kArmWordSize).Int32Value();
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
// LR = temp->GetEntryPoint();
@@ -3543,8 +3538,18 @@ void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction)
void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
SuspendCheckSlowPathARM* slow_path =
- new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
- codegen_->AddSlowPath(slow_path);
+ down_cast<SuspendCheckSlowPathARM*>(instruction->GetSlowPath());
+ if (slow_path == nullptr) {
+ slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
+ instruction->SetSlowPath(slow_path);
+ codegen_->AddSlowPath(slow_path);
+ if (successor != nullptr) {
+ DCHECK(successor->IsLoopHeader());
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+ }
+ } else {
+ DCHECK_EQ(slow_path->GetSuccessor(), successor);
+ }
__ LoadFromOffset(
kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value());
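Previously each visit of a suspend check allocated a fresh slow path (and VisitGoto separately cleared the loop-phi spill slots), so a check reached from several points got duplicate slow paths. The pattern above, repeated in all four back ends, creates the slow path once and caches it on the HSuspendCheck; condensed:

// Condensed from the hunk above; arch-specific types as in the ARM back end.
SuspendCheckSlowPathARM* GetOrCreateSuspendSlowPath(HSuspendCheck* check,
                                                    HBasicBlock* successor) {
  auto* path = down_cast<SuspendCheckSlowPathARM*>(check->GetSlowPath());
  if (path == nullptr) {
    path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(check, successor);
    check->SetSlowPath(path);
    codegen_->AddSlowPath(path);
    if (successor != nullptr) {
      DCHECK(successor->IsLoopHeader());  // Only loop headers pass a successor.
      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(check);
    }
  } else {
    DCHECK_EQ(path->GetSuccessor(), successor);  // Reuse implies same continuation.
  }
  return path;
}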
@@ -3791,12 +3796,12 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
codegen_->LoadCurrentMethod(out);
- __ LoadFromOffset(kLoadWord, out, out, mirror::ArtMethod::DeclaringClassOffset().Int32Value());
+ __ LoadFromOffset(kLoadWord, out, out, ArtMethod::DeclaringClassOffset().Int32Value());
} else {
DCHECK(cls->CanCallRuntime());
codegen_->LoadCurrentMethod(out);
__ LoadFromOffset(
- kLoadWord, out, out, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value());
+ kLoadWord, out, out, ArtMethod::DexCacheResolvedTypesOffset().Int32Value());
__ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
@@ -3853,7 +3858,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
Register out = load->GetLocations()->Out().AsRegister<Register>();
codegen_->LoadCurrentMethod(out);
- __ LoadFromOffset(kLoadWord, out, out, mirror::ArtMethod::DeclaringClassOffset().Int32Value());
+ __ LoadFromOffset(kLoadWord, out, out, ArtMethod::DeclaringClassOffset().Int32Value());
__ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
__ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
__ cmp(out, ShifterOperand(0));
@@ -4071,23 +4076,33 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
//
// Currently we implement the app -> app logic, which looks up in the resolve cache.
- // temp = method;
- LoadCurrentMethod(temp);
- if (!invoke->IsRecursive()) {
- // temp = temp->dex_cache_resolved_methods_;
- __ LoadFromOffset(
- kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
- // temp = temp[index_in_cache]
- __ LoadFromOffset(
- kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
+ if (invoke->IsStringInit()) {
+ // temp = thread->string_init_entrypoint
+ __ LoadFromOffset(kLoadWord, temp, TR, invoke->GetStringInitOffset());
// LR = temp[offset_of_quick_compiled_code]
__ LoadFromOffset(kLoadWord, LR, temp,
- mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kArmWordSize).Int32Value());
// LR()
__ blx(LR);
} else {
- __ bl(GetFrameEntryLabel());
+ // temp = method;
+ LoadCurrentMethod(temp);
+ if (!invoke->IsRecursive()) {
+ // temp = temp->dex_cache_resolved_methods_;
+ __ LoadFromOffset(
+ kLoadWord, temp, temp, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
+ // temp = temp[index_in_cache]
+ __ LoadFromOffset(
+ kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
+ // LR = temp[offset_of_quick_compiled_code]
+ __ LoadFromOffset(kLoadWord, LR, temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArmWordSize).Int32Value());
+ // LR()
+ __ blx(LR);
+ } else {
+ __ bl(GetFrameEntryLabel());
+ }
}
DCHECK(!IsLeafMethod());
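Static/direct dispatch now has three shapes; schematically (field names follow the inline comments above, not a public API):

// if (invoke->IsStringInit()):
//   String.<init> calls are rewritten to StringFactory methods, and the
//   resolved target is published per thread, so it is loaded from a
//   Thread-relative offset instead of the dex cache.
// else if (invoke->IsRecursive()):
//   branch straight to this method's own frame entry label.
// else:
//   method = current_method->dex_cache_resolved_methods_[method_index];
//   call method->entry_point_from_quick_compiled_code_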
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 600903621d..c410fa80ba 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -54,7 +54,8 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegis
: CallingConvention(kRuntimeParameterCoreRegisters,
kRuntimeParameterCoreRegistersLength,
kRuntimeParameterFpuRegisters,
- kRuntimeParameterFpuRegistersLength) {}
+ kRuntimeParameterFpuRegistersLength,
+ kArmPointerSize) {}
private:
DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
@@ -72,28 +73,26 @@ class InvokeDexCallingConvention : public CallingConvention<Register, SRegister>
: CallingConvention(kParameterCoreRegisters,
kParameterCoreRegistersLength,
kParameterFpuRegisters,
- kParameterFpuRegistersLength) {}
+ kParameterFpuRegistersLength,
+ kArmPointerSize) {}
private:
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorARM : public InvokeDexCallingConventionVisitor {
public:
- InvokeDexCallingConventionVisitor()
- : gp_index_(0), float_index_(0), double_index_(0), stack_index_(0) {}
+ InvokeDexCallingConventionVisitorARM() {}
+ virtual ~InvokeDexCallingConventionVisitorARM() {}
- Location GetNextLocation(Primitive::Type type);
+ Location GetNextLocation(Primitive::Type type) OVERRIDE;
Location GetReturnLocation(Primitive::Type type);
private:
InvokeDexCallingConvention calling_convention;
- uint32_t gp_index_;
- uint32_t float_index_;
- uint32_t double_index_;
- uint32_t stack_index_;
+ uint32_t double_index_ = 0;
- DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+ DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM);
};
class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap {
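The per-arch visitors now derive from a shared base; a sketch of that base (the real declaration lands in code_generator.h as part of this change, so treat the exact field set as likely rather than quoted):

class InvokeDexCallingConventionVisitor {
 public:
  virtual Location GetNextLocation(Primitive::Type type) = 0;

 protected:
  InvokeDexCallingConventionVisitor() {}
  virtual ~InvokeDexCallingConventionVisitor() {}

  uint32_t gp_index_ = 0;     // Next core register to hand out.
  uint32_t float_index_ = 0;  // Next FP register (ARM keeps double_index_ on top).
  uint32_t stack_index_ = 0;  // Next stack slot.

 private:
  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
};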
@@ -151,7 +150,7 @@ class LocationsBuilderARM : public HGraphVisitor {
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
CodeGeneratorARM* const codegen_;
- InvokeDexCallingConventionVisitor parameter_visitor_;
+ InvokeDexCallingConventionVisitorARM parameter_visitor_;
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
};
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index dada4ce5bd..55ef66fa99 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -17,6 +17,7 @@
#include "code_generator_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
+#include "art_method.h"
#include "common_arm64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
@@ -24,8 +25,7 @@
#include "intrinsics.h"
#include "intrinsics_arm64.h"
#include "mirror/array-inl.h"
-#include "mirror/art_method.h"
-#include "mirror/class.h"
+#include "mirror/class-inl.h"
#include "offsets.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
@@ -65,7 +65,6 @@ using helpers::WRegisterFrom;
using helpers::XRegisterFrom;
using helpers::ARM64EncodableConstantOrRegister;
-static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>);
static constexpr int kCurrentMethodStackOffset = 0;
inline Condition ARM64Condition(IfCondition cond) {
@@ -285,6 +284,10 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
return &return_label_;
}
+ HBasicBlock* GetSuccessor() const {
+ return successor_;
+ }
+
private:
HSuspendCheck* const instruction_;
// If not null, the block to branch to after the suspend check.
@@ -372,15 +375,15 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
#undef __
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
Location next_location;
if (type == Primitive::kPrimVoid) {
LOG(FATAL) << "Unreachable type " << type;
}
if (Primitive::IsFloatingPointType(type) &&
- (fp_index_ < calling_convention.GetNumberOfFpuRegisters())) {
- next_location = LocationFrom(calling_convention.GetFpuRegisterAt(fp_index_++));
+ (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
+ next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
} else if (!Primitive::IsFloatingPointType(type) &&
(gp_index_ < calling_convention.GetNumberOfRegisters())) {
next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
@@ -964,7 +967,7 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type,
void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) {
DCHECK(RequiresCurrentMethod());
- DCHECK(current_method.IsW());
+ CHECK(current_method.IsX());
__ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset));
}
@@ -1034,8 +1037,19 @@ void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
SuspendCheckSlowPathARM64* slow_path =
- new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
- codegen_->AddSlowPath(slow_path);
+ down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
+ if (slow_path == nullptr) {
+ slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor);
+ instruction->SetSlowPath(slow_path);
+ codegen_->AddSlowPath(slow_path);
+ if (successor != nullptr) {
+ DCHECK(successor->IsLoopHeader());
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+ }
+ } else {
+ DCHECK_EQ(slow_path->GetSuccessor(), successor);
+ }
+
UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
Register temp = temps.AcquireW();
@@ -1907,8 +1921,8 @@ void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
locations->AddTemp(LocationFrom(x0));
- InvokeDexCallingConventionVisitor calling_convention_visitor;
- for (size_t i = 0; i < invoke->InputCount(); i++) {
+ InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
+ for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
HInstruction* input = invoke->InputAt(i);
locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
}
@@ -1925,12 +1939,12 @@ void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
- Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0));
- uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() +
- (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry);
+ Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0));
+ uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+ invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
Location receiver = invoke->GetLocations()->InAt(0);
Offset class_offset = mirror::Object::ClassOffset();
- Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+ Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
// The register ip1 is required to be used for the hidden argument in
// art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
@@ -1942,16 +1956,16 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
// temp = object->GetClass();
if (receiver.IsStackSlot()) {
- __ Ldr(temp, StackOperandFrom(receiver));
- __ Ldr(temp, HeapOperand(temp, class_offset));
+ __ Ldr(temp.W(), StackOperandFrom(receiver));
+ __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
} else {
- __ Ldr(temp, HeapOperandFrom(receiver, class_offset));
+ __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetImtEntryAt(method_offset);
- __ Ldr(temp, HeapOperand(temp, method_offset));
+ __ Ldr(temp, MemOperand(temp, method_offset));
// lr = temp->GetEntryPoint();
- __ Ldr(lr, HeapOperand(temp, entry_point));
+ __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
// lr();
__ Blr(lr);
DCHECK(!codegen_->IsLeafMethod());
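The W()/MemOperand churn in this hunk is the visible consequence of ArtMethod leaving the managed heap: class pointers stay 32-bit compressed heap references, while method-table slots now hold raw native pointers. Schematically:

__ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));  // mirror::Class*: 32-bit heap ref
__ Ldr(temp, MemOperand(temp, method_offset));              // ArtMethod*: native pointer, X register
__ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));     // entry point within ArtMethod
__ Blr(lr);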
@@ -1968,13 +1982,9 @@ void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
}
void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // Explicit clinit checks triggered by static invokes must have been
- // pruned by art::PrepareForRegisterAllocation, but this step is not
- // run in baseline. So we remove them manually here if we find them.
- // TODO: Instead of this local workaround, address this properly.
- if (invoke->IsStaticWithExplicitClinitCheck()) {
- invoke->RemoveClinitCheckOrLoadClassAsLastInput();
- }
+ // When we do not run baseline, explicit clinit checks triggered by static
+ // invokes must have been pruned by art::PrepareForRegisterAllocation.
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
if (intrinsic.TryDispatch(invoke)) {
@@ -1996,8 +2006,7 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege
void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp) {
// Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
DCHECK(temp.Is(kArtMethodRegister));
- size_t index_in_cache = mirror::Array::DataOffset(kHeapRefSize).SizeValue() +
- invoke->GetDexMethodIndex() * kHeapRefSize;
+ size_t index_in_cache = GetCachePointerOffset(invoke->GetDexMethodIndex());
// TODO: Implement all kinds of calls:
// 1) boot -> boot
@@ -2006,36 +2015,47 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
//
// Currently we implement the app -> app logic, which looks up in the resolve cache.
- // temp = method;
- LoadCurrentMethod(temp);
- if (!invoke->IsRecursive()) {
- // temp = temp->dex_cache_resolved_methods_;
- __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset()));
- // temp = temp[index_in_cache];
- __ Ldr(temp, HeapOperand(temp, index_in_cache));
- // lr = temp->entry_point_from_quick_compiled_code_;
- __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArm64WordSize)));
- // lr();
+ if (invoke->IsStringInit()) {
+ // temp = thread->string_init_entrypoint
+ __ Ldr(temp.X(), MemOperand(tr, invoke->GetStringInitOffset()));
+ // LR = temp->entry_point_from_quick_compiled_code_;
+ __ Ldr(lr, MemOperand(
+ temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value()));
+ // lr()
__ Blr(lr);
} else {
- __ Bl(&frame_entry_label_);
+ // temp = method;
+ LoadCurrentMethod(temp.X());
+ if (!invoke->IsRecursive()) {
+ // temp = temp->dex_cache_resolved_methods_;
+ __ Ldr(temp.W(), MemOperand(temp.X(),
+ ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
+ // temp = temp[index_in_cache];
+ __ Ldr(temp.X(), MemOperand(temp, index_in_cache));
+ // lr = temp->entry_point_from_quick_compiled_code_;
+ __ Ldr(lr, MemOperand(temp.X(), ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArm64WordSize).Int32Value()));
+ // lr();
+ __ Blr(lr);
+ } else {
+ __ Bl(&frame_entry_label_);
+ }
}
DCHECK(!IsLeafMethod());
}
void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // Explicit clinit checks triggered by static invokes must have been
- // pruned by art::PrepareForRegisterAllocation.
- DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+ // When we do not run baseline, explicit clinit checks triggered by static
+ // invokes must have been pruned by art::PrepareForRegisterAllocation.
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
}
BlockPoolsScope block_pools(GetVIXLAssembler());
- Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0));
+ Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0));
codegen_->GenerateStaticOrDirectCall(invoke, temp);
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
@@ -2047,27 +2067,27 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
LocationSummary* locations = invoke->GetLocations();
Location receiver = locations->InAt(0);
- Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0));
- size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
- invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
+ Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0));
+ size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+ invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
Offset class_offset = mirror::Object::ClassOffset();
- Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
+ Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
BlockPoolsScope block_pools(GetVIXLAssembler());
// temp = object->GetClass();
if (receiver.IsStackSlot()) {
- __ Ldr(temp, MemOperand(sp, receiver.GetStackIndex()));
- __ Ldr(temp, HeapOperand(temp, class_offset));
+ __ Ldr(temp.W(), MemOperand(sp, receiver.GetStackIndex()));
+ __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
} else {
DCHECK(receiver.IsRegister());
- __ Ldr(temp, HeapOperandFrom(receiver, class_offset));
+ __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetMethodAt(method_offset);
- __ Ldr(temp, HeapOperand(temp, method_offset));
+ __ Ldr(temp, MemOperand(temp, method_offset));
// lr = temp->GetEntryPoint();
- __ Ldr(lr, HeapOperand(temp, entry_point.SizeValue()));
+ __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
// lr();
__ Blr(lr);
DCHECK(!codegen_->IsLeafMethod());
@@ -2086,12 +2106,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
if (cls->IsReferrersClass()) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
- codegen_->LoadCurrentMethod(out);
- __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DeclaringClassOffset()));
+ codegen_->LoadCurrentMethod(out.X());
+ __ Ldr(out, MemOperand(out.X(), ArtMethod::DeclaringClassOffset().Int32Value()));
} else {
DCHECK(cls->CanCallRuntime());
- codegen_->LoadCurrentMethod(out);
- __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DexCacheResolvedTypesOffset()));
+ codegen_->LoadCurrentMethod(out.X());
+ __ Ldr(out, MemOperand(out.X(), ArtMethod::DexCacheResolvedTypesOffset().Int32Value()));
__ Ldr(out, HeapOperand(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
@@ -2138,8 +2158,8 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
codegen_->AddSlowPath(slow_path);
Register out = OutputRegister(load);
- codegen_->LoadCurrentMethod(out);
- __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DeclaringClassOffset()));
+ codegen_->LoadCurrentMethod(out.X());
+ __ Ldr(out, MemOperand(out.X(), ArtMethod::DeclaringClassOffset().Int32Value()));
__ Ldr(out, HeapOperand(out, mirror::Class::DexCacheStringsOffset()));
__ Ldr(out, HeapOperand(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
__ Cbz(out, slow_path->GetEntryLabel());
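For reference, the pointer chase emitted above, as commented pseudocode (field names taken from the offsets used, not a stable API):

// ArtMethod* m = GetCurrentMethod();            // X-register load after this change
// mirror::Class* klass = m->declaring_class_;   // 32-bit heap reference
// auto* strings = klass->dex_cache_strings_;
// mirror::String* s = strings[string_index];
// if (s == nullptr) goto slow_path;             // the Cbz above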
@@ -2267,7 +2287,7 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
locations->SetOut(LocationFrom(x0));
locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
- void*, uint32_t, int32_t, mirror::ArtMethod*>();
+ void*, uint32_t, int32_t, ArtMethod*>();
}
void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
@@ -2275,17 +2295,16 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
InvokeRuntimeCallingConvention calling_convention;
Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt);
DCHECK(type_index.Is(w0));
- Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimNot);
- DCHECK(current_method.Is(w2));
- codegen_->LoadCurrentMethod(current_method);
+ Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimLong);
+ DCHECK(current_method.Is(x2));
+ codegen_->LoadCurrentMethod(current_method.X());
__ Mov(type_index, instruction->GetTypeIndex());
codegen_->InvokeRuntime(
GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(),
instruction,
instruction->GetDexPc(),
nullptr);
- CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
- void*, uint32_t, int32_t, mirror::ArtMethod*>();
+ CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
}
void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
@@ -2295,7 +2314,7 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
- CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*>();
+ CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
}
void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
@@ -2304,14 +2323,14 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction)
DCHECK(type_index.Is(w0));
Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimNot);
DCHECK(current_method.Is(w1));
- codegen_->LoadCurrentMethod(current_method);
+ codegen_->LoadCurrentMethod(current_method.X());
__ Mov(type_index, instruction->GetTypeIndex());
codegen_->InvokeRuntime(
GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(),
instruction,
instruction->GetDexPc(),
nullptr);
- CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*>();
+ CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
}
void LocationsBuilderARM64::VisitNot(HNot* instruction) {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 5a358671cc..3486cdebec 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -45,7 +45,7 @@ static const vixl::FPRegister kParameterFPRegisters[] = {
static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
const vixl::Register tr = vixl::x18; // Thread Register
-static const vixl::Register kArtMethodRegister = vixl::w0; // Method register on invoke.
+static const vixl::Register kArtMethodRegister = vixl::x0; // Method register on invoke.
const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31);
@@ -94,7 +94,8 @@ class InvokeRuntimeCallingConvention : public CallingConvention<vixl::Register,
: CallingConvention(kRuntimeParameterCoreRegisters,
kRuntimeParameterCoreRegistersLength,
kRuntimeParameterFpuRegisters,
- kRuntimeParameterFpuRegistersLength) {}
+ kRuntimeParameterFpuRegistersLength,
+ kArm64PointerSize) {}
Location GetReturnLocation(Primitive::Type return_type);
@@ -108,7 +109,8 @@ class InvokeDexCallingConvention : public CallingConvention<vixl::Register, vixl
: CallingConvention(kParameterCoreRegisters,
kParameterCoreRegistersLength,
kParameterFPRegisters,
- kParameterFPRegistersLength) {}
+ kParameterFPRegistersLength,
+ kArm64PointerSize) {}
Location GetReturnLocation(Primitive::Type return_type) {
return ARM64ReturnLocation(return_type);
@@ -119,25 +121,20 @@ class InvokeDexCallingConvention : public CallingConvention<vixl::Register, vixl
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor {
public:
- InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+ InvokeDexCallingConventionVisitorARM64() {}
+ virtual ~InvokeDexCallingConventionVisitorARM64() {}
- Location GetNextLocation(Primitive::Type type);
+ Location GetNextLocation(Primitive::Type type) OVERRIDE;
Location GetReturnLocation(Primitive::Type return_type) {
return calling_convention.GetReturnLocation(return_type);
}
private:
InvokeDexCallingConvention calling_convention;
- // The current index for core registers.
- uint32_t gp_index_;
- // The current index for floating-point registers.
- uint32_t fp_index_;
- // The current stack index.
- uint32_t stack_index_;
-
- DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+
+ DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64);
};
class InstructionCodeGeneratorARM64 : public HGraphVisitor {
@@ -193,7 +190,7 @@ class LocationsBuilderARM64 : public HGraphVisitor {
void HandleShift(HBinaryOperation* instr);
CodeGeneratorARM64* const codegen_;
- InvokeDexCallingConventionVisitor parameter_visitor_;
+ InvokeDexCallingConventionVisitorARM64 parameter_visitor_;
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
};
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 04999bedb0..60fd29bf74 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -16,6 +16,7 @@
#include "code_generator_x86.h"
+#include "art_method.h"
#include "code_generator_utils.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
@@ -23,8 +24,7 @@
#include "intrinsics.h"
#include "intrinsics_x86.h"
#include "mirror/array-inl.h"
-#include "mirror/art_method.h"
-#include "mirror/class.h"
+#include "mirror/class-inl.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
@@ -153,6 +153,10 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 {
return &return_label_;
}
+ HBasicBlock* GetSuccessor() const {
+ return successor_;
+ }
+
private:
HSuspendCheck* const instruction_;
HBasicBlock* const successor_;
@@ -551,7 +555,7 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const {
UNREACHABLE();
}
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type type) {
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -582,7 +586,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
}
case Primitive::kPrimFloat: {
- uint32_t index = fp_index_++;
+ uint32_t index = float_index_++;
stack_index_++;
if (index < calling_convention.GetNumberOfFpuRegisters()) {
return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -592,7 +596,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
}
case Primitive::kPrimDouble: {
- uint32_t index = fp_index_++;
+ uint32_t index = float_index_++;
stack_index_ += 2;
if (index < calling_convention.GetNumberOfFpuRegisters()) {
return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -809,7 +813,6 @@ void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) {
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
@@ -1194,13 +1197,9 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
}
void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // Explicit clinit checks triggered by static invokes must have been
- // pruned by art::PrepareForRegisterAllocation, but this step is not
- // run in baseline. So we remove them manually here if we find them.
- // TODO: Instead of this local workaround, address this properly.
- if (invoke->IsStaticWithExplicitClinitCheck()) {
- invoke->RemoveClinitCheckOrLoadClassAsLastInput();
- }
+ // When we do not run baseline, explicit clinit checks triggered by static
+ // invokes must have been pruned by art::PrepareForRegisterAllocation.
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderX86 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
@@ -1220,9 +1219,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen)
}
void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // Explicit clinit checks triggered by static invokes must have been
- // pruned by art::PrepareForRegisterAllocation.
- DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+ // When we do not run baseline, explicit clinit checks triggered by static
+ // invokes must have been pruned by art::PrepareForRegisterAllocation.
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
@@ -1242,8 +1241,8 @@ void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
locations->AddTemp(Location::RegisterLocation(EAX));
- InvokeDexCallingConventionVisitor calling_convention_visitor;
- for (size_t i = 0; i < invoke->InputCount(); i++) {
+ InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
+ for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
HInstruction* input = invoke->InputAt(i);
locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
}
@@ -1276,8 +1275,8 @@ void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
- uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() +
- invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
+ uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+ invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
LocationSummary* locations = invoke->GetLocations();
Location receiver = locations->InAt(0);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -1293,7 +1292,7 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
__ movl(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
__ call(Address(
- temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+ temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -1308,8 +1307,8 @@ void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
- uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() +
- (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry);
+ uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+ invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
LocationSummary* locations = invoke->GetLocations();
Location receiver = locations->InAt(0);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -1329,7 +1328,7 @@ void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke)
// temp = temp->GetImtEntryAt(method_offset);
__ movl(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
- __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kX86WordSize).Int32Value()));
DCHECK(!codegen_->IsLeafMethod());
@@ -2744,17 +2743,12 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
switch (op->GetResultType()) {
- case Primitive::kPrimInt: {
- locations->SetInAt(0, Location::RequiresRegister());
- // The shift count needs to be in CL.
- locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
- locations->SetOut(Location::SameAsFirstInput());
- break;
- }
+ case Primitive::kPrimInt:
case Primitive::kPrimLong: {
+ // With the output constrained to SameAsFirstInput(), the first input can't be Location::Any().
locations->SetInAt(0, Location::RequiresRegister());
- // The shift count needs to be in CL.
- locations->SetInAt(1, Location::RegisterLocation(ECX));
+ // The shift count needs to be in CL or a constant.
+ locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2773,6 +2767,7 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
switch (op->GetResultType()) {
case Primitive::kPrimInt: {
+ DCHECK(first.IsRegister());
Register first_reg = first.AsRegister<Register>();
if (second.IsRegister()) {
Register second_reg = second.AsRegister<Register>();
@@ -2785,7 +2780,11 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
__ shrl(first_reg, second_reg);
}
} else {
- Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
+ int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue;
+ if (shift == 0) {
+ return;
+ }
+ Immediate imm(shift);
if (op->IsShl()) {
__ shll(first_reg, imm);
} else if (op->IsShr()) {
@@ -2797,14 +2796,29 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
break;
}
case Primitive::kPrimLong: {
- Register second_reg = second.AsRegister<Register>();
- DCHECK_EQ(ECX, second_reg);
- if (op->IsShl()) {
- GenerateShlLong(first, second_reg);
- } else if (op->IsShr()) {
- GenerateShrLong(first, second_reg);
+ if (second.IsRegister()) {
+ Register second_reg = second.AsRegister<Register>();
+ DCHECK_EQ(ECX, second_reg);
+ if (op->IsShl()) {
+ GenerateShlLong(first, second_reg);
+ } else if (op->IsShr()) {
+ GenerateShrLong(first, second_reg);
+ } else {
+ GenerateUShrLong(first, second_reg);
+ }
} else {
- GenerateUShrLong(first, second_reg);
+ // Shift by a constant.
+ int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue;
+ // Nothing to do if the shift is 0, as the input is already the output.
+ if (shift != 0) {
+ if (op->IsShl()) {
+ GenerateShlLong(first, shift);
+ } else if (op->IsShr()) {
+ GenerateShrLong(first, shift);
+ } else {
+ GenerateUShrLong(first, shift);
+ }
+ }
}
break;
}
@@ -2813,6 +2827,34 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
}
}
+void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
+ Register low = loc.AsRegisterPairLow<Register>();
+ Register high = loc.AsRegisterPairHigh<Register>();
+ if (shift == 1) {
+ // This is just an addition.
+ __ addl(low, low);
+ __ adcl(high, high);
+ } else if (shift == 32) {
+ // Shift by 32 is easy. High gets low, and low gets 0.
+ codegen_->EmitParallelMoves(
+ loc.ToLow(),
+ loc.ToHigh(),
+ Primitive::kPrimInt,
+ Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
+ loc.ToLow(),
+ Primitive::kPrimInt);
+ } else if (shift > 32) {
+ // Low part becomes 0. High part is low part << (shift-32).
+ __ movl(high, low);
+ __ shll(high, Immediate(shift - 32));
+ __ xorl(low, low);
+ } else {
+ // Between 1 and 31.
+ __ shld(high, low, Immediate(shift));
+ __ shll(low, Immediate(shift));
+ }
+}
+
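A worked example of the shift > 32 arm, with a hypothetical input:

// (high = 0x00000000, low = 0x80000001) shifted left by 33:
//   movl high, low    // high = 0x80000001
//   shll high, 1      // high = 0x00000002 (low's bit 31 falls off the top)
//   xorl low, low     // low  = 0
// i.e. 0x0000000080000001 << 33 == 0x0000000200000000 (mod 2^64).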
void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
Label done;
__ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
@@ -2824,6 +2866,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register
__ Bind(&done);
}
+void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
+ Register low = loc.AsRegisterPairLow<Register>();
+ Register high = loc.AsRegisterPairHigh<Register>();
+ if (shift == 32) {
+ // Need to copy the sign.
+ DCHECK_NE(low, high);
+ __ movl(low, high);
+ __ sarl(high, Immediate(31));
+ } else if (shift > 32) {
+ DCHECK_NE(low, high);
+ // High part becomes sign. Low part is shifted by shift - 32.
+ __ movl(low, high);
+ __ sarl(high, Immediate(31));
+ __ sarl(low, Immediate(shift - 32));
+ } else {
+ // Between 1 and 31.
+ __ shrd(low, high, Immediate(shift));
+ __ sarl(high, Immediate(shift));
+ }
+}
+
void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
Label done;
__ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
@@ -2835,6 +2898,30 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register
__ Bind(&done);
}
+void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
+ Register low = loc.AsRegisterPairLow<Register>();
+ Register high = loc.AsRegisterPairHigh<Register>();
+ if (shift == 32) {
+ // Shift by 32 is easy. Low gets high, and high gets 0.
+ codegen_->EmitParallelMoves(
+ loc.ToHigh(),
+ loc.ToLow(),
+ Primitive::kPrimInt,
+ Location::ConstantLocation(GetGraph()->GetIntConstant(0)),
+ loc.ToHigh(),
+ Primitive::kPrimInt);
+ } else if (shift > 32) {
+ // Low part is high >> (shift - 32). High part becomes 0.
+ __ movl(low, high);
+ __ shrl(low, Immediate(shift - 32));
+ __ xorl(high, high);
+ } else {
+ // Between 1 and 31.
+ __ shrd(low, high, Immediate(shift));
+ __ shrl(high, Immediate(shift));
+ }
+}
+
void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
Label done;
__ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
@@ -3114,18 +3201,28 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
// 3) app -> app
//
// Currently we implement the app -> app logic, which looks up in the resolve cache.
- // temp = method;
- LoadCurrentMethod(temp);
- if (!invoke->IsRecursive()) {
- // temp = temp->dex_cache_resolved_methods_;
- __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
- // temp = temp[index_in_cache]
- __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+
+ if (invoke->IsStringInit()) {
+ // temp = thread->string_init_entrypoint
+ __ fs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset()));
// (temp + offset_of_quick_compiled_code)()
__ call(Address(
- temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+ temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
} else {
- __ call(GetFrameEntryLabel());
+ // temp = method;
+ LoadCurrentMethod(temp);
+ if (!invoke->IsRecursive()) {
+ // temp = temp->dex_cache_resolved_methods_;
+ __ movl(temp, Address(temp, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
+ // temp = temp[index_in_cache]
+ __ movl(temp, Address(temp,
+ CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex())));
+ // (temp + offset_of_quick_compiled_code)()
+ __ call(Address(temp,
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+ } else {
+ __ call(GetFrameEntryLabel());
+ }
}
DCHECK(!IsLeafMethod());
@@ -3904,8 +4001,19 @@ void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction)
void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
SuspendCheckSlowPathX86* slow_path =
- new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
- codegen_->AddSlowPath(slow_path);
+ down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath());
+ if (slow_path == nullptr) {
+ slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor);
+ instruction->SetSlowPath(slow_path);
+ codegen_->AddSlowPath(slow_path);
+ if (successor != nullptr) {
+ DCHECK(successor->IsLoopHeader());
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+ }
+ } else {
+ DCHECK_EQ(slow_path->GetSuccessor(), successor);
+ }
+
__ fs()->cmpw(Address::Absolute(
Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0));
if (successor == nullptr) {
@@ -4171,11 +4279,11 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
codegen_->LoadCurrentMethod(out);
- __ movl(out, Address(out, mirror::ArtMethod::DeclaringClassOffset().Int32Value()));
+ __ movl(out, Address(out, ArtMethod::DeclaringClassOffset().Int32Value()));
} else {
DCHECK(cls->CanCallRuntime());
codegen_->LoadCurrentMethod(out);
- __ movl(out, Address(out, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value()));
+ __ movl(out, Address(out, ArtMethod::DexCacheResolvedTypesOffset().Int32Value()));
__ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
@@ -4230,7 +4338,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
Register out = load->GetLocations()->Out().AsRegister<Register>();
codegen_->LoadCurrentMethod(out);
- __ movl(out, Address(out, mirror::ArtMethod::DeclaringClassOffset().Int32Value()));
+ __ movl(out, Address(out, ArtMethod::DeclaringClassOffset().Int32Value()));
__ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
__ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
__ testl(out, out);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 8bd3cd3585..43214fe7d5 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -52,7 +52,8 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmReg
: CallingConvention(kRuntimeParameterCoreRegisters,
kRuntimeParameterCoreRegistersLength,
kRuntimeParameterFpuRegisters,
- kRuntimeParameterFpuRegistersLength) {}
+ kRuntimeParameterFpuRegistersLength,
+ kX86PointerSize) {}
private:
DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
@@ -64,7 +65,8 @@ class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegiste
kParameterCoreRegisters,
kParameterCoreRegistersLength,
kParameterFpuRegisters,
- kParameterFpuRegistersLength) {}
+ kParameterFpuRegistersLength,
+ kX86PointerSize) {}
RegisterPair GetRegisterPairAt(size_t argument_index) {
DCHECK_LT(argument_index + 1, GetNumberOfRegisters());
@@ -75,22 +77,17 @@ class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegiste
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor {
public:
- InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+ InvokeDexCallingConventionVisitorX86() {}
+ virtual ~InvokeDexCallingConventionVisitorX86() {}
- Location GetNextLocation(Primitive::Type type);
+ Location GetNextLocation(Primitive::Type type) OVERRIDE;
private:
InvokeDexCallingConvention calling_convention;
- // The current index for cpu registers.
- uint32_t gp_index_;
- // The current index for fpu registers.
- uint32_t fp_index_;
- // The current stack index.
- uint32_t stack_index_;
-
- DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+
+ DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86);
};
class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
@@ -137,7 +134,7 @@ class LocationsBuilderX86 : public HGraphVisitor {
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
CodeGeneratorX86* const codegen_;
- InvokeDexCallingConventionVisitor parameter_visitor_;
+ InvokeDexCallingConventionVisitorX86 parameter_visitor_;
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
};
@@ -171,6 +168,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
void GenerateShlLong(const Location& loc, Register shifter);
void GenerateShrLong(const Location& loc, Register shifter);
void GenerateUShrLong(const Location& loc, Register shifter);
+ void GenerateShlLong(const Location& loc, int shift);
+ void GenerateShrLong(const Location& loc, int shift);
+ void GenerateUShrLong(const Location& loc, int shift);
void GenerateMemoryBarrier(MemBarrierKind kind);
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5ce932928b..b0174b9b16 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -16,14 +16,14 @@
#include "code_generator_x86_64.h"
+#include "art_method.h"
#include "code_generator_utils.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_x86_64.h"
#include "mirror/array-inl.h"
-#include "mirror/art_method.h"
-#include "mirror/class.h"
+#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
#include "thread.h"
#include "utils/assembler.h"
@@ -99,7 +99,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 {
if (is_div_) {
__ negq(cpu_reg_);
} else {
- __ movq(cpu_reg_, Immediate(0));
+ __ xorl(cpu_reg_, cpu_reg_);
}
}
__ jmp(GetExitLabel());
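The movq-to-xorl swaps throughout this file rely on one x86-64 rule worth stating once: writing a 32-bit register implicitly zero-extends into the full 64-bit register. So these clear RAX equally well:

//   xorl eax, eax   // 2 bytes, no REX prefix, recognized zeroing idiom
//   movq rax, 0     // same architectural effect, longer encoding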
@@ -136,6 +136,10 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
return &return_label_;
}
+ HBasicBlock* GetSuccessor() const {
+ return successor_;
+ }
+
private:
HSuspendCheck* const instruction_;
HBasicBlock* const successor_;
@@ -366,18 +370,27 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
//
// Currently we implement the app -> app logic, which looks up in the resolve cache.
- // temp = method;
- LoadCurrentMethod(temp);
- if (!invoke->IsRecursive()) {
- // temp = temp->dex_cache_resolved_methods_;
- __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
- // temp = temp[index_in_cache]
- __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+ if (invoke->IsStringInit()) {
+ // temp = thread->string_init_entrypoint
+ __ gs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset()));
// (temp + offset_of_quick_compiled_code)()
- __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kX86_64WordSize).SizeValue()));
} else {
- __ call(&frame_entry_label_);
+ // temp = method;
+ LoadCurrentMethod(temp);
+ if (!invoke->IsRecursive()) {
+ // temp = temp->dex_cache_resolved_methods_;
+ __ movl(temp, Address(temp, ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+ // temp = temp[index_in_cache]
+ __ movq(temp, Address(
+ temp, CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex())));
+ // (temp + offset_of_quick_compiled_code)()
+ __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kX86_64WordSize).SizeValue()));
+ } else {
+ __ call(&frame_entry_label_);
+ }
}
DCHECK(!IsLeafMethod());
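GetCacheOffset indexed 32-bit heap references; the dex-cache method array now stores pointer-sized ArtMethod* entries, hence GetCachePointerOffset and the movq. A sketch of the new helper, assuming it mirrors the old one with the element size swapped:

// Sketch; the real helper derives the pointer size from the instruction set.
static size_t GetCachePointerOffset(uint32_t index, size_t pointer_size) {
  return mirror::Array::DataOffset(pointer_size).SizeValue() + index * pointer_size;
}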
@@ -533,7 +546,7 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
}
}
- __ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
+ __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
}
void CodeGeneratorX86_64::GenerateFrameExit() {
@@ -573,7 +586,7 @@ void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
void CodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) {
DCHECK(RequiresCurrentMethod());
- __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
+ __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
}
Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const {
@@ -663,7 +676,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
DCHECK(constant->IsLongConstant());
value = constant->AsLongConstant()->GetValue();
}
- __ movq(CpuRegister(TMP), Immediate(value));
+ Load64BitValue(CpuRegister(TMP), value);
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
} else {
DCHECK(source.IsDoubleStackSlot());
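Load64BitValue, used here and repeatedly below, is a small helper introduced by this change; a plausible shape for it (branch conditions assumed, not quoted):

void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
  if (value == 0) {
    __ xorl(dest, dest);  // Zeroing idiom; the upper half clears too.
  } else if (IsUint<32>(value)) {
    __ movl(dest, Immediate(static_cast<int32_t>(value)));  // movl zero-extends.
  } else {
    __ movq(dest, Immediate(value));  // Full 64-bit immediate as a last resort.
  }
}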
@@ -696,9 +709,9 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction,
} else if (const_to_move->IsLongConstant()) {
int64_t value = const_to_move->AsLongConstant()->GetValue();
if (location.IsRegister()) {
- __ movq(location.AsRegister<CpuRegister>(), Immediate(value));
+ Load64BitValue(location.AsRegister<CpuRegister>(), value);
} else if (location.IsDoubleStackSlot()) {
- __ movq(CpuRegister(TMP), Immediate(value));
+ Load64BitValue(CpuRegister(TMP), value);
__ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP));
} else {
DCHECK(location.IsConstant());
@@ -763,7 +776,6 @@ void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck());
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
@@ -948,7 +960,7 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) {
LocationSummary* locations = comp->GetLocations();
CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
// Clear register: setcc only sets the low byte.
- __ xorq(reg, reg);
+ __ xorl(reg, reg);
Location lhs = locations->InAt(0);
Location rhs = locations->InAt(1);
if (rhs.IsRegister()) {
@@ -1232,7 +1244,7 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
codegen_->GenerateFrameExit();
}
-Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
+Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -1262,7 +1274,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
}
case Primitive::kPrimFloat: {
- uint32_t index = fp_index_++;
+ uint32_t index = float_index_++;
stack_index_++;
if (index < calling_convention.GetNumberOfFpuRegisters()) {
return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1272,7 +1284,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
}
case Primitive::kPrimDouble: {
- uint32_t index = fp_index_++;
+ uint32_t index = float_index_++;
stack_index_ += 2;
if (index < calling_convention.GetNumberOfFpuRegisters()) {
return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
@@ -1289,13 +1301,9 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
}
void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // Explicit clinit checks triggered by static invokes must have been
- // pruned by art::PrepareForRegisterAllocation, but this step is not
- // run in baseline. So we remove them manually here if we find them.
- // TODO: Instead of this local workaround, address this properly.
- if (invoke->IsStaticWithExplicitClinitCheck()) {
- invoke->RemoveClinitCheckOrLoadClassAsLastInput();
- }
+ // When we do not run baseline, explicit clinit checks triggered by static
+ // invokes must have been pruned by art::PrepareForRegisterAllocation.
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
@@ -1315,9 +1323,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codeg
}
void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- // Explicit clinit checks triggered by static invokes must have been
- // pruned by art::PrepareForRegisterAllocation.
- DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+ // When we do not run baseline, explicit clinit checks triggered by static
+ // invokes must have been pruned by art::PrepareForRegisterAllocation.
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
return;
@@ -1334,8 +1342,8 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
locations->AddTemp(Location::RegisterLocation(RDI));
- InvokeDexCallingConventionVisitor calling_convention_visitor;
- for (size_t i = 0; i < invoke->InputCount(); i++) {
+ InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
+ for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
HInstruction* input = invoke->InputAt(i);
locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType()));
}
@@ -1376,8 +1384,8 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke)
}
CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
- size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
- invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
+ size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+ invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
LocationSummary* locations = invoke->GetLocations();
Location receiver = locations->InAt(0);
size_t class_offset = mirror::Object::ClassOffset().SizeValue();
@@ -1390,9 +1398,9 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke)
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetMethodAt(method_offset);
- __ movl(temp, Address(temp, method_offset));
+ __ movq(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
- __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kX86_64WordSize).SizeValue()));
DCHECK(!codegen_->IsLeafMethod());
@@ -1408,15 +1416,15 @@ void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
- uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() +
- (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry);
+ uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+ invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
LocationSummary* locations = invoke->GetLocations();
Location receiver = locations->InAt(0);
size_t class_offset = mirror::Object::ClassOffset().SizeValue();
// Set the hidden argument.
- __ movq(invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(),
- Immediate(invoke->GetDexMethodIndex()));
+ CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>();
+ codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
// temp = object->GetClass();
if (receiver.IsStackSlot()) {
@@ -1427,9 +1435,9 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetImtEntryAt(method_offset);
- __ movl(temp, Address(temp, method_offset));
+ __ movq(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
- __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kX86_64WordSize).SizeValue()));
DCHECK(!codegen_->IsLeafMethod());
@@ -1852,7 +1860,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
Label done, nan;
- __ movq(output, Immediate(kPrimLongMax));
+ codegen_->Load64BitValue(output, kPrimLongMax);
// temp = long-to-float(output)
__ cvtsi2ss(temp, output, true);
// if input >= temp goto done
@@ -1865,7 +1873,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
__ jmp(&done);
__ Bind(&nan);
// output = 0
- __ xorq(output, output);
+ __ xorl(output, output);
__ Bind(&done);
break;
}
@@ -1877,7 +1885,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
Label done, nan;
- __ movq(output, Immediate(kPrimLongMax));
+ codegen_->Load64BitValue(output, kPrimLongMax);
// temp = long-to-double(output)
__ cvtsi2sd(temp, output, true);
// if input >= temp goto done
@@ -1890,7 +1898,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver
__ jmp(&done);
__ Bind(&nan);
// output = 0
- __ xorq(output, output);
+ __ xorl(output, output);
__ Bind(&done);
break;
}
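Both conversion hunks implement Java's saturating float-to-long semantics, and the `xorq` to `xorl` swaps are pure encoding wins: a 32-bit `xor` already clears the whole 64-bit register. A behavioral sketch of what the emitted sequence computes, in plain C++ rather than the generated code:

    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Java-style double-to-long conversion mirroring the emitted sequence:
    // NaN maps to 0 (the 'nan' label) and out-of-range inputs saturate.
    int64_t JavaDoubleToLong(double input) {
      constexpr int64_t kPrimLongMax = std::numeric_limits<int64_t>::max();
      if (std::isnan(input)) {
        return 0;  // the xorl at the 'nan' label
      }
      if (input >= static_cast<double>(kPrimLongMax)) {
        return kPrimLongMax;  // early 'done' exit with the preloaded maximum
      }
      if (input < static_cast<double>(std::numeric_limits<int64_t>::min())) {
        return std::numeric_limits<int64_t>::min();  // matches cvttsd2si overflow
      }
      return static_cast<int64_t>(input);  // in-range truncation
    }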
@@ -2479,7 +2487,7 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr
case Primitive::kPrimLong: {
if (instruction->IsRem()) {
- __ xorq(output_register, output_register);
+ __ xorl(output_register, output_register);
} else {
__ movq(output_register, input_register);
if (imm == -1) {
@@ -2523,7 +2531,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
- __ movq(rdx, Immediate(std::abs(imm) - 1));
+ codegen_->Load64BitValue(rdx, std::abs(imm) - 1);
__ addq(rdx, numerator);
__ testq(numerator, numerator);
__ cmov(kGreaterEqual, rdx, numerator);
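The sequence is the standard bias trick for round-toward-zero signed division by a power of two: only negative numerators get `|imm| - 1` added before the arithmetic shift (the shift and the negate for negative divisors fall outside this hunk). A sketch with a worked example, assuming GCC/Clang builtins and an arithmetic right shift on signed values:

    #include <cstdint>
    #include <cstdlib>

    // Bias-and-shift division by a power of two, mirroring the emitted code:
    // RDX holds the bias; cmov keeps the plain numerator when non-negative.
    int64_t DivByPowerOfTwoSketch(int64_t numerator, int64_t imm) {
      int64_t abs_imm = std::abs(imm);
      int64_t bias = abs_imm - 1;                        // Load64BitValue(rdx, ...)
      int64_t n = (numerator >= 0) ? numerator           // cmov(kGreaterEqual, ...)
                                   : numerator + bias;   // addq rdx, numerator
      int64_t quotient = n >> __builtin_ctzll(abs_imm);  // arithmetic shift by log2
      return (imm < 0) ? -quotient : quotient;
    }

For example, DivByPowerOfTwoSketch(-7, 4) is -1, whereas a bare -7 >> 2 would give -2.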
@@ -2620,7 +2628,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat
__ movq(numerator, rax);
// RAX = magic
- __ movq(rax, Immediate(magic));
+ codegen_->Load64BitValue(rax, magic);
// RDX:RAX = magic * numerator
__ imulq(numerator);
@@ -2649,8 +2657,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat
if (IsInt<32>(imm)) {
__ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
} else {
- __ movq(numerator, Immediate(imm));
- __ imulq(rdx, numerator);
+ __ imulq(rdx, codegen_->LiteralInt64Address(imm));
}
__ subq(rax, rdx);
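The `magic` constant is Granlund-Montgomery division: the divide becomes a 64x64->128 multiply (`imulq`) whose high half, after a correction and shift, yields the quotient, and the final `subq rax, rdx` recovers the remainder as `numerator - quotient * imm`. A self-contained sketch for divisor 7; the constant and shift below are computed for this example, not taken from the patch, and `__int128` is a GCC/Clang extension:

    #include <cstdint>

    // Signed division by 7 via magic = ceil(2^66 / 7) with shift 2. The magic
    // value is negative as an int64_t, so the numerator is added back after
    // the high multiply.
    int64_t DivBy7Sketch(int64_t n) {
      const int64_t magic = static_cast<int64_t>(0x924924924924924AULL);
      int64_t hi = static_cast<int64_t>((static_cast<__int128>(n) * magic) >> 64);
      hi += n;                                                      // correction
      hi >>= 2;                                                     // per-divisor shift
      hi += static_cast<int64_t>(static_cast<uint64_t>(hi) >> 63);  // round toward zero
      return hi;
    }

DivBy7Sketch(20) is 2 and DivBy7Sketch(-20) is -2; the remainder then follows as n - 7 * quotient.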
@@ -3016,8 +3023,8 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
InvokeRuntimeCallingConvention calling_convention;
codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1)));
- __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
-
+ codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
+ instruction->GetTypeIndex());
__ gs()->call(
Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true));
@@ -3038,7 +3045,8 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
InvokeRuntimeCallingConvention calling_convention;
codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2)));
- __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex()));
+ codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
+ instruction->GetTypeIndex());
__ gs()->call(
Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true));
@@ -3860,8 +3868,19 @@ void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instructio
void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
HBasicBlock* successor) {
SuspendCheckSlowPathX86_64* slow_path =
- new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
- codegen_->AddSlowPath(slow_path);
+ down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
+ if (slow_path == nullptr) {
+ slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
+ instruction->SetSlowPath(slow_path);
+ codegen_->AddSlowPath(slow_path);
+ if (successor != nullptr) {
+ DCHECK(successor->IsLoopHeader());
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+ }
+ } else {
+ DCHECK_EQ(slow_path->GetSuccessor(), successor);
+ }
+
__ gs()->cmpw(Address::Absolute(
Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0));
if (successor == nullptr) {
@@ -3934,45 +3953,42 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
} else if (constant->IsLongConstant()) {
int64_t value = constant->AsLongConstant()->GetValue();
if (destination.IsRegister()) {
- __ movq(destination.AsRegister<CpuRegister>(), Immediate(value));
+ codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
} else {
DCHECK(destination.IsDoubleStackSlot()) << destination;
- __ movq(CpuRegister(TMP), Immediate(value));
+ codegen_->Load64BitValue(CpuRegister(TMP), value);
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
} else if (constant->IsFloatConstant()) {
float fp_value = constant->AsFloatConstant()->GetValue();
int32_t value = bit_cast<int32_t, float>(fp_value);
- Immediate imm(value);
if (destination.IsFpuRegister()) {
XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
if (value == 0) {
// easy FP 0.0.
__ xorps(dest, dest);
} else {
- __ movl(CpuRegister(TMP), imm);
- __ movd(dest, CpuRegister(TMP));
+ __ movss(dest, codegen_->LiteralFloatAddress(fp_value));
}
} else {
DCHECK(destination.IsStackSlot()) << destination;
+ Immediate imm(value);
__ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
}
} else {
DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
double fp_value = constant->AsDoubleConstant()->GetValue();
int64_t value = bit_cast<int64_t, double>(fp_value);
- Immediate imm(value);
if (destination.IsFpuRegister()) {
XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
if (value == 0) {
__ xorpd(dest, dest);
} else {
- __ movq(CpuRegister(TMP), imm);
- __ movd(dest, CpuRegister(TMP));
+ __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value));
}
} else {
DCHECK(destination.IsDoubleStackSlot()) << destination;
- __ movq(CpuRegister(TMP), imm);
+ codegen_->Load64BitValue(CpuRegister(TMP), value);
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
}
@@ -4110,11 +4126,11 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
codegen_->LoadCurrentMethod(out);
- __ movl(out, Address(out, mirror::ArtMethod::DeclaringClassOffset().Int32Value()));
+ __ movl(out, Address(out, ArtMethod::DeclaringClassOffset().Int32Value()));
} else {
DCHECK(cls->CanCallRuntime());
codegen_->LoadCurrentMethod(out);
- __ movl(out, Address(out, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value()));
+ __ movl(out, Address(out, ArtMethod::DexCacheResolvedTypesOffset().Int32Value()));
__ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
@@ -4159,7 +4175,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
CpuRegister out = load->GetLocations()->Out().AsRegister<CpuRegister>();
codegen_->LoadCurrentMethod(CpuRegister(out));
- __ movl(out, Address(out, mirror::ArtMethod::DeclaringClassOffset().Int32Value()));
+ __ movl(out, Address(out, ArtMethod::DeclaringClassOffset().Int32Value()));
__ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
__ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
__ testl(out, out);
@@ -4431,6 +4447,17 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) {
LOG(FATAL) << "Unreachable";
}
+void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
+ if (value == 0) {
+ __ xorl(dest, dest);
+ } else if (value > 0 && IsInt<32>(value)) {
+    // We can use a 32-bit move, as it will zero-extend and has a shorter encoding.
+ __ movl(dest, Immediate(static_cast<int32_t>(value)));
+ } else {
+ __ movq(dest, Immediate(value));
+ }
+}
+
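The three cases are driven by x86-64 encoding sizes together with the rule that 32-bit operations zero-extend into the full register. A sketch of the same dispatch with the approximate byte counts that motivate it (counts assume a legacy register such as RAX; extended registers add a REX prefix):

    #include <cstdint>

    enum class MoveKind { kXor32, kMov32, kMov64 };

    // Mirror of the Load64BitValue dispatch. Negative values cannot take the
    // movl path because a 32-bit move zero-extends rather than sign-extends.
    MoveKind PickMove(int64_t value) {
      if (value == 0) {
        return MoveKind::kXor32;  // xorl reg, reg: ~2 bytes, clears all 64 bits
      }
      if (value > 0 && value <= INT32_MAX) {
        return MoveKind::kMov32;  // movl reg, imm32: ~5 bytes, zero-extends
      }
      return MoveKind::kMov64;    // movq reg, imm64: ~10 bytes
    }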
void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
// Generate the constant area if needed.
X86_64Assembler* assembler = GetAssembler();
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 6cdc82262c..4be401a0fa 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -37,7 +37,7 @@ static constexpr FloatRegister kParameterFloatRegisters[] =
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);
-static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX };
+static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
arraysize(kRuntimeParameterCoreRegisters);
static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
@@ -50,7 +50,8 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatR
: CallingConvention(kRuntimeParameterCoreRegisters,
kRuntimeParameterCoreRegistersLength,
kRuntimeParameterFpuRegisters,
- kRuntimeParameterFpuRegistersLength) {}
+ kRuntimeParameterFpuRegistersLength,
+ kX86_64PointerSize) {}
private:
DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention);
@@ -62,28 +63,24 @@ class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegis
kParameterCoreRegisters,
kParameterCoreRegistersLength,
kParameterFloatRegisters,
- kParameterFloatRegistersLength) {}
+ kParameterFloatRegistersLength,
+ kX86_64PointerSize) {}
private:
DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention);
};
-class InvokeDexCallingConventionVisitor {
+class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor {
public:
- InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {}
+ InvokeDexCallingConventionVisitorX86_64() {}
+ virtual ~InvokeDexCallingConventionVisitorX86_64() {}
- Location GetNextLocation(Primitive::Type type);
+ Location GetNextLocation(Primitive::Type type) OVERRIDE;
private:
InvokeDexCallingConvention calling_convention;
- // The current index for cpu registers.
- uint32_t gp_index_;
- // The current index for fpu registers.
- uint32_t fp_index_;
- // The current stack index.
- uint32_t stack_index_;
-
- DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor);
+
+ DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64);
};
class CodeGeneratorX86_64;
@@ -147,7 +144,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor {
void HandleFieldGet(HInstruction* instruction);
CodeGeneratorX86_64* const codegen_;
- InvokeDexCallingConventionVisitor parameter_visitor_;
+ InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64);
};
@@ -287,6 +284,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Address LiteralInt32Address(int32_t v);
Address LiteralInt64Address(int64_t v);
+  // Load a 64-bit value into a register in the most efficient manner.
+ void Load64BitValue(CpuRegister dest, int64_t value);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 94f56e5d3e..bfed1a89de 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -225,7 +225,7 @@ static void RunCodeOptimized(HGraph* graph,
static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) {
ArenaPool pool;
ArenaAllocator arena(&pool);
- HGraph* graph = new (&arena) HGraph(&arena);
+ HGraph* graph = CreateGraph(&arena);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
@@ -238,7 +238,7 @@ static void TestCode(const uint16_t* data, bool has_result = false, int32_t expe
static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) {
ArenaPool pool;
ArenaAllocator arena(&pool);
- HGraph* graph = new (&arena) HGraph(&arena);
+ HGraph* graph = CreateGraph(&arena);
HGraphBuilder builder(graph, Primitive::kPrimLong);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
@@ -504,7 +504,7 @@ TEST(CodegenTest, NonMaterializedCondition) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -623,7 +623,7 @@ TEST(CodegenTest, MaterializedCondition1) {
for (size_t i = 0; i < arraysize(lhs); i++) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry_block);
@@ -669,7 +669,7 @@ TEST(CodegenTest, MaterializedCondition2) {
for (size_t i = 0; i < arraysize(lhs); i++) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry_block);
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 91cd60acce..6fbe75e802 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -47,6 +47,12 @@ static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) {
}
}
+static void MarkLoopHeadersContaining(const HBasicBlock& block, ArenaBitVector* set) {
+ for (HLoopInformationOutwardIterator it(block); !it.Done(); it.Advance()) {
+ set->SetBit(it.Current()->GetHeader()->GetBlockId());
+ }
+}
+
void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) {
if (stats_ != nullptr) {
stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction,
@@ -58,17 +64,26 @@ void HDeadCodeElimination::RemoveDeadBlocks() {
// Classify blocks as reachable/unreachable.
ArenaAllocator* allocator = graph_->GetArena();
ArenaBitVector live_blocks(allocator, graph_->GetBlocks().Size(), false);
+ ArenaBitVector affected_loops(allocator, graph_->GetBlocks().Size(), false);
+
MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks);
- // Remove all dead blocks. Process blocks in post-order, because removal needs
- // the block's chain of dominators.
+ // Remove all dead blocks. Iterate in post order because removal needs the
+ // block's chain of dominators and nested loops need to be updated from the
+ // inside out.
for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
- if (live_blocks.IsBitSet(block->GetBlockId())) {
- continue;
+ int id = block->GetBlockId();
+ if (live_blocks.IsBitSet(id)) {
+ if (affected_loops.IsBitSet(id)) {
+ DCHECK(block->IsLoopHeader());
+ block->GetLoopInformation()->Update();
+ }
+ } else {
+ MaybeRecordDeadBlock(block);
+ MarkLoopHeadersContaining(*block, &affected_loops);
+ block->DisconnectAndDelete();
}
- MaybeRecordDeadBlock(block);
- block->DisconnectAndDelete();
}
// Connect successive blocks created by dead branches. Order does not matter.
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 0bea0fc1c2..59a57c4345 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -31,13 +31,13 @@ class HDeadCodeElimination : public HOptimization {
public:
HDeadCodeElimination(HGraph* graph,
OptimizingCompilerStats* stats = nullptr,
- const char* name = kDeadCodeEliminationPassName)
+ const char* name = kInitialDeadCodeEliminationPassName)
: HOptimization(graph, true, name, stats) {}
void Run() OVERRIDE;
- static constexpr const char* kDeadCodeEliminationPassName =
- "dead_code_elimination";
+ static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination";
+ static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final";
private:
void MaybeRecordDeadBlock(HBasicBlock* block);
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index 61a7697301..78ae1dd960 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -27,7 +27,7 @@ namespace art {
static void TestCode(const uint16_t* data, const int* blocks, size_t blocks_length) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index 2bfecc696a..29aa97a83a 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -28,7 +28,7 @@
namespace art {
static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
@@ -235,14 +235,13 @@ TEST(FindLoopsTest, Loop4) {
TestBlock(graph, 0, false, -1); // entry block
TestBlock(graph, 1, false, -1); // pre header
- const int blocks2[] = {2, 3, 4, 5, 8};
- TestBlock(graph, 2, true, 2, blocks2, 5); // loop header
+ const int blocks2[] = {2, 3, 4, 5};
+ TestBlock(graph, 2, true, 2, blocks2, arraysize(blocks2)); // loop header
TestBlock(graph, 3, false, 2); // block in loop
- TestBlock(graph, 4, false, 2); // original back edge
- TestBlock(graph, 5, false, 2); // original back edge
+ TestBlock(graph, 4, false, 2); // back edge
+ TestBlock(graph, 5, false, 2); // back edge
TestBlock(graph, 6, false, -1); // return block
TestBlock(graph, 7, false, -1); // exit block
- TestBlock(graph, 8, false, 2); // synthesized back edge
}
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index dc3124b35f..fd28f0b83f 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -170,7 +170,8 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
}
}
- // Ensure the uses of `instruction` are defined in a block of the graph.
+ // Ensure the uses of `instruction` are defined in a block of the graph,
+  // and that the entries in the use list are consistent.
for (HUseIterator<HInstruction*> use_it(instruction->GetUses());
!use_it.Done(); use_it.Advance()) {
HInstruction* use = use_it.Current()->GetUser();
@@ -184,6 +185,27 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
use->GetId(),
instruction->GetId()));
}
+ size_t use_index = use_it.Current()->GetIndex();
+ if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) {
+ AddError(StringPrintf("User %s:%d of instruction %d has a wrong "
+ "UseListNode index.",
+ use->DebugName(),
+ use->GetId(),
+ instruction->GetId()));
+ }
+ }
+
+ // Ensure the environment uses entries are consistent.
+ for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses());
+ !use_it.Done(); use_it.Advance()) {
+ HEnvironment* use = use_it.Current()->GetUser();
+ size_t use_index = use_it.Current()->GetIndex();
+ if ((use_index >= use->Size()) || (use->GetInstructionAt(use_index) != instruction)) {
+ AddError(StringPrintf("Environment user of %s:%d has a wrong "
+ "UseListNode index.",
+ instruction->DebugName(),
+ instruction->GetId()));
+ }
}
// Ensure 'instruction' has pointers to its inputs' use entries.
@@ -191,7 +213,11 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i);
HInstruction* input = input_record.GetInstruction();
HUseListNode<HInstruction*>* use_node = input_record.GetUseNode();
- if (use_node == nullptr || !input->GetUses().Contains(use_node)) {
+    if ((use_node == nullptr)
+        || !input->GetUses().Contains(use_node)
+        || (use_node->GetIndex() != i)) {
AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry "
"at input %u (%s:%d).",
instruction->DebugName(),
@@ -262,6 +288,7 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
int id = loop_header->GetBlockId();
+ HLoopInformation* loop_information = loop_header->GetLoopInformation();
// Ensure the pre-header block is first in the list of
// predecessors of a loop header.
@@ -271,57 +298,48 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
id));
}
- // Ensure the loop header has only two predecessors and that only the
- // second one is a back edge.
+ // Ensure the loop header has only one incoming branch and the remaining
+ // predecessors are back edges.
size_t num_preds = loop_header->GetPredecessors().Size();
if (num_preds < 2) {
AddError(StringPrintf(
"Loop header %d has less than two predecessors: %zu.",
id,
num_preds));
- } else if (num_preds > 2) {
- AddError(StringPrintf(
- "Loop header %d has more than two predecessors: %zu.",
- id,
- num_preds));
} else {
- HLoopInformation* loop_information = loop_header->GetLoopInformation();
HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0);
if (loop_information->IsBackEdge(*first_predecessor)) {
AddError(StringPrintf(
"First predecessor of loop header %d is a back edge.",
id));
}
- HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1);
- if (!loop_information->IsBackEdge(*second_predecessor)) {
- AddError(StringPrintf(
- "Second predecessor of loop header %d is not a back edge.",
- id));
+ for (size_t i = 1, e = loop_header->GetPredecessors().Size(); i < e; ++i) {
+ HBasicBlock* predecessor = loop_header->GetPredecessors().Get(i);
+ if (!loop_information->IsBackEdge(*predecessor)) {
+ AddError(StringPrintf(
+ "Loop header %d has multiple incoming (non back edge) blocks.",
+ id));
+ }
}
}
- const ArenaBitVector& loop_blocks = loop_header->GetLoopInformation()->GetBlocks();
+ const ArenaBitVector& loop_blocks = loop_information->GetBlocks();
- // Ensure there is only one back edge per loop.
- size_t num_back_edges =
- loop_header->GetLoopInformation()->GetBackEdges().Size();
+ // Ensure back edges belong to the loop.
+ size_t num_back_edges = loop_information->GetBackEdges().Size();
if (num_back_edges == 0) {
AddError(StringPrintf(
"Loop defined by header %d has no back edge.",
id));
- } else if (num_back_edges > 1) {
- AddError(StringPrintf(
- "Loop defined by header %d has several back edges: %zu.",
- id,
- num_back_edges));
} else {
- DCHECK_EQ(num_back_edges, 1u);
- int back_edge_id = loop_header->GetLoopInformation()->GetBackEdges().Get(0)->GetBlockId();
- if (!loop_blocks.IsBitSet(back_edge_id)) {
- AddError(StringPrintf(
- "Loop defined by header %d has an invalid back edge %d.",
- id,
- back_edge_id));
+ for (size_t i = 0; i < num_back_edges; ++i) {
+ int back_edge_id = loop_information->GetBackEdges().Get(i)->GetBlockId();
+ if (!loop_blocks.IsBitSet(back_edge_id)) {
+ AddError(StringPrintf(
+ "Loop defined by header %d has an invalid back edge %d.",
+ id,
+ back_edge_id));
+ }
}
}
@@ -368,8 +386,9 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) {
// Ensure an instruction having an environment is dominated by the
// instructions contained in the environment.
- HEnvironment* environment = instruction->GetEnvironment();
- if (environment != nullptr) {
+ for (HEnvironment* environment = instruction->GetEnvironment();
+ environment != nullptr;
+ environment = environment->GetParent()) {
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
HInstruction* env_instruction = environment->GetInstructionAt(i);
if (env_instruction != nullptr
diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc
index 923468ff16..eca0d9344f 100644
--- a/compiler/optimizing/graph_checker_test.cc
+++ b/compiler/optimizing/graph_checker_test.cc
@@ -30,7 +30,7 @@ namespace art {
* 1: Exit
*/
HGraph* CreateSimpleCFG(ArenaAllocator* allocator) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HBasicBlock* entry_block = new (allocator) HBasicBlock(graph);
entry_block->AddInstruction(new (allocator) HGoto());
graph->AddBlock(entry_block);
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index 50398b4790..59d50926ad 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -73,7 +73,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock1) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
HBasicBlock* if_block = createIfBlock(graph, &allocator);
HBasicBlock* if_true = createGotoBlock(graph, &allocator);
@@ -108,7 +108,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
HBasicBlock* if_block = createIfBlock(graph, &allocator);
HBasicBlock* if_false = createGotoBlock(graph, &allocator);
@@ -143,7 +143,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
HBasicBlock* if_block = createIfBlock(graph, &allocator);
HBasicBlock* return_block = createReturnBlock(graph, &allocator);
@@ -178,7 +178,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
HBasicBlock* if_block = createIfBlock(graph, &allocator);
HBasicBlock* return_block = createReturnBlock(graph, &allocator);
@@ -213,7 +213,7 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders1) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
HBasicBlock* first_if_block = createIfBlock(graph, &allocator);
HBasicBlock* if_block = createIfBlock(graph, &allocator);
@@ -252,7 +252,7 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders2) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry_block = createEntryBlock(graph, &allocator);
HBasicBlock* first_if_block = createIfBlock(graph, &allocator);
HBasicBlock* if_block = createIfBlock(graph, &allocator);
@@ -288,7 +288,7 @@ TEST(GraphTest, InsertInstructionBefore) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* block = createGotoBlock(graph, &allocator);
HInstruction* got = block->GetLastInstruction();
ASSERT_TRUE(got->IsControlFlow());
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index ca9cbc3d01..f5c630bf97 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -17,6 +17,7 @@
#include "graph_visualizer.h"
#include "code_generator.h"
+#include "dead_code_elimination.h"
#include "licm.h"
#include "nodes.h"
#include "optimization.h"
@@ -211,17 +212,22 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
output_ << "]";
}
if (instruction->HasEnvironment()) {
- HEnvironment* env = instruction->GetEnvironment();
- output_ << " (env: [ ";
- for (size_t i = 0, e = env->Size(); i < e; ++i) {
- HInstruction* insn = env->GetInstructionAt(i);
- if (insn != nullptr) {
- output_ << GetTypeId(insn->GetType()) << insn->GetId() << " ";
- } else {
- output_ << " _ ";
+ output_ << " (env:";
+ for (HEnvironment* environment = instruction->GetEnvironment();
+ environment != nullptr;
+ environment = environment->GetParent()) {
+ output_ << " [ ";
+ for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+ HInstruction* insn = environment->GetInstructionAt(i);
+ if (insn != nullptr) {
+ output_ << GetTypeId(insn->GetType()) << insn->GetId() << " ";
+ } else {
+ output_ << " _ ";
+ }
}
+ output_ << "]";
}
- output_ << "])";
+ output_ << ")";
}
if (IsPass(SsaLivenessAnalysis::kLivenessPassName)
&& is_after_pass_
@@ -248,7 +254,8 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
}
}
output_ << " (liveness: " << instruction->GetLifetimePosition() << ")";
- } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)) {
+ } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)
+ || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)) {
output_ << " ( loop_header:";
HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
if (info == nullptr) {
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index a81d49aa0c..c3ce7e142a 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -29,7 +29,7 @@ TEST(GVNTest, LocalFieldElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -78,7 +78,7 @@ TEST(GVNTest, GlobalFieldElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -133,7 +133,7 @@ TEST(GVNTest, LoopFieldElimination) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -220,7 +220,7 @@ TEST(GVNTest, LoopSideEffects) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index ada32db047..e51732396d 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -16,6 +16,7 @@
#include "inliner.h"
+#include "art_method-inl.h"
#include "builder.h"
#include "class_linker.h"
#include "constant_folding.h"
@@ -23,7 +24,6 @@
#include "driver/compiler_driver-inl.h"
#include "driver/dex_compilation_unit.h"
#include "instruction_simplifier.h"
-#include "mirror/art_method-inl.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
#include "nodes.h"
@@ -81,11 +81,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
hs.NewHandle(caller_compilation_unit_.GetClassLinker()->FindDexCache(caller_dex_file)));
Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
soa.Decode<mirror::ClassLoader*>(caller_compilation_unit_.GetClassLoader())));
- Handle<mirror::ArtMethod> resolved_method(hs.NewHandle(
- compiler_driver_->ResolveMethod(
- soa, dex_cache, class_loader, &caller_compilation_unit_, method_index, invoke_type)));
+ ArtMethod* resolved_method(compiler_driver_->ResolveMethod(
+ soa, dex_cache, class_loader, &caller_compilation_unit_, method_index, invoke_type));
- if (resolved_method.Get() == nullptr) {
+ if (resolved_method == nullptr) {
VLOG(compiler) << "Method cannot be resolved " << PrettyMethod(method_index, caller_dex_file);
return false;
}
@@ -141,7 +140,6 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
}
if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, can_use_dex_cache)) {
- resolved_method->SetShouldNotInline();
return false;
}
@@ -150,7 +148,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
return true;
}
-bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
+bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
HInvoke* invoke_instruction,
uint32_t method_index,
bool can_use_dex_cache) const {
@@ -170,7 +168,12 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
nullptr);
HGraph* callee_graph = new (graph_->GetArena()) HGraph(
- graph_->GetArena(), graph_->IsDebuggable(), graph_->GetCurrentInstructionId());
+ graph_->GetArena(),
+ caller_dex_file,
+ method_index,
+ compiler_driver_->GetInstructionSet(),
+ graph_->IsDebuggable(),
+ graph_->GetCurrentInstructionId());
OptimizingCompilerStats inline_stats;
HGraphBuilder builder(callee_graph,
@@ -183,6 +186,9 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
if (!builder.BuildGraph(*code_item)) {
VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
<< " could not be built, so cannot be inlined";
+ // There could be multiple reasons why the graph could not be built, including
+      // inaccessible methods/fields due to using a different dex cache. We do not mark
+ // the method as non-inlineable so that other callers can still try to inline it.
return false;
}
@@ -190,12 +196,14 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
compiler_driver_->GetInstructionSet())) {
VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
<< " cannot be inlined because of the register allocator";
+ resolved_method->SetShouldNotInline();
return false;
}
if (!callee_graph->TryBuildingSsa()) {
VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
<< " could not be transformed to SSA";
+ resolved_method->SetShouldNotInline();
return false;
}
@@ -232,6 +240,7 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
if (block->IsLoopHeader()) {
VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
<< " could not be inlined because it contains a loop";
+ resolved_method->SetShouldNotInline();
return false;
}
@@ -261,6 +270,8 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file)
<< " could not be inlined because " << current->DebugName()
<< " it is in a different dex file and requires access to the dex cache";
+ // Do not flag the method as not-inlineable. A caller within the same
+ // dex file could still successfully inline it.
return false;
}
}
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 1dbc7d392b..831bdf22a0 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -48,7 +48,7 @@ class HInliner : public HOptimization {
private:
bool TryInline(HInvoke* invoke_instruction, uint32_t method_index, InvokeType invoke_type) const;
- bool TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method,
+ bool TryBuildAndInline(ArtMethod* resolved_method,
HInvoke* invoke_instruction,
uint32_t method_index,
bool can_use_dex_cache) const;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2df7c166d8..46fad17b8f 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -137,13 +137,25 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
HConstant* input_cst = instruction->GetConstantRight();
HInstruction* input_other = instruction->GetLeastConstantLeft();
- if ((input_cst != nullptr) && input_cst->IsZero()) {
- // Replace code looking like
- // SHL dst, src, 0
- // with
- // src
- instruction->ReplaceWith(input_other);
- instruction->GetBlock()->RemoveInstruction(instruction);
+ if (input_cst != nullptr) {
+ if (input_cst->IsZero()) {
+ // Replace code looking like
+ // SHL dst, src, 0
+ // with
+ // src
+ instruction->ReplaceWith(input_other);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ } else if (instruction->IsShl() && input_cst->IsOne()) {
+ // Replace Shl looking like
+ // SHL dst, src, 1
+ // with
+ // ADD dst, src, src
+      HAdd* add = new (GetGraph()->GetArena()) HAdd(instruction->GetType(),
+                                                    input_other,
+                                                    input_other);
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add);
+ RecordSimplification();
+ }
}
}
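Rewriting a shift-left-by-one as an addition is a strength reduction: `x << 1` and `x + x` agree for every two's-complement value, wrap-around included, and an add can be cheaper or more fusible on some targets. A quick check of the identity, using unsigned arithmetic to keep the C++ well-defined:

    #include <cassert>
    #include <cstdint>

    void ShlOneEqualsAdd() {
      const int32_t samples[] = {0, 1, -1, 3, 0x40000000, INT32_MIN, INT32_MAX};
      for (int32_t x : samples) {
        uint32_t ux = static_cast<uint32_t>(x);
        assert(static_cast<int32_t>(ux << 1) == static_cast<int32_t>(ux + ux));
      }
    }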
@@ -377,15 +389,42 @@ void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) {
return;
}
- if ((input_cst != nullptr) && input_cst->IsMinusOne() &&
- (Primitive::IsFloatingPointType(type) || Primitive::IsIntOrLongType(type))) {
+ if ((input_cst != nullptr) && input_cst->IsMinusOne()) {
// Replace code looking like
// DIV dst, src, -1
// with
// NEG dst, src
instruction->GetBlock()->ReplaceAndRemoveInstructionWith(
- instruction, (new (GetGraph()->GetArena()) HNeg(type, input_other)));
+ instruction, new (GetGraph()->GetArena()) HNeg(type, input_other));
RecordSimplification();
+ return;
+ }
+
+ if ((input_cst != nullptr) && Primitive::IsFloatingPointType(type)) {
+ // Try replacing code looking like
+ // DIV dst, src, constant
+ // with
+ // MUL dst, src, 1 / constant
+ HConstant* reciprocal = nullptr;
+      if (type == Primitive::kPrimDouble) {
+ double value = input_cst->AsDoubleConstant()->GetValue();
+ if (CanDivideByReciprocalMultiplyDouble(bit_cast<int64_t, double>(value))) {
+ reciprocal = GetGraph()->GetDoubleConstant(1.0 / value);
+ }
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimFloat);
+ float value = input_cst->AsFloatConstant()->GetValue();
+ if (CanDivideByReciprocalMultiplyFloat(bit_cast<int32_t, float>(value))) {
+ reciprocal = GetGraph()->GetFloatConstant(1.0f / value);
+ }
+ }
+
+ if (reciprocal != nullptr) {
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(
+ instruction, new (GetGraph()->GetArena()) HMul(type, input_other, reciprocal));
+ RecordSimplification();
+ return;
+ }
}
}
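Multiplying by `1 / constant` is only a sound replacement when the reciprocal is exact, which in binary floating point essentially means the constant is a power of two whose reciprocal stays a normal number; presumably that is what `CanDivideByReciprocalMultiplyFloat/Double` gate on (their exact range checks are not reproduced here). For instance `x / 8.0 == x * 0.125` for every `x`, while `x / 10.0` and `x * 0.1` can differ in the last bit. A sketch of the power-of-two test:

    #include <cmath>

    // True when 1.0 / c is exactly representable: c must be a power of two.
    // Overflow/denormal range checks are omitted in this sketch.
    bool ReciprocalIsExact(double c) {
      int exponent;
      double fraction = std::frexp(std::fabs(c), &exponent);
      return fraction == 0.5;
    }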
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 20aa45f197..9e18f114ad 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -22,6 +22,7 @@
#include "invoke_type.h"
#include "nodes.h"
#include "quick/inline_method_analyser.h"
+#include "utils.h"
namespace art {
@@ -186,6 +187,8 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
return Intrinsics::kStringCharAt;
case kIntrinsicCompareTo:
return Intrinsics::kStringCompareTo;
+ case kIntrinsicGetCharsNoCheck:
+ return Intrinsics::kStringGetCharsNoCheck;
case kIntrinsicIsEmptyOrLength:
// The inliner can handle these two cases - and this is the preferred approach
// since after inlining the call is no longer visible (as opposed to waiting
@@ -194,6 +197,12 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
case kIntrinsicIndexOf:
return ((method.d.data & kIntrinsicFlagBase0) == 0) ?
Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf;
+ case kIntrinsicNewStringFromBytes:
+ return Intrinsics::kStringNewStringFromBytes;
+ case kIntrinsicNewStringFromChars:
+ return Intrinsics::kStringNewStringFromChars;
+ case kIntrinsicNewStringFromString:
+ return Intrinsics::kStringNewStringFromString;
case kIntrinsicCas:
switch (GetType(method.d.data, false)) {
@@ -280,6 +289,11 @@ static Intrinsics GetIntrinsic(InlineMethod method) {
case kInlineOpIPut:
return Intrinsics::kNone;
+ // String init cases, not intrinsics.
+
+ case kInlineStringInit:
+ return Intrinsics::kNone;
+
// No default case to make the compiler warn on missing cases.
}
return Intrinsics::kNone;
@@ -361,4 +375,3 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
}
} // namespace art
-
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index dbb7cbaa98..c243ef3f8b 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -17,8 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
#define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
+#include "code_generator.h"
#include "nodes.h"
#include "optimization.h"
+#include "parallel_move_resolver.h"
namespace art {
@@ -76,6 +78,38 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
#undef OPTIMIZING_INTRINSICS
+ static void MoveArguments(HInvoke* invoke,
+ CodeGenerator* codegen,
+ InvokeDexCallingConventionVisitor* calling_convention_visitor) {
+ if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) {
+ HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+ // When we do not run baseline, explicit clinit checks triggered by static
+ // invokes must have been pruned by art::PrepareForRegisterAllocation.
+ DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
+ }
+
+ if (invoke->GetNumberOfArguments() == 0) {
+ // No argument to move.
+ return;
+ }
+
+ LocationSummary* locations = invoke->GetLocations();
+
+ // We're moving potentially two or more locations to locations that could overlap, so we need
+ // a parallel move resolver.
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+
+ for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
+ HInstruction* input = invoke->InputAt(i);
+ Location cc_loc = calling_convention_visitor->GetNextLocation(input->GetType());
+ Location actual_loc = locations->InAt(i);
+
+ parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
+ }
+
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ }
+
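The parallel move is needed because argument shuffles can form cycles, and emitting the moves one at a time would clobber a source before it is read. A minimal illustration of the cycle the resolver has to break with a scratch location:

    // Sequentially emitting the parallel move {a -> b, b -> a} is wrong:
    void NaiveSwap(int& a, int& b) {
      b = a;  // clobbers b before it was read...
      a = b;  // ...so a just gets its own old value back
    }

    // The resolver detects the cycle and routes one value through a temporary:
    void ResolvedSwap(int& a, int& b) {
      int scratch = b;
      b = a;
      a = scratch;
    }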
protected:
IntrinsicVisitor() {}
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index abdf04ebb1..db35b8f767 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -17,11 +17,11 @@
#include "intrinsics_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
+#include "art_method.h"
#include "code_generator_arm.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
-#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/arm/assembler_arm.h"
@@ -48,7 +48,7 @@ static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGener
DCHECK_NE(type, Primitive::kPrimVoid);
- if (Primitive::IsIntegralType(type)) {
+ if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
if (type == Primitive::kPrimLong) {
Register trg_reg_lo = trg.AsRegisterPairLow<Register>();
Register trg_reg_hi = trg.AsRegisterPairHigh<Register>();
@@ -77,28 +77,9 @@ static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGener
}
}
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) {
- if (invoke->InputCount() == 0) {
- // No argument to move.
- return;
- }
-
- LocationSummary* locations = invoke->GetLocations();
- InvokeDexCallingConventionVisitor calling_convention_visitor;
-
- // We're moving potentially two or more locations to locations that could overlap, so we need
- // a parallel move resolver.
- HParallelMove parallel_move(arena);
-
- for (size_t i = 0; i < invoke->InputCount(); i++) {
- HInstruction* input = invoke->InputAt(i);
- Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
- Location actual_loc = locations->InAt(i);
-
- parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
- }
-
- codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorARM* codegen) {
+ InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+ IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}
// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -117,7 +98,7 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM {
SaveLiveRegisters(codegen, invoke_->GetLocations());
- MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+ MoveArguments(invoke_, codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
@@ -810,10 +791,6 @@ void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) {
const MemberOffset value_offset = mirror::String::ValueOffset();
// Location of count
const MemberOffset count_offset = mirror::String::CountOffset();
- // Starting offset within data array
- const MemberOffset offset_offset = mirror::String::OffsetOffset();
- // Start of char data with array_
- const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t));
Register obj = locations->InAt(0).AsRegister<Register>(); // String object pointer.
Register idx = locations->InAt(1).AsRegister<Register>(); // Index of character.
@@ -835,15 +812,10 @@ void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) {
__ cmp(idx, ShifterOperand(temp));
__ b(slow_path->GetEntryLabel(), CS);
- // Index computation.
- __ ldr(temp, Address(obj, offset_offset.Int32Value())); // temp := str.offset.
- __ ldr(array_temp, Address(obj, value_offset.Int32Value())); // array_temp := str.offset.
- __ add(temp, temp, ShifterOperand(idx));
- DCHECK_EQ(data_offset.Int32Value() % 2, 0); // We'll compensate by shifting.
- __ add(temp, temp, ShifterOperand(data_offset.Int32Value() / 2));
+ __ add(array_temp, obj, ShifterOperand(value_offset.Int32Value())); // array_temp := str.value.
// Load the value.
- __ ldrh(out, Address(array_temp, temp, LSL, 1)); // out := array_temp[temp].
+ __ ldrh(out, Address(array_temp, idx, LSL, 1)); // out := array_temp[idx].
__ Bind(slow_path->GetExitLabel());
}
@@ -878,6 +850,169 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+static void GenerateVisitStringIndexOf(HInvoke* invoke,
+ ArmAssembler* assembler,
+ CodeGeneratorARM* codegen,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = invoke->GetLocations();
+ Register tmp_reg = locations->GetTemp(0).AsRegister<Register>();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Check for code points > 0xFFFF. With a constant we can decide statically
+  // (branching straight to the slow path if needed); otherwise emit a runtime check.
+ SlowPathCodeARM* slow_path = nullptr;
+ if (invoke->InputAt(1)->IsIntConstant()) {
+ if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+ std::numeric_limits<uint16_t>::max()) {
+ // Always needs the slow-path. We could directly dispatch to it, but this case should be
+ // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+ slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ b(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ } else {
+ Register char_reg = locations->InAt(1).AsRegister<Register>();
+ __ LoadImmediate(tmp_reg, std::numeric_limits<uint16_t>::max());
+ __ cmp(char_reg, ShifterOperand(tmp_reg));
+ slow_path = new (allocator) IntrinsicSlowPathARM(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ b(slow_path->GetEntryLabel(), HI);
+ }
+
+ if (start_at_zero) {
+ DCHECK_EQ(tmp_reg, R2);
+ // Start-index = 0.
+ __ LoadImmediate(tmp_reg, 0);
+ }
+
+ __ LoadFromOffset(kLoadWord, LR, TR,
+ QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pIndexOf).Int32Value());
+ __ blx(LR);
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
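The split mirrors how much is known at compile time: a constant code point above 0xFFFF branches straight to the slow path, while a runtime value needs the compare first. Presumably the cutoff exists because the `pIndexOf` entrypoint only searches single UTF-16 code units (an assumption here, not stated by the patch). The decision in plain form:

    #include <cstdint>
    #include <limits>

    enum class IndexOfPath { kFastOnly, kSlowOnly, kRuntimeCheck };

    // Dispatch shape of GenerateVisitStringIndexOf (sketch): constants decide
    // the path statically; anything else needs the runtime compare.
    IndexOfPath Classify(bool is_constant, uint32_t constant_value) {
      constexpr uint32_t kMaxUtf16 = std::numeric_limits<uint16_t>::max();
      if (is_constant) {
        return constant_value > kMaxUtf16 ? IndexOfPath::kSlowOnly
                                          : IndexOfPath::kFastOnly;
      }
      return IndexOfPath::kRuntimeCheck;
    }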
+void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+ // best to align the inputs accordingly.
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetOut(Location::RegisterLocation(R0));
+
+ // Need a temp for slow-path codepoint compare, and need to send start-index=0.
+ locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) {
+ GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+}
+
+void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+ // best to align the inputs accordingly.
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->SetOut(Location::RegisterLocation(R0));
+
+ // Need a temp for slow-path codepoint compare.
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) {
+ GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+}
+
+void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+ locations->SetOut(Location::RegisterLocation(R0));
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringNewStringFromBytes(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register byte_array = locations->InAt(0).AsRegister<Register>();
+ __ cmp(byte_array, ShifterOperand(0));
+ SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+ codegen_->AddSlowPath(slow_path);
+ __ b(slow_path->GetEntryLabel(), EQ);
+
+ __ LoadFromOffset(
+ kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromBytes).Int32Value());
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ __ blx(LR);
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->SetOut(Location::RegisterLocation(R0));
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+
+ __ LoadFromOffset(
+ kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromChars).Int32Value());
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ __ blx(LR);
+}
+
+void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetOut(Location::RegisterLocation(R0));
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register string_to_copy = locations->InAt(0).AsRegister<Register>();
+ __ cmp(string_to_copy, ShifterOperand(0));
+ SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+ codegen_->AddSlowPath(slow_path);
+ __ b(slow_path->GetEntryLabel(), EQ);
+
+ __ LoadFromOffset(kLoadWord,
+ LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromString).Int32Value());
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ __ blx(LR);
+ __ Bind(slow_path->GetExitLabel());
+}
+
// Unimplemented intrinsics.
#define UNIMPLEMENTED_INTRINSIC(Name) \
@@ -904,9 +1039,8 @@ UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding
UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
} // namespace arm
} // namespace art
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 7a753b2da9..957373f6f9 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -17,12 +17,12 @@
#include "intrinsics_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
+#include "art_method.h"
#include "code_generator_arm64.h"
#include "common_arm64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
-#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
@@ -75,7 +75,7 @@ static void MoveFromReturnRegister(Location trg,
DCHECK_NE(type, Primitive::kPrimVoid);
- if (Primitive::IsIntegralType(type)) {
+ if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
Register trg_reg = RegisterFrom(trg, type);
Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type);
__ Mov(trg_reg, res_reg, kDiscardForSameWReg);
@@ -86,28 +86,9 @@ static void MoveFromReturnRegister(Location trg,
}
}
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM64* codegen) {
- if (invoke->InputCount() == 0) {
- // No argument to move.
- return;
- }
-
- LocationSummary* locations = invoke->GetLocations();
- InvokeDexCallingConventionVisitor calling_convention_visitor;
-
- // We're moving potentially two or more locations to locations that could overlap, so we need
- // a parallel move resolver.
- HParallelMove parallel_move(arena);
-
- for (size_t i = 0; i < invoke->InputCount(); i++) {
- HInstruction* input = invoke->InputAt(i);
- Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
- Location actual_loc = locations->InAt(i);
-
- parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
- }
-
- codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
+ InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
+ IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}
// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -126,7 +107,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
SaveLiveRegisters(codegen, invoke_->GetLocations());
- MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+ MoveArguments(invoke_, codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
@@ -953,10 +934,6 @@ void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) {
const MemberOffset value_offset = mirror::String::ValueOffset();
// Location of count
const MemberOffset count_offset = mirror::String::CountOffset();
- // Starting offset within data array
- const MemberOffset offset_offset = mirror::String::OffsetOffset();
- // Start of char data with array_
- const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t));
Register obj = WRegisterFrom(locations->InAt(0)); // String object pointer.
Register idx = WRegisterFrom(locations->InAt(1)); // Index of character.
@@ -979,21 +956,15 @@ void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) {
__ Cmp(idx, temp);
__ B(hs, slow_path->GetEntryLabel());
- // Index computation.
- __ Ldr(temp, HeapOperand(obj, offset_offset)); // temp := str.offset.
- __ Ldr(array_temp, HeapOperand(obj, value_offset)); // array_temp := str.offset.
- __ Add(temp, temp, idx);
- DCHECK_EQ(data_offset.Int32Value() % 2, 0); // We'll compensate by shifting.
- __ Add(temp, temp, Operand(data_offset.Int32Value() / 2));
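+ // String char data is now stored inline at value_offset, so no separate array load is needed.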
+ __ Add(array_temp, obj, Operand(value_offset.Int32Value())); // array_temp := str.value.
// Load the value.
- __ Ldrh(out, MemOperand(array_temp.X(), temp, UXTW, 1)); // out := array_temp[temp].
+ __ Ldrh(out, MemOperand(array_temp.X(), idx, UXTW, 1)); // out := array_temp[idx].
__ Bind(slow_path->GetExitLabel());
}
void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
- // The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kCall,
kIntrinsified);
@@ -1022,6 +993,169 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+static void GenerateVisitStringIndexOf(HInvoke* invoke,
+ vixl::MacroAssembler* masm,
+ CodeGeneratorARM64* codegen,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = invoke->GetLocations();
+ Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+ // or directly dispatch if we have a constant.
+ SlowPathCodeARM64* slow_path = nullptr;
+ if (invoke->InputAt(1)->IsIntConstant()) {
+ if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > 0xFFFFU) {
+ // Always needs the slow-path. We could directly dispatch to it, but this case should be
+ // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+ slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ B(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ } else {
+ Register char_reg = WRegisterFrom(locations->InAt(1));
+ __ Mov(tmp_reg, 0xFFFF);
+ __ Cmp(char_reg, Operand(tmp_reg));
+ slow_path = new (allocator) IntrinsicSlowPathARM64(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ B(hi, slow_path->GetEntryLabel());
+ }
+
+ if (start_at_zero) {
+ // Start-index = 0.
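+ // tmp_reg was allocated as the third argument register (see the locations builder), so this
+ // passes start_index to the stub.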
+ __ Mov(tmp_reg, 0);
+ }
+
+ __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pIndexOf).Int32Value()));
+ __ Blr(lr);
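+ // The stub's result (index or -1) comes back in w0, the output location set in the builder.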
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+ // best to align the inputs accordingly.
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+ locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+
+ // Need a temp for slow-path codepoint compare, and need to send start_index=0.
+ locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
+ GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), true);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+ // best to align the inputs accordingly.
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+ locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+
+ // Need a temp for slow-path codepoint compare.
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
+ GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), false);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+ locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
+ locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+ vixl::MacroAssembler* masm = GetVIXLAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register byte_array = WRegisterFrom(locations->InAt(0));
+ __ Cmp(byte_array, 0);
+ SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+ codegen_->AddSlowPath(slow_path);
+ __ B(eq, slow_path->GetEntryLabel());
+
+ __ Ldr(lr,
+ MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromBytes).Int32Value()));
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ __ Blr(lr);
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+ locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) {
+ vixl::MacroAssembler* masm = GetVIXLAssembler();
+
+ __ Ldr(lr,
+ MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromChars).Int32Value()));
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ __ Blr(lr);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) {
+ // The inputs are passed in the runtime calling convention registers.
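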
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+ locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) {
+ vixl::MacroAssembler* masm = GetVIXLAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register string_to_copy = WRegisterFrom(locations->InAt(0));
+ __ Cmp(string_to_copy, 0);
+ SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+ codegen_->AddSlowPath(slow_path);
+ __ B(eq, slow_path->GetEntryLabel());
+
+ __ Ldr(lr,
+ MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromString).Int32Value()));
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ __ Blr(lr);
+ __ Bind(slow_path->GetExitLabel());
+}
+
// Unimplemented intrinsics.
#define UNIMPLEMENTED_INTRINSIC(Name) \
@@ -1031,9 +1165,8 @@ void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED
}
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
} // namespace arm64
} // namespace art
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 10f6e1d6c7..2c9248f52c 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -60,8 +60,12 @@
V(MemoryPokeShortNative, kStatic) \
V(StringCharAt, kDirect) \
V(StringCompareTo, kDirect) \
+ V(StringGetCharsNoCheck, kDirect) \
V(StringIndexOf, kDirect) \
V(StringIndexOfAfter, kDirect) \
+ V(StringNewStringFromBytes, kStatic) \
+ V(StringNewStringFromChars, kStatic) \
+ V(StringNewStringFromString, kStatic) \
V(UnsafeCASInt, kDirect) \
V(UnsafeCASLong, kDirect) \
V(UnsafeCASObject, kDirect) \
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 7275edb695..989dd0df30 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -16,12 +16,14 @@
#include "intrinsics_x86.h"
+#include <limits>
+
#include "arch/x86/instruction_set_features_x86.h"
+#include "art_method.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
-#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86/assembler_x86.h"
@@ -111,28 +113,9 @@ static void MoveFromReturnRegister(Location target,
}
}
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) {
- if (invoke->InputCount() == 0) {
- // No argument to move.
- return;
- }
-
- LocationSummary* locations = invoke->GetLocations();
- InvokeDexCallingConventionVisitor calling_convention_visitor;
-
- // We're moving potentially two or more locations to locations that could overlap, so we need
- // a parallel move resolver.
- HParallelMove parallel_move(arena);
-
- for (size_t i = 0; i < invoke->InputCount(); i++) {
- HInstruction* input = invoke->InputAt(i);
- Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
- Location actual_loc = locations->InAt(i);
-
- parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
- }
-
- codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
+ InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
+ IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}
// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -143,11 +126,8 @@ static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX
// restored!
class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
public:
- explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp)
- : invoke_(invoke) {
- // The temporary register has to be EAX for x86 invokes.
- DCHECK_EQ(temp, EAX);
- }
+ explicit IntrinsicSlowPathX86(HInvoke* invoke)
+ : invoke_(invoke) { }
void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in);
@@ -155,7 +135,7 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
SaveLiveRegisters(codegen, invoke_->GetLocations());
- MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+ MoveArguments(invoke_, codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
@@ -749,7 +729,7 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
}
static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
- MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+ MoveArguments(invoke, codegen);
DCHECK(invoke->IsInvokeStaticOrDirect());
codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);
@@ -899,8 +879,6 @@ void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
locations->SetOut(Location::SameAsFirstInput());
- // Needs to be EAX for the invoke.
- locations->AddTemp(Location::RegisterLocation(EAX));
}
void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
@@ -910,23 +888,17 @@ void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
// Location of count
const int32_t count_offset = mirror::String::CountOffset().Int32Value();
- // Starting offset within data array
- const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
- // Start of char data with array_
- const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
Register obj = locations->InAt(0).AsRegister<Register>();
Register idx = locations->InAt(1).AsRegister<Register>();
Register out = locations->Out().AsRegister<Register>();
- Location temp_loc = locations->GetTemp(0);
- Register temp = temp_loc.AsRegister<Register>();
// TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
// the cost.
// TODO: For simplicity, the index parameter is requested in a register, so different from Quick
// we will not optimize the code for constants (which would save a register).
- SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke, temp);
+ SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
codegen_->AddSlowPath(slow_path);
X86Assembler* assembler = GetAssembler();
@@ -935,12 +907,8 @@ void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
codegen_->MaybeRecordImplicitNullCheck(invoke);
__ j(kAboveEqual, slow_path->GetEntryLabel());
- // Get the actual element.
- __ movl(temp, idx); // temp := idx.
- __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx.
- __ movl(out, Address(obj, value_offset)); // obj := obj.array.
- // out = out[2*temp].
- __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
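+ // Chars are inline at value_offset, and out (SameAsFirstInput) still holds the String object.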
+ // out = out[2*idx].
+ __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
__ Bind(slow_path->GetExitLabel());
}
@@ -954,8 +922,6 @@ void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
locations->SetOut(Location::RegisterLocation(EAX));
- // Needs to be EAX for the invoke.
- locations->AddTemp(Location::RegisterLocation(EAX));
}
void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
@@ -967,8 +933,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
Register argument = locations->InAt(1).AsRegister<Register>();
__ testl(argument, argument);
- SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
- invoke, locations->GetTemp(0).AsRegister<Register>());
+ SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
codegen_->AddSlowPath(slow_path);
__ j(kEqual, slow_path->GetEntryLabel());
@@ -976,6 +941,227 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+static void CreateStringIndexOfLocations(HInvoke* invoke,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = new (allocator) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+ // The data needs to be in EDI for scasw, so request that the string be there anyway.
+ locations->SetInAt(0, Location::RegisterLocation(EDI));
+ // If we look for a constant char, we'll still have to copy it into EAX, so just request the
+ // allocator to do that anyway. We can still do the constant check by checking the parameter
+ // of the instruction explicitly.
+ // Note: This works as we don't clobber EAX anywhere.
+ locations->SetInAt(1, Location::RegisterLocation(EAX));
+ if (!start_at_zero) {
+ locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
+ }
+ // As we clobber EDI during execution anyway, also use it as the output.
+ locations->SetOut(Location::SameAsFirstInput());
+
+ // repne scasw uses ECX as the counter.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ // Need another temporary to be able to compute the result.
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenerateStringIndexOf(HInvoke* invoke,
+ X86Assembler* assembler,
+ CodeGeneratorX86* codegen,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = invoke->GetLocations();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ Register string_obj = locations->InAt(0).AsRegister<Register>();
+ Register search_value = locations->InAt(1).AsRegister<Register>();
+ Register counter = locations->GetTemp(0).AsRegister<Register>();
+ Register string_length = locations->GetTemp(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ // Check our assumptions for registers.
+ DCHECK_EQ(string_obj, EDI);
+ DCHECK_EQ(search_value, EAX);
+ DCHECK_EQ(counter, ECX);
+ DCHECK_EQ(out, EDI);
+
+ // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+ // or directly dispatch if we have a constant.
+ SlowPathCodeX86* slow_path = nullptr;
+ if (invoke->InputAt(1)->IsIntConstant()) {
+ if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+ std::numeric_limits<uint16_t>::max()) {
+ // Always needs the slow-path. We could directly dispatch to it, but this case should be
+ // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+ slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ } else {
+ __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
+ slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ j(kAbove, slow_path->GetEntryLabel());
+ }
+
+ // From here down, we know that we are looking for a char that fits in 16 bits.
+ // Location of reference to data array within the String object.
+ int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+ // Location of count within the String object.
+ int32_t count_offset = mirror::String::CountOffset().Int32Value();
+
+ // Load string length, i.e., the count field of the string.
+ __ movl(string_length, Address(string_obj, count_offset));
+
+ // Do a zero-length check.
+ // TODO: Support jecxz.
+ Label not_found_label;
+ __ testl(string_length, string_length);
+ __ j(kEqual, &not_found_label);
+
+ if (start_at_zero) {
+ // Number of chars to scan is the same as the string length.
+ __ movl(counter, string_length);
+
+ // Move to the start of the string.
+ __ addl(string_obj, Immediate(value_offset));
+ } else {
+ Register start_index = locations->InAt(2).AsRegister<Register>();
+
+ // Do a start_index check.
+ __ cmpl(start_index, string_length);
+ __ j(kGreaterEqual, &not_found_label);
+
+ // Ensure we have a start index >= 0.
+ __ xorl(counter, counter);
+ __ cmpl(start_index, Immediate(0));
+ __ cmovl(kGreater, counter, start_index);
+
+ // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+ __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+
+ // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
+ // compare.
+ __ negl(counter);
+ __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
+ }
+
+ // Everything is set up for repne scasw:
+ // * Comparison address in EDI.
+ // * Counter in ECX.
+ __ repne_scasw();
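+ // repne scasw stops on a match (ZF set) or when ECX reaches zero, leaving the remaining count
+ // in ECX.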
+
+ // Did we find a match?
+ __ j(kNotEqual, &not_found_label);
+
+ // Yes, we matched. Compute the index of the result.
+ __ subl(string_length, counter);
+ __ leal(out, Address(string_length, -1));
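+ // string_length - ECX is one past the match position, hence the -1.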
+
+ Label done;
+ __ jmp(&done);
+
+ // Failed to match; return -1.
+ __ Bind(&not_found_label);
+ __ movl(out, Immediate(-1));
+
+ // And join up at the end.
+ __ Bind(&done);
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
+ CreateStringIndexOfLocations(invoke, arena_, true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
+ CreateStringIndexOfLocations(invoke, arena_, false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+ locations->SetOut(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
+ X86Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register byte_array = locations->InAt(0).AsRegister<Register>();
+ __ testl(byte_array, byte_array);
+ SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+ codegen_->AddSlowPath(slow_path);
+ __ j(kEqual, slow_path->GetEntryLabel());
+
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes)));
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->SetOut(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
+ X86Assembler* assembler = GetAssembler();
+
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars)));
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetOut(Location::RegisterLocation(EAX));
+}
+
+void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
+ X86Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register string_to_copy = locations->InAt(0).AsRegister<Register>();
+ __ testl(string_to_copy, string_to_copy);
+ SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+ codegen_->AddSlowPath(slow_path);
+ __ j(kEqual, slow_path->GetEntryLabel());
+
+ __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString)));
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ __ Bind(slow_path->GetExitLabel());
+}
+
static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
Register address = locations->InAt(0).AsRegisterPairLow<Register>();
Location out_loc = locations->Out();
@@ -1536,8 +1722,7 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED)
}
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 35daaf60bb..c245cb646f 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -16,12 +16,14 @@
#include "intrinsics_x86_64.h"
+#include <limits>
+
#include "arch/x86_64/instruction_set_features_x86_64.h"
+#include "art_method-inl.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
-#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
@@ -103,28 +105,9 @@ static void MoveFromReturnRegister(Location trg,
}
}
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
- if (invoke->InputCount() == 0) {
- // No argument to move.
- return;
- }
-
- LocationSummary* locations = invoke->GetLocations();
- InvokeDexCallingConventionVisitor calling_convention_visitor;
-
- // We're moving potentially two or more locations to locations that could overlap, so we need
- // a parallel move resolver.
- HParallelMove parallel_move(arena);
-
- for (size_t i = 0; i < invoke->InputCount(); i++) {
- HInstruction* input = invoke->InputAt(i);
- Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
- Location actual_loc = locations->InAt(i);
-
- parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
- }
-
- codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
+ IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}
// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -143,7 +126,7 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
SaveLiveRegisters(codegen, invoke_->GetLocations());
- MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+ MoveArguments(invoke_, codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
@@ -623,7 +606,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
}
static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
- MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+ MoveArguments(invoke, codegen);
DCHECK(invoke->IsInvokeStaticOrDirect());
codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
@@ -802,7 +785,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
__ Bind(&nan);
// output = 0
- __ xorq(out, out);
+ __ xorl(out, out);
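+ // A 32-bit xor zero-extends into the full 64-bit register and has a shorter encoding.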
__ Bind(&done);
}
@@ -824,16 +807,10 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
// Location of count
const int32_t count_offset = mirror::String::CountOffset().Int32Value();
- // Starting offset within data array
- const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
- // Start of char data with array_
- const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- Location temp_loc = locations->GetTemp(0);
- CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
// TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
// the cost.
@@ -849,12 +826,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
codegen_->MaybeRecordImplicitNullCheck(invoke);
__ j(kAboveEqual, slow_path->GetEntryLabel());
- // Get the actual element.
- __ movl(temp, idx); // temp := idx.
- __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx.
- __ movl(out, Address(obj, value_offset)); // obj := obj.array.
- // out = out[2*temp].
- __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+ // out = out[2*idx].
+ __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));
__ Bind(slow_path->GetExitLabel());
}
@@ -887,6 +860,229 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+static void CreateStringIndexOfLocations(HInvoke* invoke,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = new (allocator) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+ // The data needs to be in RDI for scasw, so request that the string be there anyway.
+ locations->SetInAt(0, Location::RegisterLocation(RDI));
+ // If we look for a constant char, we'll still have to copy it into RAX, so just request the
+ // allocator to do that anyway. We can still do the constant check by checking the parameter
+ // of the instruction explicitly.
+ // Note: This works as we don't clobber RAX anywhere.
+ locations->SetInAt(1, Location::RegisterLocation(RAX));
+ if (!start_at_zero) {
+ locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
+ }
+ // As we clobber RDI during execution anyway, also use it as the output.
+ locations->SetOut(Location::SameAsFirstInput());
+
+ // repne scasw uses RCX as the counter.
+ locations->AddTemp(Location::RegisterLocation(RCX));
+ // Need another temporary to be able to compute the result.
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenerateStringIndexOf(HInvoke* invoke,
+ X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = invoke->GetLocations();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ // Check our assumptions for registers.
+ DCHECK_EQ(string_obj.AsRegister(), RDI);
+ DCHECK_EQ(search_value.AsRegister(), RAX);
+ DCHECK_EQ(counter.AsRegister(), RCX);
+ DCHECK_EQ(out.AsRegister(), RDI);
+
+ // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+ // or directly dispatch if we have a constant.
+ SlowPathCodeX86_64* slow_path = nullptr;
+ if (invoke->InputAt(1)->IsIntConstant()) {
+ if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+ std::numeric_limits<uint16_t>::max()) {
+ // Always needs the slow-path. We could directly dispatch to it, but this case should be
+ // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+ slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ } else {
+ __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
+ slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ j(kAbove, slow_path->GetEntryLabel());
+ }
+
+ // From here down, we know that we are looking for a char that fits in 16 bits.
+ // Location of reference to data array within the String object.
+ int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+ // Location of count within the String object.
+ int32_t count_offset = mirror::String::CountOffset().Int32Value();
+
+ // Load string length, i.e., the count field of the string.
+ __ movl(string_length, Address(string_obj, count_offset));
+
+ // Do a zero-length check.
+ // TODO: Support jecxz.
+ Label not_found_label;
+ __ testl(string_length, string_length);
+ __ j(kEqual, &not_found_label);
+
+ if (start_at_zero) {
+ // Number of chars to scan is the same as the string length.
+ __ movl(counter, string_length);
+
+ // Move to the start of the string.
+ __ addq(string_obj, Immediate(value_offset));
+ } else {
+ CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
+
+ // Do a start_index check.
+ __ cmpl(start_index, string_length);
+ __ j(kGreaterEqual, &not_found_label);
+
+ // Ensure we have a start index >= 0.
+ __ xorl(counter, counter);
+ __ cmpl(start_index, Immediate(0));
+ __ cmov(kGreater, counter, start_index, false); // 32-bit copy is enough.
+
+ // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+ __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+
+ // Now update ecx, the repne scasw work counter: it will be string.length - start_index.
+ __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
+ __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
+ }
+
+ // Everything is set up for repne scasw:
+ // * Comparison address in RDI.
+ // * Counter in ECX.
+ __ repne_scasw();
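+ // repne scasw stops on a match (ZF set) or when the counter hits zero; it then holds the
+ // number of chars left.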
+
+ // Did we find a match?
+ __ j(kNotEqual, &not_found_label);
+
+ // Yes, we matched. Compute the index of the result.
+ __ subl(string_length, counter);
+ __ leal(out, Address(string_length, -1));
+
+ Label done;
+ __ jmp(&done);
+
+ // Failed to match; return -1.
+ __ Bind(&not_found_label);
+ __ movl(out, Immediate(-1));
+
+ // And join up at the end.
+ __ Bind(&done);
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
+ CreateStringIndexOfLocations(invoke, arena_, true);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
+ CreateStringIndexOfLocations(invoke, arena_, false);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
+ locations->SetOut(Location::RegisterLocation(RAX));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
+ X86_64Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
+ __ testl(byte_array, byte_array);
+ SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+ codegen_->AddSlowPath(slow_path);
+ __ j(kEqual, slow_path->GetEntryLabel());
+
+ __ gs()->call(Address::Absolute(
+ QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ locations->SetOut(Location::RegisterLocation(RAX));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
+ X86_64Assembler* assembler = GetAssembler();
+
+ __ gs()->call(Address::Absolute(
+ QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCall,
+ kIntrinsified);
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetOut(Location::RegisterLocation(RAX));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
+ X86_64Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
+ __ testl(string_to_copy, string_to_copy);
+ SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
+ codegen_->AddSlowPath(slow_path);
+ __ j(kEqual, slow_path->GetEntryLabel());
+
+ __ gs()->call(Address::Absolute(
+ QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
+ codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+ __ Bind(slow_path->GetExitLabel());
+}
+
static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
@@ -1390,8 +1586,7 @@ void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
-UNIMPLEMENTED_INTRINSIC(StringIndexOf)
-UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index bf9b8e59c5..2535ea274a 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -39,8 +39,9 @@ static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) {
}
}
- if (instruction->HasEnvironment()) {
- HEnvironment* environment = instruction->GetEnvironment();
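+ // Environments may be chained (one per inlined frame), so walk the whole parent chain.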
+ for (HEnvironment* environment = instruction->GetEnvironment();
+ environment != nullptr;
+ environment = environment->GetParent()) {
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
HInstruction* input = environment->GetInstructionAt(i);
if (input != nullptr) {
@@ -63,13 +64,15 @@ static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) {
* If `environment` has a loop header phi, we replace it with its first input.
*/
static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info) {
- for (size_t i = 0, e = environment->Size(); i < e; ++i) {
- HInstruction* input = environment->GetInstructionAt(i);
- if (input != nullptr && IsPhiOf(input, info->GetHeader())) {
- environment->RemoveAsUserOfInput(i);
- HInstruction* incoming = input->InputAt(0);
- environment->SetRawEnvAt(i, incoming);
- incoming->AddEnvUseAt(environment, i);
+ for (; environment != nullptr; environment = environment->GetParent()) {
+ for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+ HInstruction* input = environment->GetInstructionAt(i);
+ if (input != nullptr && IsPhiOf(input, info->GetHeader())) {
+ environment->RemoveAsUserOfInput(i);
+ HInstruction* incoming = input->InputAt(0);
+ environment->SetRawEnvAt(i, incoming);
+ incoming->AddEnvUseAt(environment, i);
+ }
}
}
}
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index 7818c606db..4f259b5095 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -39,7 +39,7 @@ namespace art {
static void TestCode(const uint16_t* data, const int* expected_order, size_t number_of_blocks) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 52367730ed..7cb00a1923 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -32,7 +32,7 @@
namespace art {
static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 8a96ee9ace..9d7d0b6c67 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -46,7 +46,7 @@ static void DumpBitVector(BitVector* vector,
static void TestCode(const uint16_t* data, const char* expected) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
@@ -445,44 +445,40 @@ TEST(LivenessTest, Loop5) {
TEST(LivenessTest, Loop6) {
// Bitsets are made of:
- // (constant0, constant4, constant5, phi in block 2, phi in block 8)
+ // (constant0, constant4, constant5, phi in block 2)
const char* expected =
"Block 0\n"
- " live in: (00000)\n"
- " live out: (11100)\n"
- " kill: (11100)\n"
+ " live in: (0000)\n"
+ " live out: (1110)\n"
+ " kill: (1110)\n"
"Block 1\n"
- " live in: (11100)\n"
- " live out: (01100)\n"
- " kill: (00000)\n"
+ " live in: (1110)\n"
+ " live out: (0110)\n"
+ " kill: (0000)\n"
"Block 2\n" // loop header
- " live in: (01100)\n"
- " live out: (01110)\n"
- " kill: (00010)\n"
+ " live in: (0110)\n"
+ " live out: (0111)\n"
+ " kill: (0001)\n"
"Block 3\n"
- " live in: (01100)\n"
- " live out: (01100)\n"
- " kill: (00000)\n"
- "Block 4\n" // original back edge
- " live in: (01100)\n"
- " live out: (01100)\n"
- " kill: (00000)\n"
- "Block 5\n" // original back edge
- " live in: (01100)\n"
- " live out: (01100)\n"
- " kill: (00000)\n"
+ " live in: (0110)\n"
+ " live out: (0110)\n"
+ " kill: (0000)\n"
+ "Block 4\n" // back edge
+ " live in: (0110)\n"
+ " live out: (0110)\n"
+ " kill: (0000)\n"
+ "Block 5\n" // back edge
+ " live in: (0110)\n"
+ " live out: (0110)\n"
+ " kill: (0000)\n"
"Block 6\n" // return block
- " live in: (00010)\n"
- " live out: (00000)\n"
- " kill: (00000)\n"
+ " live in: (0001)\n"
+ " live out: (0000)\n"
+ " kill: (0000)\n"
"Block 7\n" // exit block
- " live in: (00000)\n"
- " live out: (00000)\n"
- " kill: (00000)\n"
- "Block 8\n" // synthesized back edge
- " live in: (01100)\n"
- " live out: (01100)\n"
- " kill: (00001)\n";
+ " live in: (0000)\n"
+ " live out: (0000)\n"
+ " kill: (0000)\n";
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index a1ae67009e..42aba04828 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -25,8 +25,6 @@ LocationSummary::LocationSummary(HInstruction* instruction,
bool intrinsified)
: inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()),
temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0),
- environment_(instruction->GetBlock()->GetGraph()->GetArena(),
- instruction->EnvironmentSize()),
output_overlaps_(Location::kOutputOverlap),
call_kind_(call_kind),
stack_mask_(nullptr),
@@ -37,10 +35,6 @@ LocationSummary::LocationSummary(HInstruction* instruction,
for (size_t i = 0; i < instruction->InputCount(); ++i) {
inputs_.Put(i, Location());
}
- environment_.SetSize(instruction->EnvironmentSize());
- for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) {
- environment_.Put(i, Location());
- }
instruction->SetLocations(this);
if (NeedsSafepoint()) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index c3a99150c4..09bbb33042 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -525,14 +525,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
return temps_.Size();
}
- void SetEnvironmentAt(uint32_t at, Location location) {
- environment_.Put(at, location);
- }
-
- Location GetEnvironmentAt(uint32_t at) const {
- return environment_.Get(at);
- }
-
Location Out() const { return output_; }
bool CanCall() const { return call_kind_ != kNoCall; }
@@ -602,7 +594,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> {
private:
GrowableArray<Location> inputs_;
GrowableArray<Location> temps_;
- GrowableArray<Location> environment_;
// Whether the output overlaps with any of the inputs. If it overlaps, then it cannot
// share the same register as the inputs.
Location::OutputOverlap output_overlaps_;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index c158ddf4ee..2bad68217b 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -17,6 +17,8 @@
#include "nodes.h"
#include "ssa_builder.h"
+#include "base/bit_vector-inl.h"
+#include "base/bit_utils.h"
#include "utils/growable_array.h"
#include "scoped_thread_state_change.h"
@@ -37,8 +39,9 @@ static void RemoveAsUser(HInstruction* instruction) {
instruction->RemoveAsUserOfInput(i);
}
- HEnvironment* environment = instruction->GetEnvironment();
- if (environment != nullptr) {
+ for (HEnvironment* environment = instruction->GetEnvironment();
+ environment != nullptr;
+ environment = environment->GetParent()) {
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
if (environment->GetInstructionAt(i) != nullptr) {
environment->RemoveAsUserOfInput(i);
@@ -191,24 +194,6 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) {
void HGraph::SimplifyLoop(HBasicBlock* header) {
HLoopInformation* info = header->GetLoopInformation();
- // If there are more than one back edge, make them branch to the same block that
- // will become the only back edge. This simplifies finding natural loops in the
- // graph.
- // Also, if the loop is a do/while (that is the back edge is an if), change the
- // back edge to be a goto. This simplifies code generation of suspend cheks.
- if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) {
- HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc());
- AddBlock(new_back_edge);
- new_back_edge->AddInstruction(new (arena_) HGoto());
- for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) {
- HBasicBlock* back_edge = info->GetBackEdges().Get(pred);
- back_edge->ReplaceSuccessor(header, new_back_edge);
- }
- info->ClearBackEdges();
- info->AddBackEdge(new_back_edge);
- new_back_edge->AddSuccessor(header);
- }
-
// Make sure the loop has only one pre header. This simplifies SSA building by having
// to just look at the pre header to know which locals are initialized at entry of the
// loop.
@@ -218,11 +203,9 @@ void HGraph::SimplifyLoop(HBasicBlock* header) {
AddBlock(pre_header);
pre_header->AddInstruction(new (arena_) HGoto());
- ArenaBitVector back_edges(arena_, GetBlocks().Size(), false);
- HBasicBlock* back_edge = info->GetBackEdges().Get(0);
for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) {
HBasicBlock* predecessor = header->GetPredecessors().Get(pred);
- if (predecessor != back_edge) {
+ if (!info->IsBackEdge(*predecessor)) {
predecessor->ReplaceSuccessor(header, pre_header);
pred--;
}
@@ -230,9 +213,17 @@ void HGraph::SimplifyLoop(HBasicBlock* header) {
pre_header->AddSuccessor(header);
}
- // Make sure the second predecessor of a loop header is the back edge.
- if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) {
- header->SwapPredecessors();
+ // Make sure the first predecessor of a loop header is the incoming block.
+ if (info->IsBackEdge(*header->GetPredecessors().Get(0))) {
+ HBasicBlock* to_swap = header->GetPredecessors().Get(0);
+ for (size_t pred = 1, e = header->GetPredecessors().Size(); pred < e; ++pred) {
+ HBasicBlock* predecessor = header->GetPredecessors().Get(pred);
+ if (!info->IsBackEdge(*predecessor)) {
+ header->predecessors_.Put(pred, to_swap);
+ header->predecessors_.Put(0, predecessor);
+ break;
+ }
+ }
}
// Place the suspend check at the beginning of the header, so that live registers
@@ -303,25 +294,6 @@ HNullConstant* HGraph::GetNullConstant() {
return cached_null_constant_;
}
-template <class InstructionType, typename ValueType>
-InstructionType* HGraph::CreateConstant(ValueType value,
- ArenaSafeMap<ValueType, InstructionType*>* cache) {
- // Try to find an existing constant of the given value.
- InstructionType* constant = nullptr;
- auto cached_constant = cache->find(value);
- if (cached_constant != cache->end()) {
- constant = cached_constant->second;
- }
-
- // If not found or previously deleted, create and cache a new instruction.
- if (constant == nullptr || constant->GetBlock() == nullptr) {
- constant = new (arena_) InstructionType(value);
- cache->Overwrite(value, constant);
- InsertConstant(constant);
- }
- return constant;
-}
-
HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value) {
switch (type) {
case Primitive::Type::kPrimBoolean:
@@ -343,6 +315,18 @@ HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value) {
}
}
+void HGraph::CacheFloatConstant(HFloatConstant* constant) {
+ int32_t value = bit_cast<int32_t, float>(constant->GetValue());
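+ // Keying by bit pattern keeps -0.0f and 0.0f (and distinct NaN payloads) as separate constants.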
+ DCHECK(cached_float_constants_.find(value) == cached_float_constants_.end());
+ cached_float_constants_.Overwrite(value, constant);
+}
+
+void HGraph::CacheDoubleConstant(HDoubleConstant* constant) {
+ int64_t value = bit_cast<int64_t, double>(constant->GetValue());
+ DCHECK(cached_double_constants_.find(value) == cached_double_constants_.end());
+ cached_double_constants_.Overwrite(value, constant);
+}
+
void HLoopInformation::Add(HBasicBlock* block) {
blocks_.SetBit(block->GetBlockId());
}
@@ -364,26 +348,60 @@ void HLoopInformation::PopulateRecursive(HBasicBlock* block) {
}
bool HLoopInformation::Populate() {
- DCHECK_EQ(GetBackEdges().Size(), 1u);
- HBasicBlock* back_edge = GetBackEdges().Get(0);
- DCHECK(back_edge->GetDominator() != nullptr);
- if (!header_->Dominates(back_edge)) {
- // This loop is not natural. Do not bother going further.
- return false;
- }
+ DCHECK_EQ(blocks_.NumSetBits(), 0u) << "Loop information has already been populated";
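+ // Loops may now have several back edges; check dominance for, and populate from, each one.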
+ for (size_t i = 0, e = GetBackEdges().Size(); i < e; ++i) {
+ HBasicBlock* back_edge = GetBackEdges().Get(i);
+ DCHECK(back_edge->GetDominator() != nullptr);
+ if (!header_->Dominates(back_edge)) {
+ // This loop is not natural. Do not bother going further.
+ return false;
+ }
- // Populate this loop: starting with the back edge, recursively add predecessors
- // that are not already part of that loop. Set the header as part of the loop
- // to end the recursion.
- // This is a recursive implementation of the algorithm described in
- // "Advanced Compiler Design & Implementation" (Muchnick) p192.
- blocks_.SetBit(header_->GetBlockId());
- PopulateRecursive(back_edge);
+ // Populate this loop: starting with the back edge, recursively add predecessors
+ // that are not already part of that loop. Set the header as part of the loop
+ // to end the recursion.
+ // This is a recursive implementation of the algorithm described in
+ // "Advanced Compiler Design & Implementation" (Muchnick) p192.
+ blocks_.SetBit(header_->GetBlockId());
+ PopulateRecursive(back_edge);
+ }
return true;
}
+void HLoopInformation::Update() {
+ HGraph* graph = header_->GetGraph();
+ for (uint32_t id : blocks_.Indexes()) {
+ HBasicBlock* block = graph->GetBlocks().Get(id);
+ // Reset loop information of non-header blocks inside the loop, except
+ // members of inner nested loops because those should already have been
+ // updated by their own LoopInformation.
+ if (block->GetLoopInformation() == this && block != header_) {
+ block->SetLoopInformation(nullptr);
+ }
+ }
+ blocks_.ClearAllBits();
+
+ if (back_edges_.IsEmpty()) {
+ // The loop has been dismantled, delete its suspend check and remove info
+ // from the header.
+ DCHECK(HasSuspendCheck());
+ header_->RemoveInstruction(suspend_check_);
+ header_->SetLoopInformation(nullptr);
+ header_ = nullptr;
+ suspend_check_ = nullptr;
+ } else {
+ if (kIsDebugBuild) {
+ for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
+ DCHECK(header_->Dominates(back_edges_.Get(i)));
+ }
+ }
+ // This loop still has reachable back edges. Repopulate the list of blocks.
+ bool populate_successful = Populate();
+ DCHECK(populate_successful);
+ }
+}
+
HBasicBlock* HLoopInformation::GetPreHeader() const {
- DCHECK_EQ(header_->GetPredecessors().Size(), 2u);
return header_->GetDominator();
}
@@ -395,6 +413,14 @@ bool HLoopInformation::IsIn(const HLoopInformation& other) const {
return other.blocks_.IsBitSet(header_->GetBlockId());
}
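+// The loop's lifetime ends at the latest lifetime end among its back edges.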
+size_t HLoopInformation::GetLifetimeEnd() const {
+ size_t last_position = 0;
+ for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
+ last_position = std::max(back_edges_.Get(i)->GetLifetimeEnd(), last_position);
+ }
+ return last_position;
+}
+
bool HBasicBlock::Dominates(HBasicBlock* other) const {
// Walk up the dominator tree from `other`, to find out if `this`
// is an ancestor.
@@ -456,6 +482,20 @@ void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstructio
instructions_.InsertInstructionBefore(instruction, cursor);
}
+void HBasicBlock::InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor) {
+ DCHECK(!cursor->IsPhi());
+ DCHECK(!instruction->IsPhi());
+ DCHECK_EQ(instruction->GetId(), -1);
+ DCHECK_NE(cursor->GetId(), -1);
+ DCHECK_EQ(cursor->GetBlock(), this);
+ DCHECK(!instruction->IsControlFlow());
+ DCHECK(!cursor->IsControlFlow());
+ instruction->SetBlock(this);
+ instruction->SetId(GetGraph()->GetNextInstructionId());
+ UpdateInputsUsers(instruction);
+ instructions_.InsertInstructionAfter(instruction, cursor);
+}
+
void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) {
DCHECK_EQ(phi->GetId(), -1);
DCHECK_NE(cursor->GetId(), -1);
@@ -481,6 +521,7 @@ static void Remove(HInstructionList* instruction_list,
}
void HBasicBlock::RemoveInstruction(HInstruction* instruction, bool ensure_safety) {
+ DCHECK(!instruction->IsPhi());
Remove(&instructions_, this, instruction, ensure_safety);
}
@@ -488,6 +529,24 @@ void HBasicBlock::RemovePhi(HPhi* phi, bool ensure_safety) {
Remove(&phis_, this, phi, ensure_safety);
}
+void HBasicBlock::RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety) {
+ if (instruction->IsPhi()) {
+ RemovePhi(instruction->AsPhi(), ensure_safety);
+ } else {
+ RemoveInstruction(instruction, ensure_safety);
+ }
+}
+
+void HEnvironment::CopyFrom(const GrowableArray<HInstruction*>& locals) {
+ for (size_t i = 0; i < locals.Size(); i++) {
+ HInstruction* instruction = locals.Get(i);
+ SetRawEnvAt(i, instruction);
+ if (instruction != nullptr) {
+ instruction->AddEnvUseAt(this, i);
+ }
+ }
+}
+
void HEnvironment::CopyFrom(HEnvironment* env) {
for (size_t i = 0; i < env->Size(); i++) {
HInstruction* instruction = env->GetInstructionAt(i);
@@ -498,6 +557,28 @@ void HEnvironment::CopyFrom(HEnvironment* env) {
}
}
+void HEnvironment::CopyFromWithLoopPhiAdjustment(HEnvironment* env,
+ HBasicBlock* loop_header) {
+ DCHECK(loop_header->IsLoopHeader());
+ for (size_t i = 0; i < env->Size(); i++) {
+ HInstruction* instruction = env->GetInstructionAt(i);
+ SetRawEnvAt(i, instruction);
+ if (instruction == nullptr) {
+ continue;
+ }
+ if (instruction->IsLoopHeaderPhi() && (instruction->GetBlock() == loop_header)) {
+ // At the end of the loop pre-header, the corresponding value for instruction
+ // is the first input of the phi.
+ HInstruction* initial = instruction->AsPhi()->InputAt(0);
+ DCHECK(initial->GetBlock()->Dominates(loop_header));
+ SetRawEnvAt(i, initial);
+ initial->AddEnvUseAt(this, i);
+ } else {
+ instruction->AddEnvUseAt(this, i);
+ }
+ }
+}
+
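A worked example of the adjustment: for a counted loop whose header phi is `i = phi(initial, i_plus_one)`, an environment copied from inside the loop into the pre-header must not refer to the phi, which holds no value there yet:

// Sketch: vreg 0 holds the loop header phi of `loop_header`.
//   env inside the loop : [ i_phi, ... ]
//   env in the pre-header: [ initial, ... ]   // i_phi->InputAt(0)
// CopyFromWithLoopPhiAdjustment() performs exactly this substitution, so an
// instruction hoisted into the pre-header (e.g. HDeoptimize) sees the value
// `i` has on loop entry.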
void HEnvironment::RemoveAsUserOfInput(size_t index) const {
const HUserRecord<HEnvironment*> user_record = vregs_.Get(index);
user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
@@ -675,6 +756,9 @@ void HPhi::AddInput(HInstruction* input) {
void HPhi::RemoveInputAt(size_t index) {
RemoveAsUserOfInput(index);
inputs_.DeleteAt(index);
+ for (size_t i = index, e = InputCount(); i < e; ++i) {
+ InputRecordAt(i).GetUseNode()->SetIndex(i);
+ }
}
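The new fixup loop exists because deleting input `index` shifts every later input down one slot, leaving the indices recorded in their use-list nodes stale; HUseListNode gains a SetIndex() mutator in nodes.h below to make this possible. A worked example:

// Sketch: a phi with inputs [a, b, c], whose use nodes record indices 0, 1, 2.
// After RemoveInputAt(1):
//   inputs_ == [a, c], but c's use node still says index 2.
// The loop rewrites it to 1 so that later RemoveAsUserOfInput(i) calls keep
// addressing the right slot.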
#define DEFINE_ACCEPT(name, super) \
@@ -923,8 +1007,9 @@ void HBasicBlock::DisconnectAndDelete() {
HLoopInformation* loop_info = it.Current();
loop_info->Remove(this);
if (loop_info->IsBackEdge(*this)) {
- // This deliberately leaves the loop in an inconsistent state and will
- // fail SSAChecker unless the entire loop is removed during the pass.
+ // If this was the last back edge of the loop, we deliberately leave the
+ // loop in an inconsistent state and will fail SSAChecker unless the
+ // entire loop is removed during the pass.
loop_info->RemoveBackEdge(this);
}
}
@@ -1021,8 +1106,7 @@ void HBasicBlock::MergeWith(HBasicBlock* other) {
HLoopInformation* loop_info = it.Current();
loop_info->Remove(other);
if (loop_info->IsBackEdge(*other)) {
- loop_info->ClearBackEdges();
- loop_info->AddBackEdge(this);
+ loop_info->ReplaceBackEdge(other, this);
}
}
@@ -1253,11 +1337,9 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
loop_it.Current()->Add(to);
}
if (info->IsBackEdge(*at)) {
- // Only `at` can become a back edge, as the inlined blocks
- // are predecessors of `at`.
- DCHECK_EQ(1u, info->NumberOfBackEdges());
- info->ClearBackEdges();
- info->AddBackEdge(to);
+ // Only `to` can become a back edge, as the inlined blocks
+ // are predecessors of `to`.
+ info->ReplaceBackEdge(at, to);
}
}
}
@@ -1281,9 +1363,10 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
current->ReplaceWith(outer_graph->GetIntConstant(current->AsIntConstant()->GetValue()));
} else if (current->IsLongConstant()) {
current->ReplaceWith(outer_graph->GetLongConstant(current->AsLongConstant()->GetValue()));
- } else if (current->IsFloatConstant() || current->IsDoubleConstant()) {
- // TODO: Don't duplicate floating-point constants.
- current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction());
+ } else if (current->IsFloatConstant()) {
+ current->ReplaceWith(outer_graph->GetFloatConstant(current->AsFloatConstant()->GetValue()));
+ } else if (current->IsDoubleConstant()) {
+ current->ReplaceWith(outer_graph->GetDoubleConstant(current->AsDoubleConstant()->GetValue()));
} else if (current->IsParameterValue()) {
if (kIsDebugBuild
&& invoke->IsInvokeStaticOrDirect()
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 938d6fcd64..ef60d7680b 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -48,6 +48,7 @@ class HPhi;
class HSuspendCheck;
class LiveInterval;
class LocationSummary;
+class SlowPathCode;
class SsaBuilder;
static const int kDefaultNumberOfBlocks = 8;
@@ -116,7 +117,12 @@ class HInstructionList {
// Control-flow graph of a method. Contains a list of basic blocks.
class HGraph : public ArenaObject<kArenaAllocMisc> {
public:
- HGraph(ArenaAllocator* arena, bool debuggable = false, int start_instruction_id = 0)
+ HGraph(ArenaAllocator* arena,
+ const DexFile& dex_file,
+ uint32_t method_idx,
+ InstructionSet instruction_set,
+ bool debuggable = false,
+ int start_instruction_id = 0)
: arena_(arena),
blocks_(arena, kDefaultNumberOfBlocks),
reverse_post_order_(arena, kDefaultNumberOfBlocks),
@@ -130,9 +136,14 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
has_bounds_checks_(false),
debuggable_(debuggable),
current_instruction_id_(start_instruction_id),
+ dex_file_(dex_file),
+ method_idx_(method_idx),
+ instruction_set_(instruction_set),
cached_null_constant_(nullptr),
cached_int_constants_(std::less<int32_t>(), arena->Adapter()),
- cached_long_constants_(std::less<int64_t>(), arena->Adapter()) {}
+ cached_float_constants_(std::less<int32_t>(), arena->Adapter()),
+ cached_long_constants_(std::less<int64_t>(), arena->Adapter()),
+ cached_double_constants_(std::less<int64_t>(), arena->Adapter()) {}
ArenaAllocator* GetArena() const { return arena_; }
const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; }
@@ -241,8 +252,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
bool IsDebuggable() const { return debuggable_; }
// Returns a constant of the given type and value. If it does not exist
- // already, it is created and inserted into the graph. Only integral types
- // are currently supported.
+ // already, it is created and inserted into the graph. This method is only for
+ // integral types.
HConstant* GetConstant(Primitive::Type type, int64_t value);
HNullConstant* GetNullConstant();
HIntConstant* GetIntConstant(int32_t value) {
@@ -251,9 +262,23 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
HLongConstant* GetLongConstant(int64_t value) {
return CreateConstant(value, &cached_long_constants_);
}
+ HFloatConstant* GetFloatConstant(float value) {
+ return CreateConstant(bit_cast<int32_t, float>(value), &cached_float_constants_);
+ }
+ HDoubleConstant* GetDoubleConstant(double value) {
+ return CreateConstant(bit_cast<int64_t, double>(value), &cached_double_constants_);
+ }
HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
+ const DexFile& GetDexFile() const {
+ return dex_file_;
+ }
+
+ uint32_t GetMethodIdx() const {
+ return method_idx_;
+ }
+
private:
void VisitBlockForDominatorTree(HBasicBlock* block,
HBasicBlock* predecessor,
@@ -265,10 +290,34 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
void RemoveDeadBlocks(const ArenaBitVector& visited);
- template <class InstType, typename ValueType>
- InstType* CreateConstant(ValueType value, ArenaSafeMap<ValueType, InstType*>* cache);
+ template <class InstructionType, typename ValueType>
+ InstructionType* CreateConstant(ValueType value,
+ ArenaSafeMap<ValueType, InstructionType*>* cache) {
+ // Try to find an existing constant of the given value.
+ InstructionType* constant = nullptr;
+ auto cached_constant = cache->find(value);
+ if (cached_constant != cache->end()) {
+ constant = cached_constant->second;
+ }
+
+ // If not found or previously deleted, create and cache a new instruction.
+ if (constant == nullptr || constant->GetBlock() == nullptr) {
+ constant = new (arena_) InstructionType(value);
+ cache->Overwrite(value, constant);
+ InsertConstant(constant);
+ }
+ return constant;
+ }
+
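Usage sketch of the cache behavior, assuming an HGraph* `graph` built as in the unit tests further down this diff:

// Sketch: repeated requests are deduplicated through the cache.
HFloatConstant* a = graph->GetFloatConstant(2.5f);
HFloatConstant* b = graph->GetFloatConstant(2.5f);
DCHECK_EQ(a, b);  // The second call hits cached_float_constants_.
// If `a` is later removed from the graph (GetBlock() == nullptr), the next
// GetFloatConstant(2.5f) allocates a fresh instruction and re-caches it,
// which is why the lookup checks the block pointer and uses Overwrite().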
void InsertConstant(HConstant* instruction);
+ // Cache a float constant in the graph. This method should only be
+ // called by the SsaBuilder when creating "equivalent" instructions.
+ void CacheFloatConstant(HFloatConstant* constant);
+
+ // See CacheFloatConstant comment.
+ void CacheDoubleConstant(HDoubleConstant* constant);
+
ArenaAllocator* const arena_;
// List of blocks in insertion order.
@@ -306,11 +355,22 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
// The current id to assign to a newly added instruction. See HInstruction.id_.
int32_t current_instruction_id_;
- // Cached common constants often needed by optimization passes.
+ // The dex file from which the method comes.
+ const DexFile& dex_file_;
+
+ // The method index in the dex file.
+ const uint32_t method_idx_;
+
+ const InstructionSet instruction_set_;
+
+ // Cached constants.
HNullConstant* cached_null_constant_;
ArenaSafeMap<int32_t, HIntConstant*> cached_int_constants_;
+ ArenaSafeMap<int32_t, HFloatConstant*> cached_float_constants_;
ArenaSafeMap<int64_t, HLongConstant*> cached_long_constants_;
+ ArenaSafeMap<int64_t, HDoubleConstant*> cached_double_constants_;
+ friend class SsaBuilder; // For caching constants.
friend class SsaLivenessAnalysis; // For the linear order.
ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
DISALLOW_COPY_AND_ASSIGN(HGraph);
@@ -362,14 +422,30 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> {
return back_edges_;
}
- void ClearBackEdges() {
- back_edges_.Reset();
+ // Returns the greatest lifetime position among this loop's back edges.
+ size_t GetLifetimeEnd() const;
+
+ void ReplaceBackEdge(HBasicBlock* existing, HBasicBlock* new_back_edge) {
+ for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) {
+ if (back_edges_.Get(i) == existing) {
+ back_edges_.Put(i, new_back_edge);
+ return;
+ }
+ }
+ UNREACHABLE();
}
- // Find blocks that are part of this loop. Returns whether the loop is a natural loop,
+ // Finds blocks that are part of this loop. Returns whether the loop is a natural loop,
 // that is, the header dominates the back edge.
bool Populate();
+ // Reanalyzes the loop by removing loop info from its blocks and re-running
+ // Populate(). If there are no back edges left, the loop info is completely
+ // removed as well as its SuspendCheck instruction. It must be run on nested
+ // inner loops first.
+ void Update();
+
// Returns whether this loop information contains `block`.
// Note that this loop information *must* be populated before entering this function.
bool Contains(const HBasicBlock& block) const;
@@ -526,6 +602,13 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
predecessors_.Put(1, temp);
}
+ void SwapSuccessors() {
+ DCHECK_EQ(successors_.Size(), 2u);
+ HBasicBlock* temp = successors_.Get(0);
+ successors_.Put(0, successors_.Get(1));
+ successors_.Put(1, temp);
+ }
+
size_t GetPredecessorIndexOf(HBasicBlock* predecessor) {
for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) {
if (predecessors_.Get(i) == predecessor) {
@@ -578,7 +661,9 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
void DisconnectAndDelete();
void AddInstruction(HInstruction* instruction);
+ // Insert `instruction` before/after an existing instruction `cursor`.
void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
+ void InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor);
// Replace instruction `initial` with `replacement` within this block.
void ReplaceAndRemoveInstructionWith(HInstruction* initial,
HInstruction* replacement);
@@ -589,9 +674,10 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
// instruction is not in use and removes it from the use lists of its inputs.
void RemoveInstruction(HInstruction* instruction, bool ensure_safety = true);
void RemovePhi(HPhi* phi, bool ensure_safety = true);
+ void RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety = true);
bool IsLoopHeader() const {
- return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this);
+ return IsInLoop() && (loop_information_->GetHeader() == this);
}
bool IsLoopPreHeaderFirstPredecessor() const {
@@ -610,7 +696,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
void SetInLoop(HLoopInformation* info) {
if (IsLoopHeader()) {
// Nothing to do. This just means `info` is an outer loop.
- } else if (loop_information_ == nullptr) {
+ } else if (!IsInLoop()) {
loop_information_ = info;
} else if (loop_information_->Contains(*info->GetHeader())) {
// Block is currently part of an outer loop. Make it part of this inner loop.
@@ -631,7 +717,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
bool IsInLoop() const { return loop_information_ != nullptr; }
- // Returns wheter this block dominates the blocked passed as parameter.
+ // Returns whether this block dominates the block passed as parameter.
bool Dominates(HBasicBlock* block) const;
size_t GetLifetimeStart() const { return lifetime_start_; }
@@ -682,7 +768,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
void Advance() {
DCHECK(!Done());
- current_ = current_->GetHeader()->GetDominator()->GetLoopInformation();
+ current_ = current_->GetPreHeader()->GetLoopInformation();
}
HLoopInformation* Current() const {
@@ -795,13 +881,14 @@ class HUseListNode : public ArenaObject<kArenaAllocMisc> {
HUseListNode* GetNext() const { return next_; }
T GetUser() const { return user_; }
size_t GetIndex() const { return index_; }
+ void SetIndex(size_t index) { index_ = index; }
private:
HUseListNode(T user, size_t index)
: user_(user), index_(index), prev_(nullptr), next_(nullptr) {}
T const user_;
- const size_t index_;
+ size_t index_;
HUseListNode<T>* prev_;
HUseListNode<T>* next_;
@@ -872,6 +959,14 @@ class HUseList : public ValueObject {
return first_ != nullptr && first_->next_ == nullptr;
}
+ size_t SizeSlow() const {
+ size_t count = 0;
+ for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) {
+ ++count;
+ }
+ return count;
+ }
+
private:
HUseListNode<T>* first_;
};
@@ -998,15 +1093,47 @@ class SideEffects : public ValueObject {
// A HEnvironment object contains the values of virtual registers at a given location.
class HEnvironment : public ArenaObject<kArenaAllocMisc> {
public:
- HEnvironment(ArenaAllocator* arena, size_t number_of_vregs)
- : vregs_(arena, number_of_vregs) {
+ HEnvironment(ArenaAllocator* arena,
+ size_t number_of_vregs,
+ const DexFile& dex_file,
+ uint32_t method_idx,
+ uint32_t dex_pc)
+ : vregs_(arena, number_of_vregs),
+ locations_(arena, number_of_vregs),
+ parent_(nullptr),
+ dex_file_(dex_file),
+ method_idx_(method_idx),
+ dex_pc_(dex_pc) {
vregs_.SetSize(number_of_vregs);
for (size_t i = 0; i < number_of_vregs; i++) {
vregs_.Put(i, HUserRecord<HEnvironment*>());
}
+
+ locations_.SetSize(number_of_vregs);
+ for (size_t i = 0; i < number_of_vregs; ++i) {
+ locations_.Put(i, Location());
+ }
+ }
+
+ void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) {
+ parent_ = new (allocator) HEnvironment(allocator,
+ parent->Size(),
+ parent->GetDexFile(),
+ parent->GetMethodIdx(),
+ parent->GetDexPc());
+ if (parent->GetParent() != nullptr) {
+ parent_->SetAndCopyParentChain(allocator, parent->GetParent());
+ }
+ parent_->CopyFrom(parent);
}
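The parent chain models inlined callers: each parent environment is the environment of the calling method at the call site. A sketch of what SetAndCopyParentChain() clones:

// Sketch: after inlining B into A, and C into B, an instruction in C carries
//   env(C) -> parent env(B) -> parent env(A)
// SetAndCopyParentChain() duplicates that caller chain node by node, which is
// what the ParentEnvironment test below exercises via GetEnvUses().SizeSlow().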
- void CopyFrom(HEnvironment* env);
+ void CopyFrom(const GrowableArray<HInstruction*>& locals);
+ void CopyFrom(HEnvironment* environment);
+
+ // Copy from `env`. If a value is a loop header phi of `loop_header`, copy
+ // the phi's first input instead. This is for inserting instructions that
+ // require an environment (like HDeoptimization) in the loop pre-header.
+ void CopyFromWithLoopPhiAdjustment(HEnvironment* env, HBasicBlock* loop_header);
void SetRawEnvAt(size_t index, HInstruction* instruction) {
vregs_.Put(index, HUserRecord<HEnvironment*>(instruction));
@@ -1020,6 +1147,28 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> {
size_t Size() const { return vregs_.Size(); }
+ HEnvironment* GetParent() const { return parent_; }
+
+ void SetLocationAt(size_t index, Location location) {
+ locations_.Put(index, location);
+ }
+
+ Location GetLocationAt(size_t index) const {
+ return locations_.Get(index);
+ }
+
+ uint32_t GetDexPc() const {
+ return dex_pc_;
+ }
+
+ uint32_t GetMethodIdx() const {
+ return method_idx_;
+ }
+
+ const DexFile& GetDexFile() const {
+ return dex_file_;
+ }
+
private:
// Record instructions' use entries of this environment for constant-time removal.
// It should only be called by HInstruction when a new environment use is added.
@@ -1030,8 +1179,13 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> {
}
GrowableArray<HUserRecord<HEnvironment*> > vregs_;
+ GrowableArray<Location> locations_;
+ HEnvironment* parent_;
+ const DexFile& dex_file_;
+ const uint32_t method_idx_;
+ const uint32_t dex_pc_;
- friend HInstruction;
+ friend class HInstruction;
DISALLOW_COPY_AND_ASSIGN(HEnvironment);
};
@@ -1161,6 +1315,11 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
}
virtual bool NeedsEnvironment() const { return false; }
+ virtual uint32_t GetDexPc() const {
+ LOG(FATAL) << "GetDexPc() cannot be called on an instruction that"
+ " does not need an environment";
+ UNREACHABLE();
+ }
virtual bool IsControlFlow() const { return false; }
virtual bool CanThrow() const { return false; }
bool HasSideEffects() const { return side_effects_.HasSideEffects(); }
@@ -1238,8 +1397,31 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
// copying, the uses lists are being updated.
void CopyEnvironmentFrom(HEnvironment* environment) {
ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena();
- environment_ = new (allocator) HEnvironment(allocator, environment->Size());
+ environment_ = new (allocator) HEnvironment(
+ allocator,
+ environment->Size(),
+ environment->GetDexFile(),
+ environment->GetMethodIdx(),
+ environment->GetDexPc());
environment_->CopyFrom(environment);
+ if (environment->GetParent() != nullptr) {
+ environment_->SetAndCopyParentChain(allocator, environment->GetParent());
+ }
+ }
+
+ void CopyEnvironmentFromWithLoopPhiAdjustment(HEnvironment* environment,
+ HBasicBlock* block) {
+ ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena();
+ environment_ = new (allocator) HEnvironment(
+ allocator,
+ environment->Size(),
+ environment->GetDexFile(),
+ environment->GetMethodIdx(),
+ environment->GetDexPc());
+ if (environment->GetParent() != nullptr) {
+ environment_->SetAndCopyParentChain(allocator, environment->GetParent());
+ }
+ environment_->CopyFromWithLoopPhiAdjustment(environment, block);
}
// Returns the number of entries in the environment. Typically, that is the
@@ -1615,7 +1797,7 @@ class HDeoptimize : public HTemplateInstruction<1> {
bool NeedsEnvironment() const OVERRIDE { return true; }
bool CanThrow() const OVERRIDE { return true; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(Deoptimize);
@@ -2034,13 +2216,14 @@ class HFloatConstant : public HConstant {
private:
explicit HFloatConstant(float value) : HConstant(Primitive::kPrimFloat), value_(value) {}
+ explicit HFloatConstant(int32_t value)
+ : HConstant(Primitive::kPrimFloat), value_(bit_cast<float, int32_t>(value)) {}
const float value_;
- // Only the SsaBuilder can currently create floating-point constants. If we
- // ever need to create them later in the pipeline, we will have to handle them
- // the same way as integral constants.
+ // Only the SsaBuilder and HGraph can create floating-point constants.
friend class SsaBuilder;
+ friend class HGraph;
DISALLOW_COPY_AND_ASSIGN(HFloatConstant);
};
@@ -2071,13 +2254,14 @@ class HDoubleConstant : public HConstant {
private:
explicit HDoubleConstant(double value) : HConstant(Primitive::kPrimDouble), value_(value) {}
+ explicit HDoubleConstant(int64_t value)
+ : HConstant(Primitive::kPrimDouble), value_(bit_cast<double, int64_t>(value)) {}
const double value_;
- // Only the SsaBuilder can currently create floating-point constants. If we
- // ever need to create them later in the pipeline, we will have to handle them
- // the same way as integral constants.
+ // Only the SsaBuilder and HGraph can create floating-point constants.
friend class SsaBuilder;
+ friend class HGraph;
DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
};
@@ -2174,9 +2358,15 @@ class HInvoke : public HInstruction {
SetRawInputAt(index, argument);
}
+ // Returns the number of arguments. This number can be lower than
+ // the number of inputs returned by InputCount(), as some invoke
+ // instructions (e.g. HInvokeStaticOrDirect) can have non-argument
+ // inputs at the end of their list of inputs.
+ uint32_t GetNumberOfArguments() const { return number_of_arguments_; }
+
Primitive::Type GetType() const OVERRIDE { return return_type_; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
uint32_t GetDexMethodIndex() const { return dex_method_index_; }
@@ -2193,16 +2383,19 @@ class HInvoke : public HInstruction {
protected:
HInvoke(ArenaAllocator* arena,
uint32_t number_of_arguments,
+ uint32_t number_of_other_inputs,
Primitive::Type return_type,
uint32_t dex_pc,
uint32_t dex_method_index)
: HInstruction(SideEffects::All()),
+ number_of_arguments_(number_of_arguments),
inputs_(arena, number_of_arguments),
return_type_(return_type),
dex_pc_(dex_pc),
dex_method_index_(dex_method_index),
intrinsic_(Intrinsics::kNone) {
- inputs_.SetSize(number_of_arguments);
+ uint32_t number_of_inputs = number_of_arguments + number_of_other_inputs;
+ inputs_.SetSize(number_of_inputs);
}
const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); }
@@ -2210,6 +2403,7 @@ class HInvoke : public HInstruction {
inputs_.Put(index, input);
}
+ uint32_t number_of_arguments_;
GrowableArray<HUserRecord<HInstruction*> > inputs_;
const Primitive::Type return_type_;
const uint32_t dex_pc_;
@@ -2236,14 +2430,21 @@ class HInvokeStaticOrDirect : public HInvoke {
uint32_t dex_pc,
uint32_t dex_method_index,
bool is_recursive,
+ int32_t string_init_offset,
InvokeType original_invoke_type,
InvokeType invoke_type,
ClinitCheckRequirement clinit_check_requirement)
- : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
+ : HInvoke(arena,
+ number_of_arguments,
+ clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u,
+ return_type,
+ dex_pc,
+ dex_method_index),
original_invoke_type_(original_invoke_type),
invoke_type_(invoke_type),
is_recursive_(is_recursive),
- clinit_check_requirement_(clinit_check_requirement) {}
+ clinit_check_requirement_(clinit_check_requirement),
+ string_init_offset_(string_init_offset) {}
bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
UNUSED(obj);
@@ -2256,21 +2457,24 @@ class HInvokeStaticOrDirect : public HInvoke {
InvokeType GetInvokeType() const { return invoke_type_; }
bool IsRecursive() const { return is_recursive_; }
bool NeedsDexCache() const OVERRIDE { return !IsRecursive(); }
+ bool IsStringInit() const { return string_init_offset_ != 0; }
+ int32_t GetStringInitOffset() const { return string_init_offset_; }
// Is this instruction a call to a static method?
bool IsStatic() const {
return GetInvokeType() == kStatic;
}
- // Remove the art::HClinitCheck or art::HLoadClass instruction as
- // last input (only relevant for static calls with explicit clinit
- // check).
- void RemoveClinitCheckOrLoadClassAsLastInput() {
+ // Remove the art::HLoadClass instruction set as last input by
+ // art::PrepareForRegisterAllocation::VisitClinitCheck in lieu of
+ // the initial art::HClinitCheck instruction (only relevant for
+ // static calls with explicit clinit check).
+ void RemoveLoadClassAsLastInput() {
DCHECK(IsStaticWithExplicitClinitCheck());
size_t last_input_index = InputCount() - 1;
HInstruction* last_input = InputAt(last_input_index);
DCHECK(last_input != nullptr);
- DCHECK(last_input->IsClinitCheck() || last_input->IsLoadClass()) << last_input->DebugName();
+ DCHECK(last_input->IsLoadClass()) << last_input->DebugName();
RemoveAsUserOfInput(last_input_index);
inputs_.DeleteAt(last_input_index);
clinit_check_requirement_ = ClinitCheckRequirement::kImplicit;
@@ -2311,6 +2515,9 @@ class HInvokeStaticOrDirect : public HInvoke {
const InvokeType invoke_type_;
const bool is_recursive_;
ClinitCheckRequirement clinit_check_requirement_;
+ // Thread entrypoint offset for string init method if this is a string init invoke.
+ // Note that there are multiple string init methods, each having its own offset.
+ int32_t string_init_offset_;
DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
};
@@ -2323,7 +2530,7 @@ class HInvokeVirtual : public HInvoke {
uint32_t dex_pc,
uint32_t dex_method_index,
uint32_t vtable_index)
- : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
+ : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index),
vtable_index_(vtable_index) {}
bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
@@ -2349,7 +2556,7 @@ class HInvokeInterface : public HInvoke {
uint32_t dex_pc,
uint32_t dex_method_index,
uint32_t imt_index)
- : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
+ : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index),
imt_index_(imt_index) {}
bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
@@ -2376,7 +2583,7 @@ class HNewInstance : public HExpression<0> {
type_index_(type_index),
entrypoint_(entrypoint) {}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
uint16_t GetTypeIndex() const { return type_index_; }
// Calls runtime so needs an environment.
@@ -2428,7 +2635,7 @@ class HNewArray : public HExpression<1> {
SetRawInputAt(0, length);
}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
uint16_t GetTypeIndex() const { return type_index_; }
// Calls runtime so needs an environment.
@@ -2523,7 +2730,7 @@ class HDiv : public HBinaryOperation {
return (y == -1) ? -x : x / y;
}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(Div);
@@ -2550,7 +2757,7 @@ class HRem : public HBinaryOperation {
return (y == -1) ? 0 : x % y;
}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(Rem);
@@ -2577,7 +2784,7 @@ class HDivZeroCheck : public HExpression<1> {
bool NeedsEnvironment() const OVERRIDE { return true; }
bool CanThrow() const OVERRIDE { return true; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(DivZeroCheck);
@@ -2772,7 +2979,7 @@ class HTypeConversion : public HExpression<1> {
// Required by the x86 and ARM code generators when producing calls
// to the runtime.
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
bool CanBeMoved() const OVERRIDE { return true; }
bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; }
@@ -2882,7 +3089,7 @@ class HNullCheck : public HExpression<1> {
bool CanBeNull() const OVERRIDE { return false; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(NullCheck);
@@ -3045,7 +3252,7 @@ class HArraySet : public HTemplateInstruction<3> {
bool NeedsTypeCheck() const { return needs_type_check_; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
HInstruction* GetArray() const { return InputAt(0); }
HInstruction* GetIndex() const { return InputAt(1); }
@@ -3115,7 +3322,7 @@ class HBoundsCheck : public HExpression<2> {
bool CanThrow() const OVERRIDE { return true; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(BoundsCheck);
@@ -3155,19 +3362,25 @@ class HTemporary : public HTemplateInstruction<0> {
class HSuspendCheck : public HTemplateInstruction<0> {
public:
explicit HSuspendCheck(uint32_t dex_pc)
- : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {}
+ : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc), slow_path_(nullptr) {}
bool NeedsEnvironment() const OVERRIDE {
return true;
}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
+ void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; }
+ SlowPathCode* GetSlowPath() const { return slow_path_; }
DECLARE_INSTRUCTION(SuspendCheck);
private:
const uint32_t dex_pc_;
+ // Only used for code generation, in order to share the same slow path between back edges
+ // of the same loop.
+ SlowPathCode* slow_path_;
+
DISALLOW_COPY_AND_ASSIGN(HSuspendCheck);
};
@@ -3194,7 +3407,7 @@ class HLoadClass : public HExpression<0> {
size_t ComputeHashCode() const OVERRIDE { return type_index_; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
uint16_t GetTypeIndex() const { return type_index_; }
bool IsReferrersClass() const { return is_referrers_class_; }
@@ -3268,7 +3481,7 @@ class HLoadString : public HExpression<0> {
size_t ComputeHashCode() const OVERRIDE { return string_index_; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
uint32_t GetStringIndex() const { return string_index_; }
// TODO: Can we deopt or debug when we resolve a string?
@@ -3306,7 +3519,7 @@ class HClinitCheck : public HExpression<1> {
return true;
}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); }
@@ -3406,7 +3619,7 @@ class HThrow : public HTemplateInstruction<1> {
bool CanThrow() const OVERRIDE { return true; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
DECLARE_INSTRUCTION(Throw);
@@ -3440,7 +3653,7 @@ class HInstanceOf : public HExpression<2> {
return false;
}
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
bool IsClassFinal() const { return class_is_final_; }
@@ -3515,7 +3728,7 @@ class HCheckCast : public HTemplateInstruction<2> {
bool MustDoNullCheck() const { return must_do_null_check_; }
void ClearMustDoNullCheck() { must_do_null_check_ = false; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
bool IsClassFinal() const { return class_is_final_; }
@@ -3561,7 +3774,7 @@ class HMonitorOperation : public HTemplateInstruction<1> {
bool NeedsEnvironment() const OVERRIDE { return true; }
bool CanThrow() const OVERRIDE { return true; }
- uint32_t GetDexPc() const { return dex_pc_; }
+ uint32_t GetDexPc() const OVERRIDE { return dex_pc_; }
bool IsEnter() const { return kind_ == kEnter; }
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 4e83ce576c..2736453ccc 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -16,6 +16,7 @@
#include "base/arena_allocator.h"
#include "nodes.h"
+#include "optimizing_unit_test.h"
#include "gtest/gtest.h"
@@ -29,7 +30,7 @@ TEST(Node, RemoveInstruction) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -49,7 +50,8 @@ TEST(Node, RemoveInstruction) {
first_block->AddSuccessor(exit_block);
exit_block->AddInstruction(new (&allocator) HExit());
- HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1);
+ HEnvironment* environment = new (&allocator) HEnvironment(
+ &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0);
null_check->SetRawEnvironment(environment);
environment->SetRawEnvAt(0, parameter);
parameter->AddEnvUseAt(null_check->GetEnvironment(), 0);
@@ -70,7 +72,7 @@ TEST(Node, InsertInstruction) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -96,7 +98,7 @@ TEST(Node, AddInstruction) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -112,4 +114,51 @@ TEST(Node, AddInstruction) {
ASSERT_TRUE(parameter->GetUses().HasOnlyOneUse());
}
+TEST(Node, ParentEnvironment) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+
+ HGraph* graph = CreateGraph(&allocator);
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->SetEntryBlock(entry);
+ HInstruction* parameter1 = new (&allocator) HParameterValue(0, Primitive::kPrimNot);
+ HInstruction* with_environment = new (&allocator) HNullCheck(parameter1, 0);
+ entry->AddInstruction(parameter1);
+ entry->AddInstruction(with_environment);
+ entry->AddInstruction(new (&allocator) HExit());
+
+ ASSERT_TRUE(parameter1->HasUses());
+ ASSERT_TRUE(parameter1->GetUses().HasOnlyOneUse());
+
+ HEnvironment* environment = new (&allocator) HEnvironment(
+ &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0);
+ GrowableArray<HInstruction*> array(&allocator, 1);
+ array.Add(parameter1);
+
+ environment->CopyFrom(array);
+ with_environment->SetRawEnvironment(environment);
+
+ ASSERT_TRUE(parameter1->HasEnvironmentUses());
+ ASSERT_TRUE(parameter1->GetEnvUses().HasOnlyOneUse());
+
+ HEnvironment* parent1 = new (&allocator) HEnvironment(
+ &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0);
+ parent1->CopyFrom(array);
+
+ ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 2u);
+
+ HEnvironment* parent2 = new (&allocator) HEnvironment(
+ &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0);
+ parent2->CopyFrom(array);
+ parent1->SetAndCopyParentChain(&allocator, parent2);
+
+ // One use for parent2, and one other use for the new parent of parent1.
+ ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 4u);
+
+ // We have copied the parent chain. So we now have two more uses.
+ environment->SetAndCopyParentChain(&allocator, parent1);
+ ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 6u);
+}
+
} // namespace art
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index b2c13adf35..b0d1433667 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -21,6 +21,7 @@
#include "cfi_test.h"
#include "gtest/gtest.h"
#include "optimizing/code_generator.h"
+#include "optimizing/optimizing_unit_test.h"
#include "utils/assembler.h"
#include "optimizing/optimizing_cfi_test_expected.inc"
@@ -30,7 +31,7 @@ namespace art {
// Run the tests only on host.
#ifndef HAVE_ANDROID_OS
-class OptimizingCFITest : public CFITest {
+class OptimizingCFITest : public CFITest {
public:
// Enable this flag to generate the expected outputs.
static constexpr bool kGenerateExpected = false;
@@ -45,10 +46,10 @@ class OptimizingCFITest : public CFITest {
std::unique_ptr<const InstructionSetFeatures> isa_features;
std::string error;
isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
- HGraph graph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
// Generate simple frame with some spills.
std::unique_ptr<CodeGenerator> code_gen(
- CodeGenerator::Create(&graph, isa, *isa_features.get(), opts));
+ CodeGenerator::Create(graph, isa, *isa_features.get(), opts));
const int frame_size = 64;
int core_reg = 0;
int fp_reg = 0;
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 2125f6eb01..9ccc0113f6 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -32,7 +32,7 @@ static constexpr uint8_t expected_cfi_kThumb2[] = {
// 0x00000012: .cfi_def_cfa_offset: 64
static constexpr uint8_t expected_asm_kArm64[] = {
- 0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
+ 0xE0, 0x0F, 0x1C, 0xF8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9,
0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
};
@@ -41,7 +41,7 @@ static constexpr uint8_t expected_cfi_kArm64[] = {
0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
};
-// 0x00000000: str w0, [sp, #-64]!
+// 0x00000000: str x0, [sp, #-64]!
// 0x00000004: .cfi_def_cfa_offset: 64
// 0x00000004: stp x19, x20, [sp, #40]
// 0x00000008: .cfi_offset: r19 at cfa-24
@@ -99,13 +99,13 @@ static constexpr uint8_t expected_cfi_kX86[] = {
static constexpr uint8_t expected_asm_kX86_64[] = {
0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24,
- 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x89, 0x3C, 0x24, 0xF2,
- 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24,
- 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3,
+ 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x48, 0x89, 0x3C, 0x24,
+ 0xF2, 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C,
+ 0x24, 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3,
};
static constexpr uint8_t expected_cfi_kX86_64[] = {
0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E,
- 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x43, 0x0A, 0x47, 0xDD, 0x47,
+ 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x44, 0x0A, 0x47, 0xDD, 0x47,
0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6,
0x41, 0x0B, 0x0E, 0x40,
};
@@ -121,21 +121,20 @@ static constexpr uint8_t expected_cfi_kX86_64[] = {
// 0x0000000d: .cfi_offset: r30 at cfa-32
// 0x0000000d: movsd [rsp + 24], xmm12
// 0x00000014: .cfi_offset: r29 at cfa-40
-// 0x00000014: mov [rsp], edi
-// 0x00000017: .cfi_remember_state
-// 0x00000017: movsd xmm12, [rsp + 24]
-// 0x0000001e: .cfi_restore: r29
-// 0x0000001e: movsd xmm13, [rsp + 32]
-// 0x00000025: .cfi_restore: r30
-// 0x00000025: addq rsp, 40
-// 0x00000029: .cfi_def_cfa_offset: 24
-// 0x00000029: pop rbx
-// 0x0000002a: .cfi_def_cfa_offset: 16
-// 0x0000002a: .cfi_restore: r3
-// 0x0000002a: pop rbp
-// 0x0000002b: .cfi_def_cfa_offset: 8
-// 0x0000002b: .cfi_restore: r6
-// 0x0000002b: ret
-// 0x0000002c: .cfi_restore_state
-// 0x0000002c: .cfi_def_cfa_offset: 64
-
+// 0x00000014: movq [rsp], rdi
+// 0x00000018: .cfi_remember_state
+// 0x00000018: movsd xmm12, [rsp + 24]
+// 0x0000001f: .cfi_restore: r29
+// 0x0000001f: movsd xmm13, [rsp + 32]
+// 0x00000026: .cfi_restore: r30
+// 0x00000026: addq rsp, 40
+// 0x0000002a: .cfi_def_cfa_offset: 24
+// 0x0000002a: pop rbx
+// 0x0000002b: .cfi_def_cfa_offset: 16
+// 0x0000002b: .cfi_restore: r3
+// 0x0000002b: pop rbp
+// 0x0000002c: .cfi_def_cfa_offset: 8
+// 0x0000002c: .cfi_restore: r6
+// 0x0000002c: ret
+// 0x0000002d: .cfi_restore_state
+// 0x0000002d: .cfi_def_cfa_offset: 64
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 05451bcaa6..c7b2c67019 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -19,6 +19,7 @@
#include <fstream>
#include <stdint.h>
+#include "art_method-inl.h"
#include "base/arena_allocator.h"
#include "base/dumpable.h"
#include "base/timing_logger.h"
@@ -44,7 +45,6 @@
#include "intrinsics.h"
#include "licm.h"
#include "jni/quick/jni_compiler.h"
-#include "mirror/art_method-inl.h"
#include "nodes.h"
#include "prepare_for_register_allocation.h"
#include "reference_type_propagation.h"
@@ -196,7 +196,7 @@ class OptimizingCompiler FINAL : public Compiler {
return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file);
}
- uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const OVERRIDE
+ uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize(
InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
@@ -320,8 +320,10 @@ static void RunOptimizations(HGraph* graph,
const DexCompilationUnit& dex_compilation_unit,
PassInfoPrinter* pass_info_printer,
StackHandleScopeCollection* handles) {
- HDeadCodeElimination dce1(graph, stats);
- HDeadCodeElimination dce2(graph, stats, "dead_code_elimination_final");
+ HDeadCodeElimination dce1(graph, stats,
+ HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
+ HDeadCodeElimination dce2(graph, stats,
+ HDeadCodeElimination::kFinalDeadCodeEliminationPassName);
HConstantFolding fold1(graph);
InstructionSimplifier simplify1(graph, stats);
HBooleanSimplifier boolean_simplify(graph);
@@ -512,7 +514,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
ArenaAllocator arena(Runtime::Current()->GetArenaPool());
HGraph* graph = new (&arena) HGraph(
- &arena, compiler_driver->GetCompilerOptions().GetDebuggable());
+ &arena, dex_file, method_idx, compiler_driver->GetInstructionSet(),
+ compiler_driver->GetCompilerOptions().GetDebuggable());
// For testing purposes, we put a special marker on method names that should be compiled
// with this compiler. This makes sure we're not regressing.
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 65c84e6942..b6b1bb1cad 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -29,25 +29,26 @@ enum MethodCompilationStat {
kCompiledBaseline,
kCompiledOptimized,
kCompiledQuick,
- kInstructionSimplifications,
kInlinedInvoke,
- kNotCompiledUnsupportedIsa,
- kNotCompiledPathological,
+ kInstructionSimplifications,
+ kNotCompiledBranchOutsideMethodCode,
+ kNotCompiledCannotBuildSSA,
+ kNotCompiledCantAccesType,
+ kNotCompiledClassNotVerified,
kNotCompiledHugeMethod,
kNotCompiledLargeMethodNoBranches,
- kNotCompiledCannotBuildSSA,
kNotCompiledNoCodegen,
- kNotCompiledUnresolvedMethod,
- kNotCompiledUnresolvedField,
kNotCompiledNonSequentialRegPair,
+ kNotCompiledPathological,
kNotCompiledSpaceFilter,
- kNotOptimizedTryCatch,
- kNotOptimizedDisabled,
- kNotCompiledCantAccesType,
- kNotOptimizedRegisterAllocator,
kNotCompiledUnhandledInstruction,
+ kNotCompiledUnresolvedField,
+ kNotCompiledUnresolvedMethod,
+ kNotCompiledUnsupportedIsa,
kNotCompiledVerifyAtRuntime,
- kNotCompiledClassNotVerified,
+ kNotOptimizedDisabled,
+ kNotOptimizedRegisterAllocator,
+ kNotOptimizedTryCatch,
kRemovedCheckedCast,
kRemovedDeadInstruction,
kRemovedNullCheck,
@@ -98,23 +99,24 @@ class OptimizingCompilerStats {
case kCompiledQuick : return "kCompiledQuick";
case kInlinedInvoke : return "kInlinedInvoke";
case kInstructionSimplifications: return "kInstructionSimplifications";
- case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
- case kNotCompiledPathological : return "kNotCompiledPathological";
+ case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode";
+ case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
+ case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
+ case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches";
- case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen";
- case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod";
- case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair";
- case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
- case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
- case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
+ case kNotCompiledPathological : return "kNotCompiledPathological";
case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
- case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
+ case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField";
+ case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod";
+ case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
- case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
+ case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
+ case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
+ case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
case kRemovedCheckedCast: return "kRemovedCheckedCast";
case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
case kRemovedNullCheck: return "kRemovedNullCheck";
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 6b236927da..3ef96faab3 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -72,11 +72,17 @@ void RemoveSuspendChecks(HGraph* graph) {
}
}
+inline HGraph* CreateGraph(ArenaAllocator* allocator) {
+ return new (allocator) HGraph(
+ allocator, *reinterpret_cast<DexFile*>(allocator->Alloc(sizeof(DexFile))), -1, kRuntimeISA,
+ false);
+}
+
// Create a control-flow graph from Dex instructions.
inline HGraph* CreateCFG(ArenaAllocator* allocator,
const uint16_t* data,
Primitive::Type return_type = Primitive::kPrimInt) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HGraphBuilder builder(graph, return_type);
const DexFile::CodeItem* item =
reinterpret_cast<const DexFile::CodeItem*>(data);
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index e89417df7d..9ede91013e 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -20,6 +20,7 @@
#include "base/value_object.h"
#include "utils/growable_array.h"
#include "locations.h"
+#include "primitive.h"
namespace art {
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index fa6b3c292c..78d11857c3 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -91,7 +91,7 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire
// previously) by the graph builder during the creation of the
// static invoke instruction, but is no longer required at this
// stage (i.e., after inlining has been performed).
- invoke->RemoveClinitCheckOrLoadClassAsLastInput();
+ invoke->RemoveLoadClassAsLastInput();
// If the load class instruction is no longer used, remove it from
// the graph.
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index 293fde978e..c56100dfa1 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -30,7 +30,7 @@ namespace art {
static void TestCode(const uint16_t* data, const char* expected) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 12b1c2b9bd..e93e06118c 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -16,7 +16,7 @@
#include "reference_type_propagation.h"
-#include "class_linker.h"
+#include "class_linker-inl.h"
#include "mirror/class-inl.h"
#include "mirror/dex_cache.h"
#include "scoped_thread_state_change.h"
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 0fdf051957..5f439c86d9 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -71,7 +71,9 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters());
// Always reserve for the current method and the graph's max out registers.
// TODO: compute it instead.
- reserved_out_slots_ = 1 + codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
+ // ArtMethod* takes 2 vregs on 64-bit architectures.
+ reserved_out_slots_ = InstructionSetPointerSize(codegen->GetInstructionSet()) / kVRegSize +
+ codegen->GetGraph()->GetMaximumNumberOfOutVRegs();
}
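The arithmetic behind the new expression, with the usual kVRegSize of 4 bytes:

// Sketch: reserved slots for the ArtMethod* plus outgoing vregs.
//   arm64/x86-64: pointer size 8 -> 8 / 4 == 2 slots (the "2 vregs" note)
//   arm/x86:      pointer size 4 -> 4 / 4 == 1 slot
// plus GetMaximumNumberOfOutVRegs() for the argument-passing slots.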
bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED,
@@ -768,7 +770,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) {
}
} else {
DCHECK(!current->IsHighInterval());
- int hint = current->FindFirstRegisterHint(free_until);
+ int hint = current->FindFirstRegisterHint(free_until, liveness_);
if (hint != kNoRegister) {
DCHECK(!IsBlocked(hint));
reg = hint;
@@ -1101,8 +1103,8 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter
}
LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) {
- HBasicBlock* block_from = liveness_.GetBlockFromPosition(from);
- HBasicBlock* block_to = liveness_.GetBlockFromPosition(to);
+ HBasicBlock* block_from = liveness_.GetBlockFromPosition(from / 2);
+ HBasicBlock* block_to = liveness_.GetBlockFromPosition(to / 2);
DCHECK(block_from != nullptr);
DCHECK(block_to != nullptr);
@@ -1111,6 +1113,41 @@ LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t fro
return Split(interval, to);
}
+ /*
+ * Non-linear control flow will force moves at every branch instruction to the new location.
+ * To avoid having all branches do the moves, we find the next non-linear position and
+ * split the interval at this position. Take the following example (block number is the linear
+ * order position):
+ *
+ * B1
+ * / \
+ * B2 B3
+ * \ /
+ * B4
+ *
+ * B2 needs to split an interval whose next use is in B4. If we were to split at the
+ * beginning of B4, a move would be needed between B3 and B4 to ensure the interval
+ * is in the correct location there. This makes performance worse if the interval is
+ * spilled and both B2 and B3 need to reload it before entering B4.
+ *
+ * By splitting at B3, we give the register allocator a chance to allocate the
+ * interval to the same register as in B1, and therefore avoid doing any
+ * moves in B3.
+ */
+ if (block_from->GetDominator() != nullptr) {
+ const GrowableArray<HBasicBlock*>& dominated = block_from->GetDominator()->GetDominatedBlocks();
+ for (size_t i = 0; i < dominated.Size(); ++i) {
+ size_t position = dominated.Get(i)->GetLifetimeStart();
+ if ((position > from) && (block_to->GetLifetimeStart() > position)) {
+ // Even if we found a better block, we continue iterating in case
+ // a dominated block is closer.
+ // Note that dominated blocks are not sorted in liveness order.
+ block_to = dominated.Get(i);
+ DCHECK_NE(block_to, block_from);
+ }
+ }
+ }
+
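A worked trace of the new scan, using made-up lifetime starts for the diagram above (B1=0, B2=4, B3=8, B4=12, with `from` inside B2 and `to` at B4's start):

// Sketch with assumed positions: from == 6, initial block_to == B4 (start 12).
//   B2: start 4  -> rejected, 4 <= from
//   B3: start 8  -> 8 > from and 12 > 8, so block_to becomes B3
//   B4: start 12 -> rejected, current block_to start 8 is not > 12
// The split lands at B3's start, so only the B2->B4 path needs a move and
// B3->B4 stays move-free, as the comment block above explains.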
// If `to` is in a loop, find the outermost loop header which does not contain `from`.
for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) {
HBasicBlock* header = it.Current()->GetHeader();
@@ -1455,6 +1492,7 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
: Location::StackSlot(interval->GetParent()->GetSpillSlot()));
}
UsePosition* use = current->GetFirstUse();
+ UsePosition* env_use = current->GetFirstEnvironmentUse();
// Walk over all siblings, updating locations of use positions, and
// connecting them when they are adjacent.
@@ -1467,15 +1505,14 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
LiveRange* range = current->GetFirstRange();
while (range != nullptr) {
while (use != nullptr && use->GetPosition() < range->GetStart()) {
- DCHECK(use->GetIsEnvironment());
+ DCHECK(use->IsSynthesized());
use = use->GetNext();
}
while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
+ DCHECK(!use->GetIsEnvironment());
DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
- LocationSummary* locations = use->GetUser()->GetLocations();
- if (use->GetIsEnvironment()) {
- locations->SetEnvironmentAt(use->GetInputIndex(), source);
- } else {
+ if (!use->IsSynthesized()) {
+ LocationSummary* locations = use->GetUser()->GetLocations();
Location expected_location = locations->InAt(use->GetInputIndex());
// The expected (actual) location may be invalid in case the input is unused. Currently
// this only happens for intrinsics.
@@ -1492,6 +1529,20 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
}
use = use->GetNext();
}
+
+ // Walk over the environment uses, and update their locations.
+ while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
+ env_use = env_use->GetNext();
+ }
+
+ while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
+ DCHECK(current->CoversSlow(env_use->GetPosition())
+ || (env_use->GetPosition() == range->GetEnd()));
+ HEnvironment* environment = env_use->GetUser()->GetEnvironment();
+ environment->SetLocationAt(env_use->GetInputIndex(), source);
+ env_use = env_use->GetNext();
+ }
+
range = range->GetNext();
}
@@ -1554,10 +1605,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) {
} while (current != nullptr);
if (kIsDebugBuild) {
- // Following uses can only be environment uses. The location for
- // these environments will be none.
+ // Following uses can only be synthesized uses.
while (use != nullptr) {
- DCHECK(use->GetIsEnvironment());
+ DCHECK(use->IsSynthesized());
use = use->GetNext();
}
}
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index dc9c708eea..97bd777e1c 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_
#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_
+#include "arch/instruction_set.h"
#include "base/macros.h"
#include "primitive.h"
#include "utils/growable_array.h"
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 8c6d904a4c..b72ffb8bf7 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -38,7 +38,7 @@ namespace art {
static bool Check(const uint16_t* data) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
@@ -60,7 +60,7 @@ static bool Check(const uint16_t* data) {
TEST(RegisterAllocatorTest, ValidateIntervals) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
std::unique_ptr<const X86InstructionSetFeatures> features_x86(
X86InstructionSetFeatures::FromCppDefines());
x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
@@ -255,7 +255,7 @@ TEST(RegisterAllocatorTest, Loop2) {
}
static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
builder.BuildGraph(*item);
@@ -463,7 +463,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator,
HPhi** phi,
HInstruction** input1,
HInstruction** input2) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -593,7 +593,7 @@ TEST(RegisterAllocatorTest, PhiHint) {
static HGraph* BuildFieldReturn(ArenaAllocator* allocator,
HInstruction** field,
HInstruction** ret) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -661,7 +661,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) {
static HGraph* BuildTwoSubs(ArenaAllocator* allocator,
HInstruction** first_sub,
HInstruction** second_sub) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -731,7 +731,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) {
static HGraph* BuildDiv(ArenaAllocator* allocator,
HInstruction** div) {
- HGraph* graph = new (allocator) HGraph(allocator);
+ HGraph* graph = CreateGraph(allocator);
HBasicBlock* entry = new (allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
@@ -783,7 +783,7 @@ TEST(RegisterAllocatorTest, SpillInactive) {
// Create a synthesized graph to please the register_allocator and
// ssa_liveness_analysis code.
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
graph->AddBlock(entry);
graph->SetEntryBlock(entry);
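These tests (and the ones below) now obtain graphs through a CreateGraph() factory instead of placement new. The helper's definition is outside this diff; assuming it only centralizes construction, it could be as small as:

// Hypothetical shape of the test helper; the real one may additionally
// configure target-specific graph properties.
static HGraph* CreateGraph(ArenaAllocator* allocator) {
  return new (allocator) HGraph(allocator);
}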
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 7a252af2ad..59a2852735 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -332,7 +332,7 @@ void SsaBuilder::BuildSsa() {
}
HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) {
- return GetLocalsFor(block)->GetInstructionAt(local);
+ return GetLocalsFor(block)->Get(local);
}
void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
@@ -349,7 +349,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
HPhi* phi = new (GetGraph()->GetArena()) HPhi(
GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid);
block->AddPhi(phi);
- current_locals_->SetRawEnvAt(local, phi);
+ current_locals_->Put(local, phi);
}
}
// Save the loop header so that the last phase of the analysis knows which
@@ -389,7 +389,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
block->AddPhi(phi);
value = phi;
}
- current_locals_->SetRawEnvAt(local, value);
+ current_locals_->Put(local, value);
}
}
@@ -417,6 +417,7 @@ HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) {
ArenaAllocator* allocator = graph->GetArena();
result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue()));
constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
+ graph->CacheFloatConstant(result);
} else {
// If there is already a constant with the expected type, we know it is
// the floating point equivalent of this constant.
@@ -439,6 +440,7 @@ HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) {
ArenaAllocator* allocator = graph->GetArena();
result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue()));
constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext());
+ graph->CacheDoubleConstant(result);
} else {
// If there is already a constant with the expected type, we know it is
// the floating point equivalent of this constant.
@@ -518,7 +520,7 @@ HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) {
}
void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
- HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber());
+ HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber());
// If the operation requests a specific type, we make sure its input is of that type.
if (load->GetType() != value->GetType()) {
if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) {
@@ -532,7 +534,7 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
}
void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
- current_locals_->SetRawEnvAt(store->GetLocal()->GetRegNumber(), store->InputAt(1));
+ current_locals_->Put(store->GetLocal()->GetRegNumber(), store->InputAt(1));
store->GetBlock()->RemoveInstruction(store);
}
@@ -541,8 +543,12 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) {
return;
}
HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment(
- GetGraph()->GetArena(), current_locals_->Size());
- environment->CopyFrom(current_locals_);
+ GetGraph()->GetArena(),
+ current_locals_->Size(),
+ GetGraph()->GetDexFile(),
+ GetGraph()->GetMethodIdx(),
+ instruction->GetDexPc());
+ environment->CopyFrom(*current_locals_);
instruction->SetRawEnvironment(environment);
}
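An HEnvironment is now constructed with the dex file, method index and dex pc of the method it belongs to, presumably so that nested (inlined) environments can identify their method. A small wrapper showing the call shape used above (wrapper name hypothetical):

static HEnvironment* NewEnvironmentFor(HGraph* graph,
                                       HInstruction* instruction,
                                       size_t number_of_vregs) {
  return new (graph->GetArena()) HEnvironment(graph->GetArena(),
                                              number_of_vregs,
                                              graph->GetDexFile(),
                                              graph->GetMethodIdx(),
                                              instruction->GetDexPc());
}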
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 265e95b4ac..1c83c4ba48 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -58,14 +58,15 @@ class SsaBuilder : public HGraphVisitor {
void BuildSsa();
- HEnvironment* GetLocalsFor(HBasicBlock* block) {
- HEnvironment* env = locals_for_.Get(block->GetBlockId());
- if (env == nullptr) {
- env = new (GetGraph()->GetArena()) HEnvironment(
+ GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) {
+ GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId());
+ if (locals == nullptr) {
+ locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>(
GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs());
- locals_for_.Put(block->GetBlockId(), env);
+ locals->SetSize(GetGraph()->GetNumberOfVRegs());
+ locals_for_.Put(block->GetBlockId(), locals);
}
- return env;
+ return locals;
}
HInstruction* ValueOfLocal(HBasicBlock* block, size_t local);
@@ -93,14 +94,14 @@ class SsaBuilder : public HGraphVisitor {
static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type);
// Locals for the current block being visited.
- HEnvironment* current_locals_;
+ GrowableArray<HInstruction*>* current_locals_;
// Keep track of loop headers found. The last phase of the analysis iterates
// over these blocks to set the inputs of their phis.
GrowableArray<HBasicBlock*> loop_headers_;
// HEnvironment for each block.
- GrowableArray<HEnvironment*> locals_for_;
+ GrowableArray<GrowableArray<HInstruction*>*> locals_for_;
DISALLOW_COPY_AND_ASSIGN(SsaBuilder);
};
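With per-block locals now a plain GrowableArray<HInstruction*> instead of an HEnvironment, reads and writes go through Get()/Put(), as in the .cc hunks above. A usage sketch (helper hypothetical):

// Copy the SSA value of one dex register slot into another within a block.
static void CopyLocal(SsaBuilder* builder, HBasicBlock* block,
                      size_t from_reg, size_t to_reg) {
  GrowableArray<HInstruction*>* locals = builder->GetLocalsFor(block);
  locals->Put(to_reg, locals->Get(from_reg));
}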
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index ea0e7c3712..250eb04a1c 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -75,9 +75,7 @@ void SsaLivenessAnalysis::LinearizeGraph() {
HBasicBlock* block = it.Current();
size_t number_of_forward_predecessors = block->GetPredecessors().Size();
if (block->IsLoopHeader()) {
- // We rely on having simplified the CFG.
- DCHECK_EQ(1u, block->GetLoopInformation()->NumberOfBackEdges());
- number_of_forward_predecessors--;
+ number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges();
}
forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors);
}
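Linearization no longer assumes a simplified CFG with a single back edge per loop; every back edge is excluded from the forward-predecessor count. As a worked check, in plain arithmetic:

// A header with predecessors {preheader, latch1, latch2} has two back
// edges, leaving exactly one forward predecessor.
static size_t ForwardPredecessors(size_t predecessors, size_t back_edges) {
  return predecessors - back_edges;
}
// ForwardPredecessors(3, 2) == 1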
@@ -220,10 +218,11 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
      // Process the environments first, because we know their uses come after
      // or at the same liveness position as the inputs.
- if (current->HasEnvironment()) {
+ for (HEnvironment* environment = current->GetEnvironment();
+ environment != nullptr;
+ environment = environment->GetParent()) {
// Handle environment uses. See statements (b) and (c) of the
// SsaLivenessAnalysis.
- HEnvironment* environment = current->GetEnvironment();
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
HInstruction* instruction = environment->GetInstructionAt(i);
bool should_be_live = ShouldBeLiveForEnvironment(instruction);
@@ -233,7 +232,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
}
if (instruction != nullptr) {
instruction->GetLiveInterval()->AddUse(
- current, i, /* is_environment */ true, should_be_live);
+ current, environment, i, should_be_live);
}
}
}
@@ -245,7 +244,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
// to be materialized.
if (input->HasSsaIndex()) {
live_in->SetBit(input->GetSsaIndex());
- input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false);
+ input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i);
}
}
}
@@ -264,13 +263,12 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
}
if (block->IsLoopHeader()) {
- HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0);
+ size_t last_position = block->GetLoopInformation()->GetLifetimeEnd();
// For all live_in instructions at the loop header, we need to create a range
// that covers the full loop.
for (uint32_t idx : live_in->Indexes()) {
HInstruction* current = instructions_from_ssa_index_.Get(idx);
- current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(),
- back_edge->GetLifetimeEnd());
+ current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position);
}
}
}
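The loop range now ends at HLoopInformation::GetLifetimeEnd(), defined outside this diff. Assuming it generalizes the old single-back-edge lookup to several back edges, a plausible definition is the maximum lifetime end over all of them:

#include <algorithm>

size_t HLoopInformation::GetLifetimeEnd() const {
  size_t last_position = 0;
  for (size_t i = 0, e = GetBackEdges().Size(); i < e; ++i) {
    last_position = std::max(last_position,
                             GetBackEdges().Get(i)->GetLifetimeEnd());
  }
  return last_position;
}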
@@ -322,7 +320,8 @@ static int RegisterOrLowRegister(Location location) {
return location.IsPair() ? location.low() : location.reg();
}
-int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
+int LiveInterval::FindFirstRegisterHint(size_t* free_until,
+ const SsaLivenessAnalysis& liveness) const {
DCHECK(!IsHighInterval());
if (IsTemp()) return kNoRegister;
@@ -336,12 +335,32 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
}
}
+ if (IsSplit() && liveness.IsAtBlockBoundary(GetStart() / 2)) {
+ // If the start of this interval is at a block boundary, we look at the
+ // locations the interval had in the predecessors of the block it starts
+ // in. If one of those locations is a register, we return it as a hint:
+ // this avoids a move between the two blocks.
+ HBasicBlock* block = liveness.GetBlockFromPosition(GetStart() / 2);
+ for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) {
+ size_t position = block->GetPredecessors().Get(i)->GetLifetimeEnd() - 1;
+ // We know positions above GetStart() do not have a location yet.
+ if (position < GetStart()) {
+ LiveInterval* existing = GetParent()->GetSiblingAt(position);
+ if (existing != nullptr
+ && existing->HasRegister()
+ && (free_until[existing->GetRegister()] > GetStart())) {
+ return existing->GetRegister();
+ }
+ }
+ }
+ }
+
UsePosition* use = first_use_;
size_t start = GetStart();
size_t end = GetEnd();
while (use != nullptr && use->GetPosition() <= end) {
size_t use_position = use->GetPosition();
- if (use_position >= start && !use->GetIsEnvironment()) {
+ if (use_position >= start && !use->IsSynthesized()) {
HInstruction* user = use->GetUser();
size_t input_index = use->GetInputIndex();
if (user->IsPhi()) {
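The predecessor-hint path above relies on GetSiblingAt(), whose definition is outside this hunk. Assuming standard sibling-chain semantics, it amounts to walking the chain for the sibling live at the requested position:

// Hedged sketch: return the sibling covering `position`, or nullptr if
// the value is not live there.
LiveInterval* LiveInterval::GetSiblingAt(size_t position) {
  LiveInterval* current = this;
  while (current != nullptr && !current->CoversSlow(position)) {
    current = current->GetNextSibling();
  }
  return current;
}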
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 97254edb5e..82c5454bb0 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -23,6 +23,7 @@
namespace art {
class CodeGenerator;
+class SsaLivenessAnalysis;
static constexpr int kNoRegister = -1;
@@ -103,21 +104,24 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> {
class UsePosition : public ArenaObject<kArenaAllocMisc> {
public:
UsePosition(HInstruction* user,
+ HEnvironment* environment,
size_t input_index,
- bool is_environment,
size_t position,
UsePosition* next)
: user_(user),
+ environment_(environment),
input_index_(input_index),
- is_environment_(is_environment),
position_(position),
next_(next) {
- DCHECK(user->IsPhi()
+ DCHECK((user == nullptr)
+ || user->IsPhi()
|| (GetPosition() == user->GetLifetimePosition() + 1)
|| (GetPosition() == user->GetLifetimePosition()));
DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
}
+ static constexpr size_t kNoInput = -1;
+
size_t GetPosition() const { return position_; }
UsePosition* GetNext() const { return next_; }
@@ -125,27 +129,38 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> {
HInstruction* GetUser() const { return user_; }
- bool GetIsEnvironment() const { return is_environment_; }
+ bool GetIsEnvironment() const { return environment_ != nullptr; }
+ bool IsSynthesized() const { return user_ == nullptr; }
size_t GetInputIndex() const { return input_index_; }
void Dump(std::ostream& stream) const {
stream << position_;
- if (is_environment_) {
- stream << " (env)";
- }
+ }
+
+ HLoopInformation* GetLoopInformation() const {
+ return user_->GetBlock()->GetLoopInformation();
}
UsePosition* Dup(ArenaAllocator* allocator) const {
return new (allocator) UsePosition(
- user_, input_index_, is_environment_, position_,
+ user_, environment_, input_index_, position_,
next_ == nullptr ? nullptr : next_->Dup(allocator));
}
+ bool RequiresRegister() const {
+ if (GetIsEnvironment()) return false;
+ if (IsSynthesized()) return false;
+ Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
+ return location.IsUnallocated()
+ && (location.GetPolicy() == Location::kRequiresRegister
+ || location.GetPolicy() == Location::kRequiresFpuRegister);
+ }
+
private:
HInstruction* const user_;
+ HEnvironment* const environment_;
const size_t input_index_;
- const bool is_environment_;
const size_t position_;
UsePosition* next_;
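A UsePosition now encodes three kinds of uses through its two pointers. A hypothetical helper (not part of the patch) making the encoding explicit:

enum class UseKind { kInput, kEnvironment, kSynthesized };

// user_ == nullptr        -> synthesized back-edge use
// environment_ != nullptr -> environment use
// otherwise               -> regular input use
static UseKind Classify(const UsePosition& use) {
  if (use.IsSynthesized()) return UseKind::kSynthesized;
  return use.GetIsEnvironment() ? UseKind::kEnvironment : UseKind::kInput;
}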
@@ -219,17 +234,19 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
void AddTempUse(HInstruction* instruction, size_t temp_index) {
DCHECK(IsTemp());
DCHECK(first_use_ == nullptr) << "A temporary can only have one user";
+ DCHECK(first_env_use_ == nullptr) << "A temporary cannot have an environment user";
size_t position = instruction->GetLifetimePosition();
first_use_ = new (allocator_) UsePosition(
- instruction, temp_index, /* is_environment */ false, position, first_use_);
+ instruction, /* environment */ nullptr, temp_index, position, first_use_);
AddRange(position, position + 1);
}
void AddUse(HInstruction* instruction,
+ HEnvironment* environment,
size_t input_index,
- bool is_environment,
bool keep_alive = false) {
// Set the use within the instruction.
+ bool is_environment = (environment != nullptr);
size_t position = instruction->GetLifetimePosition() + 1;
LocationSummary* locations = instruction->GetLocations();
if (!is_environment) {
@@ -239,9 +256,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
// location of the input just before that instruction (and not potential moves due
// to splitting).
position = instruction->GetLifetimePosition();
+ } else if (!locations->InAt(input_index).IsValid()) {
+ return;
}
}
+ if (!is_environment && instruction->IsInLoop()) {
+ AddBackEdgeUses(*instruction->GetBlock());
+ }
+
DCHECK(position == instruction->GetLifetimePosition()
|| position == instruction->GetLifetimePosition() + 1);
@@ -257,7 +280,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
}
DCHECK(first_use_->GetPosition() + 1 == position);
UsePosition* new_use = new (allocator_) UsePosition(
- instruction, input_index, is_environment, position, cursor->GetNext());
+ instruction, environment, input_index, position, cursor->GetNext());
cursor->SetNext(new_use);
if (first_range_->GetEnd() == first_use_->GetPosition()) {
first_range_->end_ = position;
@@ -265,8 +288,13 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
return;
}
- first_use_ = new (allocator_) UsePosition(
- instruction, input_index, is_environment, position, first_use_);
+ if (is_environment) {
+ first_env_use_ = new (allocator_) UsePosition(
+ instruction, environment, input_index, position, first_env_use_);
+ } else {
+ first_use_ = new (allocator_) UsePosition(
+ instruction, environment, input_index, position, first_use_);
+ }
if (is_environment && !keep_alive) {
// If this environment use does not keep the instruction live, it does not
@@ -300,8 +328,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) {
DCHECK(instruction->IsPhi());
+ if (block->IsInLoop()) {
+ AddBackEdgeUses(*block);
+ }
first_use_ = new (allocator_) UsePosition(
- instruction, input_index, false, block->GetLifetimeEnd(), first_use_);
+ instruction, /* environment */ nullptr, input_index, block->GetLifetimeEnd(), first_use_);
}
void AddRange(size_t start, size_t end) {
@@ -450,38 +481,17 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
if (is_temp_) {
return position == GetStart() ? position : kNoLifetime;
}
- if (position == GetStart() && IsParent()) {
- LocationSummary* locations = defined_by_->GetLocations();
- Location location = locations->Out();
- // This interval is the first interval of the instruction. If the output
- // of the instruction requires a register, we return the position of that instruction
- // as the first register use.
- if (location.IsUnallocated()) {
- if ((location.GetPolicy() == Location::kRequiresRegister)
- || (location.GetPolicy() == Location::kSameAsFirstInput
- && (locations->InAt(0).IsRegister()
- || locations->InAt(0).IsRegisterPair()
- || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
- return position;
- } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
- || (location.GetPolicy() == Location::kSameAsFirstInput
- && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) {
- return position;
- }
- } else if (location.IsRegister() || location.IsRegisterPair()) {
- return position;
- }
+
+ if (IsDefiningPosition(position) && DefinitionRequiresRegister()) {
+ return position;
}
UsePosition* use = first_use_;
size_t end = GetEnd();
while (use != nullptr && use->GetPosition() <= end) {
size_t use_position = use->GetPosition();
- if (use_position > position && !use->GetIsEnvironment()) {
- Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
- if (location.IsUnallocated()
- && (location.GetPolicy() == Location::kRequiresRegister
- || location.GetPolicy() == Location::kRequiresFpuRegister)) {
+ if (use_position > position) {
+ if (use->RequiresRegister()) {
return use_position;
}
}
@@ -499,21 +509,17 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
return position == GetStart() ? position : kNoLifetime;
}
- if (position == GetStart() && IsParent()) {
- if (defined_by_->GetLocations()->Out().IsValid()) {
- return position;
- }
+ if (IsDefiningPosition(position)) {
+ DCHECK(defined_by_->GetLocations()->Out().IsValid());
+ return position;
}
UsePosition* use = first_use_;
size_t end = GetEnd();
while (use != nullptr && use->GetPosition() <= end) {
- if (!use->GetIsEnvironment()) {
- Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
- size_t use_position = use->GetPosition();
- if (use_position > position && location.IsValid()) {
- return use_position;
- }
+ size_t use_position = use->GetPosition();
+ if (use_position > position) {
+ return use_position;
}
use = use->GetNext();
}
@@ -524,6 +530,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
return first_use_;
}
+ UsePosition* GetFirstEnvironmentUse() const {
+ return first_env_use_;
+ }
+
Primitive::Type GetType() const {
return type_;
}
@@ -577,6 +587,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
new_interval->parent_ = parent_;
new_interval->first_use_ = first_use_;
+ new_interval->first_env_use_ = first_env_use_;
LiveRange* current = first_range_;
LiveRange* previous = nullptr;
// Iterate over the ranges, and either find a range that covers this position, or
@@ -655,10 +666,18 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
stream << " ";
} while ((use = use->GetNext()) != nullptr);
}
+ stream << "}, { ";
+ use = first_env_use_;
+ if (use != nullptr) {
+ do {
+ use->Dump(stream);
+ stream << " ";
+ } while ((use = use->GetNext()) != nullptr);
+ }
stream << "}";
stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit();
- stream << " is_high: " << IsHighInterval();
stream << " is_low: " << IsLowInterval();
+ stream << " is_high: " << IsHighInterval();
}
LiveInterval* GetNextSibling() const { return next_sibling_; }
@@ -673,7 +692,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
// Returns the first register hint that is at least free before
// the value contained in `free_until`. If none is found, returns
// `kNoRegister`.
- int FindFirstRegisterHint(size_t* free_until) const;
+ int FindFirstRegisterHint(size_t* free_until, const SsaLivenessAnalysis& liveness) const;
// If there is enough at the definition site to find a register (for example
// it uses the same input as the first input), returns the register as a hint.
@@ -754,6 +773,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
if (first_use_ != nullptr) {
high_or_low_interval_->first_use_ = first_use_->Dup(allocator_);
}
+
+ if (first_env_use_ != nullptr) {
+ high_or_low_interval_->first_env_use_ = first_env_use_->Dup(allocator_);
+ }
}
// Returns whether an interval, when it is non-split, is using
@@ -851,6 +874,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
first_safepoint_(nullptr),
last_safepoint_(nullptr),
first_use_(nullptr),
+ first_env_use_(nullptr),
type_(type),
next_sibling_(nullptr),
parent_(this),
@@ -888,6 +912,107 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
return range;
}
+ bool DefinitionRequiresRegister() const {
+ DCHECK(IsParent());
+ LocationSummary* locations = defined_by_->GetLocations();
+ Location location = locations->Out();
+ // This interval is the first interval of the instruction. If the output
+ // of the instruction requires a register, so does its definition, and the
+ // defining position is then the first register use.
+ if (location.IsUnallocated()) {
+ if ((location.GetPolicy() == Location::kRequiresRegister)
+ || (location.GetPolicy() == Location::kSameAsFirstInput
+ && (locations->InAt(0).IsRegister()
+ || locations->InAt(0).IsRegisterPair()
+ || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
+ return true;
+ } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+ || (location.GetPolicy() == Location::kSameAsFirstInput
+ && (locations->InAt(0).IsFpuRegister()
+ || locations->InAt(0).IsFpuRegisterPair()
+ || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
+ return true;
+ }
+ } else if (location.IsRegister() || location.IsRegisterPair()) {
+ return true;
+ }
+ return false;
+ }
+
+ bool IsDefiningPosition(size_t position) const {
+ return IsParent() && (position == GetStart());
+ }
+
+ bool HasSynthesizeUseAt(size_t position) const {
+ UsePosition* use = first_use_;
+ while (use != nullptr) {
+ size_t use_position = use->GetPosition();
+ if ((use_position == position) && use->IsSynthesized()) {
+ return true;
+ }
+ if (use_position > position) break;
+ use = use->GetNext();
+ }
+ return false;
+ }
+
+ void AddBackEdgeUses(const HBasicBlock& block_at_use) {
+ DCHECK(block_at_use.IsInLoop());
+ // Add synthesized uses at the back edge of loops to help the register allocator.
+ // Note that this method is called in decreasing liveness order, to facilitate adding
+ // uses at the head of the `first_use_` linked list. Because below
+ // we iterate from inner-most to outer-most, which is in increasing liveness order,
+ // we need to take extra care when updating the `first_use_` linked list.
+ UsePosition* first_in_new_list = nullptr;
+ UsePosition* last_in_new_list = nullptr;
+ for (HLoopInformationOutwardIterator it(block_at_use);
+ !it.Done();
+ it.Advance()) {
+ HLoopInformation* current = it.Current();
+ if (GetDefinedBy()->GetLifetimePosition() >= current->GetHeader()->GetLifetimeStart()) {
+ // This interval is defined in the loop. We can stop going outward.
+ break;
+ }
+
+ // We're only adding a synthesized use at the last back edge. Adding synthesized uses on
+ // all back edges is not necessary: anything used in the loop will have its use at the
+ // last back edge. If we want one branch in a loop to get better register allocation
+ // than another, it is the linear order we should change.
+ size_t back_edge_use_position = current->GetLifetimeEnd();
+ if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) {
+ // There was a use already seen in this loop. Therefore the previous call to `AddUse`
+ // already inserted the back edge use. We can stop going outward.
+ DCHECK(HasSynthesizeUseAt(back_edge_use_position));
+ break;
+ }
+
+ DCHECK(last_in_new_list == nullptr
+ || back_edge_use_position > last_in_new_list->GetPosition());
+
+ UsePosition* new_use = new (allocator_) UsePosition(
+ /* user */ nullptr,
+ /* environment */ nullptr,
+ UsePosition::kNoInput,
+ back_edge_use_position,
+ /* next */ nullptr);
+
+ if (last_in_new_list != nullptr) {
+ // Going outward. The latest created use needs to point to the new use.
+ last_in_new_list->SetNext(new_use);
+ } else {
+ // This is the inner-most loop.
+ DCHECK_EQ(current, block_at_use.GetLoopInformation());
+ first_in_new_list = new_use;
+ }
+ last_in_new_list = new_use;
+ }
+ // Link the newly created linked list with `first_use_`.
+ if (last_in_new_list != nullptr) {
+ last_in_new_list->SetNext(first_use_);
+ first_use_ = first_in_new_list;
+ }
+ }
+
ArenaAllocator* const allocator_;
// Ranges of this interval. We need a quick access to the last range to test
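A self-contained model of the AddBackEdgeUses() walk above, using plain positions (all numbers invented for illustration; the early-out for uses already seen in a loop is omitted). Loops are given inner-most first, and the walk stops at the first loop that contains the definition:

#include <cstddef>
#include <vector>

static std::vector<size_t> BackEdgeUsePositions(
    const std::vector<size_t>& header_starts,  // inner-most first
    const std::vector<size_t>& loop_ends,      // same order
    size_t definition_position) {
  std::vector<size_t> uses;
  for (size_t i = 0; i < loop_ends.size(); ++i) {
    if (definition_position >= header_starts[i]) break;  // defined in loop
    uses.push_back(loop_ends[i]);
  }
  return uses;
}
// BackEdgeUsePositions({20, 10}, {40, 50}, 4)  -> {40, 50}
// BackEdgeUsePositions({20, 10}, {40, 50}, 22) -> {}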
@@ -905,6 +1030,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
// Uses of this interval. Note that this linked list is shared amongst siblings.
UsePosition* first_use_;
+ UsePosition* first_env_use_;
// The instruction type this interval corresponds to.
const Primitive::Type type_;
@@ -999,14 +1125,18 @@ class SsaLivenessAnalysis : public ValueObject {
}
HBasicBlock* GetBlockFromPosition(size_t index) const {
- HInstruction* instruction = GetInstructionFromPosition(index / 2);
+ HInstruction* instruction = GetInstructionFromPosition(index);
if (instruction == nullptr) {
// If we are at a block boundary, get the block following.
- instruction = GetInstructionFromPosition((index / 2) + 1);
+ instruction = GetInstructionFromPosition(index + 1);
}
return instruction->GetBlock();
}
+ bool IsAtBlockBoundary(size_t index) const {
+ return GetInstructionFromPosition(index) == nullptr;
+ }
+
HInstruction* GetTempUser(LiveInterval* temp) const {
// A temporary shares the same lifetime start as the instruction that requires it.
DCHECK(temp->IsTemp());
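GetBlockFromPosition() now takes an instruction index rather than a lifetime position, so call sites halve the position themselves (see FindFirstRegisterHint above). A wrapper spelling out the assumed convention that lifetime positions are twice the instruction index:

static HBasicBlock* BlockAtLifetimePosition(const SsaLivenessAnalysis& liveness,
                                            size_t lifetime_position) {
  return liveness.GetBlockFromPosition(lifetime_position / 2);
}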
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 00c241b85a..fb3e7d798c 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -78,7 +78,7 @@ static void ReNumberInstructions(HGraph* graph) {
static void TestCode(const uint16_t* data, const char* expected) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);
@@ -373,30 +373,26 @@ TEST(SsaTest, Loop6) {
const char* expected =
"BasicBlock 0, succ: 1\n"
" 0: IntConstant 0 [5]\n"
- " 1: IntConstant 4 [14, 8, 8]\n"
- " 2: IntConstant 5 [14]\n"
+ " 1: IntConstant 4 [5, 8, 8]\n"
+ " 2: IntConstant 5 [5]\n"
" 3: Goto\n"
"BasicBlock 1, pred: 0, succ: 2\n"
" 4: Goto\n"
- "BasicBlock 2, pred: 1, 8, succ: 6, 3\n"
- " 5: Phi(0, 14) [12, 6, 6]\n"
+ "BasicBlock 2, pred: 1, 4, 5, succ: 6, 3\n"
+ " 5: Phi(0, 2, 1) [12, 6, 6]\n"
" 6: Equal(5, 5) [7]\n"
" 7: If(6)\n"
"BasicBlock 3, pred: 2, succ: 5, 4\n"
" 8: Equal(1, 1) [9]\n"
" 9: If(8)\n"
- "BasicBlock 4, pred: 3, succ: 8\n"
+ "BasicBlock 4, pred: 3, succ: 2\n"
" 10: Goto\n"
- "BasicBlock 5, pred: 3, succ: 8\n"
+ "BasicBlock 5, pred: 3, succ: 2\n"
" 11: Goto\n"
"BasicBlock 6, pred: 2, succ: 7\n"
" 12: Return(5)\n"
"BasicBlock 7, pred: 6\n"
- " 13: Exit\n"
- // Synthesized single back edge of loop.
- "BasicBlock 8, pred: 5, 4, succ: 2\n"
- " 14: Phi(1, 2) [5]\n"
- " 15: Goto\n";
+ " 13: Exit\n";
const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
Instruction::CONST_4 | 0 | 0,
diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc
index a5a0eb2114..5ca66a1de6 100644
--- a/compiler/optimizing/suspend_check_test.cc
+++ b/compiler/optimizing/suspend_check_test.cc
@@ -30,7 +30,7 @@ namespace art {
static void TestCode(const uint16_t* data) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- HGraph* graph = new (&allocator) HGraph(&allocator);
+ HGraph* graph = CreateGraph(&allocator);
HGraphBuilder builder(graph);
const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
bool graph_built = builder.BuildGraph(*item);