Diffstat (limited to 'compiler/optimizing')
63 files changed, 3108 insertions, 1016 deletions
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index 30c89f2d15..8100a29f32 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -18,6 +18,26 @@ namespace art { +void HBooleanSimplifier::TryRemovingNegatedCondition(HBasicBlock* block) { + DCHECK(block->EndsWithIf()); + + // Check if the condition is a Boolean negation. + HIf* if_instruction = block->GetLastInstruction()->AsIf(); + HInstruction* boolean_not = if_instruction->InputAt(0); + if (!boolean_not->IsBooleanNot()) { + return; + } + + // Make BooleanNot's input the condition of the If and swap branches. + if_instruction->ReplaceInput(boolean_not->InputAt(0), 0); + block->SwapSuccessors(); + + // Remove the BooleanNot if it is now unused. + if (!boolean_not->HasUses()) { + boolean_not->GetBlock()->RemoveInstruction(boolean_not); + } +} + // Returns true if 'block1' and 'block2' are empty, merge into the same single // successor and the successor can only be reached from them. static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) { @@ -78,58 +98,69 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) { } } +void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { + DCHECK(block->EndsWithIf()); + + // Find elements of the pattern. + HIf* if_instruction = block->GetLastInstruction()->AsIf(); + HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); + HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); + if (!BlocksDoMergeTogether(true_block, false_block)) { + return; + } + HBasicBlock* merge_block = true_block->GetSuccessors().Get(0); + if (!merge_block->HasSinglePhi()) { + return; + } + HPhi* phi = merge_block->GetFirstPhi()->AsPhi(); + HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block)); + HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block)); + + // Check if the selection negates/preserves the value of the condition and + // if so, generate a suitable replacement instruction. + HInstruction* if_condition = if_instruction->InputAt(0); + HInstruction* replacement; + if (NegatesCondition(true_value, false_value)) { + replacement = GetOppositeCondition(if_condition); + if (replacement->GetBlock() == nullptr) { + block->InsertInstructionBefore(replacement, if_instruction); + } + } else if (PreservesCondition(true_value, false_value)) { + replacement = if_condition; + } else { + return; + } + + // Replace the selection outcome with the new instruction. + phi->ReplaceWith(replacement); + merge_block->RemovePhi(phi); + + // Delete the true branch and merge the resulting chain of blocks + // 'block->false_block->merge_block' into one. + true_block->DisconnectAndDelete(); + block->MergeWith(false_block); + block->MergeWith(merge_block); + + // Remove the original condition if it is now unused. + if (!if_condition->HasUses()) { + if_condition->GetBlock()->RemoveInstructionOrPhi(if_condition); + } +} + void HBooleanSimplifier::Run() { // Iterate in post order in the unlikely case that removing one occurrence of - // the pattern empties a branch block of another occurrence. Otherwise the - // order does not matter. + // the selection pattern empties a branch block of another occurrence. + // Otherwise the order does not matter. for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (!block->EndsWithIf()) continue; - // Find elements of the pattern. 
- HIf* if_instruction = block->GetLastInstruction()->AsIf(); - HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); - HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); - if (!BlocksDoMergeTogether(true_block, false_block)) { - continue; - } - HBasicBlock* merge_block = true_block->GetSuccessors().Get(0); - if (!merge_block->HasSinglePhi()) { - continue; - } - HPhi* phi = merge_block->GetFirstPhi()->AsPhi(); - HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block)); - HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block)); - - // Check if the selection negates/preserves the value of the condition and - // if so, generate a suitable replacement instruction. - HInstruction* if_condition = if_instruction->InputAt(0); - HInstruction* replacement; - if (NegatesCondition(true_value, false_value)) { - replacement = GetOppositeCondition(if_condition); - if (replacement->GetBlock() == nullptr) { - block->InsertInstructionBefore(replacement, if_instruction); - } - } else if (PreservesCondition(true_value, false_value)) { - replacement = if_condition; - } else { - continue; - } + // If condition is negated, remove the negation and swap the branches. + TryRemovingNegatedCondition(block); - // Replace the selection outcome with the new instruction. - phi->ReplaceWith(replacement); - merge_block->RemovePhi(phi); - - // Delete the true branch and merge the resulting chain of blocks - // 'block->false_block->merge_block' into one. - true_block->DisconnectAndDelete(); - block->MergeWith(false_block); - block->MergeWith(merge_block); - - // Remove the original condition if it is now unused. - if (!if_condition->HasUses()) { - if_condition->GetBlock()->RemoveInstruction(if_condition); - } + // If this is a boolean-selection diamond pattern, replace its result with + // the condition value (or its negation) and simplify the graph. + TryRemovingBooleanSelection(block); } } diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h index a88733e1af..733ebaac2c 100644 --- a/compiler/optimizing/boolean_simplifier.h +++ b/compiler/optimizing/boolean_simplifier.h @@ -14,11 +14,15 @@ * limitations under the License. */ -// This optimization recognizes a common pattern where a boolean value is -// either cast to an integer or negated by selecting from zero/one integer -// constants with an If statement. Because boolean values are internally -// represented as zero/one, we can safely replace the pattern with a suitable -// condition instruction. +// This optimization recognizes two common patterns: +// (a) Boolean selection: Casting a boolean to an integer or negating it is +// carried out with an If statement selecting from zero/one integer +// constants. Because Boolean values are represented as zero/one, the +// pattern can be replaced with the condition instruction itself or its +// negation, depending on the layout. +// (b) Negated condition: Instruction simplifier may replace an If's condition +// with a boolean value. If this value is the result of a Boolean negation, +// the true/false branches can be swapped and negation removed. 
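An illustrative Java-level sketch of the two shapes handled by this pass (not part of the patch; the method names are made up):

  // (a) Boolean selection: the ternary builds an If that selects the
  //     constants 1/0 through a Phi; the pass replaces the Phi's uses with
  //     the condition itself (or its negation) and flattens the diamond.
  static boolean isNotLess(int a, int b) {
    return (a < b) ? false : true;   // simplifies to a GreaterThanOrEqual
  }

  // (b) Negated condition: branching on !cond becomes a branch on cond with
  //     the true/false successors swapped; the BooleanNot is removed if it
  //     has no remaining uses.
  static int select(boolean cond, int x, int y) {
    if (!cond) { return y; } else { return x; }
  }
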
// Example: Negating a boolean value // B1: @@ -66,6 +70,9 @@ class HBooleanSimplifier : public HOptimization { static constexpr const char* kBooleanSimplifierPassName = "boolean_simplifier"; private: + void TryRemovingNegatedCondition(HBasicBlock* block); + void TryRemovingBooleanSelection(HBasicBlock* block); + DISALLOW_COPY_AND_ASSIGN(HBooleanSimplifier); }; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 3645f19f09..b2b54965b5 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -246,6 +246,148 @@ class ValueBound : public ValueObject { int32_t constant_; }; +// Collect array access data for a loop. +// TODO: make it work for multiple arrays inside the loop. +class ArrayAccessInsideLoopFinder : public ValueObject { + public: + explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable) + : induction_variable_(induction_variable), + found_array_length_(nullptr), + offset_low_(INT_MAX), + offset_high_(INT_MIN) { + Run(); + } + + HArrayLength* GetFoundArrayLength() const { return found_array_length_; } + bool HasFoundArrayLength() const { return found_array_length_ != nullptr; } + int32_t GetOffsetLow() const { return offset_low_; } + int32_t GetOffsetHigh() const { return offset_high_; } + + // Returns if `block` that is in loop_info may exit the loop, unless it's + // the loop header for loop_info. + static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) { + DCHECK(loop_info->Contains(*block)); + if (block == loop_info->GetHeader()) { + // Loop header of loop_info. Exiting loop is normal. + return false; + } + const GrowableArray<HBasicBlock*> successors = block->GetSuccessors(); + for (size_t i = 0; i < successors.Size(); i++) { + if (!loop_info->Contains(*successors.Get(i))) { + // One of the successors exits the loop. + return true; + } + } + return false; + } + + static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) { + for (size_t i = 0, e = loop_info->GetBackEdges().Size(); i < e; ++i) { + HBasicBlock* back_edge = loop_info->GetBackEdges().Get(i); + if (!block->Dominates(back_edge)) { + return false; + } + } + return true; + } + + void Run() { + HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation(); + for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) { + HBasicBlock* block = it_loop.Current(); + DCHECK(block->IsInLoop()); + if (!DominatesAllBackEdges(block, loop_info)) { + // In order not to trigger deoptimization unnecessarily, make sure + // that all array accesses collected are really executed in the loop. + // For array accesses in a branch inside the loop, don't collect the + // access. The bounds check in that branch might not be eliminated. + continue; + } + if (EarlyExit(block, loop_info)) { + // If the loop body can exit loop (like break, return, etc.), it's not guaranteed + // that the loop will loop through the full monotonic value range from + // initial_ to end_. So adding deoptimization might be too aggressive and can + // trigger deoptimization unnecessarily even if the loop won't actually throw + // AIOOBE. Otherwise, the loop induction variable is going to cover the full + // monotonic value range from initial_ to end_, and deoptimizations are added + // iff the loop will throw AIOOBE. 
+ found_array_length_ = nullptr; + return; + } + for (HInstruction* instruction = block->GetFirstInstruction(); + instruction != nullptr; + instruction = instruction->GetNext()) { + if (!instruction->IsArrayGet() && !instruction->IsArraySet()) { + continue; + } + HInstruction* index = instruction->InputAt(1); + if (!index->IsBoundsCheck()) { + continue; + } + + HArrayLength* array_length = index->InputAt(1)->AsArrayLength(); + if (array_length == nullptr) { + DCHECK(index->InputAt(1)->IsIntConstant()); + // TODO: may optimize for constant case. + continue; + } + + HInstruction* array = array_length->InputAt(0); + if (array->IsNullCheck()) { + array = array->AsNullCheck()->InputAt(0); + } + if (loop_info->Contains(*array->GetBlock())) { + // Array is defined inside the loop. Skip. + continue; + } + + if (found_array_length_ != nullptr && found_array_length_ != array_length) { + // There is already access for another array recorded for the loop. + // TODO: handle multiple arrays. + continue; + } + + index = index->AsBoundsCheck()->InputAt(0); + HInstruction* left = index; + int32_t right = 0; + if (left == induction_variable_ || + (ValueBound::IsAddOrSubAConstant(index, &left, &right) && + left == induction_variable_)) { + // For patterns like array[i] or array[i + 2]. + if (right < offset_low_) { + offset_low_ = right; + } + if (right > offset_high_) { + offset_high_ = right; + } + } else { + // Access not in induction_variable/(induction_variable_ + constant) + // format. Skip. + continue; + } + // Record this array. + found_array_length_ = array_length; + } + } + } + + private: + // The instruction that corresponds to a MonotonicValueRange. + HInstruction* induction_variable_; + + // The array length of the array that's accessed inside the loop. + HArrayLength* found_array_length_; + + // The lowest and highest constant offsets relative to induction variable + // instruction_ in all array accesses. + // If array access are: array[i-1], array[i], array[i+1], + // offset_low_ is -1 and offset_high is 1. + int32_t offset_low_; + int32_t offset_high_; + + DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder); +}; + /** * Represent a range of lower bound and upper bound, both being inclusive. * Currently a ValueRange may be generated as a result of the following: @@ -332,21 +474,31 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { class MonotonicValueRange : public ValueRange { public: MonotonicValueRange(ArenaAllocator* allocator, + HPhi* induction_variable, HInstruction* initial, int32_t increment, ValueBound bound) // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's // used as a regular value range, due to possible overflow/underflow. 
: ValueRange(allocator, ValueBound::Min(), ValueBound::Max()), + induction_variable_(induction_variable), initial_(initial), + end_(nullptr), + inclusive_(false), increment_(increment), bound_(bound) {} virtual ~MonotonicValueRange() {} + HInstruction* GetInductionVariable() const { return induction_variable_; } int32_t GetIncrement() const { return increment_; } - ValueBound GetBound() const { return bound_; } + void SetEnd(HInstruction* end) { end_ = end; } + void SetInclusive(bool inclusive) { inclusive_ = inclusive; } + HBasicBlock* GetLoopHead() const { + DCHECK(induction_variable_->GetBlock()->IsLoopHeader()); + return induction_variable_->GetBlock(); + } MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; } @@ -371,6 +523,10 @@ class MonotonicValueRange : public ValueRange { if (increment_ > 0) { // Monotonically increasing. ValueBound lower = ValueBound::NarrowLowerBound(bound_, range->GetLower()); + if (!lower.IsConstant() || lower.GetConstant() == INT_MIN) { + // Lower bound isn't useful. Leave it to deoptimization. + return this; + } // We currently conservatively assume max array length is INT_MAX. If we can // make assumptions about the max array length, e.g. due to the max heap size, @@ -417,6 +573,11 @@ class MonotonicValueRange : public ValueRange { DCHECK_NE(increment_, 0); // Monotonically decreasing. ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper()); + if ((!upper.IsConstant() || upper.GetConstant() == INT_MAX) && + !upper.IsRelatedToArrayLength()) { + // Upper bound isn't useful. Leave it to deoptimization. + return this; + } // Need to take care of underflow. Try to prove underflow won't happen // for common cases. @@ -432,10 +593,217 @@ class MonotonicValueRange : public ValueRange { } } + // Returns true if adding a (constant >= value) check for deoptimization + // is allowed and will benefit compiled code. + bool CanAddDeoptimizationConstant(HInstruction* value, + int32_t constant, + bool* is_proven) { + *is_proven = false; + // See if we can prove the relationship first. + if (value->IsIntConstant()) { + if (value->AsIntConstant()->GetValue() >= constant) { + // Already true. + *is_proven = true; + return true; + } else { + // May throw exception. Don't add deoptimization. + // Keep bounds checks in the loops. + return false; + } + } + // Can benefit from deoptimization. + return true; + } + + // Adds a check that (value >= constant), and HDeoptimize otherwise. + void AddDeoptimizationConstant(HInstruction* value, + int32_t constant) { + HBasicBlock* block = induction_variable_->GetBlock(); + DCHECK(block->IsLoopHeader()); + HGraph* graph = block->GetGraph(); + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + HIntConstant* const_instr = graph->GetIntConstant(constant); + HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr); + HDeoptimize* deoptimize = new (graph->GetArena()) + HDeoptimize(cond, suspend_check->GetDexPc()); + pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); + pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); + deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( + suspend_check->GetEnvironment(), block); + } + + // Returns true if adding a (value <= array_length + offset) check for deoptimization + // is allowed and will benefit compiled code. 
+ bool CanAddDeoptimizationArrayLength(HInstruction* value, + HArrayLength* array_length, + int32_t offset, + bool* is_proven) { + *is_proven = false; + if (offset > 0) { + // There might be overflow issue. + // TODO: handle this, possibly with some distance relationship between + // offset_low and offset_high, or using another deoptimization to make + // sure (array_length + offset) doesn't overflow. + return false; + } + + // See if we can prove the relationship first. + if (value == array_length) { + if (offset >= 0) { + // Already true. + *is_proven = true; + return true; + } else { + // May throw exception. Don't add deoptimization. + // Keep bounds checks in the loops. + return false; + } + } + // Can benefit from deoptimization. + return true; + } + + // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise. + void AddDeoptimizationArrayLength(HInstruction* value, + HArrayLength* array_length, + int32_t offset) { + HBasicBlock* block = induction_variable_->GetBlock(); + DCHECK(block->IsLoopHeader()); + HGraph* graph = block->GetGraph(); + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + + // We may need to hoist null-check and array_length out of loop first. + if (!array_length->GetBlock()->Dominates(pre_header)) { + HInstruction* array = array_length->InputAt(0); + HNullCheck* null_check = array->AsNullCheck(); + if (null_check != nullptr) { + array = null_check->InputAt(0); + } + // We've already made sure array is defined before the loop when collecting + // array accesses for the loop. + DCHECK(array->GetBlock()->Dominates(pre_header)); + if (null_check != nullptr && !null_check->GetBlock()->Dominates(pre_header)) { + // Hoist null check out of loop with a deoptimization. + HNullConstant* null_constant = graph->GetNullConstant(); + HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant); + // TODO: for one dex_pc, share the same deoptimization slow path. + HDeoptimize* null_check_deoptimize = new (graph->GetArena()) + HDeoptimize(null_check_cond, suspend_check->GetDexPc()); + pre_header->InsertInstructionBefore(null_check_cond, pre_header->GetLastInstruction()); + pre_header->InsertInstructionBefore( + null_check_deoptimize, pre_header->GetLastInstruction()); + // Eliminate null check in the loop. + null_check->ReplaceWith(array); + null_check->GetBlock()->RemoveInstruction(null_check); + null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( + suspend_check->GetEnvironment(), block); + } + // Hoist array_length out of loop. + array_length->MoveBefore(pre_header->GetLastInstruction()); + } + + HIntConstant* offset_instr = graph->GetIntConstant(offset); + HAdd* add = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); + HCondition* cond = new (graph->GetArena()) HGreaterThan(value, add); + HDeoptimize* deoptimize = new (graph->GetArena()) + HDeoptimize(cond, suspend_check->GetDexPc()); + pre_header->InsertInstructionBefore(add, pre_header->GetLastInstruction()); + pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); + pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); + deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( + suspend_check->GetEnvironment(), block); + } + + // Add deoptimizations in loop pre-header with the collected array access + // data so that value ranges can be established in loop body. 
+ // Returns true if deoptimizations are successfully added, or if it's proven + // it's not necessary. + bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) { + int32_t offset_low = finder.GetOffsetLow(); + int32_t offset_high = finder.GetOffsetHigh(); + HArrayLength* array_length = finder.GetFoundArrayLength(); + + HBasicBlock* pre_header = + induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader(); + if (!initial_->GetBlock()->Dominates(pre_header) || + !end_->GetBlock()->Dominates(pre_header)) { + // Can't move initial_ or end_ into pre_header for comparisons. + return false; + } + + bool is_constant_proven, is_length_proven; + if (increment_ == 1) { + // Increasing from initial_ to end_. + int32_t offset = inclusive_ ? -offset_high - 1 : -offset_high; + if (CanAddDeoptimizationConstant(initial_, -offset_low, &is_constant_proven) && + CanAddDeoptimizationArrayLength(end_, array_length, offset, &is_length_proven)) { + if (!is_constant_proven) { + AddDeoptimizationConstant(initial_, -offset_low); + } + if (!is_length_proven) { + AddDeoptimizationArrayLength(end_, array_length, offset); + } + return true; + } + } else if (increment_ == -1) { + // Decreasing from initial_ to end_. + int32_t constant = inclusive_ ? -offset_low : -offset_low - 1; + if (CanAddDeoptimizationConstant(end_, constant, &is_constant_proven) && + CanAddDeoptimizationArrayLength( + initial_, array_length, -offset_high - 1, &is_length_proven)) { + if (!is_constant_proven) { + AddDeoptimizationConstant(end_, constant); + } + if (!is_length_proven) { + AddDeoptimizationArrayLength(initial_, array_length, -offset_high - 1); + } + return true; + } + } + return false; + } + + // Try to add HDeoptimize's in the loop pre-header first to narrow this range. + ValueRange* NarrowWithDeoptimization() { + if (increment_ != 1 && increment_ != -1) { + // TODO: possibly handle overflow/underflow issues with deoptimization. + return this; + } + + if (end_ == nullptr) { + // No full info to add deoptimization. + return this; + } + + ArrayAccessInsideLoopFinder finder(induction_variable_); + + if (!finder.HasFoundArrayLength()) { + // No array access was found inside the loop that can benefit + // from deoptimization. + return this; + } + + if (!AddDeoptimization(finder)) { + return this; + } + + // After added deoptimizations, induction variable fits in + // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. + ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); + ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); + // We've narrowed the range after added deoptimizations. + return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); + } + private: - HInstruction* const initial_; - const int32_t increment_; - ValueBound bound_; // Additional value bound info for initial_; + HPhi* const induction_variable_; // Induction variable for this monotonic value range. + HInstruction* const initial_; // Initial value. + HInstruction* end_; // End value. + bool inclusive_; // Whether end value is inclusive. + const int32_t increment_; // Increment for each loop iteration. + const ValueBound bound_; // Additional value bound info for initial_. DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange); }; @@ -598,6 +966,20 @@ class BCEVisitor : public HGraphVisitor { // There should be no critical edge at this point. 
DCHECK_EQ(false_successor->GetPredecessors().Size(), 1u); + ValueRange* left_range = LookupValueRange(left, block); + MonotonicValueRange* left_monotonic_range = nullptr; + if (left_range != nullptr) { + left_monotonic_range = left_range->AsMonotonicValueRange(); + if (left_monotonic_range != nullptr) { + HBasicBlock* loop_head = left_monotonic_range->GetLoopHead(); + if (instruction->GetBlock() != loop_head) { + // For monotonic value range, don't handle `instruction` + // if it's not defined in the loop header. + return; + } + } + } + bool found; ValueBound bound = ValueBound::DetectValueBoundFromValue(right, &found); // Each comparison can establish a lower bound and an upper bound @@ -610,7 +992,6 @@ class BCEVisitor : public HGraphVisitor { ValueRange* right_range = LookupValueRange(right, block); if (right_range != nullptr) { if (right_range->IsMonotonicValueRange()) { - ValueRange* left_range = LookupValueRange(left, block); if (left_range != nullptr && left_range->IsMonotonicValueRange()) { HandleIfBetweenTwoMonotonicValueRanges(instruction, left, right, cond, left_range->AsMonotonicValueRange(), @@ -628,6 +1009,17 @@ class BCEVisitor : public HGraphVisitor { bool overflow, underflow; if (cond == kCondLT || cond == kCondLE) { + if (left_monotonic_range != nullptr) { + // Update the info for monotonic value range. + if (left_monotonic_range->GetInductionVariable() == left && + left_monotonic_range->GetIncrement() < 0 && + block == left_monotonic_range->GetLoopHead() && + instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { + left_monotonic_range->SetEnd(right); + left_monotonic_range->SetInclusive(cond == kCondLT); + } + } + if (!upper.Equals(ValueBound::Max())) { int32_t compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive ValueBound new_upper = upper.Add(compensation, &overflow, &underflow); @@ -651,6 +1043,17 @@ class BCEVisitor : public HGraphVisitor { ApplyRangeFromComparison(left, block, false_successor, new_range); } } else if (cond == kCondGT || cond == kCondGE) { + if (left_monotonic_range != nullptr) { + // Update the info for monotonic value range. + if (left_monotonic_range->GetInductionVariable() == left && + left_monotonic_range->GetIncrement() > 0 && + block == left_monotonic_range->GetLoopHead() && + instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { + left_monotonic_range->SetEnd(right); + left_monotonic_range->SetInclusive(cond == kCondGT); + } + } + // array.length as a lower bound isn't considered useful. if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) { int32_t compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive @@ -755,9 +1158,26 @@ class BCEVisitor : public HGraphVisitor { bounds_check->GetBlock()->RemoveInstruction(bounds_check); } + static bool HasSameInputAtBackEdges(HPhi* phi) { + DCHECK(phi->IsLoopHeaderPhi()); + // Start with input 1. Input 0 is from the incoming block. 
+ HInstruction* input1 = phi->InputAt(1); + DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( + *phi->GetBlock()->GetPredecessors().Get(1))); + for (size_t i = 2, e = phi->InputCount(); i < e; ++i) { + DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( + *phi->GetBlock()->GetPredecessors().Get(i))); + if (input1 != phi->InputAt(i)) { + return false; + } + } + return true; + } + void VisitPhi(HPhi* phi) { - if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) { - DCHECK_EQ(phi->InputCount(), 2U); + if (phi->IsLoopHeaderPhi() + && (phi->GetType() == Primitive::kPrimInt) + && HasSameInputAtBackEdges(phi)) { HInstruction* instruction = phi->InputAt(1); HInstruction *left; int32_t increment; @@ -790,6 +1210,7 @@ class BCEVisitor : public HGraphVisitor { } range = new (GetGraph()->GetArena()) MonotonicValueRange( GetGraph()->GetArena(), + phi, initial_value, increment, bound); @@ -809,6 +1230,36 @@ class BCEVisitor : public HGraphVisitor { HInstruction* left = cond->GetLeft(); HInstruction* right = cond->GetRight(); HandleIf(instruction, left, right, cmp); + + HBasicBlock* block = instruction->GetBlock(); + ValueRange* left_range = LookupValueRange(left, block); + if (left_range == nullptr) { + return; + } + + if (left_range->IsMonotonicValueRange() && + block == left_range->AsMonotonicValueRange()->GetLoopHead()) { + // The comparison is for an induction variable in the loop header. + DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable()); + HBasicBlock* loop_body_successor; + if (LIKELY(block->GetLoopInformation()-> + Contains(*instruction->IfFalseSuccessor()))) { + loop_body_successor = instruction->IfFalseSuccessor(); + } else { + loop_body_successor = instruction->IfTrueSuccessor(); + } + ValueRange* new_left_range = LookupValueRange(left, loop_body_successor); + if (new_left_range == left_range) { + // We are not successful in narrowing the monotonic value range to + // a regular value range. Try using deoptimization. 
+ new_left_range = left_range->AsMonotonicValueRange()-> + NarrowWithDeoptimization(); + if (new_left_range != left_range) { + GetValueRangeMap(instruction->IfFalseSuccessor())-> + Overwrite(left->GetId(), new_left_range); + } + } + } } } } diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 97be778dbd..163458f75c 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -42,7 +42,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -147,7 +147,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -219,7 +219,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -291,7 +291,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -364,7 +364,7 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, int initial, int increment, IfCondition cond = kCondGE) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); @@ -501,7 +501,7 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator, int initial, int increment = -1, IfCondition cond = kCondLE) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); @@ -632,7 +632,7 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, int initial, int increment, IfCondition cond) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); @@ -743,7 +743,7 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, HInstruction** bounds_check, int initial, IfCondition cond = kCondGE) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); @@ -868,7 +868,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 96e08fd24c..58416ee93b 100644 --- 
a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -19,6 +19,7 @@ #include "art_field-inl.h" #include "base/logging.h" #include "class_linker.h" +#include "dex/verified_method.h" #include "dex_file-inl.h" #include "dex_instruction-inl.h" #include "dex/verified_method.h" @@ -281,7 +282,10 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // To avoid splitting blocks, we compute ahead of time the instructions that // start a new block, and create these blocks. - ComputeBranchTargets(code_ptr, code_end, &number_of_branches); + if (!ComputeBranchTargets(code_ptr, code_end, &number_of_branches)) { + MaybeRecordStat(MethodCompilationStat::kNotCompiledBranchOutsideMethodCode); + return false; + } // Note that the compiler driver is null when unit testing. if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) { @@ -348,7 +352,7 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) { current_block_ = block; } -void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, +bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end, size_t* number_of_branches) { branch_targets_.SetSize(code_end - code_ptr); @@ -373,7 +377,14 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, } dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); - if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { + + if (code_ptr >= code_end) { + if (instruction.CanFlowThrough()) { + // In the normal case we should never hit this but someone can artificially forge a dex + // file to fall-through out the method code. In this case we bail out compilation. + return false; + } + } else if (FindBlockStartingAt(dex_pc) == nullptr) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); } @@ -405,7 +416,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, // Fall-through. Add a block if there is more code afterwards. dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); - if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { + if (code_ptr >= code_end) { + // In the normal case we should never hit this but someone can artificially forge a dex + // file to fall-through out the method code. In this case we bail out compilation. + // (A switch can fall-through so we don't need to check CanFlowThrough().) + return false; + } else if (FindBlockStartingAt(dex_pc) == nullptr) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); } @@ -414,6 +430,7 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, dex_pc += instruction.SizeInCodeUnits(); } } + return true; } HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const { @@ -612,6 +629,16 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit; // Potential class initialization check, in the case of a static method call. HClinitCheck* clinit_check = nullptr; + // Replace calls to String.<init> with StringFactory. 
+ int32_t string_init_offset = 0; + bool is_string_init = compiler_driver_->IsStringInit(method_idx, dex_file_, &string_init_offset); + if (is_string_init) { + return_type = Primitive::kPrimNot; + is_instance_call = false; + number_of_arguments--; + invoke_type = kStatic; + optimized_invoke_type = kStatic; + } HInvoke* invoke = nullptr; @@ -638,9 +665,8 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, *dex_compilation_unit_->GetDexFile()))); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); - mirror::ArtMethod* resolved_method = compiler_driver_->ResolveMethod( - soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, - optimized_invoke_type); + ArtMethod* resolved_method = compiler_driver_->ResolveMethod( + soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, optimized_invoke_type); if (resolved_method == nullptr) { MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedMethod); @@ -691,14 +717,14 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, current_block_->AddInstruction(load_class); clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); current_block_->AddInstruction(clinit_check); - ++number_of_arguments; } } } invoke = new (arena_) HInvokeStaticOrDirect( arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index, - is_recursive, invoke_type, optimized_invoke_type, clinit_check_requirement); + is_recursive, string_init_offset, invoke_type, optimized_invoke_type, + clinit_check_requirement); } size_t start_index = 0; @@ -714,6 +740,9 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, uint32_t descriptor_index = 1; uint32_t argument_index = start_index; + if (is_string_init) { + start_index = 1; + } for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) { Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]); bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble); @@ -730,16 +759,38 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, i++; } } + DCHECK_EQ(argument_index, number_of_arguments); if (clinit_check_requirement == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit) { // Add the class initialization check as last input of `invoke`. DCHECK(clinit_check != nullptr); - invoke->SetArgumentAt(argument_index++, clinit_check); + invoke->SetArgumentAt(argument_index, clinit_check); } - DCHECK_EQ(argument_index, number_of_arguments); current_block_->AddInstruction(invoke); latest_result_ = invoke; + + // Add move-result for StringFactory method. + if (is_string_init) { + uint32_t orig_this_reg = is_range ? 
register_index : args[0]; + const VerifiedMethod* verified_method = + compiler_driver_->GetVerifiedMethod(dex_file_, dex_compilation_unit_->GetDexMethodIndex()); + if (verified_method == nullptr) { + LOG(WARNING) << "No verified method for method calling String.<init>: " + << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_); + return false; + } + const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map = + verified_method->GetStringInitPcRegMap(); + auto map_it = string_init_map.find(dex_pc); + if (map_it != string_init_map.end()) { + std::set<uint32_t> reg_set = map_it->second; + for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) { + UpdateLocal(*set_it, invoke); + } + } + UpdateLocal(orig_this_reg, invoke); + } return true; } @@ -1916,12 +1967,19 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::NEW_INSTANCE: { uint16_t type_index = instruction.VRegB_21c(); - QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) - ? kQuickAllocObjectWithAccessCheck - : kQuickAllocObject; - - current_block_->AddInstruction(new (arena_) HNewInstance(dex_pc, type_index, entrypoint)); - UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); + if (compiler_driver_->IsStringTypeIndex(type_index, dex_file_)) { + // Turn new-instance of string into a const 0. + int32_t register_index = instruction.VRegA(); + HNullConstant* constant = graph_->GetNullConstant(); + UpdateLocal(register_index, constant); + } else { + QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) + ? kQuickAllocObjectWithAccessCheck + : kQuickAllocObject; + + current_block_->AddInstruction(new (arena_) HNewInstance(dex_pc, type_index, entrypoint)); + UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); + } break; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index dc6d97eb0c..36503ce43a 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -88,7 +88,10 @@ class HGraphBuilder : public ValueObject { // the newly created blocks. // As a side effect, also compute the number of dex instructions, blocks, and // branches. - void ComputeBranchTargets(const uint16_t* start, + // Returns true if all the branches fall inside the method code, false otherwise. + // (In normal cases this should always return true but someone can artificially + // create a code unit in which branches fall-through out of it). 
+ bool ComputeBranchTargets(const uint16_t* start, const uint16_t* end, size_t* number_of_branches); void MaybeUpdateCurrentBlock(size_t index); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 5163395cac..a5d5305836 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -100,11 +100,11 @@ static bool CheckTypeConsistency(HInstruction* instruction) { for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) { if (environment->GetInstructionAt(i) != nullptr) { Primitive::Type type = environment->GetInstructionAt(i)->GetType(); - DCHECK(CheckType(type, locations->GetEnvironmentAt(i))) - << type << " " << locations->GetEnvironmentAt(i); + DCHECK(CheckType(type, environment->GetLocationAt(i))) + << type << " " << environment->GetLocationAt(i); } else { - DCHECK(locations->GetEnvironmentAt(i).IsInvalid()) - << locations->GetEnvironmentAt(i); + DCHECK(environment->GetLocationAt(i).IsInvalid()) + << environment->GetLocationAt(i); } } return true; @@ -114,18 +114,24 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) { return mirror::ObjectArray<mirror::Object>::OffsetOfElement(index).SizeValue(); } +size_t CodeGenerator::GetCachePointerOffset(uint32_t index) { + auto pointer_size = InstructionSetPointerSize(GetInstructionSet()); + return mirror::Array::DataOffset(pointer_size).Uint32Value() + pointer_size * index; +} + void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { Initialize(); if (!is_leaf) { MarkNotLeaf(); } + const bool is_64_bit = Is64BitInstructionSet(GetInstructionSet()); InitializeCodeGeneration(GetGraph()->GetNumberOfLocalVRegs() + GetGraph()->GetTemporariesVRegSlots() + 1 /* filler */, 0, /* the baseline compiler does not have live registers at slow path */ 0, /* the baseline compiler does not have live registers at slow path */ GetGraph()->GetMaximumNumberOfOutVRegs() - + 1 /* current method */, + + (is_64_bit ? 2 : 1) /* current method */, GetGraph()->GetBlocks()); CompileInternal(allocator, /* is_baseline */ true); } @@ -153,6 +159,7 @@ HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const { } void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) { + is_baseline_ = is_baseline; HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); GenerateFrameEntry(); @@ -269,7 +276,8 @@ int32_t CodeGenerator::GetStackSlot(HLocal* local) const { uint16_t number_of_locals = GetGraph()->GetNumberOfLocalVRegs(); if (reg_number >= number_of_locals) { // Local is a parameter of the method. It is stored in the caller's frame. - return GetFrameSize() + kVRegSize // ART method + // TODO: Share this logic with StackVisitor::GetVRegOffsetFromQuickCode. + return GetFrameSize() + InstructionSetPointerSize(GetInstructionSet()) // ART method + (reg_number - number_of_locals) * kVRegSize; } else { // Local is a temporary in this method. It is stored in this method's frame. @@ -679,6 +687,11 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, locations->GetStackMask(), environment_size, inlining_depth); + if (environment != nullptr) { + // TODO: Handle parent environment. + DCHECK(environment->GetParent() == nullptr); + DCHECK_EQ(environment->GetDexPc(), dex_pc); + } // Walk over the environment, and record the location of dex registers. 
for (size_t i = 0; i < environment_size; ++i) { @@ -688,7 +701,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, continue; } - Location location = locations->GetEnvironmentAt(i); + Location location = environment->GetLocationAt(i); switch (location.GetKind()) { case Location::kConstant: { DCHECK_EQ(current, location.GetConstant()); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index e536b2d0ee..c6317f18d3 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -105,6 +105,25 @@ class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { DISALLOW_COPY_AND_ASSIGN(SlowPathCode); }; +class InvokeDexCallingConventionVisitor { + public: + virtual Location GetNextLocation(Primitive::Type type) = 0; + + protected: + InvokeDexCallingConventionVisitor() {} + virtual ~InvokeDexCallingConventionVisitor() {} + + // The current index for core registers. + uint32_t gp_index_ = 0u; + // The current index for floating-point registers. + uint32_t float_index_ = 0u; + // The current stack index. + uint32_t stack_index_ = 0u; + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); +}; + class CodeGenerator { public: // Compiles the graph to executable instructions. Returns whether the compilation @@ -126,7 +145,7 @@ class CodeGenerator { size_t GetStackSlotOfParameter(HParameterValue* parameter) const { // Note that this follows the current calling convention. return GetFrameSize() - + kVRegSize // Art method + + InstructionSetPointerSize(GetInstructionSet()) // Art method + parameter->GetIndex() * kVRegSize; } @@ -212,6 +231,10 @@ class CodeGenerator { std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; void BuildStackMaps(std::vector<uint8_t>* vector); + bool IsBaseline() const { + return is_baseline_; + } + bool IsLeafMethod() const { return is_leaf_; } @@ -243,6 +266,8 @@ class CodeGenerator { // Note: this method assumes we always have the same pointer size, regardless // of the architecture. static size_t GetCacheOffset(uint32_t index); + // Pointer variant for ArtMethod and ArtField arrays. + size_t GetCachePointerOffset(uint32_t index); void EmitParallelMoves(Location from1, Location to1, @@ -304,6 +329,7 @@ class CodeGenerator { return GetFpuSpillSize() + GetCoreSpillSize(); } + virtual ParallelMoveResolver* GetMoveResolver() = 0; protected: CodeGenerator(HGraph* graph, @@ -325,6 +351,7 @@ class CodeGenerator { number_of_register_pairs_(number_of_register_pairs), core_callee_save_mask_(core_callee_save_mask), fpu_callee_save_mask_(fpu_callee_save_mask), + is_baseline_(false), graph_(graph), compiler_options_(compiler_options), pc_infos_(graph->GetArena(), 32), @@ -346,7 +373,6 @@ class CodeGenerator { virtual Location GetStackLocation(HLoadLocal* load) const = 0; - virtual ParallelMoveResolver* GetMoveResolver() = 0; virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; @@ -404,6 +430,9 @@ class CodeGenerator { const uint32_t core_callee_save_mask_; const uint32_t fpu_callee_save_mask_; + // Whether we are using baseline. 
+ bool is_baseline_; + private: void InitLocationsBaseline(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); @@ -442,11 +471,13 @@ class CallingConvention { CallingConvention(const C* registers, size_t number_of_registers, const F* fpu_registers, - size_t number_of_fpu_registers) + size_t number_of_fpu_registers, + size_t pointer_size) : registers_(registers), number_of_registers_(number_of_registers), fpu_registers_(fpu_registers), - number_of_fpu_registers_(number_of_fpu_registers) {} + number_of_fpu_registers_(number_of_fpu_registers), + pointer_size_(pointer_size) {} size_t GetNumberOfRegisters() const { return number_of_registers_; } size_t GetNumberOfFpuRegisters() const { return number_of_fpu_registers_; } @@ -463,8 +494,8 @@ class CallingConvention { size_t GetStackOffsetOf(size_t index) const { // We still reserve the space for parameters passed by registers. - // Add one for the method pointer. - return (index + 1) * kVRegSize; + // Add space for the method pointer. + return pointer_size_ + index * kVRegSize; } private: @@ -472,6 +503,7 @@ class CallingConvention { const size_t number_of_registers_; const F* fpu_registers_; const size_t number_of_fpu_registers_; + const size_t pointer_size_; DISALLOW_COPY_AND_ASSIGN(CallingConvention); }; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 01748a9f5c..2b1131d65f 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -17,13 +17,13 @@ #include "code_generator_arm.h" #include "arch/arm/instruction_set_features_arm.h" +#include "art_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" #include "intrinsics_arm.h" #include "mirror/array-inl.h" -#include "mirror/art_method.h" -#include "mirror/class.h" +#include "mirror/class-inl.h" #include "thread.h" #include "utils/arm/assembler_arm.h" #include "utils/arm/managed_register_arm.h" @@ -112,6 +112,10 @@ class SuspendCheckSlowPathARM : public SlowPathCodeARM { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. @@ -489,11 +493,6 @@ InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGene assembler_(codegen->GetAssembler()), codegen_(codegen) {} -static uint32_t LeastSignificantBit(uint32_t mask) { - // ffs starts at 1. - return ffs(mask) - 1; -} - void CodeGeneratorARM::ComputeSpillMask() { core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; // Save one extra register for baseline. 
Note that on thumb2, there is no easy @@ -605,7 +604,7 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const { UNREACHABLE(); } -Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type type) { switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -680,7 +679,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type return Location(); } -Location InvokeDexCallingConventionVisitor::GetReturnLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) { switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -1241,13 +1240,9 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { } void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // Explicit clinit checks triggered by static invokes must have been - // pruned by art::PrepareForRegisterAllocation, but this step is not - // run in baseline. So we remove them manually here if we find them. - // TODO: Instead of this local workaround, address this properly. - if (invoke->IsStaticWithExplicitClinitCheck()) { - invoke->RemoveClinitCheckOrLoadClassAsLastInput(); - } + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), codegen_->GetInstructionSetFeatures()); @@ -1273,9 +1268,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) } void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // Explicit clinit checks triggered by static invokes must have been - // pruned by art::PrepareForRegisterAllocation. - DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. 
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; @@ -1292,8 +1287,8 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); locations->AddTemp(Location::RegisterLocation(R0)); - InvokeDexCallingConventionVisitor calling_convention_visitor; - for (size_t i = 0; i < invoke->InputCount(); i++) { + InvokeDexCallingConventionVisitorARM calling_convention_visitor; + for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { HInstruction* input = invoke->InputAt(i); locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType())); } @@ -1317,8 +1312,8 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { } Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); - uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() + - invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); + uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( + invoke->GetVTableIndex(), kArmPointerSize).Uint32Value(); LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -1331,7 +1326,7 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { } codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); - uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( kArmWordSize).Int32Value(); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // LR = temp->GetEntryPoint(); @@ -1351,8 +1346,8 @@ void LocationsBuilderARM::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); - uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() + - (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry); + uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( + invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value(); LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -1370,7 +1365,7 @@ void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) } codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetImtEntryAt(method_offset); - uint32_t entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( kArmWordSize).Int32Value(); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // LR = temp->GetEntryPoint(); @@ -3543,8 +3538,18 @@ void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction) void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathARM*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } __ LoadFromOffset( kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value()); @@ -3791,12 +3796,12 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); codegen_->LoadCurrentMethod(out); - __ LoadFromOffset(kLoadWord, out, out, mirror::ArtMethod::DeclaringClassOffset().Int32Value()); + __ LoadFromOffset(kLoadWord, out, out, ArtMethod::DeclaringClassOffset().Int32Value()); } else { DCHECK(cls->CanCallRuntime()); codegen_->LoadCurrentMethod(out); __ LoadFromOffset( - kLoadWord, out, out, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value()); + kLoadWord, out, out, ArtMethod::DexCacheResolvedTypesOffset().Int32Value()); __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM( @@ -3853,7 +3858,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { Register out = load->GetLocations()->Out().AsRegister<Register>(); codegen_->LoadCurrentMethod(out); - __ LoadFromOffset(kLoadWord, out, out, mirror::ArtMethod::DeclaringClassOffset().Int32Value()); + __ LoadFromOffset(kLoadWord, out, out, ArtMethod::DeclaringClassOffset().Int32Value()); __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); __ cmp(out, ShifterOperand(0)); @@ -4071,23 +4076,33 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, // // Currently we implement the app -> app logic, which 
looks up in the resolve cache. - // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset( - kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); - // temp = temp[index_in_cache] - __ LoadFromOffset( - kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); + if (invoke->IsStringInit()) { + // temp = thread->string_init_entrypoint + __ LoadFromOffset(kLoadWord, temp, TR, invoke->GetStringInitOffset()); // LR = temp[offset_of_quick_compiled_code] __ LoadFromOffset(kLoadWord, LR, temp, - mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + ArtMethod::EntryPointFromQuickCompiledCodeOffset( kArmWordSize).Int32Value()); // LR() __ blx(LR); } else { - __ bl(GetFrameEntryLabel()); + // temp = method; + LoadCurrentMethod(temp); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ LoadFromOffset( + kLoadWord, temp, temp, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); + // temp = temp[index_in_cache] + __ LoadFromOffset( + kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); + // LR = temp[offset_of_quick_compiled_code] + __ LoadFromOffset(kLoadWord, LR, temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArmWordSize).Int32Value()); + // LR() + __ blx(LR); + } else { + __ bl(GetFrameEntryLabel()); + } } DCHECK(!IsLeafMethod()); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 600903621d..c410fa80ba 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -54,7 +54,8 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegis : CallingConvention(kRuntimeParameterCoreRegisters, kRuntimeParameterCoreRegistersLength, kRuntimeParameterFpuRegisters, - kRuntimeParameterFpuRegistersLength) {} + kRuntimeParameterFpuRegistersLength, + kArmPointerSize) {} private: DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); @@ -72,28 +73,26 @@ class InvokeDexCallingConvention : public CallingConvention<Register, SRegister> : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength, kParameterFpuRegisters, - kParameterFpuRegistersLength) {} + kParameterFpuRegistersLength, + kArmPointerSize) {} private: DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); }; -class InvokeDexCallingConventionVisitor { +class InvokeDexCallingConventionVisitorARM : public InvokeDexCallingConventionVisitor { public: - InvokeDexCallingConventionVisitor() - : gp_index_(0), float_index_(0), double_index_(0), stack_index_(0) {} + InvokeDexCallingConventionVisitorARM() {} + virtual ~InvokeDexCallingConventionVisitorARM() {} - Location GetNextLocation(Primitive::Type type); + Location GetNextLocation(Primitive::Type type) OVERRIDE; Location GetReturnLocation(Primitive::Type type); private: InvokeDexCallingConvention calling_convention; - uint32_t gp_index_; - uint32_t float_index_; - uint32_t double_index_; - uint32_t stack_index_; + uint32_t double_index_ = 0; - DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM); }; class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap { @@ -151,7 +150,7 @@ class LocationsBuilderARM : public HGraphVisitor { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorARM* const codegen_; - 
InvokeDexCallingConventionVisitor parameter_visitor_; + InvokeDexCallingConventionVisitorARM parameter_visitor_; DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM); }; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index dada4ce5bd..55ef66fa99 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -17,6 +17,7 @@ #include "code_generator_arm64.h" #include "arch/arm64/instruction_set_features_arm64.h" +#include "art_method.h" #include "common_arm64.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" @@ -24,8 +25,7 @@ #include "intrinsics.h" #include "intrinsics_arm64.h" #include "mirror/array-inl.h" -#include "mirror/art_method.h" -#include "mirror/class.h" +#include "mirror/class-inl.h" #include "offsets.h" #include "thread.h" #include "utils/arm64/assembler_arm64.h" @@ -65,7 +65,6 @@ using helpers::WRegisterFrom; using helpers::XRegisterFrom; using helpers::ARM64EncodableConstantOrRegister; -static constexpr size_t kHeapRefSize = sizeof(mirror::HeapReference<mirror::Object>); static constexpr int kCurrentMethodStackOffset = 0; inline Condition ARM64Condition(IfCondition cond) { @@ -285,6 +284,10 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. @@ -372,15 +375,15 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { #undef __ -Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) { Location next_location; if (type == Primitive::kPrimVoid) { LOG(FATAL) << "Unreachable type " << type; } if (Primitive::IsFloatingPointType(type) && - (fp_index_ < calling_convention.GetNumberOfFpuRegisters())) { - next_location = LocationFrom(calling_convention.GetFpuRegisterAt(fp_index_++)); + (float_index_ < calling_convention.GetNumberOfFpuRegisters())) { + next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++)); } else if (!Primitive::IsFloatingPointType(type) && (gp_index_ < calling_convention.GetNumberOfRegisters())) { next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++)); @@ -964,7 +967,7 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type, void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) { DCHECK(RequiresCurrentMethod()); - DCHECK(current_method.IsW()); + CHECK(current_method.IsX()); __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset)); } @@ -1034,8 +1037,19 @@ void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + 
DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); Register temp = temps.AcquireW(); @@ -1907,8 +1921,8 @@ void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); locations->AddTemp(LocationFrom(x0)); - InvokeDexCallingConventionVisitor calling_convention_visitor; - for (size_t i = 0; i < invoke->InputCount(); i++) { + InvokeDexCallingConventionVisitorARM64 calling_convention_visitor; + for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { HInstruction* input = invoke->InputAt(i); locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType())); } @@ -1925,12 +1939,12 @@ void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. - Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0)); - uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() + - (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry); + Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0)); + uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( + invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value(); Location receiver = invoke->GetLocations()->InAt(0); Offset class_offset = mirror::Object::ClassOffset(); - Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); + Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); // The register ip1 is required to be used for the hidden argument in // art_quick_imt_conflict_trampoline, so prevent VIXL from using it. @@ -1942,16 +1956,16 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok // temp = object->GetClass(); if (receiver.IsStackSlot()) { - __ Ldr(temp, StackOperandFrom(receiver)); - __ Ldr(temp, HeapOperand(temp, class_offset)); + __ Ldr(temp.W(), StackOperandFrom(receiver)); + __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); } else { - __ Ldr(temp, HeapOperandFrom(receiver, class_offset)); + __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetImtEntryAt(method_offset); - __ Ldr(temp, HeapOperand(temp, method_offset)); + __ Ldr(temp, MemOperand(temp, method_offset)); // lr = temp->GetEntryPoint(); - __ Ldr(lr, HeapOperand(temp, entry_point)); + __ Ldr(lr, MemOperand(temp, entry_point.Int32Value())); // lr(); __ Blr(lr); DCHECK(!codegen_->IsLeafMethod()); @@ -1968,13 +1982,9 @@ void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // Explicit clinit checks triggered by static invokes must have been - // pruned by art::PrepareForRegisterAllocation, but this step is not - // run in baseline. So we remove them manually here if we find them. - // TODO: Instead of this local workaround, address this properly. - if (invoke->IsStaticWithExplicitClinitCheck()) { - invoke->RemoveClinitCheckOrLoadClassAsLastInput(); - } + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. 
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena()); if (intrinsic.TryDispatch(invoke)) { @@ -1996,8 +2006,7 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp) { // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. DCHECK(temp.Is(kArtMethodRegister)); - size_t index_in_cache = mirror::Array::DataOffset(kHeapRefSize).SizeValue() + - invoke->GetDexMethodIndex() * kHeapRefSize; + size_t index_in_cache = GetCachePointerOffset(invoke->GetDexMethodIndex()); // TODO: Implement all kinds of calls: // 1) boot -> boot @@ -2006,36 +2015,47 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok // // Currently we implement the app -> app logic, which looks up in the resolve cache. - // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset())); - // temp = temp[index_in_cache]; - __ Ldr(temp, HeapOperand(temp, index_in_cache)); - // lr = temp->entry_point_from_quick_compiled_code_; - __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArm64WordSize))); - // lr(); + if (invoke->IsStringInit()) { + // temp = thread->string_init_entrypoint + __ Ldr(temp.X(), MemOperand(tr, invoke->GetStringInitOffset())); + // LR = temp->entry_point_from_quick_compiled_code_; + __ Ldr(lr, MemOperand( + temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value())); + // lr() __ Blr(lr); } else { - __ Bl(&frame_entry_label_); + // temp = method; + LoadCurrentMethod(temp.X()); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ Ldr(temp.W(), MemOperand(temp.X(), + ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); + // temp = temp[index_in_cache]; + __ Ldr(temp.X(), MemOperand(temp, index_in_cache)); + // lr = temp->entry_point_from_quick_compiled_code_; + __ Ldr(lr, MemOperand(temp.X(), ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64WordSize).Int32Value())); + // lr(); + __ Blr(lr); + } else { + __ Bl(&frame_entry_label_); + } } DCHECK(!IsLeafMethod()); } void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // Explicit clinit checks triggered by static invokes must have been - // pruned by art::PrepareForRegisterAllocation. - DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. 
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; } BlockPoolsScope block_pools(GetVIXLAssembler()); - Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0)); + Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0)); codegen_->GenerateStaticOrDirectCall(invoke, temp); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -2047,27 +2067,27 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); - Register temp = WRegisterFrom(invoke->GetLocations()->GetTemp(0)); - size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() + - invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); + Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0)); + size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( + invoke->GetVTableIndex(), kArm64PointerSize).SizeValue(); Offset class_offset = mirror::Object::ClassOffset(); - Offset entry_point = mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); + Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); BlockPoolsScope block_pools(GetVIXLAssembler()); // temp = object->GetClass(); if (receiver.IsStackSlot()) { - __ Ldr(temp, MemOperand(sp, receiver.GetStackIndex())); - __ Ldr(temp, HeapOperand(temp, class_offset)); + __ Ldr(temp.W(), MemOperand(sp, receiver.GetStackIndex())); + __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); } else { DCHECK(receiver.IsRegister()); - __ Ldr(temp, HeapOperandFrom(receiver, class_offset)); + __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); } codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); - __ Ldr(temp, HeapOperand(temp, method_offset)); + __ Ldr(temp, MemOperand(temp, method_offset)); // lr = temp->GetEntryPoint(); - __ Ldr(lr, HeapOperand(temp, entry_point.SizeValue())); + __ Ldr(lr, MemOperand(temp, entry_point.SizeValue())); // lr(); __ Blr(lr); DCHECK(!codegen_->IsLeafMethod()); @@ -2086,12 +2106,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - codegen_->LoadCurrentMethod(out); - __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DeclaringClassOffset())); + codegen_->LoadCurrentMethod(out.X()); + __ Ldr(out, MemOperand(out.X(), ArtMethod::DeclaringClassOffset().Int32Value())); } else { DCHECK(cls->CanCallRuntime()); - codegen_->LoadCurrentMethod(out); - __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DexCacheResolvedTypesOffset())); + codegen_->LoadCurrentMethod(out.X()); + __ Ldr(out, MemOperand(out.X(), ArtMethod::DexCacheResolvedTypesOffset().Int32Value())); __ Ldr(out, HeapOperand(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( @@ -2138,8 +2158,8 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { codegen_->AddSlowPath(slow_path); Register out = OutputRegister(load); - codegen_->LoadCurrentMethod(out); - __ Ldr(out, HeapOperand(out, mirror::ArtMethod::DeclaringClassOffset())); + codegen_->LoadCurrentMethod(out.X()); + __ Ldr(out, MemOperand(out.X(), ArtMethod::DeclaringClassOffset().Int32Value())); __ Ldr(out, HeapOperand(out, mirror::Class::DexCacheStringsOffset())); __ Ldr(out, 
HeapOperand(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); __ Cbz(out, slow_path->GetEntryLabel()); @@ -2267,7 +2287,7 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { locations->SetOut(LocationFrom(x0)); locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, - void*, uint32_t, int32_t, mirror::ArtMethod*>(); + void*, uint32_t, int32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { @@ -2275,17 +2295,16 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt); DCHECK(type_index.Is(w0)); - Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimNot); - DCHECK(current_method.Is(w2)); - codegen_->LoadCurrentMethod(current_method); + Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimLong); + DCHECK(current_method.Is(x2)); + codegen_->LoadCurrentMethod(current_method.X()); __ Mov(type_index, instruction->GetTypeIndex()); codegen_->InvokeRuntime( GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, - void*, uint32_t, int32_t, mirror::ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); } void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { @@ -2295,7 +2314,7 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { @@ -2304,14 +2323,14 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) DCHECK(type_index.Is(w0)); Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimNot); DCHECK(current_method.Is(w1)); - codegen_->LoadCurrentMethod(current_method); + codegen_->LoadCurrentMethod(current_method.X()); __ Mov(type_index, instruction->GetTypeIndex()); codegen_->InvokeRuntime( GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, instruction->GetDexPc(), nullptr); - CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, mirror::ArtMethod*>(); + CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } void LocationsBuilderARM64::VisitNot(HNot* instruction) { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 5a358671cc..3486cdebec 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -45,7 +45,7 @@ static const vixl::FPRegister kParameterFPRegisters[] = { static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters); const vixl::Register tr = vixl::x18; // Thread Register -static const vixl::Register kArtMethodRegister = vixl::w0; // Method register on invoke. 
+static const vixl::Register kArtMethodRegister = vixl::x0; // Method register on invoke. const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1); const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31); @@ -94,7 +94,8 @@ class InvokeRuntimeCallingConvention : public CallingConvention<vixl::Register, : CallingConvention(kRuntimeParameterCoreRegisters, kRuntimeParameterCoreRegistersLength, kRuntimeParameterFpuRegisters, - kRuntimeParameterFpuRegistersLength) {} + kRuntimeParameterFpuRegistersLength, + kArm64PointerSize) {} Location GetReturnLocation(Primitive::Type return_type); @@ -108,7 +109,8 @@ class InvokeDexCallingConvention : public CallingConvention<vixl::Register, vixl : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength, kParameterFPRegisters, - kParameterFPRegistersLength) {} + kParameterFPRegistersLength, + kArm64PointerSize) {} Location GetReturnLocation(Primitive::Type return_type) { return ARM64ReturnLocation(return_type); @@ -119,25 +121,20 @@ class InvokeDexCallingConvention : public CallingConvention<vixl::Register, vixl DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); }; -class InvokeDexCallingConventionVisitor { +class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor { public: - InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {} + InvokeDexCallingConventionVisitorARM64() {} + virtual ~InvokeDexCallingConventionVisitorARM64() {} - Location GetNextLocation(Primitive::Type type); + Location GetNextLocation(Primitive::Type type) OVERRIDE; Location GetReturnLocation(Primitive::Type return_type) { return calling_convention.GetReturnLocation(return_type); } private: InvokeDexCallingConvention calling_convention; - // The current index for core registers. - uint32_t gp_index_; - // The current index for floating-point registers. - uint32_t fp_index_; - // The current stack index. 
- uint32_t stack_index_; - - DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); + + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64); }; class InstructionCodeGeneratorARM64 : public HGraphVisitor { @@ -193,7 +190,7 @@ class LocationsBuilderARM64 : public HGraphVisitor { void HandleShift(HBinaryOperation* instr); CodeGeneratorARM64* const codegen_; - InvokeDexCallingConventionVisitor parameter_visitor_; + InvokeDexCallingConventionVisitorARM64 parameter_visitor_; DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64); }; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 04999bedb0..60fd29bf74 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -16,6 +16,7 @@ #include "code_generator_x86.h" +#include "art_method.h" #include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" @@ -23,8 +24,7 @@ #include "intrinsics.h" #include "intrinsics_x86.h" #include "mirror/array-inl.h" -#include "mirror/art_method.h" -#include "mirror/class.h" +#include "mirror/class-inl.h" #include "thread.h" #include "utils/assembler.h" #include "utils/stack_checks.h" @@ -153,6 +153,10 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; HBasicBlock* const successor_; @@ -551,7 +555,7 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const { UNREACHABLE(); } -Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type type) { switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -582,7 +586,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } case Primitive::kPrimFloat: { - uint32_t index = fp_index_++; + uint32_t index = float_index_++; stack_index_++; if (index < calling_convention.GetNumberOfFpuRegisters()) { return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); @@ -592,7 +596,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } case Primitive::kPrimDouble: { - uint32_t index = fp_index_++; + uint32_t index = float_index_++; stack_index_ += 2; if (index < calling_convention.GetNumberOfFpuRegisters()) { return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); @@ -809,7 +813,6 @@ void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) { HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -1194,13 +1197,9 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { } void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // Explicit clinit checks triggered by static invokes must have been - // pruned by art::PrepareForRegisterAllocation, but this step is not - // run in baseline. So we remove them manually here if we find them. - // TODO: Instead of this local workaround, address this properly. 
- if (invoke->IsStaticWithExplicitClinitCheck()) { - invoke->RemoveClinitCheckOrLoadClassAsLastInput(); - } + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -1220,9 +1219,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) } void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // Explicit clinit checks triggered by static invokes must have been - // pruned by art::PrepareForRegisterAllocation. - DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; @@ -1242,8 +1241,8 @@ void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) { new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); locations->AddTemp(Location::RegisterLocation(EAX)); - InvokeDexCallingConventionVisitor calling_convention_visitor; - for (size_t i = 0; i < invoke->InputCount(); i++) { + InvokeDexCallingConventionVisitorX86 calling_convention_visitor; + for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { HInstruction* input = invoke->InputAt(i); locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType())); } @@ -1276,8 +1275,8 @@ void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) { void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); - uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() + - invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); + uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( + invoke->GetVTableIndex(), kX86PointerSize).Uint32Value(); LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -1293,7 +1292,7 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); __ call(Address( - temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); + temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); @@ -1308,8 +1307,8 @@ void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
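A minimal sketch (not part of the patch, simplified names) of the invariant the new DCHECKs in the VisitInvokeStaticOrDirect builders encode: under the optimizing path, PrepareForRegisterAllocation has already stripped the explicit clinit-check input, so only the baseline compiler may still see one on an HInvokeStaticOrDirect.

// Plain restatement of: DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()).
bool ClinitCheckInputIsAllowed(bool is_baseline, bool has_explicit_clinit_check) {
  // Baseline keeps the explicit check as a last input; optimizing must have pruned it.
  return is_baseline || !has_explicit_clinit_check;
}

The design choice here is to assert the invariant instead of silently removing the stray input in each backend, which the old per-backend workaround did.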
Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); - uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() + - (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry); + uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( + invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value(); LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -1329,7 +1328,7 @@ void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) // temp = temp->GetImtEntryAt(method_offset); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); - __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86WordSize).Int32Value())); DCHECK(!codegen_->IsLeafMethod()); @@ -2744,17 +2743,12 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) { new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall); switch (op->GetResultType()) { - case Primitive::kPrimInt: { - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. - locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); - locations->SetOut(Location::SameAsFirstInput()); - break; - } + case Primitive::kPrimInt: case Primitive::kPrimLong: { + // Can't have Location::Any() and output SameAsFirstInput() locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. - locations->SetInAt(1, Location::RegisterLocation(ECX)); + // The shift count needs to be in CL or a constant. + locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2773,6 +2767,7 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { + DCHECK(first.IsRegister()); Register first_reg = first.AsRegister<Register>(); if (second.IsRegister()) { Register second_reg = second.AsRegister<Register>(); @@ -2785,7 +2780,11 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { __ shrl(first_reg, second_reg); } } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); if (op->IsShl()) { __ shll(first_reg, imm); } else if (op->IsShr()) { @@ -2797,14 +2796,29 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { break; } case Primitive::kPrimLong: { - Register second_reg = second.AsRegister<Register>(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - GenerateShlLong(first, second_reg); - } else if (op->IsShr()) { - GenerateShrLong(first, second_reg); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + GenerateShlLong(first, second_reg); + } else if (op->IsShr()) { + GenerateShrLong(first, second_reg); + } else { + GenerateUShrLong(first, second_reg); + } } else { - GenerateUShrLong(first, second_reg); + // Shift by a constant. + int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue; + // Nothing to do if the shift is 0, as the input is already the output. 
+ if (shift != 0) { + if (op->IsShl()) { + GenerateShlLong(first, shift); + } else if (op->IsShr()) { + GenerateShrLong(first, shift); + } else { + GenerateUShrLong(first, shift); + } + } } break; } @@ -2813,6 +2827,34 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { } } +void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 1) { + // This is just an addition. + __ addl(low, low); + __ adcl(high, high); + } else if (shift == 32) { + // Shift by 32 is easy. High gets low, and low gets 0. + codegen_->EmitParallelMoves( + loc.ToLow(), + loc.ToHigh(), + Primitive::kPrimInt, + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), + loc.ToLow(), + Primitive::kPrimInt); + } else if (shift > 32) { + // Low part becomes 0. High part is low part << (shift-32). + __ movl(high, low); + __ shll(high, Immediate(shift - 32)); + __ xorl(low, low); + } else { + // Between 1 and 31. + __ shld(high, low, Immediate(shift)); + __ shll(low, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) { Label done; __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter); @@ -2824,6 +2866,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Need to copy the sign. + DCHECK_NE(low, high); + __ movl(low, high); + __ sarl(high, Immediate(31)); + } else if (shift > 32) { + DCHECK_NE(low, high); + // High part becomes sign. Low part is shifted by shift - 32. + __ movl(low, high); + __ sarl(high, Immediate(31)); + __ sarl(low, Immediate(shift - 32)); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ sarl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -2835,6 +2898,30 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. Low gets high, and high gets 0. + codegen_->EmitParallelMoves( + loc.ToHigh(), + loc.ToLow(), + Primitive::kPrimInt, + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), + loc.ToHigh(), + Primitive::kPrimInt); + } else if (shift > 32) { + // Low part is high >> (shift - 32). High part becomes 0. + __ movl(low, high); + __ shrl(low, Immediate(shift - 32)); + __ xorl(high, high); + } else { + // Between 1 and 31. 
+ __ shrd(low, high, Immediate(shift)); + __ shrl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -3114,18 +3201,28 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, // 3) app -> app // // Currently we implement the app -> app logic, which looks up in the resolve cache. - // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); - // temp = temp[index_in_cache] - __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); + + if (invoke->IsStringInit()) { + // temp = thread->string_init_entrypoint + __ fs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset())); // (temp + offset_of_quick_compiled_code)() __ call(Address( - temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); + temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); } else { - __ call(GetFrameEntryLabel()); + // temp = method; + LoadCurrentMethod(temp); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ movl(temp, Address(temp, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); + // temp = temp[index_in_cache] + __ movl(temp, Address(temp, + CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex()))); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(temp, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); + } else { + __ call(GetFrameEntryLabel()); + } } DCHECK(!IsLeafMethod()); @@ -3904,8 +4001,19 @@ void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathX86* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + __ fs()->cmpw(Address::Absolute( Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0)); if (successor == nullptr) { @@ -4171,11 +4279,11 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); codegen_->LoadCurrentMethod(out); - __ movl(out, Address(out, mirror::ArtMethod::DeclaringClassOffset().Int32Value())); + __ movl(out, Address(out, ArtMethod::DeclaringClassOffset().Int32Value())); } else { DCHECK(cls->CanCallRuntime()); codegen_->LoadCurrentMethod(out); - __ movl(out, Address(out, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value())); + __ movl(out, Address(out, ArtMethod::DexCacheResolvedTypesOffset().Int32Value())); __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); SlowPathCodeX86* slow_path = new 
(GetGraph()->GetArena()) LoadClassSlowPathX86( @@ -4230,7 +4338,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { Register out = load->GetLocations()->Out().AsRegister<Register>(); codegen_->LoadCurrentMethod(out); - __ movl(out, Address(out, mirror::ArtMethod::DeclaringClassOffset().Int32Value())); + __ movl(out, Address(out, ArtMethod::DeclaringClassOffset().Int32Value())); __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); __ testl(out, out); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 8bd3cd3585..43214fe7d5 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -52,7 +52,8 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmReg : CallingConvention(kRuntimeParameterCoreRegisters, kRuntimeParameterCoreRegistersLength, kRuntimeParameterFpuRegisters, - kRuntimeParameterFpuRegistersLength) {} + kRuntimeParameterFpuRegistersLength, + kX86PointerSize) {} private: DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); @@ -64,7 +65,8 @@ class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegiste kParameterCoreRegisters, kParameterCoreRegistersLength, kParameterFpuRegisters, - kParameterFpuRegistersLength) {} + kParameterFpuRegistersLength, + kX86PointerSize) {} RegisterPair GetRegisterPairAt(size_t argument_index) { DCHECK_LT(argument_index + 1, GetNumberOfRegisters()); @@ -75,22 +77,17 @@ class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegiste DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); }; -class InvokeDexCallingConventionVisitor { +class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor { public: - InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {} + InvokeDexCallingConventionVisitorX86() {} + virtual ~InvokeDexCallingConventionVisitorX86() {} - Location GetNextLocation(Primitive::Type type); + Location GetNextLocation(Primitive::Type type) OVERRIDE; private: InvokeDexCallingConvention calling_convention; - // The current index for cpu registers. - uint32_t gp_index_; - // The current index for fpu registers. - uint32_t fp_index_; - // The current stack index. 
- uint32_t stack_index_; - - DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); + + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86); }; class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap { @@ -137,7 +134,7 @@ class LocationsBuilderX86 : public HGraphVisitor { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorX86* const codegen_; - InvokeDexCallingConventionVisitor parameter_visitor_; + InvokeDexCallingConventionVisitorX86 parameter_visitor_; DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86); }; @@ -171,6 +168,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); + void GenerateShlLong(const Location& loc, int shift); + void GenerateShrLong(const Location& loc, int shift); + void GenerateUShrLong(const Location& loc, int shift); void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5ce932928b..b0174b9b16 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -16,14 +16,14 @@ #include "code_generator_x86_64.h" +#include "art_method.h" #include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" #include "intrinsics_x86_64.h" #include "mirror/array-inl.h" -#include "mirror/art_method.h" -#include "mirror/class.h" +#include "mirror/class-inl.h" #include "mirror/object_reference.h" #include "thread.h" #include "utils/assembler.h" @@ -99,7 +99,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 { if (is_div_) { __ negq(cpu_reg_); } else { - __ movq(cpu_reg_, Immediate(0)); + __ xorl(cpu_reg_, cpu_reg_); } } __ jmp(GetExitLabel()); @@ -136,6 +136,10 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; HBasicBlock* const successor_; @@ -366,18 +370,27 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo // // Currently we implement the app -> app logic, which looks up in the resolve cache. 
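A self-contained model (assumed helper, not the backend code) of the constant long-shift lowering added for 32-bit x86 in the shift hunks above: the 64-bit value lives in a low/high register pair and the shift amount is a compile-time constant, so the generator picks the cheapest instruction shape per case.

#include <cstdint>

uint64_t ShlLongByConstant(uint32_t low, uint32_t high, int shift) {
  if (shift == 0) {
    // The generator emits nothing in this case (handled by the caller).
  } else if (shift == 1) {
    // addl low, low ; adcl high, high
    high = (high << 1) | (low >> 31);
    low <<= 1;
  } else if (shift == 32) {
    // Parallel move: high <- low, low <- 0.
    high = low;
    low = 0;
  } else if (shift > 32) {
    // Low part becomes 0; high part is low << (shift - 32).
    high = low << (shift - 32);
    low = 0;
  } else {
    // Between 1 and 31: shld high, low, shift ; shll low, shift.
    high = (high << shift) | (low >> (32 - shift));
    low <<= shift;
  }
  return (static_cast<uint64_t>(high) << 32) | low;
}

The arithmetic and logical right-shift variants in the patch follow the same case split, with the sign copied into the high word for the >= 32 cases of the arithmetic shift.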
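A rough classifier (illustrative names, not ART API) for the dispatch shape GenerateStaticOrDirectCall now has on every backend, as in the x86_64 hunk that follows: String.<init> calls resolve through a Thread-local entrypoint, recursive calls branch to the method's own frame entry, and everything else goes through the current method's dex-cache-resolved-methods table.

enum class DirectCallKind { kStringInitEntrypoint, kRecursiveBranch, kDexCacheLookup };

DirectCallKind ClassifyDirectCall(bool is_string_init, bool is_recursive) {
  if (is_string_init) {
    return DirectCallKind::kStringInitEntrypoint;  // temp = thread->string_init_entrypoint
  }
  if (is_recursive) {
    return DirectCallKind::kRecursiveBranch;       // bl / call GetFrameEntryLabel()
  }
  return DirectCallKind::kDexCacheLookup;          // temp = method->dex_cache_resolved_methods_[index]
}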
- // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); - // temp = temp[index_in_cache] - __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); + if (invoke->IsStringInit()) { + // temp = thread->string_init_entrypoint + __ gs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset())); // (temp + offset_of_quick_compiled_code)() - __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86_64WordSize).SizeValue())); } else { - __ call(&frame_entry_label_); + // temp = method; + LoadCurrentMethod(temp); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ movl(temp, Address(temp, ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); + // temp = temp[index_in_cache] + __ movq(temp, Address( + temp, CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex()))); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kX86_64WordSize).SizeValue())); + } else { + __ call(&frame_entry_label_); + } } DCHECK(!IsLeafMethod()); @@ -533,7 +546,7 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { } } - __ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI)); + __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI)); } void CodeGeneratorX86_64::GenerateFrameExit() { @@ -573,7 +586,7 @@ void CodeGeneratorX86_64::Bind(HBasicBlock* block) { void CodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) { DCHECK(RequiresCurrentMethod()); - __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); + __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); } Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const { @@ -663,7 +676,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { DCHECK(constant->IsLongConstant()); value = constant->AsLongConstant()->GetValue(); } - __ movq(CpuRegister(TMP), Immediate(value)); + Load64BitValue(CpuRegister(TMP), value); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } else { DCHECK(source.IsDoubleStackSlot()); @@ -696,9 +709,9 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction, } else if (const_to_move->IsLongConstant()) { int64_t value = const_to_move->AsLongConstant()->GetValue(); if (location.IsRegister()) { - __ movq(location.AsRegister<CpuRegister>(), Immediate(value)); + Load64BitValue(location.AsRegister<CpuRegister>(), value); } else if (location.IsDoubleStackSlot()) { - __ movq(CpuRegister(TMP), Immediate(value)); + Load64BitValue(CpuRegister(TMP), value); __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP)); } else { DCHECK(location.IsConstant()); @@ -763,7 +776,6 @@ void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) { HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -948,7 +960,7 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) { LocationSummary* locations = comp->GetLocations(); CpuRegister reg = 
locations->Out().AsRegister<CpuRegister>(); // Clear register: setcc only sets the low byte. - __ xorq(reg, reg); + __ xorl(reg, reg); Location lhs = locations->InAt(0); Location rhs = locations->InAt(1); if (rhs.IsRegister()) { @@ -1232,7 +1244,7 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { codegen_->GenerateFrameExit(); } -Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) { switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -1262,7 +1274,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } case Primitive::kPrimFloat: { - uint32_t index = fp_index_++; + uint32_t index = float_index_++; stack_index_++; if (index < calling_convention.GetNumberOfFpuRegisters()) { return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); @@ -1272,7 +1284,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } case Primitive::kPrimDouble: { - uint32_t index = fp_index_++; + uint32_t index = float_index_++; stack_index_ += 2; if (index < calling_convention.GetNumberOfFpuRegisters()) { return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); @@ -1289,13 +1301,9 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // Explicit clinit checks triggered by static invokes must have been - // pruned by art::PrepareForRegisterAllocation, but this step is not - // run in baseline. So we remove them manually here if we find them. - // TODO: Instead of this local workaround, address this properly. - if (invoke->IsStaticWithExplicitClinitCheck()) { - invoke->RemoveClinitCheckOrLoadClassAsLastInput(); - } + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -1315,9 +1323,9 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codeg } void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - // Explicit clinit checks triggered by static invokes must have been - // pruned by art::PrepareForRegisterAllocation. - DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. 
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; @@ -1334,8 +1342,8 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); locations->AddTemp(Location::RegisterLocation(RDI)); - InvokeDexCallingConventionVisitor calling_convention_visitor; - for (size_t i = 0; i < invoke->InputCount(); i++) { + InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor; + for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { HInstruction* input = invoke->InputAt(i); locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType())); } @@ -1376,8 +1384,8 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) } CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); - size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() + - invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); + size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( + invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue(); LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); size_t class_offset = mirror::Object::ClassOffset().SizeValue(); @@ -1390,9 +1398,9 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) } codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); - __ movl(temp, Address(temp, method_offset)); + __ movq(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); - __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86_64WordSize).SizeValue())); DCHECK(!codegen_->IsLeafMethod()); @@ -1408,15 +1416,15 @@ void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); - uint32_t method_offset = mirror::Class::EmbeddedImTableOffset().Uint32Value() + - (invoke->GetImtIndex() % mirror::Class::kImtSize) * sizeof(mirror::Class::ImTableEntry); + uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( + invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value(); LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); size_t class_offset = mirror::Object::ClassOffset().SizeValue(); // Set the hidden argument. 
- __ movq(invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(), - Immediate(invoke->GetDexMethodIndex())); + CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex()); // temp = object->GetClass(); if (receiver.IsStackSlot()) { @@ -1427,9 +1435,9 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo } codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetImtEntryAt(method_offset); - __ movl(temp, Address(temp, method_offset)); + __ movq(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); - __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86_64WordSize).SizeValue())); DCHECK(!codegen_->IsLeafMethod()); @@ -1852,7 +1860,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; - __ movq(output, Immediate(kPrimLongMax)); + codegen_->Load64BitValue(output, kPrimLongMax); // temp = long-to-float(output) __ cvtsi2ss(temp, output, true); // if input >= temp goto done @@ -1865,7 +1873,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver __ jmp(&done); __ Bind(&nan); // output = 0 - __ xorq(output, output); + __ xorl(output, output); __ Bind(&done); break; } @@ -1877,7 +1885,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; - __ movq(output, Immediate(kPrimLongMax)); + codegen_->Load64BitValue(output, kPrimLongMax); // temp = long-to-double(output) __ cvtsi2sd(temp, output, true); // if input >= temp goto done @@ -1890,7 +1898,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver __ jmp(&done); __ Bind(&nan); // output = 0 - __ xorq(output, output); + __ xorl(output, output); __ Bind(&done); break; } @@ -2479,7 +2487,7 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr case Primitive::kPrimLong: { if (instruction->IsRem()) { - __ xorq(output_register, output_register); + __ xorl(output_register, output_register); } else { __ movq(output_register, input_register); if (imm == -1) { @@ -2523,7 +2531,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); - __ movq(rdx, Immediate(std::abs(imm) - 1)); + codegen_->Load64BitValue(rdx, std::abs(imm) - 1); __ addq(rdx, numerator); __ testq(numerator, numerator); __ cmov(kGreaterEqual, rdx, numerator); @@ -2620,7 +2628,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat __ movq(numerator, rax); // RAX = magic - __ movq(rax, Immediate(magic)); + codegen_->Load64BitValue(rax, magic); // RDX:RAX = magic * numerator __ imulq(numerator); @@ -2649,8 +2657,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat if (IsInt<32>(imm)) { __ imulq(rdx, Immediate(static_cast<int32_t>(imm))); } else { - __ movq(numerator, Immediate(imm)); - __ imulq(rdx, numerator); + __ imulq(rdx, codegen_->LiteralInt64Address(imm)); } __ subq(rax, rdx); @@ -3016,8 +3023,8 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { void 
InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { InvokeRuntimeCallingConvention calling_convention; codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); - __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex())); - + codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), + instruction->GetTypeIndex()); __ gs()->call( Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true)); @@ -3038,7 +3045,8 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2))); - __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex())); + codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), + instruction->GetTypeIndex()); __ gs()->call( Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true)); @@ -3860,8 +3868,19 @@ void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instructio void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathX86_64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + __ gs()->cmpw(Address::Absolute( Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0)); if (successor == nullptr) { @@ -3934,45 +3953,42 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } else if (constant->IsLongConstant()) { int64_t value = constant->AsLongConstant()->GetValue(); if (destination.IsRegister()) { - __ movq(destination.AsRegister<CpuRegister>(), Immediate(value)); + codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; - __ movq(CpuRegister(TMP), Immediate(value)); + codegen_->Load64BitValue(CpuRegister(TMP), value); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } } else if (constant->IsFloatConstant()) { float fp_value = constant->AsFloatConstant()->GetValue(); int32_t value = bit_cast<int32_t, float>(fp_value); - Immediate imm(value); if (destination.IsFpuRegister()) { XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); if (value == 0) { // easy FP 0.0. 
__ xorps(dest, dest); } else { - __ movl(CpuRegister(TMP), imm); - __ movd(dest, CpuRegister(TMP)); + __ movss(dest, codegen_->LiteralFloatAddress(fp_value)); } } else { DCHECK(destination.IsStackSlot()) << destination; + Immediate imm(value); __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); } } else { DCHECK(constant->IsDoubleConstant()) << constant->DebugName(); double fp_value = constant->AsDoubleConstant()->GetValue(); int64_t value = bit_cast<int64_t, double>(fp_value); - Immediate imm(value); if (destination.IsFpuRegister()) { XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); if (value == 0) { __ xorpd(dest, dest); } else { - __ movq(CpuRegister(TMP), imm); - __ movd(dest, CpuRegister(TMP)); + __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value)); } } else { DCHECK(destination.IsDoubleStackSlot()) << destination; - __ movq(CpuRegister(TMP), imm); + codegen_->Load64BitValue(CpuRegister(TMP), value); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } } @@ -4110,11 +4126,11 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); codegen_->LoadCurrentMethod(out); - __ movl(out, Address(out, mirror::ArtMethod::DeclaringClassOffset().Int32Value())); + __ movl(out, Address(out, ArtMethod::DeclaringClassOffset().Int32Value())); } else { DCHECK(cls->CanCallRuntime()); codegen_->LoadCurrentMethod(out); - __ movl(out, Address(out, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value())); + __ movl(out, Address(out, ArtMethod::DexCacheResolvedTypesOffset().Int32Value())); __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()))); SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); @@ -4159,7 +4175,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { CpuRegister out = load->GetLocations()->Out().AsRegister<CpuRegister>(); codegen_->LoadCurrentMethod(CpuRegister(out)); - __ movl(out, Address(out, mirror::ArtMethod::DeclaringClassOffset().Int32Value())); + __ movl(out, Address(out, ArtMethod::DeclaringClassOffset().Int32Value())); __ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value())); __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex()))); __ testl(out, out); @@ -4431,6 +4447,17 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) { LOG(FATAL) << "Unreachable"; } +void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { + if (value == 0) { + __ xorl(dest, dest); + } else if (value > 0 && IsInt<32>(value)) { + // We can use a 32 bit move, as it will zero-extend and is one byte shorter. + __ movl(dest, Immediate(static_cast<int32_t>(value))); + } else { + __ movq(dest, Immediate(value)); + } +} + void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { // Generate the constant area if needed. 
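Load64BitValue, defined at the end of the hunk above, simply picks the shortest correct encoding: any write to a 32-bit register on x86-64 zero-extends into the upper half, so the first two branches are safe for zero and for positive values that fit in 32 bits. Rough encoding sizes (without/with a REX prefix for r8-r15; a rule-of-thumb note, not exact emitted bytes):

__ xorl(dest, dest);              // 2-3 bytes; also a recognized zeroing idiom
__ movl(dest, Immediate(imm32));  // 5-6 bytes; upper 32 bits become zero
__ movq(dest, Immediate(imm64));  // 10 bytes; full movabs, the fallback

The float/double cases of EmitMove make the matching change: instead of materializing the bit pattern in TMP and movd-ing it across, they now load the constant with a single movss/movsd from the literal area.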
X86_64Assembler* assembler = GetAssembler(); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 6cdc82262c..4be401a0fa 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -37,7 +37,7 @@ static constexpr FloatRegister kParameterFloatRegisters[] = static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters); -static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX }; +static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX }; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; @@ -50,7 +50,8 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatR : CallingConvention(kRuntimeParameterCoreRegisters, kRuntimeParameterCoreRegistersLength, kRuntimeParameterFpuRegisters, - kRuntimeParameterFpuRegistersLength) {} + kRuntimeParameterFpuRegistersLength, + kX86_64PointerSize) {} private: DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); @@ -62,28 +63,24 @@ class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegis kParameterCoreRegisters, kParameterCoreRegistersLength, kParameterFloatRegisters, - kParameterFloatRegistersLength) {} + kParameterFloatRegistersLength, + kX86_64PointerSize) {} private: DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); }; -class InvokeDexCallingConventionVisitor { +class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor { public: - InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {} + InvokeDexCallingConventionVisitorX86_64() {} + virtual ~InvokeDexCallingConventionVisitorX86_64() {} - Location GetNextLocation(Primitive::Type type); + Location GetNextLocation(Primitive::Type type) OVERRIDE; private: InvokeDexCallingConvention calling_convention; - // The current index for cpu registers. - uint32_t gp_index_; - // The current index for fpu registers. - uint32_t fp_index_; - // The current stack index. - uint32_t stack_index_; - - DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); + + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64); }; class CodeGeneratorX86_64; @@ -147,7 +144,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor { void HandleFieldGet(HInstruction* instruction); CodeGeneratorX86_64* const codegen_; - InvokeDexCallingConventionVisitor parameter_visitor_; + InvokeDexCallingConventionVisitorX86_64 parameter_visitor_; DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64); }; @@ -287,6 +284,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { Address LiteralInt32Address(int32_t v); Address LiteralInt64Address(int64_t v); + // Load a 64 bit value into a register in the most efficient manner. + void Load64BitValue(CpuRegister dest, int64_t value); + private: // Labels for each block that will be compiled. 
GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 94f56e5d3e..bfed1a89de 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -225,7 +225,7 @@ static void RunCodeOptimized(HGraph* graph, static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) { ArenaPool pool; ArenaAllocator arena(&pool); - HGraph* graph = new (&arena) HGraph(&arena); + HGraph* graph = CreateGraph(&arena); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -238,7 +238,7 @@ static void TestCode(const uint16_t* data, bool has_result = false, int32_t expe static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { ArenaPool pool; ArenaAllocator arena(&pool); - HGraph* graph = new (&arena) HGraph(&arena); + HGraph* graph = CreateGraph(&arena); HGraphBuilder builder(graph, Primitive::kPrimLong); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -504,7 +504,7 @@ TEST(CodegenTest, NonMaterializedCondition) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -623,7 +623,7 @@ TEST(CodegenTest, MaterializedCondition1) { for (size_t i = 0; i < arraysize(lhs); i++) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry_block); @@ -669,7 +669,7 @@ TEST(CodegenTest, MaterializedCondition2) { for (size_t i = 0; i < arraysize(lhs); i++) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry_block); diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 91cd60acce..6fbe75e802 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -47,6 +47,12 @@ static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) { } } +static void MarkLoopHeadersContaining(const HBasicBlock& block, ArenaBitVector* set) { + for (HLoopInformationOutwardIterator it(block); !it.Done(); it.Advance()) { + set->SetBit(it.Current()->GetHeader()->GetBlockId()); + } +} + void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) { if (stats_ != nullptr) { stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction, @@ -58,17 +64,26 @@ void HDeadCodeElimination::RemoveDeadBlocks() { // Classify blocks as reachable/unreachable. ArenaAllocator* allocator = graph_->GetArena(); ArenaBitVector live_blocks(allocator, graph_->GetBlocks().Size(), false); + ArenaBitVector affected_loops(allocator, graph_->GetBlocks().Size(), false); + MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks); - // Remove all dead blocks. Process blocks in post-order, because removal needs - // the block's chain of dominators. + // Remove all dead blocks. 
Iterate in post order because removal needs the + // block's chain of dominators and nested loops need to be updated from the + // inside out. for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); - if (live_blocks.IsBitSet(block->GetBlockId())) { - continue; + int id = block->GetBlockId(); + if (live_blocks.IsBitSet(id)) { + if (affected_loops.IsBitSet(id)) { + DCHECK(block->IsLoopHeader()); + block->GetLoopInformation()->Update(); + } + } else { + MaybeRecordDeadBlock(block); + MarkLoopHeadersContaining(*block, &affected_loops); + block->DisconnectAndDelete(); } - MaybeRecordDeadBlock(block); - block->DisconnectAndDelete(); } // Connect successive blocks created by dead branches. Order does not matter. diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 0bea0fc1c2..59a57c4345 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -31,13 +31,13 @@ class HDeadCodeElimination : public HOptimization { public: HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats = nullptr, - const char* name = kDeadCodeEliminationPassName) + const char* name = kInitialDeadCodeEliminationPassName) : HOptimization(graph, true, name, stats) {} void Run() OVERRIDE; - static constexpr const char* kDeadCodeEliminationPassName = - "dead_code_elimination"; + static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination"; + static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final"; private: void MaybeRecordDeadBlock(HBasicBlock* block); diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 61a7697301..78ae1dd960 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -27,7 +27,7 @@ namespace art { static void TestCode(const uint16_t* data, const int* blocks, size_t blocks_length) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index 2bfecc696a..29aa97a83a 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -28,7 +28,7 @@ namespace art { static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); @@ -235,14 +235,13 @@ TEST(FindLoopsTest, Loop4) { TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // pre header - const int blocks2[] = {2, 3, 4, 5, 8}; - TestBlock(graph, 2, true, 2, blocks2, 5); // loop header + const int blocks2[] = {2, 3, 4, 5}; + TestBlock(graph, 2, true, 2, blocks2, arraysize(blocks2)); // loop header TestBlock(graph, 3, false, 2); // block in loop - TestBlock(graph, 4, false, 2); // original back edge - TestBlock(graph, 5, false, 2); // original back edge + TestBlock(graph, 4, false, 2); // back edge + TestBlock(graph, 5, false, 2); // back edge TestBlock(graph, 6, false, -1); // return block TestBlock(graph, 7, false, 
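RemoveDeadBlocks above now tracks two bit vectors: live_blocks from the reachability walk, and affected_loops, which MarkLoopHeadersContaining fills with every loop header enclosing a deleted block. Post order visits the blocks inside a loop before the header, so by the time a live header is reached all of its dead members are already gone and a single HLoopInformation::Update() can repair it; inner headers also come before outer ones, which is why nested loops get fixed from the inside out. Annotated, the new loop body reads:

int id = block->GetBlockId();
if (live_blocks.IsBitSet(id)) {
  if (affected_loops.IsBitSet(id)) {
    DCHECK(block->IsLoopHeader());
    block->GetLoopInformation()->Update();   // some of this loop's blocks were deleted
  }
} else {
  MaybeRecordDeadBlock(block);
  MarkLoopHeadersContaining(*block, &affected_loops);  // remember the enclosing headers
  block->DisconnectAndDelete();
}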
-1); // exit block - TestBlock(graph, 8, false, 2); // synthesized back edge } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index dc3124b35f..fd28f0b83f 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -170,7 +170,8 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } } - // Ensure the uses of `instruction` are defined in a block of the graph. + // Ensure the uses of `instruction` are defined in a block of the graph, + // and the entry in the use list is consistent. for (HUseIterator<HInstruction*> use_it(instruction->GetUses()); !use_it.Done(); use_it.Advance()) { HInstruction* use = use_it.Current()->GetUser(); @@ -184,6 +185,27 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { use->GetId(), instruction->GetId())); } + size_t use_index = use_it.Current()->GetIndex(); + if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) { + AddError(StringPrintf("User %s:%d of instruction %d has a wrong " + "UseListNode index.", + use->DebugName(), + use->GetId(), + instruction->GetId())); + } + } + + // Ensure the environment uses entries are consistent. + for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses()); + !use_it.Done(); use_it.Advance()) { + HEnvironment* use = use_it.Current()->GetUser(); + size_t use_index = use_it.Current()->GetIndex(); + if ((use_index >= use->Size()) || (use->GetInstructionAt(use_index) != instruction)) { + AddError(StringPrintf("Environment user of %s:%d has a wrong " + "UseListNode index.", + instruction->DebugName(), + instruction->GetId())); + } } // Ensure 'instruction' has pointers to its inputs' use entries. @@ -191,7 +213,11 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i); HInstruction* input = input_record.GetInstruction(); HUseListNode<HInstruction*>* use_node = input_record.GetUseNode(); - if (use_node == nullptr || !input->GetUses().Contains(use_node)) { + size_t use_index = use_node->GetIndex(); + if ((use_node == nullptr) + || !input->GetUses().Contains(use_node) + || (use_index >= e) + || (use_index != i)) { AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry " "at input %u (%s:%d).", instruction->DebugName(), @@ -262,6 +288,7 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) { void SSAChecker::CheckLoop(HBasicBlock* loop_header) { int id = loop_header->GetBlockId(); + HLoopInformation* loop_information = loop_header->GetLoopInformation(); // Ensure the pre-header block is first in the list of // predecessors of a loop header. @@ -271,57 +298,48 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { id)); } - // Ensure the loop header has only two predecessors and that only the - // second one is a back edge. + // Ensure the loop header has only one incoming branch and the remaining + // predecessors are back edges. 
size_t num_preds = loop_header->GetPredecessors().Size(); if (num_preds < 2) { AddError(StringPrintf( "Loop header %d has less than two predecessors: %zu.", id, num_preds)); - } else if (num_preds > 2) { - AddError(StringPrintf( - "Loop header %d has more than two predecessors: %zu.", - id, - num_preds)); } else { - HLoopInformation* loop_information = loop_header->GetLoopInformation(); HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0); if (loop_information->IsBackEdge(*first_predecessor)) { AddError(StringPrintf( "First predecessor of loop header %d is a back edge.", id)); } - HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1); - if (!loop_information->IsBackEdge(*second_predecessor)) { - AddError(StringPrintf( - "Second predecessor of loop header %d is not a back edge.", - id)); + for (size_t i = 1, e = loop_header->GetPredecessors().Size(); i < e; ++i) { + HBasicBlock* predecessor = loop_header->GetPredecessors().Get(i); + if (!loop_information->IsBackEdge(*predecessor)) { + AddError(StringPrintf( + "Loop header %d has multiple incoming (non back edge) blocks.", + id)); + } } } - const ArenaBitVector& loop_blocks = loop_header->GetLoopInformation()->GetBlocks(); + const ArenaBitVector& loop_blocks = loop_information->GetBlocks(); - // Ensure there is only one back edge per loop. - size_t num_back_edges = - loop_header->GetLoopInformation()->GetBackEdges().Size(); + // Ensure back edges belong to the loop. + size_t num_back_edges = loop_information->GetBackEdges().Size(); if (num_back_edges == 0) { AddError(StringPrintf( "Loop defined by header %d has no back edge.", id)); - } else if (num_back_edges > 1) { - AddError(StringPrintf( - "Loop defined by header %d has several back edges: %zu.", - id, - num_back_edges)); } else { - DCHECK_EQ(num_back_edges, 1u); - int back_edge_id = loop_header->GetLoopInformation()->GetBackEdges().Get(0)->GetBlockId(); - if (!loop_blocks.IsBitSet(back_edge_id)) { - AddError(StringPrintf( - "Loop defined by header %d has an invalid back edge %d.", - id, - back_edge_id)); + for (size_t i = 0; i < num_back_edges; ++i) { + int back_edge_id = loop_information->GetBackEdges().Get(i)->GetBlockId(); + if (!loop_blocks.IsBitSet(back_edge_id)) { + AddError(StringPrintf( + "Loop defined by header %d has an invalid back edge %d.", + id, + back_edge_id)); + } } } @@ -368,8 +386,9 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { // Ensure an instruction having an environment is dominated by the // instructions contained in the environment. 
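Together with the find_loops_test change above (block 8, the synthesized back edge, is gone), these checker updates describe the new loop shape: a header has exactly one incoming edge from outside the loop plus any number of back edges, rather than having multiple back edges funneled through one synthesized block. A loop whose body re-enters the header from two places now simply carries two back edges; for example (names are illustrative):

while (cond) {
  if (flag) {
    DoA();
    continue;   // back edge #1
  }
  DoB();        // falls through to the header: back edge #2
}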
- HEnvironment* environment = instruction->GetEnvironment(); - if (environment != nullptr) { + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* env_instruction = environment->GetInstructionAt(i); if (env_instruction != nullptr diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index 923468ff16..eca0d9344f 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -30,7 +30,7 @@ namespace art { * 1: Exit */ HGraph* CreateSimpleCFG(ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry_block = new (allocator) HBasicBlock(graph); entry_block->AddInstruction(new (allocator) HGoto()); graph->AddBlock(entry_block); diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index 50398b4790..59d50926ad 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -73,7 +73,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock1) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* if_true = createGotoBlock(graph, &allocator); @@ -108,7 +108,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock2) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* if_false = createGotoBlock(graph, &allocator); @@ -143,7 +143,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); @@ -178,7 +178,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); @@ -213,7 +213,7 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders1) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* first_if_block = createIfBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); @@ -252,7 +252,7 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders2) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* first_if_block = createIfBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, 
&allocator); @@ -288,7 +288,7 @@ TEST(GraphTest, InsertInstructionBefore) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* block = createGotoBlock(graph, &allocator); HInstruction* got = block->GetLastInstruction(); ASSERT_TRUE(got->IsControlFlow()); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index ca9cbc3d01..f5c630bf97 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -17,6 +17,7 @@ #include "graph_visualizer.h" #include "code_generator.h" +#include "dead_code_elimination.h" #include "licm.h" #include "nodes.h" #include "optimization.h" @@ -211,17 +212,22 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << "]"; } if (instruction->HasEnvironment()) { - HEnvironment* env = instruction->GetEnvironment(); - output_ << " (env: [ "; - for (size_t i = 0, e = env->Size(); i < e; ++i) { - HInstruction* insn = env->GetInstructionAt(i); - if (insn != nullptr) { - output_ << GetTypeId(insn->GetType()) << insn->GetId() << " "; - } else { - output_ << " _ "; + output_ << " (env:"; + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { + output_ << " [ "; + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* insn = environment->GetInstructionAt(i); + if (insn != nullptr) { + output_ << GetTypeId(insn->GetType()) << insn->GetId() << " "; + } else { + output_ << " _ "; + } } + output_ << "]"; } - output_ << "])"; + output_ << ")"; } if (IsPass(SsaLivenessAnalysis::kLivenessPassName) && is_after_pass_ @@ -248,7 +254,8 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } } output_ << " (liveness: " << instruction->GetLifetimePosition() << ")"; - } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)) { + } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName) + || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)) { output_ << " ( loop_header:"; HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); if (info == nullptr) { diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index a81d49aa0c..c3ce7e142a 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -29,7 +29,7 @@ TEST(GVNTest, LocalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -78,7 +78,7 @@ TEST(GVNTest, GlobalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -133,7 +133,7 @@ TEST(GVNTest, LoopFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -220,7 +220,7 @@ TEST(GVNTest, LoopSideEffects) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); 
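Both the graph-checker and graph-visualizer hunks above now walk a chain of environments via GetParent(): with inlining, an instruction's environment captures the inlined frame plus each caller frame, so anything that inspects environments has to visit all of them. A small sketch of the idiom, using only the accessors that appear in the hunks (the helper itself is illustrative, not part of the change):

size_t TotalEnvironmentSize(HInstruction* instruction) {
  size_t total = 0;
  for (HEnvironment* environment = instruction->GetEnvironment();
       environment != nullptr;
       environment = environment->GetParent()) {
    total += environment->Size();   // vreg slots captured by this frame
  }
  return total;
}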
HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index ada32db047..e51732396d 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -16,6 +16,7 @@ #include "inliner.h" +#include "art_method-inl.h" #include "builder.h" #include "class_linker.h" #include "constant_folding.h" @@ -23,7 +24,6 @@ #include "driver/compiler_driver-inl.h" #include "driver/dex_compilation_unit.h" #include "instruction_simplifier.h" -#include "mirror/art_method-inl.h" #include "mirror/class_loader.h" #include "mirror/dex_cache.h" #include "nodes.h" @@ -81,11 +81,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, hs.NewHandle(caller_compilation_unit_.GetClassLinker()->FindDexCache(caller_dex_file))); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( soa.Decode<mirror::ClassLoader*>(caller_compilation_unit_.GetClassLoader()))); - Handle<mirror::ArtMethod> resolved_method(hs.NewHandle( - compiler_driver_->ResolveMethod( - soa, dex_cache, class_loader, &caller_compilation_unit_, method_index, invoke_type))); + ArtMethod* resolved_method(compiler_driver_->ResolveMethod( + soa, dex_cache, class_loader, &caller_compilation_unit_, method_index, invoke_type)); - if (resolved_method.Get() == nullptr) { + if (resolved_method == nullptr) { VLOG(compiler) << "Method cannot be resolved " << PrettyMethod(method_index, caller_dex_file); return false; } @@ -141,7 +140,6 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, } if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, can_use_dex_cache)) { - resolved_method->SetShouldNotInline(); return false; } @@ -150,7 +148,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, return true; } -bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, +bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, uint32_t method_index, bool can_use_dex_cache) const { @@ -170,7 +168,12 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, nullptr); HGraph* callee_graph = new (graph_->GetArena()) HGraph( - graph_->GetArena(), graph_->IsDebuggable(), graph_->GetCurrentInstructionId()); + graph_->GetArena(), + caller_dex_file, + method_index, + compiler_driver_->GetInstructionSet(), + graph_->IsDebuggable(), + graph_->GetCurrentInstructionId()); OptimizingCompilerStats inline_stats; HGraphBuilder builder(callee_graph, @@ -183,6 +186,9 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, if (!builder.BuildGraph(*code_item)) { VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " could not be built, so cannot be inlined"; + // There could be multiple reasons why the graph could not be built, including + // unaccessible methods/fields due to using a different dex cache. We do not mark + // the method as non-inlineable so that other callers can still try to inline it. 
return false; } @@ -190,12 +196,14 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, compiler_driver_->GetInstructionSet())) { VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " cannot be inlined because of the register allocator"; + resolved_method->SetShouldNotInline(); return false; } if (!callee_graph->TryBuildingSsa()) { VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " could not be transformed to SSA"; + resolved_method->SetShouldNotInline(); return false; } @@ -232,6 +240,7 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, if (block->IsLoopHeader()) { VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " could not be inlined because it contains a loop"; + resolved_method->SetShouldNotInline(); return false; } @@ -261,6 +270,8 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) << " could not be inlined because " << current->DebugName() << " it is in a different dex file and requires access to the dex cache"; + // Do not flag the method as not-inlineable. A caller within the same + // dex file could still successfully inline it. return false; } } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 1dbc7d392b..831bdf22a0 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -48,7 +48,7 @@ class HInliner : public HOptimization { private: bool TryInline(HInvoke* invoke_instruction, uint32_t method_index, InvokeType invoke_type) const; - bool TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, + bool TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, uint32_t method_index, bool can_use_dex_cache) const; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 2df7c166d8..46fad17b8f 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -137,13 +137,25 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); - if ((input_cst != nullptr) && input_cst->IsZero()) { - // Replace code looking like - // SHL dst, src, 0 - // with - // src - instruction->ReplaceWith(input_other); - instruction->GetBlock()->RemoveInstruction(instruction); + if (input_cst != nullptr) { + if (input_cst->IsZero()) { + // Replace code looking like + // SHL dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (instruction->IsShl() && input_cst->IsOne()) { + // Replace Shl looking like + // SHL dst, src, 1 + // with + // ADD dst, src, src + HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(), + input_other, + input_other); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add); + RecordSimplification(); + } } } @@ -377,15 +389,42 @@ void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) { return; } - if ((input_cst != nullptr) && input_cst->IsMinusOne() && - (Primitive::IsFloatingPointType(type) || Primitive::IsIntOrLongType(type))) { + if ((input_cst != nullptr) && input_cst->IsMinusOne()) { // Replace code looking like // DIV dst, src, -1 // with // NEG dst, src 
instruction->GetBlock()->ReplaceAndRemoveInstructionWith( - instruction, (new (GetGraph()->GetArena()) HNeg(type, input_other))); + instruction, new (GetGraph()->GetArena()) HNeg(type, input_other)); RecordSimplification(); + return; + } + + if ((input_cst != nullptr) && Primitive::IsFloatingPointType(type)) { + // Try replacing code looking like + // DIV dst, src, constant + // with + // MUL dst, src, 1 / constant + HConstant* reciprocal = nullptr; + if (type == Primitive::Primitive::kPrimDouble) { + double value = input_cst->AsDoubleConstant()->GetValue(); + if (CanDivideByReciprocalMultiplyDouble(bit_cast<int64_t, double>(value))) { + reciprocal = GetGraph()->GetDoubleConstant(1.0 / value); + } + } else { + DCHECK_EQ(type, Primitive::kPrimFloat); + float value = input_cst->AsFloatConstant()->GetValue(); + if (CanDivideByReciprocalMultiplyFloat(bit_cast<int32_t, float>(value))) { + reciprocal = GetGraph()->GetFloatConstant(1.0f / value); + } + } + + if (reciprocal != nullptr) { + instruction->GetBlock()->ReplaceAndRemoveInstructionWith( + instruction, new (GetGraph()->GetArena()) HMul(type, input_other, reciprocal)); + RecordSimplification(); + return; + } } } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 20aa45f197..9e18f114ad 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -22,6 +22,7 @@ #include "invoke_type.h" #include "nodes.h" #include "quick/inline_method_analyser.h" +#include "utils.h" namespace art { @@ -186,6 +187,8 @@ static Intrinsics GetIntrinsic(InlineMethod method) { return Intrinsics::kStringCharAt; case kIntrinsicCompareTo: return Intrinsics::kStringCompareTo; + case kIntrinsicGetCharsNoCheck: + return Intrinsics::kStringGetCharsNoCheck; case kIntrinsicIsEmptyOrLength: // The inliner can handle these two cases - and this is the preferred approach // since after inlining the call is no longer visible (as opposed to waiting @@ -194,6 +197,12 @@ static Intrinsics GetIntrinsic(InlineMethod method) { case kIntrinsicIndexOf: return ((method.d.data & kIntrinsicFlagBase0) == 0) ? Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf; + case kIntrinsicNewStringFromBytes: + return Intrinsics::kStringNewStringFromBytes; + case kIntrinsicNewStringFromChars: + return Intrinsics::kStringNewStringFromChars; + case kIntrinsicNewStringFromString: + return Intrinsics::kStringNewStringFromString; case kIntrinsicCas: switch (GetType(method.d.data, false)) { @@ -280,6 +289,11 @@ static Intrinsics GetIntrinsic(InlineMethod method) { case kInlineOpIPut: return Intrinsics::kNone; + // String init cases, not intrinsics. + + case kInlineStringInit: + return Intrinsics::kNone; + // No default case to make the compiler warn on missing cases. 
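The two simplifier additions above are straightforward strength reductions. SHL dst, src, 1 becomes ADD dst, src, src, which produces the same bits for every integral src and, presumably, is at least as cheap while being easier for later passes and backends to fold. The floating-point rewrite DIV dst, src, c -> MUL dst, src, 1/c is only performed when CanDivideByReciprocalMultiply{Float,Double} reports that the reciprocal is exact (essentially: c is a power of two whose reciprocal is still representable); otherwise multiplying by a rounded reciprocal could differ from the correctly rounded quotient. Concretely:

// Exact: 0.25f is exactly 1/4, so x / 4.0f and x * 0.25f agree for every x.
static_assert(0.25f * 4.0f == 1.0f, "1/4 is exact in binary floating point");
// Not exact: 1.0f / 3.0f is itself rounded, so x * (1.0f / 3.0f) can differ
// from x / 3.0f in the last bit -- the simplifier leaves that DIV alone.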
} return Intrinsics::kNone; @@ -361,4 +375,3 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) } } // namespace art - diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index dbb7cbaa98..c243ef3f8b 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -17,8 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_H_ +#include "code_generator.h" #include "nodes.h" #include "optimization.h" +#include "parallel_move_resolver.h" namespace art { @@ -76,6 +78,38 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS + static void MoveArguments(HInvoke* invoke, + CodeGenerator* codegen, + InvokeDexCallingConventionVisitor* calling_convention_visitor) { + if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) { + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck()); + } + + if (invoke->GetNumberOfArguments() == 0) { + // No argument to move. + return; + } + + LocationSummary* locations = invoke->GetLocations(); + + // We're moving potentially two or more locations to locations that could overlap, so we need + // a parallel move resolver. + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + + for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { + HInstruction* input = invoke->InputAt(i); + Location cc_loc = calling_convention_visitor->GetNextLocation(input->GetType()); + Location actual_loc = locations->InAt(i); + + parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); + } + + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + } + protected: IntrinsicVisitor() {} diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index abdf04ebb1..db35b8f767 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -17,11 +17,11 @@ #include "intrinsics_arm.h" #include "arch/arm/instruction_set_features_arm.h" +#include "art_method.h" #include "code_generator_arm.h" #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" #include "mirror/array-inl.h" -#include "mirror/art_method.h" #include "mirror/string.h" #include "thread.h" #include "utils/arm/assembler_arm.h" @@ -48,7 +48,7 @@ static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGener DCHECK_NE(type, Primitive::kPrimVoid); - if (Primitive::IsIntegralType(type)) { + if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) { if (type == Primitive::kPrimLong) { Register trg_reg_lo = trg.AsRegisterPairLow<Register>(); Register trg_reg_hi = trg.AsRegisterPairHigh<Register>(); @@ -77,28 +77,9 @@ static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGener } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) { - if (invoke->InputCount() == 0) { - // No argument to move. - return; - } - - LocationSummary* locations = invoke->GetLocations(); - InvokeDexCallingConventionVisitor calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. 
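The shared MoveArguments moved into IntrinsicVisitor above spells out why it builds an HParallelMove: the shuffle from the locations the register allocator chose to the runtime calling convention is a set of simultaneous moves whose sources and destinations can overlap, so emitting them one at a time may clobber a source. A two-register illustration (registers are made up):

// Wanted, as one parallel move:   arg0: R1 -> R2   and   arg1: R2 -> R1
//
// Naive sequential emission:
//   mov R2, R1     // arg0 done...
//   mov R1, R2     // ...but arg1's source was just overwritten
//
// After the resolver detects the cycle and breaks it with a temp:
//   mov TMP, R1
//   mov R1,  R2
//   mov R2,  TMP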
- HParallelMove parallel_move(arena); - - for (size_t i = 0; i < invoke->InputCount(); i++) { - HInstruction* input = invoke->InputAt(i); - Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); - Location actual_loc = locations->InAt(i); - - parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); - } - - codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); +static void MoveArguments(HInvoke* invoke, CodeGeneratorARM* codegen) { + InvokeDexCallingConventionVisitorARM calling_convention_visitor; + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); } // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified @@ -117,7 +98,7 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM { SaveLiveRegisters(codegen, invoke_->GetLocations()); - MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); @@ -810,10 +791,6 @@ void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) { const MemberOffset value_offset = mirror::String::ValueOffset(); // Location of count const MemberOffset count_offset = mirror::String::CountOffset(); - // Starting offset within data array - const MemberOffset offset_offset = mirror::String::OffsetOffset(); - // Start of char data with array_ - const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t)); Register obj = locations->InAt(0).AsRegister<Register>(); // String object pointer. Register idx = locations->InAt(1).AsRegister<Register>(); // Index of character. @@ -835,15 +812,10 @@ void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) { __ cmp(idx, ShifterOperand(temp)); __ b(slow_path->GetEntryLabel(), CS); - // Index computation. - __ ldr(temp, Address(obj, offset_offset.Int32Value())); // temp := str.offset. - __ ldr(array_temp, Address(obj, value_offset.Int32Value())); // array_temp := str.offset. - __ add(temp, temp, ShifterOperand(idx)); - DCHECK_EQ(data_offset.Int32Value() % 2, 0); // We'll compensate by shifting. - __ add(temp, temp, ShifterOperand(data_offset.Int32Value() / 2)); + __ add(array_temp, obj, ShifterOperand(value_offset.Int32Value())); // array_temp := str.value. // Load the value. - __ ldrh(out, Address(array_temp, temp, LSL, 1)); // out := array_temp[temp]. + __ ldrh(out, Address(array_temp, idx, LSL, 1)); // out := array_temp[idx]. __ Bind(slow_path->GetExitLabel()); } @@ -878,6 +850,169 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void GenerateVisitStringIndexOf(HInvoke* invoke, + ArmAssembler* assembler, + CodeGeneratorARM* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + Register tmp_reg = locations->GetTemp(0).AsRegister<Register>(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeARM* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. 
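The String.charAt hunk above (and the matching ARM64 hunk below) drops the offset_offset/data_offset indirection: the character data is addressed directly at obj + value_offset, so after the bounds check the intrinsic is one halfword load. The fast path in pointer-arithmetic form (a sketch; value_offset stands for mirror::String::ValueOffset()):

uint16_t CharAtFastPath(const uint8_t* string_obj, uint32_t idx, uint32_t value_offset) {
  // The bounds check against the count field has already been done.
  const uint16_t* chars = reinterpret_cast<const uint16_t*>(string_obj + value_offset);
  return chars[idx];   // out := chars[idx], i.e. the ldrh above
}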
We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathARM(invoke); + codegen->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + Register char_reg = locations->InAt(1).AsRegister<Register>(); + __ LoadImmediate(tmp_reg, std::numeric_limits<uint16_t>::max()); + __ cmp(char_reg, ShifterOperand(tmp_reg)); + slow_path = new (allocator) IntrinsicSlowPathARM(invoke); + codegen->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel(), HI); + } + + if (start_at_zero) { + DCHECK_EQ(tmp_reg, R2); + // Start-index = 0. + __ LoadImmediate(tmp_reg, 0); + } + + __ LoadFromOffset(kLoadWord, LR, TR, + QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pIndexOf).Int32Value()); + __ blx(LR); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetOut(Location::RegisterLocation(R0)); + + // Need a temp for slow-path codepoint compare, and need to send start-index=0. + locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); +} + +void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetOut(Location::RegisterLocation(R0)); + + // Need a temp for slow-path codepoint compare. 
+ locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + +void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); + locations->SetOut(Location::RegisterLocation(R0)); +} + +void IntrinsicCodeGeneratorARM::VisitStringNewStringFromBytes(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register byte_array = locations->InAt(0).AsRegister<Register>(); + __ cmp(byte_array, ShifterOperand(0)); + SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel(), EQ); + + __ LoadFromOffset( + kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromBytes).Int32Value()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ blx(LR); + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetOut(Location::RegisterLocation(R0)); +} + +void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + + __ LoadFromOffset( + kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromChars).Int32Value()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ blx(LR); +} + +void IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(R0)); +} + +void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register string_to_copy = locations->InAt(0).AsRegister<Register>(); + __ cmp(string_to_copy, ShifterOperand(0)); + SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel(), EQ); + + __ LoadFromOffset(kLoadWord, + LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromString).Int32Value()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ blx(LR); + __ Bind(slow_path->GetExitLabel()); 
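Two details of the new ARM string intrinsics above are worth noting. GenerateVisitStringIndexOf only handles search values that fit in one UTF-16 code unit; anything above 0xFFFF is routed to the slow path, unconditionally when the value is a known constant and via the tmp_reg compare otherwise. And NewStringFromBytes/NewStringFromString null-check their source and branch to the slow path on EQ, so the null case is handled by the runtime rather than in the stub. On the first point, a supplementary code point can never match a single code unit of the string's data (an assumption about why the cut-off sits at 0xFFFF):

// U+1F600 is stored in the string's uint16_t data as the surrogate pair
// 0xD83D, 0xDE00, so it is left to the generic String.indexOf slow path.
static_assert(0x1F600u > 0xFFFFu, "does not fit in a single UTF-16 code unit");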
+} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -904,9 +1039,8 @@ UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure. UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) } // namespace arm } // namespace art diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 7a753b2da9..957373f6f9 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -17,12 +17,12 @@ #include "intrinsics_arm64.h" #include "arch/arm64/instruction_set_features_arm64.h" +#include "art_method.h" #include "code_generator_arm64.h" #include "common_arm64.h" #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" #include "mirror/array-inl.h" -#include "mirror/art_method.h" #include "mirror/string.h" #include "thread.h" #include "utils/arm64/assembler_arm64.h" @@ -75,7 +75,7 @@ static void MoveFromReturnRegister(Location trg, DCHECK_NE(type, Primitive::kPrimVoid); - if (Primitive::IsIntegralType(type)) { + if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) { Register trg_reg = RegisterFrom(trg, type); Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type); __ Mov(trg_reg, res_reg, kDiscardForSameWReg); @@ -86,28 +86,9 @@ static void MoveFromReturnRegister(Location trg, } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM64* codegen) { - if (invoke->InputCount() == 0) { - // No argument to move. - return; - } - - LocationSummary* locations = invoke->GetLocations(); - InvokeDexCallingConventionVisitor calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. 
- HParallelMove parallel_move(arena); - - for (size_t i = 0; i < invoke->InputCount(); i++) { - HInstruction* input = invoke->InputAt(i); - Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); - Location actual_loc = locations->InAt(i); - - parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); - } - - codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); +static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) { + InvokeDexCallingConventionVisitorARM64 calling_convention_visitor; + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); } // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified @@ -126,7 +107,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { SaveLiveRegisters(codegen, invoke_->GetLocations()); - MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); @@ -953,10 +934,6 @@ void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) { const MemberOffset value_offset = mirror::String::ValueOffset(); // Location of count const MemberOffset count_offset = mirror::String::CountOffset(); - // Starting offset within data array - const MemberOffset offset_offset = mirror::String::OffsetOffset(); - // Start of char data with array_ - const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t)); Register obj = WRegisterFrom(locations->InAt(0)); // String object pointer. Register idx = WRegisterFrom(locations->InAt(1)); // Index of character. @@ -979,21 +956,15 @@ void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) { __ Cmp(idx, temp); __ B(hs, slow_path->GetEntryLabel()); - // Index computation. - __ Ldr(temp, HeapOperand(obj, offset_offset)); // temp := str.offset. - __ Ldr(array_temp, HeapOperand(obj, value_offset)); // array_temp := str.offset. - __ Add(temp, temp, idx); - DCHECK_EQ(data_offset.Int32Value() % 2, 0); // We'll compensate by shifting. - __ Add(temp, temp, Operand(data_offset.Int32Value() / 2)); + __ Add(array_temp, obj, Operand(value_offset.Int32Value())); // array_temp := str.value. // Load the value. - __ Ldrh(out, MemOperand(array_temp.X(), temp, UXTW, 1)); // out := array_temp[temp]. + __ Ldrh(out, MemOperand(array_temp.X(), idx, UXTW, 1)); // out := array_temp[idx]. __ Bind(slow_path->GetExitLabel()); } void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { - // The inputs plus one temp. LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCall, kIntrinsified); @@ -1022,6 +993,169 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void GenerateVisitStringIndexOf(HInvoke* invoke, + vixl::MacroAssembler* masm, + CodeGeneratorARM64* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + Register tmp_reg = WRegisterFrom(locations->GetTemp(0)); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. 
+ SlowPathCodeARM64* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > 0xFFFFU) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathARM64(invoke); + codegen->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + Register char_reg = WRegisterFrom(locations->InAt(1)); + __ Mov(tmp_reg, 0xFFFF); + __ Cmp(char_reg, Operand(tmp_reg)); + slow_path = new (allocator) IntrinsicSlowPathARM64(invoke); + codegen->AddSlowPath(slow_path); + __ B(hi, slow_path->GetEntryLabel()); + } + + if (start_at_zero) { + // Start-index = 0. + __ Mov(tmp_reg, 0); + } + + __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pIndexOf).Int32Value())); + __ Blr(lr); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + + // Need a temp for slow-path codepoint compare, and need to send start_index=0. + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); +} + +void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + + // Need a temp for slow-path codepoint compare. 
+ locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), false); +} + +void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); +} + +void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { + vixl::MacroAssembler* masm = GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register byte_array = WRegisterFrom(locations->InAt(0)); + __ Cmp(byte_array, 0); + SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + codegen_->AddSlowPath(slow_path); + __ B(eq, slow_path->GetEntryLabel()); + + __ Ldr(lr, + MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromBytes).Int32Value())); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Blr(lr); + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); +} + +void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) { + vixl::MacroAssembler* masm = GetVIXLAssembler(); + + __ Ldr(lr, + MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromChars).Int32Value())); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Blr(lr); +} + +void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) { + // The inputs plus one temp. 
+ LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); +} + +void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) { + vixl::MacroAssembler* masm = GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register string_to_copy = WRegisterFrom(locations->InAt(0)); + __ Cmp(string_to_copy, 0); + SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + codegen_->AddSlowPath(slow_path); + __ B(eq, slow_path->GetEntryLabel()); + + __ Ldr(lr, + MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromString).Int32Value())); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Blr(lr); + __ Bind(slow_path->GetExitLabel()); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1031,9 +1165,8 @@ void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED } UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h index 10f6e1d6c7..2c9248f52c 100644 --- a/compiler/optimizing/intrinsics_list.h +++ b/compiler/optimizing/intrinsics_list.h @@ -60,8 +60,12 @@ V(MemoryPokeShortNative, kStatic) \ V(StringCharAt, kDirect) \ V(StringCompareTo, kDirect) \ + V(StringGetCharsNoCheck, kDirect) \ V(StringIndexOf, kDirect) \ V(StringIndexOfAfter, kDirect) \ + V(StringNewStringFromBytes, kStatic) \ + V(StringNewStringFromChars, kStatic) \ + V(StringNewStringFromString, kStatic) \ V(UnsafeCASInt, kDirect) \ V(UnsafeCASLong, kDirect) \ V(UnsafeCASObject, kDirect) \ diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 7275edb695..989dd0df30 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -16,12 +16,14 @@ #include "intrinsics_x86.h" +#include <limits> + #include "arch/x86/instruction_set_features_x86.h" +#include "art_method.h" #include "code_generator_x86.h" #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" #include "mirror/array-inl.h" -#include "mirror/art_method.h" #include "mirror/string.h" #include "thread.h" #include "utils/x86/assembler_x86.h" @@ -111,28 +113,9 @@ static void MoveFromReturnRegister(Location target, } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) { - if (invoke->InputCount() == 0) { - // No argument to move. - return; - } - - LocationSummary* locations = invoke->GetLocations(); - InvokeDexCallingConventionVisitor calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. 
- HParallelMove parallel_move(arena); - - for (size_t i = 0; i < invoke->InputCount(); i++) { - HInstruction* input = invoke->InputAt(i); - Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); - Location actual_loc = locations->InAt(i); - - parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); - } - - codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); +static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) { + InvokeDexCallingConventionVisitorX86 calling_convention_visitor; + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); } // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified @@ -143,11 +126,8 @@ static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX // restored! class IntrinsicSlowPathX86 : public SlowPathCodeX86 { public: - explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp) - : invoke_(invoke) { - // The temporary register has to be EAX for x86 invokes. - DCHECK_EQ(temp, EAX); - } + explicit IntrinsicSlowPathX86(HInvoke* invoke) + : invoke_(invoke) { } void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in); @@ -155,7 +135,7 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 { SaveLiveRegisters(codegen, invoke_->GetLocations()); - MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX); @@ -749,7 +729,7 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) { } static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) { - MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke, codegen); DCHECK(invoke->IsInvokeStaticOrDirect()); codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX); @@ -899,8 +879,6 @@ void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); - // Needs to be EAX for the invoke. - locations->AddTemp(Location::RegisterLocation(EAX)); } void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) { @@ -910,23 +888,17 @@ void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) { const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); // Location of count const int32_t count_offset = mirror::String::CountOffset().Int32Value(); - // Starting offset within data array - const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value(); - // Start of char data with array_ - const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value(); Register obj = locations->InAt(0).AsRegister<Register>(); Register idx = locations->InAt(1).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); - Location temp_loc = locations->GetTemp(0); - Register temp = temp_loc.AsRegister<Register>(); // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth // the cost. // TODO: For simplicity, the index parameter is requested in a register, so different from Quick // we will not optimize the code for constants (which would save a register).
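The per-backend MoveArguments removed above shuffled call arguments through an HParallelMove precisely because the source and destination locations can overlap, and the shared IntrinsicVisitor::MoveArguments it now delegates to has the same requirement. A toy illustration of the overlap problem, using a hypothetical register map rather than ART's resolver (which breaks cycles with a temporary or an exchange instead of snapshotting everything):

#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

using Moves = std::vector<std::pair<std::string, std::string>>;  // (dst, src)

// Naive sequential copy: a later move can read a register already clobbered.
void EmitSequential(std::map<std::string, int>& regs, const Moves& moves) {
  for (const auto& m : moves) regs[m.first] = regs[m.second];
}

// Models the effect of a parallel move: all sources are read before any write.
void EmitParallel(std::map<std::string, int>& regs, const Moves& moves) {
  std::map<std::string, int> snapshot = regs;
  for (const auto& m : moves) regs[m.first] = snapshot[m.second];
}

int main() {
  Moves swap = {{"r0", "r1"}, {"r1", "r0"}};
  std::map<std::string, int> a = {{"r0", 1}, {"r1", 2}}, b = a;
  EmitSequential(a, swap);  // r0 = 2, then r1 copies the clobbered r0: r1 = 2
  EmitParallel(b, swap);    // the swap is resolved correctly: r0 = 2, r1 = 1
  assert(a["r1"] == 2 && b["r1"] == 1);
  return 0;
}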
- SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke, temp); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); X86Assembler* assembler = GetAssembler(); @@ -935,12 +907,8 @@ void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) { codegen_->MaybeRecordImplicitNullCheck(invoke); __ j(kAboveEqual, slow_path->GetEntryLabel()); - // Get the actual element. - __ movl(temp, idx); // temp := idx. - __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx. - __ movl(out, Address(obj, value_offset)); // obj := obj.array. - // out = out[2*temp]. - __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset)); + // out = out[2*idx]. + __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset)); __ Bind(slow_path->GetExitLabel()); } @@ -954,8 +922,6 @@ void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) { locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(EAX)); - // Needs to be EAX for the invoke. - locations->AddTemp(Location::RegisterLocation(EAX)); } void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { @@ -967,8 +933,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { Register argument = locations->InAt(1).AsRegister<Register>(); __ testl(argument, argument); - SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86( - invoke, locations->GetTemp(0).AsRegister<Register>()); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -976,6 +941,227 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void CreateStringIndexOfLocations(HInvoke* invoke, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = new (allocator) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + // The data needs to be in EDI for scasw. So request that the string is there, anyways. + locations->SetInAt(0, Location::RegisterLocation(EDI)); + // If we look for a constant char, we'll still have to copy it into EAX. So just request the + // allocator to do that, anyways. We can still do the constant check by checking the parameter + // of the instruction explicitly. + // Note: This works as we don't clobber EAX anywhere. + locations->SetInAt(1, Location::RegisterLocation(EAX)); + if (!start_at_zero) { + locations->SetInAt(2, Location::RequiresRegister()); // The starting index. + } + // As we clobber EDI during execution anyways, also use it as the output. + locations->SetOut(Location::SameAsFirstInput()); + + // repne scasw uses ECX as the counter. + locations->AddTemp(Location::RegisterLocation(ECX)); + // Need another temporary to be able to compute the result. + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenerateStringIndexOf(HInvoke* invoke, + X86Assembler* assembler, + CodeGeneratorX86* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. 
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + Register string_obj = locations->InAt(0).AsRegister<Register>(); + Register search_value = locations->InAt(1).AsRegister<Register>(); + Register counter = locations->GetTemp(0).AsRegister<Register>(); + Register string_length = locations->GetTemp(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + // Check our assumptions for registers. + DCHECK_EQ(string_obj, EDI); + DCHECK_EQ(search_value, EAX); + DCHECK_EQ(counter, ECX); + DCHECK_EQ(out, EDI); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeX86* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathX86(invoke); + codegen->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max())); + slow_path = new (allocator) IntrinsicSlowPathX86(invoke); + codegen->AddSlowPath(slow_path); + __ j(kAbove, slow_path->GetEntryLabel()); + } + + // From here down, we know that we are looking for a char that fits in 16 bits. + // Location of reference to data array within the String object. + int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + // Location of count within the String object. + int32_t count_offset = mirror::String::CountOffset().Int32Value(); + + // Load string length, i.e., the count field of the string. + __ movl(string_length, Address(string_obj, count_offset)); + + // Do a zero-length check. + // TODO: Support jecxz. + Label not_found_label; + __ testl(string_length, string_length); + __ j(kEqual, &not_found_label); + + if (start_at_zero) { + // Number of chars to scan is the same as the string length. + __ movl(counter, string_length); + + // Move to the start of the string. + __ addl(string_obj, Immediate(value_offset)); + } else { + Register start_index = locations->InAt(2).AsRegister<Register>(); + + // Do a start_index check. + __ cmpl(start_index, string_length); + __ j(kGreaterEqual, &not_found_label); + + // Ensure we have a start index >= 0; + __ xorl(counter, counter); + __ cmpl(start_index, Immediate(0)); + __ cmovl(kGreater, counter, start_index); + + // Move to the start of the string: string_obj + value_offset + 2 * start_index. + __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + + // Now update ecx (the repne scasw work counter). We have string.length - start_index left to + // compare. + __ negl(counter); + __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0)); + } + + // Everything is set up for repne scasw: + // * Comparison address in EDI. + // * Counter in ECX. + __ repne_scasw(); + + // Did we find a match? + __ j(kNotEqual, &not_found_label); + + // Yes, we matched. Compute the index of the result. + __ subl(string_length, counter); + __ leal(out, Address(string_length, -1)); + + Label done; + __ jmp(&done); + + // Failed to match; return -1. + __ Bind(&not_found_label); + __ movl(out, Immediate(-1)); + + // And join up at the end.
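The repne scasw sequence above is the heart of the x86 fast path: EAX holds the 16-bit search value, EDI walks the character data, ECX counts down, and the match index is recovered afterwards as string_length - ECX - 1. A plain-C++ model of that loop and of the start-index handling, offered as a sketch for reference rather than ART code:

static int IndexOfScasw(const uint16_t* chars, int length, uint16_t value, int start) {
  if (length == 0) return -1;                // zero-length check
  if (start >= length) return -1;            // start_index check
  if (start < 0) start = 0;                  // clamp, like cmovl(kGreater, counter, start_index)
  const uint16_t* pos = chars + start;       // EDI: comparison address
  int counter = length - start;              // ECX: repne scasw work counter
  bool found = false;
  while (counter != 0) {                     // repne scasw: repeat while ECX != 0 ...
    --counter;
    if (*pos++ == value) { found = true; break; }  // ... stopping early on a match (ZF set)
  }
  // After the loop, 'counter' is the number of chars left after the match position,
  // so the match index is length - counter - 1, exactly the subl/leal pair above.
  return found ? (length - counter - 1) : -1;
}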
+ __ Bind(&done); + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, true); +} + +void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, false); +} + +void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + +void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); + locations->SetOut(Location::RegisterLocation(EAX)); +} + +void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) { + X86Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register byte_array = locations->InAt(0).AsRegister<Register>(); + __ testl(byte_array, byte_array); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + codegen_->AddSlowPath(slow_path); + __ j(kEqual, slow_path->GetEntryLabel()); + + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes))); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetOut(Location::RegisterLocation(EAX)); +} + +void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) { + X86Assembler* assembler = GetAssembler(); + + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars))); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(EAX)); +} + +void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) { + X86Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register string_to_copy = locations->InAt(0).AsRegister<Register>(); + __ testl(string_to_copy, 
string_to_copy); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + codegen_->AddSlowPath(slow_path); + __ j(kEqual, slow_path->GetEntryLabel()); + + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString))); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Bind(slow_path->GetExitLabel()); +} + static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) { Register address = locations->InAt(0).AsRegisterPairLow<Register>(); Location out_loc = locations->Out(); @@ -1536,8 +1722,7 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) } UNIMPLEMENTED_INTRINSIC(MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 35daaf60bb..c245cb646f 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -16,12 +16,14 @@ #include "intrinsics_x86_64.h" +#include <limits> + #include "arch/x86_64/instruction_set_features_x86_64.h" +#include "art_method-inl.h" #include "code_generator_x86_64.h" #include "entrypoints/quick/quick_entrypoints.h" #include "intrinsics.h" #include "mirror/array-inl.h" -#include "mirror/art_method.h" #include "mirror/string.h" #include "thread.h" #include "utils/x86_64/assembler_x86_64.h" @@ -103,28 +105,9 @@ static void MoveFromReturnRegister(Location trg, } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) { - if (invoke->InputCount() == 0) { - // No argument to move. - return; - } - - LocationSummary* locations = invoke->GetLocations(); - InvokeDexCallingConventionVisitor calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. 
- HParallelMove parallel_move(arena); - - for (size_t i = 0; i < invoke->InputCount(); i++) { - HInstruction* input = invoke->InputAt(i); - Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); - Location actual_loc = locations->InAt(i); - - parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr); - } - - codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); +static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) { + InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor; + IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); } // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified @@ -143,7 +126,7 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 { SaveLiveRegisters(codegen, invoke_->GetLocations()); - MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI)); @@ -623,7 +606,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) { } static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) { - MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen); + MoveArguments(invoke, codegen); DCHECK(invoke->IsInvokeStaticOrDirect()); codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI)); @@ -802,7 +785,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { __ Bind(&nan); // output = 0 - __ xorq(out, out); + __ xorl(out, out); __ Bind(&done); } @@ -824,16 +807,10 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) { const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); // Location of count const int32_t count_offset = mirror::String::CountOffset().Int32Value(); - // Starting offset within data array - const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value(); - // Start of char data with array_ - const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value(); CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - Location temp_loc = locations->GetTemp(0); - CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth // the cost. @@ -849,12 +826,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) { codegen_->MaybeRecordImplicitNullCheck(invoke); __ j(kAboveEqual, slow_path->GetEntryLabel()); - // Get the actual element. - __ movl(temp, idx); // temp := idx. - __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx. - __ movl(out, Address(obj, value_offset)); // obj := obj.array. - // out = out[2*temp]. - __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset)); + // out = out[2*idx].
+ __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset)); __ Bind(slow_path->GetExitLabel()); } @@ -887,6 +860,229 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void CreateStringIndexOfLocations(HInvoke* invoke, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = new (allocator) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + // The data needs to be in RDI for scasw. So request that the string is there, anyways. + locations->SetInAt(0, Location::RegisterLocation(RDI)); + // If we look for a constant char, we'll still have to copy it into RAX. So just request the + // allocator to do that, anyways. We can still do the constant check by checking the parameter + // of the instruction explicitly. + // Note: This works as we don't clobber RAX anywhere. + locations->SetInAt(1, Location::RegisterLocation(RAX)); + if (!start_at_zero) { + locations->SetInAt(2, Location::RequiresRegister()); // The starting index. + } + // As we clobber RDI during execution anyways, also use it as the output. + locations->SetOut(Location::SameAsFirstInput()); + + // repne scasw uses RCX as the counter. + locations->AddTemp(Location::RegisterLocation(RCX)); + // Need another temporary to be able to compute the result. + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenerateStringIndexOf(HInvoke* invoke, + X86_64Assembler* assembler, + CodeGeneratorX86_64* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + // Check our assumptions for registers. + DCHECK_EQ(string_obj.AsRegister(), RDI); + DCHECK_EQ(search_value.AsRegister(), RAX); + DCHECK_EQ(counter.AsRegister(), RCX); + DCHECK_EQ(out.AsRegister(), RDI); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeX86_64* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke); + codegen->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max())); + slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke); + codegen->AddSlowPath(slow_path); + __ j(kAbove, slow_path->GetEntryLabel()); + } + + // From here down, we know that we are looking for a char that fits in 16 bits. + // Location of reference to data array within the String object. 
+ int32_t value_offset = mirror::String::ValueOffset().Int32Value(); + // Location of count within the String object. + int32_t count_offset = mirror::String::CountOffset().Int32Value(); + + // Load string length, i.e., the count field of the string. + __ movl(string_length, Address(string_obj, count_offset)); + + // Do a length check. + // TODO: Support jecxz. + Label not_found_label; + __ testl(string_length, string_length); + __ j(kEqual, &not_found_label); + + if (start_at_zero) { + // Number of chars to scan is the same as the string length. + __ movl(counter, string_length); + + // Move to the start of the string. + __ addq(string_obj, Immediate(value_offset)); + } else { + CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>(); + + // Do a start_index check. + __ cmpl(start_index, string_length); + __ j(kGreaterEqual, &not_found_label); + + // Ensure we have a start index >= 0; + __ xorl(counter, counter); + __ cmpl(start_index, Immediate(0)); + __ cmov(kGreater, counter, start_index, false); // 32-bit copy is enough. + + // Move to the start of the string: string_obj + value_offset + 2 * start_index. + __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset)); + + // Now update ecx, the work counter: it's gonna be string.length - start_index. + __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit. + __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0)); + } + + // Everything is set up for repne scasw: + // * Comparison address in RDI. + // * Counter in ECX. + __ repne_scasw(); + + // Did we find a match? + __ j(kNotEqual, &not_found_label); + + // Yes, we matched. Compute the index of the result. + __ subl(string_length, counter); + __ leal(out, Address(string_length, -1)); + + Label done; + __ jmp(&done); + + // Failed to match; return -1. + __ Bind(&not_found_label); + __ movl(out, Immediate(-1)); + + // And join up at the end.
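Two width choices in the x86-64 code above are worth spelling out: the counter is negated with negq because leaq folds it into a 64-bit address computation, while the earlier xorq(out, out) could become xorl because any write to a 32-bit register already zero-extends into the full 64-bit register. A small sketch of what a 32-bit negation would do to the 64-bit sum, using plain C++ arithmetic as a stand-in for the register behaviour:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t string_length = 10, counter = 7;  // want string_length + (-counter) == 3
  // Like negl: only the low 32 bits are negated, then zero-extended.
  uint64_t neg32 = static_cast<uint32_t>(-static_cast<int32_t>(counter));
  // Like negq: full 64-bit two's complement.
  uint64_t neg64 = static_cast<uint64_t>(-static_cast<int64_t>(counter));
  assert(string_length + neg32 == 0x100000003ULL);  // a bogus 64-bit address offset
  assert(string_length + neg64 == 3);               // the intended result
  return 0;
}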
+ __ Bind(&done); + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, true); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, false); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + +void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); + locations->SetOut(Location::RegisterLocation(RAX)); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { + X86_64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>(); + __ testl(byte_array, byte_array); + SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + codegen_->AddSlowPath(slow_path); + __ j(kEqual, slow_path->GetEntryLabel()); + + __ gs()->call(Address::Absolute( + QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true)); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetOut(Location::RegisterLocation(RAX)); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) { + X86_64Assembler* assembler = GetAssembler(); + + __ gs()->call(Address::Absolute( + QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true)); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(RAX)); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) { + X86_64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister string_to_copy = 
locations->InAt(0).AsRegister<CpuRegister>(); + __ testl(string_to_copy, string_to_copy); + SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + codegen_->AddSlowPath(slow_path); + __ j(kEqual, slow_path->GetEntryLabel()); + + __ gs()->call(Address::Absolute( + QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true)); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Bind(slow_path->GetExitLabel()); +} + static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity. @@ -1390,8 +1586,7 @@ void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index bf9b8e59c5..2535ea274a 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -39,8 +39,9 @@ static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) { } } - if (instruction->HasEnvironment()) { - HEnvironment* environment = instruction->GetEnvironment(); + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* input = environment->GetInstructionAt(i); if (input != nullptr) { @@ -63,13 +64,15 @@ static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) { * If `environment` has a loop header phi, we replace it with its first input. 
*/ static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info) { - for (size_t i = 0, e = environment->Size(); i < e; ++i) { - HInstruction* input = environment->GetInstructionAt(i); - if (input != nullptr && IsPhiOf(input, info->GetHeader())) { - environment->RemoveAsUserOfInput(i); - HInstruction* incoming = input->InputAt(0); - environment->SetRawEnvAt(i, incoming); - incoming->AddEnvUseAt(environment, i); + for (; environment != nullptr; environment = environment->GetParent()) { + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* input = environment->GetInstructionAt(i); + if (input != nullptr && IsPhiOf(input, info->GetHeader())) { + environment->RemoveAsUserOfInput(i); + HInstruction* incoming = input->InputAt(0); + environment->SetRawEnvAt(i, incoming); + incoming->AddEnvUseAt(environment, i); + } } } } diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 7818c606db..4f259b5095 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -39,7 +39,7 @@ namespace art { static void TestCode(const uint16_t* data, const int* expected_order, size_t number_of_blocks) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 52367730ed..7cb00a1923 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -32,7 +32,7 @@ namespace art { static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 8a96ee9ace..9d7d0b6c67 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -46,7 +46,7 @@ static void DumpBitVector(BitVector* vector, static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -445,44 +445,40 @@ TEST(LivenessTest, Loop5) { TEST(LivenessTest, Loop6) { // Bitsets are made of: - // (constant0, constant4, constant5, phi in block 2, phi in block 8) + // (constant0, constant4, constant5, phi in block 2) const char* expected = "Block 0\n" - " live in: (00000)\n" - " live out: (11100)\n" - " kill: (11100)\n" + " live in: (0000)\n" + " live out: (1110)\n" + " kill: (1110)\n" "Block 1\n" - " live in: (11100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (1110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 2\n" // loop header - " live in: (01100)\n" - " live out: (01110)\n" - " kill: (00010)\n" + " live in: (0110)\n" + " live out: (0111)\n" + " kill: (0001)\n" "Block 3\n" - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" - "Block 4\n" // original 
back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" - "Block 5\n" // original back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" + "Block 4\n" // back edge + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" + "Block 5\n" // back edge + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 6\n" // return block - " live in: (00010)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0001)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 7\n" // exit block - " live in: (00000)\n" - " live out: (00000)\n" - " kill: (00000)\n" - "Block 8\n" // synthesized back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00001)\n"; + " live in: (0000)\n" + " live out: (0000)\n" + " kill: (0000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index a1ae67009e..42aba04828 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -25,8 +25,6 @@ LocationSummary::LocationSummary(HInstruction* instruction, bool intrinsified) : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), - environment_(instruction->GetBlock()->GetGraph()->GetArena(), - instruction->EnvironmentSize()), output_overlaps_(Location::kOutputOverlap), call_kind_(call_kind), stack_mask_(nullptr), @@ -37,10 +35,6 @@ LocationSummary::LocationSummary(HInstruction* instruction, for (size_t i = 0; i < instruction->InputCount(); ++i) { inputs_.Put(i, Location()); } - environment_.SetSize(instruction->EnvironmentSize()); - for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) { - environment_.Put(i, Location()); - } instruction->SetLocations(this); if (NeedsSafepoint()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index c3a99150c4..09bbb33042 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -525,14 +525,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return temps_.Size(); } - void SetEnvironmentAt(uint32_t at, Location location) { - environment_.Put(at, location); - } - - Location GetEnvironmentAt(uint32_t at) const { - return environment_.Get(at); - } - Location Out() const { return output_; } bool CanCall() const { return call_kind_ != kNoCall; } @@ -602,7 +594,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { private: GrowableArray<Location> inputs_; GrowableArray<Location> temps_; - GrowableArray<Location> environment_; // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot // share the same register as the inputs. 
Location::OutputOverlap output_overlaps_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index c158ddf4ee..2bad68217b 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -17,6 +17,8 @@ #include "nodes.h" #include "ssa_builder.h" +#include "base/bit_vector-inl.h" +#include "base/bit_utils.h" #include "utils/growable_array.h" #include "scoped_thread_state_change.h" @@ -37,8 +39,9 @@ static void RemoveAsUser(HInstruction* instruction) { instruction->RemoveAsUserOfInput(i); } - HEnvironment* environment = instruction->GetEnvironment(); - if (environment != nullptr) { + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { if (environment->GetInstructionAt(i) != nullptr) { environment->RemoveAsUserOfInput(i); @@ -191,24 +194,6 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { void HGraph::SimplifyLoop(HBasicBlock* header) { HLoopInformation* info = header->GetLoopInformation(); - // If there are more than one back edge, make them branch to the same block that - // will become the only back edge. This simplifies finding natural loops in the - // graph. - // Also, if the loop is a do/while (that is the back edge is an if), change the - // back edge to be a goto. This simplifies code generation of suspend cheks. - if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) { - HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc()); - AddBlock(new_back_edge); - new_back_edge->AddInstruction(new (arena_) HGoto()); - for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) { - HBasicBlock* back_edge = info->GetBackEdges().Get(pred); - back_edge->ReplaceSuccessor(header, new_back_edge); - } - info->ClearBackEdges(); - info->AddBackEdge(new_back_edge); - new_back_edge->AddSuccessor(header); - } - // Make sure the loop has only one pre header. This simplifies SSA building by having // to just look at the pre header to know which locals are initialized at entry of the // loop. @@ -218,11 +203,9 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { AddBlock(pre_header); pre_header->AddInstruction(new (arena_) HGoto()); - ArenaBitVector back_edges(arena_, GetBlocks().Size(), false); - HBasicBlock* back_edge = info->GetBackEdges().Get(0); for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) { HBasicBlock* predecessor = header->GetPredecessors().Get(pred); - if (predecessor != back_edge) { + if (!info->IsBackEdge(*predecessor)) { predecessor->ReplaceSuccessor(header, pre_header); pred--; } @@ -230,9 +213,17 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { pre_header->AddSuccessor(header); } - // Make sure the second predecessor of a loop header is the back edge. - if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) { - header->SwapPredecessors(); + // Make sure the first predecessor of a loop header is the incoming block. 
+ if (info->IsBackEdge(*header->GetPredecessors().Get(0))) { + HBasicBlock* to_swap = header->GetPredecessors().Get(0); + for (size_t pred = 1, e = header->GetPredecessors().Size(); pred < e; ++pred) { + HBasicBlock* predecessor = header->GetPredecessors().Get(pred); + if (!info->IsBackEdge(*predecessor)) { + header->predecessors_.Put(pred, to_swap); + header->predecessors_.Put(0, predecessor); + break; + } + } } // Place the suspend check at the beginning of the header, so that live registers @@ -303,25 +294,6 @@ HNullConstant* HGraph::GetNullConstant() { return cached_null_constant_; } -template <class InstructionType, typename ValueType> -InstructionType* HGraph::CreateConstant(ValueType value, - ArenaSafeMap<ValueType, InstructionType*>* cache) { - // Try to find an existing constant of the given value. - InstructionType* constant = nullptr; - auto cached_constant = cache->find(value); - if (cached_constant != cache->end()) { - constant = cached_constant->second; - } - - // If not found or previously deleted, create and cache a new instruction. - if (constant == nullptr || constant->GetBlock() == nullptr) { - constant = new (arena_) InstructionType(value); - cache->Overwrite(value, constant); - InsertConstant(constant); - } - return constant; -} - HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value) { switch (type) { case Primitive::Type::kPrimBoolean: @@ -343,6 +315,18 @@ HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value) { } } +void HGraph::CacheFloatConstant(HFloatConstant* constant) { + int32_t value = bit_cast<int32_t, float>(constant->GetValue()); + DCHECK(cached_float_constants_.find(value) == cached_float_constants_.end()); + cached_float_constants_.Overwrite(value, constant); +} + +void HGraph::CacheDoubleConstant(HDoubleConstant* constant) { + int64_t value = bit_cast<int64_t, double>(constant->GetValue()); + DCHECK(cached_double_constants_.find(value) == cached_double_constants_.end()); + cached_double_constants_.Overwrite(value, constant); +} + void HLoopInformation::Add(HBasicBlock* block) { blocks_.SetBit(block->GetBlockId()); } @@ -364,26 +348,60 @@ void HLoopInformation::PopulateRecursive(HBasicBlock* block) { } bool HLoopInformation::Populate() { - DCHECK_EQ(GetBackEdges().Size(), 1u); - HBasicBlock* back_edge = GetBackEdges().Get(0); - DCHECK(back_edge->GetDominator() != nullptr); - if (!header_->Dominates(back_edge)) { - // This loop is not natural. Do not bother going further. - return false; - } + DCHECK_EQ(blocks_.NumSetBits(), 0u) << "Loop information has already been populated"; + for (size_t i = 0, e = GetBackEdges().Size(); i < e; ++i) { + HBasicBlock* back_edge = GetBackEdges().Get(i); + DCHECK(back_edge->GetDominator() != nullptr); + if (!header_->Dominates(back_edge)) { + // This loop is not natural. Do not bother going further. + return false; + } - // Populate this loop: starting with the back edge, recursively add predecessors - // that are not already part of that loop. Set the header as part of the loop - // to end the recursion. - // This is a recursive implementation of the algorithm described in - // "Advanced Compiler Design & Implementation" (Muchnick) p192. - blocks_.SetBit(header_->GetBlockId()); - PopulateRecursive(back_edge); + // Populate this loop: starting with the back edge, recursively add predecessors + // that are not already part of that loop. Set the header as part of the loop + // to end the recursion. 
+ // This is a recursive implementation of the algorithm described in + // "Advanced Compiler Design & Implementation" (Muchnick) p192. + blocks_.SetBit(header_->GetBlockId()); + PopulateRecursive(back_edge); + } return true; } +void HLoopInformation::Update() { + HGraph* graph = header_->GetGraph(); + for (uint32_t id : blocks_.Indexes()) { + HBasicBlock* block = graph->GetBlocks().Get(id); + // Reset loop information of non-header blocks inside the loop, except + // members of inner nested loops because those should already have been + // updated by their own LoopInformation. + if (block->GetLoopInformation() == this && block != header_) { + block->SetLoopInformation(nullptr); + } + } + blocks_.ClearAllBits(); + + if (back_edges_.IsEmpty()) { + // The loop has been dismantled, delete its suspend check and remove info + // from the header. + DCHECK(HasSuspendCheck()); + header_->RemoveInstruction(suspend_check_); + header_->SetLoopInformation(nullptr); + header_ = nullptr; + suspend_check_ = nullptr; + } else { + if (kIsDebugBuild) { + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + DCHECK(header_->Dominates(back_edges_.Get(i))); + } + } + // This loop still has reachable back edges. Repopulate the list of blocks. + bool populate_successful = Populate(); + DCHECK(populate_successful); + } +} + HBasicBlock* HLoopInformation::GetPreHeader() const { - DCHECK_EQ(header_->GetPredecessors().Size(), 2u); return header_->GetDominator(); } @@ -395,6 +413,14 @@ bool HLoopInformation::IsIn(const HLoopInformation& other) const { return other.blocks_.IsBitSet(header_->GetBlockId()); } +size_t HLoopInformation::GetLifetimeEnd() const { + size_t last_position = 0; + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + last_position = std::max(back_edges_.Get(i)->GetLifetimeEnd(), last_position); + } + return last_position; +} + bool HBasicBlock::Dominates(HBasicBlock* other) const { // Walk up the dominator tree from `other`, to find out if `this` // is an ancestor. 
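Populate() above now runs the natural-loop construction once per back edge instead of assuming a single one, and Update() re-runs it after back edges are removed. A compact model of that construction on a toy CFG, with simplified types rather than ART's data structures: starting from each back edge, predecessors are added to the loop until the header, which is seeded first, stops the recursion.

#include <set>
#include <vector>

struct Block { std::vector<Block*> preds; };

static void AddRecursive(Block* block, std::set<Block*>* loop) {
  if (!loop->insert(block).second) return;   // already in the loop (or it is the header)
  for (Block* pred : block->preds) AddRecursive(pred, loop);
}

std::set<Block*> NaturalLoop(Block* header, const std::vector<Block*>& back_edges) {
  std::set<Block*> loop;
  loop.insert(header);                        // seed the header to stop the walk
  for (Block* back_edge : back_edges) AddRecursive(back_edge, &loop);
  return loop;
}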
@@ -456,6 +482,20 @@ void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstructio instructions_.InsertInstructionBefore(instruction, cursor); } +void HBasicBlock::InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor) { + DCHECK(!cursor->IsPhi()); + DCHECK(!instruction->IsPhi()); + DCHECK_EQ(instruction->GetId(), -1); + DCHECK_NE(cursor->GetId(), -1); + DCHECK_EQ(cursor->GetBlock(), this); + DCHECK(!instruction->IsControlFlow()); + DCHECK(!cursor->IsControlFlow()); + instruction->SetBlock(this); + instruction->SetId(GetGraph()->GetNextInstructionId()); + UpdateInputsUsers(instruction); + instructions_.InsertInstructionAfter(instruction, cursor); +} + void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) { DCHECK_EQ(phi->GetId(), -1); DCHECK_NE(cursor->GetId(), -1); @@ -481,6 +521,7 @@ static void Remove(HInstructionList* instruction_list, } void HBasicBlock::RemoveInstruction(HInstruction* instruction, bool ensure_safety) { + DCHECK(!instruction->IsPhi()); Remove(&instructions_, this, instruction, ensure_safety); } @@ -488,6 +529,24 @@ void HBasicBlock::RemovePhi(HPhi* phi, bool ensure_safety) { Remove(&phis_, this, phi, ensure_safety); } +void HBasicBlock::RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety) { + if (instruction->IsPhi()) { + RemovePhi(instruction->AsPhi(), ensure_safety); + } else { + RemoveInstruction(instruction, ensure_safety); + } +} + +void HEnvironment::CopyFrom(const GrowableArray<HInstruction*>& locals) { + for (size_t i = 0; i < locals.Size(); i++) { + HInstruction* instruction = locals.Get(i); + SetRawEnvAt(i, instruction); + if (instruction != nullptr) { + instruction->AddEnvUseAt(this, i); + } + } +} + void HEnvironment::CopyFrom(HEnvironment* env) { for (size_t i = 0; i < env->Size(); i++) { HInstruction* instruction = env->GetInstructionAt(i); @@ -498,6 +557,28 @@ void HEnvironment::CopyFrom(HEnvironment* env) { } } +void HEnvironment::CopyFromWithLoopPhiAdjustment(HEnvironment* env, + HBasicBlock* loop_header) { + DCHECK(loop_header->IsLoopHeader()); + for (size_t i = 0; i < env->Size(); i++) { + HInstruction* instruction = env->GetInstructionAt(i); + SetRawEnvAt(i, instruction); + if (instruction == nullptr) { + continue; + } + if (instruction->IsLoopHeaderPhi() && (instruction->GetBlock() == loop_header)) { + // At the end of the loop pre-header, the corresponding value for instruction + // is the first input of the phi. + HInstruction* initial = instruction->AsPhi()->InputAt(0); + DCHECK(initial->GetBlock()->Dominates(loop_header)); + SetRawEnvAt(i, initial); + initial->AddEnvUseAt(this, i); + } else { + instruction->AddEnvUseAt(this, i); + } + } +} + void HEnvironment::RemoveAsUserOfInput(size_t index) const { const HUserRecord<HEnvironment*> user_record = vregs_.Get(index); user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode()); @@ -675,6 +756,9 @@ void HPhi::AddInput(HInstruction* input) { void HPhi::RemoveInputAt(size_t index) { RemoveAsUserOfInput(index); inputs_.DeleteAt(index); + for (size_t i = index, e = InputCount(); i < e; ++i) { + InputRecordAt(i).GetUseNode()->SetIndex(i); + } } #define DEFINE_ACCEPT(name, super) \ @@ -923,8 +1007,9 @@ void HBasicBlock::DisconnectAndDelete() { HLoopInformation* loop_info = it.Current(); loop_info->Remove(this); if (loop_info->IsBackEdge(*this)) { - // This deliberately leaves the loop in an inconsistent state and will - // fail SSAChecker unless the entire loop is removed during the pass. 
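The new loop in HPhi::RemoveInputAt above exists because a use-list node records the input index it corresponds to; once an input is deleted, every later record still carries its old index and must be renumbered, which is what SetIndex() now does. A small sketch of the invariant being restored, with a hypothetical vector-based use list rather than ART's intrusive list:

#include <cassert>
#include <cstddef>
#include <vector>

struct Use { size_t index; };  // position of this use among the phi's inputs

void RemoveInputAt(std::vector<Use>& uses, size_t index) {
  uses.erase(uses.begin() + static_cast<std::ptrdiff_t>(index));
  for (size_t i = index; i < uses.size(); ++i) {
    uses[i].index = i;  // keep each record's index in sync with its new position
  }
}

int main() {
  std::vector<Use> uses = {{0}, {1}, {2}};
  RemoveInputAt(uses, 1);
  assert(uses.size() == 2 && uses[1].index == 1);  // was 2 before renumbering
  return 0;
}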
+ // If this was the last back edge of the loop, we deliberately leave the + // loop in an inconsistent state and will fail SSAChecker unless the + // entire loop is removed during the pass. loop_info->RemoveBackEdge(this); } } @@ -1021,8 +1106,7 @@ void HBasicBlock::MergeWith(HBasicBlock* other) { HLoopInformation* loop_info = it.Current(); loop_info->Remove(other); if (loop_info->IsBackEdge(*other)) { - loop_info->ClearBackEdges(); - loop_info->AddBackEdge(this); + loop_info->ReplaceBackEdge(other, this); } } @@ -1253,11 +1337,9 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { loop_it.Current()->Add(to); } if (info->IsBackEdge(*at)) { - // Only `at` can become a back edge, as the inlined blocks - // are predecessors of `at`. - DCHECK_EQ(1u, info->NumberOfBackEdges()); - info->ClearBackEdges(); - info->AddBackEdge(to); + // Only `to` can become a back edge, as the inlined blocks + // are predecessors of `to`. + info->ReplaceBackEdge(at, to); } } } @@ -1281,9 +1363,10 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { current->ReplaceWith(outer_graph->GetIntConstant(current->AsIntConstant()->GetValue())); } else if (current->IsLongConstant()) { current->ReplaceWith(outer_graph->GetLongConstant(current->AsLongConstant()->GetValue())); - } else if (current->IsFloatConstant() || current->IsDoubleConstant()) { - // TODO: Don't duplicate floating-point constants. - current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction()); + } else if (current->IsFloatConstant()) { + current->ReplaceWith(outer_graph->GetFloatConstant(current->AsFloatConstant()->GetValue())); + } else if (current->IsDoubleConstant()) { + current->ReplaceWith(outer_graph->GetDoubleConstant(current->AsDoubleConstant()->GetValue())); } else if (current->IsParameterValue()) { if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect() diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 938d6fcd64..ef60d7680b 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -48,6 +48,7 @@ class HPhi; class HSuspendCheck; class LiveInterval; class LocationSummary; +class SlowPathCode; class SsaBuilder; static const int kDefaultNumberOfBlocks = 8; @@ -116,7 +117,12 @@ class HInstructionList { // Control-flow graph of a method. Contains a list of basic blocks. 
class HGraph : public ArenaObject<kArenaAllocMisc> { public: - HGraph(ArenaAllocator* arena, bool debuggable = false, int start_instruction_id = 0) + HGraph(ArenaAllocator* arena, + const DexFile& dex_file, + uint32_t method_idx, + InstructionSet instruction_set, + bool debuggable = false, + int start_instruction_id = 0) : arena_(arena), blocks_(arena, kDefaultNumberOfBlocks), reverse_post_order_(arena, kDefaultNumberOfBlocks), @@ -130,9 +136,14 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { has_bounds_checks_(false), debuggable_(debuggable), current_instruction_id_(start_instruction_id), + dex_file_(dex_file), + method_idx_(method_idx), + instruction_set_(instruction_set), cached_null_constant_(nullptr), cached_int_constants_(std::less<int32_t>(), arena->Adapter()), - cached_long_constants_(std::less<int64_t>(), arena->Adapter()) {} + cached_float_constants_(std::less<int32_t>(), arena->Adapter()), + cached_long_constants_(std::less<int64_t>(), arena->Adapter()), + cached_double_constants_(std::less<int64_t>(), arena->Adapter()) {} ArenaAllocator* GetArena() const { return arena_; } const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; } @@ -241,8 +252,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { bool IsDebuggable() const { return debuggable_; } // Returns a constant of the given type and value. If it does not exist - // already, it is created and inserted into the graph. Only integral types - // are currently supported. + // already, it is created and inserted into the graph. This method is only for + // integral types. HConstant* GetConstant(Primitive::Type type, int64_t value); HNullConstant* GetNullConstant(); HIntConstant* GetIntConstant(int32_t value) { @@ -251,9 +262,23 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { HLongConstant* GetLongConstant(int64_t value) { return CreateConstant(value, &cached_long_constants_); } + HFloatConstant* GetFloatConstant(float value) { + return CreateConstant(bit_cast<int32_t, float>(value), &cached_float_constants_); + } + HDoubleConstant* GetDoubleConstant(double value) { + return CreateConstant(bit_cast<int64_t, double>(value), &cached_double_constants_); + } HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const; + const DexFile& GetDexFile() const { + return dex_file_; + } + + uint32_t GetMethodIdx() const { + return method_idx_; + } + private: void VisitBlockForDominatorTree(HBasicBlock* block, HBasicBlock* predecessor, @@ -265,10 +290,34 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited); - template <class InstType, typename ValueType> - InstType* CreateConstant(ValueType value, ArenaSafeMap<ValueType, InstType*>* cache); + template <class InstructionType, typename ValueType> + InstructionType* CreateConstant(ValueType value, + ArenaSafeMap<ValueType, InstructionType*>* cache) { + // Try to find an existing constant of the given value. + InstructionType* constant = nullptr; + auto cached_constant = cache->find(value); + if (cached_constant != cache->end()) { + constant = cached_constant->second; + } + + // If not found or previously deleted, create and cache a new instruction. 
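GetFloatConstant/GetDoubleConstant above key the constant caches on the raw bit pattern (bit_cast to int32_t/int64_t) rather than on the floating-point value itself; that keeps -0.0f distinct from +0.0f and keeps NaN usable as a map key, neither of which works with ordinary floating-point comparison. A standalone illustration of the keying choice, with plain std::map and memcpy standing in for ArenaSafeMap and art::bit_cast:

#include <cstdint>
#include <cstring>
#include <limits>
#include <map>

static int32_t BitsOf(float value) {
  int32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));  // portable equivalent of bit_cast<int32_t, float>
  return bits;
}

int main() {
  std::map<int32_t, const char*> cache;
  cache[BitsOf(0.0f)] = "+0.0f";
  cache[BitsOf(-0.0f)] = "-0.0f";  // 0.0f == -0.0f, but their bit patterns differ
  cache[BitsOf(std::numeric_limits<float>::quiet_NaN())] = "NaN";  // NaN != NaN, its bits still key fine
  return cache.size() == 3 ? 0 : 1;
}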
+ if (constant == nullptr || constant->GetBlock() == nullptr) { + constant = new (arena_) InstructionType(value); + cache->Overwrite(value, constant); + InsertConstant(constant); + } + return constant; + } + void InsertConstant(HConstant* instruction); + // Cache a float constant into the graph. This method should only be + // called by the SsaBuilder when creating "equivalent" instructions. + void CacheFloatConstant(HFloatConstant* constant); + + // See CacheFloatConstant comment. + void CacheDoubleConstant(HDoubleConstant* constant); + ArenaAllocator* const arena_; // List of blocks in insertion order. @@ -306,11 +355,22 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // The current id to assign to a newly added instruction. See HInstruction.id_. int32_t current_instruction_id_; - // Cached common constants often needed by optimization passes. + // The dex file from which the method is from. + const DexFile& dex_file_; + + // The method index in the dex file. + const uint32_t method_idx_; + + const InstructionSet instruction_set_; + + // Cached constants. HNullConstant* cached_null_constant_; ArenaSafeMap<int32_t, HIntConstant*> cached_int_constants_; + ArenaSafeMap<int32_t, HFloatConstant*> cached_float_constants_; ArenaSafeMap<int64_t, HLongConstant*> cached_long_constants_; + ArenaSafeMap<int64_t, HDoubleConstant*> cached_double_constants_; + friend class SsaBuilder; // For caching constants. friend class SsaLivenessAnalysis; // For the linear order. ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); DISALLOW_COPY_AND_ASSIGN(HGraph); @@ -362,14 +422,30 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> { return back_edges_; } - void ClearBackEdges() { - back_edges_.Reset(); + // Returns the lifetime position of the back edge that has the + // greatest lifetime position. + size_t GetLifetimeEnd() const; + + void ReplaceBackEdge(HBasicBlock* existing, HBasicBlock* new_back_edge) { + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + if (back_edges_.Get(i) == existing) { + back_edges_.Put(i, new_back_edge); + return; + } + } + UNREACHABLE(); } - // Find blocks that are part of this loop. Returns whether the loop is a natural loop, + // Finds blocks that are part of this loop. Returns whether the loop is a natural loop, // that is the header dominates the back edge. bool Populate(); + // Reanalyzes the loop by removing loop info from its blocks and re-running + // Populate(). If there are no back edges left, the loop info is completely + // removed as well as its SuspendCheck instruction. It must be run on nested + // inner loops first. + void Update(); + // Returns whether this loop information contains `block`. // Note that this loop information *must* be populated before entering this function. bool Contains(const HBasicBlock& block) const; @@ -526,6 +602,13 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { predecessors_.Put(1, temp); } + void SwapSuccessors() { + DCHECK_EQ(successors_.Size(), 2u); + HBasicBlock* temp = successors_.Get(0); + successors_.Put(0, successors_.Get(1)); + successors_.Put(1, temp); + } + size_t GetPredecessorIndexOf(HBasicBlock* predecessor) { for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) { if (predecessors_.Get(i) == predecessor) { @@ -578,7 +661,9 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { void DisconnectAndDelete(); void AddInstruction(HInstruction* instruction); + // Insert `instruction` before/after an existing instruction `cursor`. 
void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); + void InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor); // Replace instruction `initial` with `replacement` within this block. void ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement); @@ -589,9 +674,10 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { // instruction is not in use and removes it from the use lists of its inputs. void RemoveInstruction(HInstruction* instruction, bool ensure_safety = true); void RemovePhi(HPhi* phi, bool ensure_safety = true); + void RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety = true); bool IsLoopHeader() const { - return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this); + return IsInLoop() && (loop_information_->GetHeader() == this); } bool IsLoopPreHeaderFirstPredecessor() const { @@ -610,7 +696,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { void SetInLoop(HLoopInformation* info) { if (IsLoopHeader()) { // Nothing to do. This just means `info` is an outer loop. - } else if (loop_information_ == nullptr) { + } else if (!IsInLoop()) { loop_information_ = info; } else if (loop_information_->Contains(*info->GetHeader())) { // Block is currently part of an outer loop. Make it part of this inner loop. @@ -631,7 +717,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { bool IsInLoop() const { return loop_information_ != nullptr; } - // Returns wheter this block dominates the blocked passed as parameter. + // Returns whether this block dominates the blocked passed as parameter. bool Dominates(HBasicBlock* block) const; size_t GetLifetimeStart() const { return lifetime_start_; } @@ -682,7 +768,7 @@ class HLoopInformationOutwardIterator : public ValueObject { void Advance() { DCHECK(!Done()); - current_ = current_->GetHeader()->GetDominator()->GetLoopInformation(); + current_ = current_->GetPreHeader()->GetLoopInformation(); } HLoopInformation* Current() const { @@ -795,13 +881,14 @@ class HUseListNode : public ArenaObject<kArenaAllocMisc> { HUseListNode* GetNext() const { return next_; } T GetUser() const { return user_; } size_t GetIndex() const { return index_; } + void SetIndex(size_t index) { index_ = index; } private: HUseListNode(T user, size_t index) : user_(user), index_(index), prev_(nullptr), next_(nullptr) {} T const user_; - const size_t index_; + size_t index_; HUseListNode<T>* prev_; HUseListNode<T>* next_; @@ -872,6 +959,14 @@ class HUseList : public ValueObject { return first_ != nullptr && first_->next_ == nullptr; } + size_t SizeSlow() const { + size_t count = 0; + for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) { + ++count; + } + return count; + } + private: HUseListNode<T>* first_; }; @@ -998,15 +1093,47 @@ class SideEffects : public ValueObject { // A HEnvironment object contains the values of virtual registers at a given location. 
class HEnvironment : public ArenaObject<kArenaAllocMisc> { public: - HEnvironment(ArenaAllocator* arena, size_t number_of_vregs) - : vregs_(arena, number_of_vregs) { + HEnvironment(ArenaAllocator* arena, + size_t number_of_vregs, + const DexFile& dex_file, + uint32_t method_idx, + uint32_t dex_pc) + : vregs_(arena, number_of_vregs), + locations_(arena, number_of_vregs), + parent_(nullptr), + dex_file_(dex_file), + method_idx_(method_idx), + dex_pc_(dex_pc) { vregs_.SetSize(number_of_vregs); for (size_t i = 0; i < number_of_vregs; i++) { vregs_.Put(i, HUserRecord<HEnvironment*>()); } + + locations_.SetSize(number_of_vregs); + for (size_t i = 0; i < number_of_vregs; ++i) { + locations_.Put(i, Location()); + } + } + + void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) { + parent_ = new (allocator) HEnvironment(allocator, + parent->Size(), + parent->GetDexFile(), + parent->GetMethodIdx(), + parent->GetDexPc()); + if (parent->GetParent() != nullptr) { + parent_->SetAndCopyParentChain(allocator, parent->GetParent()); + } + parent_->CopyFrom(parent); } - void CopyFrom(HEnvironment* env); + void CopyFrom(const GrowableArray<HInstruction*>& locals); + void CopyFrom(HEnvironment* environment); + + // Copy from `env`. If it's a loop phi for `loop_header`, copy the first + // input to the loop phi instead. This is for inserting instructions that + // require an environment (like HDeoptimization) in the loop pre-header. + void CopyFromWithLoopPhiAdjustment(HEnvironment* env, HBasicBlock* loop_header); void SetRawEnvAt(size_t index, HInstruction* instruction) { vregs_.Put(index, HUserRecord<HEnvironment*>(instruction)); @@ -1020,6 +1147,28 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { size_t Size() const { return vregs_.Size(); } + HEnvironment* GetParent() const { return parent_; } + + void SetLocationAt(size_t index, Location location) { + locations_.Put(index, location); + } + + Location GetLocationAt(size_t index) const { + return locations_.Get(index); + } + + uint32_t GetDexPc() const { + return dex_pc_; + } + + uint32_t GetMethodIdx() const { + return method_idx_; + } + + const DexFile& GetDexFile() const { + return dex_file_; + } + private: // Record instructions' use entries of this environment for constant-time removal. // It should only be called by HInstruction when a new environment use is added. @@ -1030,8 +1179,13 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { } GrowableArray<HUserRecord<HEnvironment*> > vregs_; + GrowableArray<Location> locations_; + HEnvironment* parent_; + const DexFile& dex_file_; + const uint32_t method_idx_; + const uint32_t dex_pc_; - friend HInstruction; + friend class HInstruction; DISALLOW_COPY_AND_ASSIGN(HEnvironment); }; @@ -1161,6 +1315,11 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { } virtual bool NeedsEnvironment() const { return false; } + virtual uint32_t GetDexPc() const { + LOG(FATAL) << "GetDexPc() cannot be called on an instruction that" + " does not need an environment"; + UNREACHABLE(); + } virtual bool IsControlFlow() const { return false; } virtual bool CanThrow() const { return false; } bool HasSideEffects() const { return side_effects_.HasSideEffects(); } @@ -1238,8 +1397,31 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { // copying, the uses lists are being updated. 
void CopyEnvironmentFrom(HEnvironment* environment) { ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); - environment_ = new (allocator) HEnvironment(allocator, environment->Size()); + environment_ = new (allocator) HEnvironment( + allocator, + environment->Size(), + environment->GetDexFile(), + environment->GetMethodIdx(), + environment->GetDexPc()); environment_->CopyFrom(environment); + if (environment->GetParent() != nullptr) { + environment_->SetAndCopyParentChain(allocator, environment->GetParent()); + } + } + + void CopyEnvironmentFromWithLoopPhiAdjustment(HEnvironment* environment, + HBasicBlock* block) { + ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); + environment_ = new (allocator) HEnvironment( + allocator, + environment->Size(), + environment->GetDexFile(), + environment->GetMethodIdx(), + environment->GetDexPc()); + if (environment->GetParent() != nullptr) { + environment_->SetAndCopyParentChain(allocator, environment->GetParent()); + } + environment_->CopyFromWithLoopPhiAdjustment(environment, block); } // Returns the number of entries in the environment. Typically, that is the @@ -1615,7 +1797,7 @@ class HDeoptimize : public HTemplateInstruction<1> { bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Deoptimize); @@ -2034,13 +2216,14 @@ class HFloatConstant : public HConstant { private: explicit HFloatConstant(float value) : HConstant(Primitive::kPrimFloat), value_(value) {} + explicit HFloatConstant(int32_t value) + : HConstant(Primitive::kPrimFloat), value_(bit_cast<float, int32_t>(value)) {} const float value_; - // Only the SsaBuilder can currently create floating-point constants. If we - // ever need to create them later in the pipeline, we will have to handle them - // the same way as integral constants. + // Only the SsaBuilder and HGraph can create floating-point constants. friend class SsaBuilder; + friend class HGraph; DISALLOW_COPY_AND_ASSIGN(HFloatConstant); }; @@ -2071,13 +2254,14 @@ class HDoubleConstant : public HConstant { private: explicit HDoubleConstant(double value) : HConstant(Primitive::kPrimDouble), value_(value) {} + explicit HDoubleConstant(int64_t value) + : HConstant(Primitive::kPrimDouble), value_(bit_cast<double, int64_t>(value)) {} const double value_; - // Only the SsaBuilder can currently create floating-point constants. If we - // ever need to create them later in the pipeline, we will have to handle them - // the same way as integral constants. + // Only the SsaBuilder and HGraph can create floating-point constants. friend class SsaBuilder; + friend class HGraph; DISALLOW_COPY_AND_ASSIGN(HDoubleConstant); }; @@ -2174,9 +2358,15 @@ class HInvoke : public HInstruction { SetRawInputAt(index, argument); } + // Return the number of arguments. This number can be lower than + // the number of inputs returned by InputCount(), as some invoke + // instructions (e.g. HInvokeStaticOrDirect) can have non-argument + // inputs at the end of their list of inputs. 
+ uint32_t GetNumberOfArguments() const { return number_of_arguments_; } + Primitive::Type GetType() const OVERRIDE { return return_type_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint32_t GetDexMethodIndex() const { return dex_method_index_; } @@ -2193,16 +2383,19 @@ class HInvoke : public HInstruction { protected: HInvoke(ArenaAllocator* arena, uint32_t number_of_arguments, + uint32_t number_of_other_inputs, Primitive::Type return_type, uint32_t dex_pc, uint32_t dex_method_index) : HInstruction(SideEffects::All()), + number_of_arguments_(number_of_arguments), inputs_(arena, number_of_arguments), return_type_(return_type), dex_pc_(dex_pc), dex_method_index_(dex_method_index), intrinsic_(Intrinsics::kNone) { - inputs_.SetSize(number_of_arguments); + uint32_t number_of_inputs = number_of_arguments + number_of_other_inputs; + inputs_.SetSize(number_of_inputs); } const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); } @@ -2210,6 +2403,7 @@ class HInvoke : public HInstruction { inputs_.Put(index, input); } + uint32_t number_of_arguments_; GrowableArray<HUserRecord<HInstruction*> > inputs_; const Primitive::Type return_type_; const uint32_t dex_pc_; @@ -2236,14 +2430,21 @@ class HInvokeStaticOrDirect : public HInvoke { uint32_t dex_pc, uint32_t dex_method_index, bool is_recursive, + int32_t string_init_offset, InvokeType original_invoke_type, InvokeType invoke_type, ClinitCheckRequirement clinit_check_requirement) - : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), + : HInvoke(arena, + number_of_arguments, + clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u, + return_type, + dex_pc, + dex_method_index), original_invoke_type_(original_invoke_type), invoke_type_(invoke_type), is_recursive_(is_recursive), - clinit_check_requirement_(clinit_check_requirement) {} + clinit_check_requirement_(clinit_check_requirement), + string_init_offset_(string_init_offset) {} bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { UNUSED(obj); @@ -2256,21 +2457,24 @@ class HInvokeStaticOrDirect : public HInvoke { InvokeType GetInvokeType() const { return invoke_type_; } bool IsRecursive() const { return is_recursive_; } bool NeedsDexCache() const OVERRIDE { return !IsRecursive(); } + bool IsStringInit() const { return string_init_offset_ != 0; } + int32_t GetStringInitOffset() const { return string_init_offset_; } // Is this instruction a call to a static method? bool IsStatic() const { return GetInvokeType() == kStatic; } - // Remove the art::HClinitCheck or art::HLoadClass instruction as - // last input (only relevant for static calls with explicit clinit - // check). - void RemoveClinitCheckOrLoadClassAsLastInput() { + // Remove the art::HLoadClass instruction set as last input by + // art::PrepareForRegisterAllocation::VisitClinitCheck in lieu of + // the initial art::HClinitCheck instruction (only relevant for + // static calls with explicit clinit check). 
+ void RemoveLoadClassAsLastInput() { DCHECK(IsStaticWithExplicitClinitCheck()); size_t last_input_index = InputCount() - 1; HInstruction* last_input = InputAt(last_input_index); DCHECK(last_input != nullptr); - DCHECK(last_input->IsClinitCheck() || last_input->IsLoadClass()) << last_input->DebugName(); + DCHECK(last_input->IsLoadClass()) << last_input->DebugName(); RemoveAsUserOfInput(last_input_index); inputs_.DeleteAt(last_input_index); clinit_check_requirement_ = ClinitCheckRequirement::kImplicit; @@ -2311,6 +2515,9 @@ class HInvokeStaticOrDirect : public HInvoke { const InvokeType invoke_type_; const bool is_recursive_; ClinitCheckRequirement clinit_check_requirement_; + // Thread entrypoint offset for string init method if this is a string init invoke. + // Note that there are multiple string init methods, each having its own offset. + int32_t string_init_offset_; DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect); }; @@ -2323,7 +2530,7 @@ class HInvokeVirtual : public HInvoke { uint32_t dex_pc, uint32_t dex_method_index, uint32_t vtable_index) - : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), + : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index), vtable_index_(vtable_index) {} bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { @@ -2349,7 +2556,7 @@ class HInvokeInterface : public HInvoke { uint32_t dex_pc, uint32_t dex_method_index, uint32_t imt_index) - : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), + : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index), imt_index_(imt_index) {} bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { @@ -2376,7 +2583,7 @@ class HNewInstance : public HExpression<0> { type_index_(type_index), entrypoint_(entrypoint) {} - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } // Calls runtime so needs an environment. @@ -2428,7 +2635,7 @@ class HNewArray : public HExpression<1> { SetRawInputAt(0, length); } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } // Calls runtime so needs an environment. @@ -2523,7 +2730,7 @@ class HDiv : public HBinaryOperation { return (y == -1) ? -x : x / y; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Div); @@ -2550,7 +2757,7 @@ class HRem : public HBinaryOperation { return (y == -1) ? 0 : x % y; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Rem); @@ -2577,7 +2784,7 @@ class HDivZeroCheck : public HExpression<1> { bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(DivZeroCheck); @@ -2772,7 +2979,7 @@ class HTypeConversion : public HExpression<1> { // Required by the x86 and ARM code generators when producing calls // to the runtime. 
- uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; } @@ -2882,7 +3089,7 @@ class HNullCheck : public HExpression<1> { bool CanBeNull() const OVERRIDE { return false; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(NullCheck); @@ -3045,7 +3252,7 @@ class HArraySet : public HTemplateInstruction<3> { bool NeedsTypeCheck() const { return needs_type_check_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } HInstruction* GetArray() const { return InputAt(0); } HInstruction* GetIndex() const { return InputAt(1); } @@ -3115,7 +3322,7 @@ class HBoundsCheck : public HExpression<2> { bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(BoundsCheck); @@ -3155,19 +3362,25 @@ class HTemporary : public HTemplateInstruction<0> { class HSuspendCheck : public HTemplateInstruction<0> { public: explicit HSuspendCheck(uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {} + : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc), slow_path_(nullptr) {} bool NeedsEnvironment() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } + void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; } + SlowPathCode* GetSlowPath() const { return slow_path_; } DECLARE_INSTRUCTION(SuspendCheck); private: const uint32_t dex_pc_; + // Only used for code generation, in order to share the same slow path between back edges + // of a same loop. + SlowPathCode* slow_path_; + DISALLOW_COPY_AND_ASSIGN(HSuspendCheck); }; @@ -3194,7 +3407,7 @@ class HLoadClass : public HExpression<0> { size_t ComputeHashCode() const OVERRIDE { return type_index_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } bool IsReferrersClass() const { return is_referrers_class_; } @@ -3268,7 +3481,7 @@ class HLoadString : public HExpression<0> { size_t ComputeHashCode() const OVERRIDE { return string_index_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint32_t GetStringIndex() const { return string_index_; } // TODO: Can we deopt or debug when we resolve a string? 
@@ -3306,7 +3519,7 @@ class HClinitCheck : public HExpression<1> { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); } @@ -3406,7 +3619,7 @@ class HThrow : public HTemplateInstruction<1> { bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Throw); @@ -3440,7 +3653,7 @@ class HInstanceOf : public HExpression<2> { return false; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool IsClassFinal() const { return class_is_final_; } @@ -3515,7 +3728,7 @@ class HCheckCast : public HTemplateInstruction<2> { bool MustDoNullCheck() const { return must_do_null_check_; } void ClearMustDoNullCheck() { must_do_null_check_ = false; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool IsClassFinal() const { return class_is_final_; } @@ -3561,7 +3774,7 @@ class HMonitorOperation : public HTemplateInstruction<1> { bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool IsEnter() const { return kind_ == kEnter; } diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 4e83ce576c..2736453ccc 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -16,6 +16,7 @@ #include "base/arena_allocator.h" #include "nodes.h" +#include "optimizing_unit_test.h" #include "gtest/gtest.h" @@ -29,7 +30,7 @@ TEST(Node, RemoveInstruction) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -49,7 +50,8 @@ TEST(Node, RemoveInstruction) { first_block->AddSuccessor(exit_block); exit_block->AddInstruction(new (&allocator) HExit()); - HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1); + HEnvironment* environment = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); null_check->SetRawEnvironment(environment); environment->SetRawEnvAt(0, parameter); parameter->AddEnvUseAt(null_check->GetEnvironment(), 0); @@ -70,7 +72,7 @@ TEST(Node, InsertInstruction) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -96,7 +98,7 @@ TEST(Node, AddInstruction) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -112,4 +114,51 @@ TEST(Node, AddInstruction) { ASSERT_TRUE(parameter->GetUses().HasOnlyOneUse()); } +TEST(Node, ParentEnvironment) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = CreateGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* 
parameter1 = new (&allocator) HParameterValue(0, Primitive::kPrimNot); + HInstruction* with_environment = new (&allocator) HNullCheck(parameter1, 0); + entry->AddInstruction(parameter1); + entry->AddInstruction(with_environment); + entry->AddInstruction(new (&allocator) HExit()); + + ASSERT_TRUE(parameter1->HasUses()); + ASSERT_TRUE(parameter1->GetUses().HasOnlyOneUse()); + + HEnvironment* environment = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); + GrowableArray<HInstruction*> array(&allocator, 1); + array.Add(parameter1); + + environment->CopyFrom(array); + with_environment->SetRawEnvironment(environment); + + ASSERT_TRUE(parameter1->HasEnvironmentUses()); + ASSERT_TRUE(parameter1->GetEnvUses().HasOnlyOneUse()); + + HEnvironment* parent1 = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); + parent1->CopyFrom(array); + + ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 2u); + + HEnvironment* parent2 = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); + parent2->CopyFrom(array); + parent1->SetAndCopyParentChain(&allocator, parent2); + + // One use for parent2, and one other use for the new parent of parent1. + ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 4u); + + // We have copied the parent chain. So we now have two more uses. + environment->SetAndCopyParentChain(&allocator, parent1); + ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 6u); +} + } // namespace art diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index b2c13adf35..b0d1433667 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -21,6 +21,7 @@ #include "cfi_test.h" #include "gtest/gtest.h" #include "optimizing/code_generator.h" +#include "optimizing/optimizing_unit_test.h" #include "utils/assembler.h" #include "optimizing/optimizing_cfi_test_expected.inc" @@ -30,7 +31,7 @@ namespace art { // Run the tests only on host. #ifndef HAVE_ANDROID_OS -class OptimizingCFITest : public CFITest { +class OptimizingCFITest : public CFITest { public: // Enable this flag to generate the expected outputs. static constexpr bool kGenerateExpected = false; @@ -45,10 +46,10 @@ class OptimizingCFITest : public CFITest { std::unique_ptr<const InstructionSetFeatures> isa_features; std::string error; isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); - HGraph graph(&allocator); + HGraph* graph = CreateGraph(&allocator); // Generate simple frame with some spills. 
std::unique_ptr<CodeGenerator> code_gen( - CodeGenerator::Create(&graph, isa, *isa_features.get(), opts)); + CodeGenerator::Create(graph, isa, *isa_features.get(), opts)); const int frame_size = 64; int core_reg = 0; int fp_reg = 0; diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 2125f6eb01..9ccc0113f6 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -32,7 +32,7 @@ static constexpr uint8_t expected_cfi_kThumb2[] = { // 0x00000012: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kArm64[] = { - 0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9, + 0xE0, 0x0F, 0x1C, 0xF8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9, 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9, 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, }; @@ -41,7 +41,7 @@ static constexpr uint8_t expected_cfi_kArm64[] = { 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49, 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; -// 0x00000000: str w0, [sp, #-64]! +// 0x00000000: str x0, [sp, #-64]! // 0x00000004: .cfi_def_cfa_offset: 64 // 0x00000004: stp x19, x20, [sp, #40] // 0x00000008: .cfi_offset: r19 at cfa-24 @@ -99,13 +99,13 @@ static constexpr uint8_t expected_cfi_kX86[] = { static constexpr uint8_t expected_asm_kX86_64[] = { 0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24, - 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x89, 0x3C, 0x24, 0xF2, - 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24, - 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3, + 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x48, 0x89, 0x3C, 0x24, + 0xF2, 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, + 0x24, 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3, }; static constexpr uint8_t expected_cfi_kX86_64[] = { 0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E, - 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x43, 0x0A, 0x47, 0xDD, 0x47, + 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x44, 0x0A, 0x47, 0xDD, 0x47, 0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6, 0x41, 0x0B, 0x0E, 0x40, }; @@ -121,21 +121,20 @@ static constexpr uint8_t expected_cfi_kX86_64[] = { // 0x0000000d: .cfi_offset: r30 at cfa-32 // 0x0000000d: movsd [rsp + 24], xmm12 // 0x00000014: .cfi_offset: r29 at cfa-40 -// 0x00000014: mov [rsp], edi -// 0x00000017: .cfi_remember_state -// 0x00000017: movsd xmm12, [rsp + 24] -// 0x0000001e: .cfi_restore: r29 -// 0x0000001e: movsd xmm13, [rsp + 32] -// 0x00000025: .cfi_restore: r30 -// 0x00000025: addq rsp, 40 -// 0x00000029: .cfi_def_cfa_offset: 24 -// 0x00000029: pop rbx -// 0x0000002a: .cfi_def_cfa_offset: 16 -// 0x0000002a: .cfi_restore: r3 -// 0x0000002a: pop rbp -// 0x0000002b: .cfi_def_cfa_offset: 8 -// 0x0000002b: .cfi_restore: r6 -// 0x0000002b: ret -// 0x0000002c: .cfi_restore_state -// 0x0000002c: .cfi_def_cfa_offset: 64 - +// 0x00000014: movq [rsp], rdi +// 0x00000018: .cfi_remember_state +// 0x00000018: movsd xmm12, [rsp + 24] +// 0x0000001f: .cfi_restore: r29 +// 0x0000001f: movsd xmm13, [rsp + 32] +// 0x00000026: .cfi_restore: r30 +// 0x00000026: addq rsp, 40 +// 0x0000002a: .cfi_def_cfa_offset: 24 +// 0x0000002a: pop rbx +// 0x0000002b: .cfi_def_cfa_offset: 16 +// 0x0000002b: .cfi_restore: r3 +// 0x0000002b: pop rbp +// 0x0000002c: .cfi_def_cfa_offset: 
8 +// 0x0000002c: .cfi_restore: r6 +// 0x0000002c: ret +// 0x0000002d: .cfi_restore_state +// 0x0000002d: .cfi_def_cfa_offset: 64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 05451bcaa6..c7b2c67019 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -19,6 +19,7 @@ #include <fstream> #include <stdint.h> +#include "art_method-inl.h" #include "base/arena_allocator.h" #include "base/dumpable.h" #include "base/timing_logger.h" @@ -44,7 +45,6 @@ #include "intrinsics.h" #include "licm.h" #include "jni/quick/jni_compiler.h" -#include "mirror/art_method-inl.h" #include "nodes.h" #include "prepare_for_register_allocation.h" #include "reference_type_propagation.h" @@ -196,7 +196,7 @@ class OptimizingCompiler FINAL : public Compiler { return ArtQuickJniCompileMethod(GetCompilerDriver(), access_flags, method_idx, dex_file); } - uintptr_t GetEntryPointOf(mirror::ArtMethod* method) const OVERRIDE + uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize( InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); @@ -320,8 +320,10 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - HDeadCodeElimination dce1(graph, stats); - HDeadCodeElimination dce2(graph, stats, "dead_code_elimination_final"); + HDeadCodeElimination dce1(graph, stats, + HDeadCodeElimination::kInitialDeadCodeEliminationPassName); + HDeadCodeElimination dce2(graph, stats, + HDeadCodeElimination::kFinalDeadCodeEliminationPassName); HConstantFolding fold1(graph); InstructionSimplifier simplify1(graph, stats); HBooleanSimplifier boolean_simplify(graph); @@ -512,7 +514,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite ArenaAllocator arena(Runtime::Current()->GetArenaPool()); HGraph* graph = new (&arena) HGraph( - &arena, compiler_driver->GetCompilerOptions().GetDebuggable()); + &arena, dex_file, method_idx, compiler_driver->GetInstructionSet(), + compiler_driver->GetCompilerOptions().GetDebuggable()); // For testing purposes, we put a special marker on method names that should be compiled // with this compiler. This makes sure we're not regressing. 
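
[Editor's sketch] The nodes.h and nodes_test.cc hunks above introduce nested environments: an HEnvironment now records its dex file, method index and dex pc, and can be chained to a caller's environment for inlined code. A minimal, hypothetical usage sketch in C++ (the names allocator, graph, callee_vregs, dex_pc, callee_locals and caller_env are placeholders, not part of the patch):

  // Environment for an instruction inside an inlined callee. Its vregs describe
  // the callee frame; the caller frame stays reachable through GetParent().
  HEnvironment* env = new (&allocator) HEnvironment(
      &allocator, callee_vregs, graph->GetDexFile(), graph->GetMethodIdx(), dex_pc);
  env->CopyFrom(callee_locals);                        // GrowableArray<HInstruction*> overload
  env->SetAndCopyParentChain(&allocator, caller_env);  // duplicates the caller's whole chain
  // Every copied level adds environment uses on the held instructions, which is
  // what the ParentEnvironment test verifies via GetEnvUses().SizeSlow().
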
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 65c84e6942..b6b1bb1cad 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -29,25 +29,26 @@ enum MethodCompilationStat { kCompiledBaseline, kCompiledOptimized, kCompiledQuick, - kInstructionSimplifications, kInlinedInvoke, - kNotCompiledUnsupportedIsa, - kNotCompiledPathological, + kInstructionSimplifications, + kNotCompiledBranchOutsideMethodCode, + kNotCompiledCannotBuildSSA, + kNotCompiledCantAccesType, + kNotCompiledClassNotVerified, kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, - kNotCompiledCannotBuildSSA, kNotCompiledNoCodegen, - kNotCompiledUnresolvedMethod, - kNotCompiledUnresolvedField, kNotCompiledNonSequentialRegPair, + kNotCompiledPathological, kNotCompiledSpaceFilter, - kNotOptimizedTryCatch, - kNotOptimizedDisabled, - kNotCompiledCantAccesType, - kNotOptimizedRegisterAllocator, kNotCompiledUnhandledInstruction, + kNotCompiledUnresolvedField, + kNotCompiledUnresolvedMethod, + kNotCompiledUnsupportedIsa, kNotCompiledVerifyAtRuntime, - kNotCompiledClassNotVerified, + kNotOptimizedDisabled, + kNotOptimizedRegisterAllocator, + kNotOptimizedTryCatch, kRemovedCheckedCast, kRemovedDeadInstruction, kRemovedNullCheck, @@ -98,23 +99,24 @@ class OptimizingCompilerStats { case kCompiledQuick : return "kCompiledQuick"; case kInlinedInvoke : return "kInlinedInvoke"; case kInstructionSimplifications: return "kInstructionSimplifications"; - case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; - case kNotCompiledPathological : return "kNotCompiledPathological"; + case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode"; + case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA"; + case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; + case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches"; - case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA"; case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen"; - case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod"; - case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField"; case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair"; - case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; - case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch"; - case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; + case kNotCompiledPathological : return "kNotCompiledPathological"; case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter"; - case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; + case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField"; + case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod"; + case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime"; - case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; + case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; + case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; + case 
kNotOptimizedTryCatch : return "kNotOptimizedTryCatch"; case kRemovedCheckedCast: return "kRemovedCheckedCast"; case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; case kRemovedNullCheck: return "kRemovedNullCheck"; diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 6b236927da..3ef96faab3 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -72,11 +72,17 @@ void RemoveSuspendChecks(HGraph* graph) { } } +inline HGraph* CreateGraph(ArenaAllocator* allocator) { + return new (allocator) HGraph( + allocator, *reinterpret_cast<DexFile*>(allocator->Alloc(sizeof(DexFile))), -1, kRuntimeISA, + false); +} + // Create a control-flow graph from Dex instructions. inline HGraph* CreateCFG(ArenaAllocator* allocator, const uint16_t* data, Primitive::Type return_type = Primitive::kPrimInt) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph, return_type); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index e89417df7d..9ede91013e 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -20,6 +20,7 @@ #include "base/value_object.h" #include "utils/growable_array.h" #include "locations.h" +#include "primitive.h" namespace art { diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index fa6b3c292c..78d11857c3 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -91,7 +91,7 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire // previously) by the graph builder during the creation of the // static invoke instruction, but is no longer required at this // stage (i.e., after inlining has been performed). - invoke->RemoveClinitCheckOrLoadClassAsLastInput(); + invoke->RemoveLoadClassAsLastInput(); // If the load class instruction is no longer used, remove it from // the graph. 
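
[Editor's sketch] For the prepare_for_register_allocation.cc change just above: with ClinitCheckRequirement::kExplicit, the HInvokeStaticOrDirect now carries one extra non-argument input (the HLoadClass left behind by VisitClinitCheck), so the relation between InputCount() and the new GetNumberOfArguments() looks roughly like this hypothetical helper (not part of the patch):

  void PruneExplicitClinitCheck(HInvokeStaticOrDirect* invoke) {
    if (invoke->IsStaticWithExplicitClinitCheck()) {
      // Arguments plus the trailing HLoadClass input.
      DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
      invoke->RemoveLoadClassAsLastInput();
      DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments());
    }
  }
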
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 293fde978e..c56100dfa1 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -30,7 +30,7 @@ namespace art { static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 12b1c2b9bd..e93e06118c 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -16,7 +16,7 @@ #include "reference_type_propagation.h" -#include "class_linker.h" +#include "class_linker-inl.h" #include "mirror/class-inl.h" #include "mirror/dex_cache.h" #include "scoped_thread_state_change.h" diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 0fdf051957..5f439c86d9 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -71,7 +71,9 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, physical_fp_register_intervals_.SetSize(codegen->GetNumberOfFloatingPointRegisters()); // Always reserve for the current method and the graph's max out registers. // TODO: compute it instead. - reserved_out_slots_ = 1 + codegen->GetGraph()->GetMaximumNumberOfOutVRegs(); + // ArtMethod* takes 2 vregs for 64 bits. + reserved_out_slots_ = InstructionSetPointerSize(codegen->GetInstructionSet()) / kVRegSize + + codegen->GetGraph()->GetMaximumNumberOfOutVRegs(); } bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED, @@ -768,7 +770,7 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } } else { DCHECK(!current->IsHighInterval()); - int hint = current->FindFirstRegisterHint(free_until); + int hint = current->FindFirstRegisterHint(free_until, liveness_); if (hint != kNoRegister) { DCHECK(!IsBlocked(hint)); reg = hint; @@ -1101,8 +1103,8 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter } LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) { - HBasicBlock* block_from = liveness_.GetBlockFromPosition(from); - HBasicBlock* block_to = liveness_.GetBlockFromPosition(to); + HBasicBlock* block_from = liveness_.GetBlockFromPosition(from / 2); + HBasicBlock* block_to = liveness_.GetBlockFromPosition(to / 2); DCHECK(block_from != nullptr); DCHECK(block_to != nullptr); @@ -1111,6 +1113,41 @@ LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t fro return Split(interval, to); } + /* + * Non-linear control flow will force moves at every branch instruction to the new location. + * To avoid having all branches doing the moves, we find the next non-linear position and + * split the interval at this position. Take the following example (block number is the linear + * order position): + * + * B1 + * / \ + * B2 B3 + * \ / + * B4 + * + * B2 needs to split an interval, whose next use is in B4. If we were to split at the + * beginning of B4, B3 would need to do a move between B3 and B4 to ensure the interval + * is now in the correct location. 
It makes performance worst if the interval is spilled + * and both B2 and B3 need to reload it before entering B4. + * + * By splitting at B3, we give a chance to the register allocator to allocate the + * interval to the same register as in B1, and therefore avoid doing any + * moves in B3. + */ + if (block_from->GetDominator() != nullptr) { + const GrowableArray<HBasicBlock*>& dominated = block_from->GetDominator()->GetDominatedBlocks(); + for (size_t i = 0; i < dominated.Size(); ++i) { + size_t position = dominated.Get(i)->GetLifetimeStart(); + if ((position > from) && (block_to->GetLifetimeStart() > position)) { + // Even if we found a better block, we continue iterating in case + // a dominated block is closer. + // Note that dominated blocks are not sorted in liveness order. + block_to = dominated.Get(i); + DCHECK_NE(block_to, block_from); + } + } + } + // If `to` is in a loop, find the outermost loop header which does not contain `from`. for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) { HBasicBlock* header = it.Current()->GetHeader(); @@ -1455,6 +1492,7 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { : Location::StackSlot(interval->GetParent()->GetSpillSlot())); } UsePosition* use = current->GetFirstUse(); + UsePosition* env_use = current->GetFirstEnvironmentUse(); // Walk over all siblings, updating locations of use positions, and // connecting them when they are adjacent. @@ -1467,15 +1505,14 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { LiveRange* range = current->GetFirstRange(); while (range != nullptr) { while (use != nullptr && use->GetPosition() < range->GetStart()) { - DCHECK(use->GetIsEnvironment()); + DCHECK(use->IsSynthesized()); use = use->GetNext(); } while (use != nullptr && use->GetPosition() <= range->GetEnd()) { + DCHECK(!use->GetIsEnvironment()); DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); - LocationSummary* locations = use->GetUser()->GetLocations(); - if (use->GetIsEnvironment()) { - locations->SetEnvironmentAt(use->GetInputIndex(), source); - } else { + if (!use->IsSynthesized()) { + LocationSummary* locations = use->GetUser()->GetLocations(); Location expected_location = locations->InAt(use->GetInputIndex()); // The expected (actual) location may be invalid in case the input is unused. Currently // this only happens for intrinsics. @@ -1492,6 +1529,20 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } use = use->GetNext(); } + + // Walk over the environment uses, and update their locations. + while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) { + env_use = env_use->GetNext(); + } + + while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) { + DCHECK(current->CoversSlow(env_use->GetPosition()) + || (env_use->GetPosition() == range->GetEnd())); + HEnvironment* environment = env_use->GetUser()->GetEnvironment(); + environment->SetLocationAt(env_use->GetInputIndex(), source); + env_use = env_use->GetNext(); + } + range = range->GetNext(); } @@ -1554,10 +1605,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } while (current != nullptr); if (kIsDebugBuild) { - // Following uses can only be environment uses. The location for - // these environments will be none. + // Following uses can only be synthesized uses. 
while (use != nullptr) { - DCHECK(use->GetIsEnvironment()); + DCHECK(use->IsSynthesized()); use = use->GetNext(); } } diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index dc9c708eea..97bd777e1c 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_ #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_ +#include "arch/instruction_set.h" #include "base/macros.h" #include "primitive.h" #include "utils/growable_array.h" diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 8c6d904a4c..b72ffb8bf7 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -38,7 +38,7 @@ namespace art { static bool Check(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); @@ -60,7 +60,7 @@ static bool Check(const uint16_t* data) { TEST(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); @@ -255,7 +255,7 @@ TEST(RegisterAllocatorTest, Loop2) { } static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); @@ -463,7 +463,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, HPhi** phi, HInstruction** input1, HInstruction** input2) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -593,7 +593,7 @@ TEST(RegisterAllocatorTest, PhiHint) { static HGraph* BuildFieldReturn(ArenaAllocator* allocator, HInstruction** field, HInstruction** ret) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -661,7 +661,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { static HGraph* BuildTwoSubs(ArenaAllocator* allocator, HInstruction** first_sub, HInstruction** second_sub) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -731,7 +731,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { static HGraph* BuildDiv(ArenaAllocator* allocator, HInstruction** div) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -783,7 +783,7 @@ TEST(RegisterAllocatorTest, SpillInactive) { // Create a 
synthesized graph to please the register_allocator and // ssa_liveness_analysis code. ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 7a252af2ad..59a2852735 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -332,7 +332,7 @@ void SsaBuilder::BuildSsa() { } HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) { - return GetLocalsFor(block)->GetInstructionAt(local); + return GetLocalsFor(block)->Get(local); } void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { @@ -349,7 +349,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid); block->AddPhi(phi); - current_locals_->SetRawEnvAt(local, phi); + current_locals_->Put(local, phi); } } // Save the loop header so that the last phase of the analysis knows which @@ -389,7 +389,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { block->AddPhi(phi); value = phi; } - current_locals_->SetRawEnvAt(local, value); + current_locals_->Put(local, value); } } @@ -417,6 +417,7 @@ HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) { ArenaAllocator* allocator = graph->GetArena(); result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue())); constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext()); + graph->CacheFloatConstant(result); } else { // If there is already a constant with the expected type, we know it is // the floating point equivalent of this constant. @@ -439,6 +440,7 @@ HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) { ArenaAllocator* allocator = graph->GetArena(); result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue())); constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext()); + graph->CacheDoubleConstant(result); } else { // If there is already a constant with the expected type, we know it is // the floating point equivalent of this constant. @@ -518,7 +520,7 @@ HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { } void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { - HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber()); + HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber()); // If the operation requests a specific type, we make sure its input is of that type. 
if (load->GetType() != value->GetType()) { if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) { @@ -532,7 +534,7 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { } void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { - current_locals_->SetRawEnvAt(store->GetLocal()->GetRegNumber(), store->InputAt(1)); + current_locals_->Put(store->GetLocal()->GetRegNumber(), store->InputAt(1)); store->GetBlock()->RemoveInstruction(store); } @@ -541,8 +543,12 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) { return; } HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( - GetGraph()->GetArena(), current_locals_->Size()); - environment->CopyFrom(current_locals_); + GetGraph()->GetArena(), + current_locals_->Size(), + GetGraph()->GetDexFile(), + GetGraph()->GetMethodIdx(), + instruction->GetDexPc()); + environment->CopyFrom(*current_locals_); instruction->SetRawEnvironment(environment); } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 265e95b4ac..1c83c4ba48 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -58,14 +58,15 @@ class SsaBuilder : public HGraphVisitor { void BuildSsa(); - HEnvironment* GetLocalsFor(HBasicBlock* block) { - HEnvironment* env = locals_for_.Get(block->GetBlockId()); - if (env == nullptr) { - env = new (GetGraph()->GetArena()) HEnvironment( + GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) { + GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId()); + if (locals == nullptr) { + locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>( GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs()); - locals_for_.Put(block->GetBlockId(), env); + locals->SetSize(GetGraph()->GetNumberOfVRegs()); + locals_for_.Put(block->GetBlockId(), locals); } - return env; + return locals; } HInstruction* ValueOfLocal(HBasicBlock* block, size_t local); @@ -93,14 +94,14 @@ class SsaBuilder : public HGraphVisitor { static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); // Locals for the current block being visited. - HEnvironment* current_locals_; + GrowableArray<HInstruction*>* current_locals_; // Keep track of loop headers found. The last phase of the analysis iterates // over these blocks to set the inputs of their phis. GrowableArray<HBasicBlock*> loop_headers_; // HEnvironment for each block. - GrowableArray<HEnvironment*> locals_for_; + GrowableArray<GrowableArray<HInstruction*>*> locals_for_; DISALLOW_COPY_AND_ASSIGN(SsaBuilder); }; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index ea0e7c3712..250eb04a1c 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -75,9 +75,7 @@ void SsaLivenessAnalysis::LinearizeGraph() { HBasicBlock* block = it.Current(); size_t number_of_forward_predecessors = block->GetPredecessors().Size(); if (block->IsLoopHeader()) { - // We rely on having simplified the CFG. - DCHECK_EQ(1u, block->GetLoopInformation()->NumberOfBackEdges()); - number_of_forward_predecessors--; + number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges(); } forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors); } @@ -220,10 +218,11 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // Process the environment first, because we know their uses come after // or at the same liveness position of inputs. 
- if (current->HasEnvironment()) { + for (HEnvironment* environment = current->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { // Handle environment uses. See statements (b) and (c) of the // SsaLivenessAnalysis. - HEnvironment* environment = current->GetEnvironment(); for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* instruction = environment->GetInstructionAt(i); bool should_be_live = ShouldBeLiveForEnvironment(instruction); @@ -233,7 +232,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } if (instruction != nullptr) { instruction->GetLiveInterval()->AddUse( - current, i, /* is_environment */ true, should_be_live); + current, environment, i, should_be_live); } } } @@ -245,7 +244,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // to be materialized. if (input->HasSsaIndex()) { live_in->SetBit(input->GetSsaIndex()); - input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false); + input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i); } } } @@ -264,13 +263,12 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } if (block->IsLoopHeader()) { - HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0); + size_t last_position = block->GetLoopInformation()->GetLifetimeEnd(); // For all live_in instructions at the loop header, we need to create a range // that covers the full loop. for (uint32_t idx : live_in->Indexes()) { HInstruction* current = instructions_from_ssa_index_.Get(idx); - current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), - back_edge->GetLifetimeEnd()); + current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position); } } } @@ -322,7 +320,8 @@ static int RegisterOrLowRegister(Location location) { return location.IsPair() ? location.low() : location.reg(); } -int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { +int LiveInterval::FindFirstRegisterHint(size_t* free_until, + const SsaLivenessAnalysis& liveness) const { DCHECK(!IsHighInterval()); if (IsTemp()) return kNoRegister; @@ -336,12 +335,32 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { } } + if (IsSplit() && liveness.IsAtBlockBoundary(GetStart() / 2)) { + // If the start of this interval is at a block boundary, we look at the + // location of the interval in blocks preceding the block this interval + // starts at. If one location is a register we return it as a hint. This + // will avoid a move between the two blocks. + HBasicBlock* block = liveness.GetBlockFromPosition(GetStart() / 2); + for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) { + size_t position = block->GetPredecessors().Get(i)->GetLifetimeEnd() - 1; + // We know positions above GetStart() do not have a location yet. 
+      if (position < GetStart()) {
+        LiveInterval* existing = GetParent()->GetSiblingAt(position);
+        if (existing != nullptr
+            && existing->HasRegister()
+            && (free_until[existing->GetRegister()] > GetStart())) {
+          return existing->GetRegister();
+        }
+      }
+    }
+  }
+
   UsePosition* use = first_use_;
   size_t start = GetStart();
   size_t end = GetEnd();
   while (use != nullptr && use->GetPosition() <= end) {
     size_t use_position = use->GetPosition();
-    if (use_position >= start && !use->GetIsEnvironment()) {
+    if (use_position >= start && !use->IsSynthesized()) {
       HInstruction* user = use->GetUser();
       size_t input_index = use->GetInputIndex();
       if (user->IsPhi()) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 97254edb5e..82c5454bb0 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -23,6 +23,7 @@
 namespace art {
 
 class CodeGenerator;
+class SsaLivenessAnalysis;
 
 static constexpr int kNoRegister = -1;
 
@@ -103,21 +104,24 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> {
 class UsePosition : public ArenaObject<kArenaAllocMisc> {
  public:
   UsePosition(HInstruction* user,
+              HEnvironment* environment,
               size_t input_index,
-              bool is_environment,
               size_t position,
               UsePosition* next)
       : user_(user),
+        environment_(environment),
         input_index_(input_index),
-        is_environment_(is_environment),
         position_(position),
         next_(next) {
-    DCHECK(user->IsPhi()
+    DCHECK((user == nullptr)
+        || user->IsPhi()
         || (GetPosition() == user->GetLifetimePosition() + 1)
         || (GetPosition() == user->GetLifetimePosition()));
     DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
   }
 
+  static constexpr size_t kNoInput = -1;
+
   size_t GetPosition() const { return position_; }
 
   UsePosition* GetNext() const { return next_; }
@@ -125,27 +129,38 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> {
 
   HInstruction* GetUser() const { return user_; }
 
-  bool GetIsEnvironment() const { return is_environment_; }
+  bool GetIsEnvironment() const { return environment_ != nullptr; }
+  bool IsSynthesized() const { return user_ == nullptr; }
 
   size_t GetInputIndex() const { return input_index_; }
 
   void Dump(std::ostream& stream) const {
     stream << position_;
-    if (is_environment_) {
-      stream << " (env)";
-    }
+  }
+
+  HLoopInformation* GetLoopInformation() const {
+    return user_->GetBlock()->GetLoopInformation();
   }
 
   UsePosition* Dup(ArenaAllocator* allocator) const {
     return new (allocator) UsePosition(
-        user_, input_index_, is_environment_, position_,
+        user_, environment_, input_index_, position_,
        next_ == nullptr ? nullptr : next_->Dup(allocator));
   }
 
+  bool RequiresRegister() const {
+    if (GetIsEnvironment()) return false;
+    if (IsSynthesized()) return false;
+    Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
+    return location.IsUnallocated()
+        && (location.GetPolicy() == Location::kRequiresRegister
+            || location.GetPolicy() == Location::kRequiresFpuRegister);
+  }
+
  private:
   HInstruction* const user_;
+  HEnvironment* const environment_;
   const size_t input_index_;
-  const bool is_environment_;
   const size_t position_;
   UsePosition* next_;
 
@@ -219,17 +234,19 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
   void AddTempUse(HInstruction* instruction, size_t temp_index) {
     DCHECK(IsTemp());
     DCHECK(first_use_ == nullptr) << "A temporary can only have one user";
+    DCHECK(first_env_use_ == nullptr) << "A temporary cannot have an environment user";
     size_t position = instruction->GetLifetimePosition();
     first_use_ = new (allocator_) UsePosition(
-        instruction, temp_index, /* is_environment */ false, position, first_use_);
+        instruction, /* environment */ nullptr, temp_index, position, first_use_);
     AddRange(position, position + 1);
   }
 
   void AddUse(HInstruction* instruction,
+              HEnvironment* environment,
               size_t input_index,
-              bool is_environment,
               bool keep_alive = false) {
     // Set the use within the instruction.
+    bool is_environment = (environment != nullptr);
     size_t position = instruction->GetLifetimePosition() + 1;
     LocationSummary* locations = instruction->GetLocations();
     if (!is_environment) {
@@ -239,9 +256,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
         // location of the input just before that instruction (and not potential moves due
         // to splitting).
         position = instruction->GetLifetimePosition();
+      } else if (!locations->InAt(input_index).IsValid()) {
+        return;
       }
     }
 
+    if (!is_environment && instruction->IsInLoop()) {
+      AddBackEdgeUses(*instruction->GetBlock());
+    }
+
     DCHECK(position == instruction->GetLifetimePosition()
            || position == instruction->GetLifetimePosition() + 1);
 
@@ -257,7 +280,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
       }
       DCHECK(first_use_->GetPosition() + 1 == position);
       UsePosition* new_use = new (allocator_) UsePosition(
-          instruction, input_index, is_environment, position, cursor->GetNext());
+          instruction, environment, input_index, position, cursor->GetNext());
       cursor->SetNext(new_use);
       if (first_range_->GetEnd() == first_use_->GetPosition()) {
         first_range_->end_ = position;
@@ -265,8 +288,13 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
       return;
     }
 
-    first_use_ = new (allocator_) UsePosition(
-        instruction, input_index, is_environment, position, first_use_);
+    if (is_environment) {
+      first_env_use_ = new (allocator_) UsePosition(
+          instruction, environment, input_index, position, first_env_use_);
+    } else {
+      first_use_ = new (allocator_) UsePosition(
+          instruction, environment, input_index, position, first_use_);
+    }
 
     if (is_environment && !keep_alive) {
       // If this environment use does not keep the instruction live, it does not
@@ -300,8 +328,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
 
   void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) {
     DCHECK(instruction->IsPhi());
+    if (block->IsInLoop()) {
+      AddBackEdgeUses(*block);
+    }
     first_use_ = new (allocator_) UsePosition(
-        instruction, input_index, false, block->GetLifetimeEnd(), first_use_);
+        instruction, /* environment */ nullptr, input_index, block->GetLifetimeEnd(), first_use_);
   }
 
   void AddRange(size_t start, size_t end) {
@@ -450,38 +481,17 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
     if (is_temp_) {
       return position == GetStart() ? position : kNoLifetime;
     }
-    if (position == GetStart() && IsParent()) {
-      LocationSummary* locations = defined_by_->GetLocations();
-      Location location = locations->Out();
-      // This interval is the first interval of the instruction. If the output
-      // of the instruction requires a register, we return the position of that instruction
-      // as the first register use.
-      if (location.IsUnallocated()) {
-        if ((location.GetPolicy() == Location::kRequiresRegister)
-            || (location.GetPolicy() == Location::kSameAsFirstInput
-                && (locations->InAt(0).IsRegister()
-                    || locations->InAt(0).IsRegisterPair()
-                    || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
-          return position;
-        } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
-                   || (location.GetPolicy() == Location::kSameAsFirstInput
-                       && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) {
-          return position;
-        }
-      } else if (location.IsRegister() || location.IsRegisterPair()) {
-        return position;
-      }
+
+    if (IsDefiningPosition(position) && DefinitionRequiresRegister()) {
+      return position;
     }
 
     UsePosition* use = first_use_;
     size_t end = GetEnd();
     while (use != nullptr && use->GetPosition() <= end) {
       size_t use_position = use->GetPosition();
-      if (use_position > position && !use->GetIsEnvironment()) {
-        Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
-        if (location.IsUnallocated()
-            && (location.GetPolicy() == Location::kRequiresRegister
-                || location.GetPolicy() == Location::kRequiresFpuRegister)) {
+      if (use_position > position) {
+        if (use->RequiresRegister()) {
           return use_position;
         }
       }
@@ -499,21 +509,17 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
       return position == GetStart() ? position : kNoLifetime;
     }
 
-    if (position == GetStart() && IsParent()) {
-      if (defined_by_->GetLocations()->Out().IsValid()) {
-        return position;
-      }
+    if (IsDefiningPosition(position)) {
+      DCHECK(defined_by_->GetLocations()->Out().IsValid());
+      return position;
     }
 
     UsePosition* use = first_use_;
     size_t end = GetEnd();
     while (use != nullptr && use->GetPosition() <= end) {
-      if (!use->GetIsEnvironment()) {
-        Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex());
-        size_t use_position = use->GetPosition();
-        if (use_position > position && location.IsValid()) {
-          return use_position;
-        }
+      size_t use_position = use->GetPosition();
+      if (use_position > position) {
+        return use_position;
       }
       use = use->GetNext();
     }
@@ -524,6 +530,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
     return first_use_;
   }
 
+  UsePosition* GetFirstEnvironmentUse() const {
+    return first_env_use_;
+  }
+
   Primitive::Type GetType() const {
     return type_;
   }
@@ -577,6 +587,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
     new_interval->parent_ = parent_;
 
     new_interval->first_use_ = first_use_;
+    new_interval->first_env_use_ = first_env_use_;
     LiveRange* current = first_range_;
     LiveRange* previous = nullptr;
     // Iterate over the ranges, and either find a range that covers this position, or
@@ -655,10 +666,18 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
         stream << " ";
       } while ((use = use->GetNext()) != nullptr);
     }
+    stream << "}, { ";
+    use = first_env_use_;
+    if (use != nullptr) {
+      do {
+        use->Dump(stream);
+        stream << " ";
+      } while ((use = use->GetNext()) != nullptr);
+    }
     stream << "}";
     stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit();
-    stream << " is_high: " << IsHighInterval();
     stream << " is_low: " << IsLowInterval();
+    stream << " is_high: " << IsHighInterval();
   }
 
   LiveInterval* GetNextSibling() const { return next_sibling_; }
@@ -673,7 +692,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
   // Returns the first register hint that is at least free before
   // the value contained in `free_until`. If none is found, returns
   // `kNoRegister`.
-  int FindFirstRegisterHint(size_t* free_until) const;
+  int FindFirstRegisterHint(size_t* free_until, const SsaLivenessAnalysis& liveness) const;
 
   // If there is enough at the definition site to find a register (for example
   // it uses the same input as the first input), returns the register as a hint.
@@ -754,6 +773,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
     if (first_use_ != nullptr) {
       high_or_low_interval_->first_use_ = first_use_->Dup(allocator_);
     }
+
+    if (first_env_use_ != nullptr) {
+      high_or_low_interval_->first_env_use_ = first_env_use_->Dup(allocator_);
+    }
   }
 
   // Returns whether an interval, when it is non-split, is using
@@ -851,6 +874,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
         first_safepoint_(nullptr),
         last_safepoint_(nullptr),
         first_use_(nullptr),
+        first_env_use_(nullptr),
         type_(type),
         next_sibling_(nullptr),
         parent_(this),
@@ -888,6 +912,107 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
     return range;
   }
 
+  bool DefinitionRequiresRegister() const {
+    DCHECK(IsParent());
+    LocationSummary* locations = defined_by_->GetLocations();
+    Location location = locations->Out();
+    // This interval is the first interval of the instruction. If the output
+    // of the instruction requires a register, we return the position of that instruction
+    // as the first register use.
+    if (location.IsUnallocated()) {
+      if ((location.GetPolicy() == Location::kRequiresRegister)
+          || (location.GetPolicy() == Location::kSameAsFirstInput
+              && (locations->InAt(0).IsRegister()
+                  || locations->InAt(0).IsRegisterPair()
+                  || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) {
+        return true;
+      } else if ((location.GetPolicy() == Location::kRequiresFpuRegister)
+                 || (location.GetPolicy() == Location::kSameAsFirstInput
+                     && (locations->InAt(0).IsFpuRegister()
+                         || locations->InAt(0).IsFpuRegisterPair()
+                         || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) {
+        return true;
+      }
+    } else if (location.IsRegister() || location.IsRegisterPair()) {
+      return true;
+    }
+    return false;
+  }
+
+  bool IsDefiningPosition(size_t position) const {
+    return IsParent() && (position == GetStart());
+  }
+
+  bool HasSynthesizeUseAt(size_t position) const {
+    UsePosition* use = first_use_;
+    while (use != nullptr) {
+      size_t use_position = use->GetPosition();
+      if ((use_position == position) && use->IsSynthesized()) {
+        return true;
+      }
+      if (use_position > position) break;
+      use = use->GetNext();
+    }
+    return false;
+  }
+
+  void AddBackEdgeUses(const HBasicBlock& block_at_use) {
+    DCHECK(block_at_use.IsInLoop());
+    // Add synthesized uses at the back edge of loops to help the register allocator.
+    // Note that this method is called in decreasing liveness order, to facilitate adding
+    // uses at the head of the `first_use_` linked list. Because below we iterate from
+    // inner-most to outer-most, which is in increasing liveness order, we need to take
+    // extra care in how the `first_use_` linked list is updated.
+    UsePosition* first_in_new_list = nullptr;
+    UsePosition* last_in_new_list = nullptr;
+    for (HLoopInformationOutwardIterator it(block_at_use);
+         !it.Done();
+         it.Advance()) {
+      HLoopInformation* current = it.Current();
+      if (GetDefinedBy()->GetLifetimePosition() >= current->GetHeader()->GetLifetimeStart()) {
+        // This interval is defined in the loop. We can stop going outward.
+        break;
+      }
+
+      // We're only adding a synthesized use at the last back edge. Adding synthesized uses on
+      // all back edges is not necessary: anything used in the loop will have its use at the
+      // last back edge. If we want one branch in a loop to get better register allocation than
+      // another, it is the linear order we should change.
+      size_t back_edge_use_position = current->GetLifetimeEnd();
+      if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) {
+        // There was a use already seen in this loop. Therefore the previous call to `AddUse`
+        // already inserted the back edge use. We can stop going outward.
+        DCHECK(HasSynthesizeUseAt(back_edge_use_position));
+        break;
+      }
+
+      DCHECK(last_in_new_list == nullptr
+             || back_edge_use_position > last_in_new_list->GetPosition());
+
+      UsePosition* new_use = new (allocator_) UsePosition(
+          /* user */ nullptr,
+          /* environment */ nullptr,
+          UsePosition::kNoInput,
+          back_edge_use_position,
+          /* next */ nullptr);
+
+      if (last_in_new_list != nullptr) {
+        // Going outward. The latest created use needs to point to the new use.
+        last_in_new_list->SetNext(new_use);
+      } else {
+        // This is the inner-most loop.
+        DCHECK_EQ(current, block_at_use.GetLoopInformation());
+        first_in_new_list = new_use;
+      }
+      last_in_new_list = new_use;
+    }
+    // Link the newly created linked list with `first_use_`.
+    if (last_in_new_list != nullptr) {
+      last_in_new_list->SetNext(first_use_);
+      first_use_ = first_in_new_list;
+    }
+  }
+
   ArenaAllocator* const allocator_;
 
   // Ranges of this interval. We need a quick access to the last range to test
@@ -905,6 +1030,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> {
 
   // Uses of this interval. Note that this linked list is shared amongst siblings.
   UsePosition* first_use_;
+  UsePosition* first_env_use_;
 
   // The instruction type this interval corresponds to.
   const Primitive::Type type_;
@@ -999,14 +1125,18 @@ class SsaLivenessAnalysis : public ValueObject {
   }
 
   HBasicBlock* GetBlockFromPosition(size_t index) const {
-    HInstruction* instruction = GetInstructionFromPosition(index / 2);
+    HInstruction* instruction = GetInstructionFromPosition(index);
     if (instruction == nullptr) {
       // If we are at a block boundary, get the block following.
-      instruction = GetInstructionFromPosition((index / 2) + 1);
+      instruction = GetInstructionFromPosition(index + 1);
     }
     return instruction->GetBlock();
   }
 
+  bool IsAtBlockBoundary(size_t index) const {
+    return GetInstructionFromPosition(index) == nullptr;
+  }
+
   HInstruction* GetTempUser(LiveInterval* temp) const {
     // A temporary shares the same lifetime start as the instruction that requires it.
     DCHECK(temp->IsTemp());
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 00c241b85a..fb3e7d798c 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -78,7 +78,7 @@ static void ReNumberInstructions(HGraph* graph) {
 static void TestCode(const uint16_t* data, const char* expected) {
   ArenaPool pool;
   ArenaAllocator allocator(&pool);
-  HGraph* graph = new (&allocator) HGraph(&allocator);
+  HGraph* graph = CreateGraph(&allocator);
   HGraphBuilder builder(graph);
   const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data);
   bool graph_built = builder.BuildGraph(*item);
@@ -373,30 +373,26 @@ TEST(SsaTest, Loop6) {
   const char* expected =
     "BasicBlock 0, succ: 1\n"
     "  0: IntConstant 0 [5]\n"
-    "  1: IntConstant 4 [14, 8, 8]\n"
-    "  2: IntConstant 5 [14]\n"
+    "  1: IntConstant 4 [5, 8, 8]\n"
+    "  2: IntConstant 5 [5]\n"
     "  3: Goto\n"
     "BasicBlock 1, pred: 0, succ: 2\n"
     "  4: Goto\n"
-    "BasicBlock 2, pred: 1, 8, succ: 6, 3\n"
-    "  5: Phi(0, 14) [12, 6, 6]\n"
+    "BasicBlock 2, pred: 1, 4, 5, succ: 6, 3\n"
+    "  5: Phi(0, 2, 1) [12, 6, 6]\n"
     "  6: Equal(5, 5) [7]\n"
     "  7: If(6)\n"
     "BasicBlock 3, pred: 2, succ: 5, 4\n"
     "  8: Equal(1, 1) [9]\n"
     "  9: If(8)\n"
-    "BasicBlock 4, pred: 3, succ: 8\n"
+    "BasicBlock 4, pred: 3, succ: 2\n"
     "  10: Goto\n"
-    "BasicBlock 5, pred: 3, succ: 8\n"
+    "BasicBlock 5, pred: 3, succ: 2\n"
     "  11: Goto\n"
     "BasicBlock 6, pred: 2, succ: 7\n"
     "  12: Return(5)\n"
     "BasicBlock 7, pred: 6\n"
-    "  13: Exit\n"
-    // Synthesized single back edge of loop.
- "BasicBlock 8, pred: 5, 4, succ: 2\n" - " 14: Phi(1, 2) [5]\n" - " 15: Goto\n"; + " 13: Exit\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc index a5a0eb2114..5ca66a1de6 100644 --- a/compiler/optimizing/suspend_check_test.cc +++ b/compiler/optimizing/suspend_check_test.cc @@ -30,7 +30,7 @@ namespace art { static void TestCode(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); |