Diffstat (limited to 'compiler/optimizing')
70 files changed, 4758 insertions, 1337 deletions
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc index 6ebfb45074..8100a29f32 100644 --- a/compiler/optimizing/boolean_simplifier.cc +++ b/compiler/optimizing/boolean_simplifier.cc @@ -18,6 +18,26 @@ namespace art { +void HBooleanSimplifier::TryRemovingNegatedCondition(HBasicBlock* block) { + DCHECK(block->EndsWithIf()); + + // Check if the condition is a Boolean negation. + HIf* if_instruction = block->GetLastInstruction()->AsIf(); + HInstruction* boolean_not = if_instruction->InputAt(0); + if (!boolean_not->IsBooleanNot()) { + return; + } + + // Make BooleanNot's input the condition of the If and swap branches. + if_instruction->ReplaceInput(boolean_not->InputAt(0), 0); + block->SwapSuccessors(); + + // Remove the BooleanNot if it is now unused. + if (!boolean_not->HasUses()) { + boolean_not->GetBlock()->RemoveInstruction(boolean_not); + } +} + // Returns true if 'block1' and 'block2' are empty, merge into the same single // successor and the successor can only be reached from them. static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) { @@ -78,55 +98,69 @@ static HInstruction* GetOppositeCondition(HInstruction* cond) { } } +void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) { + DCHECK(block->EndsWithIf()); + + // Find elements of the pattern. + HIf* if_instruction = block->GetLastInstruction()->AsIf(); + HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); + HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); + if (!BlocksDoMergeTogether(true_block, false_block)) { + return; + } + HBasicBlock* merge_block = true_block->GetSuccessors().Get(0); + if (!merge_block->HasSinglePhi()) { + return; + } + HPhi* phi = merge_block->GetFirstPhi()->AsPhi(); + HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block)); + HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block)); + + // Check if the selection negates/preserves the value of the condition and + // if so, generate a suitable replacement instruction. + HInstruction* if_condition = if_instruction->InputAt(0); + HInstruction* replacement; + if (NegatesCondition(true_value, false_value)) { + replacement = GetOppositeCondition(if_condition); + if (replacement->GetBlock() == nullptr) { + block->InsertInstructionBefore(replacement, if_instruction); + } + } else if (PreservesCondition(true_value, false_value)) { + replacement = if_condition; + } else { + return; + } + + // Replace the selection outcome with the new instruction. + phi->ReplaceWith(replacement); + merge_block->RemovePhi(phi); + + // Delete the true branch and merge the resulting chain of blocks + // 'block->false_block->merge_block' into one. + true_block->DisconnectAndDelete(); + block->MergeWith(false_block); + block->MergeWith(merge_block); + + // Remove the original condition if it is now unused. + if (!if_condition->HasUses()) { + if_condition->GetBlock()->RemoveInstructionOrPhi(if_condition); + } +} + void HBooleanSimplifier::Run() { // Iterate in post order in the unlikely case that removing one occurrence of - // the pattern empties a branch block of another occurrence. Otherwise the - // order does not matter. + // the selection pattern empties a branch block of another occurrence. + // Otherwise the order does not matter. for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (!block->EndsWithIf()) continue; - // Find elements of the pattern. 
- HIf* if_instruction = block->GetLastInstruction()->AsIf(); - HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); - HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); - if (!BlocksDoMergeTogether(true_block, false_block)) { - continue; - } - HBasicBlock* merge_block = true_block->GetSuccessors().Get(0); - if (!merge_block->HasSinglePhi()) { - continue; - } - HPhi* phi = merge_block->GetFirstPhi()->AsPhi(); - HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block)); - HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block)); - - // Check if the selection negates/preserves the value of the condition and - // if so, generate a suitable replacement instruction. - HInstruction* if_condition = if_instruction->InputAt(0); - HInstruction* replacement; - if (NegatesCondition(true_value, false_value)) { - replacement = GetOppositeCondition(if_condition); - if (replacement->GetBlock() == nullptr) { - block->InsertInstructionBefore(replacement, if_instruction); - } - } else if (PreservesCondition(true_value, false_value)) { - replacement = if_condition; - } else { - continue; - } + // If condition is negated, remove the negation and swap the branches. + TryRemovingNegatedCondition(block); - // Replace the selection outcome with the new instruction. - phi->ReplaceWith(replacement); - merge_block->RemovePhi(phi); - - // Link the start/end blocks and remove empty branches. - graph_->MergeEmptyBranches(block, merge_block); - - // Remove the original condition if it is now unused. - if (!if_condition->HasUses()) { - if_condition->GetBlock()->RemoveInstruction(if_condition); - } + // If this is a boolean-selection diamond pattern, replace its result with + // the condition value (or its negation) and simplify the graph. + TryRemovingBooleanSelection(block); } } diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h index a88733e1af..733ebaac2c 100644 --- a/compiler/optimizing/boolean_simplifier.h +++ b/compiler/optimizing/boolean_simplifier.h @@ -14,11 +14,15 @@ * limitations under the License. */ -// This optimization recognizes a common pattern where a boolean value is -// either cast to an integer or negated by selecting from zero/one integer -// constants with an If statement. Because boolean values are internally -// represented as zero/one, we can safely replace the pattern with a suitable -// condition instruction. +// This optimization recognizes two common patterns: +// (a) Boolean selection: Casting a boolean to an integer or negating it is +// carried out with an If statement selecting from zero/one integer +// constants. Because Boolean values are represented as zero/one, the +// pattern can be replaced with the condition instruction itself or its +// negation, depending on the layout. +// (b) Negated condition: Instruction simplifier may replace an If's condition +// with a boolean value. If this value is the result of a Boolean negation, +// the true/false branches can be swapped and negation removed. 
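// A source-level sketch of pattern (b), with hypothetical names: once the
// instruction simplifier leaves a HBooleanNot as the If condition,
//   if (!flag) { A(); } else { B(); }
// is rewritten into the equivalent of
//   if (flag) { B(); } else { A(); }
// by making the negation's input the condition, swapping the two successors,
// and removing the now-unused HBooleanNot.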
// Example: Negating a boolean value // B1: @@ -66,6 +70,9 @@ class HBooleanSimplifier : public HOptimization { static constexpr const char* kBooleanSimplifierPassName = "boolean_simplifier"; private: + void TryRemovingNegatedCondition(HBasicBlock* block); + void TryRemovingBooleanSelection(HBasicBlock* block); + DISALLOW_COPY_AND_ASSIGN(HBooleanSimplifier); }; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 6511120794..b2b54965b5 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -246,6 +246,148 @@ class ValueBound : public ValueObject { int32_t constant_; }; +// Collect array access data for a loop. +// TODO: make it work for multiple arrays inside the loop. +class ArrayAccessInsideLoopFinder : public ValueObject { + public: + explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable) + : induction_variable_(induction_variable), + found_array_length_(nullptr), + offset_low_(INT_MAX), + offset_high_(INT_MIN) { + Run(); + } + + HArrayLength* GetFoundArrayLength() const { return found_array_length_; } + bool HasFoundArrayLength() const { return found_array_length_ != nullptr; } + int32_t GetOffsetLow() const { return offset_low_; } + int32_t GetOffsetHigh() const { return offset_high_; } + + // Returns whether `block`, which is in loop_info, may exit the loop, unless + // it's the loop header for loop_info. + static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) { + DCHECK(loop_info->Contains(*block)); + if (block == loop_info->GetHeader()) { + // Loop header of loop_info. Exiting the loop is normal. + return false; + } + const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors(); + for (size_t i = 0; i < successors.Size(); i++) { + if (!loop_info->Contains(*successors.Get(i))) { + // One of the successors exits the loop. + return true; + } + } + return false; + } + + static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) { + for (size_t i = 0, e = loop_info->GetBackEdges().Size(); i < e; ++i) { + HBasicBlock* back_edge = loop_info->GetBackEdges().Get(i); + if (!block->Dominates(back_edge)) { + return false; + } + } + return true; + } + + void Run() { + HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation(); + for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) { + HBasicBlock* block = it_loop.Current(); + DCHECK(block->IsInLoop()); + if (!DominatesAllBackEdges(block, loop_info)) { + // In order not to trigger deoptimization unnecessarily, make sure + // that all array accesses collected are really executed in the loop. + // For array accesses in a branch inside the loop, don't collect the + // access. The bounds check in that branch might not be eliminated. + continue; + } + if (EarlyExit(block, loop_info)) { + // If the loop body can exit the loop (via break, return, etc.), it's not guaranteed + // that the loop will run through the full monotonic value range from + // initial_ to end_. So adding deoptimization might be too aggressive and can + // trigger deoptimization unnecessarily even if the loop won't actually throw + // AIOOBE. Otherwise, the loop induction variable is going to cover the full + // monotonic value range from initial_ to end_, and deoptimizations are added + // iff the loop will throw AIOOBE.
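// For instance (hypothetical loops, in Java terms):
//   for (int i = 0; i < a.length; i++) { if (p) return; a[i] = 0; }  // may exit early: bail out
//   for (int i = 0; i < a.length; i++) { a[i] = 0; }                 // covers the full range: ok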
+ found_array_length_ = nullptr; + return; + } + for (HInstruction* instruction = block->GetFirstInstruction(); + instruction != nullptr; + instruction = instruction->GetNext()) { + if (!instruction->IsArrayGet() && !instruction->IsArraySet()) { + continue; + } + HInstruction* index = instruction->InputAt(1); + if (!index->IsBoundsCheck()) { + continue; + } + + HArrayLength* array_length = index->InputAt(1)->AsArrayLength(); + if (array_length == nullptr) { + DCHECK(index->InputAt(1)->IsIntConstant()); + // TODO: may optimize for the constant case. + continue; + } + + HInstruction* array = array_length->InputAt(0); + if (array->IsNullCheck()) { + array = array->AsNullCheck()->InputAt(0); + } + if (loop_info->Contains(*array->GetBlock())) { + // Array is defined inside the loop. Skip. + continue; + } + + if (found_array_length_ != nullptr && found_array_length_ != array_length) { + // There is already an access recorded for another array in this loop. + // TODO: handle multiple arrays. + continue; + } + + index = index->AsBoundsCheck()->InputAt(0); + HInstruction* left = index; + int32_t right = 0; + if (left == induction_variable_ || + (ValueBound::IsAddOrSubAConstant(index, &left, &right) && + left == induction_variable_)) { + // For patterns like array[i] or array[i + 2]. + if (right < offset_low_) { + offset_low_ = right; + } + if (right > offset_high_) { + offset_high_ = right; + } + } else { + // Access not in induction_variable_/(induction_variable_ + constant) + // format. Skip. + continue; + } + // Record this array. + found_array_length_ = array_length; + } + } + } + + private: + // The instruction that corresponds to a MonotonicValueRange. + HInstruction* induction_variable_; + + // The array length of the array that's accessed inside the loop. + HArrayLength* found_array_length_; + + // The lowest and highest constant offsets relative to the induction variable + // induction_variable_ in all array accesses. + // If the array accesses are array[i-1], array[i] and array[i+1], + // offset_low_ is -1 and offset_high_ is 1. + int32_t offset_low_; + int32_t offset_high_; + + DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder); +}; + /** * Represent a range of lower bound and upper bound, both being inclusive. * Currently a ValueRange may be generated as a result of the following: @@ -332,21 +474,31 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { class MonotonicValueRange : public ValueRange { public: MonotonicValueRange(ArenaAllocator* allocator, + HPhi* induction_variable, HInstruction* initial, int32_t increment, ValueBound bound) // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's // used as a regular value range, due to possible overflow/underflow.
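// For example (hypothetical loop), `for (int i = 0; i < n; ++i)` yields a
// MonotonicValueRange for the phi `i` with initial_ = 0 and increment_ = 1;
// once the header's exit test (e.g. `if (i >= n) exit`) is visited, end_ is
// set to n with inclusive_ = false.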
: ValueRange(allocator, ValueBound::Min(), ValueBound::Max()), + induction_variable_(induction_variable), initial_(initial), + end_(nullptr), + inclusive_(false), increment_(increment), bound_(bound) {} virtual ~MonotonicValueRange() {} + HInstruction* GetInductionVariable() const { return induction_variable_; } int32_t GetIncrement() const { return increment_; } - ValueBound GetBound() const { return bound_; } + void SetEnd(HInstruction* end) { end_ = end; } + void SetInclusive(bool inclusive) { inclusive_ = inclusive; } + HBasicBlock* GetLoopHead() const { + DCHECK(induction_variable_->GetBlock()->IsLoopHeader()); + return induction_variable_->GetBlock(); + } MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; } @@ -371,6 +523,10 @@ class MonotonicValueRange : public ValueRange { if (increment_ > 0) { // Monotonically increasing. ValueBound lower = ValueBound::NarrowLowerBound(bound_, range->GetLower()); + if (!lower.IsConstant() || lower.GetConstant() == INT_MIN) { + // Lower bound isn't useful. Leave it to deoptimization. + return this; + } // We currently conservatively assume max array length is INT_MAX. If we can // make assumptions about the max array length, e.g. due to the max heap size, @@ -417,6 +573,11 @@ class MonotonicValueRange : public ValueRange { DCHECK_NE(increment_, 0); // Monotonically decreasing. ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper()); + if ((!upper.IsConstant() || upper.GetConstant() == INT_MAX) && + !upper.IsRelatedToArrayLength()) { + // Upper bound isn't useful. Leave it to deoptimization. + return this; + } // Need to take care of underflow. Try to prove underflow won't happen // for common cases. @@ -432,10 +593,217 @@ class MonotonicValueRange : public ValueRange { } } + // Returns true if adding a (constant >= value) check for deoptimization + // is allowed and will benefit compiled code. + bool CanAddDeoptimizationConstant(HInstruction* value, + int32_t constant, + bool* is_proven) { + *is_proven = false; + // See if we can prove the relationship first. + if (value->IsIntConstant()) { + if (value->AsIntConstant()->GetValue() >= constant) { + // Already true. + *is_proven = true; + return true; + } else { + // May throw exception. Don't add deoptimization. + // Keep bounds checks in the loops. + return false; + } + } + // Can benefit from deoptimization. + return true; + } + + // Adds a check that (value >= constant), and HDeoptimize otherwise. + void AddDeoptimizationConstant(HInstruction* value, + int32_t constant) { + HBasicBlock* block = induction_variable_->GetBlock(); + DCHECK(block->IsLoopHeader()); + HGraph* graph = block->GetGraph(); + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + HIntConstant* const_instr = graph->GetIntConstant(constant); + HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr); + HDeoptimize* deoptimize = new (graph->GetArena()) + HDeoptimize(cond, suspend_check->GetDexPc()); + pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); + pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); + deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( + suspend_check->GetEnvironment(), block); + } + + // Returns true if adding a (value <= array_length + offset) check for deoptimization + // is allowed and will benefit compiled code. 
+ bool CanAddDeoptimizationArrayLength(HInstruction* value, + HArrayLength* array_length, + int32_t offset, + bool* is_proven) { + *is_proven = false; + if (offset > 0) { + // There might be overflow issue. + // TODO: handle this, possibly with some distance relationship between + // offset_low and offset_high, or using another deoptimization to make + // sure (array_length + offset) doesn't overflow. + return false; + } + + // See if we can prove the relationship first. + if (value == array_length) { + if (offset >= 0) { + // Already true. + *is_proven = true; + return true; + } else { + // May throw exception. Don't add deoptimization. + // Keep bounds checks in the loops. + return false; + } + } + // Can benefit from deoptimization. + return true; + } + + // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise. + void AddDeoptimizationArrayLength(HInstruction* value, + HArrayLength* array_length, + int32_t offset) { + HBasicBlock* block = induction_variable_->GetBlock(); + DCHECK(block->IsLoopHeader()); + HGraph* graph = block->GetGraph(); + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + + // We may need to hoist null-check and array_length out of loop first. + if (!array_length->GetBlock()->Dominates(pre_header)) { + HInstruction* array = array_length->InputAt(0); + HNullCheck* null_check = array->AsNullCheck(); + if (null_check != nullptr) { + array = null_check->InputAt(0); + } + // We've already made sure array is defined before the loop when collecting + // array accesses for the loop. + DCHECK(array->GetBlock()->Dominates(pre_header)); + if (null_check != nullptr && !null_check->GetBlock()->Dominates(pre_header)) { + // Hoist null check out of loop with a deoptimization. + HNullConstant* null_constant = graph->GetNullConstant(); + HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant); + // TODO: for one dex_pc, share the same deoptimization slow path. + HDeoptimize* null_check_deoptimize = new (graph->GetArena()) + HDeoptimize(null_check_cond, suspend_check->GetDexPc()); + pre_header->InsertInstructionBefore(null_check_cond, pre_header->GetLastInstruction()); + pre_header->InsertInstructionBefore( + null_check_deoptimize, pre_header->GetLastInstruction()); + // Eliminate null check in the loop. + null_check->ReplaceWith(array); + null_check->GetBlock()->RemoveInstruction(null_check); + null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( + suspend_check->GetEnvironment(), block); + } + // Hoist array_length out of loop. + array_length->MoveBefore(pre_header->GetLastInstruction()); + } + + HIntConstant* offset_instr = graph->GetIntConstant(offset); + HAdd* add = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); + HCondition* cond = new (graph->GetArena()) HGreaterThan(value, add); + HDeoptimize* deoptimize = new (graph->GetArena()) + HDeoptimize(cond, suspend_check->GetDexPc()); + pre_header->InsertInstructionBefore(add, pre_header->GetLastInstruction()); + pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); + pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); + deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( + suspend_check->GetEnvironment(), block); + } + + // Add deoptimizations in loop pre-header with the collected array access + // data so that value ranges can be established in loop body. 
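// Conceptually, for an increasing loop `for (i = initial; i < end; ++i)` whose
// accesses span a[i + offset_low] .. a[i + offset_high] (hypothetical shape),
// the pre-header guards added below amount to:
//   if (initial < -offset_low)         deoptimize;
//   if (end > a.length - offset_high)  deoptimize;
// after which every access index provably lies in [0, a.length).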
+ // Returns true if deoptimizations are successfully added, or if it's proven + // it's not necessary. + bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) { + int32_t offset_low = finder.GetOffsetLow(); + int32_t offset_high = finder.GetOffsetHigh(); + HArrayLength* array_length = finder.GetFoundArrayLength(); + + HBasicBlock* pre_header = + induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader(); + if (!initial_->GetBlock()->Dominates(pre_header) || + !end_->GetBlock()->Dominates(pre_header)) { + // Can't move initial_ or end_ into pre_header for comparisons. + return false; + } + + bool is_constant_proven, is_length_proven; + if (increment_ == 1) { + // Increasing from initial_ to end_. + int32_t offset = inclusive_ ? -offset_high - 1 : -offset_high; + if (CanAddDeoptimizationConstant(initial_, -offset_low, &is_constant_proven) && + CanAddDeoptimizationArrayLength(end_, array_length, offset, &is_length_proven)) { + if (!is_constant_proven) { + AddDeoptimizationConstant(initial_, -offset_low); + } + if (!is_length_proven) { + AddDeoptimizationArrayLength(end_, array_length, offset); + } + return true; + } + } else if (increment_ == -1) { + // Decreasing from initial_ to end_. + int32_t constant = inclusive_ ? -offset_low : -offset_low - 1; + if (CanAddDeoptimizationConstant(end_, constant, &is_constant_proven) && + CanAddDeoptimizationArrayLength( + initial_, array_length, -offset_high - 1, &is_length_proven)) { + if (!is_constant_proven) { + AddDeoptimizationConstant(end_, constant); + } + if (!is_length_proven) { + AddDeoptimizationArrayLength(initial_, array_length, -offset_high - 1); + } + return true; + } + } + return false; + } + + // Try to add HDeoptimize's in the loop pre-header first to narrow this range. + ValueRange* NarrowWithDeoptimization() { + if (increment_ != 1 && increment_ != -1) { + // TODO: possibly handle overflow/underflow issues with deoptimization. + return this; + } + + if (end_ == nullptr) { + // No full info to add deoptimization. + return this; + } + + ArrayAccessInsideLoopFinder finder(induction_variable_); + + if (!finder.HasFoundArrayLength()) { + // No array access was found inside the loop that can benefit + // from deoptimization. + return this; + } + + if (!AddDeoptimization(finder)) { + return this; + } + + // After added deoptimizations, induction variable fits in + // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. + ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); + ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); + // We've narrowed the range after added deoptimizations. + return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); + } + private: - HInstruction* const initial_; - const int32_t increment_; - ValueBound bound_; // Additional value bound info for initial_; + HPhi* const induction_variable_; // Induction variable for this monotonic value range. + HInstruction* const initial_; // Initial value. + HInstruction* end_; // End value. + bool inclusive_; // Whether end value is inclusive. + const int32_t increment_; // Increment for each loop iteration. + const ValueBound bound_; // Additional value bound info for initial_. DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange); }; @@ -598,6 +966,20 @@ class BCEVisitor : public HGraphVisitor { // There should be no critical edge at this point. 
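// (With no critical edges, the false successor has this If as its only
// predecessor, so a range derived from the comparison can be attached to
// that successor directly.)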
DCHECK_EQ(false_successor->GetPredecessors().Size(), 1u); + ValueRange* left_range = LookupValueRange(left, block); + MonotonicValueRange* left_monotonic_range = nullptr; + if (left_range != nullptr) { + left_monotonic_range = left_range->AsMonotonicValueRange(); + if (left_monotonic_range != nullptr) { + HBasicBlock* loop_head = left_monotonic_range->GetLoopHead(); + if (instruction->GetBlock() != loop_head) { + // For monotonic value range, don't handle `instruction` + // if it's not defined in the loop header. + return; + } + } + } + bool found; ValueBound bound = ValueBound::DetectValueBoundFromValue(right, &found); // Each comparison can establish a lower bound and an upper bound @@ -610,7 +992,6 @@ class BCEVisitor : public HGraphVisitor { ValueRange* right_range = LookupValueRange(right, block); if (right_range != nullptr) { if (right_range->IsMonotonicValueRange()) { - ValueRange* left_range = LookupValueRange(left, block); if (left_range != nullptr && left_range->IsMonotonicValueRange()) { HandleIfBetweenTwoMonotonicValueRanges(instruction, left, right, cond, left_range->AsMonotonicValueRange(), @@ -628,6 +1009,17 @@ class BCEVisitor : public HGraphVisitor { bool overflow, underflow; if (cond == kCondLT || cond == kCondLE) { + if (left_monotonic_range != nullptr) { + // Update the info for monotonic value range. + if (left_monotonic_range->GetInductionVariable() == left && + left_monotonic_range->GetIncrement() < 0 && + block == left_monotonic_range->GetLoopHead() && + instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { + left_monotonic_range->SetEnd(right); + left_monotonic_range->SetInclusive(cond == kCondLT); + } + } + if (!upper.Equals(ValueBound::Max())) { int32_t compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive ValueBound new_upper = upper.Add(compensation, &overflow, &underflow); @@ -651,6 +1043,17 @@ class BCEVisitor : public HGraphVisitor { ApplyRangeFromComparison(left, block, false_successor, new_range); } } else if (cond == kCondGT || cond == kCondGE) { + if (left_monotonic_range != nullptr) { + // Update the info for monotonic value range. + if (left_monotonic_range->GetInductionVariable() == left && + left_monotonic_range->GetIncrement() > 0 && + block == left_monotonic_range->GetLoopHead() && + instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { + left_monotonic_range->SetEnd(right); + left_monotonic_range->SetInclusive(cond == kCondGT); + } + } + // array.length as a lower bound isn't considered useful. if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) { int32_t compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive @@ -755,9 +1158,26 @@ class BCEVisitor : public HGraphVisitor { bounds_check->GetBlock()->RemoveInstruction(bounds_check); } + static bool HasSameInputAtBackEdges(HPhi* phi) { + DCHECK(phi->IsLoopHeaderPhi()); + // Start with input 1. Input 0 is from the incoming block. 
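// A loop header phi has one input per predecessor: input 0 comes from the
// pre-header, inputs 1..n from back edges. With several back edges (e.g.
// created by `continue`), the phi is only treated as an induction variable
// if every back edge feeds it the same update value.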
+ HInstruction* input1 = phi->InputAt(1); + DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( + *phi->GetBlock()->GetPredecessors().Get(1))); + for (size_t i = 2, e = phi->InputCount(); i < e; ++i) { + DCHECK(phi->GetBlock()->GetLoopInformation()->IsBackEdge( + *phi->GetBlock()->GetPredecessors().Get(i))); + if (input1 != phi->InputAt(i)) { + return false; + } + } + return true; + } + void VisitPhi(HPhi* phi) { - if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) { - DCHECK_EQ(phi->InputCount(), 2U); + if (phi->IsLoopHeaderPhi() + && (phi->GetType() == Primitive::kPrimInt) + && HasSameInputAtBackEdges(phi)) { HInstruction* instruction = phi->InputAt(1); HInstruction *left; int32_t increment; @@ -790,6 +1210,7 @@ class BCEVisitor : public HGraphVisitor { } range = new (GetGraph()->GetArena()) MonotonicValueRange( GetGraph()->GetArena(), + phi, initial_value, increment, bound); @@ -809,6 +1230,36 @@ class BCEVisitor : public HGraphVisitor { HInstruction* left = cond->GetLeft(); HInstruction* right = cond->GetRight(); HandleIf(instruction, left, right, cmp); + + HBasicBlock* block = instruction->GetBlock(); + ValueRange* left_range = LookupValueRange(left, block); + if (left_range == nullptr) { + return; + } + + if (left_range->IsMonotonicValueRange() && + block == left_range->AsMonotonicValueRange()->GetLoopHead()) { + // The comparison is for an induction variable in the loop header. + DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable()); + HBasicBlock* loop_body_successor; + if (LIKELY(block->GetLoopInformation()-> + Contains(*instruction->IfFalseSuccessor()))) { + loop_body_successor = instruction->IfFalseSuccessor(); + } else { + loop_body_successor = instruction->IfTrueSuccessor(); + } + ValueRange* new_left_range = LookupValueRange(left, loop_body_successor); + if (new_left_range == left_range) { + // We are not successful in narrowing the monotonic value range to + // a regular value range. Try using deoptimization. 
+ new_left_range = left_range->AsMonotonicValueRange()-> + NarrowWithDeoptimization(); + if (new_left_range != left_range) { + GetValueRangeMap(instruction->IfFalseSuccessor())-> + Overwrite(left->GetId(), new_left_range); + } + } + } } } } @@ -1064,7 +1515,7 @@ class BCEVisitor : public HGraphVisitor { }; void BoundsCheckElimination::Run() { - if (!graph_->HasArrayAccesses()) { + if (!graph_->HasBoundsChecks()) { return; } diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 75cf1cf063..163458f75c 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -42,8 +42,8 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); - graph->SetHasArrayAccesses(true); + HGraph* graph = CreateGraph(&allocator); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -147,8 +147,8 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); - graph->SetHasArrayAccesses(true); + HGraph* graph = CreateGraph(&allocator); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -219,8 +219,8 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); - graph->SetHasArrayAccesses(true); + HGraph* graph = CreateGraph(&allocator); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -291,8 +291,8 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); - graph->SetHasArrayAccesses(true); + HGraph* graph = CreateGraph(&allocator); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -364,8 +364,8 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, int initial, int increment, IfCondition cond = kCondGE) { - HGraph* graph = new (allocator) HGraph(allocator); - graph->SetHasArrayAccesses(true); + HGraph* graph = CreateGraph(allocator); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -501,8 +501,8 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator, int initial, int increment = -1, IfCondition cond = kCondLE) { - HGraph* graph = new (allocator) HGraph(allocator); - graph->SetHasArrayAccesses(true); + HGraph* graph = CreateGraph(allocator); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -632,8 +632,8 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, int initial, int increment, IfCondition cond) { - HGraph* graph = new (allocator) HGraph(allocator); - graph->SetHasArrayAccesses(true); + HGraph* graph = CreateGraph(allocator); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -743,8 +743,8 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, HInstruction** bounds_check, int initial, IfCondition cond = kCondGE) { - 
HGraph* graph = new (allocator) HGraph(allocator); - graph->SetHasArrayAccesses(true); + HGraph* graph = CreateGraph(allocator); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); @@ -868,8 +868,8 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); - graph->SetHasArrayAccesses(true); + HGraph* graph = CreateGraph(&allocator); + graph->SetHasBoundsChecks(true); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 818d671b5b..a5c6f23343 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -19,8 +19,9 @@ #include "art_field-inl.h" #include "base/logging.h" #include "class_linker.h" +#include "dex/verified_method.h" #include "dex_file-inl.h" #include "dex_instruction-inl.h" #include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "mirror/class_loader.h" @@ -280,7 +282,10 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // To avoid splitting blocks, we compute ahead of time the instructions that // start a new block, and create these blocks. - ComputeBranchTargets(code_ptr, code_end, &number_of_branches); + if (!ComputeBranchTargets(code_ptr, code_end, &number_of_branches)) { + MaybeRecordStat(MethodCompilationStat::kNotCompiledBranchOutsideMethodCode); + return false; + } // Note that the compiler driver is null when unit testing. if ((compiler_driver_ != nullptr) && SkipCompilation(code_item, number_of_branches)) { @@ -347,7 +352,7 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t index) { current_block_ = block; } -void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, +bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end, size_t* number_of_branches) { branch_targets_.SetSize(code_end - code_ptr); @@ -372,7 +377,14 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, } dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); - if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { + + if (code_ptr >= code_end) { + if (instruction.CanFlowThrough()) { + // In the normal case we should never hit this, but someone can artificially forge a dex + // file to fall through out of the method code. In this case we bail out compilation. + return false; + } + } else if (FindBlockStartingAt(dex_pc) == nullptr) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); } @@ -404,7 +416,12 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, // Fall-through. Add a block if there is more code afterwards. dex_pc += instruction.SizeInCodeUnits(); code_ptr += instruction.SizeInCodeUnits(); - if ((code_ptr < code_end) && (FindBlockStartingAt(dex_pc) == nullptr)) { + if (code_ptr >= code_end) { + // In the normal case we should never hit this, but someone can artificially forge a dex + // file to fall through out of the method code. In this case we bail out compilation. + // (A switch can fall through, so we don't need to check CanFlowThrough().)
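// E.g. a forged method whose final instruction is a switch would otherwise
// fall through past the end of its code item.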
+ return false; + } else if (FindBlockStartingAt(dex_pc) == nullptr) { block = new (arena_) HBasicBlock(graph_, dex_pc); branch_targets_.Put(dex_pc, block); } @@ -413,6 +430,7 @@ void HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, dex_pc += instruction.SizeInCodeUnits(); } } + return true; } HBasicBlock* HGraphBuilder::FindBlockStartingAt(int32_t index) const { @@ -587,7 +605,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, const char* descriptor = dex_file_->StringDataByIdx(proto_id.shorty_idx_); Primitive::Type return_type = Primitive::GetType(descriptor[0]); bool is_instance_call = invoke_type != kStatic; - const size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1); + size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 0 : 1); MethodReference target_method(dex_file_, method_idx); uintptr_t direct_code; @@ -605,7 +623,25 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, } DCHECK(optimized_invoke_type != kSuper); + // By default, consider that the called method implicitly requires + // an initialization check of its declaring class. + HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement = + HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit; + // Potential class initialization check, in the case of a static method call. + HClinitCheck* clinit_check = nullptr; + // Replace calls to String.<init> with StringFactory. + int32_t string_init_offset = 0; + bool is_string_init = compiler_driver_->IsStringInit(method_idx, dex_file_, &string_init_offset); + if (is_string_init) { + return_type = Primitive::kPrimNot; + is_instance_call = false; + number_of_arguments--; + invoke_type = kStatic; + optimized_invoke_type = kStatic; + } + HInvoke* invoke = nullptr; + if (optimized_invoke_type == kVirtual) { invoke = new (arena_) HInvokeVirtual( arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index); @@ -620,9 +656,76 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, bool is_recursive = (target_method.dex_method_index == dex_compilation_unit_->GetDexMethodIndex()); DCHECK(!is_recursive || (target_method.dex_file == dex_compilation_unit_->GetDexFile())); + + if (optimized_invoke_type == kStatic) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<4> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache(hs.NewHandle( + dex_compilation_unit_->GetClassLinker()->FindDexCache( + *dex_compilation_unit_->GetDexFile()))); + Handle<mirror::ClassLoader> class_loader(hs.NewHandle( + soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader()))); + mirror::ArtMethod* resolved_method = compiler_driver_->ResolveMethod( + soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, + optimized_invoke_type); + + if (resolved_method == nullptr) { + MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedMethod); + return false; + } + + const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile(); + Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle( + outer_compilation_unit_->GetClassLinker()->FindDexCache(outer_dex_file))); + Handle<mirror::Class> referrer_class(hs.NewHandle(GetOutermostCompilingClass())); + + // The index at which the method's class is stored in the DexCache's type array.
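// (DexFile::kDexNoIndex here means the declaring class's type index is not
// available to the referrer, in which case no explicit check can be emitted
// for it below.)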
+ uint32_t storage_index = DexFile::kDexNoIndex; + bool is_referrer_class = (resolved_method->GetDeclaringClass() == referrer_class.Get()); + if (is_referrer_class) { + storage_index = referrer_class->GetDexTypeIndex(); + } else if (outer_dex_cache.Get() == dex_cache.Get()) { + // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer. + compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(), + referrer_class.Get(), + resolved_method, + method_idx, + &storage_index); + } + + if (referrer_class.Get()->IsSubClass(resolved_method->GetDeclaringClass())) { + // If the referrer class is the declaring class or a subclass + // of the declaring class, no class initialization is needed + // before the static method call. + clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; + } else if (storage_index != DexFile::kDexNoIndex) { + // If the method's class type index is available, check + // whether we should add an explicit class initialization + // check for its declaring class before the static method call. + + // TODO: find out why this check is needed. + bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache( + *outer_compilation_unit_->GetDexFile(), storage_index); + bool is_initialized = + resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache; + + if (is_initialized) { + clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone; + } else { + clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; + HLoadClass* load_class = + new (arena_) HLoadClass(storage_index, is_referrer_class, dex_pc); + current_block_->AddInstruction(load_class); + clinit_check = new (arena_) HClinitCheck(load_class, dex_pc); + current_block_->AddInstruction(clinit_check); + } + } + } + invoke = new (arena_) HInvokeStaticOrDirect( arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index, - is_recursive, invoke_type, optimized_invoke_type); + is_recursive, string_init_offset, invoke_type, optimized_invoke_type, + clinit_check_requirement); } size_t start_index = 0; @@ -638,6 +741,9 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, uint32_t descriptor_index = 1; uint32_t argument_index = start_index; + if (is_string_init) { + start_index = 1; + } for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) { Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]); bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble); @@ -654,10 +760,38 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, i++; } } - DCHECK_EQ(argument_index, number_of_arguments); + + if (clinit_check_requirement == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit) { + // Add the class initialization check as last input of `invoke`. + DCHECK(clinit_check != nullptr); + invoke->SetArgumentAt(argument_index, clinit_check); + } + current_block_->AddInstruction(invoke); latest_result_ = invoke; + + // Add move-result for StringFactory method. + if (is_string_init) { + uint32_t orig_this_reg = is_range ? 
register_index : args[0]; + const VerifiedMethod* verified_method = + compiler_driver_->GetVerifiedMethod(dex_file_, dex_compilation_unit_->GetDexMethodIndex()); + if (verified_method == nullptr) { + LOG(WARNING) << "No verified method for method calling String.<init>: " + << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_); + return false; + } + const SafeMap<uint32_t, std::set<uint32_t>>& string_init_map = + verified_method->GetStringInitPcRegMap(); + auto map_it = string_init_map.find(dex_pc); + if (map_it != string_init_map.end()) { + std::set<uint32_t> reg_set = map_it->second; + for (auto set_it = reg_set.begin(); set_it != reg_set.end(); ++set_it) { + UpdateLocal(*set_it, invoke); + } + } + UpdateLocal(orig_this_reg, invoke); + } return true; } @@ -732,7 +866,6 @@ bool HGraphBuilder::IsOutermostCompilingClass(uint16_t type_index) const { return compiling_class.Get() == cls.Get(); } - bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put) { @@ -764,7 +897,7 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction, if (is_referrer_class) { storage_index = referrer_class->GetDexTypeIndex(); } else if (outer_dex_cache.Get() != dex_cache.Get()) { - // The compiler driver cannot currently understand multple dex caches involved. Just bailout. + // The compiler driver cannot currently understand multiple dex caches involved. Just bailout. return false; } else { std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField( @@ -882,7 +1015,7 @@ void HGraphBuilder::BuildArrayAccess(const Instruction& instruction, current_block_->AddInstruction(new (arena_) HArrayGet(object, index, anticipated_type)); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } - graph_->SetHasArrayAccesses(true); + graph_->SetHasBoundsChecks(true); } void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc, @@ -984,6 +1117,7 @@ void HGraphBuilder::BuildFillArrayData(const Instruction& instruction, uint32_t default: LOG(FATAL) << "Unknown element width for " << payload->element_width; } + graph_->SetHasBoundsChecks(true); } void HGraphBuilder::BuildFillWideArrayData(HInstruction* object, @@ -1834,12 +1968,19 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 case Instruction::NEW_INSTANCE: { uint16_t type_index = instruction.VRegB_21c(); - QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) - ? kQuickAllocObjectWithAccessCheck - : kQuickAllocObject; - - current_block_->AddInstruction(new (arena_) HNewInstance(dex_pc, type_index, entrypoint)); - UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); + if (compiler_driver_->IsStringTypeIndex(type_index, dex_file_)) { + // Turn new-instance of string into a const 0. + int32_t register_index = instruction.VRegA(); + HNullConstant* constant = graph_->GetNullConstant(); + UpdateLocal(register_index, constant); + } else { + QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) + ? kQuickAllocObjectWithAccessCheck + : kQuickAllocObject; + + current_block_->AddInstruction(new (arena_) HNewInstance(dex_pc, type_index, entrypoint)); + UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); + } break; } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index dc6d97eb0c..36503ce43a 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -88,7 +88,10 @@ class HGraphBuilder : public ValueObject { // the newly created blocks. 
// As a side effect, also compute the number of dex instructions, blocks, and // branches. - void ComputeBranchTargets(const uint16_t* start, + // Returns true if all the branches fall inside the method code, false otherwise. + // (In normal cases this should always return true but someone can artificially + // create a code unit in which branches fall-through out of it). + bool ComputeBranchTargets(const uint16_t* start, const uint16_t* end, size_t* number_of_branches); void MaybeUpdateCurrentBlock(size_t index); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index b14b69ba39..4805ceed20 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -100,11 +100,11 @@ static bool CheckTypeConsistency(HInstruction* instruction) { for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) { if (environment->GetInstructionAt(i) != nullptr) { Primitive::Type type = environment->GetInstructionAt(i)->GetType(); - DCHECK(CheckType(type, locations->GetEnvironmentAt(i))) - << type << " " << locations->GetEnvironmentAt(i); + DCHECK(CheckType(type, environment->GetLocationAt(i))) + << type << " " << environment->GetLocationAt(i); } else { - DCHECK(locations->GetEnvironmentAt(i).IsInvalid()) - << locations->GetEnvironmentAt(i); + DCHECK(environment->GetLocationAt(i).IsInvalid()) + << environment->GetLocationAt(i); } } return true; @@ -153,6 +153,7 @@ HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const { } void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) { + is_baseline_ = is_baseline; HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); GenerateFrameEntry(); @@ -612,7 +613,7 @@ void CodeGenerator::BuildVMapTable(std::vector<uint8_t>* data) const { } void CodeGenerator::BuildStackMaps(std::vector<uint8_t>* data) { - uint32_t size = stack_map_stream_.ComputeNeededSize(); + uint32_t size = stack_map_stream_.PrepareForFillIn(); data->resize(size); MemoryRegion region(data->data(), size); stack_map_stream_.FillIn(region); @@ -644,22 +645,34 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } } + uint32_t outer_dex_pc = dex_pc; + uint32_t outer_environment_size = 0; + uint32_t inlining_depth = 0; + if (instruction != nullptr) { + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { + outer_dex_pc = environment->GetDexPc(); + outer_environment_size = environment->Size(); + if (environment != instruction->GetEnvironment()) { + inlining_depth++; + } + } + } + // Collect PC infos for the mapping table. struct PcInfo pc_info; - pc_info.dex_pc = dex_pc; + pc_info.dex_pc = outer_dex_pc; pc_info.native_pc = GetAssembler()->CodeSize(); pc_infos_.Add(pc_info); - uint32_t inlining_depth = 0; - if (instruction == nullptr) { // For stack overflow checks. 
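// (Implicit stack overflow checks have no HInstruction and thus no
// environment; an otherwise empty stack map entry is recorded for them.)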
- stack_map_stream_.AddStackMapEntry(dex_pc, pc_info.native_pc, 0, 0, 0, inlining_depth); + stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc, pc_info.native_pc, 0, 0, 0, 0); + stack_map_stream_.EndStackMapEntry(); return; } LocationSummary* locations = instruction->GetLocations(); - HEnvironment* environment = instruction->GetEnvironment(); - size_t environment_size = instruction->EnvironmentSize(); uint32_t register_mask = locations->GetRegisterMask(); if (locations->OnlyCallsOnSlowPath()) { @@ -672,63 +685,80 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } // The register mask must be a subset of callee-save registers. DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); - stack_map_stream_.AddStackMapEntry(dex_pc, - pc_info.native_pc, - register_mask, - locations->GetStackMask(), - environment_size, - inlining_depth); + stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc, + pc_info.native_pc, + register_mask, + locations->GetStackMask(), + outer_environment_size, + inlining_depth); + + EmitEnvironment(instruction->GetEnvironment(), slow_path); + stack_map_stream_.EndStackMapEntry(); +} + +void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path) { + if (environment == nullptr) return; + + if (environment->GetParent() != nullptr) { + // We emit the parent environment first. + EmitEnvironment(environment->GetParent(), slow_path); + stack_map_stream_.BeginInlineInfoEntry( + environment->GetMethodIdx(), environment->GetDexPc(), environment->Size()); + } // Walk over the environment, and record the location of dex registers. - for (size_t i = 0; i < environment_size; ++i) { + for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) { HInstruction* current = environment->GetInstructionAt(i); if (current == nullptr) { - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kNone, 0); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); continue; } - Location location = locations->GetEnvironmentAt(i); + Location location = environment->GetLocationAt(i); switch (location.GetKind()) { case Location::kConstant: { DCHECK_EQ(current, location.GetConstant()); if (current->IsLongConstant()) { int64_t value = current->AsLongConstant()->GetValue(); stack_map_stream_.AddDexRegisterEntry( - i, DexRegisterLocation::Kind::kConstant, Low32Bits(value)); + DexRegisterLocation::Kind::kConstant, Low32Bits(value)); stack_map_stream_.AddDexRegisterEntry( - ++i, DexRegisterLocation::Kind::kConstant, High32Bits(value)); + DexRegisterLocation::Kind::kConstant, High32Bits(value)); + ++i; DCHECK_LT(i, environment_size); } else if (current->IsDoubleConstant()) { int64_t value = bit_cast<int64_t, double>(current->AsDoubleConstant()->GetValue()); stack_map_stream_.AddDexRegisterEntry( - i, DexRegisterLocation::Kind::kConstant, Low32Bits(value)); + DexRegisterLocation::Kind::kConstant, Low32Bits(value)); stack_map_stream_.AddDexRegisterEntry( - ++i, DexRegisterLocation::Kind::kConstant, High32Bits(value)); + DexRegisterLocation::Kind::kConstant, High32Bits(value)); + ++i; DCHECK_LT(i, environment_size); } else if (current->IsIntConstant()) { int32_t value = current->AsIntConstant()->GetValue(); - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, value); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); } else if (current->IsNullConstant()) { - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, 0); + 
stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, 0); } else { DCHECK(current->IsFloatConstant()) << current->DebugName(); int32_t value = bit_cast<int32_t, float>(current->AsFloatConstant()->GetValue()); - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kConstant, value); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); } break; } case Location::kStackSlot: { stack_map_stream_.AddDexRegisterEntry( - i, DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); + DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); break; } case Location::kDoubleStackSlot: { stack_map_stream_.AddDexRegisterEntry( - i, DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); + DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); stack_map_stream_.AddDexRegisterEntry( - ++i, DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize)); + DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize)); + ++i; DCHECK_LT(i, environment_size); break; } @@ -737,16 +767,18 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, int id = location.reg(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id); - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); if (current->GetType() == Primitive::kPrimLong) { stack_map_stream_.AddDexRegisterEntry( - ++i, DexRegisterLocation::Kind::kInStack, offset + kVRegSize); + DexRegisterLocation::Kind::kInStack, offset + kVRegSize); + ++i; DCHECK_LT(i, environment_size); } } else { - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInRegister, id); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, id); if (current->GetType() == Primitive::kPrimLong) { - stack_map_stream_.AddDexRegisterEntry(++i, DexRegisterLocation::Kind::kInRegister, id); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, id); + ++i; DCHECK_LT(i, environment_size); } } @@ -757,17 +789,18 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, int id = location.reg(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id); - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); if (current->GetType() == Primitive::kPrimDouble) { stack_map_stream_.AddDexRegisterEntry( - ++i, DexRegisterLocation::Kind::kInStack, offset + kVRegSize); + DexRegisterLocation::Kind::kInStack, offset + kVRegSize); + ++i; DCHECK_LT(i, environment_size); } } else { - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInFpuRegister, id); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, id); if (current->GetType() == Primitive::kPrimDouble) { - stack_map_stream_.AddDexRegisterEntry( - ++i, DexRegisterLocation::Kind::kInFpuRegister, id); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, id); + ++i; DCHECK_LT(i, environment_size); } } @@ -779,16 +812,17 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, int high = location.high(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) { uint32_t offset = 
slow_path->GetStackOffsetOfFpuRegister(low); - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); } else { - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInFpuRegister, low); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, low); } if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(high)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(high); - stack_map_stream_.AddDexRegisterEntry(++i, DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + ++i; } else { - stack_map_stream_.AddDexRegisterEntry( - ++i, DexRegisterLocation::Kind::kInFpuRegister, high); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, high); + ++i; } DCHECK_LT(i, environment_size); break; @@ -799,23 +833,23 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, int high = location.high(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(low); - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); } else { - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kInRegister, low); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, low); } if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(high)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(high); - stack_map_stream_.AddDexRegisterEntry(++i, DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); } else { - stack_map_stream_.AddDexRegisterEntry( - ++i, DexRegisterLocation::Kind::kInRegister, high); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, high); } + ++i; DCHECK_LT(i, environment_size); break; } case Location::kInvalid: { - stack_map_stream_.AddDexRegisterEntry(i, DexRegisterLocation::Kind::kNone, 0); + stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); break; } @@ -823,6 +857,10 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, LOG(FATAL) << "Unexpected kind " << location.GetKind(); } } + + if (environment->GetParent() != nullptr) { + stack_map_stream_.EndInlineInfoEntry(); + } } bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) { diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 9b3cf8a45f..740beabc5d 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -34,10 +34,15 @@ static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); // Binary encoding of 2^31 for type double. static int64_t constexpr k2Pow31EncodingForDouble = INT64_C(0x41E0000000000000); +// Minimum value for a primitive integer. +static int32_t constexpr kPrimIntMin = 0x80000000; +// Minimum value for a primitive long. +static int64_t constexpr kPrimLongMin = INT64_C(0x8000000000000000); + // Maximum value for a primitive integer. static int32_t constexpr kPrimIntMax = 0x7fffffff; // Maximum value for a primitive long. 
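// (Wrapping the literal in INT64_C keeps it explicitly typed as a 64-bit
// constant, matching the new kPrimLongMin above.)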
-static int64_t constexpr kPrimLongMax = 0x7fffffffffffffff; +static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); class Assembler; class CodeGenerator; @@ -107,6 +112,25 @@ class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { DISALLOW_COPY_AND_ASSIGN(SlowPathCode); }; +class InvokeDexCallingConventionVisitor { + public: + virtual Location GetNextLocation(Primitive::Type type) = 0; + + protected: + InvokeDexCallingConventionVisitor() {} + virtual ~InvokeDexCallingConventionVisitor() {} + + // The current index for core registers. + uint32_t gp_index_ = 0u; + // The current index for floating-point registers. + uint32_t float_index_ = 0u; + // The current stack index. + uint32_t stack_index_ = 0u; + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); +}; + class CodeGenerator { public: // Compiles the graph to executable instructions. Returns whether the compilation @@ -214,6 +238,10 @@ class CodeGenerator { std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; void BuildStackMaps(std::vector<uint8_t>* vector); + bool IsBaseline() const { + return is_baseline_; + } + bool IsLeafMethod() const { return is_leaf_; } @@ -306,6 +334,7 @@ class CodeGenerator { return GetFpuSpillSize() + GetCoreSpillSize(); } + virtual ParallelMoveResolver* GetMoveResolver() = 0; protected: CodeGenerator(HGraph* graph, @@ -327,6 +356,7 @@ class CodeGenerator { number_of_register_pairs_(number_of_register_pairs), core_callee_save_mask_(core_callee_save_mask), fpu_callee_save_mask_(fpu_callee_save_mask), + is_baseline_(false), graph_(graph), compiler_options_(compiler_options), pc_infos_(graph->GetArena(), 32), @@ -348,7 +378,6 @@ class CodeGenerator { virtual Location GetStackLocation(HLoadLocal* load) const = 0; - virtual ParallelMoveResolver* GetMoveResolver() = 0; virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; @@ -406,11 +435,15 @@ class CodeGenerator { const uint32_t core_callee_save_mask_; const uint32_t fpu_callee_save_mask_; + // Whether we are using baseline. + bool is_baseline_; + private: void InitLocationsBaseline(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); void CompileInternal(CodeAllocator* allocator, bool is_baseline); void BlockIfInRegister(Location location, bool is_out = false) const; + void EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path); HGraph* const graph_; const CompilerOptions& compiler_options_; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index ae1fb537bb..672e55ea0b 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -17,6 +17,7 @@ #include "code_generator_arm.h" #include "arch/arm/instruction_set_features_arm.h" +#include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" @@ -112,6 +113,10 @@ class SuspendCheckSlowPathARM : public SlowPathCodeARM { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. 
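The InvokeDexCallingConventionVisitor base class introduced above hoists the gp/float/stack cursors that every backend previously declared privately; the per-architecture subclasses later in this diff only override GetNextLocation. A standalone sketch of the allocation pattern those subclasses follow (the register capacities and the Slot type are invented for illustration and describe no real ABI):

#include <cstdint>

// Standalone model: Slot stands in for Location, ArgType for Primitive::Type.
enum class ArgType { kInt, kLong, kFloat, kDouble };
struct Slot { enum Kind { kGpr, kFpr, kStack } kind; uint32_t index; };

class FakeConventionVisitor {
 public:
  Slot GetNextLocation(ArgType type) {
    static constexpr uint32_t kNumGpRegs = 4u, kNumFpRegs = 8u;  // made up
    const bool is_fp = (type == ArgType::kFloat || type == ArgType::kDouble);
    const bool is_wide = (type == ArgType::kLong || type == ArgType::kDouble);
    const uint32_t stack_slot = stack_index_;
    stack_index_ += is_wide ? 2u : 1u;  // stack space advances regardless
    if (is_fp) {
      return (float_index_ < kNumFpRegs) ? Slot{Slot::kFpr, float_index_++}
                                         : Slot{Slot::kStack, stack_slot};
    }
    return (gp_index_ < kNumGpRegs) ? Slot{Slot::kGpr, gp_index_++}
                                    : Slot{Slot::kStack, stack_slot};
  }

 private:
  // Models the three cursors the new base class shares across backends.
  uint32_t gp_index_ = 0u;
  uint32_t float_index_ = 0u;
  uint32_t stack_index_ = 0u;
};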
@@ -176,7 +181,6 @@ class LoadClassSlowPathARM : public SlowPathCodeARM { InvokeRuntimeCallingConvention calling_convention; __ LoadImmediate(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex()); - arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); @@ -222,7 +226,6 @@ class LoadStringSlowPathARM : public SlowPathCodeARM { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - arm_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex()); arm_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); @@ -345,11 +348,11 @@ inline Condition ARMOppositeCondition(IfCondition cond) { } void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const { - stream << ArmManagedRegister::FromCoreRegister(Register(reg)); + stream << Register(reg); } void CodeGeneratorARM::DumpFloatingPointRegister(std::ostream& stream, int reg) const { - stream << ArmManagedRegister::FromSRegister(SRegister(reg)); + stream << SRegister(reg); } size_t CodeGeneratorARM::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { @@ -607,7 +610,7 @@ Location CodeGeneratorARM::GetStackLocation(HLoadLocal* load) const { UNREACHABLE(); } -Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type type) { switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -682,7 +685,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type return Location(); } -Location InvokeDexCallingConventionVisitor::GetReturnLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) { switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -1243,6 +1246,10 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { } void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), codegen_->GetInstructionSetFeatures()); if (intrinsic.TryDispatch(invoke)) { @@ -1267,6 +1274,10 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) } void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. 
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; } @@ -1282,8 +1293,8 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); locations->AddTemp(Location::RegisterLocation(R0)); - InvokeDexCallingConventionVisitor calling_convention_visitor; - for (size_t i = 0; i < invoke->InputCount(); i++) { + InvokeDexCallingConventionVisitorARM calling_convention_visitor; + for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { HInstruction* input = invoke->InputAt(i); locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType())); } @@ -2175,11 +2186,134 @@ void InstructionCodeGeneratorARM::VisitMul(HMul* mul) { } } +void InstructionCodeGeneratorARM::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK(instruction->GetResultType() == Primitive::kPrimInt); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ LoadImmediate(out, 0); + } else { + if (imm == 1) { + __ Mov(out, dividend); + } else { + __ rsb(out, dividend, ShifterOperand(0)); + } + } +} + +void InstructionCodeGeneratorARM::DivRemByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK(instruction->GetResultType() == Primitive::kPrimInt); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + int32_t abs_imm = std::abs(imm); + DCHECK(IsPowerOfTwo(abs_imm)); + int ctz_imm = CTZ(abs_imm); + + if (ctz_imm == 1) { + __ Lsr(temp, dividend, 32 - ctz_imm); + } else { + __ Asr(temp, dividend, 31); + __ Lsr(temp, temp, 32 - ctz_imm); + } + __ add(out, temp, ShifterOperand(dividend)); + + if (instruction->IsDiv()) { + __ Asr(out, out, ctz_imm); + if (imm < 0) { + __ rsb(out, out, ShifterOperand(0)); + } + } else { + __ ubfx(out, out, 0, ctz_imm); + __ sub(out, out, ShifterOperand(temp)); + } +} + +void InstructionCodeGeneratorARM::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK(instruction->GetResultType() == Primitive::kPrimInt); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + Register temp1 = locations->GetTemp(0).AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + int64_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + + int64_t magic; + int shift; + CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + + __ LoadImmediate(temp1, magic); + __ smull(temp2, temp1, dividend, temp1); + + if (imm > 0 && magic < 0) { 
+ __ add(temp1, temp1, ShifterOperand(dividend)); + } else if (imm < 0 && magic > 0) { + __ sub(temp1, temp1, ShifterOperand(dividend)); + } + + if (shift != 0) { + __ Asr(temp1, temp1, shift); + } + + if (instruction->IsDiv()) { + __ sub(out, temp1, ShifterOperand(temp1, ASR, 31)); + } else { + __ sub(temp1, temp1, ShifterOperand(temp1, ASR, 31)); + // TODO: Strength reduction for mls. + __ LoadImmediate(temp2, imm); + __ mls(out, temp1, temp2, dividend); + } +} + +void InstructionCodeGeneratorARM::GenerateDivRemConstantIntegral(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DCHECK(instruction->GetResultType() == Primitive::kPrimInt); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code from being executed. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (IsPowerOfTwo(std::abs(imm))) { + DivRemByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } +} + void LocationsBuilderARM::VisitDiv(HDiv* div) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; if (div->GetResultType() == Primitive::kPrimLong) { // pLdiv runtime call. call_kind = LocationSummary::kCall; + } else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) { + // sdiv will be replaced by another instruction sequence. } else if (div->GetResultType() == Primitive::kPrimInt && !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { // pIdivmod runtime call. @@ -2190,7 +2324,20 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) { switch (div->GetResultType()) { case Primitive::kPrimInt: { - if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + if (div->InputAt(1)->IsConstant()) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue()); + if (abs_imm <= 1) { + // No temp register required. + } else { + locations->AddTemp(Location::RequiresRegister()); + if (!IsPowerOfTwo(abs_imm)) { + locations->AddTemp(Location::RequiresRegister()); + } + } + } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -2234,7 +2381,9 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) { switch (div->GetResultType()) { case Primitive::kPrimInt: { - if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + if (second.IsConstant()) { + GenerateDivRemConstantIntegral(div); + } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { __ sdiv(out.AsRegister<Register>(), first.AsRegister<Register>(), second.AsRegister<Register>()); @@ -2286,8 +2435,11 @@ void LocationsBuilderARM::VisitRem(HRem* rem) { // Most remainders are implemented in the runtime.
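GenerateDivRemWithAnyConstant above is the classic Hacker's Delight sequence: take the high 32 bits of dividend * magic (smull), correct by adding or subtracting the dividend depending on the signs of the magic constant and the divisor, arithmetic-shift, then add one for negative quotients via the ASR-31 trick. A standalone check of that arithmetic for division by 7, using the well-known magic pair (0x92492493, shift 2) that CalculateMagicAndShiftForDivRem is expected to produce for this divisor; the model assumes arithmetic right shift of negative values, which holds on the toolchains ART targets:

#include <cassert>
#include <cstdint>

int32_t DivBy7(int32_t n) {
  const int32_t magic = static_cast<int32_t>(0x92492493);
  const int shift = 2;
  int32_t q = static_cast<int32_t>(
      (static_cast<int64_t>(magic) * n) >> 32);  // smull: keep the high word
  q += n;                // divisor > 0 && magic < 0  =>  add the dividend
  q >>= shift;           // Asr(temp1, temp1, shift)
  return q - (q >> 31);  // add 1 for negative q: sub(out, q, ASR(q, 31))
}

int main() {
  for (int32_t n = -1000; n <= 1000; ++n) {
    assert(DivBy7(n) == n / 7);
  }
  return 0;
}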
LocationSummary::CallKind call_kind = LocationSummary::kCall; - if (rem->GetResultType() == Primitive::kPrimInt && - codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) { + // sdiv will be replaced by another instruction sequence. + call_kind = LocationSummary::kNoCall; + } else if ((rem->GetResultType() == Primitive::kPrimInt) + && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { // Have hardware divide instruction for int, do it with three instructions. call_kind = LocationSummary::kNoCall; } @@ -2296,7 +2448,20 @@ void LocationsBuilderARM::VisitRem(HRem* rem) { switch (type) { case Primitive::kPrimInt: { - if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + if (rem->InputAt(1)->IsConstant()) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue()); + if (abs_imm <= 1) { + // No temp register required. + } else { + locations->AddTemp(Location::RequiresRegister()); + if (!IsPowerOfTwo(abs_imm)) { + locations->AddTemp(Location::RequiresRegister()); + } + } + } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -2353,7 +2518,9 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) { Primitive::Type type = rem->GetResultType(); switch (type) { case Primitive::kPrimInt: { - if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { + if (second.IsConstant()) { + GenerateDivRemConstantIntegral(rem); + } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { Register reg1 = first.AsRegister<Register>(); Register reg2 = second.AsRegister<Register>(); Register temp = locations->GetTemp(0).AsRegister<Register>(); @@ -3533,8 +3700,18 @@ void InstructionCodeGeneratorARM::VisitSuspendCheck(HSuspendCheck* instruction) void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathARM*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } __ LoadFromOffset( kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value()); @@ -4061,15 +4238,9 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, // // Currently we implement the app -> app logic, which looks up in the resolve cache.
- // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset( - kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); - // temp = temp[index_in_cache] - __ LoadFromOffset( - kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); + if (invoke->IsStringInit()) { + // temp = thread->string_init_entrypoint + __ LoadFromOffset(kLoadWord, temp, TR, invoke->GetStringInitOffset()); // LR = temp[offset_of_quick_compiled_code] __ LoadFromOffset(kLoadWord, LR, temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( @@ -4077,7 +4248,24 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, // LR() __ blx(LR); } else { - __ bl(GetFrameEntryLabel()); + // temp = method; + LoadCurrentMethod(temp); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ LoadFromOffset( + kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); + // temp = temp[index_in_cache] + __ LoadFromOffset( + kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); + // LR = temp[offset_of_quick_compiled_code] + __ LoadFromOffset(kLoadWord, LR, temp, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArmWordSize).Int32Value()); + // LR() + __ blx(LR); + } else { + __ bl(GetFrameEntryLabel()); + } } DCHECK(!IsLeafMethod()); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 600903621d..2edbcf8ad7 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -78,22 +78,19 @@ class InvokeDexCallingConvention : public CallingConvention<Register, SRegister> DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); }; -class InvokeDexCallingConventionVisitor { +class InvokeDexCallingConventionVisitorARM : public InvokeDexCallingConventionVisitor { public: - InvokeDexCallingConventionVisitor() - : gp_index_(0), float_index_(0), double_index_(0), stack_index_(0) {} + InvokeDexCallingConventionVisitorARM() {} + virtual ~InvokeDexCallingConventionVisitorARM() {} - Location GetNextLocation(Primitive::Type type); + Location GetNextLocation(Primitive::Type type) OVERRIDE; Location GetReturnLocation(Primitive::Type type); private: InvokeDexCallingConvention calling_convention; - uint32_t gp_index_; - uint32_t float_index_; - uint32_t double_index_; - uint32_t stack_index_; + uint32_t double_index_ = 0; - DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM); }; class ParallelMoveResolverARM : public ParallelMoveResolverWithSwap { @@ -151,7 +148,7 @@ class LocationsBuilderARM : public HGraphVisitor { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorARM* const codegen_; - InvokeDexCallingConventionVisitor parameter_visitor_; + InvokeDexCallingConventionVisitorARM parameter_visitor_; DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM); }; @@ -192,6 +189,10 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { Label* true_target, Label* false_target, Label* always_true_target); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivRemByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); + void GenerateDivRemConstantIntegral(HBinaryOperation* instruction); ArmAssembler* const assembler_; 
CodeGeneratorARM* const codegen_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7e9cdac394..34720e258a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -17,6 +17,7 @@ #include "code_generator_arm64.h" #include "arch/arm64/instruction_set_features_arm64.h" +#include "code_generator_utils.h" #include "common_arm64.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" @@ -256,14 +257,13 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { InvokeRuntimeCallingConvention calling_convention; __ Mov(calling_convention.GetRegisterAt(0).W(), cls_->GetTypeIndex()); - arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W()); int32_t entry_point_offset = do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage) : QUICK_ENTRY_POINT(pInitializeType); arm64_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this); if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t, mirror::ArtMethod*>(); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t, mirror::ArtMethod*>(); + CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); } // Move the class to the desired location. @@ -308,11 +308,10 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - arm64_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1).W()); __ Mov(calling_convention.GetRegisterAt(0).W(), instruction_->GetStringIndex()); arm64_codegen->InvokeRuntime( QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickResolveString, void*, uint32_t, mirror::ArtMethod*>(); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); Primitive::Type type = instruction_->GetType(); arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type); @@ -370,6 +369,10 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; // If not null, the block to branch to after the suspend check. 
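The GetSuccessor() accessors added to the suspend-check slow paths exist so that GenerateSuspendCheck can cache one slow path per HSuspendCheck on the instruction itself, instead of allocating a fresh one each time a back edge is visited; the DCHECK_EQ records the assumption that every caller resumes at the same block. Condensed from the ARM hunk above (the ARM64 and x86 backends receive the identical shape later in this diff):

// Condensed sketch; SuspendCheckSlowPathARM stands for any backend's class.
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor) {
  auto* slow_path = down_cast<SuspendCheckSlowPathARM*>(check->GetSlowPath());
  if (slow_path == nullptr) {
    // First visit: allocate, cache on the instruction, register exactly once.
    slow_path =
        new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(check, successor);
    check->SetSlowPath(slow_path);
    codegen_->AddSlowPath(slow_path);
    if (successor != nullptr) {
      DCHECK(successor->IsLoopHeader());
      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(check);
    }
  } else {
    // Subsequent visits must agree on the resume block.
    DCHECK_EQ(slow_path->GetSuccessor(), successor);
  }
  // ... emit the thread-flag test and conditional branch as before ...
}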
@@ -457,15 +460,15 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { #undef __ -Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) { Location next_location; if (type == Primitive::kPrimVoid) { LOG(FATAL) << "Unreachable type " << type; } if (Primitive::IsFloatingPointType(type) && - (fp_index_ < calling_convention.GetNumberOfFpuRegisters())) { - next_location = LocationFrom(calling_convention.GetFpuRegisterAt(fp_index_++)); + (float_index_ < calling_convention.GetNumberOfFpuRegisters())) { + next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++)); } else if (!Primitive::IsFloatingPointType(type) && (gp_index_ < calling_convention.GetNumberOfRegisters())) { next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++)); @@ -785,11 +788,11 @@ size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index, uint } void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const { - stream << Arm64ManagedRegister::FromXRegister(XRegister(reg)); + stream << XRegister(reg); } void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { - stream << Arm64ManagedRegister::FromDRegister(DRegister(reg)); + stream << DRegister(reg); } void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) { @@ -1073,14 +1076,12 @@ void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset, BlockPoolsScope block_pools(GetVIXLAssembler()); __ Ldr(lr, MemOperand(tr, entry_point_offset)); __ Blr(lr); - if (instruction != nullptr) { - RecordPcInfo(instruction, dex_pc, slow_path); - DCHECK(instruction->IsSuspendCheck() - || instruction->IsBoundsCheck() - || instruction->IsNullCheck() - || instruction->IsDivZeroCheck() - || !IsLeafMethod()); - } + RecordPcInfo(instruction, dex_pc, slow_path); + DCHECK(instruction->IsSuspendCheck() + || instruction->IsBoundsCheck() + || instruction->IsNullCheck() + || instruction->IsDivZeroCheck() + || !IsLeafMethod()); } void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, @@ -1132,8 +1133,19 @@ void InstructionCodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); Register temp = temps.AcquireW(); @@ -1688,6 +1700,152 @@ FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS) #undef DEFINE_CONDITION_VISITORS #undef FOR_EACH_CONDITION_INSTRUCTION +void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); 
+ DCHECK(second.IsConstant()); + + Register out = OutputRegister(instruction); + Register dividend = InputRegisterAt(instruction, 0); + int64_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ Mov(out, 0); + } else { + if (imm == 1) { + __ Mov(out, dividend); + } else { + __ Neg(out, dividend); + } + } +} + +void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + Register out = OutputRegister(instruction); + Register dividend = InputRegisterAt(instruction, 0); + int64_t imm = Int64FromConstant(second.GetConstant()); + int64_t abs_imm = std::abs(imm); + DCHECK(IsPowerOfTwo(abs_imm)); + int ctz_imm = CTZ(abs_imm); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(out); + + if (instruction->IsDiv()) { + __ Add(temp, dividend, abs_imm - 1); + __ Cmp(dividend, 0); + __ Csel(out, temp, dividend, lt); + if (imm > 0) { + __ Asr(out, out, ctz_imm); + } else { + __ Neg(out, Operand(out, ASR, ctz_imm)); + } + } else { + int bits = instruction->GetResultType() == Primitive::kPrimInt ? 32 : 64; + __ Asr(temp, dividend, bits - 1); + __ Lsr(temp, temp, bits - ctz_imm); + __ Add(out, dividend, temp); + __ And(out, out, abs_imm - 1); + __ Sub(out, out, temp); + } +} + +void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + Register out = OutputRegister(instruction); + Register dividend = InputRegisterAt(instruction, 0); + int64_t imm = Int64FromConstant(second.GetConstant()); + + Primitive::Type type = instruction->GetResultType(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + + int64_t magic; + int shift; + CalculateMagicAndShiftForDivRem(imm, type == Primitive::kPrimLong /* is_long */, &magic, &shift); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(out); + + // temp = get_high(dividend * magic) + __ Mov(temp, magic); + if (type == Primitive::kPrimLong) { + __ Smulh(temp, dividend, temp); + } else { + __ Smull(temp.X(), dividend, temp); + __ Lsr(temp.X(), temp.X(), 32); + } + + if (imm > 0 && magic < 0) { + __ Add(temp, temp, dividend); + } else if (imm < 0 && magic > 0) { + __ Sub(temp, temp, dividend); + } + + if (shift != 0) { + __ Asr(temp, temp, shift); + } + + if (instruction->IsDiv()) { + __ Sub(out, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31)); + } else { + __ Sub(temp, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31)); + // TODO: Strength reduction for msub. 
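DivRemByPowerOfTwo above implements the usual truncation fix-up: a bare arithmetic shift rounds toward negative infinity, so abs_imm - 1 is added first, and only for negative dividends (the Add/Cmp/Csel triple on the div path, the sign-derived mask on the rem path). A standalone reference model of both paths for a positive divisor 2^k (assumes arithmetic right shift of negative ints, which holds on the compilers ART targets):

#include <cassert>
#include <cstdint>

int32_t DivPow2(int32_t n, int k) {
  int32_t bias = (n < 0) ? (1 << k) - 1 : 0;  // Add/Cmp/Csel pick n or n+bias
  return (n + bias) >> k;                     // Asr by ctz(divisor)
}

int32_t RemPow2(int32_t n, int k) {
  int32_t t = (n >> 31) & ((1 << k) - 1);  // Asr 31 then Lsr: bias if negative
  return ((n + t) & ((1 << k) - 1)) - t;   // Add, And abs_imm - 1, Sub
}

int main() {
  for (int32_t n = -64; n <= 64; ++n) {
    assert(DivPow2(n, 3) == n / 8);
    assert(RemPow2(n, 3) == n % 8);
  }
  return 0;
}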
+ Register temp_imm = temps.AcquireSameSizeAs(out); + __ Mov(temp_imm, imm); + __ Msub(out, temp, temp_imm, dividend); + } +} + +void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + Primitive::Type type = instruction->GetResultType(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + + LocationSummary* locations = instruction->GetLocations(); + Register out = OutputRegister(instruction); + Location second = locations->InAt(1); + + if (second.IsConstant()) { + int64_t imm = Int64FromConstant(second.GetConstant()); + + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code from being executed. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (IsPowerOfTwo(std::abs(imm))) { + DivRemByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + Register dividend = InputRegisterAt(instruction, 0); + Register divisor = InputRegisterAt(instruction, 1); + if (instruction->IsDiv()) { + __ Sdiv(out, dividend, divisor); + } else { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(out); + __ Sdiv(temp, dividend, divisor); + __ Msub(out, temp, divisor, dividend); + } + } +} + void LocationsBuilderARM64::VisitDiv(HDiv* div) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall); @@ -1695,7 +1853,7 @@ void LocationsBuilderARM64::VisitDiv(HDiv* div) { case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -1716,7 +1874,7 @@ void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) { switch (type) { case Primitive::kPrimInt: case Primitive::kPrimLong: - __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1)); + GenerateDivRemIntegral(div); break; case Primitive::kPrimFloat: @@ -2005,8 +2163,8 @@ void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); locations->AddTemp(LocationFrom(x0)); - InvokeDexCallingConventionVisitor calling_convention_visitor; - for (size_t i = 0; i < invoke->InputCount(); i++) { + InvokeDexCallingConventionVisitorARM64 calling_convention_visitor; + for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { HInstruction* input = invoke->InputAt(i); locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType())); } @@ -2066,6 +2224,10 @@ void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena()); if (intrinsic.TryDispatch(invoke)) { return; @@ -2096,26 +2258,40 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok // // Currently we implement the app -> app logic, which looks up in the resolve cache.
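For non-constant divisors, GenerateDivRemIntegral above keeps the pre-existing two-instruction remainder: ARM64 has no hardware rem, so it is recovered from the quotient with msub, i.e. rem = dividend - (dividend / divisor) * divisor, which yields the truncated, sign-follows-dividend remainder that Java semantics require. The identity, checked standalone:

#include <cassert>
#include <cstdint>

// The Sdiv/Msub pairing above computes the C++/Java truncated remainder.
int64_t RemViaMsub(int64_t dividend, int64_t divisor) {
  int64_t q = dividend / divisor;  // Sdiv(temp, dividend, divisor)
  return dividend - q * divisor;   // Msub(out, temp, divisor, dividend)
}

int main() {
  assert(RemViaMsub(7, 3) == 1);
  assert(RemViaMsub(-7, 3) == -1);  // sign follows the dividend
  assert(RemViaMsub(7, -3) == 1);
  return 0;
}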
- // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset())); - // temp = temp[index_in_cache]; - __ Ldr(temp, HeapOperand(temp, index_in_cache)); - // lr = temp->entry_point_from_quick_compiled_code_; + if (invoke->IsStringInit()) { + // temp = thread->string_init_entrypoint + __ Ldr(temp, HeapOperand(tr, invoke->GetStringInitOffset())); + // LR = temp->entry_point_from_quick_compiled_code_; __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( kArm64WordSize))); - // lr(); + // lr() __ Blr(lr); } else { - __ Bl(&frame_entry_label_); + // temp = method; + LoadCurrentMethod(temp); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset())); + // temp = temp[index_in_cache]; + __ Ldr(temp, HeapOperand(temp, index_in_cache)); + // lr = temp->entry_point_from_quick_compiled_code_; + __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64WordSize))); + // lr(); + __ Blr(lr); + } else { + __ Bl(&frame_entry_label_); + } } DCHECK(!IsLeafMethod()); } void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; } @@ -2521,7 +2697,7 @@ void LocationsBuilderARM64::VisitRem(HRem* rem) { case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -2546,14 +2722,7 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { switch (type) { case Primitive::kPrimInt: case Primitive::kPrimLong: { - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register dividend = InputRegisterAt(rem, 0); - Register divisor = InputRegisterAt(rem, 1); - Register output = OutputRegister(rem); - Register temp = temps.AcquireSameSizeAs(output); - - __ Sdiv(temp, dividend, divisor); - __ Msub(output, temp, divisor, dividend); + GenerateDivRemIntegral(rem); break; } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 913d881323..702bcd45f3 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -122,25 +122,20 @@ class InvokeDexCallingConvention : public CallingConvention<vixl::Register, vixl DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); }; -class InvokeDexCallingConventionVisitor { +class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConventionVisitor { public: - InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {} + InvokeDexCallingConventionVisitorARM64() {} + virtual ~InvokeDexCallingConventionVisitorARM64() {} - Location GetNextLocation(Primitive::Type type); + Location GetNextLocation(Primitive::Type type) OVERRIDE; Location GetReturnLocation(Primitive::Type return_type) { return calling_convention.GetReturnLocation(return_type); } private: 
InvokeDexCallingConvention calling_convention; - // The current index for core registers. - uint32_t gp_index_; - // The current index for floating-point registers. - uint32_t fp_index_; - // The current stack index. - uint32_t stack_index_; - - DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); + + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARM64); }; class InstructionCodeGeneratorARM64 : public HGraphVisitor { @@ -171,6 +166,11 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { vixl::Label* true_target, vixl::Label* false_target, vixl::Label* always_true_target); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivRemByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); + void GenerateDivRemIntegral(HBinaryOperation* instruction); + Arm64Assembler* const assembler_; CodeGeneratorARM64* const codegen_; @@ -196,7 +196,7 @@ class LocationsBuilderARM64 : public HGraphVisitor { void HandleShift(HBinaryOperation* instr); CodeGeneratorARM64* const codegen_; - InvokeDexCallingConventionVisitor parameter_visitor_; + InvokeDexCallingConventionVisitorARM64 parameter_visitor_; DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64); }; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index c604842d86..0212da106b 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -153,6 +153,10 @@ class SuspendCheckSlowPathX86 : public SlowPathCodeX86 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; HBasicBlock* const successor_; @@ -174,7 +178,6 @@ class LoadStringSlowPathX86 : public SlowPathCodeX86 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction_->GetStringIndex())); __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pResolveString))); RecordPcInfo(codegen, instruction_, instruction_->GetDexPc()); @@ -208,7 +211,6 @@ class LoadClassSlowPathX86 : public SlowPathCodeX86 { InvokeRuntimeCallingConvention calling_convention; __ movl(calling_convention.GetRegisterAt(0), Immediate(cls_->GetTypeIndex())); - x86_codegen->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); __ fs()->call(Address::Absolute(do_clinit_ ? 
QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeStaticStorage) : QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pInitializeType))); @@ -338,11 +340,11 @@ inline Condition X86Condition(IfCondition cond) { } void CodeGeneratorX86::DumpCoreRegister(std::ostream& stream, int reg) const { - stream << X86ManagedRegister::FromCpuRegister(Register(reg)); + stream << Register(reg); } void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) const { - stream << X86ManagedRegister::FromXmmRegister(XmmRegister(reg)); + stream << XmmRegister(reg); } size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { @@ -553,7 +555,7 @@ Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const { UNREACHABLE(); } -Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type type) { switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -584,7 +586,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } case Primitive::kPrimFloat: { - uint32_t index = fp_index_++; + uint32_t index = float_index_++; stack_index_++; if (index < calling_convention.GetNumberOfFpuRegisters()) { return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); @@ -594,7 +596,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } case Primitive::kPrimDouble: { - uint32_t index = fp_index_++; + uint32_t index = float_index_++; stack_index_ += 2; if (index < calling_convention.GetNumberOfFpuRegisters()) { return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); @@ -811,7 +813,6 @@ void InstructionCodeGeneratorX86::VisitGoto(HGoto* got) { HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -1196,6 +1197,10 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { } void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; @@ -1214,6 +1219,10 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) } void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. 
+ DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; } @@ -1232,8 +1241,8 @@ void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) { new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); locations->AddTemp(Location::RegisterLocation(EAX)); - InvokeDexCallingConventionVisitor calling_convention_visitor; - for (size_t i = 0; i < invoke->InputCount(); i++) { + InvokeDexCallingConventionVisitorX86 calling_convention_visitor; + for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { HInstruction* input = invoke->InputAt(i); locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType())); } @@ -2734,17 +2743,12 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) { new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall); switch (op->GetResultType()) { - case Primitive::kPrimInt: { - locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. - locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); - locations->SetOut(Location::SameAsFirstInput()); - break; - } + case Primitive::kPrimInt: case Primitive::kPrimLong: { + // Can't have Location::Any() and output SameAsFirstInput() locations->SetInAt(0, Location::RequiresRegister()); - // The shift count needs to be in CL. - locations->SetInAt(1, Location::RegisterLocation(ECX)); + // The shift count needs to be in CL or a constant. + locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2763,6 +2767,7 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { switch (op->GetResultType()) { case Primitive::kPrimInt: { + DCHECK(first.IsRegister()); Register first_reg = first.AsRegister<Register>(); if (second.IsRegister()) { Register second_reg = second.AsRegister<Register>(); @@ -2775,7 +2780,11 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { __ shrl(first_reg, second_reg); } } else { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue); + int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue; + if (shift == 0) { + return; + } + Immediate imm(shift); if (op->IsShl()) { __ shll(first_reg, imm); } else if (op->IsShr()) { @@ -2787,14 +2796,29 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { break; } case Primitive::kPrimLong: { - Register second_reg = second.AsRegister<Register>(); - DCHECK_EQ(ECX, second_reg); - if (op->IsShl()) { - GenerateShlLong(first, second_reg); - } else if (op->IsShr()) { - GenerateShrLong(first, second_reg); + if (second.IsRegister()) { + Register second_reg = second.AsRegister<Register>(); + DCHECK_EQ(ECX, second_reg); + if (op->IsShl()) { + GenerateShlLong(first, second_reg); + } else if (op->IsShr()) { + GenerateShrLong(first, second_reg); + } else { + GenerateUShrLong(first, second_reg); + } } else { - GenerateUShrLong(first, second_reg); + // Shift by a constant. + int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue; + // Nothing to do if the shift is 0, as the input is already the output. 
+ if (shift != 0) { + if (op->IsShl()) { + GenerateShlLong(first, shift); + } else if (op->IsShr()) { + GenerateShrLong(first, shift); + } else { + GenerateUShrLong(first, shift); + } + } } break; } @@ -2803,6 +2827,34 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { } } +void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 1) { + // This is just an addition. + __ addl(low, low); + __ adcl(high, high); + } else if (shift == 32) { + // Shift by 32 is easy. High gets low, and low gets 0. + codegen_->EmitParallelMoves( + loc.ToLow(), + loc.ToHigh(), + Primitive::kPrimInt, + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), + loc.ToLow(), + Primitive::kPrimInt); + } else if (shift > 32) { + // Low part becomes 0. High part is low part << (shift-32). + __ movl(high, low); + __ shll(high, Immediate(shift - 32)); + __ xorl(low, low); + } else { + // Between 1 and 31. + __ shld(high, low, Immediate(shift)); + __ shll(low, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) { Label done; __ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter); @@ -2814,6 +2866,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Need to copy the sign. + DCHECK_NE(low, high); + __ movl(low, high); + __ sarl(high, Immediate(31)); + } else if (shift > 32) { + DCHECK_NE(low, high); + // High part becomes sign. Low part is shifted by shift - 32. + __ movl(low, high); + __ sarl(high, Immediate(31)); + __ sarl(low, Immediate(shift - 32)); + } else { + // Between 1 and 31. + __ shrd(low, high, Immediate(shift)); + __ sarl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -2825,6 +2898,30 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register __ Bind(&done); } +void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) { + Register low = loc.AsRegisterPairLow<Register>(); + Register high = loc.AsRegisterPairHigh<Register>(); + if (shift == 32) { + // Shift by 32 is easy. Low gets high, and high gets 0. + codegen_->EmitParallelMoves( + loc.ToHigh(), + loc.ToLow(), + Primitive::kPrimInt, + Location::ConstantLocation(GetGraph()->GetIntConstant(0)), + loc.ToHigh(), + Primitive::kPrimInt); + } else if (shift > 32) { + // Low part is high >> (shift - 32). High part becomes 0. + __ movl(low, high); + __ shrl(low, Immediate(shift - 32)); + __ xorl(high, high); + } else { + // Between 1 and 31. 
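The new constant-shift helpers split a 64-bit shift on x86 into 32-bit halves: for counts of 1 to 31 the shld/shrd instruction supplies the bits that cross the register boundary, a count of exactly 32 degenerates to a register move (done as a parallel move so the pair can alias), and counts above 32 move then shift. A standalone model of the 1..31 unsigned case that the shrd/shrl pair below implements:

#include <cassert>
#include <cstdint>

// Reference model for the 1..31 constant cases of the long shifts.
void UShrLong(uint32_t* low, uint32_t* high, int shift) {  // 1 <= shift <= 31
  *low = (*low >> shift) | (*high << (32 - shift));  // shrd low, high, #shift
  *high >>= shift;                                   // shrl high, #shift
}

int main() {
  uint64_t v = UINT64_C(0x1122334455667788);
  uint32_t low = static_cast<uint32_t>(v);
  uint32_t high = static_cast<uint32_t>(v >> 32);
  UShrLong(&low, &high, 8);
  assert(((static_cast<uint64_t>(high) << 32) | low) == (v >> 8));
  return 0;
}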
+ __ shrd(low, high, Immediate(shift)); + __ shrl(high, Immediate(shift)); + } +} + void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) { Label done; __ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter); @@ -3104,18 +3201,27 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, // 3) app -> app // // Currently we implement the app -> app logic, which looks up in the resolve cache. - // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); - // temp = temp[index_in_cache] - __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); + + if (invoke->IsStringInit()) { + // temp = thread->string_init_entrypoint + __ fs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset())); // (temp + offset_of_quick_compiled_code)() __ call(Address( temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); } else { - __ call(GetFrameEntryLabel()); + // temp = method; + LoadCurrentMethod(temp); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); + // temp = temp[index_in_cache] + __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(temp, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); + } else { + __ call(GetFrameEntryLabel()); + } } DCHECK(!IsLeafMethod()); @@ -3809,7 +3915,7 @@ void LocationsBuilderX86::VisitBoundsCheck(HBoundsCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (instruction->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } @@ -3821,16 +3927,38 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { Location length_loc = locations->InAt(1); SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86(instruction, index_loc, length_loc); - codegen_->AddSlowPath(slow_path); - Register length = length_loc.AsRegister<Register>(); - if (index_loc.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); - __ cmpl(length, Immediate(value)); + if (length_loc.IsConstant()) { + int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant()); + if (index_loc.IsConstant()) { + // BCE will remove the bounds check if we are guaranteed to pass. + int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + if (index < 0 || index >= length) { + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + } else { + // Some optimization after BCE may have generated this, and we should not + // generate a bounds check if it is a valid range. + } + return; + } + + // We have to reverse the jump condition because the length is the constant.
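The reversal is needed because the comparison now has the form cmpl(index, #length) instead of cmpl(length, index), so the trap condition index >= length (unsigned) becomes kAboveEqual rather than kBelowEqual; and when both operands are constants, no comparison is emitted at all. The case split, as an illustrative decision model (the enum and function names are invented; only the logic mirrors the hunk):

#include <cstdint>

// Decision model for VisitBoundsCheck above. The real code emits either
// nothing, an unconditional jmp to the slow path, or one cmpl plus one jcc.
enum class BoundsCheckCode { kNone, kAlwaysSlowPath, kCompareAndBranch };

BoundsCheckCode Select(bool index_is_const, bool length_is_const,
                       int32_t index, int32_t length) {
  if (index_is_const && length_is_const) {
    // Fully folded: BCE should have removed provably-in-range checks, but if
    // one survives we still avoid emitting a dead comparison.
    return (index < 0 || index >= length) ? BoundsCheckCode::kAlwaysSlowPath
                                          : BoundsCheckCode::kNone;
  }
  return BoundsCheckCode::kCompareAndBranch;
}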
+ Register index_reg = index_loc.AsRegister<Register>(); + __ cmpl(index_reg, Immediate(length)); + codegen_->AddSlowPath(slow_path); + __ j(kAboveEqual, slow_path->GetEntryLabel()); } else { - __ cmpl(length, index_loc.AsRegister<Register>()); + Register length = length_loc.AsRegister<Register>(); + if (index_loc.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + __ cmpl(length, Immediate(value)); + } else { + __ cmpl(length, index_loc.AsRegister<Register>()); + } + codegen_->AddSlowPath(slow_path); + __ j(kBelowEqual, slow_path->GetEntryLabel()); } - __ j(kBelowEqual, slow_path->GetEntryLabel()); } void LocationsBuilderX86::VisitTemporary(HTemporary* temp) { @@ -3872,8 +4000,19 @@ void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathX86* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + __ fs()->cmpw(Address::Absolute( Thread::ThreadFlagsOffset<kX86WordSize>().Int32Value()), Immediate(0)); if (successor == nullptr) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 8bd3cd3585..5a5a37b3fe 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -75,22 +75,17 @@ class InvokeDexCallingConvention : public CallingConvention<Register, XmmRegiste DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); }; -class InvokeDexCallingConventionVisitor { +class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVisitor { public: - InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {} + InvokeDexCallingConventionVisitorX86() {} + virtual ~InvokeDexCallingConventionVisitorX86() {} - Location GetNextLocation(Primitive::Type type); + Location GetNextLocation(Primitive::Type type) OVERRIDE; private: InvokeDexCallingConvention calling_convention; - // The current index for cpu registers. - uint32_t gp_index_; - // The current index for fpu registers. - uint32_t fp_index_; - // The current stack index. 
- uint32_t stack_index_; - - DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); + + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86); }; class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap { @@ -137,7 +132,7 @@ class LocationsBuilderX86 : public HGraphVisitor { void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); CodeGeneratorX86* const codegen_; - InvokeDexCallingConventionVisitor parameter_visitor_; + InvokeDexCallingConventionVisitorX86 parameter_visitor_; DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86); }; @@ -171,6 +166,9 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); void GenerateUShrLong(const Location& loc, Register shifter); + void GenerateShlLong(const Location& loc, int shift); + void GenerateShrLong(const Location& loc, int shift); + void GenerateUShrLong(const Location& loc, int shift); void GenerateMemoryBarrier(MemBarrierKind kind); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 47425fb9ae..63d68465d0 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -99,7 +99,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 { if (is_div_) { __ negq(cpu_reg_); } else { - __ movq(cpu_reg_, Immediate(0)); + __ xorl(cpu_reg_, cpu_reg_); } } __ jmp(GetExitLabel()); @@ -136,6 +136,10 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 { return &return_label_; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: HSuspendCheck* const instruction_; HBasicBlock* const successor_; @@ -197,7 +201,6 @@ class LoadClassSlowPathX86_64 : public SlowPathCodeX86_64 { InvokeRuntimeCallingConvention calling_convention; __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex())); - x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); __ gs()->call(Address::Absolute((do_clinit_ ? QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeStaticStorage) : QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pInitializeType)) , true)); @@ -244,7 +247,6 @@ class LoadStringSlowPathX86_64 : public SlowPathCodeX86_64 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - x64_codegen->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction_->GetStringIndex())); __ gs()->call(Address::Absolute( @@ -368,29 +370,37 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo // // Currently we implement the app -> app logic, which looks up in the resolve cache. 
- // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); - // temp = temp[index_in_cache] - __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); + if (invoke->IsStringInit()) { + // temp = thread->string_init_entrypoint + __ gs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset())); // (temp + offset_of_quick_compiled_code)() __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86_64WordSize).SizeValue())); } else { - __ call(&frame_entry_label_); + // temp = method; + LoadCurrentMethod(temp); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); + // temp = temp[index_in_cache] + __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()))); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kX86_64WordSize).SizeValue())); + } else { + __ call(&frame_entry_label_); + } } DCHECK(!IsLeafMethod()); } void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const { - stream << X86_64ManagedRegister::FromCpuRegister(Register(reg)); + stream << Register(reg); } void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { - stream << X86_64ManagedRegister::FromXmmRegister(FloatRegister(reg)); + stream << FloatRegister(reg); } size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { @@ -665,7 +675,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { DCHECK(constant->IsLongConstant()); value = constant->AsLongConstant()->GetValue(); } - __ movq(CpuRegister(TMP), Immediate(value)); + Load64BitValue(CpuRegister(TMP), value); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } else { DCHECK(source.IsDoubleStackSlot()); @@ -698,9 +708,9 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction, } else if (const_to_move->IsLongConstant()) { int64_t value = const_to_move->AsLongConstant()->GetValue(); if (location.IsRegister()) { - __ movq(location.AsRegister<CpuRegister>(), Immediate(value)); + Load64BitValue(location.AsRegister<CpuRegister>(), value); } else if (location.IsDoubleStackSlot()) { - __ movq(CpuRegister(TMP), Immediate(value)); + Load64BitValue(CpuRegister(TMP), value); __ movq(Address(CpuRegister(RSP), location.GetStackIndex()), CpuRegister(TMP)); } else { DCHECK(location.IsConstant()); @@ -765,7 +775,6 @@ void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) { HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -950,7 +959,7 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) { LocationSummary* locations = comp->GetLocations(); CpuRegister reg = locations->Out().AsRegister<CpuRegister>(); // Clear register: setcc only sets the low byte. 
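The xorq-to-xorl swaps in the following hunks, and the new Load64BitValue calls, both lean on the x86-64 rule that a 32-bit operation zero-extends into the upper half of its destination: the 32-bit form is therefore equivalent for clearing a register and saves the REX.W prefix byte. Load64BitValue's body is not part of these hunks; a plausible shape, inferred purely from its call sites, would be:

// Assumed sketch, not the committed implementation: pick the cheapest
// encoding for materializing a 64-bit constant.
void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
  if (value == 0) {
    // 32-bit xor zero-extends, so this clears all 64 bits; shortest encoding.
    __ xorl(dest, dest);
  } else if (IsUint<32>(value)) {
    // movl with a 32-bit immediate also zero-extends.
    __ movl(dest, Immediate(static_cast<int32_t>(value)));
  } else {
    // Fall back to the full 64-bit immediate form.
    __ movq(dest, Immediate(value));
  }
}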
- __ xorq(reg, reg); + __ xorl(reg, reg); Location lhs = locations->InAt(0); Location rhs = locations->InAt(1); if (rhs.IsRegister()) { @@ -1234,7 +1243,7 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { codegen_->GenerateFrameExit(); } -Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) { switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -1264,7 +1273,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } case Primitive::kPrimFloat: { - uint32_t index = fp_index_++; + uint32_t index = float_index_++; stack_index_++; if (index < calling_convention.GetNumberOfFpuRegisters()) { return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); @@ -1274,7 +1283,7 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } case Primitive::kPrimDouble: { - uint32_t index = fp_index_++; + uint32_t index = float_index_++; stack_index_ += 2; if (index < calling_convention.GetNumberOfFpuRegisters()) { return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); @@ -1291,6 +1300,10 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type } void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { return; @@ -1309,6 +1322,10 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codeg } void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); + if (TryGenerateIntrinsicCode(invoke, codegen_)) { return; } @@ -1324,8 +1341,8 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); locations->AddTemp(Location::RegisterLocation(RDI)); - InvokeDexCallingConventionVisitor calling_convention_visitor; - for (size_t i = 0; i < invoke->InputCount(); i++) { + InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor; + for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { HInstruction* input = invoke->InputAt(i); locations->SetInAt(i, calling_convention_visitor.GetNextLocation(input->GetType())); } @@ -1405,8 +1422,8 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo size_t class_offset = mirror::Object::ClassOffset().SizeValue(); // Set the hidden argument. 
- __ movq(invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(), - Immediate(invoke->GetDexMethodIndex())); + CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>(); + codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex()); // temp = object->GetClass(); if (receiver.IsStackSlot()) { @@ -1842,7 +1859,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; - __ movq(output, Immediate(kPrimLongMax)); + codegen_->Load64BitValue(output, kPrimLongMax); // temp = long-to-float(output) __ cvtsi2ss(temp, output, true); // if input >= temp goto done @@ -1855,7 +1872,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver __ jmp(&done); __ Bind(&nan); // output = 0 - __ xorq(output, output); + __ xorl(output, output); __ Bind(&done); break; } @@ -1867,7 +1884,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); Label done, nan; - __ movq(output, Immediate(kPrimLongMax)); + codegen_->Load64BitValue(output, kPrimLongMax); // temp = long-to-double(output) __ cvtsi2sd(temp, output, true); // if input >= temp goto done @@ -1880,7 +1897,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver __ jmp(&done); __ Bind(&nan); // output = 0 - __ xorq(output, output); + __ xorl(output, output); __ Bind(&done); break; } @@ -2469,7 +2486,7 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr case Primitive::kPrimLong: { if (instruction->IsRem()) { - __ xorq(output_register, output_register); + __ xorl(output_register, output_register); } else { __ movq(output_register, input_register); if (imm == -1) { @@ -2513,7 +2530,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); - __ movq(rdx, Immediate(std::abs(imm) - 1)); + codegen_->Load64BitValue(rdx, std::abs(imm) - 1); __ addq(rdx, numerator); __ testq(numerator, numerator); __ cmov(kGreaterEqual, rdx, numerator); @@ -2610,7 +2627,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat __ movq(numerator, rax); // RAX = magic - __ movq(rax, Immediate(magic)); + codegen_->Load64BitValue(rax, magic); // RDX:RAX = magic * numerator __ imulq(numerator); @@ -2639,8 +2656,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat if (IsInt<32>(imm)) { __ imulq(rdx, Immediate(static_cast<int32_t>(imm))); } else { - __ movq(numerator, Immediate(imm)); - __ imulq(rdx, numerator); + __ imulq(rdx, codegen_->LiteralInt64Address(imm)); } __ subq(rax, rdx); @@ -3006,8 +3022,8 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { InvokeRuntimeCallingConvention calling_convention; codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); - __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex())); - + codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), + instruction->GetTypeIndex()); __ gs()->call( Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true)); @@ -3028,7 +3044,8 @@ void 
LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2))); - __ movq(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(instruction->GetTypeIndex())); + codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), + instruction->GetTypeIndex()); __ gs()->call( Address::Absolute(GetThreadOffset<kX86_64WordSize>(instruction->GetEntrypoint()), true)); @@ -3750,7 +3767,7 @@ void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (instruction->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } @@ -3762,16 +3779,38 @@ void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) Location length_loc = locations->InAt(1); SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction, index_loc, length_loc); - codegen_->AddSlowPath(slow_path); - CpuRegister length = length_loc.AsRegister<CpuRegister>(); - if (index_loc.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); - __ cmpl(length, Immediate(value)); + if (length_loc.IsConstant()) { + int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant()); + if (index_loc.IsConstant()) { + // BCE will remove the bounds check if we are guaranteed to pass. + int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + if (index < 0 || index >= length) { + codegen_->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + } else { + // Some optimization after BCE may have generated this, and we should not + // generate a bounds check if it is a valid range. + } + return; + } + + // We have to reverse the jump condition because the length is the constant. 
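+ // Example: for `a[i]` with a constant length of 10 this emits + // `cmpl index_reg, Immediate(10)` followed by `j(kAboveEqual, slow_path)`; + // the unsigned comparison also sends a negative index to the slow path.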
+ CpuRegister index_reg = index_loc.AsRegister<CpuRegister>(); + __ cmpl(index_reg, Immediate(length)); + codegen_->AddSlowPath(slow_path); + __ j(kAboveEqual, slow_path->GetEntryLabel()); } else { - __ cmpl(length, index_loc.AsRegister<CpuRegister>()); + CpuRegister length = length_loc.AsRegister<CpuRegister>(); + if (index_loc.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); + __ cmpl(length, Immediate(value)); + } else { + __ cmpl(length, index_loc.AsRegister<CpuRegister>()); + } + codegen_->AddSlowPath(slow_path); + __ j(kBelowEqual, slow_path->GetEntryLabel()); } - __ j(kBelowEqual, slow_path->GetEntryLabel()); } void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, @@ -3828,8 +3867,19 @@ void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instructio void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathX86_64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath()); + if (slow_path == nullptr) { + slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + __ gs()->cmpw(Address::Absolute( Thread::ThreadFlagsOffset<kX86_64WordSize>().Int32Value(), true), Immediate(0)); if (successor == nullptr) { @@ -3902,45 +3952,42 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } else if (constant->IsLongConstant()) { int64_t value = constant->AsLongConstant()->GetValue(); if (destination.IsRegister()) { - __ movq(destination.AsRegister<CpuRegister>(), Immediate(value)); + codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; - __ movq(CpuRegister(TMP), Immediate(value)); + codegen_->Load64BitValue(CpuRegister(TMP), value); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } } else if (constant->IsFloatConstant()) { float fp_value = constant->AsFloatConstant()->GetValue(); int32_t value = bit_cast<int32_t, float>(fp_value); - Immediate imm(value); if (destination.IsFpuRegister()) { XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); if (value == 0) { // easy FP 0.0. 
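+ // Note: `value` is the raw bit pattern, so only +0.0f takes this path; + // -0.0f (0x80000000) falls through to the constant-area load below.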
__ xorps(dest, dest); } else { - __ movl(CpuRegister(TMP), imm); - __ movd(dest, CpuRegister(TMP)); + __ movss(dest, codegen_->LiteralFloatAddress(fp_value)); } } else { DCHECK(destination.IsStackSlot()) << destination; + Immediate imm(value); __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); } } else { DCHECK(constant->IsDoubleConstant()) << constant->DebugName(); double fp_value = constant->AsDoubleConstant()->GetValue(); int64_t value = bit_cast<int64_t, double>(fp_value); - Immediate imm(value); if (destination.IsFpuRegister()) { XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); if (value == 0) { __ xorpd(dest, dest); } else { - __ movq(CpuRegister(TMP), imm); - __ movd(dest, CpuRegister(TMP)); + __ movsd(dest, codegen_->LiteralDoubleAddress(fp_value)); } } else { DCHECK(destination.IsDoubleStackSlot()) << destination; - __ movq(CpuRegister(TMP), imm); + codegen_->Load64BitValue(CpuRegister(TMP), value); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } } @@ -4399,6 +4446,17 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) { LOG(FATAL) << "Unreachable"; } +void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { + if (value == 0) { + __ xorl(dest, dest); + } else if (value > 0 && IsInt<32>(value)) { + // We can use a 32 bit move, as it will zero-extend and is one byte shorter. + __ movl(dest, Immediate(static_cast<int32_t>(value))); + } else { + __ movq(dest, Immediate(value)); + } +} + void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { // Generate the constant area if needed. X86_64Assembler* assembler = GetAssembler(); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 6cdc82262c..480ea6b9c9 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -37,7 +37,7 @@ static constexpr FloatRegister kParameterFloatRegisters[] = static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters); -static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX }; +static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX, RCX }; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 }; @@ -68,22 +68,17 @@ class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegis DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); }; -class InvokeDexCallingConventionVisitor { +class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventionVisitor { public: - InvokeDexCallingConventionVisitor() : gp_index_(0), fp_index_(0), stack_index_(0) {} + InvokeDexCallingConventionVisitorX86_64() {} + virtual ~InvokeDexCallingConventionVisitorX86_64() {} - Location GetNextLocation(Primitive::Type type); + Location GetNextLocation(Primitive::Type type) OVERRIDE; private: InvokeDexCallingConvention calling_convention; - // The current index for cpu registers. - uint32_t gp_index_; - // The current index for fpu registers. - uint32_t fp_index_; - // The current stack index. 
- uint32_t stack_index_; - - DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitor); + + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorX86_64); }; class CodeGeneratorX86_64; @@ -147,7 +142,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor { void HandleFieldGet(HInstruction* instruction); CodeGeneratorX86_64* const codegen_; - InvokeDexCallingConventionVisitor parameter_visitor_; + InvokeDexCallingConventionVisitorX86_64 parameter_visitor_; DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64); }; @@ -287,6 +282,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { Address LiteralInt32Address(int32_t v); Address LiteralInt64Address(int64_t v); + // Load a 64 bit value into a register in the most efficient manner. + void Load64BitValue(CpuRegister dest, int64_t value); + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 94f56e5d3e..bfed1a89de 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -225,7 +225,7 @@ static void RunCodeOptimized(HGraph* graph, static void TestCode(const uint16_t* data, bool has_result = false, int32_t expected = 0) { ArenaPool pool; ArenaAllocator arena(&pool); - HGraph* graph = new (&arena) HGraph(&arena); + HGraph* graph = CreateGraph(&arena); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -238,7 +238,7 @@ static void TestCode(const uint16_t* data, bool has_result = false, int32_t expe static void TestCodeLong(const uint16_t* data, bool has_result, int64_t expected) { ArenaPool pool; ArenaAllocator arena(&pool); - HGraph* graph = new (&arena) HGraph(&arena); + HGraph* graph = CreateGraph(&arena); HGraphBuilder builder(graph, Primitive::kPrimLong); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -504,7 +504,7 @@ TEST(CodegenTest, NonMaterializedCondition) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -623,7 +623,7 @@ TEST(CodegenTest, MaterializedCondition1) { for (size_t i = 0; i < arraysize(lhs); i++) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry_block); @@ -669,7 +669,7 @@ TEST(CodegenTest, MaterializedCondition2) { for (size_t i = 0; i < arraysize(lhs); i++) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry_block); diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index b7a92b5ae5..20ce1105ce 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -28,6 +28,7 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { void VisitShift(HBinaryOperation* shift); void VisitAnd(HAnd* instruction) OVERRIDE; + void VisitCompare(HCompare* instruction) OVERRIDE; void VisitMul(HMul* instruction) 
OVERRIDE; void VisitOr(HOr* instruction) OVERRIDE; void VisitRem(HRem* instruction) OVERRIDE; @@ -70,6 +71,14 @@ void HConstantFolding::Run() { inst->ReplaceWith(constant); inst->GetBlock()->RemoveInstruction(inst); } + } else if (inst->IsTypeConversion()) { + // Constant folding: replace `TypeConversion(a)' with a constant at + // compile time if `a' is a constant. + HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation(); + if (constant != nullptr) { + inst->ReplaceWith(constant); + inst->GetBlock()->RemoveInstruction(inst); + } } else if (inst->IsDivZeroCheck()) { // We can safely remove the check if the input is a non-null constant. HDivZeroCheck* check = inst->AsDivZeroCheck(); @@ -108,6 +117,26 @@ void InstructionWithAbsorbingInputSimplifier::VisitAnd(HAnd* instruction) { } } +void InstructionWithAbsorbingInputSimplifier::VisitCompare(HCompare* instruction) { + HConstant* input_cst = instruction->GetConstantRight(); + if (input_cst != nullptr) { + HInstruction* input_value = instruction->GetLeastConstantLeft(); + if (Primitive::IsFloatingPointType(input_value->GetType()) && + ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->IsNaN()) || + (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->IsNaN()))) { + // Replace code looking like + // CMP{G,L} dst, src, NaN + // with + // CONSTANT +1 (gt bias) + // or + // CONSTANT -1 (lt bias) + instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimInt, + (instruction->IsGtBias() ? 1 : -1))); + instruction->GetBlock()->RemoveInstruction(instruction); + } + } +} + void InstructionWithAbsorbingInputSimplifier::VisitMul(HMul* instruction) { HConstant* input_cst = instruction->GetConstantRight(); Primitive::Type type = instruction->GetType(); diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h index ac00824e33..66ff57855e 100644 --- a/compiler/optimizing/constant_folding.h +++ b/compiler/optimizing/constant_folding.h @@ -32,8 +32,8 @@ namespace art { */ class HConstantFolding : public HOptimization { public: - explicit HConstantFolding(HGraph* graph) - : HOptimization(graph, true, kConstantFoldingPassName) {} + explicit HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName) + : HOptimization(graph, true, name) {} void Run() OVERRIDE; diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 02ad675dc3..422223f5e0 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -572,14 +572,19 @@ TEST(ConstantFolding, IntConstantFoldingAndJumps) { }; // Expected difference after dead code elimination. - diff_t expected_dce_diff = { - { " 3: IntConstant\n", removed }, - { " 13: IntConstant\n", removed }, - { " 18: IntConstant\n", removed }, - { " 24: IntConstant\n", removed }, - { " 34: IntConstant\n", removed }, - }; - std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); + std::string expected_after_dce = + "BasicBlock 0, succ: 1\n" + " 5: IntConstant []\n" + " 30: SuspendCheck\n" + " 32: IntConstant []\n" + " 33: IntConstant []\n" + " 35: IntConstant [28]\n" + " 31: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 5\n" + " 21: SuspendCheck\n" + " 28: Return(35)\n" + "BasicBlock 5, pred: 1\n" + " 29: Exit\n"; TestCode(data, expected_before, @@ -647,13 +652,15 @@ TEST(ConstantFolding, ConstantCondition) { ASSERT_EQ(inst->AsIntConstant()->GetValue(), 1); }; - // Expected difference after dead code elimination. 
- diff_t expected_dce_diff = { - { " 3: IntConstant [9, 15, 22]\n", " 3: IntConstant [9, 22]\n" }, - { " 22: Phi(3, 5) [15]\n", " 22: Phi(3, 5)\n" }, - { " 15: Add(22, 3)\n", removed } - }; - std::string expected_after_dce = Patch(expected_after_cf, expected_dce_diff); + // Expected graph after dead code elimination. + std::string expected_after_dce = + "BasicBlock 0, succ: 1\n" + " 19: SuspendCheck\n" + " 20: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 4\n" + " 17: ReturnVoid\n" + "BasicBlock 4, pred: 1\n" + " 18: Exit\n"; TestCode(data, expected_before, diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 8045cc5027..b31de98e25 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -17,13 +17,97 @@ #include "dead_code_elimination.h" #include "base/bit_vector-inl.h" +#include "ssa_phi_elimination.h" namespace art { -void HDeadCodeElimination::Run() { +static void MarkReachableBlocks(HBasicBlock* block, ArenaBitVector* visited) { + int block_id = block->GetBlockId(); + if (visited->IsBitSet(block_id)) { + return; + } + visited->SetBit(block_id); + + HInstruction* last_instruction = block->GetLastInstruction(); + if (last_instruction->IsIf()) { + HIf* if_instruction = last_instruction->AsIf(); + HInstruction* condition = if_instruction->InputAt(0); + if (!condition->IsIntConstant()) { + MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited); + MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited); + } else if (condition->AsIntConstant()->IsOne()) { + MarkReachableBlocks(if_instruction->IfTrueSuccessor(), visited); + } else { + DCHECK(condition->AsIntConstant()->IsZero()); + MarkReachableBlocks(if_instruction->IfFalseSuccessor(), visited); + } + } else { + for (size_t i = 0, e = block->GetSuccessors().Size(); i < e; ++i) { + MarkReachableBlocks(block->GetSuccessors().Get(i), visited); + } + } +} + +static void MarkLoopHeadersContaining(const HBasicBlock& block, ArenaBitVector* set) { + for (HLoopInformationOutwardIterator it(block); !it.Done(); it.Advance()) { + set->SetBit(it.Current()->GetHeader()->GetBlockId()); + } +} + +void HDeadCodeElimination::MaybeRecordDeadBlock(HBasicBlock* block) { + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedDeadInstruction, + block->GetPhis().CountSize() + block->GetInstructions().CountSize()); + } +} + +void HDeadCodeElimination::RemoveDeadBlocks() { + // Classify blocks as reachable/unreachable. + ArenaAllocator* allocator = graph_->GetArena(); + ArenaBitVector live_blocks(allocator, graph_->GetBlocks().Size(), false); + ArenaBitVector affected_loops(allocator, graph_->GetBlocks().Size(), false); + + MarkReachableBlocks(graph_->GetEntryBlock(), &live_blocks); + + // Remove all dead blocks. Iterate in post order because removal needs the + // block's chain of dominators and nested loops need to be updated from the + // inside out. + for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + int id = block->GetBlockId(); + if (live_blocks.IsBitSet(id)) { + if (affected_loops.IsBitSet(id)) { + DCHECK(block->IsLoopHeader()); + block->GetLoopInformation()->Update(); + } + } else { + MaybeRecordDeadBlock(block); + MarkLoopHeadersContaining(*block, &affected_loops); + block->DisconnectAndDelete(); + } + } + + // Connect successive blocks created by dead branches. Order does not matter. 
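+ // Example: if an `if` on the constant 1 had successors B2 (live) and + // B3 (dead), removing B3 above leaves the chain block -> B2 -> merge with + // single successors and single predecessors, which the loop below folds + // into one block.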
+ for (HReversePostOrderIterator it(*graph_); !it.Done();) { + HBasicBlock* block = it.Current(); + if (block->IsEntryBlock() || block->GetSuccessors().Size() != 1u) { + it.Advance(); + continue; + } + HBasicBlock* successor = block->GetSuccessors().Get(0); + if (successor->IsExitBlock() || successor->GetPredecessors().Size() != 1u) { + it.Advance(); + continue; + } + block->MergeWith(successor); + + // Reiterate on this block in case it can be merged with its new successor. + } +} + +void HDeadCodeElimination::RemoveDeadInstructions() { // Process basic blocks in post-order in the dominator tree, so that - // a dead instruction depending on another dead instruction is - // removed. + // a dead instruction depending on another dead instruction is removed. for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) { HBasicBlock* block = b.Current(); // Traverse this block's instructions in backward order and remove @@ -47,4 +131,10 @@ void HDeadCodeElimination::Run() { } } +void HDeadCodeElimination::Run() { + RemoveDeadBlocks(); + SsaRedundantPhiElimination(graph_).Run(); + RemoveDeadInstructions(); +} + } // namespace art diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index cee9364c84..59a57c4345 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -31,15 +31,19 @@ class HDeadCodeElimination : public HOptimization { public: HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats = nullptr, - const char* name = kDeadCodeEliminationPassName) + const char* name = kInitialDeadCodeEliminationPassName) : HOptimization(graph, true, name, stats) {} void Run() OVERRIDE; - static constexpr const char* kDeadCodeEliminationPassName = - "dead_code_elimination"; + static constexpr const char* kInitialDeadCodeEliminationPassName = "dead_code_elimination"; + static constexpr const char* kFinalDeadCodeEliminationPassName = "dead_code_elimination_final"; private: + void MaybeRecordDeadBlock(HBasicBlock* block); + void RemoveDeadBlocks(); + void RemoveDeadInstructions(); + DISALLOW_COPY_AND_ASSIGN(HDeadCodeElimination); }; diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index 98ae1ec5d3..3209d3eb18 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -169,20 +169,25 @@ TEST(DeadCodeElimination, AdditionsAndInconditionalJumps) { "BasicBlock 5, pred: 4\n" " 28: Exit\n"; - // Expected difference after dead code elimination. - diff_t expected_diff = { - { " 13: IntConstant [14]\n", removed }, - { " 24: IntConstant [25]\n", removed }, - { " 14: Add(19, 13) [25]\n", removed }, - // The SuspendCheck instruction following this Add instruction - // inserts the latter in an environment, thus making it "used" and - // therefore non removable. It ensues that some other Add and - // IntConstant instructions cannot be removed, as they are direct - // or indirect inputs of the initial Add instruction. - { " 19: Add(9, 18) [14]\n", " 19: Add(9, 18) []\n" }, - { " 25: Add(14, 24)\n", removed }, - }; - std::string expected_after = Patch(expected_before, expected_diff); + // The SuspendCheck instruction following this Add instruction + // inserts the latter in an environment, thus making it "used" and + // therefore non removable. 
It ensures that some other Add and + // IntConstant instructions cannot be removed, as they are direct + // or indirect inputs of the initial Add instruction. + std::string expected_after = + "BasicBlock 0, succ: 1\n" + " 3: IntConstant [9]\n" + " 5: IntConstant [9]\n" + " 18: IntConstant [19]\n" + " 29: SuspendCheck\n" + " 30: Goto 1\n" + "BasicBlock 1, pred: 0, succ: 5\n" + " 9: Add(3, 5) [19]\n" + " 19: Add(9, 18) []\n" + " 21: SuspendCheck\n" + " 27: ReturnVoid\n" + "BasicBlock 5, pred: 1\n" + " 28: Exit\n"; TestCode(data, expected_before, expected_after); } diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 61a7697301..78ae1dd960 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -27,7 +27,7 @@ namespace art { static void TestCode(const uint16_t* data, const int* blocks, size_t blocks_length) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index 2bfecc696a..29aa97a83a 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -28,7 +28,7 @@ namespace art { static HGraph* TestCode(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); @@ -235,14 +235,13 @@ TEST(FindLoopsTest, Loop4) { TestBlock(graph, 0, false, -1); // entry block TestBlock(graph, 1, false, -1); // pre header - const int blocks2[] = {2, 3, 4, 5, 8}; - TestBlock(graph, 2, true, 2, blocks2, 5); // loop header + const int blocks2[] = {2, 3, 4, 5}; + TestBlock(graph, 2, true, 2, blocks2, arraysize(blocks2)); // loop header TestBlock(graph, 3, false, 2); // block in loop - TestBlock(graph, 4, false, 2); // original back edge - TestBlock(graph, 5, false, 2); // original back edge + TestBlock(graph, 4, false, 2); // back edge + TestBlock(graph, 5, false, 2); // back edge TestBlock(graph, 6, false, -1); // return block TestBlock(graph, 7, false, -1); // exit block - TestBlock(graph, 8, false, 2); // synthesized back edge } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 8950635d6a..fd28f0b83f 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -121,6 +121,18 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { } } +void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) { + if (!GetGraph()->HasBoundsChecks()) { + AddError(StringPrintf("Instruction %s:%d is a HBoundsCheck, " + "but HasBoundsChecks() returns false", + check->DebugName(), + check->GetId())); + } + + // Perform the instruction base checks too. + VisitInstruction(check); +} + void GraphChecker::VisitInstruction(HInstruction* instruction) { if (seen_ids_.IsBitSet(instruction->GetId())) { AddError(StringPrintf("Instruction id %d is duplicate in graph.", @@ -158,7 +170,8 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } } - // Ensure the uses of `instruction` are defined in a block of the graph. 
+ // Ensure the uses of `instruction` are defined in a block of the graph, + // and the entry in the use list is consistent. for (HUseIterator<HInstruction*> use_it(instruction->GetUses()); !use_it.Done(); use_it.Advance()) { HInstruction* use = use_it.Current()->GetUser(); @@ -172,6 +185,27 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { use->GetId(), instruction->GetId())); } + size_t use_index = use_it.Current()->GetIndex(); + if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) { + AddError(StringPrintf("User %s:%d of instruction %d has a wrong " + "UseListNode index.", + use->DebugName(), + use->GetId(), + instruction->GetId())); + } + } + + // Ensure the environment uses entries are consistent. + for (HUseIterator<HEnvironment*> use_it(instruction->GetEnvUses()); + !use_it.Done(); use_it.Advance()) { + HEnvironment* use = use_it.Current()->GetUser(); + size_t use_index = use_it.Current()->GetIndex(); + if ((use_index >= use->Size()) || (use->GetInstructionAt(use_index) != instruction)) { + AddError(StringPrintf("Environment user of %s:%d has a wrong " + "UseListNode index.", + instruction->DebugName(), + instruction->GetId())); + } } // Ensure 'instruction' has pointers to its inputs' use entries. @@ -179,7 +213,11 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i); HInstruction* input = input_record.GetInstruction(); HUseListNode<HInstruction*>* use_node = input_record.GetUseNode(); - if (use_node == nullptr || !input->GetUses().Contains(use_node)) { + size_t use_index = use_node->GetIndex(); + if ((use_node == nullptr) + || !input->GetUses().Contains(use_node) + || (use_index >= e) + || (use_index != i)) { AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry " "at input %u (%s:%d).", instruction->DebugName(), @@ -191,6 +229,30 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } } +void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + VisitInstruction(invoke); + + if (invoke->IsStaticWithExplicitClinitCheck()) { + size_t last_input_index = invoke->InputCount() - 1; + HInstruction* last_input = invoke->InputAt(last_input_index); + if (last_input == nullptr) { + AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check " + "has a null pointer as last input.", + invoke->DebugName(), + invoke->GetId())); + } + if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) { + AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check " + "has a last instruction (%s:%d) which is neither a clinit check " + "nor a load class instruction.", + invoke->DebugName(), + invoke->GetId(), + last_input->DebugName(), + last_input->GetId())); + } + } +} + void SSAChecker::VisitBasicBlock(HBasicBlock* block) { super_type::VisitBasicBlock(block); @@ -226,6 +288,7 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) { void SSAChecker::CheckLoop(HBasicBlock* loop_header) { int id = loop_header->GetBlockId(); + HLoopInformation* loop_information = loop_header->GetLoopInformation(); // Ensure the pre-header block is first in the list of // predecessors of a loop header. @@ -235,57 +298,61 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { id)); } - // Ensure the loop header has only two predecessors and that only the - // second one is a back edge. 
+ // Ensure the loop header has only one incoming branch and the remaining + // predecessors are back edges. size_t num_preds = loop_header->GetPredecessors().Size(); if (num_preds < 2) { AddError(StringPrintf( "Loop header %d has less than two predecessors: %zu.", id, num_preds)); - } else if (num_preds > 2) { - AddError(StringPrintf( - "Loop header %d has more than two predecessors: %zu.", - id, - num_preds)); } else { - HLoopInformation* loop_information = loop_header->GetLoopInformation(); HBasicBlock* first_predecessor = loop_header->GetPredecessors().Get(0); if (loop_information->IsBackEdge(*first_predecessor)) { AddError(StringPrintf( "First predecessor of loop header %d is a back edge.", id)); } - HBasicBlock* second_predecessor = loop_header->GetPredecessors().Get(1); - if (!loop_information->IsBackEdge(*second_predecessor)) { - AddError(StringPrintf( - "Second predecessor of loop header %d is not a back edge.", - id)); + for (size_t i = 1, e = loop_header->GetPredecessors().Size(); i < e; ++i) { + HBasicBlock* predecessor = loop_header->GetPredecessors().Get(i); + if (!loop_information->IsBackEdge(*predecessor)) { + AddError(StringPrintf( + "Loop header %d has multiple incoming (non back edge) blocks.", + id)); + } } } - // Ensure there is only one back edge per loop. - size_t num_back_edges = - loop_header->GetLoopInformation()->GetBackEdges().Size(); + const ArenaBitVector& loop_blocks = loop_information->GetBlocks(); + + // Ensure back edges belong to the loop. + size_t num_back_edges = loop_information->GetBackEdges().Size(); if (num_back_edges == 0) { AddError(StringPrintf( "Loop defined by header %d has no back edge.", id)); - } else if (num_back_edges > 1) { - AddError(StringPrintf( - "Loop defined by header %d has several back edges: %zu.", - id, - num_back_edges)); + } else { + for (size_t i = 0; i < num_back_edges; ++i) { + int back_edge_id = loop_information->GetBackEdges().Get(i)->GetBlockId(); + if (!loop_blocks.IsBitSet(back_edge_id)) { + AddError(StringPrintf( + "Loop defined by header %d has an invalid back edge %d.", + id, + back_edge_id)); + } + } } - // Ensure all blocks in the loop are dominated by the loop header. - const ArenaBitVector& loop_blocks = - loop_header->GetLoopInformation()->GetBlocks(); + // Ensure all blocks in the loop are live and dominated by the loop header. for (uint32_t i : loop_blocks.Indexes()) { HBasicBlock* loop_block = GetGraph()->GetBlocks().Get(i); - if (!loop_header->Dominates(loop_block)) { + if (loop_block == nullptr) { + AddError(StringPrintf("Loop defined by header %d contains a previously removed block %d.", + id, + i)); + } else if (!loop_header->Dominates(loop_block)) { AddError(StringPrintf("Loop block %d not dominated by loop header %d.", - loop_block->GetBlockId(), + i, id)); } } @@ -296,7 +363,7 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) { if (!loop_blocks.IsSubsetOf(&outer_info->GetBlocks())) { AddError(StringPrintf("Blocks of loop defined by header %d are not a subset of blocks of " "an outer loop defined by header %d.", - loop_header->GetBlockId(), + id, outer_info->GetHeader()->GetBlockId())); } } @@ -319,8 +386,9 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { // Ensure an instruction having an environment is dominated by the // instructions contained in the environment. 
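+ // With inlining, environments form a chain: an inlined instruction's + // environment points through GetParent() to the environment of its call + // site, so the whole chain has to be checked, not just the innermost + // environment.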
- HEnvironment* environment = instruction->GetEnvironment(); - if (environment != nullptr) { + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* env_instruction = environment->GetInstructionAt(i); if (env_instruction != nullptr @@ -483,7 +551,7 @@ void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) { Primitive::PrettyDescriptor(op->InputAt(1)->GetType()))); } } else { - if (PrimitiveKind(op->InputAt(1)->GetType()) != PrimitiveKind(op->InputAt(0)->GetType())) { + if (PrimitiveKind(op->InputAt(0)->GetType()) != PrimitiveKind(op->InputAt(1)->GetType())) { AddError(StringPrintf( "Binary operation %s %d has inputs of different types: " "%s, and %s.", @@ -508,7 +576,7 @@ void SSAChecker::VisitBinaryOperation(HBinaryOperation* op) { "from its input type: %s vs %s.", op->DebugName(), op->GetId(), Primitive::PrettyDescriptor(op->GetType()), - Primitive::PrettyDescriptor(op->InputAt(1)->GetType()))); + Primitive::PrettyDescriptor(op->InputAt(0)->GetType()))); } } } diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 24fee373f9..b4314da03b 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -42,6 +42,12 @@ class GraphChecker : public HGraphDelegateVisitor { // Check `instruction`. void VisitInstruction(HInstruction* instruction) OVERRIDE; + // Perform control-flow graph checks on instruction. + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; + + // Check that the HasBoundsChecks() flag is set for bounds checks. + void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; + // Was the last visit of the graph valid? 
bool IsValid() const { return errors_.empty(); diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index 923468ff16..eca0d9344f 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -30,7 +30,7 @@ namespace art { * 1: Exit */ HGraph* CreateSimpleCFG(ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry_block = new (allocator) HBasicBlock(graph); entry_block->AddInstruction(new (allocator) HGoto()); graph->AddBlock(entry_block); diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index 50398b4790..59d50926ad 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -73,7 +73,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock1) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* if_true = createGotoBlock(graph, &allocator); @@ -108,7 +108,7 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock2) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* if_false = createGotoBlock(graph, &allocator); @@ -143,7 +143,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); @@ -178,7 +178,7 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); HBasicBlock* return_block = createReturnBlock(graph, &allocator); @@ -213,7 +213,7 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders1) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* first_if_block = createIfBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); @@ -252,7 +252,7 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders2) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry_block = createEntryBlock(graph, &allocator); HBasicBlock* first_if_block = createIfBlock(graph, &allocator); HBasicBlock* if_block = createIfBlock(graph, &allocator); @@ -288,7 +288,7 @@ TEST(GraphTest, InsertInstructionBefore) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* block = createGotoBlock(graph, &allocator); HInstruction* got = block->GetLastInstruction(); 
ASSERT_TRUE(got->IsControlFlow()); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index ca9cbc3d01..be287558e9 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -17,14 +17,75 @@ #include "graph_visualizer.h" #include "code_generator.h" +#include "dead_code_elimination.h" #include "licm.h" #include "nodes.h" #include "optimization.h" #include "register_allocator.h" #include "ssa_liveness_analysis.h" +#include <cctype> +#include <sstream> + namespace art { +static bool HasWhitespace(const char* str) { + DCHECK(str != nullptr); + while (str[0] != 0) { + if (isspace(str[0])) { + return true; + } + str++; + } + return false; +} + +class StringList { + public: + enum Format { + kArrayBrackets, + kSetBrackets, + }; + + // Create an empty list + explicit StringList(Format format = kArrayBrackets) : format_(format), is_empty_(true) {} + + // Construct StringList from a linked list. List element class T + // must provide methods `GetNext` and `Dump`. + template<class T> + explicit StringList(T* first_entry, Format format = kArrayBrackets) : StringList(format) { + for (T* current = first_entry; current != nullptr; current = current->GetNext()) { + current->Dump(NewEntryStream()); + } + } + + std::ostream& NewEntryStream() { + if (is_empty_) { + is_empty_ = false; + } else { + sstream_ << ","; + } + return sstream_; + } + + private: + Format format_; + bool is_empty_; + std::ostringstream sstream_; + + friend std::ostream& operator<<(std::ostream& os, const StringList& list); +}; + +std::ostream& operator<<(std::ostream& os, const StringList& list) { + switch (list.format_) { + case StringList::kArrayBrackets: return os << "[" << list.sstream_.str() << "]"; + case StringList::kSetBrackets: return os << "{" << list.sstream_.str() << "}"; + default: + LOG(FATAL) << "Invalid StringList format"; + UNREACHABLE(); + } +} + /** * HGraph visitor to generate a file suitable for the c1visualizer tool and IRHydra. 
*/ @@ -124,76 +185,84 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_<< std::endl; } - void DumpLocation(Location location) { + void DumpLocation(std::ostream& stream, const Location& location) { if (location.IsRegister()) { - codegen_.DumpCoreRegister(output_, location.reg()); + codegen_.DumpCoreRegister(stream, location.reg()); } else if (location.IsFpuRegister()) { - codegen_.DumpFloatingPointRegister(output_, location.reg()); + codegen_.DumpFloatingPointRegister(stream, location.reg()); } else if (location.IsConstant()) { - output_ << "constant"; + stream << "#"; HConstant* constant = location.GetConstant(); if (constant->IsIntConstant()) { - output_ << " " << constant->AsIntConstant()->GetValue(); + stream << constant->AsIntConstant()->GetValue(); } else if (constant->IsLongConstant()) { - output_ << " " << constant->AsLongConstant()->GetValue(); + stream << constant->AsLongConstant()->GetValue(); } } else if (location.IsInvalid()) { - output_ << "invalid"; + stream << "invalid"; } else if (location.IsStackSlot()) { - output_ << location.GetStackIndex() << "(sp)"; + stream << location.GetStackIndex() << "(sp)"; } else if (location.IsFpuRegisterPair()) { - codegen_.DumpFloatingPointRegister(output_, location.low()); - output_ << " and "; - codegen_.DumpFloatingPointRegister(output_, location.high()); + codegen_.DumpFloatingPointRegister(stream, location.low()); + stream << "|"; + codegen_.DumpFloatingPointRegister(stream, location.high()); } else if (location.IsRegisterPair()) { - codegen_.DumpCoreRegister(output_, location.low()); - output_ << " and "; - codegen_.DumpCoreRegister(output_, location.high()); + codegen_.DumpCoreRegister(stream, location.low()); + stream << "|"; + codegen_.DumpCoreRegister(stream, location.high()); } else if (location.IsUnallocated()) { - output_ << "<U>"; + stream << "unallocated"; } else { DCHECK(location.IsDoubleStackSlot()); - output_ << "2x" << location.GetStackIndex() << "(sp)"; + stream << "2x" << location.GetStackIndex() << "(sp)"; + } + } + + std::ostream& StartAttributeStream(const char* name = nullptr) { + if (name == nullptr) { + output_ << " "; + } else { + DCHECK(!HasWhitespace(name)) << "Checker does not allow spaces in attributes"; + output_ << " " << name << ":"; } + return output_; } void VisitParallelMove(HParallelMove* instruction) OVERRIDE { - output_ << " ("; + StartAttributeStream("liveness") << instruction->GetLifetimePosition(); + StringList moves; for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) { MoveOperands* move = instruction->MoveOperandsAt(i); - DumpLocation(move->GetSource()); - output_ << " -> "; - DumpLocation(move->GetDestination()); - if (i + 1 != e) { - output_ << ", "; - } + std::ostream& str = moves.NewEntryStream(); + DumpLocation(str, move->GetSource()); + str << "->"; + DumpLocation(str, move->GetDestination()); } - output_ << ")"; - output_ << " (liveness: " << instruction->GetLifetimePosition() << ")"; + StartAttributeStream("moves") << moves; } void VisitIntConstant(HIntConstant* instruction) OVERRIDE { - output_ << " " << instruction->GetValue(); + StartAttributeStream() << instruction->GetValue(); } void VisitLongConstant(HLongConstant* instruction) OVERRIDE { - output_ << " " << instruction->GetValue(); + StartAttributeStream() << instruction->GetValue(); } void VisitFloatConstant(HFloatConstant* instruction) OVERRIDE { - output_ << " " << instruction->GetValue(); + StartAttributeStream() << instruction->GetValue(); } void VisitDoubleConstant(HDoubleConstant* instruction) 
OVERRIDE { - output_ << " " << instruction->GetValue(); + StartAttributeStream() << instruction->GetValue(); } void VisitPhi(HPhi* phi) OVERRIDE { - output_ << " " << phi->GetRegNumber(); + StartAttributeStream("reg") << phi->GetRegNumber(); } void VisitMemoryBarrier(HMemoryBarrier* barrier) OVERRIDE { - output_ << " " << barrier->GetBarrierKind(); + StartAttributeStream("kind") << barrier->GetBarrierKind(); } bool IsPass(const char* name) { @@ -202,59 +271,66 @@ class HGraphVisualizerPrinter : public HGraphVisitor { void PrintInstruction(HInstruction* instruction) { output_ << instruction->DebugName(); - instruction->Accept(this); if (instruction->InputCount() > 0) { - output_ << " [ "; - for (HInputIterator inputs(instruction); !inputs.Done(); inputs.Advance()) { - output_ << GetTypeId(inputs.Current()->GetType()) << inputs.Current()->GetId() << " "; + StringList inputs; + for (HInputIterator it(instruction); !it.Done(); it.Advance()) { + inputs.NewEntryStream() << GetTypeId(it.Current()->GetType()) << it.Current()->GetId(); } - output_ << "]"; + StartAttributeStream() << inputs; } + instruction->Accept(this); if (instruction->HasEnvironment()) { - HEnvironment* env = instruction->GetEnvironment(); - output_ << " (env: [ "; - for (size_t i = 0, e = env->Size(); i < e; ++i) { - HInstruction* insn = env->GetInstructionAt(i); - if (insn != nullptr) { - output_ << GetTypeId(insn->GetType()) << insn->GetId() << " "; - } else { - output_ << " _ "; + StringList envs; + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { + StringList vregs; + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* insn = environment->GetInstructionAt(i); + if (insn != nullptr) { + vregs.NewEntryStream() << GetTypeId(insn->GetType()) << insn->GetId(); + } else { + vregs.NewEntryStream() << "_"; + } } + envs.NewEntryStream() << vregs; } - output_ << "])"; + StartAttributeStream("env") << envs; } if (IsPass(SsaLivenessAnalysis::kLivenessPassName) && is_after_pass_ && instruction->GetLifetimePosition() != kNoLifetime) { - output_ << " (liveness: " << instruction->GetLifetimePosition(); + StartAttributeStream("liveness") << instruction->GetLifetimePosition(); if (instruction->HasLiveInterval()) { - output_ << " "; - const LiveInterval& interval = *instruction->GetLiveInterval(); - interval.Dump(output_); + LiveInterval* interval = instruction->GetLiveInterval(); + StartAttributeStream("ranges") + << StringList(interval->GetFirstRange(), StringList::kSetBrackets); + StartAttributeStream("uses") << StringList(interval->GetFirstUse()); + StartAttributeStream("env_uses") << StringList(interval->GetFirstEnvironmentUse()); + StartAttributeStream("is_fixed") << interval->IsFixed(); + StartAttributeStream("is_split") << interval->IsSplit(); + StartAttributeStream("is_low") << interval->IsLowInterval(); + StartAttributeStream("is_high") << interval->IsHighInterval(); } - output_ << ")"; } else if (IsPass(RegisterAllocator::kRegisterAllocatorPassName) && is_after_pass_) { + StartAttributeStream("liveness") << instruction->GetLifetimePosition(); LocationSummary* locations = instruction->GetLocations(); if (locations != nullptr) { - output_ << " ( "; + StringList inputs; for (size_t i = 0; i < instruction->InputCount(); ++i) { - DumpLocation(locations->InAt(i)); - output_ << " "; - } - output_ << ")"; - if (locations->Out().IsValid()) { - output_ << " -> "; - DumpLocation(locations->Out()); + DumpLocation(inputs.NewEntryStream(), 
locations->InAt(i)); } + std::ostream& attr = StartAttributeStream("locations"); + attr << inputs << "->"; + DumpLocation(attr, locations->Out()); } - output_ << " (liveness: " << instruction->GetLifetimePosition() << ")"; - } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName)) { - output_ << " ( loop_header:"; + } else if (IsPass(LICM::kLoopInvariantCodeMotionPassName) + || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName)) { HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); if (info == nullptr) { - output_ << "null )"; + StartAttributeStream("loop") << "none"; } else { - output_ << "B" << info->GetHeader()->GetBlockId() << " )"; + StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId(); } } } @@ -274,7 +350,7 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << bci << " " << num_uses << " " << GetTypeId(instruction->GetType()) << instruction->GetId() << " "; PrintInstruction(instruction); - output_ << kEndInstructionMarker << std::endl; + output_ << " " << kEndInstructionMarker << std::endl; } } diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index a81d49aa0c..c3ce7e142a 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -29,7 +29,7 @@ TEST(GVNTest, LocalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -78,7 +78,7 @@ TEST(GVNTest, GlobalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -133,7 +133,7 @@ TEST(GVNTest, LoopFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -220,7 +220,7 @@ TEST(GVNTest, LoopSideEffects) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index bffd639e83..afffc7ab4f 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -130,6 +130,16 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, return false; } + if (invoke_instruction->IsInvokeStaticOrDirect() && + invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) { + // Case of a static method that cannot be inlined because it implicitly + // requires an initialization check of its declaring class. 
+ VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + << " is not inlined because it is static and requires a clinit" + << " check that cannot be emitted due to Dex cache limitations"; + return false; + } + if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, can_use_dex_cache)) { resolved_method->SetShouldNotInline(); return false; } @@ -160,7 +170,11 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, nullptr); HGraph* callee_graph = new (graph_->GetArena()) HGraph( - graph_->GetArena(), graph_->IsDebuggable(), graph_->GetCurrentInstructionId()); + graph_->GetArena(), + caller_dex_file, + method_index, + graph_->IsDebuggable(), + graph_->GetCurrentInstructionId()); OptimizingCompilerStats inline_stats; HGraphBuilder builder(callee_graph, @@ -258,8 +272,8 @@ bool HInliner::TryBuildAndInline(Handle<mirror::ArtMethod> resolved_method, callee_graph->InlineInto(graph_, invoke_instruction); - if (callee_graph->HasArrayAccesses()) { - graph_->SetHasArrayAccesses(true); + if (callee_graph->HasBoundsChecks()) { + graph_->SetHasBoundsChecks(true); } return true; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 2df7c166d8..46fad17b8f 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -137,13 +137,25 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); - if ((input_cst != nullptr) && input_cst->IsZero()) { - // Replace code looking like - // SHL dst, src, 0 - // with - // src - instruction->ReplaceWith(input_other); - instruction->GetBlock()->RemoveInstruction(instruction); + if (input_cst != nullptr) { + if (input_cst->IsZero()) { + // Replace code looking like + // SHL dst, src, 0 + // with + // src + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (instruction->IsShl() && input_cst->IsOne()) { + // Replace Shl looking like + // SHL dst, src, 1 + // with + // ADD dst, src, src + HAdd* add = new (GetGraph()->GetArena()) HAdd(instruction->GetType(), + input_other, + input_other); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add); + RecordSimplification(); + } } } @@ -377,15 +389,42 @@ void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) { return; } - if ((input_cst != nullptr) && input_cst->IsMinusOne() && - (Primitive::IsFloatingPointType(type) || Primitive::IsIntOrLongType(type))) { + if ((input_cst != nullptr) && input_cst->IsMinusOne()) { // Replace code looking like // DIV dst, src, -1 // with // NEG dst, src instruction->GetBlock()->ReplaceAndRemoveInstructionWith( - instruction, (new (GetGraph()->GetArena()) HNeg(type, input_other))); + instruction, new (GetGraph()->GetArena()) HNeg(type, input_other)); RecordSimplification(); + return; + } + + if ((input_cst != nullptr) && Primitive::IsFloatingPointType(type)) { + // Try replacing code looking like + // DIV dst, src, constant + // with + // MUL dst, src, 1 / constant + HConstant* reciprocal = nullptr; + if (type == Primitive::kPrimDouble) { + double value = input_cst->AsDoubleConstant()->GetValue(); + if (CanDivideByReciprocalMultiplyDouble(bit_cast<int64_t, double>(value))) { + reciprocal = GetGraph()->GetDoubleConstant(1.0 / value); + } + } else { + DCHECK_EQ(type, Primitive::kPrimFloat); + 
float value = input_cst->AsFloatConstant()->GetValue(); + if (CanDivideByReciprocalMultiplyFloat(bit_cast<int32_t, float>(value))) { + reciprocal = GetGraph()->GetFloatConstant(1.0f / value); + } + } + + if (reciprocal != nullptr) { + instruction->GetBlock()->ReplaceAndRemoveInstructionWith( + instruction, new (GetGraph()->GetArena()) HMul(type, input_other, reciprocal)); + RecordSimplification(); + return; + } } } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 20aa45f197..43fe3746ad 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -186,6 +186,8 @@ static Intrinsics GetIntrinsic(InlineMethod method) { return Intrinsics::kStringCharAt; case kIntrinsicCompareTo: return Intrinsics::kStringCompareTo; + case kIntrinsicGetCharsNoCheck: + return Intrinsics::kStringGetCharsNoCheck; case kIntrinsicIsEmptyOrLength: // The inliner can handle these two cases - and this is the preferred approach // since after inlining the call is no longer visible (as opposed to waiting @@ -194,6 +196,12 @@ static Intrinsics GetIntrinsic(InlineMethod method) { case kIntrinsicIndexOf: return ((method.d.data & kIntrinsicFlagBase0) == 0) ? Intrinsics::kStringIndexOfAfter : Intrinsics::kStringIndexOf; + case kIntrinsicNewStringFromBytes: + return Intrinsics::kStringNewStringFromBytes; + case kIntrinsicNewStringFromChars: + return Intrinsics::kStringNewStringFromChars; + case kIntrinsicNewStringFromString: + return Intrinsics::kStringNewStringFromString; case kIntrinsicCas: switch (GetType(method.d.data, false)) { @@ -280,6 +288,11 @@ static Intrinsics GetIntrinsic(InlineMethod method) { case kInlineOpIPut: return Intrinsics::kNone; + // String init cases, not intrinsics. + + case kInlineStringInit: + return Intrinsics::kNone; + // No default case to make the compiler warn on missing cases. } return Intrinsics::kNone; @@ -361,4 +374,3 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) } } // namespace art - diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index dbb7cbaa98..c243ef3f8b 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -17,8 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_ #define ART_COMPILER_OPTIMIZING_INTRINSICS_H_ +#include "code_generator.h" #include "nodes.h" #include "optimization.h" +#include "parallel_move_resolver.h" namespace art { @@ -76,6 +78,38 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS + static void MoveArguments(HInvoke* invoke, + CodeGenerator* codegen, + InvokeDexCallingConventionVisitor* calling_convention_visitor) { + if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) { + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + // When we do not run baseline, explicit clinit checks triggered by static + // invokes must have been pruned by art::PrepareForRegisterAllocation. + DCHECK(codegen->IsBaseline() || !invoke_static_or_direct->IsStaticWithExplicitClinitCheck()); + } + + if (invoke->GetNumberOfArguments() == 0) { + // No argument to move. + return; + } + + LocationSummary* locations = invoke->GetLocations(); + + // We're moving potentially two or more locations to locations that could overlap, so we need + // a parallel move resolver. 
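+ // For example, two arguments sitting in R1 and R2 that must land in R2 and R3
+ // would be corrupted by emitting the moves one at a time; the resolver schedules
+ // the moves and breaks cycles through a temporary.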
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+
+ for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
+ HInstruction* input = invoke->InputAt(i);
+ Location cc_loc = calling_convention_visitor->GetNextLocation(input->GetType());
+ Location actual_loc = locations->InAt(i);
+
+ parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
+ }
+
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ }
+
protected:
IntrinsicVisitor() {}
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 932192e4fd..dccfe9a0ca 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -48,7 +48,7 @@ static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGener
DCHECK_NE(type, Primitive::kPrimVoid);
- if (Primitive::IsIntegralType(type)) {
+ if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) {
if (type == Primitive::kPrimLong) {
Register trg_reg_lo = trg.AsRegisterPairLow<Register>();
Register trg_reg_hi = trg.AsRegisterPairHigh<Register>();
@@ -77,27 +77,9 @@ static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGener
}
}
-static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) {
- if (invoke->InputCount() == 0) {
- return;
- }
-
- LocationSummary* locations = invoke->GetLocations();
- InvokeDexCallingConventionVisitor calling_convention_visitor;
-
- // We're moving potentially two or more locations to locations that could overlap, so we need
- // a parallel move resolver.
- HParallelMove parallel_move(arena);
-
- for (size_t i = 0; i < invoke->InputCount(); i++) {
- HInstruction* input = invoke->InputAt(i);
- Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
- Location actual_loc = locations->InAt(i);
-
- parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
- }
-
- codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorARM* codegen) {
+ InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+ IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}
// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -116,7 +98,7 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM {
SaveLiveRegisters(codegen, invoke_->GetLocations());
- MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+ MoveArguments(invoke_, codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
@@ -809,10 +791,6 @@ void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) {
const MemberOffset value_offset = mirror::String::ValueOffset();
// Location of count
const MemberOffset count_offset = mirror::String::CountOffset();
- // Starting offset within data array
- const MemberOffset offset_offset = mirror::String::OffsetOffset();
- // Start of char data with array_
- const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t));
Register obj = locations->InAt(0).AsRegister<Register>(); // String object pointer.
Register idx = locations->InAt(1).AsRegister<Register>(); // Index of character.
@@ -834,15 +812,10 @@ void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) {
__ cmp(idx, ShifterOperand(temp));
__ b(slow_path->GetEntryLabel(), CS);
- // Index computation.
- __ ldr(temp, Address(obj, offset_offset.Int32Value())); // temp := str.offset. - __ ldr(array_temp, Address(obj, value_offset.Int32Value())); // array_temp := str.offset. - __ add(temp, temp, ShifterOperand(idx)); - DCHECK_EQ(data_offset.Int32Value() % 2, 0); // We'll compensate by shifting. - __ add(temp, temp, ShifterOperand(data_offset.Int32Value() / 2)); + __ add(array_temp, obj, ShifterOperand(value_offset.Int32Value())); // array_temp := str.value. // Load the value. - __ ldrh(out, Address(array_temp, temp, LSL, 1)); // out := array_temp[temp]. + __ ldrh(out, Address(array_temp, idx, LSL, 1)); // out := array_temp[idx]. __ Bind(slow_path->GetExitLabel()); } @@ -877,6 +850,169 @@ void IntrinsicCodeGeneratorARM::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void GenerateVisitStringIndexOf(HInvoke* invoke, + ArmAssembler* assembler, + CodeGeneratorARM* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + Register tmp_reg = locations->GetTemp(0).AsRegister<Register>(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeARM* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathARM(invoke); + codegen->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + Register char_reg = locations->InAt(1).AsRegister<Register>(); + __ LoadImmediate(tmp_reg, std::numeric_limits<uint16_t>::max()); + __ cmp(char_reg, ShifterOperand(tmp_reg)); + slow_path = new (allocator) IntrinsicSlowPathARM(invoke); + codegen->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel(), HI); + } + + if (start_at_zero) { + DCHECK_EQ(tmp_reg, R2); + // Start-index = 0. + __ LoadImmediate(tmp_reg, 0); + } + + __ LoadFromOffset(kLoadWord, LR, TR, + QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pIndexOf).Int32Value()); + __ blx(LR); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderARM::VisitStringIndexOf(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetOut(Location::RegisterLocation(R0)); + + // Need a temp for slow-path codepoint compare, and need to send start-index=0. 
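+ // (Pinning the temp to R2, the third runtime-convention register, lets it double
+ // as the start-index argument, which the generator must zero before the call.)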
+ locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); +} + +void IntrinsicCodeGeneratorARM::VisitStringIndexOf(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderARM::VisitStringIndexOfAfter(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetOut(Location::RegisterLocation(R0)); + + // Need a temp for slow-path codepoint compare. + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + +void IntrinsicLocationsBuilderARM::VisitStringNewStringFromBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); + locations->SetOut(Location::RegisterLocation(R0)); +} + +void IntrinsicCodeGeneratorARM::VisitStringNewStringFromBytes(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register byte_array = locations->InAt(0).AsRegister<Register>(); + __ cmp(byte_array, ShifterOperand(0)); + SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel(), EQ); + + __ LoadFromOffset( + kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromBytes).Int32Value()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ blx(LR); + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderARM::VisitStringNewStringFromChars(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetOut(Location::RegisterLocation(R0)); +} + +void IntrinsicCodeGeneratorARM::VisitStringNewStringFromChars(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + + __ LoadFromOffset( + kLoadWord, LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromChars).Int32Value()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ blx(LR); +} + +void 
IntrinsicLocationsBuilderARM::VisitStringNewStringFromString(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(R0)); +} + +void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register string_to_copy = locations->InAt(0).AsRegister<Register>(); + __ cmp(string_to_copy, ShifterOperand(0)); + SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); + codegen_->AddSlowPath(slow_path); + __ b(slow_path->GetEntryLabel(), EQ); + + __ LoadFromOffset(kLoadWord, + LR, TR, QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pAllocStringFromString).Int32Value()); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ blx(LR); + __ Bind(slow_path->GetExitLabel()); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -903,9 +1039,8 @@ UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure. UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) } // namespace arm } // namespace art diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 117d6a4279..2c4fab0465 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -75,7 +75,7 @@ static void MoveFromReturnRegister(Location trg, DCHECK_NE(type, Primitive::kPrimVoid); - if (Primitive::IsIntegralType(type)) { + if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) { Register trg_reg = RegisterFrom(trg, type); Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type); __ Mov(trg_reg, res_reg, kDiscardForSameWReg); @@ -86,27 +86,9 @@ static void MoveFromReturnRegister(Location trg, } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM64* codegen) { - if (invoke->InputCount() == 0) { - return; - } - - LocationSummary* locations = invoke->GetLocations(); - InvokeDexCallingConventionVisitor calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. 
- HParallelMove parallel_move(arena);
-
- for (size_t i = 0; i < invoke->InputCount(); i++) {
- HInstruction* input = invoke->InputAt(i);
- Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
- Location actual_loc = locations->InAt(i);
-
- parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
- }
-
- codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) {
+ InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
+ IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}
// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -125,7 +107,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
SaveLiveRegisters(codegen, invoke_->GetLocations());
- MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+ MoveArguments(invoke_, codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
@@ -952,10 +934,6 @@ void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) {
const MemberOffset value_offset = mirror::String::ValueOffset();
// Location of count
const MemberOffset count_offset = mirror::String::CountOffset();
- // Starting offset within data array
- const MemberOffset offset_offset = mirror::String::OffsetOffset();
- // Start of char data with array_
- const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t));
Register obj = WRegisterFrom(locations->InAt(0)); // String object pointer.
Register idx = WRegisterFrom(locations->InAt(1)); // Index of character.
@@ -978,21 +956,15 @@ void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) {
__ Cmp(idx, temp);
__ B(hs, slow_path->GetEntryLabel());
- // Index computation.
- __ Ldr(temp, HeapOperand(obj, offset_offset)); // temp := str.offset.
- __ Ldr(array_temp, HeapOperand(obj, value_offset)); // array_temp := str.offset.
- __ Add(temp, temp, idx);
- DCHECK_EQ(data_offset.Int32Value() % 2, 0); // We'll compensate by shifting.
- __ Add(temp, temp, Operand(data_offset.Int32Value() / 2));
+ __ Add(array_temp, obj, Operand(value_offset.Int32Value())); // array_temp := str.value.
// Load the value.
- __ Ldrh(out, MemOperand(array_temp.X(), temp, UXTW, 1)); // out := array_temp[temp].
+ __ Ldrh(out, MemOperand(array_temp.X(), idx, UXTW, 1)); // out := array_temp[idx].
__ Bind(slow_path->GetExitLabel());
}
void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
- // The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kCall,
kIntrinsified);
@@ -1021,6 +993,169 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+static void GenerateVisitStringIndexOf(HInvoke* invoke,
+ vixl::MacroAssembler* masm,
+ CodeGeneratorARM64* codegen,
+ ArenaAllocator* allocator,
+ bool start_at_zero) {
+ LocationSummary* locations = invoke->GetLocations();
+ Register tmp_reg = WRegisterFrom(locations->GetTemp(0));
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+ // or directly dispatch if we have a constant.
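+ // (Code points above 0xFFFF are represented as surrogate pairs in the UTF-16
+ // char array, so a single 16-bit comparison cannot find them; those searches
+ // belong to the slow path.)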
+ SlowPathCodeARM64* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > 0xFFFFU) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathARM64(invoke); + codegen->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + Register char_reg = WRegisterFrom(locations->InAt(1)); + __ Mov(tmp_reg, 0xFFFF); + __ Cmp(char_reg, Operand(tmp_reg)); + slow_path = new (allocator) IntrinsicSlowPathARM64(invoke); + codegen->AddSlowPath(slow_path); + __ B(hi, slow_path->GetEntryLabel()); + } + + if (start_at_zero) { + // Start-index = 0. + __ Mov(tmp_reg, 0); + } + + __ Ldr(lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pIndexOf).Int32Value())); + __ Blr(lr); + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + + // Need a temp for slow-path codepoint compare, and need to send start_index=0. + locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); +} + +void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's + // best to align the inputs accordingly. + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + + // Need a temp for slow-path codepoint compare. 
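+ // (Any register works here: unlike VisitStringIndexOf above, the caller supplies
+ // the start index in the third convention register, so the temp is only needed
+ // for the code-point range check.)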
+ locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, GetAllocator(), false); +} + +void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); +} + +void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { + vixl::MacroAssembler* masm = GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register byte_array = WRegisterFrom(locations->InAt(0)); + __ Cmp(byte_array, 0); + SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + codegen_->AddSlowPath(slow_path); + __ B(eq, slow_path->GetEntryLabel()); + + __ Ldr(lr, + MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromBytes).Int32Value())); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Blr(lr); + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); +} + +void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) { + vixl::MacroAssembler* masm = GetVIXLAssembler(); + + __ Ldr(lr, + MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromChars).Int32Value())); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Blr(lr); +} + +void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) { + // The inputs plus one temp. 
+ LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); + locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); +} + +void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) { + vixl::MacroAssembler* masm = GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register string_to_copy = WRegisterFrom(locations->InAt(0)); + __ Cmp(string_to_copy, 0); + SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + codegen_->AddSlowPath(slow_path); + __ B(eq, slow_path->GetEntryLabel()); + + __ Ldr(lr, + MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pAllocStringFromString).Int32Value())); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Blr(lr); + __ Bind(slow_path->GetExitLabel()); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -1030,9 +1165,8 @@ void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED } UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h index 10f6e1d6c7..2c9248f52c 100644 --- a/compiler/optimizing/intrinsics_list.h +++ b/compiler/optimizing/intrinsics_list.h @@ -60,8 +60,12 @@ V(MemoryPokeShortNative, kStatic) \ V(StringCharAt, kDirect) \ V(StringCompareTo, kDirect) \ + V(StringGetCharsNoCheck, kDirect) \ V(StringIndexOf, kDirect) \ V(StringIndexOfAfter, kDirect) \ + V(StringNewStringFromBytes, kStatic) \ + V(StringNewStringFromChars, kStatic) \ + V(StringNewStringFromString, kStatic) \ V(UnsafeCASInt, kDirect) \ V(UnsafeCASLong, kDirect) \ V(UnsafeCASObject, kDirect) \ diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index a8e2cdf1f6..28b7a07cf9 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -16,6 +16,8 @@ #include "intrinsics_x86.h" +#include <limits> + #include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" #include "entrypoints/quick/quick_entrypoints.h" @@ -111,27 +113,9 @@ static void MoveFromReturnRegister(Location target, } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) { - if (invoke->InputCount() == 0) { - return; - } - - LocationSummary* locations = invoke->GetLocations(); - InvokeDexCallingConventionVisitor calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. 
- HParallelMove parallel_move(arena);
-
- for (size_t i = 0; i < invoke->InputCount(); i++) {
- HInstruction* input = invoke->InputAt(i);
- Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
- Location actual_loc = locations->InAt(i);
-
- parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
- }
-
- codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
+ InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
+ IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}
// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -142,11 +126,8 @@ static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX
// restored!
class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
public:
- explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp)
- : invoke_(invoke) {
- // The temporary register has to be EAX for x86 invokes.
- DCHECK_EQ(temp, EAX);
- }
+ explicit IntrinsicSlowPathX86(HInvoke* invoke)
+ : invoke_(invoke) { }
void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in);
@@ -154,7 +135,7 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
SaveLiveRegisters(codegen, invoke_->GetLocations());
- MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+ MoveArguments(invoke_, codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
@@ -748,7 +729,7 @@ void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
}
static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
- MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+ MoveArguments(invoke, codegen);
DCHECK(invoke->IsInvokeStaticOrDirect());
codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);
@@ -898,8 +879,6 @@ void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
locations->SetOut(Location::SameAsFirstInput());
- // Needs to be EAX for the invoke.
- locations->AddTemp(Location::RegisterLocation(EAX));
}
void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
@@ -909,23 +888,17 @@ void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
// Location of count
const int32_t count_offset = mirror::String::CountOffset().Int32Value();
- // Starting offset within data array
- const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
- // Start of char data with array_
- const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
Register obj = locations->InAt(0).AsRegister<Register>();
Register idx = locations->InAt(1).AsRegister<Register>();
Register out = locations->Out().AsRegister<Register>();
- Location temp_loc = locations->GetTemp(0);
- Register temp = temp_loc.AsRegister<Register>();
// TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
// the cost.
// TODO: For simplicity, the index parameter is requested in a register, so different from Quick
// we will not optimize the code for constants (which would save a register).
- SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke, temp); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); X86Assembler* assembler = GetAssembler(); @@ -934,12 +907,8 @@ void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) { codegen_->MaybeRecordImplicitNullCheck(invoke); __ j(kAboveEqual, slow_path->GetEntryLabel()); - // Get the actual element. - __ movl(temp, idx); // temp := idx. - __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx. - __ movl(out, Address(obj, value_offset)); // obj := obj.array. - // out = out[2*temp]. - __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset)); + // out = out[2*idx]. + __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset)); __ Bind(slow_path->GetExitLabel()); } @@ -953,8 +922,6 @@ void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) { locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(EAX)); - // Needs to be EAX for the invoke. - locations->AddTemp(Location::RegisterLocation(EAX)); } void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { @@ -966,8 +933,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { Register argument = locations->InAt(1).AsRegister<Register>(); __ testl(argument, argument); - SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86( - invoke, locations->GetTemp(0).AsRegister<Register>()); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -975,6 +941,227 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void CreateStringIndexOfLocations(HInvoke* invoke, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = new (allocator) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + // The data needs to be in EDI for scasw. So request that the string is there, anyways. + locations->SetInAt(0, Location::RegisterLocation(EDI)); + // If we look for a constant char, we'll still have to copy it into EAX. So just request the + // allocator to do that, anyways. We can still do the constant check by checking the parameter + // of the instruction explicitly. + // Note: This works as we don't clobber EAX anywhere. + locations->SetInAt(1, Location::RegisterLocation(EAX)); + if (!start_at_zero) { + locations->SetInAt(2, Location::RequiresRegister()); // The starting index. + } + // As we clobber EDI during execution anyways, also use it as the output. + locations->SetOut(Location::SameAsFirstInput()); + + // repne scasw uses ECX as the counter. + locations->AddTemp(Location::RegisterLocation(ECX)); + // Need another temporary to be able to compute the result. + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenerateStringIndexOf(HInvoke* invoke, + X86Assembler* assembler, + CodeGeneratorX86* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. 
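+ // (The String argument has already been null-checked explicitly before the
+ // intrinsic, so the loads below cannot fault on a null receiver.)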
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+ Register string_obj = locations->InAt(0).AsRegister<Register>();
+ Register search_value = locations->InAt(1).AsRegister<Register>();
+ Register counter = locations->GetTemp(0).AsRegister<Register>();
+ Register string_length = locations->GetTemp(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ // Check our assumptions for registers.
+ DCHECK_EQ(string_obj, EDI);
+ DCHECK_EQ(search_value, EAX);
+ DCHECK_EQ(counter, ECX);
+ DCHECK_EQ(out, EDI);
+
+ // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+ // or directly dispatch if we have a constant.
+ SlowPathCodeX86* slow_path = nullptr;
+ if (invoke->InputAt(1)->IsIntConstant()) {
+ if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
+ std::numeric_limits<uint16_t>::max()) {
+ // Always needs the slow-path. We could directly dispatch to it, but this case should be
+ // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+ slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ return;
+ }
+ } else {
+ __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
+ slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
+ codegen->AddSlowPath(slow_path);
+ __ j(kAbove, slow_path->GetEntryLabel());
+ }
+
+ // From here down, we know that we are looking for a char that fits in 16 bits.
+ // Location of reference to data array within the String object.
+ int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+ // Location of count within the String object.
+ int32_t count_offset = mirror::String::CountOffset().Int32Value();
+
+ // Load string length, i.e., the count field of the string.
+ __ movl(string_length, Address(string_obj, count_offset));
+
+ // Do a zero-length check.
+ // TODO: Support jecxz.
+ Label not_found_label;
+ __ testl(string_length, string_length);
+ __ j(kEqual, &not_found_label);
+
+ if (start_at_zero) {
+ // Number of chars to scan is the same as the string length.
+ __ movl(counter, string_length);
+
+ // Move to the start of the string.
+ __ addl(string_obj, Immediate(value_offset));
+ } else {
+ Register start_index = locations->InAt(2).AsRegister<Register>();
+
+ // Do a start_index check.
+ __ cmpl(start_index, string_length);
+ __ j(kGreaterEqual, &not_found_label);
+
+ // Ensure we have a start index >= 0;
+ __ xorl(counter, counter);
+ __ cmpl(start_index, Immediate(0));
+ __ cmovl(kGreater, counter, start_index);
+
+ // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+ __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+
+ // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
+ // compare.
+ __ negl(counter);
+ __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
+ }
+
+ // Everything is set up for repne scasw:
+ // * Comparison address in EDI.
+ // * Counter in ECX.
+ __ repne_scasw();
+
+ // Did we find a match?
+ __ j(kNotEqual, &not_found_label);
+
+ // Yes, we matched. Compute the index of the result.
+ __ subl(string_length, counter);
+ __ leal(out, Address(string_length, -1));
+
+ Label done;
+ __ jmp(&done);
+
+ // Failed to match; return -1.
+ __ Bind(&not_found_label);
+ __ movl(out, Immediate(-1));
+
+ // And join up at the end.
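+ // (On the found path, out now holds the 0-based index: a match at index i leaves
+ // ECX = string_length - i - 1, so the subl/leal pair above computes
+ // string_length - ECX - 1 = i.)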
+ __ Bind(&done); + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, true); +} + +void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, false); +} + +void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + +void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); + locations->SetOut(Location::RegisterLocation(EAX)); +} + +void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) { + X86Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register byte_array = locations->InAt(0).AsRegister<Register>(); + __ testl(byte_array, byte_array); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + codegen_->AddSlowPath(slow_path); + __ j(kEqual, slow_path->GetEntryLabel()); + + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromBytes))); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetOut(Location::RegisterLocation(EAX)); +} + +void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) { + X86Assembler* assembler = GetAssembler(); + + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromChars))); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(EAX)); +} + +void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) { + X86Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register string_to_copy = locations->InAt(0).AsRegister<Register>(); + __ testl(string_to_copy, 
string_to_copy); + SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + codegen_->AddSlowPath(slow_path); + __ j(kEqual, slow_path->GetEntryLabel()); + + __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pAllocStringFromString))); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Bind(slow_path->GetExitLabel()); +} + static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) { Register address = locations->InAt(0).AsRegisterPairLow<Register>(); Location out_loc = locations->Out(); @@ -1038,7 +1225,7 @@ static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); - HInstruction *value = invoke->InputAt(1); + HInstruction* value = invoke->InputAt(1); if (size == Primitive::kPrimByte) { locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value)); } else { @@ -1535,8 +1722,7 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) } UNIMPLEMENTED_INTRINSIC(MathRoundDouble) -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 5d24d1fbfb..0efa714a23 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -16,6 +16,8 @@ #include "intrinsics_x86_64.h" +#include <limits> + #include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator_x86_64.h" #include "entrypoints/quick/quick_entrypoints.h" @@ -103,27 +105,9 @@ static void MoveFromReturnRegister(Location trg, } } -static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) { - if (invoke->InputCount() == 0) { - return; - } - - LocationSummary* locations = invoke->GetLocations(); - InvokeDexCallingConventionVisitor calling_convention_visitor; - - // We're moving potentially two or more locations to locations that could overlap, so we need - // a parallel move resolver. 
- HParallelMove parallel_move(arena);
-
- for (size_t i = 0; i < invoke->InputCount(); i++) {
- HInstruction* input = invoke->InputAt(i);
- Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
- Location actual_loc = locations->InAt(i);
-
- parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
- }
-
- codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
+ IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}
// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
@@ -142,7 +126,7 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
SaveLiveRegisters(codegen, invoke_->GetLocations());
- MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+ MoveArguments(invoke_, codegen);
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
@@ -622,7 +606,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
}
static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
- MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
+ MoveArguments(invoke, codegen);
DCHECK(invoke->IsInvokeStaticOrDirect());
codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
@@ -801,7 +785,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
__ Bind(&nan);
// output = 0
- __ xorq(out, out);
+ __ xorl(out, out);
__ Bind(&done);
}
@@ -823,16 +807,10 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
// Location of count
const int32_t count_offset = mirror::String::CountOffset().Int32Value();
- // Starting offset within data array
- const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
- // Start of char data with array_
- const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- Location temp_loc = locations->GetTemp(0);
- CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
// TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
// the cost.
@@ -848,12 +826,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
codegen_->MaybeRecordImplicitNullCheck(invoke);
__ j(kAboveEqual, slow_path->GetEntryLabel());
- // Get the actual element.
- __ movl(temp, idx); // temp := idx.
- __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx.
- __ movl(out, Address(obj, value_offset)); // obj := obj.array.
- // out = out[2*temp].
- __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
+ // out = out[2*idx].
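+ // (out aliases the String object register here; with the chars now embedded in
+ // the String at value_offset, a single scaled load replaces the old offset/array
+ // indirection.)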
+ __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset)); __ Bind(slow_path->GetExitLabel()); } @@ -886,6 +860,229 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +static void CreateStringIndexOfLocations(HInvoke* invoke, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = new (allocator) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + // The data needs to be in RDI for scasw. So request that the string is there, anyways. + locations->SetInAt(0, Location::RegisterLocation(RDI)); + // If we look for a constant char, we'll still have to copy it into RAX. So just request the + // allocator to do that, anyways. We can still do the constant check by checking the parameter + // of the instruction explicitly. + // Note: This works as we don't clobber RAX anywhere. + locations->SetInAt(1, Location::RegisterLocation(RAX)); + if (!start_at_zero) { + locations->SetInAt(2, Location::RequiresRegister()); // The starting index. + } + // As we clobber RDI during execution anyways, also use it as the output. + locations->SetOut(Location::SameAsFirstInput()); + + // repne scasw uses RCX as the counter. + locations->AddTemp(Location::RegisterLocation(RCX)); + // Need another temporary to be able to compute the result. + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenerateStringIndexOf(HInvoke* invoke, + X86_64Assembler* assembler, + CodeGeneratorX86_64* codegen, + ArenaAllocator* allocator, + bool start_at_zero) { + LocationSummary* locations = invoke->GetLocations(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>(); + CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>(); + CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>(); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + // Check our assumptions for registers. + DCHECK_EQ(string_obj.AsRegister(), RDI); + DCHECK_EQ(search_value.AsRegister(), RAX); + DCHECK_EQ(counter.AsRegister(), RCX); + DCHECK_EQ(out.AsRegister(), RDI); + + // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, + // or directly dispatch if we have a constant. + SlowPathCodeX86_64* slow_path = nullptr; + if (invoke->InputAt(1)->IsIntConstant()) { + if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) > + std::numeric_limits<uint16_t>::max()) { + // Always needs the slow-path. We could directly dispatch to it, but this case should be + // rare, so for simplicity just put the full slow-path down and branch unconditionally. + slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke); + codegen->AddSlowPath(slow_path); + __ jmp(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + } else { + __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max())); + slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke); + codegen->AddSlowPath(slow_path); + __ j(kAbove, slow_path->GetEntryLabel()); + } + + // From here down, we know that we are looking for a char that fits in 16 bits. + // Location of reference to data array within the String object. 
+ int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+ // Location of count within the String object.
+ int32_t count_offset = mirror::String::CountOffset().Int32Value();
+
+ // Load string length, i.e., the count field of the string.
+ __ movl(string_length, Address(string_obj, count_offset));
+
+ // Do a length check.
+ // TODO: Support jecxz.
+ Label not_found_label;
+ __ testl(string_length, string_length);
+ __ j(kEqual, &not_found_label);
+
+ if (start_at_zero) {
+ // Number of chars to scan is the same as the string length.
+ __ movl(counter, string_length);
+
+ // Move to the start of the string.
+ __ addq(string_obj, Immediate(value_offset));
+ } else {
+ CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
+
+ // Do a start_index check.
+ __ cmpl(start_index, string_length);
+ __ j(kGreaterEqual, &not_found_label);
+
+ // Ensure we have a start index >= 0;
+ __ xorl(counter, counter);
+ __ cmpl(start_index, Immediate(0));
+ __ cmov(kGreater, counter, start_index, false); // 32-bit copy is enough.
+
+ // Move to the start of the string: string_obj + value_offset + 2 * start_index.
+ __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
+
+ // Now update ecx, the work counter: it's gonna be string.length - start_index.
+ __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
+ __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
+ }
+
+ // Everything is set up for repne scasw:
+ // * Comparison address in RDI.
+ // * Counter in ECX.
+ __ repne_scasw();
+
+ // Did we find a match?
+ __ j(kNotEqual, &not_found_label);
+
+ // Yes, we matched. Compute the index of the result.
+ __ subl(string_length, counter);
+ __ leal(out, Address(string_length, -1));
+
+ Label done;
+ __ jmp(&done);
+
+ // Failed to match; return -1.
+ __ Bind(&not_found_label);
+ __ movl(out, Immediate(-1));
+
+ // And join up at the end.
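+ // (repne scasw stops either because ECX reached zero or because AX matched the
+ // scanned word; ZF distinguishes the two outcomes, which is what the kNotEqual
+ // branch above tests.)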
+ __ Bind(&done); + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, true); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true); +} + +void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { + CreateStringIndexOfLocations(invoke, arena_, false); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false); +} + +void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); + locations->SetOut(Location::RegisterLocation(RAX)); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { + X86_64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>(); + __ testl(byte_array, byte_array); + SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + codegen_->AddSlowPath(slow_path); + __ j(kEqual, slow_path->GetEntryLabel()); + + __ gs()->call(Address::Absolute( + QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true)); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + locations->SetOut(Location::RegisterLocation(RAX)); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) { + X86_64Assembler* assembler = GetAssembler(); + + __ gs()->call(Address::Absolute( + QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true)); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); +} + +void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCall, + kIntrinsified); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetOut(Location::RegisterLocation(RAX)); +} + +void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) { + X86_64Assembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + CpuRegister string_to_copy = 
locations->InAt(0).AsRegister<CpuRegister>(); + __ testl(string_to_copy, string_to_copy); + SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + codegen_->AddSlowPath(slow_path); + __ j(kEqual, slow_path->GetEntryLabel()); + + __ gs()->call(Address::Absolute( + QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true)); + codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + __ Bind(slow_path->GetExitLabel()); +} + static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity. @@ -1389,8 +1586,7 @@ void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } -UNIMPLEMENTED_INTRINSIC(StringIndexOf) -UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index bf9b8e59c5..2535ea274a 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -39,8 +39,9 @@ static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) { } } - if (instruction->HasEnvironment()) { - HEnvironment* environment = instruction->GetEnvironment(); + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* input = environment->GetInstructionAt(i); if (input != nullptr) { @@ -63,13 +64,15 @@ static bool InputsAreDefinedBeforeLoop(HInstruction* instruction) { * If `environment` has a loop header phi, we replace it with its first input. 
*/ static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info) { - for (size_t i = 0, e = environment->Size(); i < e; ++i) { - HInstruction* input = environment->GetInstructionAt(i); - if (input != nullptr && IsPhiOf(input, info->GetHeader())) { - environment->RemoveAsUserOfInput(i); - HInstruction* incoming = input->InputAt(0); - environment->SetRawEnvAt(i, incoming); - incoming->AddEnvUseAt(environment, i); + for (; environment != nullptr; environment = environment->GetParent()) { + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* input = environment->GetInstructionAt(i); + if (input != nullptr && IsPhiOf(input, info->GetHeader())) { + environment->RemoveAsUserOfInput(i); + HInstruction* incoming = input->InputAt(0); + environment->SetRawEnvAt(i, incoming); + incoming->AddEnvUseAt(environment, i); + } } } } diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 7818c606db..4f259b5095 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -39,7 +39,7 @@ namespace art { static void TestCode(const uint16_t* data, const int* expected_order, size_t number_of_blocks) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 52367730ed..7cb00a1923 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -32,7 +32,7 @@ namespace art { static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 8a96ee9ace..9d7d0b6c67 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -46,7 +46,7 @@ static void DumpBitVector(BitVector* vector, static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -445,44 +445,40 @@ TEST(LivenessTest, Loop5) { TEST(LivenessTest, Loop6) { // Bitsets are made of: - // (constant0, constant4, constant5, phi in block 2, phi in block 8) + // (constant0, constant4, constant5, phi in block 2) const char* expected = "Block 0\n" - " live in: (00000)\n" - " live out: (11100)\n" - " kill: (11100)\n" + " live in: (0000)\n" + " live out: (1110)\n" + " kill: (1110)\n" "Block 1\n" - " live in: (11100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (1110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 2\n" // loop header - " live in: (01100)\n" - " live out: (01110)\n" - " kill: (00010)\n" + " live in: (0110)\n" + " live out: (0111)\n" + " kill: (0001)\n" "Block 3\n" - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" - "Block 4\n" // original 
back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" - "Block 5\n" // original back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00000)\n" + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" + "Block 4\n" // back edge + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" + "Block 5\n" // back edge + " live in: (0110)\n" + " live out: (0110)\n" + " kill: (0000)\n" "Block 6\n" // return block - " live in: (00010)\n" - " live out: (00000)\n" - " kill: (00000)\n" + " live in: (0001)\n" + " live out: (0000)\n" + " kill: (0000)\n" "Block 7\n" // exit block - " live in: (00000)\n" - " live out: (00000)\n" - " kill: (00000)\n" - "Block 8\n" // synthesized back edge - " live in: (01100)\n" - " live out: (01100)\n" - " kill: (00001)\n"; + " live in: (0000)\n" + " live out: (0000)\n" + " kill: (0000)\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index a1ae67009e..42aba04828 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -25,8 +25,6 @@ LocationSummary::LocationSummary(HInstruction* instruction, bool intrinsified) : inputs_(instruction->GetBlock()->GetGraph()->GetArena(), instruction->InputCount()), temps_(instruction->GetBlock()->GetGraph()->GetArena(), 0), - environment_(instruction->GetBlock()->GetGraph()->GetArena(), - instruction->EnvironmentSize()), output_overlaps_(Location::kOutputOverlap), call_kind_(call_kind), stack_mask_(nullptr), @@ -37,10 +35,6 @@ LocationSummary::LocationSummary(HInstruction* instruction, for (size_t i = 0; i < instruction->InputCount(); ++i) { inputs_.Put(i, Location()); } - environment_.SetSize(instruction->EnvironmentSize()); - for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) { - environment_.Put(i, Location()); - } instruction->SetLocations(this); if (NeedsSafepoint()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index c3a99150c4..09bbb33042 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -525,14 +525,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return temps_.Size(); } - void SetEnvironmentAt(uint32_t at, Location location) { - environment_.Put(at, location); - } - - Location GetEnvironmentAt(uint32_t at) const { - return environment_.Get(at); - } - Location Out() const { return output_; } bool CanCall() const { return call_kind_ != kNoCall; } @@ -602,7 +594,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { private: GrowableArray<Location> inputs_; GrowableArray<Location> temps_; - GrowableArray<Location> environment_; // Whether the output overlaps with any of the inputs. If it overlaps, then it cannot // share the same register as the inputs. 
Location::OutputOverlap output_overlaps_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 6ab57b8e50..47da9cc17c 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -16,7 +16,9 @@ #include "nodes.h" +#include "code_generator.h" #include "ssa_builder.h" +#include "base/bit_vector-inl.h" #include "utils/growable_array.h" #include "scoped_thread_state_change.h" @@ -37,8 +39,9 @@ static void RemoveAsUser(HInstruction* instruction) { instruction->RemoveAsUserOfInput(i); } - HEnvironment* environment = instruction->GetEnvironment(); - if (environment != nullptr) { + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { if (environment->GetInstructionAt(i) != nullptr) { environment->RemoveAsUserOfInput(i); @@ -191,24 +194,6 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { void HGraph::SimplifyLoop(HBasicBlock* header) { HLoopInformation* info = header->GetLoopInformation(); - // If there are more than one back edge, make them branch to the same block that - // will become the only back edge. This simplifies finding natural loops in the - // graph. - // Also, if the loop is a do/while (that is the back edge is an if), change the - // back edge to be a goto. This simplifies code generation of suspend cheks. - if (info->NumberOfBackEdges() > 1 || info->GetBackEdges().Get(0)->GetLastInstruction()->IsIf()) { - HBasicBlock* new_back_edge = new (arena_) HBasicBlock(this, header->GetDexPc()); - AddBlock(new_back_edge); - new_back_edge->AddInstruction(new (arena_) HGoto()); - for (size_t pred = 0, e = info->GetBackEdges().Size(); pred < e; ++pred) { - HBasicBlock* back_edge = info->GetBackEdges().Get(pred); - back_edge->ReplaceSuccessor(header, new_back_edge); - } - info->ClearBackEdges(); - info->AddBackEdge(new_back_edge); - new_back_edge->AddSuccessor(header); - } - // Make sure the loop has only one pre header. This simplifies SSA building by having // to just look at the pre header to know which locals are initialized at entry of the // loop. @@ -218,11 +203,9 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { AddBlock(pre_header); pre_header->AddInstruction(new (arena_) HGoto()); - ArenaBitVector back_edges(arena_, GetBlocks().Size(), false); - HBasicBlock* back_edge = info->GetBackEdges().Get(0); for (size_t pred = 0; pred < header->GetPredecessors().Size(); ++pred) { HBasicBlock* predecessor = header->GetPredecessors().Get(pred); - if (predecessor != back_edge) { + if (!info->IsBackEdge(*predecessor)) { predecessor->ReplaceSuccessor(header, pre_header); pred--; } @@ -230,9 +213,17 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { pre_header->AddSuccessor(header); } - // Make sure the second predecessor of a loop header is the back edge. - if (header->GetPredecessors().Get(1) != info->GetBackEdges().Get(0)) { - header->SwapPredecessors(); + // Make sure the first predecessor of a loop header is the incoming block. 
+ if (info->IsBackEdge(*header->GetPredecessors().Get(0))) { + HBasicBlock* to_swap = header->GetPredecessors().Get(0); + for (size_t pred = 1, e = header->GetPredecessors().Size(); pred < e; ++pred) { + HBasicBlock* predecessor = header->GetPredecessors().Get(pred); + if (!info->IsBackEdge(*predecessor)) { + header->predecessors_.Put(pred, to_swap); + header->predecessors_.Put(0, predecessor); + break; + } + } } // Place the suspend check at the beginning of the header, so that live registers @@ -303,25 +294,6 @@ HNullConstant* HGraph::GetNullConstant() { return cached_null_constant_; } -template <class InstructionType, typename ValueType> -InstructionType* HGraph::CreateConstant(ValueType value, - ArenaSafeMap<ValueType, InstructionType*>* cache) { - // Try to find an existing constant of the given value. - InstructionType* constant = nullptr; - auto cached_constant = cache->find(value); - if (cached_constant != cache->end()) { - constant = cached_constant->second; - } - - // If not found or previously deleted, create and cache a new instruction. - if (constant == nullptr || constant->GetBlock() == nullptr) { - constant = new (arena_) InstructionType(value); - cache->Overwrite(value, constant); - InsertConstant(constant); - } - return constant; -} - HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value) { switch (type) { case Primitive::Type::kPrimBoolean: @@ -343,6 +315,18 @@ HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value) { } } +void HGraph::CacheFloatConstant(HFloatConstant* constant) { + int32_t value = bit_cast<int32_t, float>(constant->GetValue()); + DCHECK(cached_float_constants_.find(value) == cached_float_constants_.end()); + cached_float_constants_.Overwrite(value, constant); +} + +void HGraph::CacheDoubleConstant(HDoubleConstant* constant) { + int64_t value = bit_cast<int64_t, double>(constant->GetValue()); + DCHECK(cached_double_constants_.find(value) == cached_double_constants_.end()); + cached_double_constants_.Overwrite(value, constant); +} + void HLoopInformation::Add(HBasicBlock* block) { blocks_.SetBit(block->GetBlockId()); } @@ -364,26 +348,60 @@ void HLoopInformation::PopulateRecursive(HBasicBlock* block) { } bool HLoopInformation::Populate() { - DCHECK_EQ(GetBackEdges().Size(), 1u); - HBasicBlock* back_edge = GetBackEdges().Get(0); - DCHECK(back_edge->GetDominator() != nullptr); - if (!header_->Dominates(back_edge)) { - // This loop is not natural. Do not bother going further. - return false; - } + DCHECK_EQ(blocks_.NumSetBits(), 0u) << "Loop information has already been populated"; + for (size_t i = 0, e = GetBackEdges().Size(); i < e; ++i) { + HBasicBlock* back_edge = GetBackEdges().Get(i); + DCHECK(back_edge->GetDominator() != nullptr); + if (!header_->Dominates(back_edge)) { + // This loop is not natural. Do not bother going further. + return false; + } - // Populate this loop: starting with the back edge, recursively add predecessors - // that are not already part of that loop. Set the header as part of the loop - // to end the recursion. - // This is a recursive implementation of the algorithm described in - // "Advanced Compiler Design & Implementation" (Muchnick) p192. - blocks_.SetBit(header_->GetBlockId()); - PopulateRecursive(back_edge); + // Populate this loop: starting with the back edge, recursively add predecessors + // that are not already part of that loop. Set the header as part of the loop + // to end the recursion. 
+ // This is a recursive implementation of the algorithm described in + // "Advanced Compiler Design & Implementation" (Muchnick) p192. + blocks_.SetBit(header_->GetBlockId()); + PopulateRecursive(back_edge); + } return true; } +void HLoopInformation::Update() { + HGraph* graph = header_->GetGraph(); + for (uint32_t id : blocks_.Indexes()) { + HBasicBlock* block = graph->GetBlocks().Get(id); + // Reset loop information of non-header blocks inside the loop, except + // members of inner nested loops because those should already have been + // updated by their own LoopInformation. + if (block->GetLoopInformation() == this && block != header_) { + block->SetLoopInformation(nullptr); + } + } + blocks_.ClearAllBits(); + + if (back_edges_.IsEmpty()) { + // The loop has been dismantled, delete its suspend check and remove info + // from the header. + DCHECK(HasSuspendCheck()); + header_->RemoveInstruction(suspend_check_); + header_->SetLoopInformation(nullptr); + header_ = nullptr; + suspend_check_ = nullptr; + } else { + if (kIsDebugBuild) { + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + DCHECK(header_->Dominates(back_edges_.Get(i))); + } + } + // This loop still has reachable back edges. Repopulate the list of blocks. + bool populate_successful = Populate(); + DCHECK(populate_successful); + } +} + HBasicBlock* HLoopInformation::GetPreHeader() const { - DCHECK_EQ(header_->GetPredecessors().Size(), 2u); return header_->GetDominator(); } @@ -395,6 +413,14 @@ bool HLoopInformation::IsIn(const HLoopInformation& other) const { return other.blocks_.IsBitSet(header_->GetBlockId()); } +size_t HLoopInformation::GetLifetimeEnd() const { + size_t last_position = 0; + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + last_position = std::max(back_edges_.Get(i)->GetLifetimeEnd(), last_position); + } + return last_position; +} + bool HBasicBlock::Dominates(HBasicBlock* other) const { // Walk up the dominator tree from `other`, to find out if `this` // is an ancestor. 
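The Populate() rewrite above generalizes natural-loop construction to any number of back edges: the header is marked first, then predecessors are added recursively from each back edge until the walk reaches an already-marked block. Below is a minimal standalone sketch of that algorithm (after Muchnick p192); `Block`, `preds` and the function names are illustrative stand-ins, not ART's API.

#include <set>
#include <vector>

struct Block {
  int id;
  std::vector<Block*> preds;
};

// Recursive step: record `block`, then walk its predecessors. Because the
// loop header is inserted before the walk starts, the recursion ends there.
static void PopulateRecursive(Block* block, std::set<int>* loop_blocks) {
  if (!loop_blocks->insert(block->id).second) {
    return;  // Already part of the loop.
  }
  for (Block* pred : block->preds) {
    PopulateRecursive(pred, loop_blocks);
  }
}

// Mirrors the shape of the updated HLoopInformation::Populate(): seed with the
// header and start a walk from every back edge instead of assuming exactly one.
static std::set<int> PopulateLoop(Block* header, const std::vector<Block*>& back_edges) {
  std::set<int> loop_blocks;
  loop_blocks.insert(header->id);
  for (Block* back_edge : back_edges) {
    PopulateRecursive(back_edge, &loop_blocks);
  }
  return loop_blocks;
}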
@@ -456,6 +482,20 @@ void HBasicBlock::InsertInstructionBefore(HInstruction* instruction, HInstructio instructions_.InsertInstructionBefore(instruction, cursor); } +void HBasicBlock::InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor) { + DCHECK(!cursor->IsPhi()); + DCHECK(!instruction->IsPhi()); + DCHECK_EQ(instruction->GetId(), -1); + DCHECK_NE(cursor->GetId(), -1); + DCHECK_EQ(cursor->GetBlock(), this); + DCHECK(!instruction->IsControlFlow()); + DCHECK(!cursor->IsControlFlow()); + instruction->SetBlock(this); + instruction->SetId(GetGraph()->GetNextInstructionId()); + UpdateInputsUsers(instruction); + instructions_.InsertInstructionAfter(instruction, cursor); +} + void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) { DCHECK_EQ(phi->GetId(), -1); DCHECK_NE(cursor->GetId(), -1); @@ -481,6 +521,7 @@ static void Remove(HInstructionList* instruction_list, } void HBasicBlock::RemoveInstruction(HInstruction* instruction, bool ensure_safety) { + DCHECK(!instruction->IsPhi()); Remove(&instructions_, this, instruction, ensure_safety); } @@ -488,6 +529,24 @@ void HBasicBlock::RemovePhi(HPhi* phi, bool ensure_safety) { Remove(&phis_, this, phi, ensure_safety); } +void HBasicBlock::RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety) { + if (instruction->IsPhi()) { + RemovePhi(instruction->AsPhi(), ensure_safety); + } else { + RemoveInstruction(instruction, ensure_safety); + } +} + +void HEnvironment::CopyFrom(const GrowableArray<HInstruction*>& locals) { + for (size_t i = 0; i < locals.Size(); i++) { + HInstruction* instruction = locals.Get(i); + SetRawEnvAt(i, instruction); + if (instruction != nullptr) { + instruction->AddEnvUseAt(this, i); + } + } +} + void HEnvironment::CopyFrom(HEnvironment* env) { for (size_t i = 0; i < env->Size(); i++) { HInstruction* instruction = env->GetInstructionAt(i); @@ -498,6 +557,28 @@ void HEnvironment::CopyFrom(HEnvironment* env) { } } +void HEnvironment::CopyFromWithLoopPhiAdjustment(HEnvironment* env, + HBasicBlock* loop_header) { + DCHECK(loop_header->IsLoopHeader()); + for (size_t i = 0; i < env->Size(); i++) { + HInstruction* instruction = env->GetInstructionAt(i); + SetRawEnvAt(i, instruction); + if (instruction == nullptr) { + continue; + } + if (instruction->IsLoopHeaderPhi() && (instruction->GetBlock() == loop_header)) { + // At the end of the loop pre-header, the corresponding value for instruction + // is the first input of the phi. 
+ HInstruction* initial = instruction->AsPhi()->InputAt(0); + DCHECK(initial->GetBlock()->Dominates(loop_header)); + SetRawEnvAt(i, initial); + initial->AddEnvUseAt(this, i); + } else { + instruction->AddEnvUseAt(this, i); + } + } +} + void HEnvironment::RemoveAsUserOfInput(size_t index) const { const HUserRecord<HEnvironment*> user_record = vregs_.Get(index); user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode()); @@ -672,6 +753,14 @@ void HPhi::AddInput(HInstruction* input) { input->AddUseAt(this, inputs_.Size() - 1); } +void HPhi::RemoveInputAt(size_t index) { + RemoveAsUserOfInput(index); + inputs_.DeleteAt(index); + for (size_t i = index, e = InputCount(); i < e; ++i) { + InputRecordAt(i).GetUseNode()->SetIndex(i); + } +} + #define DEFINE_ACCEPT(name, super) \ void H##name::Accept(HGraphVisitor* visitor) { \ visitor->Visit##name(this); \ @@ -706,6 +795,84 @@ void HGraphVisitor::VisitBasicBlock(HBasicBlock* block) { } } +HConstant* HTypeConversion::TryStaticEvaluation() const { + HGraph* graph = GetBlock()->GetGraph(); + if (GetInput()->IsIntConstant()) { + int32_t value = GetInput()->AsIntConstant()->GetValue(); + switch (GetResultType()) { + case Primitive::kPrimLong: + return graph->GetLongConstant(static_cast<int64_t>(value)); + case Primitive::kPrimFloat: + return graph->GetFloatConstant(static_cast<float>(value)); + case Primitive::kPrimDouble: + return graph->GetDoubleConstant(static_cast<double>(value)); + default: + return nullptr; + } + } else if (GetInput()->IsLongConstant()) { + int64_t value = GetInput()->AsLongConstant()->GetValue(); + switch (GetResultType()) { + case Primitive::kPrimInt: + return graph->GetIntConstant(static_cast<int32_t>(value)); + case Primitive::kPrimFloat: + return graph->GetFloatConstant(static_cast<float>(value)); + case Primitive::kPrimDouble: + return graph->GetDoubleConstant(static_cast<double>(value)); + default: + return nullptr; + } + } else if (GetInput()->IsFloatConstant()) { + float value = GetInput()->AsFloatConstant()->GetValue(); + switch (GetResultType()) { + case Primitive::kPrimInt: + if (std::isnan(value)) + return graph->GetIntConstant(0); + if (value >= kPrimIntMax) + return graph->GetIntConstant(kPrimIntMax); + if (value <= kPrimIntMin) + return graph->GetIntConstant(kPrimIntMin); + return graph->GetIntConstant(static_cast<int32_t>(value)); + case Primitive::kPrimLong: + if (std::isnan(value)) + return graph->GetLongConstant(0); + if (value >= kPrimLongMax) + return graph->GetLongConstant(kPrimLongMax); + if (value <= kPrimLongMin) + return graph->GetLongConstant(kPrimLongMin); + return graph->GetLongConstant(static_cast<int64_t>(value)); + case Primitive::kPrimDouble: + return graph->GetDoubleConstant(static_cast<double>(value)); + default: + return nullptr; + } + } else if (GetInput()->IsDoubleConstant()) { + double value = GetInput()->AsDoubleConstant()->GetValue(); + switch (GetResultType()) { + case Primitive::kPrimInt: + if (std::isnan(value)) + return graph->GetIntConstant(0); + if (value >= kPrimIntMax) + return graph->GetIntConstant(kPrimIntMax); + if (value <= kPrimIntMin) + return graph->GetIntConstant(kPrimIntMin); + return graph->GetIntConstant(static_cast<int32_t>(value)); + case Primitive::kPrimLong: + if (std::isnan(value)) + return graph->GetLongConstant(0); + if (value >= kPrimLongMax) + return graph->GetLongConstant(kPrimLongMax); + if (value <= kPrimLongMin) + return graph->GetLongConstant(kPrimLongMin); + return graph->GetLongConstant(static_cast<int64_t>(value)); + case 
Primitive::kPrimFloat: + return graph->GetFloatConstant(static_cast<float>(value)); + default: + return nullptr; + } + } + return nullptr; +} + HConstant* HUnaryOperation::TryStaticEvaluation() const { if (GetInput()->IsIntConstant()) { int32_t value = Evaluate(GetInput()->AsIntConstant()->GetValue()); @@ -867,6 +1034,15 @@ bool HBasicBlock::HasSinglePhi() const { return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr; } +size_t HInstructionList::CountSize() const { + size_t size = 0; + HInstruction* current = first_instruction_; + for (; current != nullptr; current = current->GetNext()) { + size++; + } + return size; +} + void HInstructionList::SetBlockOfInstructions(HBasicBlock* block) const { for (HInstruction* current = first_instruction_; current != nullptr; @@ -898,40 +1074,167 @@ void HInstructionList::Add(const HInstructionList& instruction_list) { } } -void HBasicBlock::DisconnectFromAll() { - DCHECK(dominated_blocks_.IsEmpty()) << "Unimplemented scenario"; +void HBasicBlock::DisconnectAndDelete() { + // Dominators must be removed after all the blocks they dominate. This way + // a loop header is removed last, a requirement for correct loop information + // iteration. + DCHECK(dominated_blocks_.IsEmpty()); + // Remove the block from all loops it is included in. + for (HLoopInformationOutwardIterator it(*this); !it.Done(); it.Advance()) { + HLoopInformation* loop_info = it.Current(); + loop_info->Remove(this); + if (loop_info->IsBackEdge(*this)) { + // If this was the last back edge of the loop, we deliberately leave the + // loop in an inconsistent state; the SSAChecker will fail unless the + // entire loop is removed during the pass. + loop_info->RemoveBackEdge(this); + } + } + + // Disconnect the block from its predecessors and update their control-flow + // instructions. for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) { - predecessors_.Get(i)->successors_.Delete(this); + HBasicBlock* predecessor = predecessors_.Get(i); + HInstruction* last_instruction = predecessor->GetLastInstruction(); + predecessor->RemoveInstruction(last_instruction); + predecessor->RemoveSuccessor(this); + if (predecessor->GetSuccessors().Size() == 1u) { + DCHECK(last_instruction->IsIf()); + predecessor->AddInstruction(new (graph_->GetArena()) HGoto()); + } else { + // The predecessor has no remaining successors and therefore must be dead. + // We deliberately leave it without a control-flow instruction so that the + // SSAChecker fails unless it, too, is removed during the pass. + DCHECK_EQ(predecessor->GetSuccessors().Size(), 0u); + } + } + predecessors_.Reset(); + + // Disconnect the block from its successors and update their dominators + // and phis. for (size_t i = 0, e = successors_.Size(); i < e; ++i) { - successors_.Get(i)->predecessors_.Delete(this); - } - dominator_->dominated_blocks_.Delete(this); + HBasicBlock* successor = successors_.Get(i); + // Delete this block from the list of predecessors. + size_t this_index = successor->GetPredecessorIndexOf(this); + successor->predecessors_.DeleteAt(this_index); + + // Check that `successor` has other predecessors, otherwise `this` is the + // dominator of `successor` which violates the order DCHECKed at the top. + DCHECK(!successor->predecessors_.IsEmpty()); + + // Recompute the successor's dominator. 
+ HBasicBlock* old_dominator = successor->GetDominator(); + HBasicBlock* new_dominator = successor->predecessors_.Get(0); + for (size_t j = 1, f = successor->predecessors_.Size(); j < f; ++j) { + new_dominator = graph_->FindCommonDominator( + new_dominator, successor->predecessors_.Get(j)); + } + if (old_dominator != new_dominator) { + successor->SetDominator(new_dominator); + old_dominator->RemoveDominatedBlock(successor); + new_dominator->AddDominatedBlock(successor); + } - predecessors_.Reset(); + // Remove this block's entries in the successor's phis. + if (successor->predecessors_.Size() == 1u) { + // The successor has just one predecessor left. Replace phis with the only + // remaining input. + for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + HPhi* phi = phi_it.Current()->AsPhi(); + phi->ReplaceWith(phi->InputAt(1 - this_index)); + successor->RemovePhi(phi); + } + } else { + for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + phi_it.Current()->AsPhi()->RemoveInputAt(this_index); + } + } + } successors_.Reset(); - dominator_ = nullptr; - graph_ = nullptr; + + // Disconnect from the dominator. + dominator_->RemoveDominatedBlock(this); + SetDominator(nullptr); + + // Delete from the graph. The function safely deletes remaining instructions + // and updates the reverse post order. + graph_->DeleteDeadBlock(this); + SetGraph(nullptr); } void HBasicBlock::MergeWith(HBasicBlock* other) { - DCHECK(successors_.IsEmpty()) << "Unimplemented block merge scenario"; - DCHECK(dominated_blocks_.IsEmpty() - || (dominated_blocks_.Size() == 1 && dominated_blocks_.Get(0) == other)) - << "Unimplemented block merge scenario"; + DCHECK_EQ(GetGraph(), other->GetGraph()); + DCHECK(GetDominatedBlocks().Contains(other)); + DCHECK_EQ(GetSuccessors().Size(), 1u); + DCHECK_EQ(GetSuccessors().Get(0), other); + DCHECK_EQ(other->GetPredecessors().Size(), 1u); + DCHECK_EQ(other->GetPredecessors().Get(0), this); DCHECK(other->GetPhis().IsEmpty()); + // Move instructions from `other` to `this`. + DCHECK(EndsWithControlFlowInstruction()); + RemoveInstruction(GetLastInstruction()); + instructions_.Add(other->GetInstructions()); + other->instructions_.SetBlockOfInstructions(this); + other->instructions_.Clear(); + + // Remove `other` from the loops it is included in. + for (HLoopInformationOutwardIterator it(*other); !it.Done(); it.Advance()) { + HLoopInformation* loop_info = it.Current(); + loop_info->Remove(other); + if (loop_info->IsBackEdge(*other)) { + loop_info->ReplaceBackEdge(other, this); + } + } + + // Update links to the successors of `other`. successors_.Reset(); - dominated_blocks_.Reset(); + while (!other->successors_.IsEmpty()) { + HBasicBlock* successor = other->successors_.Get(0); + successor->ReplacePredecessor(other, this); + } + + // Update the dominator tree. + dominated_blocks_.Delete(other); + for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) { + HBasicBlock* dominated = other->GetDominatedBlocks().Get(i); + dominated_blocks_.Add(dominated); + dominated->SetDominator(this); + } + other->dominated_blocks_.Reset(); + other->dominator_ = nullptr; + + // Clear the list of predecessors of `other` in preparation of deleting it. + other->predecessors_.Reset(); + + // Delete `other` from the graph. The function updates reverse post order. 
+ graph_->DeleteDeadBlock(other); + other->SetGraph(nullptr); +} + +void HBasicBlock::MergeWithInlined(HBasicBlock* other) { + DCHECK_NE(GetGraph(), other->GetGraph()); + DCHECK(GetDominatedBlocks().IsEmpty()); + DCHECK(GetSuccessors().IsEmpty()); + DCHECK(!EndsWithControlFlowInstruction()); + DCHECK_EQ(other->GetPredecessors().Size(), 1u); + DCHECK(other->GetPredecessors().Get(0)->IsEntryBlock()); + DCHECK(other->GetPhis().IsEmpty()); + DCHECK(!other->IsInLoop()); + + // Move instructions from `other` to `this`. instructions_.Add(other->GetInstructions()); - other->GetInstructions().SetBlockOfInstructions(this); + other->instructions_.SetBlockOfInstructions(this); - while (!other->GetSuccessors().IsEmpty()) { - HBasicBlock* successor = other->GetSuccessors().Get(0); + // Update links to the successors of `other`. + successors_.Reset(); + while (!other->successors_.IsEmpty()) { + HBasicBlock* successor = other->successors_.Get(0); successor->ReplacePredecessor(other, this); } + // Update the dominator tree. for (size_t i = 0, e = other->GetDominatedBlocks().Size(); i < e; ++i) { HBasicBlock* dominated = other->GetDominatedBlocks().Get(i); dominated_blocks_.Add(dominated); @@ -973,6 +1276,24 @@ static void MakeRoomFor(GrowableArray<HBasicBlock*>* blocks, } } +void HGraph::DeleteDeadBlock(HBasicBlock* block) { + DCHECK_EQ(block->GetGraph(), this); + DCHECK(block->GetSuccessors().IsEmpty()); + DCHECK(block->GetPredecessors().IsEmpty()); + DCHECK(block->GetDominatedBlocks().IsEmpty()); + DCHECK(block->GetDominator() == nullptr); + + for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + block->RemoveInstruction(it.Current()); + } + for (HBackwardInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + block->RemovePhi(it.Current()->AsPhi()); + } + + reverse_post_order_.Delete(block); + blocks_.Put(block->GetBlockId(), nullptr); +} + void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { if (GetBlocks().Size() == 3) { // Simple case of an entry block, a body block, and an exit block. @@ -1005,7 +1326,7 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { HBasicBlock* first = entry_block_->GetSuccessors().Get(0); DCHECK(!first->IsInLoop()); - at->MergeWith(first); + at->MergeWithInlined(first); exit_block_->ReplaceWith(to); // Update all predecessors of the exit block (now the `to` block) @@ -1094,11 +1415,9 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { loop_it.Current()->Add(to); } if (info->IsBackEdge(*at)) { - // Only `at` can become a back edge, as the inlined blocks - // are predecessors of `at`. - DCHECK_EQ(1u, info->NumberOfBackEdges()); - info->ClearBackEdges(); - info->AddBackEdge(to); + // Only `to` can become a back edge, as the inlined blocks + // are predecessors of `to`. + info->ReplaceBackEdge(at, to); } } } @@ -1113,7 +1432,7 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // - Remove suspend checks, that hold an environment. // We must do this after the other blocks have been inlined, otherwise ids of // constants could overlap with the inner graph. 
- int parameter_index = 0; + size_t parameter_index = 0; for (HInstructionIterator it(entry_block_->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); if (current->IsNullConstant()) { @@ -1122,10 +1441,19 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { current->ReplaceWith(outer_graph->GetIntConstant(current->AsIntConstant()->GetValue())); } else if (current->IsLongConstant()) { current->ReplaceWith(outer_graph->GetLongConstant(current->AsLongConstant()->GetValue())); - } else if (current->IsFloatConstant() || current->IsDoubleConstant()) { - // TODO: Don't duplicate floating-point constants. - current->MoveBefore(outer_graph->GetEntryBlock()->GetLastInstruction()); + } else if (current->IsFloatConstant()) { + current->ReplaceWith(outer_graph->GetFloatConstant(current->AsFloatConstant()->GetValue())); + } else if (current->IsDoubleConstant()) { + current->ReplaceWith(outer_graph->GetDoubleConstant(current->AsDoubleConstant()->GetValue())); } else if (current->IsParameterValue()) { + if (kIsDebugBuild + && invoke->IsInvokeStaticOrDirect() + && invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) { + // Ensure we do not use the last input of `invoke`, as it + // contains a clinit check which is not an actual argument. + size_t last_input_index = invoke->InputCount() - 1; + DCHECK(parameter_index != last_input_index); + } current->ReplaceWith(invoke->InputAt(parameter_index++)); } else { DCHECK(current->IsGoto() || current->IsSuspendCheck()); @@ -1137,53 +1465,6 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { invoke->GetBlock()->RemoveInstruction(invoke); } -void HGraph::MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block) { - // Find the two branches of an If. - DCHECK_EQ(start_block->GetSuccessors().Size(), 2u); - HBasicBlock* left_branch = start_block->GetSuccessors().Get(0); - HBasicBlock* right_branch = start_block->GetSuccessors().Get(1); - - // Make sure this is a diamond control-flow path. - DCHECK_EQ(left_branch->GetSuccessors().Get(0), end_block); - DCHECK_EQ(right_branch->GetSuccessors().Get(0), end_block); - DCHECK_EQ(end_block->GetPredecessors().Size(), 2u); - DCHECK_EQ(start_block, end_block->GetDominator()); - - // Disconnect the branches and merge the two blocks. This will move - // all instructions from 'end_block' to 'start_block'. - DCHECK(left_branch->IsSingleGoto()); - DCHECK(right_branch->IsSingleGoto()); - left_branch->DisconnectFromAll(); - right_branch->DisconnectFromAll(); - start_block->RemoveInstruction(start_block->GetLastInstruction()); - start_block->MergeWith(end_block); - - // Delete the now redundant blocks from the graph. - blocks_.Put(left_branch->GetBlockId(), nullptr); - blocks_.Put(right_branch->GetBlockId(), nullptr); - blocks_.Put(end_block->GetBlockId(), nullptr); - - // Update reverse post order. - reverse_post_order_.Delete(left_branch); - reverse_post_order_.Delete(right_branch); - reverse_post_order_.Delete(end_block); - - // Update loops which contain the code. 
- for (HLoopInformationOutwardIterator it(*start_block); !it.Done(); it.Advance()) { - HLoopInformation* loop_info = it.Current(); - DCHECK(loop_info->Contains(*left_branch)); - DCHECK(loop_info->Contains(*right_branch)); - DCHECK(loop_info->Contains(*end_block)); - loop_info->Remove(left_branch); - loop_info->Remove(right_branch); - loop_info->Remove(end_block); - if (loop_info->IsBackEdge(*end_block)) { - loop_info->RemoveBackEdge(end_block); - loop_info->AddBackEdge(start_block); - } - } -} - std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { ScopedObjectAccess soa(Thread::Current()); os << "[" diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index b89487f4f6..cb2e5ccab4 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -48,6 +48,7 @@ class HPhi; class HSuspendCheck; class LiveInterval; class LocationSummary; +class SlowPathCode; class SsaBuilder; static const int kDefaultNumberOfBlocks = 8; @@ -97,6 +98,9 @@ class HInstructionList { void AddAfter(HInstruction* cursor, const HInstructionList& instruction_list); void Add(const HInstructionList& instruction_list); + // Return the number of instructions in the list. This is an expensive operation. + size_t CountSize() const; + private: HInstruction* first_instruction_; HInstruction* last_instruction_; @@ -113,7 +117,11 @@ class HInstructionList { // Control-flow graph of a method. Contains a list of basic blocks. class HGraph : public ArenaObject<kArenaAllocMisc> { public: - HGraph(ArenaAllocator* arena, bool debuggable = false, int start_instruction_id = 0) + HGraph(ArenaAllocator* arena, + const DexFile& dex_file, + uint32_t method_idx, + bool debuggable = false, + int start_instruction_id = 0) : arena_(arena), blocks_(arena, kDefaultNumberOfBlocks), reverse_post_order_(arena, kDefaultNumberOfBlocks), @@ -124,12 +132,16 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { number_of_vregs_(0), number_of_in_vregs_(0), temporaries_vreg_slots_(0), - has_array_accesses_(false), + has_bounds_checks_(false), debuggable_(debuggable), current_instruction_id_(start_instruction_id), + dex_file_(dex_file), + method_idx_(method_idx), cached_null_constant_(nullptr), cached_int_constants_(std::less<int32_t>(), arena->Adapter()), - cached_long_constants_(std::less<int64_t>(), arena->Adapter()) {} + cached_float_constants_(std::less<int32_t>(), arena->Adapter()), + cached_long_constants_(std::less<int64_t>(), arena->Adapter()), + cached_double_constants_(std::less<int64_t>(), arena->Adapter()) {} ArenaAllocator* GetArena() const { return arena_; } const GrowableArray<HBasicBlock*>& GetBlocks() const { return blocks_; } @@ -168,7 +180,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // Inline this graph in `outer_graph`, replacing the given `invoke` instruction. void InlineInto(HGraph* outer_graph, HInvoke* invoke); - void MergeEmptyBranches(HBasicBlock* start_block, HBasicBlock* end_block); + // Removes `block` from the graph. 
+ void DeleteDeadBlock(HBasicBlock* block); void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor); void SimplifyLoop(HBasicBlock* header); @@ -226,19 +239,19 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { return linear_order_; } - bool HasArrayAccesses() const { - return has_array_accesses_; + bool HasBoundsChecks() const { + return has_bounds_checks_; } - void SetHasArrayAccesses(bool value) { - has_array_accesses_ = value; + void SetHasBoundsChecks(bool value) { + has_bounds_checks_ = value; } bool IsDebuggable() const { return debuggable_; } // Returns a constant of the given type and value. If it does not exist - // already, it is created and inserted into the graph. Only integral types - // are currently supported. + // already, it is created and inserted into the graph. This method is only for + // integral types. HConstant* GetConstant(Primitive::Type type, int64_t value); HNullConstant* GetNullConstant(); HIntConstant* GetIntConstant(int32_t value) { @@ -247,9 +260,24 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { HLongConstant* GetLongConstant(int64_t value) { return CreateConstant(value, &cached_long_constants_); } + HFloatConstant* GetFloatConstant(float value) { + return CreateConstant(bit_cast<int32_t, float>(value), &cached_float_constants_); + } + HDoubleConstant* GetDoubleConstant(double value) { + return CreateConstant(bit_cast<int64_t, double>(value), &cached_double_constants_); + } - private: HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const; + + const DexFile& GetDexFile() const { + return dex_file_; + } + + uint32_t GetMethodIdx() const { + return method_idx_; + } + + private: void VisitBlockForDominatorTree(HBasicBlock* block, HBasicBlock* predecessor, GrowableArray<size_t>* visits); @@ -260,10 +288,34 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited); - template <class InstType, typename ValueType> - InstType* CreateConstant(ValueType value, ArenaSafeMap<ValueType, InstType*>* cache); + template <class InstructionType, typename ValueType> + InstructionType* CreateConstant(ValueType value, + ArenaSafeMap<ValueType, InstructionType*>* cache) { + // Try to find an existing constant of the given value. + InstructionType* constant = nullptr; + auto cached_constant = cache->find(value); + if (cached_constant != cache->end()) { + constant = cached_constant->second; + } + + // If not found or previously deleted, create and cache a new instruction. + if (constant == nullptr || constant->GetBlock() == nullptr) { + constant = new (arena_) InstructionType(value); + cache->Overwrite(value, constant); + InsertConstant(constant); + } + return constant; + } + void InsertConstant(HConstant* instruction); + // Cache a float constant into the graph. This method should only be + // called by the SsaBuilder when creating "equivalent" instructions. + void CacheFloatConstant(HFloatConstant* constant); + + // See CacheFloatConstant comment. + void CacheDoubleConstant(HDoubleConstant* constant); + ArenaAllocator* const arena_; // List of blocks in insertion order. @@ -290,8 +342,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // Number of vreg size slots that the temporaries use (used in baseline compiler). size_t temporaries_vreg_slots_; - // Has array accesses. We can totally skip BCE if it's false. - bool has_array_accesses_; + // Has bounds checks. 
We can totally skip BCE if it's false. + bool has_bounds_checks_; // Indicates whether the graph should be compiled in a way that // ensures full debuggability. If false, we can apply more @@ -301,11 +353,20 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // The current id to assign to a newly added instruction. See HInstruction.id_. int32_t current_instruction_id_; - // Cached common constants often needed by optimization passes. + // The dex file the method comes from. + const DexFile& dex_file_; + + // The method index in the dex file. + const uint32_t method_idx_; + + // Cached constants. HNullConstant* cached_null_constant_; ArenaSafeMap<int32_t, HIntConstant*> cached_int_constants_; + ArenaSafeMap<int32_t, HFloatConstant*> cached_float_constants_; ArenaSafeMap<int64_t, HLongConstant*> cached_long_constants_; + ArenaSafeMap<int64_t, HDoubleConstant*> cached_double_constants_; + friend class SsaBuilder; // For caching constants. friend class SsaLivenessAnalysis; // For the linear order. ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); DISALLOW_COPY_AND_ASSIGN(HGraph); @@ -357,14 +418,30 @@ class HLoopInformation : public ArenaObject<kArenaAllocMisc> { return back_edges_; } - void ClearBackEdges() { - back_edges_.Reset(); + // Returns the lifetime position of the back edge that has the + // greatest lifetime position. + size_t GetLifetimeEnd() const; + + void ReplaceBackEdge(HBasicBlock* existing, HBasicBlock* new_back_edge) { + for (size_t i = 0, e = back_edges_.Size(); i < e; ++i) { + if (back_edges_.Get(i) == existing) { + back_edges_.Put(i, new_back_edge); + return; + } + } + UNREACHABLE(); } - // Find blocks that are part of this loop. Returns whether the loop is a natural loop, + // Finds blocks that are part of this loop. Returns whether the loop is a natural loop, // that is the header dominates the back edge. bool Populate(); + // Reanalyzes the loop by removing loop info from its blocks and re-running + // Populate(). If there are no back edges left, the loop info is completely + // removed as well as its SuspendCheck instruction. It must be run on nested + // inner loops first. + void Update(); + // Returns whether this loop information contains `block`. // Note that this loop information *must* be populated before entering this function. 
bool Contains(const HBasicBlock& block) const; @@ -451,6 +528,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { HBasicBlock* GetDominator() const { return dominator_; } void SetDominator(HBasicBlock* dominator) { dominator_ = dominator; } void AddDominatedBlock(HBasicBlock* block) { dominated_blocks_.Add(block); } + void RemoveDominatedBlock(HBasicBlock* block) { dominated_blocks_.Delete(block); } void ReplaceDominatedBlock(HBasicBlock* existing, HBasicBlock* new_block) { for (size_t i = 0, e = dominated_blocks_.Size(); i < e; ++i) { if (dominated_blocks_.Get(i) == existing) { @@ -520,6 +598,13 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { predecessors_.Put(1, temp); } + void SwapSuccessors() { + DCHECK_EQ(successors_.Size(), 2u); + HBasicBlock* temp = successors_.Get(0); + successors_.Put(0, successors_.Get(1)); + successors_.Put(1, temp); + } + size_t GetPredecessorIndexOf(HBasicBlock* predecessor) { for (size_t i = 0, e = predecessors_.Size(); i < e; ++i) { if (predecessors_.Get(i) == predecessor) { @@ -550,7 +635,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { // that this method does not update the graph, reverse post order, loop // information, nor make sure the blocks are consistent (for example ending // with a control flow instruction). - void MergeWith(HBasicBlock* other); + void MergeWithInlined(HBasicBlock* other); // Replace `this` with `other`. Predecessors, successors, and dominated blocks // of `this` are moved to `other`. @@ -559,15 +644,22 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { // with a control flow instruction). void ReplaceWith(HBasicBlock* other); - // Disconnects `this` from all its predecessors, successors and the dominator. - // It assumes that `this` does not dominate any blocks. - // Note that this method does not update the graph, reverse post order, loop - // information, nor make sure the blocks are consistent (for example ending - // with a control flow instruction). - void DisconnectFromAll(); + // Merge `other` at the end of `this`. This method updates loops, reverse post + // order, links to predecessors, successors, dominators and deletes the block + // from the graph. The two blocks must be successive, i.e. `this` is the only + // predecessor of `other` and vice versa. + void MergeWith(HBasicBlock* other); + + // Disconnects `this` from all its predecessors, successors and dominator, + // removes it from all loops it is included in and eventually from the graph. + // The block must not dominate any other block. Predecessors and successors + // are safely updated. + void DisconnectAndDelete(); void AddInstruction(HInstruction* instruction); + // Insert `instruction` before/after an existing instruction `cursor`. void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); + void InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor); // Replace instruction `initial` with `replacement` within this block. void ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement); @@ -578,9 +670,10 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { // instruction is not in use and removes it from the use lists of its inputs. 
void RemoveInstruction(HInstruction* instruction, bool ensure_safety = true); void RemovePhi(HPhi* phi, bool ensure_safety = true); + void RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_safety = true); bool IsLoopHeader() const { - return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this); + return IsInLoop() && (loop_information_->GetHeader() == this); } bool IsLoopPreHeaderFirstPredecessor() const { @@ -599,7 +692,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { void SetInLoop(HLoopInformation* info) { if (IsLoopHeader()) { // Nothing to do. This just means `info` is an outer loop. - } else if (loop_information_ == nullptr) { + } else if (!IsInLoop()) { loop_information_ = info; } else if (loop_information_->Contains(*info->GetHeader())) { // Block is currently part of an outer loop. Make it part of this inner loop. @@ -620,7 +713,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { bool IsInLoop() const { return loop_information_ != nullptr; } - // Returns wheter this block dominates the blocked passed as parameter. + // Returns whether this block dominates the block passed as parameter. bool Dominates(HBasicBlock* block) const; size_t GetLifetimeStart() const { return lifetime_start_; } @@ -671,7 +764,7 @@ class HLoopInformationOutwardIterator : public ValueObject { void Advance() { DCHECK(!Done()); - current_ = current_->GetHeader()->GetDominator()->GetLoopInformation(); + current_ = current_->GetPreHeader()->GetLoopInformation(); } HLoopInformation* Current() const { @@ -784,13 +877,14 @@ class HUseListNode : public ArenaObject<kArenaAllocMisc> { HUseListNode* GetNext() const { return next_; } T GetUser() const { return user_; } size_t GetIndex() const { return index_; } + void SetIndex(size_t index) { index_ = index; } private: HUseListNode(T user, size_t index) : user_(user), index_(index), prev_(nullptr), next_(nullptr) {} T const user_; - const size_t index_; + size_t index_; HUseListNode<T>* prev_; HUseListNode<T>* next_; @@ -861,6 +955,14 @@ class HUseList : public ValueObject { return first_ != nullptr && first_->next_ == nullptr; } + size_t SizeSlow() const { + size_t count = 0; + for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) { + ++count; + } + return count; + } + private: HUseListNode<T>* first_; }; @@ -987,15 +1089,47 @@ class SideEffects : public ValueObject { // A HEnvironment object contains the values of virtual registers at a given location. 
class HEnvironment : public ArenaObject<kArenaAllocMisc> { public: - HEnvironment(ArenaAllocator* arena, size_t number_of_vregs) - : vregs_(arena, number_of_vregs) { + HEnvironment(ArenaAllocator* arena, + size_t number_of_vregs, + const DexFile& dex_file, + uint32_t method_idx, + uint32_t dex_pc) + : vregs_(arena, number_of_vregs), + locations_(arena, number_of_vregs), + parent_(nullptr), + dex_file_(dex_file), + method_idx_(method_idx), + dex_pc_(dex_pc) { vregs_.SetSize(number_of_vregs); for (size_t i = 0; i < number_of_vregs; i++) { vregs_.Put(i, HUserRecord<HEnvironment*>()); } + + locations_.SetSize(number_of_vregs); + for (size_t i = 0; i < number_of_vregs; ++i) { + locations_.Put(i, Location()); + } } - void CopyFrom(HEnvironment* env); + void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) { + parent_ = new (allocator) HEnvironment(allocator, + parent->Size(), + parent->GetDexFile(), + parent->GetMethodIdx(), + parent->GetDexPc()); + if (parent->GetParent() != nullptr) { + parent_->SetAndCopyParentChain(allocator, parent->GetParent()); + } + parent_->CopyFrom(parent); + } + + void CopyFrom(const GrowableArray<HInstruction*>& locals); + void CopyFrom(HEnvironment* environment); + + // Copy from `env`. If it's a loop phi for `loop_header`, copy the first + // input to the loop phi instead. This is for inserting instructions that + // require an environment (like HDeoptimization) in the loop pre-header. + void CopyFromWithLoopPhiAdjustment(HEnvironment* env, HBasicBlock* loop_header); void SetRawEnvAt(size_t index, HInstruction* instruction) { vregs_.Put(index, HUserRecord<HEnvironment*>(instruction)); @@ -1009,6 +1143,28 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { size_t Size() const { return vregs_.Size(); } + HEnvironment* GetParent() const { return parent_; } + + void SetLocationAt(size_t index, Location location) { + locations_.Put(index, location); + } + + Location GetLocationAt(size_t index) const { + return locations_.Get(index); + } + + uint32_t GetDexPc() const { + return dex_pc_; + } + + uint32_t GetMethodIdx() const { + return method_idx_; + } + + const DexFile& GetDexFile() const { + return dex_file_; + } + private: // Record instructions' use entries of this environment for constant-time removal. // It should only be called by HInstruction when a new environment use is added. @@ -1019,8 +1175,13 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { } GrowableArray<HUserRecord<HEnvironment*> > vregs_; + GrowableArray<Location> locations_; + HEnvironment* parent_; + const DexFile& dex_file_; + const uint32_t method_idx_; + const uint32_t dex_pc_; - friend HInstruction; + friend class HInstruction; DISALLOW_COPY_AND_ASSIGN(HEnvironment); }; @@ -1150,6 +1311,11 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { } virtual bool NeedsEnvironment() const { return false; } + virtual uint32_t GetDexPc() const { + LOG(FATAL) << "GetDexPc() cannot be called on an instruction that" + " does not need an environment"; + UNREACHABLE(); + } virtual bool IsControlFlow() const { return false; } virtual bool CanThrow() const { return false; } bool HasSideEffects() const { return side_effects_.HasSideEffects(); } @@ -1227,8 +1393,31 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { // copying, the uses lists are being updated. 
void CopyEnvironmentFrom(HEnvironment* environment) { ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); - environment_ = new (allocator) HEnvironment(allocator, environment->Size()); + environment_ = new (allocator) HEnvironment( + allocator, + environment->Size(), + environment->GetDexFile(), + environment->GetMethodIdx(), + environment->GetDexPc()); environment_->CopyFrom(environment); + if (environment->GetParent() != nullptr) { + environment_->SetAndCopyParentChain(allocator, environment->GetParent()); + } + } + + void CopyEnvironmentFromWithLoopPhiAdjustment(HEnvironment* environment, + HBasicBlock* block) { + ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); + environment_ = new (allocator) HEnvironment( + allocator, + environment->Size(), + environment->GetDexFile(), + environment->GetMethodIdx(), + environment->GetDexPc()); + if (environment->GetParent() != nullptr) { + environment_->SetAndCopyParentChain(allocator, environment->GetParent()); + } + environment_->CopyFromWithLoopPhiAdjustment(environment, block); } // Returns the number of entries in the environment. Typically, that is the @@ -1604,7 +1793,7 @@ class HDeoptimize : public HTemplateInstruction<1> { bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Deoptimize); @@ -2008,28 +2197,30 @@ class HFloatConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } bool IsMinusOne() const OVERRIDE { - return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) == - bit_cast<uint32_t, float>((-1.0f)); + return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f)); } bool IsZero() const OVERRIDE { - return AsFloatConstant()->GetValue() == 0.0f; + return value_ == 0.0f; } bool IsOne() const OVERRIDE { - return bit_cast<uint32_t, float>(AsFloatConstant()->GetValue()) == - bit_cast<uint32_t, float>(1.0f); + return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f); + } + bool IsNaN() const { + return std::isnan(value_); } DECLARE_INSTRUCTION(FloatConstant); private: explicit HFloatConstant(float value) : HConstant(Primitive::kPrimFloat), value_(value) {} + explicit HFloatConstant(int32_t value) + : HConstant(Primitive::kPrimFloat), value_(bit_cast<float, int32_t>(value)) {} const float value_; - // Only the SsaBuilder can currently create floating-point constants. If we - // ever need to create them later in the pipeline, we will have to handle them - // the same way as integral constants. + // Only the SsaBuilder and HGraph can create floating-point constants. 
friend class SsaBuilder; + friend class HGraph; DISALLOW_COPY_AND_ASSIGN(HFloatConstant); }; @@ -2045,28 +2236,30 @@ class HDoubleConstant : public HConstant { size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } bool IsMinusOne() const OVERRIDE { - return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) == - bit_cast<uint64_t, double>((-1.0)); + return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0)); } bool IsZero() const OVERRIDE { - return AsDoubleConstant()->GetValue() == 0.0; + return value_ == 0.0; } bool IsOne() const OVERRIDE { - return bit_cast<uint64_t, double>(AsDoubleConstant()->GetValue()) == - bit_cast<uint64_t, double>(1.0); + return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0); + } + bool IsNaN() const { + return std::isnan(value_); } DECLARE_INSTRUCTION(DoubleConstant); private: explicit HDoubleConstant(double value) : HConstant(Primitive::kPrimDouble), value_(value) {} + explicit HDoubleConstant(int64_t value) + : HConstant(Primitive::kPrimDouble), value_(bit_cast<double, int64_t>(value)) {} const double value_; - // Only the SsaBuilder can currently create floating-point constants. If we - // ever need to create them later in the pipeline, we will have to handle them - // the same way as integral constants. + // Only the SsaBuilder and HGraph can create floating-point constants. friend class SsaBuilder; + friend class HGraph; DISALLOW_COPY_AND_ASSIGN(HDoubleConstant); }; @@ -2163,9 +2356,15 @@ class HInvoke : public HInstruction { SetRawInputAt(index, argument); } + // Return the number of arguments. This number can be lower than + // the number of inputs returned by InputCount(), as some invoke + // instructions (e.g. HInvokeStaticOrDirect) can have non-argument + // inputs at the end of their list of inputs. + uint32_t GetNumberOfArguments() const { return number_of_arguments_; } + Primitive::Type GetType() const OVERRIDE { return return_type_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint32_t GetDexMethodIndex() const { return dex_method_index_; } @@ -2182,16 +2381,19 @@ class HInvoke : public HInstruction { protected: HInvoke(ArenaAllocator* arena, uint32_t number_of_arguments, + uint32_t number_of_other_inputs, Primitive::Type return_type, uint32_t dex_pc, uint32_t dex_method_index) : HInstruction(SideEffects::All()), + number_of_arguments_(number_of_arguments), inputs_(arena, number_of_arguments), return_type_(return_type), dex_pc_(dex_pc), dex_method_index_(dex_method_index), intrinsic_(Intrinsics::kNone) { - inputs_.SetSize(number_of_arguments); + uint32_t number_of_inputs = number_of_arguments + number_of_other_inputs; + inputs_.SetSize(number_of_inputs); } const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); } @@ -2199,6 +2401,7 @@ class HInvoke : public HInstruction { inputs_.Put(index, input); } + uint32_t number_of_arguments_; GrowableArray<HUserRecord<HInstruction*> > inputs_; const Primitive::Type return_type_; const uint32_t dex_pc_; @@ -2211,18 +2414,35 @@ class HInvoke : public HInstruction { class HInvokeStaticOrDirect : public HInvoke { public: + // Requirements of this method call regarding the class + // initialization (clinit) check of its declaring class. + enum class ClinitCheckRequirement { + kNone, // Class already initialized. + kExplicit, // Static call having explicit clinit check as last input. 
+ kImplicit, // Static call implicitly requiring a clinit check. + }; + HInvokeStaticOrDirect(ArenaAllocator* arena, uint32_t number_of_arguments, Primitive::Type return_type, uint32_t dex_pc, uint32_t dex_method_index, bool is_recursive, + int32_t string_init_offset, InvokeType original_invoke_type, - InvokeType invoke_type) - : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), + InvokeType invoke_type, + ClinitCheckRequirement clinit_check_requirement) + : HInvoke(arena, + number_of_arguments, + clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u, + return_type, + dex_pc, + dex_method_index), original_invoke_type_(original_invoke_type), invoke_type_(invoke_type), - is_recursive_(is_recursive) {} + is_recursive_(is_recursive), + clinit_check_requirement_(clinit_check_requirement), + string_init_offset_(string_init_offset) {} bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { UNUSED(obj); @@ -2235,13 +2455,67 @@ class HInvokeStaticOrDirect : public HInvoke { InvokeType GetInvokeType() const { return invoke_type_; } bool IsRecursive() const { return is_recursive_; } bool NeedsDexCache() const OVERRIDE { return !IsRecursive(); } + bool IsStringInit() const { return string_init_offset_ != 0; } + int32_t GetStringInitOffset() const { return string_init_offset_; } + + // Is this instruction a call to a static method? + bool IsStatic() const { + return GetInvokeType() == kStatic; + } + + // Remove the art::HLoadClass instruction set as last input by + // art::PrepareForRegisterAllocation::VisitClinitCheck in lieu of + // the initial art::HClinitCheck instruction (only relevant for + // static calls with explicit clinit check). + void RemoveLoadClassAsLastInput() { + DCHECK(IsStaticWithExplicitClinitCheck()); + size_t last_input_index = InputCount() - 1; + HInstruction* last_input = InputAt(last_input_index); + DCHECK(last_input != nullptr); + DCHECK(last_input->IsLoadClass()) << last_input->DebugName(); + RemoveAsUserOfInput(last_input_index); + inputs_.DeleteAt(last_input_index); + clinit_check_requirement_ = ClinitCheckRequirement::kImplicit; + DCHECK(IsStaticWithImplicitClinitCheck()); + } + + // Is this a call to a static method whose declaring class has an + // explicit initialization check in the graph? + bool IsStaticWithExplicitClinitCheck() const { + return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kExplicit); + } + + // Is this a call to a static method whose declaring class has an + // implicit initialization check requirement? + bool IsStaticWithImplicitClinitCheck() const { + return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kImplicit); + } DECLARE_INSTRUCTION(InvokeStaticOrDirect); + protected: + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { + const HUserRecord<HInstruction*> input_record = HInvoke::InputRecordAt(i); + if (kIsDebugBuild && IsStaticWithExplicitClinitCheck() && (i == InputCount() - 1)) { + HInstruction* input = input_record.GetInstruction(); + // `input` is the last input of a static invoke marked as having + // an explicit clinit check. It must either be: + // - an art::HClinitCheck instruction, set by art::HGraphBuilder; or + // - an art::HLoadClass instruction, set by art::PrepareForRegisterAllocation.
+ DCHECK(input != nullptr); + DCHECK(input->IsClinitCheck() || input->IsLoadClass()) << input->DebugName(); + } + return input_record; + } + private: const InvokeType original_invoke_type_; const InvokeType invoke_type_; const bool is_recursive_; + ClinitCheckRequirement clinit_check_requirement_; + // Thread entrypoint offset for string init method if this is a string init invoke. + // Note that there are multiple string init methods, each having its own offset. + int32_t string_init_offset_; DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect); }; @@ -2254,7 +2528,7 @@ class HInvokeVirtual : public HInvoke { uint32_t dex_pc, uint32_t dex_method_index, uint32_t vtable_index) - : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), + : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index), vtable_index_(vtable_index) {} bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { @@ -2280,7 +2554,7 @@ class HInvokeInterface : public HInvoke { uint32_t dex_pc, uint32_t dex_method_index, uint32_t imt_index) - : HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index), + : HInvoke(arena, number_of_arguments, 0u, return_type, dex_pc, dex_method_index), imt_index_(imt_index) {} bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { @@ -2307,7 +2581,7 @@ class HNewInstance : public HExpression<0> { type_index_(type_index), entrypoint_(entrypoint) {} - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } // Calls runtime so needs an environment. @@ -2359,7 +2633,7 @@ class HNewArray : public HExpression<1> { SetRawInputAt(0, length); } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } // Calls runtime so needs an environment. @@ -2454,7 +2728,7 @@ class HDiv : public HBinaryOperation { return (y == -1) ? -x : x / y; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Div); @@ -2481,7 +2755,7 @@ class HRem : public HBinaryOperation { return (y == -1) ? 0 : x % y; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Rem); @@ -2508,7 +2782,7 @@ class HDivZeroCheck : public HExpression<1> { bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(DivZeroCheck); @@ -2703,11 +2977,15 @@ class HTypeConversion : public HExpression<1> { // Required by the x86 and ARM code generators when producing calls // to the runtime. - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; } + // Try to statically evaluate the conversion and return a HConstant + // containing the result. If the input cannot be converted, return nullptr. 
+ HConstant* TryStaticEvaluation() const; + DECLARE_INSTRUCTION(TypeConversion); private: @@ -2746,6 +3024,7 @@ class HPhi : public HInstruction { size_t InputCount() const OVERRIDE { return inputs_.Size(); } void AddInput(HInstruction* input); + void RemoveInputAt(size_t index); Primitive::Type GetType() const OVERRIDE { return type_; } void SetType(Primitive::Type type) { type_ = type; } @@ -2812,7 +3091,7 @@ class HNullCheck : public HExpression<1> { bool CanBeNull() const OVERRIDE { return false; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(NullCheck); @@ -2975,7 +3254,7 @@ class HArraySet : public HTemplateInstruction<3> { bool NeedsTypeCheck() const { return needs_type_check_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } HInstruction* GetArray() const { return InputAt(0); } HInstruction* GetIndex() const { return InputAt(1); } @@ -3045,7 +3324,7 @@ class HBoundsCheck : public HExpression<2> { bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(BoundsCheck); @@ -3085,19 +3364,25 @@ class HTemporary : public HTemplateInstruction<0> { class HSuspendCheck : public HTemplateInstruction<0> { public: explicit HSuspendCheck(uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc) {} + : HTemplateInstruction(SideEffects::None()), dex_pc_(dex_pc), slow_path_(nullptr) {} bool NeedsEnvironment() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } + void SetSlowPath(SlowPathCode* slow_path) { slow_path_ = slow_path; } + SlowPathCode* GetSlowPath() const { return slow_path_; } DECLARE_INSTRUCTION(SuspendCheck); private: const uint32_t dex_pc_; + // Only used for code generation, in order to share the same slow path between back edges + // of the same loop. + SlowPathCode* slow_path_; + DISALLOW_COPY_AND_ASSIGN(HSuspendCheck); }; @@ -3124,7 +3409,7 @@ class HLoadClass : public HExpression<0> { size_t ComputeHashCode() const OVERRIDE { return type_index_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } bool IsReferrersClass() const { return is_referrers_class_; } @@ -3198,7 +3483,7 @@ class HLoadString : public HExpression<0> { size_t ComputeHashCode() const OVERRIDE { return string_index_; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint32_t GetStringIndex() const { return string_index_; } // TODO: Can we deopt or debug when we resolve a string? @@ -3214,7 +3499,6 @@ class HLoadString : public HExpression<0> { DISALLOW_COPY_AND_ASSIGN(HLoadString); }; -// TODO: Pass this check to HInvokeStaticOrDirect nodes. /** * Performs an initialization check on its Class object input.
*/ @@ -3237,7 +3521,7 @@ class HClinitCheck : public HExpression<1> { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); } @@ -3337,7 +3621,7 @@ class HThrow : public HTemplateInstruction<1> { bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } DECLARE_INSTRUCTION(Throw); @@ -3371,7 +3655,7 @@ class HInstanceOf : public HExpression<2> { return false; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool IsClassFinal() const { return class_is_final_; } @@ -3446,7 +3730,7 @@ class HCheckCast : public HTemplateInstruction<2> { bool MustDoNullCheck() const { return must_do_null_check_; } void ClearMustDoNullCheck() { must_do_null_check_ = false; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool IsClassFinal() const { return class_is_final_; } @@ -3492,7 +3776,7 @@ class HMonitorOperation : public HTemplateInstruction<1> { bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - uint32_t GetDexPc() const { return dex_pc_; } + uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } bool IsEnter() const { return kind_ == kEnter; } diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 4e83ce576c..2736453ccc 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -16,6 +16,7 @@ #include "base/arena_allocator.h" #include "nodes.h" +#include "optimizing_unit_test.h" #include "gtest/gtest.h" @@ -29,7 +30,7 @@ TEST(Node, RemoveInstruction) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -49,7 +50,8 @@ TEST(Node, RemoveInstruction) { first_block->AddSuccessor(exit_block); exit_block->AddInstruction(new (&allocator) HExit()); - HEnvironment* environment = new (&allocator) HEnvironment(&allocator, 1); + HEnvironment* environment = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); null_check->SetRawEnvironment(environment); environment->SetRawEnvAt(0, parameter); parameter->AddEnvUseAt(null_check->GetEnvironment(), 0); @@ -70,7 +72,7 @@ TEST(Node, InsertInstruction) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -96,7 +98,7 @@ TEST(Node, AddInstruction) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -112,4 +114,51 @@ TEST(Node, AddInstruction) { ASSERT_TRUE(parameter->GetUses().HasOnlyOneUse()); } +TEST(Node, ParentEnvironment) { + ArenaPool pool; + ArenaAllocator allocator(&pool); + + HGraph* graph = CreateGraph(&allocator); + HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + graph->AddBlock(entry); + graph->SetEntryBlock(entry); + HInstruction* 
parameter1 = new (&allocator) HParameterValue(0, Primitive::kPrimNot); + HInstruction* with_environment = new (&allocator) HNullCheck(parameter1, 0); + entry->AddInstruction(parameter1); + entry->AddInstruction(with_environment); + entry->AddInstruction(new (&allocator) HExit()); + + ASSERT_TRUE(parameter1->HasUses()); + ASSERT_TRUE(parameter1->GetUses().HasOnlyOneUse()); + + HEnvironment* environment = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); + GrowableArray<HInstruction*> array(&allocator, 1); + array.Add(parameter1); + + environment->CopyFrom(array); + with_environment->SetRawEnvironment(environment); + + ASSERT_TRUE(parameter1->HasEnvironmentUses()); + ASSERT_TRUE(parameter1->GetEnvUses().HasOnlyOneUse()); + + HEnvironment* parent1 = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); + parent1->CopyFrom(array); + + ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 2u); + + HEnvironment* parent2 = new (&allocator) HEnvironment( + &allocator, 1, graph->GetDexFile(), graph->GetMethodIdx(), 0); + parent2->CopyFrom(array); + parent1->SetAndCopyParentChain(&allocator, parent2); + + // One use for parent2, and one other use for the new parent of parent1. + ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 4u); + + // We have copied the parent chain. So we now have two more uses. + environment->SetAndCopyParentChain(&allocator, parent1); + ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 6u); +} + } // namespace art diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index b13e07eb22..c46a21955c 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -21,9 +21,9 @@ namespace art { -void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const { +void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const { if (stats_ != nullptr) { - stats_->RecordStat(compilation_stat); + stats_->RecordStat(compilation_stat, count); } } diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index 8b2028177b..ccf8de9f6a 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -48,7 +48,7 @@ class HOptimization : public ValueObject { void Check(); protected: - void MaybeRecordStat(MethodCompilationStat compilation_stat) const; + void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const; HGraph* const graph_; // Used to record stats about the optimization. diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index b2c13adf35..7aea249c42 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -21,6 +21,7 @@ #include "cfi_test.h" #include "gtest/gtest.h" #include "optimizing/code_generator.h" +#include "optimizing/optimizing_unit_test.h" #include "utils/assembler.h" #include "optimizing/optimizing_cfi_test_expected.inc" @@ -45,10 +46,10 @@ class OptimizingCFITest : public CFITest { std::unique_ptr<const InstructionSetFeatures> isa_features; std::string error; isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error)); - HGraph graph(&allocator); + HGraph* graph = CreateGraph(&allocator); // Generate simple frame with some spills. 
std::unique_ptr<CodeGenerator> code_gen( - CodeGenerator::Create(&graph, isa, *isa_features.get(), opts)); + CodeGenerator::Create(graph, isa, *isa_features.get(), opts)); const int frame_size = 64; int core_reg = 0; int fp_reg = 0; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index d99d35998a..8bb5d8ebae 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -320,15 +320,17 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - HDeadCodeElimination dce1(graph, stats); - HDeadCodeElimination dce2(graph, stats, "dead_code_elimination_final"); + HDeadCodeElimination dce1(graph, stats, + HDeadCodeElimination::kInitialDeadCodeEliminationPassName); + HDeadCodeElimination dce2(graph, stats, + HDeadCodeElimination::kFinalDeadCodeEliminationPassName); HConstantFolding fold1(graph); InstructionSimplifier simplify1(graph, stats); HBooleanSimplifier boolean_simplify(graph); HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, stats); - HConstantFolding fold2(graph); + HConstantFolding fold2(graph, "constant_folding_after_inlining"); SideEffectsAnalysis side_effects(graph); GVNOptimization gvn(graph, side_effects); LICM licm(graph, side_effects); @@ -512,7 +514,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite ArenaAllocator arena(Runtime::Current()->GetArenaPool()); HGraph* graph = new (&arena) HGraph( - &arena, compiler_driver->GetCompilerOptions().GetDebuggable()); + &arena, dex_file, method_idx, compiler_driver->GetCompilerOptions().GetDebuggable()); // For testing purposes, we put a special marker on method names that should be compiled // with this compiler. This makes sure we're not regressing. 
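The stats plumbing changed in optimization.cc/h above pairs with the RecordStat change in the optimizing_compiler_stats.h hunk below: a pass can now accumulate events locally and update the shared counter once per batch instead of once per instruction. A minimal standalone sketch of that pattern, with Stats and RunPass as simplified stand-ins for OptimizingCompilerStats and an optimization pass (not the ART types themselves):

#include <cstddef>

enum MethodCompilationStat { kRemovedDeadInstruction, kLastStat };

class Stats {
 public:
  // Mirrors RecordStat(stat, count = 1): one addition covers a whole batch.
  void RecordStat(MethodCompilationStat stat, size_t count = 1) {
    compile_stats_[stat] += count;
  }

 private:
  size_t compile_stats_[kLastStat] = {};
};

// A pass counts removals while it runs, then records the total once.
void RunPass(Stats* stats) {
  size_t removed = 0;
  for (int i = 0; i < 3; ++i) {  // Stand-in for "for each dead instruction".
    ++removed;
  }
  stats->RecordStat(kRemovedDeadInstruction, removed);
}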
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index e6508c9851..b6b1bb1cad 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -29,25 +29,26 @@ enum MethodCompilationStat { kCompiledBaseline, kCompiledOptimized, kCompiledQuick, - kInstructionSimplifications, kInlinedInvoke, - kNotCompiledUnsupportedIsa, - kNotCompiledPathological, + kInstructionSimplifications, + kNotCompiledBranchOutsideMethodCode, + kNotCompiledCannotBuildSSA, + kNotCompiledCantAccesType, + kNotCompiledClassNotVerified, kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, - kNotCompiledCannotBuildSSA, kNotCompiledNoCodegen, - kNotCompiledUnresolvedMethod, - kNotCompiledUnresolvedField, kNotCompiledNonSequentialRegPair, + kNotCompiledPathological, kNotCompiledSpaceFilter, - kNotOptimizedTryCatch, - kNotOptimizedDisabled, - kNotCompiledCantAccesType, - kNotOptimizedRegisterAllocator, kNotCompiledUnhandledInstruction, + kNotCompiledUnresolvedField, + kNotCompiledUnresolvedMethod, + kNotCompiledUnsupportedIsa, kNotCompiledVerifyAtRuntime, - kNotCompiledClassNotVerified, + kNotOptimizedDisabled, + kNotOptimizedRegisterAllocator, + kNotOptimizedTryCatch, kRemovedCheckedCast, kRemovedDeadInstruction, kRemovedNullCheck, @@ -58,8 +59,8 @@ class OptimizingCompilerStats { public: OptimizingCompilerStats() {} - void RecordStat(MethodCompilationStat stat) { - compile_stats_[stat]++; + void RecordStat(MethodCompilationStat stat, size_t count = 1) { + compile_stats_[stat] += count; } void Log() const { @@ -98,23 +99,24 @@ class OptimizingCompilerStats { case kCompiledQuick : return "kCompiledQuick"; case kInlinedInvoke : return "kInlinedInvoke"; case kInstructionSimplifications: return "kInstructionSimplifications"; - case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; - case kNotCompiledPathological : return "kNotCompiledPathological"; + case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode"; + case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA"; + case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; + case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches"; - case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA"; case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen"; - case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod"; - case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField"; case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair"; - case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; - case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch"; - case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; + case kNotCompiledPathological : return "kNotCompiledPathological"; case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter"; - case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; + case kNotCompiledUnresolvedField : return "kNotCompiledUnresolvedField"; + case kNotCompiledUnresolvedMethod : return "kNotCompiledUnresolvedMethod"; + case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa"; case 
kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime"; - case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; + case kNotOptimizedDisabled : return "kNotOptimizedDisabled"; + case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; + case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch"; case kRemovedCheckedCast: return "kRemovedCheckedCast"; case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; case kRemovedNullCheck: return "kRemovedNullCheck"; diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 6b236927da..4f8ec65e43 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -72,11 +72,16 @@ void RemoveSuspendChecks(HGraph* graph) { } } +inline HGraph* CreateGraph(ArenaAllocator* allocator) { + return new (allocator) HGraph( + allocator, *reinterpret_cast<DexFile*>(allocator->Alloc(sizeof(DexFile))), -1); +} + // Create a control-flow graph from Dex instructions. inline HGraph* CreateCFG(ArenaAllocator* allocator, const uint16_t* data, Primitive::Type return_type = Primitive::kPrimInt) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph, return_type); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index f5d8d82571..78d11857c3 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -79,4 +79,26 @@ void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { } } +void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + if (invoke->IsStaticWithExplicitClinitCheck()) { + size_t last_input_index = invoke->InputCount() - 1; + HInstruction* last_input = invoke->InputAt(last_input_index); + DCHECK(last_input->IsLoadClass()) << last_input->DebugName(); + + // Remove a load class instruction as last input of a static + // invoke, which has been added (along with a clinit check, + // removed by PrepareForRegisterAllocation::VisitClinitCheck + // previously) by the graph builder during the creation of the + // static invoke instruction, but is no longer required at this + // stage (i.e., after inlining has been performed). + invoke->RemoveLoadClassAsLastInput(); + + // If the load class instruction is no longer used, remove it from + // the graph. 
+ if (!last_input->HasUses()) { + last_input->GetBlock()->RemoveInstruction(last_input); + } + } +} + } // namespace art diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index c28507c925..d7f277fa0d 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -39,6 +39,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitBoundType(HBoundType* bound_type) OVERRIDE; void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation); }; diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 293fde978e..c56100dfa1 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -30,7 +30,7 @@ namespace art { static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 0fdf051957..925099ade6 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -768,14 +768,14 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { } } else { DCHECK(!current->IsHighInterval()); - int hint = current->FindFirstRegisterHint(free_until); + int hint = current->FindFirstRegisterHint(free_until, liveness_); if (hint != kNoRegister) { DCHECK(!IsBlocked(hint)); reg = hint; } else if (current->IsLowInterval()) { reg = FindAvailableRegisterPair(free_until, current->GetStart()); } else { - reg = FindAvailableRegister(free_until); + reg = FindAvailableRegister(free_until, current); } } @@ -839,14 +839,52 @@ int RegisterAllocator::FindAvailableRegisterPair(size_t* next_use, size_t starti return reg; } -int RegisterAllocator::FindAvailableRegister(size_t* next_use) const { +bool RegisterAllocator::IsCallerSaveRegister(int reg) const { + return processing_core_registers_ + ? !codegen_->IsCoreCalleeSaveRegister(reg) + : !codegen_->IsFloatingPointCalleeSaveRegister(reg); +} + +int RegisterAllocator::FindAvailableRegister(size_t* next_use, LiveInterval* current) const { + // We special case intervals that do not span a safepoint to try to find a caller-save + // register if one is available. We iterate from 0 to the number of registers, + // so if there are caller-save registers available at the end, we continue the iteration. + bool prefers_caller_save = !current->HasWillCallSafepoint(); int reg = kNoRegister; - // Pick the register that is used the last. for (size_t i = 0; i < number_of_registers_; ++i) { - if (IsBlocked(i)) continue; - if (reg == kNoRegister || next_use[i] > next_use[reg]) { + if (IsBlocked(i)) { + // Register cannot be used. Continue. + continue; + } + + // Best case: we found a register fully available. + if (next_use[i] == kMaxLifetimePosition) { + if (prefers_caller_save && !IsCallerSaveRegister(i)) { + // We can get shorter encodings on some platforms by using + // small register numbers. 
So only update the candidate if the previous + // one was not available for the whole method. + if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) { + reg = i; + } + // Continue the iteration in the hope of finding a caller-save register. + continue; + } else { + reg = i; + // We know the register is good enough. Return it. + break; + } + } + + // If we had no register before, take this one as a reference. + if (reg == kNoRegister) { + reg = i; + continue; + } + + // Pick the register that is used the last. + if (next_use[i] > next_use[reg]) { reg = i; - if (next_use[i] == kMaxLifetimePosition) break; + continue; + } } return reg; } @@ -971,7 +1009,7 @@ bool RegisterAllocator::AllocateBlockedReg(LiveInterval* current) { || (first_use >= next_use[GetHighForLowRegister(reg)]); } else { DCHECK(!current->IsHighInterval()); - reg = FindAvailableRegister(next_use); + reg = FindAvailableRegister(next_use, current); should_spill = (first_use >= next_use[reg]); } @@ -1101,8 +1139,8 @@ void RegisterAllocator::AddSorted(GrowableArray<LiveInterval*>* array, LiveInter } LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t from, size_t to) { - HBasicBlock* block_from = liveness_.GetBlockFromPosition(from); - HBasicBlock* block_to = liveness_.GetBlockFromPosition(to); + HBasicBlock* block_from = liveness_.GetBlockFromPosition(from / 2); + HBasicBlock* block_to = liveness_.GetBlockFromPosition(to / 2); DCHECK(block_from != nullptr); DCHECK(block_to != nullptr); @@ -1111,6 +1149,41 @@ LiveInterval* RegisterAllocator::SplitBetween(LiveInterval* interval, size_t fro return Split(interval, to); } + /* + * Non-linear control flow will force moves at every branch instruction to the new location. + * To avoid having all branches do the moves, we find the next non-linear position and + * split the interval at this position. Take the following example (block number is the linear + * order position): + * + * B1 + * / \ + * B2 B3 + * \ / + * B4 + * + * B2 needs to split an interval whose next use is in B4. If we were to split at the + * beginning of B4, B3 would need to do a move between B3 and B4 to ensure the interval + * is now in the correct location. It makes performance worse if the interval is spilled + * and both B2 and B3 need to reload it before entering B4. + * + * By splitting at B3, we give the register allocator a chance to allocate the + * interval to the same register as in B1, and therefore avoid doing any + * moves in B3. + */ + if (block_from->GetDominator() != nullptr) { + const GrowableArray<HBasicBlock*>& dominated = block_from->GetDominator()->GetDominatedBlocks(); + for (size_t i = 0; i < dominated.Size(); ++i) { + size_t position = dominated.Get(i)->GetLifetimeStart(); + if ((position > from) && (block_to->GetLifetimeStart() > position)) { + // Even if we found a better block, we continue iterating in case + // a dominated block is closer. + // Note that dominated blocks are not sorted in liveness order. + block_to = dominated.Get(i); + DCHECK_NE(block_to, block_from); + } + } + } + // If `to` is in a loop, find the outermost loop header which does not contain `from`.
for (HLoopInformationOutwardIterator it(*block_to); !it.Done(); it.Advance()) { HBasicBlock* header = it.Current()->GetHeader(); @@ -1455,6 +1528,7 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { : Location::StackSlot(interval->GetParent()->GetSpillSlot())); } UsePosition* use = current->GetFirstUse(); + UsePosition* env_use = current->GetFirstEnvironmentUse(); // Walk over all siblings, updating locations of use positions, and // connecting them when they are adjacent. @@ -1467,15 +1541,14 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { LiveRange* range = current->GetFirstRange(); while (range != nullptr) { while (use != nullptr && use->GetPosition() < range->GetStart()) { - DCHECK(use->GetIsEnvironment()); + DCHECK(use->IsSynthesized()); use = use->GetNext(); } while (use != nullptr && use->GetPosition() <= range->GetEnd()) { + DCHECK(!use->GetIsEnvironment()); DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); - LocationSummary* locations = use->GetUser()->GetLocations(); - if (use->GetIsEnvironment()) { - locations->SetEnvironmentAt(use->GetInputIndex(), source); - } else { + if (!use->IsSynthesized()) { + LocationSummary* locations = use->GetUser()->GetLocations(); Location expected_location = locations->InAt(use->GetInputIndex()); // The expected (actual) location may be invalid in case the input is unused. Currently // this only happens for intrinsics. @@ -1492,6 +1565,20 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } use = use->GetNext(); } + + // Walk over the environment uses, and update their locations. + while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) { + env_use = env_use->GetNext(); + } + + while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) { + DCHECK(current->CoversSlow(env_use->GetPosition()) + || (env_use->GetPosition() == range->GetEnd())); + HEnvironment* environment = env_use->GetUser()->GetEnvironment(); + environment->SetLocationAt(env_use->GetInputIndex(), source); + env_use = env_use->GetNext(); + } + range = range->GetNext(); } @@ -1554,10 +1641,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { } while (current != nullptr); if (kIsDebugBuild) { - // Following uses can only be environment uses. The location for - // these environments will be none. + // Following uses can only be synthesized uses. while (use != nullptr) { - DCHECK(use->GetIsEnvironment()); + DCHECK(use->IsSynthesized()); use = use->GetNext(); } } diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index dc9c708eea..6d5bfc3f0d 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -140,7 +140,8 @@ class RegisterAllocator { void DumpInterval(std::ostream& stream, LiveInterval* interval) const; void DumpAllIntervals(std::ostream& stream) const; int FindAvailableRegisterPair(size_t* next_use, size_t starting_at) const; - int FindAvailableRegister(size_t* next_use) const; + int FindAvailableRegister(size_t* next_use, LiveInterval* current) const; + bool IsCallerSaveRegister(int reg) const; // Try splitting an active non-pair or unaligned pair interval at the given `position`. // Returns whether it was successful at finding such an interval. 
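The selection policy added to FindAvailableRegister above reads more easily in isolation. A standalone sketch of the same loop, with plain arrays standing in for the LiveInterval and code-generator queries (kNoRegister and kMaxLifetimePosition mirror the allocator's constants; this is an illustration, not the ART code itself):

#include <cstddef>

static constexpr int kNoRegister = -1;
static constexpr size_t kMaxLifetimePosition = static_cast<size_t>(-1);

// prefers_caller_save corresponds to !current->HasWillCallSafepoint():
// an interval that crosses no call does not care about clobbering.
int FindAvailableRegister(const size_t* next_use, const bool* blocked,
                          const bool* caller_save, size_t num_registers,
                          bool prefers_caller_save) {
  int reg = kNoRegister;
  for (size_t i = 0; i < num_registers; ++i) {
    if (blocked[i]) continue;  // Register cannot be used at all.
    if (next_use[i] == kMaxLifetimePosition) {
      if (prefers_caller_save && !caller_save[i]) {
        // Remember a fully free callee-save register only if nothing fully
        // free was found yet, then keep scanning for a caller-save one.
        if (reg == kNoRegister || next_use[reg] != kMaxLifetimePosition) {
          reg = static_cast<int>(i);
        }
        continue;
      }
      return static_cast<int>(i);  // Fully free and acceptable: done.
    }
    if (reg == kNoRegister || next_use[i] > next_use[reg]) {
      reg = static_cast<int>(i);  // Otherwise keep the latest-used register.
    }
  }
  return reg;
}

The point of the preference: a callee-save register must be spilled and reloaded in the method's prologue and epilogue, so an interval that never spans a safepoint is cheaper to keep in a caller-save register.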
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 8c6d904a4c..b72ffb8bf7 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -38,7 +38,7 @@ namespace art { static bool Check(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); @@ -60,7 +60,7 @@ static bool Check(const uint16_t* data) { TEST(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); @@ -255,7 +255,7 @@ TEST(RegisterAllocatorTest, Loop2) { } static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); @@ -463,7 +463,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, HPhi** phi, HInstruction** input1, HInstruction** input2) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -593,7 +593,7 @@ TEST(RegisterAllocatorTest, PhiHint) { static HGraph* BuildFieldReturn(ArenaAllocator* allocator, HInstruction** field, HInstruction** ret) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -661,7 +661,7 @@ TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { static HGraph* BuildTwoSubs(ArenaAllocator* allocator, HInstruction** first_sub, HInstruction** second_sub) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -731,7 +731,7 @@ TEST(RegisterAllocatorTest, SameAsFirstInputHint) { static HGraph* BuildDiv(ArenaAllocator* allocator, HInstruction** div) { - HGraph* graph = new (allocator) HGraph(allocator); + HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -783,7 +783,7 @@ TEST(RegisterAllocatorTest, SpillInactive) { // Create a synthesized graph to please the register_allocator and // ssa_liveness_analysis code. 
ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 7a252af2ad..59a2852735 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -332,7 +332,7 @@ void SsaBuilder::BuildSsa() { } HInstruction* SsaBuilder::ValueOfLocal(HBasicBlock* block, size_t local) { - return GetLocalsFor(block)->GetInstructionAt(local); + return GetLocalsFor(block)->Get(local); } void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { @@ -349,7 +349,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid); block->AddPhi(phi); - current_locals_->SetRawEnvAt(local, phi); + current_locals_->Put(local, phi); } } // Save the loop header so that the last phase of the analysis knows which @@ -389,7 +389,7 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { block->AddPhi(phi); value = phi; } - current_locals_->SetRawEnvAt(local, value); + current_locals_->Put(local, value); } } @@ -417,6 +417,7 @@ HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) { ArenaAllocator* allocator = graph->GetArena(); result = new (allocator) HFloatConstant(bit_cast<float, int32_t>(constant->GetValue())); constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext()); + graph->CacheFloatConstant(result); } else { // If there is already a constant with the expected type, we know it is // the floating point equivalent of this constant. @@ -439,6 +440,7 @@ HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) { ArenaAllocator* allocator = graph->GetArena(); result = new (allocator) HDoubleConstant(bit_cast<double, int64_t>(constant->GetValue())); constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext()); + graph->CacheDoubleConstant(result); } else { // If there is already a constant with the expected type, we know it is // the floating point equivalent of this constant. @@ -518,7 +520,7 @@ HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { } void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { - HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber()); + HInstruction* value = current_locals_->Get(load->GetLocal()->GetRegNumber()); // If the operation requests a specific type, we make sure its input is of that type. 
if (load->GetType() != value->GetType()) { if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) { @@ -532,7 +534,7 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { } void SsaBuilder::VisitStoreLocal(HStoreLocal* store) { - current_locals_->SetRawEnvAt(store->GetLocal()->GetRegNumber(), store->InputAt(1)); + current_locals_->Put(store->GetLocal()->GetRegNumber(), store->InputAt(1)); store->GetBlock()->RemoveInstruction(store); } @@ -541,8 +543,12 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) { return; } HEnvironment* environment = new (GetGraph()->GetArena()) HEnvironment( - GetGraph()->GetArena(), current_locals_->Size()); - environment->CopyFrom(current_locals_); + GetGraph()->GetArena(), + current_locals_->Size(), + GetGraph()->GetDexFile(), + GetGraph()->GetMethodIdx(), + instruction->GetDexPc()); + environment->CopyFrom(*current_locals_); instruction->SetRawEnvironment(environment); } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 265e95b4ac..1c83c4ba48 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -58,14 +58,15 @@ class SsaBuilder : public HGraphVisitor { void BuildSsa(); - HEnvironment* GetLocalsFor(HBasicBlock* block) { - HEnvironment* env = locals_for_.Get(block->GetBlockId()); - if (env == nullptr) { - env = new (GetGraph()->GetArena()) HEnvironment( + GrowableArray<HInstruction*>* GetLocalsFor(HBasicBlock* block) { + GrowableArray<HInstruction*>* locals = locals_for_.Get(block->GetBlockId()); + if (locals == nullptr) { + locals = new (GetGraph()->GetArena()) GrowableArray<HInstruction*>( GetGraph()->GetArena(), GetGraph()->GetNumberOfVRegs()); - locals_for_.Put(block->GetBlockId(), env); + locals->SetSize(GetGraph()->GetNumberOfVRegs()); + locals_for_.Put(block->GetBlockId(), locals); } - return env; + return locals; } HInstruction* ValueOfLocal(HBasicBlock* block, size_t local); @@ -93,14 +94,14 @@ class SsaBuilder : public HGraphVisitor { static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); // Locals for the current block being visited. - HEnvironment* current_locals_; + GrowableArray<HInstruction*>* current_locals_; // Keep track of loop headers found. The last phase of the analysis iterates // over these blocks to set the inputs of their phis. GrowableArray<HBasicBlock*> loop_headers_; // HEnvironment for each block. - GrowableArray<HEnvironment*> locals_for_; + GrowableArray<GrowableArray<HInstruction*>*> locals_for_; DISALLOW_COPY_AND_ASSIGN(SsaBuilder); }; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index ea0e7c3712..250eb04a1c 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -75,9 +75,7 @@ void SsaLivenessAnalysis::LinearizeGraph() { HBasicBlock* block = it.Current(); size_t number_of_forward_predecessors = block->GetPredecessors().Size(); if (block->IsLoopHeader()) { - // We rely on having simplified the CFG. - DCHECK_EQ(1u, block->GetLoopInformation()->NumberOfBackEdges()); - number_of_forward_predecessors--; + number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges(); } forward_predecessors.Put(block->GetBlockId(), number_of_forward_predecessors); } @@ -220,10 +218,11 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // Process the environment first, because we know their uses come after // or at the same liveness position of inputs. 
- if (current->HasEnvironment()) { + for (HEnvironment* environment = current->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { // Handle environment uses. See statements (b) and (c) of the // SsaLivenessAnalysis. - HEnvironment* environment = current->GetEnvironment(); for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* instruction = environment->GetInstructionAt(i); bool should_be_live = ShouldBeLiveForEnvironment(instruction); @@ -233,7 +232,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } if (instruction != nullptr) { instruction->GetLiveInterval()->AddUse( - current, i, /* is_environment */ true, should_be_live); + current, environment, i, should_be_live); } } } @@ -245,7 +244,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { // to be materialized. if (input->HasSsaIndex()) { live_in->SetBit(input->GetSsaIndex()); - input->GetLiveInterval()->AddUse(current, i, /* is_environment */ false); + input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i); } } } @@ -264,13 +263,12 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } if (block->IsLoopHeader()) { - HBasicBlock* back_edge = block->GetLoopInformation()->GetBackEdges().Get(0); + size_t last_position = block->GetLoopInformation()->GetLifetimeEnd(); // For all live_in instructions at the loop header, we need to create a range // that covers the full loop. for (uint32_t idx : live_in->Indexes()) { HInstruction* current = instructions_from_ssa_index_.Get(idx); - current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), - back_edge->GetLifetimeEnd()); + current->GetLiveInterval()->AddLoopRange(block->GetLifetimeStart(), last_position); } } } @@ -322,7 +320,8 @@ static int RegisterOrLowRegister(Location location) { return location.IsPair() ? location.low() : location.reg(); } -int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { +int LiveInterval::FindFirstRegisterHint(size_t* free_until, + const SsaLivenessAnalysis& liveness) const { DCHECK(!IsHighInterval()); if (IsTemp()) return kNoRegister; @@ -336,12 +335,32 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { } } + if (IsSplit() && liveness.IsAtBlockBoundary(GetStart() / 2)) { + // If the start of this interval is at a block boundary, we look at the + // location of the interval in blocks preceding the block this interval + // starts at. If one location is a register we return it as a hint. This + // will avoid a move between the two blocks. + HBasicBlock* block = liveness.GetBlockFromPosition(GetStart() / 2); + for (size_t i = 0; i < block->GetPredecessors().Size(); ++i) { + size_t position = block->GetPredecessors().Get(i)->GetLifetimeEnd() - 1; + // We know positions above GetStart() do not have a location yet. 
+ if (position < GetStart()) { + LiveInterval* existing = GetParent()->GetSiblingAt(position); + if (existing != nullptr + && existing->HasRegister() + && (free_until[existing->GetRegister()] > GetStart())) { + return existing->GetRegister(); + } + } + } + } + UsePosition* use = first_use_; size_t start = GetStart(); size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { size_t use_position = use->GetPosition(); - if (use_position >= start && !use->GetIsEnvironment()) { + if (use_position >= start && !use->IsSynthesized()) { HInstruction* user = use->GetUser(); size_t input_index = use->GetInputIndex(); if (user->IsPhi()) { diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 97254edb5e..4b19c5b46a 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -23,6 +23,7 @@ namespace art { class CodeGenerator; +class SsaLivenessAnalysis; static constexpr int kNoRegister = -1; @@ -75,7 +76,7 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> { } void Dump(std::ostream& stream) const { - stream << "[" << start_ << ", " << end_ << ")"; + stream << "[" << start_ << "," << end_ << ")"; } LiveRange* Dup(ArenaAllocator* allocator) const { @@ -103,21 +104,24 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocMisc> { class UsePosition : public ArenaObject<kArenaAllocMisc> { public: UsePosition(HInstruction* user, + HEnvironment* environment, size_t input_index, - bool is_environment, size_t position, UsePosition* next) : user_(user), + environment_(environment), input_index_(input_index), - is_environment_(is_environment), position_(position), next_(next) { - DCHECK(user->IsPhi() + DCHECK((user == nullptr) + || user->IsPhi() || (GetPosition() == user->GetLifetimePosition() + 1) || (GetPosition() == user->GetLifetimePosition())); DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition()); } + static constexpr size_t kNoInput = -1; + size_t GetPosition() const { return position_; } UsePosition* GetNext() const { return next_; } @@ -125,27 +129,38 @@ class UsePosition : public ArenaObject<kArenaAllocMisc> { HInstruction* GetUser() const { return user_; } - bool GetIsEnvironment() const { return is_environment_; } + bool GetIsEnvironment() const { return environment_ != nullptr; } + bool IsSynthesized() const { return user_ == nullptr; } size_t GetInputIndex() const { return input_index_; } void Dump(std::ostream& stream) const { stream << position_; - if (is_environment_) { - stream << " (env)"; - } + } + + HLoopInformation* GetLoopInformation() const { + return user_->GetBlock()->GetLoopInformation(); } UsePosition* Dup(ArenaAllocator* allocator) const { return new (allocator) UsePosition( - user_, input_index_, is_environment_, position_, + user_, environment_, input_index_, position_, next_ == nullptr ? 
nullptr : next_->Dup(allocator)); } + bool RequiresRegister() const { + if (GetIsEnvironment()) return false; + if (IsSynthesized()) return false; + Location location = GetUser()->GetLocations()->InAt(GetInputIndex()); + return location.IsUnallocated() + && (location.GetPolicy() == Location::kRequiresRegister + || location.GetPolicy() == Location::kRequiresFpuRegister); + } + private: HInstruction* const user_; + HEnvironment* const environment_; const size_t input_index_; - const bool is_environment_; const size_t position_; UsePosition* next_; @@ -219,17 +234,19 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { void AddTempUse(HInstruction* instruction, size_t temp_index) { DCHECK(IsTemp()); DCHECK(first_use_ == nullptr) << "A temporary can only have one user"; + DCHECK(first_env_use_ == nullptr) << "A temporary cannot have environment user"; size_t position = instruction->GetLifetimePosition(); first_use_ = new (allocator_) UsePosition( - instruction, temp_index, /* is_environment */ false, position, first_use_); + instruction, /* environment */ nullptr, temp_index, position, first_use_); AddRange(position, position + 1); } void AddUse(HInstruction* instruction, + HEnvironment* environment, size_t input_index, - bool is_environment, bool keep_alive = false) { // Set the use within the instruction. + bool is_environment = (environment != nullptr); size_t position = instruction->GetLifetimePosition() + 1; LocationSummary* locations = instruction->GetLocations(); if (!is_environment) { @@ -239,9 +256,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // location of the input just before that instruction (and not potential moves due // to splitting). position = instruction->GetLifetimePosition(); + } else if (!locations->InAt(input_index).IsValid()) { + return; } } + if (!is_environment && instruction->IsInLoop()) { + AddBackEdgeUses(*instruction->GetBlock()); + } + DCHECK(position == instruction->GetLifetimePosition() || position == instruction->GetLifetimePosition() + 1); @@ -257,7 +280,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { } DCHECK(first_use_->GetPosition() + 1 == position); UsePosition* new_use = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, cursor->GetNext()); + instruction, environment, input_index, position, cursor->GetNext()); cursor->SetNext(new_use); if (first_range_->GetEnd() == first_use_->GetPosition()) { first_range_->end_ = position; @@ -265,8 +288,13 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return; } - first_use_ = new (allocator_) UsePosition( - instruction, input_index, is_environment, position, first_use_); + if (is_environment) { + first_env_use_ = new (allocator_) UsePosition( + instruction, environment, input_index, position, first_env_use_); + } else { + first_use_ = new (allocator_) UsePosition( + instruction, environment, input_index, position, first_use_); + } if (is_environment && !keep_alive) { // If this environment use does not keep the instruction live, it does not @@ -300,8 +328,11 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { void AddPhiUse(HInstruction* instruction, size_t input_index, HBasicBlock* block) { DCHECK(instruction->IsPhi()); + if (block->IsInLoop()) { + AddBackEdgeUses(*block); + } first_use_ = new (allocator_) UsePosition( - instruction, input_index, false, block->GetLifetimeEnd(), first_use_); + instruction, /* environment */ nullptr, input_index, block->GetLifetimeEnd(), first_use_); } void AddRange(size_t start, 
size_t end) { @@ -450,38 +481,17 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { if (is_temp_) { return position == GetStart() ? position : kNoLifetime; } - if (position == GetStart() && IsParent()) { - LocationSummary* locations = defined_by_->GetLocations(); - Location location = locations->Out(); - // This interval is the first interval of the instruction. If the output - // of the instruction requires a register, we return the position of that instruction - // as the first register use. - if (location.IsUnallocated()) { - if ((location.GetPolicy() == Location::kRequiresRegister) - || (location.GetPolicy() == Location::kSameAsFirstInput - && (locations->InAt(0).IsRegister() - || locations->InAt(0).IsRegisterPair() - || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) { - return position; - } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) - || (location.GetPolicy() == Location::kSameAsFirstInput - && locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister)) { - return position; - } - } else if (location.IsRegister() || location.IsRegisterPair()) { - return position; - } + + if (IsDefiningPosition(position) && DefinitionRequiresRegister()) { + return position; } UsePosition* use = first_use_; size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { size_t use_position = use->GetPosition(); - if (use_position > position && !use->GetIsEnvironment()) { - Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); - if (location.IsUnallocated() - && (location.GetPolicy() == Location::kRequiresRegister - || location.GetPolicy() == Location::kRequiresFpuRegister)) { + if (use_position > position) { + if (use->RequiresRegister()) { return use_position; } } @@ -499,21 +509,17 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return position == GetStart() ? 
position : kNoLifetime; } - if (position == GetStart() && IsParent()) { - if (defined_by_->GetLocations()->Out().IsValid()) { - return position; - } + if (IsDefiningPosition(position)) { + DCHECK(defined_by_->GetLocations()->Out().IsValid()); + return position; } UsePosition* use = first_use_; size_t end = GetEnd(); while (use != nullptr && use->GetPosition() <= end) { - if (!use->GetIsEnvironment()) { - Location location = use->GetUser()->GetLocations()->InAt(use->GetInputIndex()); - size_t use_position = use->GetPosition(); - if (use_position > position && location.IsValid()) { - return use_position; - } + size_t use_position = use->GetPosition(); + if (use_position > position) { + return use_position; } use = use->GetNext(); } @@ -524,6 +530,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return first_use_; } + UsePosition* GetFirstEnvironmentUse() const { + return first_env_use_; + } + Primitive::Type GetType() const { return type_; } @@ -532,6 +542,15 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return defined_by_; } + bool HasWillCallSafepoint() const { + for (SafepointPosition* safepoint = first_safepoint_; + safepoint != nullptr; + safepoint = safepoint->GetNext()) { + if (safepoint->GetLocations()->WillCall()) return true; + } + return false; + } + SafepointPosition* FindSafepointJustBefore(size_t position) const { for (SafepointPosition* safepoint = first_safepoint_, *previous = nullptr; safepoint != nullptr; @@ -577,6 +596,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { new_interval->parent_ = parent_; new_interval->first_use_ = first_use_; + new_interval->first_env_use_ = first_env_use_; LiveRange* current = first_range_; LiveRange* previous = nullptr; // Iterate over the ranges, and either find a range that covers this position, or @@ -655,10 +675,18 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { stream << " "; } while ((use = use->GetNext()) != nullptr); } + stream << "}, { "; + use = first_env_use_; + if (use != nullptr) { + do { + use->Dump(stream); + stream << " "; + } while ((use = use->GetNext()) != nullptr); + } stream << "}"; stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit(); - stream << " is_high: " << IsHighInterval(); stream << " is_low: " << IsLowInterval(); + stream << " is_high: " << IsHighInterval(); } LiveInterval* GetNextSibling() const { return next_sibling_; } @@ -673,7 +701,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // Returns the first register hint that is at least free before // the value contained in `free_until`. If none is found, returns // `kNoRegister`. - int FindFirstRegisterHint(size_t* free_until) const; + int FindFirstRegisterHint(size_t* free_until, const SsaLivenessAnalysis& liveness) const; // If there is enough at the definition site to find a register (for example // it uses the same input as the first input), returns the register as a hint. 
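Environments now form a parent chain (one HEnvironment per inlined frame, built by SetAndCopyParentChain earlier in this diff), and the ComputeLiveRanges hunk above walks the whole chain rather than only the innermost frame. A standalone sketch of that traversal, with Env and mark_live as simplified stand-ins for HEnvironment and the live-in bitvector updates (not the ART types):

#include <cstddef>
#include <functional>
#include <vector>

// Stand-in for HEnvironment: one frame of vreg values plus the caller frame.
struct Env {
  std::vector<int> vregs;   // SSA value ids referenced by this frame.
  Env* parent = nullptr;    // Environment of the inlined caller, if any.
};

// Walk the full parent chain so that values referenced by outer (caller)
// frames also receive environment uses and stay live for deoptimization.
void ForEachEnvironmentValue(Env* innermost,
                             const std::function<void(int)>& mark_live) {
  for (Env* environment = innermost; environment != nullptr;
       environment = environment->parent) {
    for (size_t i = 0; i < environment->vregs.size(); ++i) {
      mark_live(environment->vregs[i]);
    }
  }
}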
@@ -754,6 +782,10 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { if (first_use_ != nullptr) { high_or_low_interval_->first_use_ = first_use_->Dup(allocator_); } + + if (first_env_use_ != nullptr) { + high_or_low_interval_->first_env_use_ = first_env_use_->Dup(allocator_); + } } // Returns whether an interval, when it is non-split, is using @@ -851,6 +883,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { first_safepoint_(nullptr), last_safepoint_(nullptr), first_use_(nullptr), + first_env_use_(nullptr), type_(type), next_sibling_(nullptr), parent_(this), @@ -888,6 +921,107 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { return range; } + bool DefinitionRequiresRegister() const { + DCHECK(IsParent()); + LocationSummary* locations = defined_by_->GetLocations(); + Location location = locations->Out(); + // This interval is the first interval of the instruction. If the output + // of the instruction requires a register, we return the position of that instruction + // as the first register use. + if (location.IsUnallocated()) { + if ((location.GetPolicy() == Location::kRequiresRegister) + || (location.GetPolicy() == Location::kSameAsFirstInput + && (locations->InAt(0).IsRegister() + || locations->InAt(0).IsRegisterPair() + || locations->InAt(0).GetPolicy() == Location::kRequiresRegister))) { + return true; + } else if ((location.GetPolicy() == Location::kRequiresFpuRegister) + || (location.GetPolicy() == Location::kSameAsFirstInput + && (locations->InAt(0).IsFpuRegister() + || locations->InAt(0).IsFpuRegisterPair() + || locations->InAt(0).GetPolicy() == Location::kRequiresFpuRegister))) { + return true; + } + } else if (location.IsRegister() || location.IsRegisterPair()) { + return true; + } + return false; + } + + bool IsDefiningPosition(size_t position) const { + return IsParent() && (position == GetStart()); + } + + bool HasSynthesizeUseAt(size_t position) const { + UsePosition* use = first_use_; + while (use != nullptr) { + size_t use_position = use->GetPosition(); + if ((use_position == position) && use->IsSynthesized()) { + return true; + } + if (use_position > position) break; + use = use->GetNext(); + } + return false; + } + + void AddBackEdgeUses(const HBasicBlock& block_at_use) { + DCHECK(block_at_use.IsInLoop()); + // Add synthesized uses at the back edge of loops to help the register allocator. + // Note that this method is called in decreasing liveness order, to facilitate adding + // uses at the head of the `first_use_` linked list. Because below + // we iterate from inner-most to outer-most, which is in increasing liveness order, + // we need to take extra care of how the `first_use_` linked list is being updated. + UsePosition* first_in_new_list = nullptr; + UsePosition* last_in_new_list = nullptr; + for (HLoopInformationOutwardIterator it(block_at_use); + !it.Done(); + it.Advance()) { + HLoopInformation* current = it.Current(); + if (GetDefinedBy()->GetLifetimePosition() >= current->GetHeader()->GetLifetimeStart()) { + // This interval is defined in the loop. We can stop going outward. + break; + } + + // We're only adding a synthesized use at the last back edge. Adding synthesized uses on + // all back edges is not necessary: anything used in the loop will have its use at the + // last back edge. If we want one branch in a loop to have better register allocation than + // another, then it is the linear order we should change.
+ size_t back_edge_use_position = current->GetLifetimeEnd(); + if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) { + // There was a use already seen in this loop. Therefore the previous call to `AddUse` + // already inserted the backedge use. We can stop going outward. + DCHECK(HasSynthesizeUseAt(back_edge_use_position)); + break; + } + + DCHECK(last_in_new_list == nullptr + || back_edge_use_position > last_in_new_list->GetPosition()); + + UsePosition* new_use = new (allocator_) UsePosition( + /* user */ nullptr, + /* environment */ nullptr, + UsePosition::kNoInput, + back_edge_use_position, + /* next */ nullptr); + + if (last_in_new_list != nullptr) { + // Going outward. The latest created use needs to point to the new use. + last_in_new_list->SetNext(new_use); + } else { + // This is the inner-most loop. + DCHECK_EQ(current, block_at_use.GetLoopInformation()); + first_in_new_list = new_use; + } + last_in_new_list = new_use; + } + // Link the newly created linked list with `first_use_`. + if (last_in_new_list != nullptr) { + last_in_new_list->SetNext(first_use_); + first_use_ = first_in_new_list; + } + } + ArenaAllocator* const allocator_; // Ranges of this interval. We need a quick access to the last range to test @@ -905,6 +1039,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { // Uses of this interval. Note that this linked list is shared amongst siblings. UsePosition* first_use_; + UsePosition* first_env_use_; // The instruction type this interval corresponds to. const Primitive::Type type_; @@ -999,14 +1134,18 @@ class SsaLivenessAnalysis : public ValueObject { } HBasicBlock* GetBlockFromPosition(size_t index) const { - HInstruction* instruction = GetInstructionFromPosition(index / 2); + HInstruction* instruction = GetInstructionFromPosition(index); if (instruction == nullptr) { // If we are at a block boundary, get the block following. - instruction = GetInstructionFromPosition((index / 2) + 1); + instruction = GetInstructionFromPosition(index + 1); } return instruction->GetBlock(); } + bool IsAtBlockBoundary(size_t index) const { + return GetInstructionFromPosition(index) == nullptr; + } + HInstruction* GetTempUser(LiveInterval* temp) const { // A temporary shares the same lifetime start as the instruction that requires it. 
DCHECK(temp->IsTemp()); diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 00c241b85a..fb3e7d798c 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -78,7 +78,7 @@ static void ReNumberInstructions(HGraph* graph) { static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); @@ -373,30 +373,26 @@ TEST(SsaTest, Loop6) { const char* expected = "BasicBlock 0, succ: 1\n" " 0: IntConstant 0 [5]\n" - " 1: IntConstant 4 [14, 8, 8]\n" - " 2: IntConstant 5 [14]\n" + " 1: IntConstant 4 [5, 8, 8]\n" + " 2: IntConstant 5 [5]\n" " 3: Goto\n" "BasicBlock 1, pred: 0, succ: 2\n" " 4: Goto\n" - "BasicBlock 2, pred: 1, 8, succ: 6, 3\n" - " 5: Phi(0, 14) [12, 6, 6]\n" + "BasicBlock 2, pred: 1, 4, 5, succ: 6, 3\n" + " 5: Phi(0, 2, 1) [12, 6, 6]\n" " 6: Equal(5, 5) [7]\n" " 7: If(6)\n" "BasicBlock 3, pred: 2, succ: 5, 4\n" " 8: Equal(1, 1) [9]\n" " 9: If(8)\n" - "BasicBlock 4, pred: 3, succ: 8\n" + "BasicBlock 4, pred: 3, succ: 2\n" " 10: Goto\n" - "BasicBlock 5, pred: 3, succ: 8\n" + "BasicBlock 5, pred: 3, succ: 2\n" " 11: Goto\n" "BasicBlock 6, pred: 2, succ: 7\n" " 12: Return(5)\n" "BasicBlock 7, pred: 6\n" - " 13: Exit\n" - // Synthesized single back edge of loop. - "BasicBlock 8, pred: 5, 4, succ: 2\n" - " 14: Phi(1, 2) [5]\n" - " 15: Goto\n"; + " 13: Exit\n"; const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index fcc86d5163..89035a3683 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -13,34 +13,33 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "stack_map_stream.h" namespace art { -void StackMapStream::AddStackMapEntry(uint32_t dex_pc, - uint32_t native_pc_offset, - uint32_t register_mask, - BitVector* sp_mask, - uint32_t num_dex_registers, - uint8_t inlining_depth) { - StackMapEntry entry; - entry.dex_pc = dex_pc; - entry.native_pc_offset = native_pc_offset; - entry.register_mask = register_mask; - entry.sp_mask = sp_mask; - entry.num_dex_registers = num_dex_registers; - entry.inlining_depth = inlining_depth; - entry.dex_register_locations_start_index = dex_register_locations_.Size(); - entry.inline_infos_start_index = inline_infos_.Size(); - entry.dex_register_map_hash = 0; +void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask, + BitVector* sp_mask, + uint32_t num_dex_registers, + uint8_t inlining_depth) { + DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry"; + current_entry_.dex_pc = dex_pc; + current_entry_.native_pc_offset = native_pc_offset; + current_entry_.register_mask = register_mask; + current_entry_.sp_mask = sp_mask; + current_entry_.num_dex_registers = num_dex_registers; + current_entry_.inlining_depth = inlining_depth; + current_entry_.dex_register_locations_start_index = dex_register_locations_.Size(); + current_entry_.inline_infos_start_index = inline_infos_.Size(); + current_entry_.dex_register_map_hash = 0; + current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound; if (num_dex_registers != 0) { - entry.live_dex_registers_mask = + current_entry_.live_dex_registers_mask = new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true); } else { - entry.live_dex_registers_mask = nullptr; + current_entry_.live_dex_registers_mask = nullptr; } - stack_maps_.Add(entry); if (sp_mask != nullptr) { stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet()); @@ -52,14 +51,16 @@ void StackMapStream::AddStackMapEntry(uint32_t dex_pc, dex_pc_max_ = std::max(dex_pc_max_, dex_pc); native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset); register_mask_max_ = std::max(register_mask_max_, register_mask); + current_dex_register_ = 0; } -void StackMapStream::AddDexRegisterEntry(uint16_t dex_register, - DexRegisterLocation::Kind kind, - int32_t value) { - StackMapEntry entry = stack_maps_.Get(stack_maps_.Size() - 1); - DCHECK_LT(dex_register, entry.num_dex_registers); +void StackMapStream::EndStackMapEntry() { + current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap(); + stack_maps_.Add(current_entry_); + current_entry_ = StackMapEntry(); +} +void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) { if (kind != DexRegisterLocation::Kind::kNone) { // Ensure we only use non-compressed location kind at this stage. DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) @@ -82,44 +83,76 @@ void StackMapStream::AddDexRegisterEntry(uint16_t dex_register, location_catalog_entries_indices_.Insert(std::make_pair(location, index)); } - entry.live_dex_registers_mask->SetBit(dex_register); - entry.dex_register_map_hash += - (1 << (dex_register % (sizeof(entry.dex_register_map_hash) * kBitsPerByte))); - entry.dex_register_map_hash += static_cast<uint32_t>(value); - entry.dex_register_map_hash += static_cast<uint32_t>(kind); - stack_maps_.Put(stack_maps_.Size() - 1, entry); + if (in_inline_frame_) { + // TODO: Support sharing DexRegisterMap across InlineInfo. 
+ DCHECK_LT(current_dex_register_, current_inline_info_.num_dex_registers); + current_inline_info_.live_dex_registers_mask->SetBit(current_dex_register_); + } else { + DCHECK_LT(current_dex_register_, current_entry_.num_dex_registers); + current_entry_.live_dex_registers_mask->SetBit(current_dex_register_); + current_entry_.dex_register_map_hash += (1 << + (current_dex_register_ % (sizeof(current_entry_.dex_register_map_hash) * kBitsPerByte))); + current_entry_.dex_register_map_hash += static_cast<uint32_t>(value); + current_entry_.dex_register_map_hash += static_cast<uint32_t>(kind); + } } + current_dex_register_++; } -void StackMapStream::AddInlineInfoEntry(uint32_t method_index) { - InlineInfoEntry entry; - entry.method_index = method_index; - inline_infos_.Add(entry); +void StackMapStream::BeginInlineInfoEntry(uint32_t method_index, + uint32_t dex_pc, + uint32_t num_dex_registers) { + DCHECK(!in_inline_frame_); + in_inline_frame_ = true; + current_inline_info_.method_index = method_index; + current_inline_info_.dex_pc = dex_pc; + current_inline_info_.num_dex_registers = num_dex_registers; + current_inline_info_.dex_register_locations_start_index = dex_register_locations_.Size(); + if (num_dex_registers != 0) { + current_inline_info_.live_dex_registers_mask = + new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true); + } else { + current_inline_info_.live_dex_registers_mask = nullptr; + } + current_dex_register_ = 0; } -size_t StackMapStream::ComputeNeededSize() { - size_t size = CodeInfo::kFixedSize - + ComputeDexRegisterLocationCatalogSize() - + ComputeStackMapsSize() - + ComputeDexRegisterMapsSize() - + ComputeInlineInfoSize(); - // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned. - return size; +void StackMapStream::EndInlineInfoEntry() { + DCHECK(in_inline_frame_); + DCHECK_EQ(current_dex_register_, current_inline_info_.num_dex_registers) + << "Inline information contains fewer registers than expected"; + in_inline_frame_ = false; + inline_infos_.Add(current_inline_info_); + current_inline_info_ = InlineInfoEntry(); } -size_t StackMapStream::ComputeStackMaskSize() const { - int number_of_bits = stack_mask_max_ + 1; // Need room for max element too. - return RoundUp(number_of_bits, kBitsPerByte) / kBitsPerByte; -} +size_t StackMapStream::PrepareForFillIn() { + int stack_mask_number_of_bits = stack_mask_max_ + 1; // Need room for max element too. + stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte; + inline_info_size_ = ComputeInlineInfoSize(); + dex_register_maps_size_ = ComputeDexRegisterMapsSize(); + stack_maps_size_ = stack_maps_.Size() + * StackMap::ComputeStackMapSize(stack_mask_size_, + inline_info_size_, + dex_register_maps_size_, + dex_pc_max_, + native_pc_offset_max_, + register_mask_max_); + dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize(); -size_t StackMapStream::ComputeStackMapsSize() { - return stack_maps_.Size() * StackMap::ComputeStackMapSize( - ComputeStackMaskSize(), - ComputeInlineInfoSize(), - ComputeDexRegisterMapsSize(), - dex_pc_max_, - native_pc_offset_max_, - register_mask_max_); + // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned.
+ needed_size_ = CodeInfo::kFixedSize + + dex_register_location_catalog_size_ + + stack_maps_size_ + + dex_register_maps_size_ + + inline_info_size_; + + dex_register_location_catalog_start_ = CodeInfo::kFixedSize; + stack_maps_start_ = dex_register_location_catalog_start_ + dex_register_location_catalog_size_; + dex_register_maps_start_ = stack_maps_start_ + stack_maps_size_; + inline_infos_start_ = dex_register_maps_start_ + dex_register_maps_size_; + + return needed_size_; } size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const { @@ -134,17 +167,18 @@ size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const { return size; } -size_t StackMapStream::ComputeDexRegisterMapSize(const StackMapEntry& entry) const { +size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers, + const BitVector& live_dex_registers_mask) const { // Size of the map in bytes. size_t size = DexRegisterMap::kFixedSize; // Add the live bit mask for the Dex register liveness. - size += DexRegisterMap::GetLiveBitMaskSize(entry.num_dex_registers); + size += DexRegisterMap::GetLiveBitMaskSize(num_dex_registers); // Compute the size of the set of live Dex register entries. size_t number_of_live_dex_registers = 0; for (size_t dex_register_number = 0; - dex_register_number < entry.num_dex_registers; + dex_register_number < num_dex_registers; ++dex_register_number) { - if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { + if (live_dex_registers_mask.IsBitSet(dex_register_number)) { ++number_of_live_dex_registers; } } @@ -157,12 +191,20 @@ size_t StackMapStream::ComputeDexRegisterMapSize(const StackMapEntry& entry) con return size; } -size_t StackMapStream::ComputeDexRegisterMapsSize() { +size_t StackMapStream::ComputeDexRegisterMapsSize() const { size_t size = 0; + size_t inline_info_index = 0; for (size_t i = 0; i < stack_maps_.Size(); ++i) { - if (FindEntryWithTheSameDexMap(i) == kNoSameDexMapFound) { + StackMapEntry entry = stack_maps_.Get(i); + if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) { + size += ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask); + } else { // Entries with the same dex map will have the same offset. 
- size += ComputeDexRegisterMapSize(stack_maps_.Get(i)); + } + for (size_t j = 0; j < entry.inlining_depth; ++j) { + InlineInfoEntry inline_entry = inline_infos_.Get(inline_info_index++); + size += ComputeDexRegisterMapSize(inline_entry.num_dex_registers, + *inline_entry.live_dex_registers_mask); } } return size; @@ -174,55 +216,33 @@ size_t StackMapStream::ComputeInlineInfoSize() const { + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); } -size_t StackMapStream::ComputeDexRegisterLocationCatalogStart() const { - return CodeInfo::kFixedSize; -} - -size_t StackMapStream::ComputeStackMapsStart() const { - return ComputeDexRegisterLocationCatalogStart() + ComputeDexRegisterLocationCatalogSize(); -} - -size_t StackMapStream::ComputeDexRegisterMapsStart() { - return ComputeStackMapsStart() + ComputeStackMapsSize(); -} - -size_t StackMapStream::ComputeInlineInfoStart() { - return ComputeDexRegisterMapsStart() + ComputeDexRegisterMapsSize(); -} - void StackMapStream::FillIn(MemoryRegion region) { + DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry"; + DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn"; + CodeInfo code_info(region); - DCHECK_EQ(region.size(), ComputeNeededSize()); + DCHECK_EQ(region.size(), needed_size_); code_info.SetOverallSize(region.size()); - size_t stack_mask_size = ComputeStackMaskSize(); - - size_t dex_register_map_size = ComputeDexRegisterMapsSize(); - size_t inline_info_size = ComputeInlineInfoSize(); - MemoryRegion dex_register_locations_region = region.Subregion( - ComputeDexRegisterMapsStart(), - dex_register_map_size); + dex_register_maps_start_, dex_register_maps_size_); MemoryRegion inline_infos_region = region.Subregion( - ComputeInlineInfoStart(), - inline_info_size); + inline_infos_start_, inline_info_size_); - code_info.SetEncoding(inline_info_size, - dex_register_map_size, + code_info.SetEncoding(inline_info_size_, + dex_register_maps_size_, dex_pc_max_, native_pc_offset_max_, register_mask_max_); code_info.SetNumberOfStackMaps(stack_maps_.Size()); - code_info.SetStackMaskSize(stack_mask_size); - DCHECK_EQ(code_info.GetStackMapsSize(), ComputeStackMapsSize()); + code_info.SetStackMaskSize(stack_mask_size_); + DCHECK_EQ(code_info.GetStackMapsSize(), stack_maps_size_); // Set the Dex register location catalog. - code_info.SetNumberOfDexRegisterLocationCatalogEntries( - location_catalog_entries_.Size()); + code_info.SetNumberOfDexRegisterLocationCatalogEntries(location_catalog_entries_.Size()); MemoryRegion dex_register_location_catalog_region = region.Subregion( - ComputeDexRegisterLocationCatalogStart(), - ComputeDexRegisterLocationCatalogSize()); + dex_register_location_catalog_start_, dex_register_location_catalog_size_); DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); // Offset in `dex_register_location_catalog` where to store the next // register location. @@ -253,41 +273,26 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map.SetDexRegisterMapOffset(code_info, StackMap::kNoDexRegisterMap); } else { // Search for an entry with the same dex map. - size_t entry_with_same_map = FindEntryWithTheSameDexMap(i); - if (entry_with_same_map != kNoSameDexMapFound) { + if (entry.same_dex_register_map_as_ != kNoSameDexMapFound) { // If we have a hit reuse the offset. 
stack_map.SetDexRegisterMapOffset(code_info, - code_info.GetStackMapAt(entry_with_same_map).GetDexRegisterMapOffset(code_info)); + code_info.GetStackMapAt(entry.same_dex_register_map_as_) + .GetDexRegisterMapOffset(code_info)); } else { // New dex registers maps should be added to the stack map. - MemoryRegion register_region = - dex_register_locations_region.Subregion( - next_dex_register_map_offset, - ComputeDexRegisterMapSize(entry)); + MemoryRegion register_region = dex_register_locations_region.Subregion( + next_dex_register_map_offset, + ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask)); next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); stack_map.SetDexRegisterMapOffset( code_info, register_region.start() - dex_register_locations_region.start()); - // Set the live bit mask. - dex_register_map.SetLiveBitMask(entry.num_dex_registers, *entry.live_dex_registers_mask); - - // Set the dex register location mapping data. - for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; - dex_register_number < entry.num_dex_registers; - ++dex_register_number) { - if (entry.live_dex_registers_mask->IsBitSet(dex_register_number)) { - size_t location_catalog_entry_index = - dex_register_locations_.Get(entry.dex_register_locations_start_index - + index_in_dex_register_locations); - dex_register_map.SetLocationCatalogEntryIndex( - index_in_dex_register_locations, - location_catalog_entry_index, - entry.num_dex_registers, - location_catalog_entries_.Size()); - ++index_in_dex_register_locations; - } - } + // Set the dex register location. + FillInDexRegisterMap(dex_register_map, + entry.num_dex_registers, + *entry.live_dex_registers_mask, + entry.dex_register_locations_start_index); } } @@ -304,54 +309,81 @@ void StackMapStream::FillIn(MemoryRegion region) { code_info, inline_region.start() - dex_register_locations_region.start()); inline_info.SetDepth(entry.inlining_depth); - for (size_t j = 0; j < entry.inlining_depth; ++j) { - InlineInfoEntry inline_entry = inline_infos_.Get(j + entry.inline_infos_start_index); - inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index); + for (size_t depth = 0; depth < entry.inlining_depth; ++depth) { + InlineInfoEntry inline_entry = inline_infos_.Get(depth + entry.inline_infos_start_index); + inline_info.SetMethodIndexAtDepth(depth, inline_entry.method_index); + inline_info.SetDexPcAtDepth(depth, inline_entry.dex_pc); + if (inline_entry.num_dex_registers == 0) { + // No dex map available. 
+ inline_info.SetDexRegisterMapOffsetAtDepth(depth, StackMap::kNoDexRegisterMap); + DCHECK(inline_entry.live_dex_registers_mask == nullptr); + } else { + MemoryRegion register_region = dex_register_locations_region.Subregion( + next_dex_register_map_offset, + ComputeDexRegisterMapSize(inline_entry.num_dex_registers, + *inline_entry.live_dex_registers_mask)); + next_dex_register_map_offset += register_region.size(); + DexRegisterMap dex_register_map(register_region); + inline_info.SetDexRegisterMapOffsetAtDepth( + depth, register_region.start() - dex_register_locations_region.start()); + + FillInDexRegisterMap(dex_register_map, + inline_entry.num_dex_registers, + *inline_entry.live_dex_registers_mask, + inline_entry.dex_register_locations_start_index); + } } } else { - if (inline_info_size != 0) { + if (inline_info_size_ != 0) { stack_map.SetInlineDescriptorOffset(code_info, StackMap::kNoInlineInfo); } } } } -size_t StackMapStream::FindEntryWithTheSameDexMap(size_t entry_index) { - StackMapEntry entry = stack_maps_.Get(entry_index); - auto entries_it = dex_map_hash_to_stack_map_indices_.find(entry.dex_register_map_hash); +void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map, + uint32_t num_dex_registers, + const BitVector& live_dex_registers_mask, + uint32_t start_index_in_dex_register_locations) const { + dex_register_map.SetLiveBitMask(num_dex_registers, live_dex_registers_mask); + // Set the dex register location mapping data. + for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; + dex_register_number < num_dex_registers; + ++dex_register_number) { + if (live_dex_registers_mask.IsBitSet(dex_register_number)) { + size_t location_catalog_entry_index = dex_register_locations_.Get( + start_index_in_dex_register_locations + index_in_dex_register_locations); + dex_register_map.SetLocationCatalogEntryIndex( + index_in_dex_register_locations, + location_catalog_entry_index, + num_dex_registers, + location_catalog_entries_.Size()); + ++index_in_dex_register_locations; + } + } +} + +size_t StackMapStream::FindEntryWithTheSameDexMap() { + size_t current_entry_index = stack_maps_.Size(); + auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash); if (entries_it == dex_map_hash_to_stack_map_indices_.end()) { // We don't have a perfect hash function, so we need a list to collect all stack maps // which might have the same dex register map. GrowableArray<uint32_t> stack_map_indices(allocator_, 1); - stack_map_indices.Add(entry_index); - dex_map_hash_to_stack_map_indices_.Put(entry.dex_register_map_hash, stack_map_indices); + stack_map_indices.Add(current_entry_index); + dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, stack_map_indices); return kNoSameDexMapFound; } - // TODO: We don't need to add ourselves to the map if we can guarantee that - // FindEntryWithTheSameDexMap is called just once per stack map entry. - // A good way to do this is to cache the offset in the stack map entry. This - // is easier to do if we add markers when the stack map constructions begins - // and when it ends. - - // We might have collisions, so we need to check whether or not we should - // add the entry to the map. `needs_to_be_added` keeps track of this. - bool needs_to_be_added = true; - size_t result = kNoSameDexMapFound; + // We might have collisions, so we need to check whether or not we really have a match.
for (size_t i = 0; i < entries_it->second.Size(); i++) { size_t test_entry_index = entries_it->second.Get(i); - if (test_entry_index == entry_index) { - needs_to_be_added = false; - } else if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), entry)) { - result = test_entry_index; - needs_to_be_added = false; - break; + if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), current_entry_)) { + return test_entry_index; } } - if (needs_to_be_added) { - entries_it->second.Add(entry_index); - } - return result; + entries_it->second.Add(current_entry_index); + return kNoSameDexMapFound; } bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const { diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 990e682216..4c03f9f90a 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -70,7 +70,21 @@ class StackMapStream : public ValueObject { native_pc_offset_max_(0), register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), - dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()) {} + dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()), + current_entry_(), + current_inline_info_(), + stack_mask_size_(0), + inline_info_size_(0), + dex_register_maps_size_(0), + stack_maps_size_(0), + dex_register_location_catalog_size_(0), + dex_register_location_catalog_start_(0), + stack_maps_start_(0), + dex_register_maps_start_(0), + inline_infos_start_(0), + needed_size_(0), + current_dex_register_(0), + in_inline_frame_(false) {} // See runtime/stack_map.h to know what these fields contain. struct StackMapEntry { @@ -84,45 +98,52 @@ class StackMapStream : public ValueObject { size_t inline_infos_start_index; BitVector* live_dex_registers_mask; uint32_t dex_register_map_hash; + size_t same_dex_register_map_as_; }; struct InlineInfoEntry { + uint32_t dex_pc; uint32_t method_index; + uint32_t num_dex_registers; + BitVector* live_dex_registers_mask; + size_t dex_register_locations_start_index; }; - void AddStackMapEntry(uint32_t dex_pc, - uint32_t native_pc_offset, - uint32_t register_mask, - BitVector* sp_mask, - uint32_t num_dex_registers, - uint8_t inlining_depth); - - void AddDexRegisterEntry(uint16_t dex_register, - DexRegisterLocation::Kind kind, - int32_t value); - - void AddInlineInfoEntry(uint32_t method_index); + void BeginStackMapEntry(uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask, + BitVector* sp_mask, + uint32_t num_dex_registers, + uint8_t inlining_depth); + void EndStackMapEntry(); - size_t ComputeNeededSize(); - size_t ComputeStackMaskSize() const; - size_t ComputeStackMapsSize(); - size_t ComputeDexRegisterLocationCatalogSize() const; - size_t ComputeDexRegisterMapSize(const StackMapEntry& entry) const; - size_t ComputeDexRegisterMapsSize(); - size_t ComputeInlineInfoSize() const; + void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value); - size_t ComputeDexRegisterLocationCatalogStart() const; - size_t ComputeStackMapsStart() const; - size_t ComputeDexRegisterMapsStart(); - size_t ComputeInlineInfoStart(); + void BeginInlineInfoEntry(uint32_t method_index, + uint32_t dex_pc, + uint32_t num_dex_registers); + void EndInlineInfoEntry(); + // Prepares the stream to fill in a memory region. Must be called before FillIn. + // Returns the size (in bytes) needed to store this stream. 
+ size_t PrepareForFillIn(); void FillIn(MemoryRegion region); private: - // Returns the index of an entry with the same dex register map + size_t ComputeDexRegisterLocationCatalogSize() const; + size_t ComputeDexRegisterMapSize(uint32_t num_dex_registers, + const BitVector& live_dex_registers_mask) const; + size_t ComputeDexRegisterMapsSize() const; + size_t ComputeInlineInfoSize() const; + + // Returns the index of an entry with the same dex register map as the current_entry, // or kNoSameDexMapFound if no such entry exists. - size_t FindEntryWithTheSameDexMap(size_t entry_index); + size_t FindEntryWithTheSameDexMap(); bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const; + void FillInDexRegisterMap(DexRegisterMap dex_register_map, + uint32_t num_dex_registers, + const BitVector& live_dex_registers_mask, + uint32_t start_index_in_dex_register_locations) const; ArenaAllocator* allocator_; GrowableArray<StackMapEntry> stack_maps_; @@ -146,6 +167,21 @@ class StackMapStream : public ValueObject { ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_; + StackMapEntry current_entry_; + InlineInfoEntry current_inline_info_; + size_t stack_mask_size_; + size_t inline_info_size_; + size_t dex_register_maps_size_; + size_t stack_maps_size_; + size_t dex_register_location_catalog_size_; + size_t dex_register_location_catalog_start_; + size_t stack_maps_start_; + size_t dex_register_maps_start_; + size_t inline_infos_start_; + size_t needed_size_; + uint32_t current_dex_register_; + bool in_inline_frame_; + static constexpr uint32_t kNoSameDexMapFound = -1; DISALLOW_COPY_AND_ASSIGN(StackMapStream); diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 8d160bc81e..e04fa98887 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -40,11 +40,12 @@ TEST(StackMapTest, Test1) { ArenaBitVector sp_mask(&arena, 0, false); size_t number_of_dex_registers = 2; - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. - stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Short location. + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location. + stream.AddDexRegisterEntry(Kind::kConstant, -2); // Short location. + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -123,20 +124,25 @@ TEST(StackMapTest, Test2) { sp_mask1.SetBit(2); sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); - stream.AddDexRegisterEntry(0, Kind::kInStack, 0); // Short location. - stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. - stream.AddInlineInfoEntry(42); - stream.AddInlineInfoEntry(82); + size_t number_of_dex_registers_in_inline_info = 0; + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); + stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location. + stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. 
+ stream.BeginInlineInfoEntry(82, 3, number_of_dex_registers_in_inline_info); + stream.EndInlineInfoEntry(); + stream.BeginInlineInfoEntry(42, 2, number_of_dex_registers_in_inline_info); + stream.EndInlineInfoEntry(); + stream.EndStackMapEntry(); ArenaBitVector sp_mask2(&arena, 0, true); sp_mask2.SetBit(3); sp_mask1.SetBit(8); - stream.AddStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, Kind::kInRegister, 18); // Short location. - stream.AddDexRegisterEntry(1, Kind::kInFpuRegister, 3); // Short location. + stream.BeginStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); + stream.AddDexRegisterEntry(Kind::kInRegister, 18); // Short location. + stream.AddDexRegisterEntry(Kind::kInFpuRegister, 3); // Short location. + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -208,8 +214,10 @@ TEST(StackMapTest, Test2) { ASSERT_TRUE(stack_map.HasInlineInfo(code_info)); InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map); ASSERT_EQ(2u, inline_info.GetDepth()); - ASSERT_EQ(42u, inline_info.GetMethodReferenceIndexAtDepth(0)); - ASSERT_EQ(82u, inline_info.GetMethodReferenceIndexAtDepth(1)); + ASSERT_EQ(82u, inline_info.GetMethodIndexAtDepth(0)); + ASSERT_EQ(42u, inline_info.GetMethodIndexAtDepth(1)); + ASSERT_EQ(3u, inline_info.GetDexPcAtDepth(0)); + ASSERT_EQ(2u, inline_info.GetDexPcAtDepth(1)); } // Second stack map. @@ -273,11 +281,12 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 2; - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, Kind::kNone, 0); // No location. - stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.AddDexRegisterEntry(Kind::kNone, 0); // No location. + stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -353,22 +362,24 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 1024; // Create the first stack map (and its Dex register map). - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8; for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) { // Use two different Dex register locations to populate this map, // as using a single value (in the whole CodeInfo object) would // make this Dex register mapping data empty (see // art::DexRegisterMap::SingleEntrySizeInBits). - stream.AddDexRegisterEntry(i, Kind::kConstant, i % 2); // Short location. + stream.AddDexRegisterEntry(Kind::kConstant, i % 2); // Short location. } + stream.EndStackMapEntry(); // Create the second stack map (and its Dex register map). 
- stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); for (uint32_t i = 0; i < number_of_dex_registers; ++i) { - stream.AddDexRegisterEntry(i, Kind::kConstant, 0); // Short location. + stream.AddDexRegisterEntry(Kind::kConstant, 0); // Short location. } + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -413,19 +424,22 @@ TEST(StackMapTest, TestShareDexRegisterMap) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 2; // First stack map. - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location. - stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.AddDexRegisterEntry(Kind::kInRegister, 0); // Short location. + stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. + stream.EndStackMapEntry(); // Second stack map, which should share the same dex register map. - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, Kind::kInRegister, 0); // Short location. - stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.AddDexRegisterEntry(Kind::kInRegister, 0); // Short location. + stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. + stream.EndStackMapEntry(); // Third stack map (doesn't share the dex register map). - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - stream.AddDexRegisterEntry(0, Kind::kInRegister, 2); // Short location. - stream.AddDexRegisterEntry(1, Kind::kConstant, -2); // Large location. + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.AddDexRegisterEntry(Kind::kInRegister, 2); // Short location. + stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -462,9 +476,10 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ArenaBitVector sp_mask(&arena, 0, false); uint32_t number_of_dex_registers = 0; - stream.AddStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.EndStackMapEntry(); - size_t size = stream.ComputeNeededSize(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillIn(region); @@ -490,4 +505,167 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ASSERT_FALSE(stack_map.HasInlineInfo(code_info)); } +TEST(StackMapTest, InlineTest) { + ArenaPool pool; + ArenaAllocator arena(&pool); + StackMapStream stream(&arena); + + ArenaBitVector sp_mask1(&arena, 0, true); + sp_mask1.SetBit(2); + sp_mask1.SetBit(4); + + // First stack map. 
+ stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, 2, 2); + stream.AddDexRegisterEntry(Kind::kInStack, 0); + stream.AddDexRegisterEntry(Kind::kConstant, 4); + + stream.BeginInlineInfoEntry(42, 2, 1); + stream.AddDexRegisterEntry(Kind::kInStack, 8); + stream.EndInlineInfoEntry(); + stream.BeginInlineInfoEntry(82, 3, 3); + stream.AddDexRegisterEntry(Kind::kInStack, 16); + stream.AddDexRegisterEntry(Kind::kConstant, 20); + stream.AddDexRegisterEntry(Kind::kInRegister, 15); + stream.EndInlineInfoEntry(); + + stream.EndStackMapEntry(); + + // Second stack map. + stream.BeginStackMapEntry(2, 22, 0x3, &sp_mask1, 2, 3); + stream.AddDexRegisterEntry(Kind::kInStack, 56); + stream.AddDexRegisterEntry(Kind::kConstant, 0); + + stream.BeginInlineInfoEntry(42, 2, 1); + stream.AddDexRegisterEntry(Kind::kInStack, 12); + stream.EndInlineInfoEntry(); + stream.BeginInlineInfoEntry(82, 3, 3); + stream.AddDexRegisterEntry(Kind::kInStack, 80); + stream.AddDexRegisterEntry(Kind::kConstant, 10); + stream.AddDexRegisterEntry(Kind::kInRegister, 5); + stream.EndInlineInfoEntry(); + stream.BeginInlineInfoEntry(52, 5, 0); + stream.EndInlineInfoEntry(); + + stream.EndStackMapEntry(); + + // Third stack map. + stream.BeginStackMapEntry(4, 56, 0x3, &sp_mask1, 2, 0); + stream.AddDexRegisterEntry(Kind::kNone, 0); + stream.AddDexRegisterEntry(Kind::kConstant, 4); + stream.EndStackMapEntry(); + + // Fourth stack map. + stream.BeginStackMapEntry(6, 78, 0x3, &sp_mask1, 2, 3); + stream.AddDexRegisterEntry(Kind::kInStack, 56); + stream.AddDexRegisterEntry(Kind::kConstant, 0); + + stream.BeginInlineInfoEntry(42, 2, 0); + stream.EndInlineInfoEntry(); + stream.BeginInlineInfoEntry(52, 5, 1); + stream.AddDexRegisterEntry(Kind::kInRegister, 2); + stream.EndInlineInfoEntry(); + stream.BeginInlineInfoEntry(52, 10, 2); + stream.AddDexRegisterEntry(Kind::kNone, 0); + stream.AddDexRegisterEntry(Kind::kInRegister, 3); + stream.EndInlineInfoEntry(); + + stream.EndStackMapEntry(); + + size_t size = stream.PrepareForFillIn(); + void* memory = arena.Alloc(size, kArenaAllocMisc); + MemoryRegion region(memory, size); + stream.FillIn(region); + + CodeInfo ci(region); + + { + // Verify first stack map. + StackMap sm0 = ci.GetStackMapAt(0); + + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, 2); + ASSERT_EQ(0, dex_registers0.GetStackOffsetInBytes(0, 2, ci)); + ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci)); + + InlineInfo if0 = ci.GetInlineInfoOf(sm0); + ASSERT_EQ(2u, if0.GetDepth()); + ASSERT_EQ(2u, if0.GetDexPcAtDepth(0)); + ASSERT_EQ(42u, if0.GetMethodIndexAtDepth(0)); + ASSERT_EQ(3u, if0.GetDexPcAtDepth(1)); + ASSERT_EQ(82u, if0.GetMethodIndexAtDepth(1)); + + DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if0, 1); + ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0, 1, ci)); + + DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if0, 3); + ASSERT_EQ(16, dex_registers2.GetStackOffsetInBytes(0, 3, ci)); + ASSERT_EQ(20, dex_registers2.GetConstant(1, 3, ci)); + ASSERT_EQ(15, dex_registers2.GetMachineRegister(2, 3, ci)); + } + + { + // Verify second stack map. 
+ StackMap sm1 = ci.GetStackMapAt(1); + + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm1, 2); + ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci)); + ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci)); + + InlineInfo if1 = ci.GetInlineInfoOf(sm1); + ASSERT_EQ(3u, if1.GetDepth()); + ASSERT_EQ(2u, if1.GetDexPcAtDepth(0)); + ASSERT_EQ(42u, if1.GetMethodIndexAtDepth(0)); + ASSERT_EQ(3u, if1.GetDexPcAtDepth(1)); + ASSERT_EQ(82u, if1.GetMethodIndexAtDepth(1)); + ASSERT_EQ(5u, if1.GetDexPcAtDepth(2)); + ASSERT_EQ(52u, if1.GetMethodIndexAtDepth(2)); + + DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if1, 1); + ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0, 1, ci)); + + DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if1, 3); + ASSERT_EQ(80, dex_registers2.GetStackOffsetInBytes(0, 3, ci)); + ASSERT_EQ(10, dex_registers2.GetConstant(1, 3, ci)); + ASSERT_EQ(5, dex_registers2.GetMachineRegister(2, 3, ci)); + + ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(2)); + } + + { + // Verify third stack map. + StackMap sm2 = ci.GetStackMapAt(2); + + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2, 2); + ASSERT_FALSE(dex_registers0.IsDexRegisterLive(0)); + ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci)); + ASSERT_FALSE(sm2.HasInlineInfo(ci)); + } + + { + // Verify fourth stack map. + StackMap sm3 = ci.GetStackMapAt(3); + + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm3, 2); + ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci)); + ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci)); + + InlineInfo if2 = ci.GetInlineInfoOf(sm3); + ASSERT_EQ(3u, if2.GetDepth()); + ASSERT_EQ(2u, if2.GetDexPcAtDepth(0)); + ASSERT_EQ(42u, if2.GetMethodIndexAtDepth(0)); + ASSERT_EQ(5u, if2.GetDexPcAtDepth(1)); + ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(1)); + ASSERT_EQ(10u, if2.GetDexPcAtDepth(2)); + ASSERT_EQ(52u, if2.GetMethodIndexAtDepth(2)); + + ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(0)); + + DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, if2, 1); + ASSERT_EQ(2, dex_registers1.GetMachineRegister(0, 1, ci)); + + DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(2, if2, 2); + ASSERT_FALSE(dex_registers2.IsDexRegisterLive(0)); + ASSERT_EQ(3, dex_registers2.GetMachineRegister(1, 2, ci)); + } +} + } // namespace art diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc index a5a0eb2114..5ca66a1de6 100644 --- a/compiler/optimizing/suspend_check_test.cc +++ b/compiler/optimizing/suspend_check_test.cc @@ -30,7 +30,7 @@ namespace art { static void TestCode(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); - HGraph* graph = new (&allocator) HGraph(&allocator); + HGraph* graph = CreateGraph(&allocator); HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item);
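Taken together, the stack_map_stream changes replace the single-shot AddStackMapEntry/AddDexRegisterEntry(index, ...) interface with a bracketed Begin/End protocol and a two-phase PrepareForFillIn/FillIn emission. A condensed usage sketch, modeled directly on the tests above (the constant values are illustrative):

// Sketch of the new producer protocol, following the patterns in stack_map_test.cc.
void BuildExampleCodeInfo() {
  ArenaPool pool;
  ArenaAllocator arena(&pool);
  StackMapStream stream(&arena);

  ArenaBitVector sp_mask(&arena, 0, false);
  stream.BeginStackMapEntry(/* dex_pc */ 0, /* native_pc_offset */ 64,
                            /* register_mask */ 0x3, &sp_mask,
                            /* num_dex_registers */ 2, /* inlining_depth */ 1);
  // Dex registers are now numbered implicitly by call order; the explicit index is gone.
  stream.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, 0);
  stream.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, -2);
  // Inline frames carry their own dex pc and dex register map.
  stream.BeginInlineInfoEntry(/* method_index */ 42, /* dex_pc */ 2,
                              /* num_dex_registers */ 0);
  stream.EndInlineInfoEntry();
  stream.EndStackMapEntry();  // hashes the entry and records any sharable dex map

  // Two-phase emission: size all sections once, then fill the region.
  size_t size = stream.PrepareForFillIn();
  void* memory = arena.Alloc(size, kArenaAllocMisc);
  MemoryRegion region(memory, size);
  stream.FillIn(region);
}

Bracketing each entry lets FindEntryWithTheSameDexMap run exactly once, at EndStackMapEntry time, which is what allowed the removed TODO (and the needs_to_be_added bookkeeping) in the old FindEntryWithTheSameDexMap to go away.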