Diffstat (limited to 'compiler/optimizing')
61 files changed, 3185 insertions, 820 deletions
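The largest change below rewrites the bounds-check-elimination pass in bounds_check_elimination.cc around explicit overflow/underflow checks on bound arithmetic. For orientation, here is a minimal standalone C++ sketch mirroring the WouldAddOverflowOrUnderflow() helper the patch adds (the surrounding program is illustrative and not part of the patch):

#include <climits>
#include <cstdint>
#include <iostream>

// Reports whether `left + right` would wrap around a 32-bit int.
static bool WouldAddOverflowOrUnderflow(int32_t left, int32_t right) {
  if (right == 0) {
    return false;  // Adding zero never wraps.
  }
  if (right > 0 && left <= INT_MAX - right) {
    return false;  // No overflow.
  }
  if (right < 0 && left >= INT_MIN - right) {
    return false;  // No underflow.
  }
  return true;     // The sum would wrap.
}

int main() {
  std::cout << WouldAddOverflowOrUnderflow(INT_MAX, 1) << "\n";   // 1: overflows
  std::cout << WouldAddOverflowOrUnderflow(INT_MIN, -1) << "\n";  // 1: underflows
  std::cout << WouldAddOverflowOrUnderflow(100, -42) << "\n";     // 0: safe
}

The pass uses this predicate both when normalizing a ValueBound built from a constant instruction and when shifting value ranges by a constant, so that a bound that cannot be represented exactly is dropped rather than silently wrapped.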
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index d6c3515726..811a3bdf0c 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -14,9 +14,9 @@ * limitations under the License. */ +#include "base/arena_containers.h" #include "bounds_check_elimination.h" #include "nodes.h" -#include "utils/arena_containers.h" namespace art { @@ -28,18 +28,11 @@ class MonotonicValueRange; */ class ValueBound : public ValueObject { public: - ValueBound(HInstruction* instruction, int constant) { + ValueBound(HInstruction* instruction, int32_t constant) { if (instruction != nullptr && instruction->IsIntConstant()) { - // Normalizing ValueBound with constant instruction. - int instr_const = instruction->AsIntConstant()->GetValue(); - if (constant >= 0 && (instr_const <= INT_MAX - constant)) { - // No overflow. - instruction_ = nullptr; - constant_ = instr_const + constant; - return; - } - if (constant < 0 && (instr_const >= INT_MIN - constant)) { - // No underflow. + // Normalize ValueBound with constant instruction. + int32_t instr_const = instruction->AsIntConstant()->GetValue(); + if (!WouldAddOverflowOrUnderflow(instr_const, constant)) { instruction_ = nullptr; constant_ = instr_const + constant; return; @@ -49,6 +42,41 @@ class ValueBound : public ValueObject { constant_ = constant; } + // Return whether (left + right) overflows or underflows. + static bool WouldAddOverflowOrUnderflow(int32_t left, int32_t right) { + if (right == 0) { + return false; + } + if ((right > 0) && (left <= INT_MAX - right)) { + // No overflow. + return false; + } + if ((right < 0) && (left >= INT_MIN - right)) { + // No underflow. + return false; + } + return true; + } + + static bool IsAddOrSubAConstant(HInstruction* instruction, + HInstruction** left_instruction, + int* right_constant) { + if (instruction->IsAdd() || instruction->IsSub()) { + HBinaryOperation* bin_op = instruction->AsBinaryOperation(); + HInstruction* left = bin_op->GetLeft(); + HInstruction* right = bin_op->GetRight(); + if (right->IsIntConstant()) { + *left_instruction = left; + int32_t c = right->AsIntConstant()->GetValue(); + *right_constant = instruction->IsAdd() ? c : -c; + return true; + } + } + *left_instruction = nullptr; + *right_constant = 0; + return false; + } + // Try to detect useful value bound format from an instruction, e.g. // a constant or array length related value. static ValueBound DetectValueBoundFromValue(HInstruction* instruction, bool* found) { @@ -63,13 +91,12 @@ class ValueBound : public ValueObject { return ValueBound(instruction, 0); } // Try to detect (array.length + c) format. 
- if (instruction->IsAdd()) { - HAdd* add = instruction->AsAdd(); - HInstruction* left = add->GetLeft(); - HInstruction* right = add->GetRight(); - if (left->IsArrayLength() && right->IsIntConstant()) { + HInstruction *left; + int32_t right; + if (IsAddOrSubAConstant(instruction, &left, &right)) { + if (left->IsArrayLength()) { *found = true; - return ValueBound(left, right->AsIntConstant()->GetValue()); + return ValueBound(left, right); } } @@ -79,10 +106,13 @@ class ValueBound : public ValueObject { } HInstruction* GetInstruction() const { return instruction_; } - int GetConstant() const { return constant_; } + int32_t GetConstant() const { return constant_; } - bool IsRelativeToArrayLength() const { - return instruction_ != nullptr && instruction_->IsArrayLength(); + bool IsRelatedToArrayLength() const { + // Some bounds are created with HNewArray* as the instruction instead + // of HArrayLength*. They are treated the same. + return (instruction_ != nullptr) && + (instruction_->IsArrayLength() || instruction_->IsNewArray()); } bool IsConstant() const { @@ -96,54 +126,45 @@ class ValueBound : public ValueObject { return instruction_ == bound.instruction_ && constant_ == bound.constant_; } - // Returns if it's certain bound1 >= bound2. - bool GreaterThanOrEqual(ValueBound bound) const { - if (instruction_ == bound.instruction_) { - if (instruction_ == nullptr) { - // Pure constant. - return constant_ >= bound.constant_; - } - // There might be overflow/underflow. Be conservative for now. - return false; + static HInstruction* FromArrayLengthToNewArrayIfPossible(HInstruction* instruction) { + // Null check on the NewArray should have been eliminated by instruction + // simplifier already. + if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { + return instruction->InputAt(0)->AsNewArray(); } - // Not comparable. Just return false. - return false; + return instruction; } - // Returns if it's certain bound1 <= bound2. - bool LessThanOrEqual(ValueBound bound) const { - if (instruction_ == bound.instruction_) { - if (instruction_ == nullptr) { - // Pure constant. - return constant_ <= bound.constant_; - } - if (IsRelativeToArrayLength()) { - // Array length is guaranteed to be no less than 0. - // No overflow/underflow can happen if both constants are negative. - if (constant_ <= 0 && bound.constant_ <= 0) { - return constant_ <= bound.constant_; - } - // There might be overflow/underflow. Be conservative for now. - return false; - } + static bool Equal(HInstruction* instruction1, HInstruction* instruction2) { + if (instruction1 == instruction2) { + return true; } - // In case the array length is some constant, we can - // still compare. - if (IsConstant() && bound.IsRelativeToArrayLength()) { - HInstruction* array = bound.GetInstruction()->AsArrayLength()->InputAt(0); - if (array->IsNullCheck()) { - array = array->AsNullCheck()->InputAt(0); - } - if (array->IsNewArray()) { - HInstruction* len = array->InputAt(0); - if (len->IsIntConstant()) { - int len_const = len->AsIntConstant()->GetValue(); - return constant_ <= len_const + bound.GetConstant(); - } - } + if (instruction1 == nullptr || instruction2 == nullptr) { + return false; } + // Some bounds are created with HNewArray* as the instruction instead + // of HArrayLength*. They are treated the same. 
+ instruction1 = FromArrayLengthToNewArrayIfPossible(instruction1); + instruction2 = FromArrayLengthToNewArrayIfPossible(instruction2); + return instruction1 == instruction2; + } + + // Returns if it's certain this->bound >= `bound`. + bool GreaterThanOrEqualTo(ValueBound bound) const { + if (Equal(instruction_, bound.instruction_)) { + return constant_ >= bound.constant_; + } + // Not comparable. Just return false. + return false; + } + + // Returns if it's certain this->bound <= `bound`. + bool LessThanOrEqualTo(ValueBound bound) const { + if (Equal(instruction_, bound.instruction_)) { + return constant_ <= bound.constant_; + } // Not comparable. Just return false. return false; } @@ -151,10 +172,11 @@ class ValueBound : public ValueObject { // Try to narrow lower bound. Returns the greatest of the two if possible. // Pick one if they are not comparable. static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) { - if (bound1.instruction_ == bound2.instruction_) { - // Same instruction, compare the constant part. - return ValueBound(bound1.instruction_, - std::max(bound1.constant_, bound2.constant_)); + if (bound1.GreaterThanOrEqualTo(bound2)) { + return bound1; + } + if (bound2.GreaterThanOrEqualTo(bound1)) { + return bound2; } // Not comparable. Just pick one. We may lose some info, but that's ok. @@ -165,58 +187,71 @@ class ValueBound : public ValueObject { // Try to narrow upper bound. Returns the lowest of the two if possible. // Pick one if they are not comparable. static ValueBound NarrowUpperBound(ValueBound bound1, ValueBound bound2) { - if (bound1.instruction_ == bound2.instruction_) { - // Same instruction, compare the constant part. - return ValueBound(bound1.instruction_, - std::min(bound1.constant_, bound2.constant_)); + if (bound1.LessThanOrEqualTo(bound2)) { + return bound1; + } + if (bound2.LessThanOrEqualTo(bound1)) { + return bound2; } // Not comparable. Just pick one. We may lose some info, but that's ok. // Favor array length as upper bound. - return bound1.IsRelativeToArrayLength() ? bound1 : bound2; + return bound1.IsRelatedToArrayLength() ? bound1 : bound2; } - // Add a constant to a ValueBound. If the constant part of the ValueBound - // overflows/underflows, then we can't accurately represent it. For correctness, - // just return Max/Min() depending on whether the returned ValueBound is used for - // lower/upper bound. - ValueBound Add(int c, bool* overflow_or_underflow) const { - *overflow_or_underflow = false; + // Add a constant to a ValueBound. + // `overflow` or `underflow` will return whether the resulting bound may + // overflow or underflow an int. + ValueBound Add(int32_t c, bool* overflow, bool* underflow) const { + *overflow = *underflow = false; if (c == 0) { return *this; } - int new_constant; + int32_t new_constant; if (c > 0) { if (constant_ > INT_MAX - c) { - // Constant part overflows. - *overflow_or_underflow = true; + *overflow = true; return Max(); - } else { - new_constant = constant_ + c; } + + new_constant = constant_ + c; + // (array.length + non-positive-constant) won't overflow an int. + if (IsConstant() || (IsRelatedToArrayLength() && new_constant <= 0)) { + return ValueBound(instruction_, new_constant); + } + // Be conservative. + *overflow = true; + return Max(); } else { if (constant_ < INT_MIN - c) { - // Constant part underflows. 
- *overflow_or_underflow = true; - return Max(); - } else { - new_constant = constant_ + c; + *underflow = true; + return Min(); } + + new_constant = constant_ + c; + // Regardless of the value new_constant, (array.length+new_constant) will + // never underflow since array.length is no less than 0. + if (IsConstant() || IsRelatedToArrayLength()) { + return ValueBound(instruction_, new_constant); + } + // Be conservative. + *underflow = true; + return Min(); } return ValueBound(instruction_, new_constant); } private: HInstruction* instruction_; - int constant_; + int32_t constant_; }; /** * Represent a range of lower bound and upper bound, both being inclusive. * Currently a ValueRange may be generated as a result of the following: * comparisons related to array bounds, array bounds check, add/sub on top - * of an existing value range, or a loop phi corresponding to an + * of an existing value range, NewArray or a loop phi corresponding to an * incrementing/decrementing array index (MonotonicValueRange). */ class ValueRange : public ArenaObject<kArenaAllocMisc> { @@ -241,8 +276,8 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { return true; } DCHECK(!other_range->IsMonotonicValueRange()); - return lower_.GreaterThanOrEqual(other_range->lower_) && - upper_.LessThanOrEqual(other_range->upper_); + return lower_.GreaterThanOrEqualTo(other_range->lower_) && + upper_.LessThanOrEqualTo(other_range->upper_); } // Returns the intersection of this and range. @@ -263,29 +298,24 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { ValueBound::NarrowUpperBound(upper_, range->upper_)); } - // Shift a range by a constant. If either bound can't be represented - // as (instruction+c) format due to possible overflow/underflow, - // return the full integer range. - ValueRange* Add(int constant) const { - bool overflow_or_underflow; - ValueBound lower = lower_.Add(constant, &overflow_or_underflow); - if (overflow_or_underflow) { - // We can't accurately represent the bounds anymore. - return FullIntRange(); + // Shift a range by a constant. + ValueRange* Add(int32_t constant) const { + bool overflow, underflow; + ValueBound lower = lower_.Add(constant, &overflow, &underflow); + if (underflow) { + // Lower bound underflow will wrap around to positive values + // and invalidate the upper bound. + return nullptr; } - ValueBound upper = upper_.Add(constant, &overflow_or_underflow); - if (overflow_or_underflow) { - // We can't accurately represent the bounds anymore. - return FullIntRange(); + ValueBound upper = upper_.Add(constant, &overflow, &underflow); + if (overflow) { + // Upper bound overflow will wrap around to negative values + // and invalidate the lower bound. + return nullptr; } return new (allocator_) ValueRange(allocator_, lower, upper); } - // Return [INT_MIN, INT_MAX]. - ValueRange* FullIntRange() const { - return new (allocator_) ValueRange(allocator_, ValueBound::Min(), ValueBound::Max()); - } - private: ArenaAllocator* const allocator_; const ValueBound lower_; // inclusive @@ -304,7 +334,7 @@ class MonotonicValueRange : public ValueRange { public: MonotonicValueRange(ArenaAllocator* allocator, HInstruction* initial, - int increment, + int32_t increment, ValueBound bound) // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's // used as a regular value range, due to possible overflow/underflow. @@ -343,23 +373,17 @@ class MonotonicValueRange : public ValueRange { // make assumptions about the max array length, e.g. 
due to the max heap size, // divided by the element size (such as 4 bytes for each integer array), we can // lower this number and rule out some possible overflows. - int max_array_len = INT_MAX; - - int upper = INT_MAX; - if (range->GetUpper().IsConstant()) { - upper = range->GetUpper().GetConstant(); - } else if (range->GetUpper().IsRelativeToArrayLength()) { - int constant = range->GetUpper().GetConstant(); - if (constant <= 0) { - // Normal case. e.g. <= array.length - 1, <= array.length - 2, etc. - upper = max_array_len + constant; - } else { - // There might be overflow. Give up narrowing. - return this; - } - } else { - // There might be overflow. Give up narrowing. - return this; + int32_t max_array_len = INT_MAX; + + // max possible integer value of range's upper value. + int32_t upper = INT_MAX; + // Try to lower upper. + ValueBound upper_bound = range->GetUpper(); + if (upper_bound.IsConstant()) { + upper = upper_bound.GetConstant(); + } else if (upper_bound.IsRelatedToArrayLength() && upper_bound.GetConstant() <= 0) { + // Normal case. e.g. <= array.length - 1. + upper = max_array_len + upper_bound.GetConstant(); } // If we can prove for the last number in sequence of initial_, @@ -368,13 +392,13 @@ class MonotonicValueRange : public ValueRange { // then this MonoticValueRange is narrowed to a normal value range. // Be conservative first, assume last number in the sequence hits upper. - int last_num_in_sequence = upper; + int32_t last_num_in_sequence = upper; if (initial_->IsIntConstant()) { - int initial_constant = initial_->AsIntConstant()->GetValue(); + int32_t initial_constant = initial_->AsIntConstant()->GetValue(); if (upper <= initial_constant) { last_num_in_sequence = upper; } else { - // Cast to int64_t for the substraction part to avoid int overflow. + // Cast to int64_t for the substraction part to avoid int32_t overflow. last_num_in_sequence = initial_constant + ((int64_t)upper - (int64_t)initial_constant) / increment_ * increment_; } @@ -392,23 +416,22 @@ class MonotonicValueRange : public ValueRange { ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper()); // Need to take care of underflow. Try to prove underflow won't happen - // for common cases. Basically need to be able to prove for any value - // that's >= range->GetLower(), it won't be positive with value+increment. + // for common cases. if (range->GetLower().IsConstant()) { - int constant = range->GetLower().GetConstant(); + int32_t constant = range->GetLower().GetConstant(); if (constant >= INT_MIN - increment_) { return new (GetAllocator()) ValueRange(GetAllocator(), range->GetLower(), upper); } } - // There might be underflow. Give up narrowing. + // For non-constant lower bound, just assume might be underflow. Give up narrowing. return this; } } private: HInstruction* const initial_; - const int increment_; + const int32_t increment_; ValueBound bound_; // Additional value bound info for initial_; DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange); @@ -446,13 +469,26 @@ class BCEVisitor : public HGraphVisitor { return nullptr; } - // Narrow the value range of 'instruction' at the end of 'basic_block' with 'range', - // and push the narrowed value range to 'successor'. + // Narrow the value range of `instruction` at the end of `basic_block` with `range`, + // and push the narrowed value range to `successor`. 
void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block, - HBasicBlock* successor, ValueRange* range) { + HBasicBlock* successor, ValueRange* range) { ValueRange* existing_range = LookupValueRange(instruction, basic_block); - ValueRange* narrowed_range = (existing_range == nullptr) ? - range : existing_range->Narrow(range); + if (existing_range == nullptr) { + if (range != nullptr) { + GetValueRangeMap(successor)->Overwrite(instruction->GetId(), range); + } + return; + } + if (existing_range->IsMonotonicValueRange()) { + DCHECK(instruction->IsLoopHeaderPhi()); + // Make sure the comparison is in the loop header so each increment is + // checked with a comparison. + if (instruction->GetBlock() != basic_block) { + return; + } + } + ValueRange* narrowed_range = existing_range->Narrow(range); if (narrowed_range != nullptr) { GetValueRangeMap(successor)->Overwrite(instruction->GetId(), narrowed_range); } @@ -472,10 +508,12 @@ class BCEVisitor : public HGraphVisitor { bool found; ValueBound bound = ValueBound::DetectValueBoundFromValue(right, &found); + // Each comparison can establish a lower bound and an upper bound + // for the left hand side. ValueBound lower = bound; ValueBound upper = bound; if (!found) { - // No constant or array.length+c bound found. + // No constant or array.length+c format bound found. // For i<j, we can still use j's upper bound as i's upper bound. Same for lower. ValueRange* range = LookupValueRange(right, block); if (range != nullptr) { @@ -487,13 +525,13 @@ class BCEVisitor : public HGraphVisitor { } } - bool overflow_or_underflow; + bool overflow, underflow; if (cond == kCondLT || cond == kCondLE) { if (!upper.Equals(ValueBound::Max())) { - int compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive - ValueBound new_upper = upper.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_upper = ValueBound::Max(); + int32_t compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive + ValueBound new_upper = upper.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper); @@ -501,11 +539,11 @@ class BCEVisitor : public HGraphVisitor { } // array.length as a lower bound isn't considered useful. - if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) { - int compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive - ValueBound new_lower = lower.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_lower = ValueBound::Min(); + if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) { + int32_t compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive + ValueBound new_lower = lower.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max()); @@ -513,11 +551,11 @@ class BCEVisitor : public HGraphVisitor { } } else if (cond == kCondGT || cond == kCondGE) { // array.length as a lower bound isn't considered useful. - if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) { - int compensation = (cond == kCondGT) ? 
1 : 0; // lower bound is inclusive - ValueBound new_lower = lower.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_lower = ValueBound::Min(); + if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) { + int32_t compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive + ValueBound new_lower = lower.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max()); @@ -525,10 +563,10 @@ class BCEVisitor : public HGraphVisitor { } if (!upper.Equals(ValueBound::Max())) { - int compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive - ValueBound new_upper = upper.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_upper = ValueBound::Max(); + int32_t compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive + ValueBound new_upper = upper.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper); @@ -541,41 +579,56 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); - ValueRange* index_range = LookupValueRange(index, block); - - if (index_range != nullptr) { - ValueBound lower = ValueBound(nullptr, 0); // constant 0 - ValueBound upper = ValueBound(array_length, -1); // array_length - 1 - ValueRange* array_range = new (GetGraph()->GetArena()) - ValueRange(GetGraph()->GetArena(), lower, upper); - if (index_range->FitsIn(array_range)) { - ReplaceBoundsCheck(bounds_check, index); + DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength()); + + if (!index->IsIntConstant()) { + ValueRange* index_range = LookupValueRange(index, block); + if (index_range != nullptr) { + ValueBound lower = ValueBound(nullptr, 0); // constant 0 + ValueBound upper = ValueBound(array_length, -1); // array_length - 1 + ValueRange* array_range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, upper); + if (index_range->FitsIn(array_range)) { + ReplaceBoundsCheck(bounds_check, index); + return; + } + } + } else { + int32_t constant = index->AsIntConstant()->GetValue(); + if (constant < 0) { + // Will always throw exception. + return; + } + if (array_length->IsIntConstant()) { + if (constant < array_length->AsIntConstant()->GetValue()) { + ReplaceBoundsCheck(bounds_check, index); + } return; } - } - if (index->IsIntConstant()) { - ValueRange* array_length_range = LookupValueRange(array_length, block); - int constant = index->AsIntConstant()->GetValue(); - if (array_length_range != nullptr && - array_length_range->GetLower().IsConstant()) { - if (constant < array_length_range->GetLower().GetConstant()) { + DCHECK(array_length->IsArrayLength()); + ValueRange* existing_range = LookupValueRange(array_length, block); + if (existing_range != nullptr) { + ValueBound lower = existing_range->GetLower(); + DCHECK(lower.IsConstant()); + if (constant < lower.GetConstant()) { ReplaceBoundsCheck(bounds_check, index); return; + } else { + // Existing range isn't strong enough to eliminate the bounds check. + // Fall through to update the array_length range with info from this + // bounds check. 
} } // Once we have an array access like 'array[5] = 1', we record array.length >= 6. + // We currently don't do it for non-constant index since a valid array[i] can't prove + // a valid array[i-1] yet due to the lower bound side. ValueBound lower = ValueBound(nullptr, constant + 1); ValueBound upper = ValueBound::Max(); ValueRange* range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), lower, upper); - ValueRange* existing_range = LookupValueRange(array_length, block); - ValueRange* new_range = range; - if (existing_range != nullptr) { - new_range = range->Narrow(existing_range); - } - GetValueRangeMap(block)->Overwrite(array_length->GetId(), new_range); + GetValueRangeMap(block)->Overwrite(array_length->GetId(), range); } } @@ -588,14 +641,12 @@ class BCEVisitor : public HGraphVisitor { if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) { DCHECK_EQ(phi->InputCount(), 2U); HInstruction* instruction = phi->InputAt(1); - if (instruction->IsAdd()) { - HAdd* add = instruction->AsAdd(); - HInstruction* left = add->GetLeft(); - HInstruction* right = add->GetRight(); - if (left == phi && right->IsIntConstant()) { + HInstruction *left; + int32_t increment; + if (ValueBound::IsAddOrSubAConstant(instruction, &left, &increment)) { + if (left == phi) { HInstruction* initial_value = phi->InputAt(0); ValueRange* range = nullptr; - int increment = right->AsIntConstant()->GetValue(); if (increment == 0) { // Add constant 0. It's really a fixed value. range = new (GetGraph()->GetArena()) ValueRange( @@ -676,29 +727,122 @@ class BCEVisitor : public HGraphVisitor { // Here we are interested in the typical triangular case of nested loops, // such as the inner loop 'for (int j=0; j<array.length-i; j++)' where i // is the index for outer loop. In this case, we know j is bounded by array.length-1. + + // Try to handle (array.length - i) or (array.length + c - i) format. + HInstruction* left_of_left; // left input of left. + int32_t right_const = 0; + if (ValueBound::IsAddOrSubAConstant(left, &left_of_left, &right_const)) { + left = left_of_left; + } + // The value of left input of the sub equals (left + right_const). + if (left->IsArrayLength()) { HInstruction* array_length = left->AsArrayLength(); ValueRange* right_range = LookupValueRange(right, sub->GetBlock()); if (right_range != nullptr) { ValueBound lower = right_range->GetLower(); ValueBound upper = right_range->GetUpper(); - if (lower.IsConstant() && upper.IsRelativeToArrayLength()) { + if (lower.IsConstant() && upper.IsRelatedToArrayLength()) { HInstruction* upper_inst = upper.GetInstruction(); - if (upper_inst->IsArrayLength() && - upper_inst->AsArrayLength() == array_length) { - // (array.length - v) where v is in [c1, array.length + c2] - // gets [-c2, array.length - c1] as its value range. - ValueRange* range = new (GetGraph()->GetArena()) ValueRange( - GetGraph()->GetArena(), - ValueBound(nullptr, - upper.GetConstant()), - ValueBound(array_length, - lower.GetConstant())); - GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range); + // Make sure it's the same array. + if (ValueBound::Equal(array_length, upper_inst)) { + int32_t c0 = right_const; + int32_t c1 = lower.GetConstant(); + int32_t c2 = upper.GetConstant(); + // (array.length + c0 - v) where v is in [c1, array.length + c2] + // gets [c0 - c2, array.length + c0 - c1] as its value range. 
+ if (!ValueBound::WouldAddOverflowOrUnderflow(c0, -c2) && + !ValueBound::WouldAddOverflowOrUnderflow(c0, -c1)) { + if ((c0 - c1) <= 0) { + // array.length + (c0 - c1) won't overflow/underflow. + ValueRange* range = new (GetGraph()->GetArena()) ValueRange( + GetGraph()->GetArena(), + ValueBound(nullptr, right_const - upper.GetConstant()), + ValueBound(array_length, right_const - lower.GetConstant())); + GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range); + } + } } } } } } + void FindAndHandlePartialArrayLength(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsShr() || instruction->IsUShr()); + HInstruction* right = instruction->GetRight(); + int32_t right_const; + if (right->IsIntConstant()) { + right_const = right->AsIntConstant()->GetValue(); + // Detect division by two or more. + if ((instruction->IsDiv() && right_const <= 1) || + (instruction->IsShr() && right_const < 1) || + (instruction->IsUShr() && right_const < 1)) { + return; + } + } else { + return; + } + + // Try to handle array.length/2 or (array.length-1)/2 format. + HInstruction* left = instruction->GetLeft(); + HInstruction* left_of_left; // left input of left. + int32_t c = 0; + if (ValueBound::IsAddOrSubAConstant(left, &left_of_left, &c)) { + left = left_of_left; + } + // The value of left input of instruction equals (left + c). + + // (array_length + 1) or smaller divided by two or more + // always generate a value in [INT_MIN, array_length]. + // This is true even if array_length is INT_MAX. + if (left->IsArrayLength() && c <= 1) { + if (instruction->IsUShr() && c < 0) { + // Make sure for unsigned shift, left side is not negative. + // e.g. if array_length is 2, ((array_length - 3) >>> 2) is way bigger + // than array_length. + return; + } + ValueRange* range = new (GetGraph()->GetArena()) ValueRange( + GetGraph()->GetArena(), + ValueBound(nullptr, INT_MIN), + ValueBound(left, 0)); + GetValueRangeMap(instruction->GetBlock())->Overwrite(instruction->GetId(), range); + } + } + + void VisitDiv(HDiv* div) { + FindAndHandlePartialArrayLength(div); + } + + void VisitShr(HShr* shr) { + FindAndHandlePartialArrayLength(shr); + } + + void VisitUShr(HUShr* ushr) { + FindAndHandlePartialArrayLength(ushr); + } + + void VisitNewArray(HNewArray* new_array) { + HInstruction* len = new_array->InputAt(0); + if (!len->IsIntConstant()) { + HInstruction *left; + int32_t right_const; + if (ValueBound::IsAddOrSubAConstant(len, &left, &right_const)) { + // (left + right_const) is used as size to new the array. + // We record "-right_const <= left <= new_array - right_const"; + ValueBound lower = ValueBound(nullptr, -right_const); + // We use new_array for the bound instead of new_array.length, + // which isn't available as an instruction yet. new_array will + // be treated the same as new_array.length when it's used in a ValueBound. 
+ ValueBound upper = ValueBound(new_array, -right_const); + ValueRange* range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, upper); + GetValueRangeMap(new_array->GetBlock())->Overwrite(left->GetId(), range); + } + } + } + std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_; DISALLOW_COPY_AND_ASSIGN(BCEVisitor); diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 3dcb08d195..a298413d14 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -14,19 +14,22 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "bounds_check_elimination.h" #include "builder.h" #include "gvn.h" +#include "instruction_simplifier.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" namespace art { -static void RunGvn(HGraph* graph) { +static void RunSimplifierAndGvn(HGraph* graph) { + InstructionSimplifier simplify(graph); + simplify.Run(); SideEffectsAnalysis side_effects(graph); side_effects.Run(); GVNOptimization(graph, side_effects).Run(); @@ -127,7 +130,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { block3->AddSuccessor(block4); // False successor graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check2)); @@ -202,7 +205,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { block3->AddSuccessor(exit); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -277,7 +280,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { block3->AddSuccessor(exit); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -351,7 +354,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { exit->AddInstruction(new (&allocator) HExit()); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check5)); @@ -397,7 +400,6 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, loop_body->AddSuccessor(loop_header); HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); - phi->AddInput(constant_initial); HInstruction* null_check = new (allocator) HNullCheck(parameter, 0); HInstruction* array_length = new (allocator) HArrayLength(null_check); HInstruction* cmp = nullptr; @@ -413,6 +415,7 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator, loop_header->AddInstruction(array_length); loop_header->AddInstruction(cmp); loop_header->AddInstruction(if_inst); + phi->AddInput(constant_initial); null_check = new (allocator) HNullCheck(parameter, 0); array_length = new (allocator) HArrayLength(null_check); @@ -450,7 +453,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // HArrayLength which uses the null check as its input. 
graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -458,7 +461,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -466,7 +469,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -474,7 +477,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -483,7 +486,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // array[i] = 10; // Can't eliminate due to overflow concern. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2(graph); bounds_check_elimination_with_increment_2.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -491,7 +494,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph); bounds_check_elimination_with_increment_2_from_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -541,7 +544,6 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator, loop_body->AddSuccessor(loop_header); HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); - phi->AddInput(array_length); HInstruction* cmp = nullptr; if (cond == kCondLE) { cmp = new (allocator) HLessThanOrEqual(phi, constant_initial); @@ -553,6 +555,7 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator, loop_header->AddPhi(phi); loop_header->AddInstruction(cmp); loop_header->AddInstruction(if_inst); + phi->AddInput(array_length); HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_minus_1); null_check = new (allocator) HNullCheck(parameter, 0); @@ -591,7 +594,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // HArrayLength which uses the null check as its input. 
graph = BuildSSAGraph2(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -599,7 +602,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -607,7 +610,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, -1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -615,7 +618,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_less_than(graph); bounds_check_elimination_with_less_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -623,13 +626,13 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph); bounds_check_elimination_increment_minus_2.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); } -// int[] array = new array[10]; +// int[] array = new int[10]; // for (int i=0; i<10; i+=increment) { array[i] = 10; } static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, HInstruction** bounds_check, @@ -669,7 +672,6 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, loop_body->AddSuccessor(loop_header); HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); - phi->AddInput(constant_initial); HInstruction* cmp = nullptr; if (cond == kCondGE) { cmp = new (allocator) HGreaterThanOrEqual(phi, constant_10); @@ -681,6 +683,7 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, loop_header->AddPhi(phi); loop_header->AddInstruction(cmp); loop_header->AddInstruction(if_inst); + phi->AddInput(constant_initial); HNullCheck* null_check = new (allocator) HNullCheck(new_array, 0); HArrayLength* array_length = new (allocator) HArrayLength(null_check); @@ -705,39 +708,39 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { ArenaPool pool; ArenaAllocator allocator(&pool); - // int[] array = new array[10]; + // int[] array = new int[10]; // for (int i=0; i<10; i++) { array[i] = 10; // Can eliminate. 
} HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); - // int[] array = new array[10]; + // int[] array = new int[10]; // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); - // int[] array = new array[10]; + // int[] array = new int[10]; // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); - // int[] array = new array[10]; + // int[] array = new int[10]; // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_8(graph); bounds_check_elimination_increment_8.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -782,7 +785,6 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, loop_body->AddSuccessor(loop_header); HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); - phi->AddInput(constant_initial); HInstruction* null_check = new (allocator) HNullCheck(parameter, 0); HInstruction* array_length = new (allocator) HArrayLength(null_check); HInstruction* cmp = nullptr; @@ -797,6 +799,7 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator, loop_header->AddInstruction(array_length); loop_header->AddInstruction(cmp); loop_header->AddInstruction(if_inst); + phi->AddInput(constant_initial); null_check = new (allocator) HNullCheck(parameter, 0); array_length = new (allocator) HArrayLength(null_check); @@ -838,7 +841,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // HArrayLength which uses the null check as its input. graph = BuildSSAGraph4(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -846,7 +849,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -854,7 +857,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. 
} graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -901,7 +904,6 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { HBasicBlock* outer_header = new (&allocator) HBasicBlock(graph); graph->AddBlock(outer_header); HPhi* phi_i = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt); - phi_i->AddInput(constant_0); HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0); HArrayLength* array_length = new (&allocator) HArrayLength(null_check); HAdd* add = new (&allocator) HAdd(Primitive::kPrimInt, array_length, constant_minus_1); @@ -913,11 +915,11 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_header->AddInstruction(add); outer_header->AddInstruction(cmp); outer_header->AddInstruction(if_inst); + phi_i->AddInput(constant_0); HBasicBlock* inner_header = new (&allocator) HBasicBlock(graph); graph->AddBlock(inner_header); HPhi* phi_j = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt); - phi_j->AddInput(constant_0); null_check = new (&allocator) HNullCheck(parameter, 0); array_length = new (&allocator) HArrayLength(null_check); HSub* sub = new (&allocator) HSub(Primitive::kPrimInt, array_length, phi_i); @@ -931,6 +933,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { inner_header->AddInstruction(add); inner_header->AddInstruction(cmp); inner_header->AddInstruction(if_inst); + phi_j->AddInput(constant_0); HBasicBlock* inner_body_compare = new (&allocator) HBasicBlock(graph); graph->AddBlock(inner_body_compare); @@ -1030,7 +1033,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_body_add->AddSuccessor(outer_header); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); // gvn should remove the same bounds check. ASSERT_FALSE(IsRemoved(bounds_check1)); ASSERT_FALSE(IsRemoved(bounds_check2)); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index c5101363ee..3e4a6169d9 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -17,13 +17,13 @@ #ifndef ART_COMPILER_OPTIMIZING_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_BUILDER_H_ +#include "base/arena_object.h" #include "dex_file.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" #include "driver/dex_compilation_unit.h" #include "optimizing_compiler_stats.h" #include "primitive.h" -#include "utils/arena_object.h" #include "utils/growable_array.h" #include "nodes.h" diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index fd4e391470..2a57fdc929 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -40,9 +40,17 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) { return mirror::ObjectArray<mirror::Object>::OffsetOfElement(index).SizeValue(); } -void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { - DCHECK_EQ(frame_size_, kUninitializedFrameSize); +static bool IsSingleGoto(HBasicBlock* block) { + HLoopInformation* loop_info = block->GetLoopInformation(); + // TODO: Remove the null check b/19084197. 
+ return (block->GetFirstInstruction() != nullptr) + && (block->GetFirstInstruction() == block->GetLastInstruction()) + && block->GetLastInstruction()->IsGoto() + // Back edges generate the suspend check. + && (loop_info == nullptr || !loop_info->IsBackEdge(block)); +} +void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { Initialize(); if (!is_leaf) { MarkNotLeaf(); @@ -58,19 +66,43 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { CompileInternal(allocator, /* is_baseline */ true); } +bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const { + DCHECK_EQ(block_order_->Get(current_block_index_), current); + return GetNextBlockToEmit() == FirstNonEmptyBlock(next); +} + +HBasicBlock* CodeGenerator::GetNextBlockToEmit() const { + for (size_t i = current_block_index_ + 1; i < block_order_->Size(); ++i) { + HBasicBlock* block = block_order_->Get(i); + if (!IsSingleGoto(block)) { + return block; + } + } + return nullptr; +} + +HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const { + while (IsSingleGoto(block)) { + block = block->GetSuccessors().Get(0); + } + return block; +} + void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) { - HGraphVisitor* location_builder = GetLocationBuilder(); HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); GenerateFrameEntry(); for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) { HBasicBlock* block = block_order_->Get(current_block_index_); + // Don't generate code for an empty block. Its predecessors will branch to its successor + // directly. Also, the label of that block will not be emitted, so this helps catch + // errors where we reference that label. + if (IsSingleGoto(block)) continue; Bind(block); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); if (is_baseline) { - current->Accept(location_builder); - InitLocations(current); + InitLocationsBaseline(current); } current->Accept(instruction_visitor); } @@ -88,7 +120,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { // The register allocator already called `InitializeCodeGeneration`, // where the frame size has been computed. - DCHECK_NE(frame_size_, kUninitializedFrameSize); DCHECK(block_order_ != nullptr); Initialize(); CompileInternal(allocator, /* is_baseline */ false); @@ -138,13 +169,22 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, ComputeSpillMask(); first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; - SetFrameSize(RoundUp( - number_of_spill_slots * kVRegSize - + number_of_out_slots * kVRegSize - + maximum_number_of_live_core_registers * GetWordSize() - + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() - + FrameEntrySpillSize(), - kStackAlignment)); + if (number_of_spill_slots == 0 + && !HasAllocatedCalleeSaveRegisters() + && IsLeafMethod() + && !RequiresCurrentMethod()) { + DCHECK_EQ(maximum_number_of_live_core_registers, 0u); + DCHECK_EQ(maximum_number_of_live_fp_registers, 0u); + SetFrameSize(CallPushesPC() ? 
GetWordSize() : 0); + } else { + SetFrameSize(RoundUp( + number_of_spill_slots * kVRegSize + + number_of_out_slots * kVRegSize + + maximum_number_of_live_core_registers * GetWordSize() + + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() + + FrameEntrySpillSize(), + kStackAlignment)); + } } Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const { @@ -294,7 +334,8 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { } } -void CodeGenerator::InitLocations(HInstruction* instruction) { +void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) { + AllocateLocations(instruction); if (instruction->GetLocations() == nullptr) { if (instruction->IsTemporary()) { HInstruction* previous = instruction->GetPrevious(); @@ -320,10 +361,17 @@ void CodeGenerator::InitLocations(HInstruction* instruction) { } } -bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const { - DCHECK_EQ(block_order_->Get(current_block_index_), current); - return (current_block_index_ < block_order_->Size() - 1) - && (block_order_->Get(current_block_index_ + 1) == next); +void CodeGenerator::AllocateLocations(HInstruction* instruction) { + instruction->Accept(GetLocationBuilder()); + LocationSummary* locations = instruction->GetLocations(); + if (!instruction->IsSuspendCheckEntry()) { + if (locations != nullptr && locations->CanCall()) { + MarkNotLeaf(); + } + if (instruction->NeedsCurrentMethod()) { + SetRequiresCurrentMethod(); + } + } } CodeGenerator* CodeGenerator::Create(HGraph* graph, @@ -572,7 +620,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { Location location = locations->GetEnvironmentAt(i); switch (location.GetKind()) { case Location::kConstant: { - DCHECK(current == location.GetConstant()); + DCHECK_EQ(current, location.GetConstant()); if (current->IsLongConstant()) { int64_t value = current->AsLongConstant()->GetValue(); stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, Low32Bits(value)); @@ -588,6 +636,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) { } else if (current->IsIntConstant()) { int32_t value = current->AsIntConstant()->GetValue(); stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value); + } else if (current->IsNullConstant()) { + stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, 0); } else { DCHECK(current->IsFloatConstant()); int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue()); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index ab63b911b2..f46a36d02f 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -30,7 +30,6 @@ namespace art { static size_t constexpr kVRegSize = 4; -static size_t constexpr kUninitializedFrameSize = 0; // Binary encoding of 2^32 for type double. 
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); @@ -92,6 +91,8 @@ class CodeGenerator { HGraph* GetGraph() const { return graph_; } + HBasicBlock* GetNextBlockToEmit() const; + HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const; bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const; size_t GetStackSlotOfParameter(HParameterValue* parameter) const { @@ -107,8 +108,6 @@ class CodeGenerator { virtual void GenerateFrameExit() = 0; virtual void Bind(HBasicBlock* block) = 0; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0; - virtual HGraphVisitor* GetLocationBuilder() = 0; - virtual HGraphVisitor* GetInstructionVisitor() = 0; virtual Assembler* GetAssembler() = 0; virtual size_t GetWordSize() const = 0; virtual size_t GetFloatingPointSpillSlotSize() const = 0; @@ -196,6 +195,15 @@ class CodeGenerator { void MarkNotLeaf() { is_leaf_ = false; + requires_current_method_ = true; + } + + void SetRequiresCurrentMethod() { + requires_current_method_ = true; + } + + bool RequiresCurrentMethod() const { + return requires_current_method_; } // Clears the spill slots taken by loop phis in the `LocationSummary` of the @@ -228,6 +236,41 @@ class CodeGenerator { allocated_registers_.Add(location); } + void AllocateLocations(HInstruction* instruction); + + // Tells whether the stack frame of the compiled method is + // considered "empty", that is either actually having a size of zero, + // or just containing the saved return address register. + bool HasEmptyFrame() const { + return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0); + } + + static int32_t GetInt32ValueOf(HConstant* constant) { + if (constant->IsIntConstant()) { + return constant->AsIntConstant()->GetValue(); + } else if (constant->IsNullConstant()) { + return 0; + } else { + DCHECK(constant->IsFloatConstant()); + return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); + } + } + + static int64_t GetInt64ValueOf(HConstant* constant) { + if (constant->IsIntConstant()) { + return constant->AsIntConstant()->GetValue(); + } else if (constant->IsNullConstant()) { + return 0; + } else if (constant->IsFloatConstant()) { + return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); + } else if (constant->IsLongConstant()) { + return constant->AsLongConstant()->GetValue(); + } else { + DCHECK(constant->IsDoubleConstant()); + return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue()); + } + } + protected: CodeGenerator(HGraph* graph, size_t number_of_core_registers, @@ -236,7 +279,7 @@ class CodeGenerator { uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options) - : frame_size_(kUninitializedFrameSize), + : frame_size_(0), core_spill_mask_(0), fpu_spill_mask_(0), first_register_slot_in_slow_path_(0), @@ -255,6 +298,7 @@ class CodeGenerator { block_order_(nullptr), current_block_index_(0), is_leaf_(true), + requires_current_method_(false), stack_map_stream_(graph->GetArena()) {} // Register allocation logic. @@ -269,11 +313,12 @@ class CodeGenerator { virtual Location GetStackLocation(HLoadLocal* load) const = 0; virtual ParallelMoveResolver* GetMoveResolver() = 0; + virtual HGraphVisitor* GetLocationBuilder() = 0; + virtual HGraphVisitor* GetInstructionVisitor() = 0; // Returns the location of the first spilled entry for floating point registers, // relative to the stack pointer. 
uint32_t GetFpuSpillStart() const { - DCHECK_NE(frame_size_, kUninitializedFrameSize); return GetFrameSize() - FrameEntrySpillSize(); } @@ -289,6 +334,25 @@ class CodeGenerator { return GetFpuSpillSize() + GetCoreSpillSize(); } + bool HasAllocatedCalleeSaveRegisters() const { + // We check the core registers against 1 because it always comprises the return PC. + return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1) + || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0); + } + + bool CallPushesPC() const { + InstructionSet instruction_set = GetInstructionSet(); + return instruction_set == kX86 || instruction_set == kX86_64; + } + + // Arm64 has its own type for a label, so we need to templatize this method + // to share the logic. + template <typename T> + T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const { + block = FirstNonEmptyBlock(block); + return raw_pointer_to_labels_array + block->GetBlockId(); + } + // Frame size required for this method. uint32_t frame_size_; uint32_t core_spill_mask_; @@ -311,7 +375,7 @@ class CodeGenerator { const uint32_t fpu_callee_save_mask_; private: - void InitLocations(HInstruction* instruction); + void InitLocationsBaseline(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); void CompileInternal(CodeAllocator* allocator, bool is_baseline); @@ -328,8 +392,12 @@ class CodeGenerator { // we are generating code for. size_t current_block_index_; + // Whether the method is a leaf method. bool is_leaf_; + // Whether an instruction in the graph accesses the current method. + bool requires_current_method_; + StackMapStream stack_map_stream_; DISALLOW_COPY_AND_ASSIGN(CodeGenerator); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 78fd181dcf..e864ae1cec 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -19,6 +19,8 @@ #include "arch/arm/instruction_set_features_arm.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "intrinsics.h" +#include "intrinsics_arm.h" #include "mirror/array-inl.h" #include "mirror/art_method.h" #include "mirror/class.h" @@ -32,11 +34,6 @@ namespace art { namespace arm { -static DRegister FromLowSToD(SRegister reg) { - DCHECK_EQ(reg % 2, 0); - return static_cast<DRegister>(reg / 2); -} - static bool ExpectedPairLayout(Location location) { // We expected this for both core and fpu register pairs. return ((location.low() & 1) == 0) && (location.low() + 1 == location.high()); @@ -58,6 +55,10 @@ static constexpr Register kCoreCalleeSaves[] = static constexpr SRegister kFpuCalleeSaves[] = { S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31 }; +// D31 cannot be split into two S registers, and the register allocator only works on +// S registers. Therefore there is no need to block it. 
+static constexpr DRegister DTMP = D31; + class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> { public: InvokeRuntimeCallingConvention() @@ -73,20 +74,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegis #define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())-> #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value() -class SlowPathCodeARM : public SlowPathCode { - public: - SlowPathCodeARM() : entry_label_(), exit_label_() {} - - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } - - private: - Label entry_label_; - Label exit_label_; - - DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM); -}; - class NullCheckSlowPathARM : public SlowPathCodeARM { public: explicit NullCheckSlowPathARM(HNullCheck* instruction) : instruction_(instruction) {} @@ -396,10 +383,6 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(true), isa_features_(isa_features) { - // Save one extra register for baseline. Note that on thumb2, there is no easy - // instruction to restore just the PC, so this actually helps both baseline - // and non-baseline to save and restore at least two registers at entry and exit. - AddAllocatedRegister(Location::RegisterLocation(kCoreSavedRegisterForBaseline)); // Save the PC register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(PC)); } @@ -508,6 +491,10 @@ static uint32_t LeastSignificantBit(uint32_t mask) { void CodeGeneratorARM::ComputeSpillMask() { core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; + // Save one extra register for baseline. Note that on thumb2, there is no easy + // instruction to restore just the PC, so this actually helps both baseline + // and non-baseline to save and restore at least two registers at entry and exit. 
+ core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline); DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; // We use vpush and vpop for saving and restoring floating point registers, which take @@ -529,6 +516,10 @@ void CodeGeneratorARM::GenerateFrameEntry() { DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); __ Bind(&frame_entry_label_); + if (HasEmptyFrame()) { + return; + } + if (!skip_overflow_check) { __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); __ LoadFromOffset(kLoadWord, IP, IP, 0); @@ -547,6 +538,10 @@ void CodeGeneratorARM::GenerateFrameEntry() { } void CodeGeneratorARM::GenerateFrameExit() { + if (HasEmptyFrame()) { + __ bx(LR); + return; + } __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize()); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); @@ -784,8 +779,8 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstr if (locations != nullptr && locations->Out().IsConstant()) { HConstant* const_to_move = locations->Out().GetConstant(); - if (const_to_move->IsIntConstant()) { - int32_t value = const_to_move->AsIntConstant()->GetValue(); + if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) { + int32_t value = GetInt32ValueOf(const_to_move); if (location.IsRegister()) { __ LoadImmediate(location.AsRegister<Register>(), value); } else { @@ -952,8 +947,8 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { __ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>())); } else { DCHECK(locations->InAt(1).IsConstant()); - int32_t value = - locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + HConstant* constant = locations->InAt(1).GetConstant(); + int32_t value = CodeGenerator::GetInt32ValueOf(constant); ShifterOperand operand; if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) { __ cmp(left, operand); @@ -1114,6 +1109,17 @@ void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) { UNUSED(constant); } +void LocationsBuilderARM::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARM::VisitNullConstant(HNullConstant* constant) { + // Will be generated at use site. 
+ UNUSED(constant); +} + void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); @@ -1168,44 +1174,37 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { } void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), + codegen_->GetInstructionSetFeatures()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } void CodeGeneratorARM::LoadCurrentMethod(Register reg) { + DCHECK(RequiresCurrentMethod()); __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); } -void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); - - // TODO: Implement all kinds of calls: - // 1) boot -> boot - // 2) app -> boot - // 3) app -> app - // - // Currently we implement the app -> app logic, which looks up in the resolve cache. +static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorARM intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; +} - // temp = method; - codegen_->LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset( - kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); - // temp = temp[index_in_cache] - __ LoadFromOffset( - kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); - // LR = temp[offset_of_quick_compiled_code] - __ LoadFromOffset(kLoadWord, LR, temp, - mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmWordSize).Int32Value()); - // LR() - __ blx(LR); - } else { - __ bl(codegen_->GetFrameEntryLabel()); +void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; } - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); - DCHECK(!codegen_->IsLeafMethod()); + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); + + codegen_->GenerateStaticOrDirectCall(invoke, temp); } void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { @@ -1223,10 +1222,20 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { + IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), + codegen_->GetInstructionSetFeatures()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); @@ -3366,16 +3375,44 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { __ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex()); } } else if (source.IsDoubleStackSlot()) { - DCHECK(destination.IsDoubleStackSlot()) << destination; - __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex()); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); - __ 
LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize)); - __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize)); + if (destination.IsDoubleStackSlot()) { + __ LoadDFromOffset(DTMP, SP, source.GetStackIndex()); + __ StoreDToOffset(DTMP, SP, destination.GetStackIndex()); + } else if (destination.IsRegisterPair()) { + DCHECK(ExpectedPairLayout(destination)); + __ LoadFromOffset( + kLoadWordPair, destination.AsRegisterPairLow<Register>(), SP, source.GetStackIndex()); + } else { + DCHECK(destination.IsFpuRegisterPair()) << destination; + __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), + SP, + source.GetStackIndex()); + } + } else if (source.IsRegisterPair()) { + if (destination.IsRegisterPair()) { + __ Mov(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>()); + __ Mov(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>()); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + DCHECK(ExpectedPairLayout(source)); + __ StoreToOffset( + kStoreWordPair, source.AsRegisterPairLow<Register>(), SP, destination.GetStackIndex()); + } + } else if (source.IsFpuRegisterPair()) { + if (destination.IsFpuRegisterPair()) { + __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(source.AsFpuRegisterPairLow<SRegister>())); + } else { + DCHECK(destination.IsDoubleStackSlot()) << destination; + __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()), + SP, + destination.GetStackIndex()); + } } else { DCHECK(source.IsConstant()) << source; - HInstruction* constant = source.GetConstant(); - if (constant->IsIntConstant()) { - int32_t value = constant->AsIntConstant()->GetValue(); + HConstant* constant = source.GetConstant(); + if (constant->IsIntConstant() || constant->IsNullConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(constant); if (destination.IsRegister()) { __ LoadImmediate(destination.AsRegister<Register>(), value); } else { @@ -3385,17 +3422,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { } } else if (constant->IsLongConstant()) { int64_t value = constant->AsLongConstant()->GetValue(); - if (destination.IsRegister()) { - // In the presence of long or double constants, the parallel move resolver will - // split the move into two, but keeps the same constant for both moves. Here, - // we use the low or high part depending on which register this move goes to. 
- if (destination.reg() % 2 == 0) { - __ LoadImmediate(destination.AsRegister<Register>(), Low32Bits(value)); - } else { - __ LoadImmediate(destination.AsRegister<Register>(), High32Bits(value)); - } + if (destination.IsRegisterPair()) { + __ LoadImmediate(destination.AsRegisterPairLow<Register>(), Low32Bits(value)); + __ LoadImmediate(destination.AsRegisterPairHigh<Register>(), High32Bits(value)); } else { - DCHECK(destination.IsDoubleStackSlot()); + DCHECK(destination.IsDoubleStackSlot()) << destination; __ LoadImmediate(IP, Low32Bits(value)); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); __ LoadImmediate(IP, High32Bits(value)); @@ -3403,20 +3434,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) { } } else if (constant->IsDoubleConstant()) { double value = constant->AsDoubleConstant()->GetValue(); - uint64_t int_value = bit_cast<uint64_t, double>(value); - if (destination.IsFpuRegister()) { - // In the presence of long or double constants, the parallel move resolver will - // split the move into two, but keeps the same constant for both moves. Here, - // we use the low or high part depending on which register this move goes to. - if (destination.reg() % 2 == 0) { - __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), - bit_cast<float, uint32_t>(Low32Bits(int_value))); - } else { - __ LoadSImmediate(destination.AsFpuRegister<SRegister>(), - bit_cast<float, uint32_t>(High32Bits(int_value))); - } + if (destination.IsFpuRegisterPair()) { + __ LoadDImmediate(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), value); } else { - DCHECK(destination.IsDoubleStackSlot()); + DCHECK(destination.IsDoubleStackSlot()) << destination; + uint64_t int_value = bit_cast<uint64_t, double>(value); __ LoadImmediate(IP, Low32Bits(int_value)); __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex()); __ LoadImmediate(IP, High32Bits(int_value)); @@ -3474,6 +3496,40 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { __ vmovrs(IP, source.AsFpuRegister<SRegister>()); __ vmovs(source.AsFpuRegister<SRegister>(), destination.AsFpuRegister<SRegister>()); __ vmovsr(destination.AsFpuRegister<SRegister>(), IP); + } else if (source.IsRegisterPair() && destination.IsRegisterPair()) { + __ vmovdrr(DTMP, source.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>()); + __ Mov(source.AsRegisterPairLow<Register>(), destination.AsRegisterPairLow<Register>()); + __ Mov(source.AsRegisterPairHigh<Register>(), destination.AsRegisterPairHigh<Register>()); + __ vmovrrd(destination.AsRegisterPairLow<Register>(), + destination.AsRegisterPairHigh<Register>(), + DTMP); + } else if (source.IsRegisterPair() || destination.IsRegisterPair()) { + Register low_reg = source.IsRegisterPair() + ? source.AsRegisterPairLow<Register>() + : destination.AsRegisterPairLow<Register>(); + int mem = source.IsRegisterPair() + ? destination.GetStackIndex() + : source.GetStackIndex(); + DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? 
source : destination)); + __ vmovdrr(DTMP, low_reg, static_cast<Register>(low_reg + 1)); + __ LoadFromOffset(kLoadWordPair, low_reg, SP, mem); + __ StoreDToOffset(DTMP, SP, mem); + } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) { + DRegister first = FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()); + DRegister second = FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()); + __ vmovd(DTMP, first); + __ vmovd(first, second); + __ vmovd(second, DTMP); + } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) { + DRegister reg = source.IsFpuRegisterPair() + ? FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()) + : FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()); + int mem = source.IsFpuRegisterPair() + ? destination.GetStackIndex() + : source.GetStackIndex(); + __ vmovd(DTMP, reg); + __ LoadDFromOffset(reg, SP, mem); + __ StoreDToOffset(DTMP, SP, mem); } else if (source.IsFpuRegister() || destination.IsFpuRegister()) { SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister<SRegister>() : destination.AsFpuRegister<SRegister>(); @@ -3482,7 +3538,7 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) { : source.GetStackIndex(); __ vmovrs(IP, reg); - __ LoadFromOffset(kLoadWord, IP, SP, mem); + __ LoadSFromOffset(reg, SP, mem); __ StoreToOffset(kStoreWord, IP, SP, mem); } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { Exchange(source.GetStackIndex(), destination.GetStackIndex()); @@ -3776,5 +3832,50 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } +void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp) { + DCHECK_EQ(temp, kArtMethodRegister); + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. + + // temp = method; + LoadCurrentMethod(temp); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ LoadFromOffset( + kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); + // temp = temp[index_in_cache] + __ LoadFromOffset( + kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); + // LR = temp[offset_of_quick_compiled_code] + __ LoadFromOffset(kLoadWord, LR, temp, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArmWordSize).Int32Value()); + // LR() + __ blx(LR); + } else { + __ bl(GetFrameEntryLabel()); + } + + RecordPcInfo(invoke, invoke->GetDexPc()); + DCHECK(!IsLeafMethod()); +} + +void LocationsBuilderARM::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorARM::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. 
+ UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + } // namespace arm } // namespace art diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 4b03dffd38..f1a3729c13 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -39,6 +39,14 @@ static constexpr SRegister kParameterFpuRegisters[] = { S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15 }; static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); +static constexpr Register kArtMethodRegister = R0; + +static constexpr DRegister FromLowSToD(SRegister reg) { + return DCHECK_CONSTEXPR(reg % 2 == 0, , D0) + static_cast<DRegister>(reg / 2); +} + + class InvokeDexCallingConvention : public CallingConvention<Register, SRegister> { public: InvokeDexCallingConvention() @@ -90,6 +98,20 @@ class ParallelMoveResolverARM : public ParallelMoveResolver { DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM); }; +class SlowPathCodeARM : public SlowPathCode { + public: + SlowPathCodeARM() : entry_label_(), exit_label_() {} + + Label* GetEntryLabel() { return &entry_label_; } + Label* GetExitLabel() { return &exit_label_; } + + private: + Label entry_label_; + Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM); +}; + class LocationsBuilderARM : public HGraphVisitor { public: LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen) @@ -230,7 +252,7 @@ class CodeGeneratorARM : public CodeGenerator { void MarkGCCard(Register temp, Register card, Register object, Register value); Label* GetLabelOf(HBasicBlock* block) const { - return block_labels_.GetRawStorage() + block->GetBlockId(); + return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } void Initialize() OVERRIDE { @@ -249,6 +271,8 @@ class CodeGeneratorARM : public CodeGenerator { Label* GetFrameEntryLabel() { return &frame_entry_label_; } + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp); + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 3bc23fe4f3..0d7864fa35 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -402,15 +402,15 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& com kNumberOfAllocatableRegisters, kNumberOfAllocatableFPRegisters, kNumberOfAllocatableRegisterPairs, - (1 << LR), - 0, + callee_saved_core_registers.list(), + callee_saved_fp_registers.list(), compiler_options), block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this) { // Save the link register (containing the return address) to mimic Quick. 
- AddAllocatedRegister(Location::RegisterLocation(LR)); + AddAllocatedRegister(LocationFrom(lr)); } #undef __ @@ -448,27 +448,32 @@ void CodeGeneratorARM64::GenerateFrameEntry() { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireX(); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); - __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); + __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); __ Ldr(wzr, MemOperand(temp, 0)); RecordPcInfo(nullptr, 0); } - int frame_size = GetFrameSize(); - __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); - __ PokeCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize()); - - // Stack layout: - // sp[frame_size - 8] : lr. - // ... : other preserved registers. - // sp[frame_size - regs_size]: first preserved register. - // ... : reserved frame space. - // sp[0] : current method. + if (!HasEmptyFrame()) { + int frame_size = GetFrameSize(); + // Stack layout: + // sp[frame_size - 8] : lr. + // ... : other preserved core registers. + // ... : other preserved fp registers. + // ... : reserved frame space. + // sp[0] : current method. + __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); + __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + } } void CodeGeneratorARM64::GenerateFrameExit() { - int frame_size = GetFrameSize(); - __ PeekCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize()); - __ Drop(frame_size); + if (!HasEmptyFrame()) { + int frame_size = GetFrameSize(); + __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + __ Drop(frame_size); + } } void CodeGeneratorARM64::Bind(HBasicBlock* block) { @@ -486,18 +491,21 @@ void CodeGeneratorARM64::Move(HInstruction* instruction, Primitive::Type type = instruction->GetType(); DCHECK_NE(type, Primitive::kPrimVoid); - if (instruction->IsIntConstant() || instruction->IsLongConstant()) { - int64_t value = instruction->IsIntConstant() ? instruction->AsIntConstant()->GetValue() - : instruction->AsLongConstant()->GetValue(); + if (instruction->IsIntConstant() + || instruction->IsLongConstant() + || instruction->IsNullConstant()) { + int64_t value = GetInt64ValueOf(instruction->AsConstant()); if (location.IsRegister()) { Register dst = RegisterFrom(location, type); - DCHECK((instruction->IsIntConstant() && dst.Is32Bits()) || + DCHECK(((instruction->IsIntConstant() || instruction->IsNullConstant()) && dst.Is32Bits()) || (instruction->IsLongConstant() && dst.Is64Bits())); __ Mov(dst, value); } else { DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot()); UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = instruction->IsIntConstant() ? temps.AcquireW() : temps.AcquireX(); + Register temp = (instruction->IsIntConstant() || instruction->IsNullConstant()) + ? 
temps.AcquireW() + : temps.AcquireX(); __ Mov(temp, value); __ Str(temp, StackOperandFrom(location)); } @@ -555,26 +563,38 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value) { __ Bind(&done); } -void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const { - // Block reserved registers: - // ip0 (VIXL temporary) - // ip1 (VIXL temporary) - // tr - // lr - // sp is not part of the allocatable registers, so we don't need to block it. - // TODO: Avoid blocking callee-saved registers, and instead preserve them - // where necessary. +void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const { + // Blocked core registers: + // lr : Runtime reserved. + // tr : Runtime reserved. + // xSuspend : Runtime reserved. TODO: Unblock this when the runtime stops using it. + // ip1 : VIXL core temp. + // ip0 : VIXL core temp. + // + // Blocked fp registers: + // d31 : VIXL fp temp. CPURegList reserved_core_registers = vixl_reserved_core_registers; reserved_core_registers.Combine(runtime_reserved_core_registers); - reserved_core_registers.Combine(quick_callee_saved_registers); while (!reserved_core_registers.IsEmpty()) { blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true; } + CPURegList reserved_fp_registers = vixl_reserved_fp_registers; - reserved_fp_registers.Combine(CPURegList::GetCalleeSavedFP()); - while (!reserved_core_registers.IsEmpty()) { + while (!reserved_fp_registers.IsEmpty()) { blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true; } + + if (is_baseline) { + CPURegList reserved_core_baseline_registers = callee_saved_core_registers; + while (!reserved_core_baseline_registers.IsEmpty()) { + blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true; + } + + CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers; + while (!reserved_fp_baseline_registers.IsEmpty()) { + blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true; + } + } } Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const { @@ -626,10 +646,12 @@ void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg } void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) { - if (constant->IsIntConstant() || constant->IsLongConstant()) { - __ Mov(Register(destination), - constant->IsIntConstant() ? constant->AsIntConstant()->GetValue() - : constant->AsLongConstant()->GetValue()); + if (constant->IsIntConstant()) { + __ Mov(Register(destination), constant->AsIntConstant()->GetValue()); + } else if (constant->IsLongConstant()) { + __ Mov(Register(destination), constant->AsLongConstant()->GetValue()); + } else if (constant->IsNullConstant()) { + __ Mov(Register(destination), 0); } else if (constant->IsFloatConstant()) { __ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue()); } else { @@ -643,6 +665,8 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) { DCHECK(constant.IsConstant()); HConstant* cst = constant.GetConstant(); return (cst->IsIntConstant() && type == Primitive::kPrimInt) || + // Null is mapped to a core W register, which we associate with kPrimInt. 
+ (cst->IsNullConstant() && type == Primitive::kPrimInt) || (cst->IsLongConstant() && type == Primitive::kPrimLong) || (cst->IsFloatConstant() && type == Primitive::kPrimFloat) || (cst->IsDoubleConstant() && type == Primitive::kPrimDouble); @@ -663,7 +687,9 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri if (unspecified_type) { HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr; if (source.IsStackSlot() || - (src_cst != nullptr && (src_cst->IsIntConstant() || src_cst->IsFloatConstant()))) { + (src_cst != nullptr && (src_cst->IsIntConstant() + || src_cst->IsFloatConstant() + || src_cst->IsNullConstant()))) { // For stack slots and 32bit constants, a 64bit type is appropriate. type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; } else { @@ -709,7 +735,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri UseScratchRegisterScope temps(GetVIXLAssembler()); HConstant* src_cst = source.GetConstant(); CPURegister temp; - if (src_cst->IsIntConstant()) { + if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) { temp = temps.AcquireW(); } else if (src_cst->IsLongConstant()) { temp = temps.AcquireX(); @@ -947,6 +973,7 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type, } void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) { + DCHECK(RequiresCurrentMethod()); DCHECK(current_method.IsW()); __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset)); } @@ -1370,7 +1397,13 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); + HInstruction* right = compare->InputAt(1); + if ((right->IsFloatConstant() && (right->AsFloatConstant()->GetValue() == 0.0f)) || + (right->IsDoubleConstant() && (right->AsDoubleConstant()->GetValue() == 0.0))) { + locations->SetInAt(1, Location::ConstantLocation(right->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresFpuRegister()); + } locations->SetOut(Location::RequiresRegister()); break; } @@ -1400,9 +1433,17 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { case Primitive::kPrimDouble: { Register result = OutputRegister(compare); FPRegister left = InputFPRegisterAt(compare, 0); - FPRegister right = InputFPRegisterAt(compare, 1); - - __ Fcmp(left, right); + if (compare->GetLocations()->InAt(1).IsConstant()) { + if (kIsDebugBuild) { + HInstruction* right = compare->GetLocations()->InAt(1).GetConstant(); + DCHECK((right->IsFloatConstant() && (right->AsFloatConstant()->GetValue() == 0.0f)) || + (right->IsDoubleConstant() && (right->AsDoubleConstant()->GetValue() == 0.0))); + } + // 0.0 is the only immediate that can be encoded directly in a FCMP instruction. + __ Fcmp(left, 0.0); + } else { + __ Fcmp(left, InputFPRegisterAt(compare, 1)); + } if (compare->IsGtBias()) { __ Cset(result, ne); } else { @@ -1752,6 +1793,16 @@ void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant) { UNUSED(constant); } +void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant) { + // Will be generated at use site. 
+ UNUSED(constant); +} + void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall); @@ -2545,6 +2596,18 @@ void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) { HandleBinaryOp(instruction); } +void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 9a99dcccea..afb7fc3718 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -50,14 +50,24 @@ static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegi const vixl::Register tr = vixl::x18; // Thread Register static const vixl::Register kArtMethodRegister = vixl::w0; // Method register on invoke. +const vixl::Register kQuickSuspendRegister = vixl::x19; const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1); const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31); -const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr); -const vixl::CPURegList quick_callee_saved_registers(vixl::CPURegister::kRegister, - vixl::kXRegSize, - kArm64CalleeSaveRefSpills); +// TODO: When the runtime does not use kQuickSuspendRegister as a suspend +// counter remove it from the reserved registers list. +const vixl::CPURegList runtime_reserved_core_registers(tr, kQuickSuspendRegister, vixl::lr); + +// Callee-saved registers defined by AAPCS64. 
+const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister, + vixl::kXRegSize, + vixl::x19.code(), + vixl::x30.code()); +const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister, + vixl::kDRegSize, + vixl::d8.code(), + vixl::d15.code()); Location ARM64ReturnLocation(Primitive::Type return_type); class SlowPathCodeARM64 : public SlowPathCode { @@ -191,16 +201,20 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; - static const vixl::CPURegList& GetFramePreservedRegisters() { - static const vixl::CPURegList frame_preserved_regs = - vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, vixl::lr.Bit()); - return frame_preserved_regs; + vixl::CPURegList GetFramePreservedCoreRegisters() const { + return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, + core_spill_mask_); + } + + vixl::CPURegList GetFramePreservedFPRegisters() const { + return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize, + fpu_spill_mask_); } void Bind(HBasicBlock* block) OVERRIDE; vixl::Label* GetLabelOf(HBasicBlock* block) const { - return block_labels_ + block->GetBlockId(); + return CommonGetLabelOf<vixl::Label>(block_labels_, block); } void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 98f93a418a..1101569174 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -470,12 +470,16 @@ void CodeGeneratorX86::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } - __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); - __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); + if (!HasEmptyFrame()) { + __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); + } } void CodeGeneratorX86::GenerateFrameExit() { - __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + if (!HasEmptyFrame()) { + __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + } } void CodeGeneratorX86::Bind(HBasicBlock* block) { @@ -483,6 +487,7 @@ void CodeGeneratorX86::Bind(HBasicBlock* block) { } void CodeGeneratorX86::LoadCurrentMethod(Register reg) { + DCHECK(RequiresCurrentMethod()); __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); } @@ -597,13 +602,7 @@ void CodeGeneratorX86::Move32(Location destination, Location source) { __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - int32_t value; - if (constant->IsIntConstant()) { - value = constant->AsIntConstant()->GetValue(); - } else { - DCHECK(constant->IsFloatConstant()); - value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); - } + int32_t value = GetInt32ValueOf(constant); __ movl(Address(ESP, destination.GetStackIndex()), Immediate(value)); } else { DCHECK(source.IsStackSlot()); @@ -669,8 +668,8 @@ void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstr if (locations != nullptr && locations->Out().IsConstant()) { HConstant* const_to_move = locations->Out().GetConstant(); - if (const_to_move->IsIntConstant()) { - Immediate imm(const_to_move->AsIntConstant()->GetValue()); + if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) { + Immediate imm(GetInt32ValueOf(const_to_move)); 
if (location.IsRegister()) { __ movl(location.AsRegister<Register>(), imm); } else if (location.IsStackSlot()) { @@ -920,7 +919,7 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) { locations->InAt(1).AsRegister<Register>()); } else if (locations->InAt(1).IsConstant()) { HConstant* instruction = locations->InAt(1).GetConstant(); - Immediate imm(instruction->AsIntConstant()->GetValue()); + Immediate imm(CodeGenerator::GetInt32ValueOf(instruction)); __ cmpl(locations->InAt(0).AsRegister<Register>(), imm); } else { __ cmpl(locations->InAt(0).AsRegister<Register>(), @@ -989,6 +988,17 @@ void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) { UNUSED(constant); } +void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant) { + // Will be generated at use site. + UNUSED(constant); +} + void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); @@ -1799,7 +1809,7 @@ void LocationsBuilderX86::VisitAdd(HAdd* add) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::Any()); + locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -1843,8 +1853,6 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { case Primitive::kPrimFloat: { if (second.IsFpuRegister()) { __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); - } else { - __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); } break; } @@ -1852,8 +1860,6 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { case Primitive::kPrimDouble: { if (second.IsFpuRegister()) { __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); - } else { - __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex())); } break; } @@ -3495,8 +3501,8 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { } } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - if (constant->IsIntConstant()) { - Immediate imm(constant->AsIntConstant()->GetValue()); + if (constant->IsIntConstant() || constant->IsNullConstant()) { + Immediate imm(CodeGenerator::GetInt32ValueOf(constant)); if (destination.IsRegister()) { __ movl(destination.AsRegister<Register>(), imm); } else { @@ -3904,5 +3910,17 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr } } +void LocationsBuilderX86::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. 
+ UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + } // namespace x86 } // namespace art diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 107ddafea4..f5a9b7d1f7 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -234,7 +234,7 @@ class CodeGeneratorX86 : public CodeGenerator { void LoadCurrentMethod(Register reg); Label* GetLabelOf(HBasicBlock* block) const { - return block_labels_.GetRawStorage() + block->GetBlockId(); + return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } void Initialize() OVERRIDE { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 2ff53a0603..41a19e11f0 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -487,6 +487,10 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } + if (HasEmptyFrame()) { + return; + } + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { @@ -509,6 +513,9 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { } void CodeGeneratorX86_64::GenerateFrameExit() { + if (HasEmptyFrame()) { + return; + } uint32_t xmm_spill_location = GetFpuSpillStart(); size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { @@ -533,6 +540,7 @@ void CodeGeneratorX86_64::Bind(HBasicBlock* block) { } void CodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) { + DCHECK(RequiresCurrentMethod()); __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); } @@ -599,13 +607,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) { source.AsFpuRegister<XmmRegister>()); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - int32_t value; - if (constant->IsFloatConstant()) { - value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue()); - } else { - DCHECK(constant->IsIntConstant()); - value = constant->AsIntConstant()->GetValue(); - } + int32_t value = GetInt32ValueOf(constant); __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); } else { DCHECK(source.IsStackSlot()) << source; @@ -649,8 +651,8 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction, if (locations != nullptr && locations->Out().IsConstant()) { HConstant* const_to_move = locations->Out().GetConstant(); - if (const_to_move->IsIntConstant()) { - Immediate imm(const_to_move->AsIntConstant()->GetValue()); + if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) { + Immediate imm(GetInt32ValueOf(const_to_move)); if (location.IsRegister()) { __ movl(location.AsRegister<CpuRegister>(), imm); } else if (location.IsStackSlot()) { @@ -790,7 +792,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { // Materialized condition, compare against 0. 
Location lhs = if_instr->GetLocations()->InAt(0); if (lhs.IsRegister()) { - __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(0)); + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); } else { __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); @@ -806,8 +808,12 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { if (rhs.IsRegister()) { __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); } else if (rhs.IsConstant()) { - __ cmpl(lhs.AsRegister<CpuRegister>(), - Immediate(rhs.GetConstant()->AsIntConstant()->GetValue())); + int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()); + if (constant == 0) { + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); + } else { + __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); + } } else { __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); @@ -883,15 +889,19 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) { CpuRegister reg = locations->Out().AsRegister<CpuRegister>(); // Clear register: setcc only sets the low byte. __ xorq(reg, reg); - if (locations->InAt(1).IsRegister()) { - __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), - locations->InAt(1).AsRegister<CpuRegister>()); - } else if (locations->InAt(1).IsConstant()) { - __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), - Immediate(locations->InAt(1).GetConstant()->AsIntConstant()->GetValue())); + Location lhs = locations->InAt(0); + Location rhs = locations->InAt(1); + if (rhs.IsRegister()) { + __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); + } else if (rhs.IsConstant()) { + int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue(); + if (constant == 0) { + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); + } else { + __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); + } } else { - __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), - Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex())); + __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } __ setcc(X86_64Condition(comp->GetCondition()), reg); } @@ -1018,6 +1028,17 @@ void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) { UNUSED(constant); } +void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(constant)); +} + +void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant) { + // Will be generated at use site. 
+ UNUSED(constant); +} + void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); @@ -1840,8 +1861,8 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { switch (add->GetResultType()) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - locations->SetOut(Location::SameAsFirstInput()); + locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -1869,16 +1890,27 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { LocationSummary* locations = add->GetLocations(); Location first = locations->InAt(0); Location second = locations->InAt(1); - DCHECK(first.Equals(locations->Out())); + Location out = locations->Out(); switch (add->GetResultType()) { case Primitive::kPrimInt: { if (second.IsRegister()) { - __ addl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + if (out.AsRegister<Register>() == first.AsRegister<Register>()) { + __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + } else { + __ leal(out.AsRegister<CpuRegister>(), Address( + first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0)); + } } else if (second.IsConstant()) { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); - __ addl(first.AsRegister<CpuRegister>(), imm); + if (out.AsRegister<Register>() == first.AsRegister<Register>()) { + __ addl(out.AsRegister<CpuRegister>(), + Immediate(second.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ leal(out.AsRegister<CpuRegister>(), Address( + first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue())); + } } else { + DCHECK(first.Equals(locations->Out())); __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); } break; @@ -2754,7 +2786,7 @@ void InstructionCodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instr Location obj = locations->InAt(0); if (obj.IsRegister()) { - __ cmpl(obj.AsRegister<CpuRegister>(), Immediate(0)); + __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>()); } else if (obj.IsStackSlot()) { __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0)); } else { @@ -3236,13 +3268,17 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); - if (constant->IsIntConstant()) { - Immediate imm(constant->AsIntConstant()->GetValue()); + if (constant->IsIntConstant() || constant->IsNullConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(constant); if (destination.IsRegister()) { - __ movl(destination.AsRegister<CpuRegister>(), imm); + if (value == 0) { + __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>()); + } else { + __ movl(destination.AsRegister<CpuRegister>(), Immediate(value)); + } } else { DCHECK(destination.IsStackSlot()) << destination; - __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); } } else if (constant->IsLongConstant()) { int64_t value = constant->AsLongConstant()->GetValue(); @@ -3675,5 +3711,17 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in } } +void 
LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + UNUSED(instruction); + LOG(FATAL) << "Unreachable"; +} + } // namespace x86_64 } // namespace art diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index dbdbf869db..707c9992c0 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -232,7 +232,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { void LoadCurrentMethod(CpuRegister reg); Label* GetLabelOf(HBasicBlock* block) const { - return block_labels_.GetRawStorage() + block->GetBlockId(); + return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } void Initialize() OVERRIDE { diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index b246c6f98d..7623e421fd 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -14,11 +14,11 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "builder.h" #include "dex_instruction.h" #include "nodes.h" #include "optimizing_unit_test.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index e05d9b3b0f..2bfecc696a 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -14,13 +14,13 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "builder.h" #include "dex_file.h" #include "dex_instruction.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "pretty_printer.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 4ebb1363cc..a7f1f74e27 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -160,6 +160,22 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { instruction->GetId())); } } + + // Ensure 'instruction' has pointers to its inputs' use entries. 
+ for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) { + HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i); + HInstruction* input = input_record.GetInstruction(); + HUseListNode<HInstruction*>* use_node = input_record.GetUseNode(); + if (use_node == nullptr || !input->GetUses().Contains(use_node)) { + AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry " + "at input %u (%s:%d).", + instruction->DebugName(), + instruction->GetId(), + static_cast<unsigned>(i), + input->DebugName(), + input->GetId())); + } + } } void SSAChecker::VisitBasicBlock(HBasicBlock* block) { @@ -285,6 +301,19 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { } } +static Primitive::Type PrimitiveKind(Primitive::Type type) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimShort: + case Primitive::kPrimChar: + case Primitive::kPrimInt: + return Primitive::kPrimInt; + default: + return type; + } +} + void SSAChecker::VisitPhi(HPhi* phi) { VisitInstruction(phi); @@ -321,18 +350,17 @@ void SSAChecker::VisitPhi(HPhi* phi) { } } } -} - -static Primitive::Type PrimitiveKind(Primitive::Type type) { - switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: - return Primitive::kPrimInt; - default: - return type; + // Ensure that the inputs have the same primitive kind as the phi. + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + if (PrimitiveKind(input->GetType()) != PrimitiveKind(phi->GetType())) { + AddError(StringPrintf( + "Input %d at index %zu of phi %d from block %d does not have the " + "same type as the phi: %s versus %s", + input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(), + Primitive::PrettyDescriptor(input->GetType()), + Primitive::PrettyDescriptor(phi->GetType()))); + } } } diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index c59f8366fa..4742e4d073 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -14,12 +14,12 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 835bca688f..c59273753e 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -184,6 +184,10 @@ class HGraphVisualizerPrinter : public HGraphVisitor { output_ << " " << instruction->GetValue(); } + void VisitPhi(HPhi* phi) OVERRIDE { + output_ << " " << phi->GetRegNumber(); + } + void PrintInstruction(HInstruction* instruction) { output_ << instruction->DebugName(); instruction->Accept(this); diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 89bba2d9f6..cb448c883f 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -270,7 +270,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { set = new (allocator_) ValueSet(allocator_); } else { HBasicBlock* dominator = block->GetDominator(); - set = sets_.Get(dominator->GetBlockId())->Copy(); + set = sets_.Get(dominator->GetBlockId()); if (dominator->GetSuccessors().Size() != 1 || dominator->GetSuccessors().Get(0) != block) { // We have to copy if the dominator has other successors, or `block` is not a successor // of the dominator. diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index 4a48fee2fb..a81d49aa0c 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -14,12 +14,12 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "builder.h" #include "gvn.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 32f6972c84..d55a3ca00b 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -159,7 +159,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, SsaDeadPhiElimination dead_phi(callee_graph); HDeadCodeElimination dce(callee_graph); HConstantFolding fold(callee_graph); - InstructionSimplifier simplify(callee_graph); + InstructionSimplifier simplify(callee_graph, stats_); HOptimization* optimizations[] = { &redundant_phi, @@ -176,7 +176,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, if (depth_ + 1 < kDepthLimit) { HInliner inliner( - callee_graph, outer_compilation_unit_, compiler_driver_, outer_stats_, depth_ + 1); + callee_graph, outer_compilation_unit_, compiler_driver_, stats_, depth_ + 1); inliner.Run(); } @@ -221,7 +221,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, // after optimizations get a unique id. 
graph_->SetCurrentInstructionId(callee_graph->GetNextInstructionId()); VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, outer_dex_file); - outer_stats_->RecordStat(kInlinedInvoke); + MaybeRecordStat(kInlinedInvoke); return true; } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 07d893e7c9..8e9cf837df 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -35,10 +35,9 @@ class HInliner : public HOptimization { CompilerDriver* compiler_driver, OptimizingCompilerStats* stats, size_t depth = 0) - : HOptimization(outer_graph, true, "inliner"), + : HOptimization(outer_graph, true, "inliner", stats), outer_compilation_unit_(outer_compilation_unit), compiler_driver_(compiler_driver), - outer_stats_(stats), depth_(depth) {} void Run() OVERRIDE; @@ -48,7 +47,6 @@ class HInliner : public HOptimization { const DexCompilationUnit& outer_compilation_unit_; CompilerDriver* const compiler_driver_; - OptimizingCompilerStats* const outer_stats_; const size_t depth_; DISALLOW_COPY_AND_ASSIGN(HInliner); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 17c8f337ca..fd99070780 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -16,11 +16,15 @@ #include "instruction_simplifier.h" +#include "mirror/class-inl.h" +#include "scoped_thread_state_change.h" + namespace art { class InstructionSimplifierVisitor : public HGraphVisitor { public: - explicit InstructionSimplifierVisitor(HGraph* graph) : HGraphVisitor(graph) {} + InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), stats_(stats) {} private: void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE; @@ -28,10 +32,14 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitArraySet(HArraySet* equal) OVERRIDE; void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; void VisitNullCheck(HNullCheck* instruction) OVERRIDE; + void VisitArrayLength(HArrayLength* instruction) OVERRIDE; + void VisitCheckCast(HCheckCast* instruction) OVERRIDE; + + OptimizingCompilerStats* stats_; }; void InstructionSimplifier::Run() { - InstructionSimplifierVisitor visitor(graph_); + InstructionSimplifierVisitor visitor(graph_, stats_); visitor.VisitInsertionOrder(); } @@ -40,6 +48,28 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { if (!obj->CanBeNull()) { null_check->ReplaceWith(obj); null_check->GetBlock()->RemoveInstruction(null_check); + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedNullCheck); + } + } +} + +void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { + HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); + if (!load_class->IsResolved()) { + // If the class couldn't be resolve it's not safe to compare against it. It's + // default type would be Top which might be wider that the actual class type + // and thus producing wrong results. 
+ return; + } + ReferenceTypeInfo obj_rti = check_cast->InputAt(0)->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + ScopedObjectAccess soa(Thread::Current()); + if (class_rti.IsSupertypeOf(obj_rti)) { + check_cast->GetBlock()->RemoveInstruction(check_cast); + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); + } } } @@ -75,6 +105,18 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { } } +void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) { + HInstruction* input = instruction->InputAt(0); + // If the array is a NewArray with constant size, replace the array length + // with the constant instruction. This helps the bounds check elimination phase. + if (input->IsNewArray()) { + input = input->InputAt(0); + if (input->IsIntConstant()) { + instruction->ReplaceWith(input); + } + } +} + void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { HInstruction* value = instruction->GetValue(); if (value->GetType() != Primitive::kPrimNot) return; diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index bca6697d05..a7ff755aed 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -19,6 +19,7 @@ #include "nodes.h" #include "optimization.h" +#include "optimizing_compiler_stats.h" namespace art { @@ -27,8 +28,10 @@ namespace art { */ class InstructionSimplifier : public HOptimization { public: - explicit InstructionSimplifier(HGraph* graph, const char* name = "instruction_simplifier") - : HOptimization(graph, true, name) {} + InstructionSimplifier(HGraph* graph, + OptimizingCompilerStats* stats = nullptr, + const char* name = "instruction_simplifier") + : HOptimization(graph, true, name, stats) {} void Run() OVERRIDE; }; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc new file mode 100644 index 0000000000..a82d80af13 --- /dev/null +++ b/compiler/optimizing/intrinsics_arm.cc @@ -0,0 +1,883 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "intrinsics_arm.h" + +#include "arch/arm/instruction_set_features_arm.h" +#include "code_generator_arm.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "intrinsics.h" +#include "mirror/array-inl.h" +#include "mirror/art_method.h" +#include "mirror/string.h" +#include "thread.h" +#include "utils/arm/assembler_arm.h" + +namespace art { + +namespace arm { + +ArmAssembler* IntrinsicCodeGeneratorARM::GetAssembler() { + return codegen_->GetAssembler(); +} + +ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() { + return codegen_->GetGraph()->GetArena(); +} + +#define __ codegen->GetAssembler()-> + +static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGeneratorARM* codegen) { + if (!trg.IsValid()) { + DCHECK(type == Primitive::kPrimVoid); + return; + } + + DCHECK_NE(type, Primitive::kPrimVoid); + + if (Primitive::IsIntegralType(type)) { + if (type == Primitive::kPrimLong) { + Register trg_reg_lo = trg.AsRegisterPairLow<Register>(); + Register trg_reg_hi = trg.AsRegisterPairHigh<Register>(); + Register res_reg_lo = R0; + Register res_reg_hi = R1; + if (trg_reg_lo != res_reg_hi) { + if (trg_reg_lo != res_reg_lo) { + __ mov(trg_reg_lo, ShifterOperand(res_reg_lo)); + __ mov(trg_reg_hi, ShifterOperand(res_reg_hi)); + } else { + DCHECK_EQ(trg_reg_lo + 1, trg_reg_hi); + } + } else { + __ mov(trg_reg_hi, ShifterOperand(res_reg_hi)); + __ mov(trg_reg_lo, ShifterOperand(res_reg_lo)); + } + } else { + Register trg_reg = trg.AsRegister<Register>(); + Register res_reg = R0; + if (trg_reg != res_reg) { + __ mov(trg_reg, ShifterOperand(res_reg)); + } + } + } else { + UNIMPLEMENTED(FATAL) << "Floating-point return."; + } +} + +static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) { + if (invoke->InputCount() == 0) { + return; + } + + LocationSummary* locations = invoke->GetLocations(); + InvokeDexCallingConventionVisitor calling_convention_visitor; + + // We're moving potentially two or more locations to locations that could overlap, so we need + // a parallel move resolver. + HParallelMove parallel_move(arena); + + for (size_t i = 0; i < invoke->InputCount(); i++) { + HInstruction* input = invoke->InputAt(i); + Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType()); + Location actual_loc = locations->InAt(i); + + parallel_move.AddMove(actual_loc, cc_loc, nullptr); + } + + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); +} + +// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified +// call. This will copy the arguments into the positions for a regular call. +// +// Note: The actual parameters are required to be in the locations given by the invoke's location +// summary. If an intrinsic modifies those locations before a slowpath call, they must be +// restored!
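As an aside on MoveArguments above: moving several argument locations that may overlap one at a time can clobber a source before it has been read, which is why the moves are gathered into an HParallelMove and emitted through the parallel-move resolver. A minimal standalone C++ sketch of the failure mode (hypothetical helpers, not part of this change):

#include <utility>

// Goal of both helpers: r0 <- old r1 and r1 <- old r0 (a swap of two "locations").
void naive_sequential_moves(int& r0, int& r1) {
  r0 = r1;  // old r0 is overwritten here...
  r1 = r0;  // ...so r1 ends up holding old r1 again - wrong.
}

void resolved_moves(int& r0, int& r1) {
  std::swap(r0, r1);  // a parallel-move resolver detects the cycle and breaks it, e.g. with a temp.
}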
+class IntrinsicSlowPathARM : public SlowPathCodeARM { + public: + explicit IntrinsicSlowPathARM(HInvoke* invoke) : invoke_(invoke) { } + + void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + CodeGeneratorARM* codegen = down_cast<CodeGeneratorARM*>(codegen_in); + __ Bind(GetEntryLabel()); + + codegen->SaveLiveRegisters(invoke_->GetLocations()); + + MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + + if (invoke_->IsInvokeStaticOrDirect()) { + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); + } else { + UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; + UNREACHABLE(); + } + + // Copy the result back to the expected output. + Location out = invoke_->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. + DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + MoveFromReturnRegister(out, invoke_->GetType(), codegen); + } + + codegen->RestoreLiveRegisters(invoke_->GetLocations()); + __ b(GetExitLabel()); + } + + private: + // The instruction where this slow path is happening. + HInvoke* const invoke_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM); +}; + +#undef __ + +bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + return res != nullptr && res->Intrinsified(); +} + +#define __ assembler-> + +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + __ vmovrrd(output.AsRegisterPairLow<Register>(), + output.AsRegisterPairHigh<Register>(), + FromLowSToD(input.AsFpuRegisterPairLow<SRegister>())); + } else { + __ vmovrs(output.AsRegister<Register>(), input.AsFpuRegister<SRegister>()); + } +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + __ vmovdrr(FromLowSToD(output.AsFpuRegisterPairLow<SRegister>()), + input.AsRegisterPairLow<Register>(), + input.AsRegisterPairHigh<Register>()); + } else { + __ vmovsr(output.AsFpuRegister<SRegister>(), input.AsRegister<Register>()); + } +} + +void IntrinsicLocationsBuilderARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} +void IntrinsicCodeGeneratorARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +void 
IntrinsicLocationsBuilderARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} +void IntrinsicCodeGeneratorARM::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { + Location in = locations->InAt(0); + Location out = locations->Out(); + + if (is64bit) { + __ vabsd(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(in.AsFpuRegisterPairLow<SRegister>())); + } else { + __ vabss(out.AsFpuRegister<SRegister>(), in.AsFpuRegister<SRegister>()); + } +} + +void IntrinsicLocationsBuilderARM::VisitMathAbsDouble(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenAbsInteger(LocationSummary* locations, + bool is64bit, + ArmAssembler* assembler) { + Location in = locations->InAt(0); + Location output = locations->Out(); + + Register mask = locations->GetTemp(0).AsRegister<Register>(); + + if (is64bit) { + Register in_reg_lo = in.AsRegisterPairLow<Register>(); + Register in_reg_hi = in.AsRegisterPairHigh<Register>(); + Register out_reg_lo = output.AsRegisterPairLow<Register>(); + Register out_reg_hi = output.AsRegisterPairHigh<Register>(); + + DCHECK_NE(out_reg_lo, in_reg_hi) << "Diagonal overlap unexpected."; + + __ Asr(mask, in_reg_hi, 31); + __ adds(out_reg_lo, in_reg_lo, ShifterOperand(mask)); + __ adc(out_reg_hi, in_reg_hi, ShifterOperand(mask)); + __ eor(out_reg_lo, mask, ShifterOperand(out_reg_lo)); + __ eor(out_reg_hi, mask, ShifterOperand(out_reg_hi)); + } else { + Register in_reg = in.AsRegister<Register>(); + Register out_reg = output.AsRegister<Register>(); + + __ Asr(mask, in_reg, 31); + __ add(out_reg, in_reg, ShifterOperand(mask)); + __ eor(out_reg, mask, 
ShifterOperand(out_reg)); + } +} + +void IntrinsicLocationsBuilderARM::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), false, GetAssembler()); +} + + +void IntrinsicLocationsBuilderARM::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenMinMax(LocationSummary* locations, + bool is_min, + ArmAssembler* assembler) { + Register op1 = locations->InAt(0).AsRegister<Register>(); + Register op2 = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + __ cmp(op1, ShifterOperand(op2)); + + __ it((is_min) ? Condition::LT : Condition::GT, kItElse); + __ mov(out, ShifterOperand(op1), is_min ? Condition::LT : Condition::GT); + __ mov(out, ShifterOperand(op2), is_min ? Condition::GE : Condition::LE); +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicLocationsBuilderARM::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathSqrt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + ArmAssembler* assembler = GetAssembler(); + __ vsqrtd(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekByte(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ ldrsb(invoke->GetLocations()->Out().AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekIntNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. 
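An aside on GenAbsInteger above: the Asr/add/eor sequence is the standard branchless absolute-value idiom. A small C++ sketch of the same computation (illustrative only; like the generated code, it relies on arithmetic right shift and wraps for the most negative value):

#include <cstdint>

int32_t branchless_abs(int32_t x) {
  int32_t mask = x >> 31;    // 0 when x >= 0, all ones (-1) when x < 0 (the Asr step).
  return (x + mask) ^ mask;  // unchanged for x >= 0; (x - 1) ^ ~0 == -x for x < 0.
}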
+ __ ldr(invoke->GetLocations()->Out().AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekLongNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); + // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor + // exception. So we can't use ldrd as addr may be unaligned. + Register lo = invoke->GetLocations()->Out().AsRegisterPairLow<Register>(); + Register hi = invoke->GetLocations()->Out().AsRegisterPairHigh<Register>(); + if (addr == lo) { + __ ldr(hi, Address(addr, 4)); + __ ldr(lo, Address(addr, 0)); + } else { + __ ldr(lo, Address(addr, 0)); + __ ldr(hi, Address(addr, 4)); + } +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekShortNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ ldrsh(invoke->GetLocations()->Out().AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeByte(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeByte(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ strb(invoke->GetLocations()->InAt(1).AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeIntNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ str(invoke->GetLocations()->InAt(1).AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeLongNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); + // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor + // exception. So we can't use ldrd as addr may be unaligned. 
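As context for the alignment comments in the MemoryPeekLongNative/MemoryPokeLongNative intrinsics here: the comments note that ldrd/strd cannot be used because the address may be unaligned, so the 64-bit access is split into two 32-bit word accesses instead. A C++ sketch of the same idea (illustrative only, not ART code):

#include <cstdint>
#include <cstring>

uint64_t peek_long(const void* addr) {
  // Two word-sized loads tolerate an arbitrarily aligned address where a single
  // doubleword load would not.
  uint32_t lo;
  uint32_t hi;
  std::memcpy(&lo, static_cast<const char*>(addr), sizeof(lo));
  std::memcpy(&hi, static_cast<const char*>(addr) + 4, sizeof(hi));
  return (static_cast<uint64_t>(hi) << 32) | lo;
}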
+ __ str(invoke->GetLocations()->InAt(1).AsRegisterPairLow<Register>(), Address(addr, 0)); + __ str(invoke->GetLocations()->InAt(1).AsRegisterPairHigh<Register>(), Address(addr, 4)); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeShortNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ strh(invoke->GetLocations()->InAt(1).AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitThreadCurrentThread(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitThreadCurrentThread(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ LoadFromOffset(kLoadWord, + invoke->GetLocations()->Out().AsRegister<Register>(), + TR, + Thread::PeerOffset<kArmPointerSize>().Int32Value()); +} + +static void GenUnsafeGet(HInvoke* invoke, + Primitive::Type type, + bool is_volatile, + CodeGeneratorARM* codegen) { + LocationSummary* locations = invoke->GetLocations(); + DCHECK((type == Primitive::kPrimInt) || + (type == Primitive::kPrimLong) || + (type == Primitive::kPrimNot)); + ArmAssembler* assembler = codegen->GetAssembler(); + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only. + + if (type == Primitive::kPrimLong) { + Register trg_lo = locations->Out().AsRegisterPairLow<Register>(); + __ add(IP, base, ShifterOperand(offset)); + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + Register trg_hi = locations->Out().AsRegisterPairHigh<Register>(); + __ ldrexd(trg_lo, trg_hi, IP); + } else { + __ ldrd(trg_lo, Address(IP)); + } + } else { + Register trg = locations->Out().AsRegister<Register>(); + __ ldr(trg, Address(base, offset)); + } + + if (is_volatile) { + __ dmb(ISH); + } +} + +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
+ locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetLong(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetObject(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); +} + +static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, + const ArmInstructionSetFeatures& features, + Primitive::Type type, + bool is_volatile, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + + if (type == Primitive::kPrimLong) { + // Potentially need temps for ldrexd-strexd loop. + if (is_volatile && !features.HasAtomicLdrdAndStrd()) { + locations->AddTemp(Location::RequiresRegister()); // Temp_lo. + locations->AddTemp(Location::RequiresRegister()); // Temp_hi. + } + } else if (type == Primitive::kPrimNot) { + // Temps for card-marking. + locations->AddTemp(Location::RequiresRegister()); // Temp. + locations->AddTemp(Location::RequiresRegister()); // Card. 
+ } +} + +void IntrinsicLocationsBuilderARM::VisitUnsafePut(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, true, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutObject(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, true, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutLong(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutLongOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, true, invoke); +} + +static void GenUnsafePut(LocationSummary* locations, + Primitive::Type type, + bool is_volatile, + bool is_ordered, + CodeGeneratorARM* codegen) { + ArmAssembler* assembler = codegen->GetAssembler(); + + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only. 
+ Register value; + + if (is_volatile || is_ordered) { + __ dmb(ISH); + } + + if (type == Primitive::kPrimLong) { + Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>(); + value = value_lo; + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + Register temp_lo = locations->GetTemp(0).AsRegister<Register>(); + Register temp_hi = locations->GetTemp(1).AsRegister<Register>(); + Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>(); + + __ add(IP, base, ShifterOperand(offset)); + Label loop_head; + __ Bind(&loop_head); + __ ldrexd(temp_lo, temp_hi, IP); + __ strexd(temp_lo, value_lo, value_hi, IP); + __ cmp(temp_lo, ShifterOperand(0)); + __ b(&loop_head, NE); + } else { + __ add(IP, base, ShifterOperand(offset)); + __ strd(value_lo, Address(IP)); + } + } else { + value = locations->InAt(3).AsRegister<Register>(); + __ str(value, Address(base, offset)); + } + + if (is_volatile) { + __ dmb(ISH); + } + + if (type == Primitive::kPrimNot) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen->MarkGCCard(temp, card, base, value); + } +} + +void IntrinsicCodeGeneratorARM::VisitUnsafePut(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutObject(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutLong(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutLongOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_); +} + +static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->SetInAt(4, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); // Pointer. + locations->AddTemp(Location::RequiresRegister()); // Temp 1. + locations->AddTemp(Location::RequiresRegister()); // Temp 2. 
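For orientation, GenUnsafePut above surrounds the store with dmb ISH barriers and, for volatile 64-bit stores on cores without atomic ldrd/strd, uses a ldrexd/strexd retry loop so the write is single-copy atomic. In std::atomic terms it behaves roughly like the following (a sketch, not ART code):

#include <atomic>
#include <cstdint>

void put_long_volatile(std::atomic<int64_t>* field, int64_t value) {
  // Barriers on both sides of an atomic 64-bit store, matching the two dmb ISH
  // instructions and the exclusive-store loop in the generated code.
  field->store(value, std::memory_order_seq_cst);
}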
+} + +static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) { + DCHECK_NE(type, Primitive::kPrimLong); + + ArmAssembler* assembler = codegen->GetAssembler(); + + Register out = locations->Out().AsRegister<Register>(); // Boolean result. + + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Offset (discard high 4B). + Register expected_lo = locations->InAt(3).AsRegister<Register>(); // Expected. + Register value_lo = locations->InAt(4).AsRegister<Register>(); // Value. + + Register tmp_ptr = locations->GetTemp(0).AsRegister<Register>(); // Pointer to actual memory. + Register tmp_lo = locations->GetTemp(1).AsRegister<Register>(); // Value in memory. + + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. Worst case we will mark an unchanged + // object and scan the receiver at the next GC for nothing. + codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo); + } + + // Prevent reordering with prior memory operations. + __ dmb(ISH); + + __ add(tmp_ptr, base, ShifterOperand(offset)); + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + // result = tmp != 0; + + Label loop_head; + __ Bind(&loop_head); + + __ ldrex(tmp_lo, tmp_ptr); + + __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); + + __ it(EQ, ItState::kItT); + __ strex(tmp_lo, value_lo, tmp_ptr, EQ); + __ cmp(tmp_lo, ShifterOperand(1), EQ); + + __ b(&loop_head, EQ); + + __ dmb(ISH); + + __ rsbs(out, tmp_lo, ShifterOperand(1)); + __ it(CC); + __ mov(out, ShifterOperand(0), CC); +} + +void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke ATTRIBUTE_UNUSED) { + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke ATTRIBUTE_UNUSED) { + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { + GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) { + GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); +} + +void IntrinsicLocationsBuilderARM::VisitStringCharAt(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + // Location of reference to data array + const MemberOffset value_offset = mirror::String::ValueOffset(); + // Location of count + const MemberOffset count_offset = mirror::String::CountOffset(); + // Starting offset within data array + const MemberOffset offset_offset = mirror::String::OffsetOffset(); + // Start of char data with array_ + const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t)); + + Register obj = locations->InAt(0).AsRegister<Register>(); // String object pointer. + Register idx = locations->InAt(1).AsRegister<Register>(); // Index of character. 
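The retry loop in GenCas above (see its do/while comment) is a single compare-and-swap with full barriers around it; in std::atomic terms it behaves roughly like this sketch (illustrative only):

#include <atomic>
#include <cstdint>

bool unsafe_cas_int(std::atomic<int32_t>* addr, int32_t expected, int32_t new_value) {
  // Returns true when the stored value matched 'expected' and was replaced by 'new_value'.
  return addr->compare_exchange_strong(expected, new_value, std::memory_order_seq_cst);
}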
+ Register out = locations->Out().AsRegister<Register>(); // Result character. + + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register array_temp = locations->GetTemp(1).AsRegister<Register>(); + + // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth + // the cost. + // TODO: For simplicity, the index parameter is requested in a register, so different from Quick + // we will not optimize the code for constants (which would save a register). + + SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); + codegen_->AddSlowPath(slow_path); + + __ ldr(temp, Address(obj, count_offset.Int32Value())); // temp = str.length. + codegen_->MaybeRecordImplicitNullCheck(invoke); + __ cmp(idx, ShifterOperand(temp)); + __ b(slow_path->GetEntryLabel(), CS); + + // Index computation. + __ ldr(temp, Address(obj, offset_offset.Int32Value())); // temp := str.offset. + __ ldr(array_temp, Address(obj, value_offset.Int32Value())); // array_temp := str.value. + __ add(temp, temp, ShifterOperand(idx)); + DCHECK_EQ(data_offset.Int32Value() % 2, 0); // We'll compensate by shifting. + __ add(temp, temp, ShifterOperand(data_offset.Int32Value() / 2)); + + // Load the value. + __ ldrh(out, Address(array_temp, temp, LSL, 1)); // out := array_temp[temp]. + + __ Bind(slow_path->GetExitLabel()); +} + +// Unimplemented intrinsics. + +#define UNIMPLEMENTED_INTRINSIC(Name) \ +void IntrinsicLocationsBuilderARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} \ +void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} + +UNIMPLEMENTED_INTRINSIC(IntegerReverse) +UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) +UNIMPLEMENTED_INTRINSIC(LongReverse) +UNIMPLEMENTED_INTRINSIC(LongReverseBytes) +UNIMPLEMENTED_INTRINSIC(ShortReverseBytes) +UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble) +UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat) +UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble) +UNIMPLEMENTED_INTRINSIC(MathMaxFloatFloat) +UNIMPLEMENTED_INTRINSIC(MathMinLongLong) +UNIMPLEMENTED_INTRINSIC(MathMaxLongLong) +UNIMPLEMENTED_INTRINSIC(MathCeil) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(MathFloor) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(MathRint) +UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe? +UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure. +UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(StringCompareTo) +UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should +UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here. +UNIMPLEMENTED_INTRINSIC(StringIndexOf) +UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h new file mode 100644 index 0000000000..8bfb7d4686 --- /dev/null +++ b/compiler/optimizing/intrinsics_arm.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_ + +#include "intrinsics.h" + +namespace art { + +class ArenaAllocator; +class ArmInstructionSetFeatures; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace arm { + +class ArmAssembler; +class CodeGeneratorARM; + +class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderARM(ArenaAllocator* arena, + const ArmInstructionSetFeatures& features) + : arena_(arena), features_(features) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + + const ArmInstructionSetFeatures& features_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM); +}; + +class IntrinsicCodeGeneratorARM FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorARM(CodeGeneratorARM* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + ArmAssembler* GetAssembler(); + + ArenaAllocator* GetAllocator(); + + CodeGeneratorARM* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARM); +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 7a3d7d8389..8874edc341 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -300,7 +300,6 @@ void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) { } static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - // We only support FP registers here. LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -924,7 +923,6 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitStringCharAt(HInvoke* invoke) { - // The inputs plus one temp. 
LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index 10f24d8148..bf9b8e59c5 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -66,8 +66,7 @@ static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info) for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* input = environment->GetInstructionAt(i); if (input != nullptr && IsPhiOf(input, info->GetHeader())) { - HUseListNode<HEnvironment*>* env_use = environment->GetInstructionEnvUseAt(i); - input->RemoveEnvironmentUser(env_use); + environment->RemoveAsUserOfInput(i); HInstruction* incoming = input->InputAt(0); environment->SetRawEnvAt(i, incoming); incoming->AddEnvUseAt(environment, i); diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index eb27965c79..f22b7a7e82 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -16,6 +16,7 @@ #include <fstream> +#include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" #include "code_generator.h" @@ -29,7 +30,6 @@ #include "pretty_printer.h" #include "ssa_builder.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/live_interval_test.cc b/compiler/optimizing/live_interval_test.cc index ac8759c805..28000c18f8 100644 --- a/compiler/optimizing/live_interval_test.cc +++ b/compiler/optimizing/live_interval_test.cc @@ -14,9 +14,9 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "optimizing_unit_test.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 0558b85b47..17914e8206 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" @@ -24,7 +25,6 @@ #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index c9be570c73..907eff162f 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" @@ -24,7 +25,6 @@ #include "optimizing_unit_test.h" #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index 990d662d86..4ac1fe8573 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -64,6 +64,13 @@ Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) { std::ostream& operator<<(std::ostream& os, const Location& location) { os << location.DebugString(); + if (location.IsRegister() || location.IsFpuRegister()) { + os << location.reg(); + } else if (location.IsPair()) { + os << location.low() << ":" << location.high(); + } else if (location.IsStackSlot() || location.IsDoubleStackSlot()) { + os << location.GetStackIndex(); + } return os; } diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index bf27c5cf7a..198cc15cce 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -17,10 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_LOCATIONS_H_ #define ART_COMPILER_OPTIMIZING_LOCATIONS_H_ +#include "base/arena_object.h" #include "base/bit_field.h" #include "base/bit_vector.h" #include "base/value_object.h" -#include "utils/arena_object.h" #include "utils/growable_array.h" namespace art { @@ -151,6 +151,10 @@ class Location : public ValueObject { return GetKind() == kFpuRegisterPair; } + bool IsRegisterKind() const { + return IsRegister() || IsFpuRegister() || IsRegisterPair() || IsFpuRegisterPair(); + } + int reg() const { DCHECK(IsRegister() || IsFpuRegister()); return GetPayload(); @@ -268,6 +272,20 @@ class Location : public ValueObject { return value_ == other.value_; } + bool Contains(Location other) const { + if (Equals(other)) { + return true; + } else if (IsFpuRegisterPair() && other.IsFpuRegister()) { + return other.reg() == low() || other.reg() == high(); + } else if (IsRegisterPair() && other.IsRegister()) { + return other.reg() == low() || other.reg() == high(); + } else if (IsDoubleStackSlot() && other.IsStackSlot()) { + return (GetStackIndex() == other.GetStackIndex()) + || (GetStackIndex() + 4 == other.GetStackIndex()); + } + return false; + } + const char* DebugString() const { switch (GetKind()) { case kInvalid: return "I"; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cd36598171..93787b8bfd 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -18,6 +18,7 @@ #include "ssa_builder.h" #include "utils/growable_array.h" +#include "scoped_thread_state_change.h" namespace art { @@ -33,17 +34,14 @@ void HGraph::FindBackEdges(ArenaBitVector* visited) { static void RemoveAsUser(HInstruction* instruction) { for (size_t i = 0; i < instruction->InputCount(); i++) { - instruction->InputAt(i)->RemoveUser(instruction, i); + instruction->RemoveAsUserOfInput(i); } HEnvironment* environment = instruction->GetEnvironment(); if (environment != nullptr) { for (size_t i = 0, e = environment->Size(); i < e; ++i) { - HUseListNode<HEnvironment*>* vreg_env_use = environment->GetInstructionEnvUseAt(i); - if (vreg_env_use != nullptr) { - HInstruction* vreg = environment->GetInstructionAt(i); - DCHECK(vreg != nullptr); - vreg->RemoveEnvironmentUser(vreg_env_use); + if (environment->GetInstructionAt(i) != nullptr) { + environment->RemoveAsUserOfInput(i); } 
} } @@ -63,22 +61,19 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit } } -void HGraph::RemoveBlock(HBasicBlock* block) const { - for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { - block->GetSuccessors().Get(j)->RemovePredecessor(block); - } - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - block->RemovePhi(it.Current()->AsPhi()); - } - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - block->RemoveInstruction(it.Current()); - } -} - void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const { for (size_t i = 0; i < blocks_.Size(); ++i) { if (!visited.IsBitSet(i)) { - RemoveBlock(blocks_.Get(i)); + HBasicBlock* block = blocks_.Get(i); + for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) { + block->GetSuccessors().Get(j)->RemovePredecessor(block); + } + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + block->RemovePhi(it.Current()->AsPhi(), /*ensure_safety=*/ false); + } + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + block->RemoveInstruction(it.Current(), /*ensure_safety=*/ false); + } } } } @@ -292,6 +287,15 @@ bool HGraph::AnalyzeNaturalLoops() const { return true; } +HNullConstant* HGraph::GetNullConstant() { + if (cached_null_constant_ == nullptr) { + cached_null_constant_ = new (arena_) HNullConstant(); + entry_block_->InsertInstructionBefore(cached_null_constant_, + entry_block_->GetLastInstruction()); + } + return cached_null_constant_; +} + void HLoopInformation::Add(HBasicBlock* block) { blocks_.SetBit(block->GetBlockId()); } @@ -429,22 +433,24 @@ void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) { static void Remove(HInstructionList* instruction_list, HBasicBlock* block, - HInstruction* instruction) { + HInstruction* instruction, + bool ensure_safety) { DCHECK_EQ(block, instruction->GetBlock()); - DCHECK(instruction->GetUses().IsEmpty()); - DCHECK(instruction->GetEnvUses().IsEmpty()); instruction->SetBlock(nullptr); instruction_list->RemoveInstruction(instruction); - - RemoveAsUser(instruction); + if (ensure_safety) { + DCHECK(instruction->GetUses().IsEmpty()); + DCHECK(instruction->GetEnvUses().IsEmpty()); + RemoveAsUser(instruction); + } } -void HBasicBlock::RemoveInstruction(HInstruction* instruction) { - Remove(&instructions_, this, instruction); +void HBasicBlock::RemoveInstruction(HInstruction* instruction, bool ensure_safety) { + Remove(&instructions_, this, instruction, ensure_safety); } -void HBasicBlock::RemovePhi(HPhi* phi) { - Remove(&phis_, this, phi); +void HBasicBlock::RemovePhi(HPhi* phi, bool ensure_safety) { + Remove(&phis_, this, phi, ensure_safety); } void HEnvironment::CopyFrom(HEnvironment* env) { @@ -457,15 +463,9 @@ void HEnvironment::CopyFrom(HEnvironment* env) { } } -template <typename T> -static void RemoveFromUseList(T user, size_t input_index, HUseList<T>* list) { - HUseListNode<T>* current; - for (HUseIterator<HInstruction*> use_it(*list); !use_it.Done(); use_it.Advance()) { - current = use_it.Current(); - if (current->GetUser() == user && current->GetIndex() == input_index) { - list->Remove(current); - } - } +void HEnvironment::RemoveAsUserOfInput(size_t index) const { + const HUserRecord<HEnvironment*> user_record = vregs_.Get(index); + user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode()); } HInstruction* HInstruction::GetNextDisregardingMoves() const { @@ -484,14 +484,6 @@ HInstruction* 
HInstruction::GetPreviousDisregardingMoves() const { return previous; } -void HInstruction::RemoveUser(HInstruction* user, size_t input_index) { - RemoveFromUseList(user, input_index, &uses_); -} - -void HInstruction::RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use) { - env_uses_.Remove(use); -} - void HInstructionList::AddInstruction(HInstruction* instruction) { if (first_instruction_ == nullptr) { DCHECK(last_instruction_ == nullptr); @@ -602,7 +594,7 @@ void HInstruction::ReplaceWith(HInstruction* other) { } void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) { - InputAt(index)->RemoveUser(this, index); + RemoveAsUserOfInput(index); SetRawInputAt(index, replacement); replacement->AddUseAt(this, index); } @@ -613,7 +605,7 @@ size_t HInstruction::EnvironmentSize() const { void HPhi::AddInput(HInstruction* input) { DCHECK(input->GetBlock() != nullptr); - inputs_.Add(input); + inputs_.Add(HUserRecord<HInstruction*>(input)); input->AddUseAt(this, inputs_.Size() - 1); } @@ -990,4 +982,14 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { invoke->GetBlock()->RemoveInstruction(invoke); } +std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { + ScopedObjectAccess soa(Thread::Current()); + os << "[" + << " is_top=" << rhs.IsTop() + << " type=" << (rhs.IsTop() ? "?" : PrettyClass(rhs.GetTypeHandle().Get())) + << " is_exact=" << rhs.IsExact() + << " ]"; + return os; +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 30d869d026..de448cc483 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -17,23 +17,28 @@ #ifndef ART_COMPILER_OPTIMIZING_NODES_H_ #define ART_COMPILER_OPTIMIZING_NODES_H_ +#include "base/arena_object.h" #include "entrypoints/quick/quick_entrypoints_enum.h" +#include "handle.h" +#include "handle_scope.h" #include "invoke_type.h" #include "locations.h" +#include "mirror/class.h" #include "offsets.h" #include "primitive.h" -#include "utils/arena_object.h" #include "utils/arena_bit_vector.h" #include "utils/growable_array.h" namespace art { +class GraphChecker; class HBasicBlock; class HEnvironment; class HInstruction; class HIntConstant; class HInvoke; class HGraphVisitor; +class HNullConstant; class HPhi; class HSuspendCheck; class LiveInterval; @@ -194,6 +199,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { return reverse_post_order_; } + HNullConstant* GetNullConstant(); + private: HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const; void VisitBlockForDominatorTree(HBasicBlock* block, @@ -205,7 +212,6 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { ArenaBitVector* visiting); void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const; void RemoveDeadBlocks(const ArenaBitVector& visited) const; - void RemoveBlock(HBasicBlock* block) const; ArenaAllocator* const arena_; @@ -233,6 +239,9 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // The current id to assign to a newly added instruction. See HInstruction.id_. int32_t current_instruction_id_; + // Cached null constant that might be created when building SSA form. 
+ HNullConstant* cached_null_constant_; + ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); DISALLOW_COPY_AND_ASSIGN(HGraph); }; @@ -481,14 +490,17 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { void ReplaceWith(HBasicBlock* other); void AddInstruction(HInstruction* instruction); - void RemoveInstruction(HInstruction* instruction); void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); // Replace instruction `initial` with `replacement` within this block. void ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement); void AddPhi(HPhi* phi); void InsertPhiAfter(HPhi* instruction, HPhi* cursor); - void RemovePhi(HPhi* phi); + // RemoveInstruction and RemovePhi delete a given instruction from the respective + // instruction list. With 'ensure_safety' set to true, it verifies that the + // instruction is not in use and removes it from the use lists of its inputs. + void RemoveInstruction(HInstruction* instruction, bool ensure_safety = true); + void RemovePhi(HPhi* phi, bool ensure_safety = true); bool IsLoopHeader() const { return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this); @@ -574,6 +586,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { M(ArrayLength, Instruction) \ M(ArraySet, Instruction) \ M(BoundsCheck, Instruction) \ + M(BoundType, Instruction) \ M(CheckCast, Instruction) \ M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ @@ -610,6 +623,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> { M(NewInstance, Instruction) \ M(Not, UnaryOperation) \ M(NotEqual, Condition) \ + M(NullConstant, Instruction) \ M(NullCheck, Instruction) \ M(Or, BinaryOperation) \ M(ParallelMove, Instruction) \ @@ -704,6 +718,9 @@ class HUseList : public ValueObject { } void Remove(HUseListNode<T>* node) { + DCHECK(node != nullptr); + DCHECK(Contains(node)); + if (node->prev_ != nullptr) { node->prev_->next_ = node->next_; } @@ -715,6 +732,18 @@ class HUseList : public ValueObject { } } + bool Contains(const HUseListNode<T>* node) const { + if (node == nullptr) { + return false; + } + for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) { + if (current == node) { + return true; + } + } + return false; + } + bool IsEmpty() const { return first_ == nullptr; } @@ -750,6 +779,33 @@ class HUseIterator : public ValueObject { friend class HValue; }; +// This class is used by HEnvironment and HInstruction classes to record the +// instructions they use and pointers to the corresponding HUseListNodes kept +// by the used instructions. +template <typename T> +class HUserRecord : public ValueObject { + public: + HUserRecord() : instruction_(nullptr), use_node_(nullptr) {} + explicit HUserRecord(HInstruction* instruction) : instruction_(instruction), use_node_(nullptr) {} + + HUserRecord(const HUserRecord<T>& old_record, HUseListNode<T>* use_node) + : instruction_(old_record.instruction_), use_node_(use_node) { + DCHECK(instruction_ != nullptr); + DCHECK(use_node_ != nullptr); + DCHECK(old_record.use_node_ == nullptr); + } + + HInstruction* GetInstruction() const { return instruction_; } + HUseListNode<T>* GetUseNode() const { return use_node_; } + + private: + // Instruction used by the user. + HInstruction* instruction_; + + // Corresponding entry in the use list kept by 'instruction_'. + HUseListNode<T>* use_node_; +}; + // Represents the side effects an instruction may have. 
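Stepping back from the diff for a moment: the point of HUserRecord above is to keep, next to each input pointer, the node that represents this use in the input's use list, so ReplaceInput and RemoveAsUserOfInput can unlink a use in constant time instead of scanning the list. A stripped-down sketch of the idea (hypothetical names, not the ART types):

#include <cstddef>

struct UseNode { void* user; size_t index; UseNode* prev; UseNode* next; };

struct InputRecord {
  void* input;        // the instruction being used
  UseNode* use_node;  // our entry in input's use list, remembered when the use was added
};

// Removal unlinks the cached node directly; no search for (user, index) is needed.
// (Updating the list head pointer is omitted in this sketch.)
inline void RemoveUse(InputRecord& record) {
  UseNode* node = record.use_node;
  if (node->prev != nullptr) node->prev->next = node->next;
  if (node->next != nullptr) node->next->prev = node->prev;
}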
class SideEffects : public ValueObject { public: @@ -820,50 +876,118 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> { : vregs_(arena, number_of_vregs) { vregs_.SetSize(number_of_vregs); for (size_t i = 0; i < number_of_vregs; i++) { - vregs_.Put(i, VRegInfo(nullptr, nullptr)); + vregs_.Put(i, HUserRecord<HEnvironment*>()); } } void CopyFrom(HEnvironment* env); void SetRawEnvAt(size_t index, HInstruction* instruction) { - vregs_.Put(index, VRegInfo(instruction, nullptr)); + vregs_.Put(index, HUserRecord<HEnvironment*>(instruction)); } + HInstruction* GetInstructionAt(size_t index) const { + return vregs_.Get(index).GetInstruction(); + } + + void RemoveAsUserOfInput(size_t index) const; + + size_t Size() const { return vregs_.Size(); } + + private: // Record instructions' use entries of this environment for constant-time removal. + // It should only be called by HInstruction when a new environment use is added. void RecordEnvUse(HUseListNode<HEnvironment*>* env_use) { DCHECK(env_use->GetUser() == this); size_t index = env_use->GetIndex(); - VRegInfo info = vregs_.Get(index); - DCHECK(info.vreg_ != nullptr); - DCHECK(info.node_ == nullptr); - vregs_.Put(index, VRegInfo(info.vreg_, env_use)); + vregs_.Put(index, HUserRecord<HEnvironment*>(vregs_.Get(index), env_use)); } - HInstruction* GetInstructionAt(size_t index) const { - return vregs_.Get(index).vreg_; + GrowableArray<HUserRecord<HEnvironment*> > vregs_; + + friend HInstruction; + + DISALLOW_COPY_AND_ASSIGN(HEnvironment); +}; + +class ReferenceTypeInfo : ValueObject { + public: + typedef Handle<mirror::Class> TypeHandle; + + static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (type_handle->IsObjectClass()) { + // Override the type handle to be consistent with the case when we get to + // Top but don't have the Object class available. It avoids having to guess + // what value the type_handle has when it's Top. + return ReferenceTypeInfo(TypeHandle(), is_exact, true); + } else { + return ReferenceTypeInfo(type_handle, is_exact, false); + } } - HUseListNode<HEnvironment*>* GetInstructionEnvUseAt(size_t index) const { - return vregs_.Get(index).node_; + static ReferenceTypeInfo CreateTop(bool is_exact) { + return ReferenceTypeInfo(TypeHandle(), is_exact, true); } - size_t Size() const { return vregs_.Size(); } + bool IsExact() const { return is_exact_; } + bool IsTop() const { return is_top_; } - private: - struct VRegInfo { - HInstruction* vreg_; - HUseListNode<HEnvironment*>* node_; + Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } - VRegInfo(HInstruction* instruction, HUseListNode<HEnvironment*>* env_use) - : vreg_(instruction), node_(env_use) {} - }; + bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (IsTop()) { + // Top (equivalent for java.lang.Object) is supertype of anything. + return true; + } + if (rti.IsTop()) { + // If we get here `this` is not Top() so it can't be a supertype. + return false; + } + return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); + } - GrowableArray<VRegInfo> vregs_; + // Returns true if the type information provide the same amount of details. + // Note that it does not mean that the instructions have the same actual type + // (e.g. tops are equal but they can be the result of a merge). 
+ bool IsEqual(ReferenceTypeInfo rti) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (IsExact() != rti.IsExact()) { + return false; + } + if (IsTop() && rti.IsTop()) { + // `Top` means java.lang.Object, so the types are equivalent. + return true; + } + if (IsTop() || rti.IsTop()) { + // If only one is top or object then they are not equivalent. + // NB: We need this extra check because the type_handle of `Top` is invalid + // and we cannot inspect its reference. + return false; + } - DISALLOW_COPY_AND_ASSIGN(HEnvironment); + // Finally check the types. + return GetTypeHandle().Get() == rti.GetTypeHandle().Get(); + } + + private: + ReferenceTypeInfo() : ReferenceTypeInfo(TypeHandle(), false, true) {} + ReferenceTypeInfo(TypeHandle type_handle, bool is_exact, bool is_top) + : type_handle_(type_handle), is_exact_(is_exact), is_top_(is_top) {} + + // The class of the object. + TypeHandle type_handle_; + // Whether or not the type is exact or a superclass of the actual type. + // Whether or not we have any information about this type. + bool is_exact_; + // A true value here means that the object type should be java.lang.Object. + // We don't have access to the corresponding mirror object every time so this + // flag acts as a substitute. When true, the TypeHandle refers to a null + // pointer and should not be used. + bool is_top_; }; +std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs); + class HInstruction : public ArenaObject<kArenaAllocMisc> { public: explicit HInstruction(SideEffects side_effects) @@ -876,7 +1000,8 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { locations_(nullptr), live_interval_(nullptr), lifetime_position_(kNoLifetime), - side_effects_(side_effects) {} + side_effects_(side_effects), + reference_type_info_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {} virtual ~HInstruction() {} @@ -899,13 +1024,15 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { bool IsLoopHeaderPhi() { return IsPhi() && block_->IsLoopHeader(); } virtual size_t InputCount() const = 0; - virtual HInstruction* InputAt(size_t i) const = 0; + HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); } virtual void Accept(HGraphVisitor* visitor) = 0; virtual const char* DebugName() const = 0; virtual Primitive::Type GetType() const { return Primitive::kPrimVoid; } - virtual void SetRawInputAt(size_t index, HInstruction* input) = 0; + void SetRawInputAt(size_t index, HInstruction* input) { + SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input)); + } virtual bool NeedsEnvironment() const { return false; } virtual bool IsControlFlow() const { return false; } @@ -914,12 +1041,24 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { // Does not apply for all instructions, but having this at top level greatly // simplifies the null check elimination.
- virtual bool CanBeNull() const { return true; } + virtual bool CanBeNull() const { + DCHECK_EQ(GetType(), Primitive::kPrimNot) << "CanBeNull only applies to reference types"; + return true; + } virtual bool CanDoImplicitNullCheck() const { return false; } + void SetReferenceTypeInfo(ReferenceTypeInfo reference_type_info) { + reference_type_info_ = reference_type_info; + } + + ReferenceTypeInfo GetReferenceTypeInfo() const { return reference_type_info_; } + void AddUseAt(HInstruction* user, size_t index) { - uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena()); + DCHECK(user != nullptr); + HUseListNode<HInstruction*>* use = + uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena()); + user->SetRawInputRecordAt(index, HUserRecord<HInstruction*>(user->InputRecordAt(index), use)); } void AddEnvUseAt(HEnvironment* user, size_t index) { @@ -929,11 +1068,13 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { user->RecordEnvUse(env_use); } - void RemoveUser(HInstruction* user, size_t index); - void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use); + void RemoveAsUserOfInput(size_t input) { + HUserRecord<HInstruction*> input_use = InputRecordAt(input); + input_use.GetInstruction()->uses_.Remove(input_use.GetUseNode()); + } - const HUseList<HInstruction*>& GetUses() { return uses_; } - const HUseList<HEnvironment*>& GetEnvUses() { return env_uses_; } + const HUseList<HInstruction*>& GetUses() const { return uses_; } + const HUseList<HEnvironment*>& GetEnvUses() const { return env_uses_; } bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); } bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); } @@ -1015,7 +1156,25 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { void SetLiveInterval(LiveInterval* interval) { live_interval_ = interval; } bool HasLiveInterval() const { return live_interval_ != nullptr; } + bool IsSuspendCheckEntry() const { return IsSuspendCheck() && GetBlock()->IsEntryBlock(); } + + // Returns whether the code generation of the instruction will require to have access + // to the current method. Such instructions are: + // (1): Instructions that require an environment, as calling the runtime requires + // to walk the stack and have the current method stored at a specific stack address. + // (2): Object literals like classes and strings, that are loaded from the dex cache + // fields of the current method. + bool NeedsCurrentMethod() const { + return NeedsEnvironment() || IsLoadClass() || IsLoadString(); + } + + protected: + virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0; + virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0; + private: + void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); } + HInstruction* previous_; HInstruction* next_; HBasicBlock* block_; @@ -1050,7 +1209,12 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { const SideEffects side_effects_; + // TODO: for primitive types this should be marked as invalid. 
+ ReferenceTypeInfo reference_type_info_; + + friend class GraphChecker; friend class HBasicBlock; + friend class HEnvironment; friend class HGraph; friend class HInstructionList; @@ -1170,15 +1334,16 @@ class HTemplateInstruction: public HInstruction { virtual ~HTemplateInstruction() {} virtual size_t InputCount() const { return N; } - virtual HInstruction* InputAt(size_t i) const { return inputs_[i]; } protected: - virtual void SetRawInputAt(size_t i, HInstruction* instruction) { - inputs_[i] = instruction; + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_[i]; } + + void SetRawInputRecordAt(size_t i, const HUserRecord<HInstruction*>& input) OVERRIDE { + inputs_[i] = input; } private: - EmbeddedArray<HInstruction*, N> inputs_; + EmbeddedArray<HUserRecord<HInstruction*>, N> inputs_; friend class SsaBuilder; }; @@ -1663,6 +1828,22 @@ class HDoubleConstant : public HConstant { DISALLOW_COPY_AND_ASSIGN(HDoubleConstant); }; +class HNullConstant : public HConstant { + public: + HNullConstant() : HConstant(Primitive::kPrimNot) {} + + bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; + } + + size_t ComputeHashCode() const OVERRIDE { return 0; } + + DECLARE_INSTRUCTION(NullConstant); + + private: + DISALLOW_COPY_AND_ASSIGN(HNullConstant); +}; + // Constants of the type int. Those can be from Dex instructions, or // synthesized (for example with the if-eqz instruction). class HIntConstant : public HConstant { @@ -1718,7 +1899,6 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic); class HInvoke : public HInstruction { public: virtual size_t InputCount() const { return inputs_.Size(); } - virtual HInstruction* InputAt(size_t i) const { return inputs_.Get(i); } // Runtime needs to walk the stack, so Dex -> Dex calls need to // know their environment. 
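The nodes.h hunks above, and the HInvoke and HPhi hunks that follow, all apply the same pattern: input storage moves from raw HInstruction* to HUserRecord entries, so each input slot also remembers the matching node in the input's use list. The following stand-alone sketch, with invented names rather than the real ART classes, illustrates why RemoveAsUserOfInput then needs no search:

#include <cstddef>
#include <list>
#include <vector>

struct Node;

// A use of `user` at input position `input_index`, stored in the used node's list.
struct UseRecord {
  Node* user;
  size_t input_index;
};

struct Node {
  // Mirrors the HUserRecord idea: the input itself plus this node's position
  // in that input's use list.
  struct InputSlot {
    Node* input = nullptr;
    std::list<UseRecord>::iterator use_it{};
  };

  explicit Node(size_t input_count) : inputs(input_count) {}

  void SetRawInputAt(size_t i, Node* input) {
    input->uses.push_front(UseRecord{this, i});
    inputs[i] = InputSlot{input, input->uses.begin()};
  }

  // Constant time: the stored iterator is erased directly, no walk over the
  // input's use list is needed.
  void RemoveAsUserOfInput(size_t i) {
    InputSlot& slot = inputs[i];
    slot.input->uses.erase(slot.use_it);
    slot.input = nullptr;
  }

  std::vector<InputSlot> inputs;
  std::list<UseRecord> uses;
};

With plain pointers the removal would have to search the use list, which is what the HUseList::Contains helper above still does, but only as a DCHECK in debug builds.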
@@ -1728,10 +1908,6 @@ class HInvoke : public HInstruction { SetRawInputAt(index, argument); } - virtual void SetRawInputAt(size_t index, HInstruction* input) { - inputs_.Put(index, input); - } - virtual Primitive::Type GetType() const { return return_type_; } uint32_t GetDexPc() const { return dex_pc_; } @@ -1763,7 +1939,12 @@ class HInvoke : public HInstruction { inputs_.SetSize(number_of_arguments); } - GrowableArray<HInstruction*> inputs_; + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); } + void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE { + inputs_.Put(index, input); + } + + GrowableArray<HUserRecord<HInstruction*> > inputs_; const Primitive::Type return_type_; const uint32_t dex_pc_; const uint32_t dex_method_index_; @@ -2259,11 +2440,6 @@ class HPhi : public HInstruction { } size_t InputCount() const OVERRIDE { return inputs_.Size(); } - HInstruction* InputAt(size_t i) const OVERRIDE { return inputs_.Get(i); } - - void SetRawInputAt(size_t index, HInstruction* input) OVERRIDE { - inputs_.Put(index, input); - } void AddInput(HInstruction* input); @@ -2282,8 +2458,15 @@ class HPhi : public HInstruction { DECLARE_INSTRUCTION(Phi); + protected: + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); } + + void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE { + inputs_.Put(index, input); + } + private: - GrowableArray<HInstruction*> inputs_; + GrowableArray<HUserRecord<HInstruction*> > inputs_; const uint32_t reg_number_; Primitive::Type type_; bool is_live_; @@ -2608,7 +2791,8 @@ class HLoadClass : public HExpression<0> { type_index_(type_index), is_referrers_class_(is_referrers_class), dex_pc_(dex_pc), - generate_clinit_check_(false) {} + generate_clinit_check_(false), + loaded_class_rti_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {} bool CanBeMoved() const OVERRIDE { return true; } @@ -2646,6 +2830,20 @@ class HLoadClass : public HExpression<0> { return !is_referrers_class_; } + ReferenceTypeInfo GetLoadedClassRTI() { + return loaded_class_rti_; + } + + void SetLoadedClassRTI(ReferenceTypeInfo rti) { + // Make sure we only set exact types (the loaded class should never be merged). + DCHECK(rti.IsExact()); + loaded_class_rti_ = rti; + } + + bool IsResolved() { + return loaded_class_rti_.IsExact(); + } + DECLARE_INSTRUCTION(LoadClass); private: @@ -2656,6 +2854,8 @@ class HLoadClass : public HExpression<0> { // Used for code generation. bool generate_clinit_check_; + ReferenceTypeInfo loaded_class_rti_; + DISALLOW_COPY_AND_ASSIGN(HLoadClass); }; @@ -2858,6 +3058,32 @@ class HInstanceOf : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HInstanceOf); }; +class HBoundType : public HExpression<1> { + public: + HBoundType(HInstruction* input, ReferenceTypeInfo bound_type) + : HExpression(Primitive::kPrimNot, SideEffects::None()), + bound_type_(bound_type) { + SetRawInputAt(0, input); + } + + const ReferenceTypeInfo& GetBoundType() const { return bound_type_; } + + bool CanBeNull() const OVERRIDE { + // `null instanceof ClassX` always return false so we can't be null. + return false; + } + + DECLARE_INSTRUCTION(BoundType); + + private: + // Encodes the most upper class that this instruction can have. In other words + // it is always the case that GetBoundType().IsSupertypeOf(GetReferenceType()). 
+ // It is used to bound the type in cases like `if (x instanceof ClassX) {}` + const ReferenceTypeInfo bound_type_; + + DISALLOW_COPY_AND_ASSIGN(HBoundType); +}; + class HCheckCast : public HTemplateInstruction<2> { public: HCheckCast(HInstruction* object, @@ -2959,7 +3185,7 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> { // True if this blocks a move from the given location. bool Blocks(Location loc) const { - return !IsEliminated() && source_.Equals(loc); + return !IsEliminated() && (source_.Contains(loc) || loc.Contains(source_)); } // A move is redundant if it's been eliminated, if its source and @@ -3000,46 +3226,19 @@ class HParallelMove : public HTemplateInstruction<0> { void AddMove(Location source, Location destination, HInstruction* instruction) { DCHECK(source.IsValid()); DCHECK(destination.IsValid()); - // The parallel move resolver does not handle pairs. So we decompose the - // pair locations into two moves. - if (source.IsPair() && destination.IsPair()) { - AddMove(source.ToLow(), destination.ToLow(), instruction); - AddMove(source.ToHigh(), destination.ToHigh(), nullptr); - } else if (source.IsPair()) { - DCHECK(destination.IsDoubleStackSlot()) << destination; - AddMove(source.ToLow(), Location::StackSlot(destination.GetStackIndex()), instruction); - AddMove(source.ToHigh(), Location::StackSlot(destination.GetHighStackIndex(4)), nullptr); - } else if (destination.IsPair()) { - if (source.IsConstant()) { - // We put the same constant in the move. The code generator will handle which - // low or high part to use. - AddMove(source, destination.ToLow(), instruction); - AddMove(source, destination.ToHigh(), nullptr); - } else { - DCHECK(source.IsDoubleStackSlot()); - AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction); - // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to - // always be 4. - static constexpr int kHighOffset = 4; - AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)), - destination.ToHigh(), - nullptr); - } - } else { - if (kIsDebugBuild) { - if (instruction != nullptr) { - for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK_NE(moves_.Get(i).GetInstruction(), instruction) - << "Doing parallel moves for the same instruction."; - } - } + if (kIsDebugBuild) { + if (instruction != nullptr) { for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK(!destination.Equals(moves_.Get(i).GetDestination())) - << "Same destination for two moves in a parallel move."; + DCHECK_NE(moves_.Get(i).GetInstruction(), instruction) + << "Doing parallel moves for the same instruction."; } } - moves_.Add(MoveOperands(source, destination, instruction)); + for (size_t i = 0, e = moves_.Size(); i < e; ++i) { + DCHECK(!destination.Equals(moves_.Get(i).GetDestination())) + << "Same destination for two moves in a parallel move."; + } } + moves_.Add(MoveOperands(source, destination, instruction)); } MoveOperands* MoveOperandsAt(size_t index) const { diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 5dbdc74924..4cf22d3b2e 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -14,8 +14,8 @@ * limitations under the License. 
*/ +#include "base/arena_allocator.h" #include "nodes.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index b99f6784f7..b13e07eb22 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -21,6 +21,12 @@ namespace art { +void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const { + if (stats_ != nullptr) { + stats_->RecordStat(compilation_stat); + } +} + void HOptimization::Check() { if (kIsDebugBuild) { if (is_in_ssa_form_) { diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index d9e082a7f3..af39e092c7 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ #include "nodes.h" +#include "optimizing_compiler_stats.h" namespace art { @@ -34,8 +35,10 @@ class HOptimization : public ValueObject { public: HOptimization(HGraph* graph, bool is_in_ssa_form, - const char* pass_name) + const char* pass_name, + OptimizingCompilerStats* stats = nullptr) : graph_(graph), + stats_(stats), is_in_ssa_form_(is_in_ssa_form), pass_name_(pass_name) {} @@ -51,7 +54,11 @@ class HOptimization : public ValueObject { void Check(); protected: + void MaybeRecordStat(MethodCompilationStat compilation_stat) const; + HGraph* const graph_; + // Used to record stats about the optimization. + OptimizingCompilerStats* const stats_; private: // Does the analyzed graph use the SSA form? diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index c518f33f53..2fef8c7b3a 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -19,6 +19,7 @@ #include <fstream> #include <stdint.h> +#include "base/arena_allocator.h" #include "base/dumpable.h" #include "base/timing_logger.h" #include "bounds_check_elimination.h" @@ -47,7 +48,6 @@ #include "ssa_phi_elimination.h" #include "ssa_liveness_analysis.h" #include "reference_type_propagation.h" -#include "utils/arena_allocator.h" namespace art { @@ -201,6 +201,7 @@ class OptimizingCompiler FINAL : public Compiler { CompiledMethod* CompileOptimized(HGraph* graph, CodeGenerator* codegen, CompilerDriver* driver, + const DexFile& dex_file, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info) const; @@ -293,13 +294,15 @@ static void RunOptimizations(HOptimization* optimizations[], static void RunOptimizations(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats, + const DexFile& dex_file, const DexCompilationUnit& dex_compilation_unit, - PassInfoPrinter* pass_info_printer) { + PassInfoPrinter* pass_info_printer, + StackHandleScopeCollection* handles) { SsaRedundantPhiElimination redundant_phi(graph); SsaDeadPhiElimination dead_phi(graph); HDeadCodeElimination dce(graph); HConstantFolding fold1(graph); - InstructionSimplifier simplify1(graph); + InstructionSimplifier simplify1(graph, stats); HInliner inliner(graph, dex_compilation_unit, driver, stats); @@ -308,8 +311,8 @@ static void RunOptimizations(HGraph* graph, GVNOptimization gvn(graph, side_effects); LICM licm(graph, side_effects); BoundsCheckElimination bce(graph); - ReferenceTypePropagation type_propagation(graph); - InstructionSimplifier simplify2(graph, "instruction_simplifier_after_types"); + ReferenceTypePropagation type_propagation(graph, dex_file, dex_compilation_unit, handles); + InstructionSimplifier 
simplify2(graph, stats, "instruction_simplifier_after_types"); IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver); @@ -348,10 +351,12 @@ static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) { CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, CodeGenerator* codegen, CompilerDriver* compiler_driver, + const DexFile& dex_file, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer) const { - RunOptimizations( - graph, compiler_driver, &compilation_stats_, dex_compilation_unit, pass_info_printer); + StackHandleScopeCollection handles(Thread::Current()); + RunOptimizations(graph, compiler_driver, &compilation_stats_, + dex_file, dex_compilation_unit, pass_info_printer, &handles); PrepareForRegisterAllocation(graph).Run(); SsaLivenessAnalysis liveness(*graph, codegen); @@ -376,7 +381,10 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, compiler_driver, codegen->GetInstructionSet(), ArrayRef<const uint8_t>(allocator.GetMemory()), - codegen->GetFrameSize(), + // Follow Quick's behavior and set the frame size to zero if it is + // considered "empty" (see the definition of + // art::CodeGenerator::HasEmptyFrame). + codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), ArrayRef<const uint8_t>(stack_map)); @@ -400,17 +408,21 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline); - return CompiledMethod::SwapAllocCompiledMethod(compiler_driver, - codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(allocator.GetMemory()), - codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - codegen->GetFpuSpillMask(), - &src_mapping_table, - AlignVectorSize(mapping_table), - AlignVectorSize(vmap_table), - AlignVectorSize(gc_map), - ArrayRef<const uint8_t>()); + return CompiledMethod::SwapAllocCompiledMethod( + compiler_driver, + codegen->GetInstructionSet(), + ArrayRef<const uint8_t>(allocator.GetMemory()), + // Follow Quick's behavior and set the frame size to zero if it is + // considered "empty" (see the definition of + // art::CodeGenerator::HasEmptyFrame). + codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + codegen->GetFpuSpillMask(), + &src_mapping_table, + AlignVectorSize(mapping_table), + AlignVectorSize(vmap_table), + AlignVectorSize(gc_map), + ArrayRef<const uint8_t>()); } CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, @@ -508,6 +520,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return CompileOptimized(graph, codegen.get(), compiler_driver, + dex_file, dex_compilation_unit, &pass_info_printer); } else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) { diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index cc2723df99..3ebf0f8cd2 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -43,6 +43,8 @@ enum MethodCompilationStat { kNotCompiledCantAccesType, kNotOptimizedRegisterAllocator, kNotCompiledUnhandledInstruction, + kRemovedCheckedCast, + kRemovedNullCheck, kLastStat }; @@ -96,6 +98,8 @@ class OptimizingCompilerStats { case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType"; case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator"; case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; + case kRemovedCheckedCast: return "kRemovedCheckedCast"; + case kRemovedNullCheck: return "kRemovedNullCheck"; default: LOG(FATAL) << "invalid stat"; } return ""; diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index debe466560..7d0641ec13 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -57,17 +57,49 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { // unallocated, or the move was already eliminated). for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { MoveOperands* move = parallel_move->MoveOperandsAt(i); - // The parallel move resolver algorithm does not work with register pairs. - DCHECK(!move->GetSource().IsPair()); - DCHECK(!move->GetDestination().IsPair()); if (!move->IsRedundant()) { moves_.Add(move); } } } +// Update the source of `move`, knowing that `updated_location` has been swapped +// with `new_source`. Note that `updated_location` can be a pair, therefore if +// `move` is non-pair, we need to extract which register to use. +static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) { + Location source = move->GetSource(); + if (new_source.GetKind() == source.GetKind()) { + DCHECK(updated_location.Equals(source)); + move->SetSource(new_source); + } else if (new_source.IsStackSlot() + || new_source.IsDoubleStackSlot() + || source.IsStackSlot() + || source.IsDoubleStackSlot()) { + // Stack slots never take part of a pair/non-pair swap. 
+ DCHECK(updated_location.Equals(source)); + move->SetSource(new_source); + } else if (source.IsRegister()) { + DCHECK(new_source.IsRegisterPair()) << new_source; + DCHECK(updated_location.IsRegisterPair()) << updated_location; + if (updated_location.low() == source.reg()) { + move->SetSource(Location::RegisterLocation(new_source.low())); + } else { + DCHECK_EQ(updated_location.high(), source.reg()); + move->SetSource(Location::RegisterLocation(new_source.high())); + } + } else if (source.IsFpuRegister()) { + DCHECK(new_source.IsFpuRegisterPair()) << new_source; + DCHECK(updated_location.IsFpuRegisterPair()) << updated_location; + if (updated_location.low() == source.reg()) { + move->SetSource(Location::FpuRegisterLocation(new_source.low())); + } else { + DCHECK_EQ(updated_location.high(), source.reg()); + move->SetSource(Location::FpuRegisterLocation(new_source.high())); + } + } +} -void ParallelMoveResolver::PerformMove(size_t index) { +MoveOperands* ParallelMoveResolver::PerformMove(size_t index) { // Each call to this function performs a move and deletes it from the move // graph. We first recursively perform any move blocking this one. We // mark a move as "pending" on entry to PerformMove in order to detect @@ -75,35 +107,59 @@ void ParallelMoveResolver::PerformMove(size_t index) { // which means that a call to PerformMove could change any source operand // in the move graph. - DCHECK(!moves_.Get(index)->IsPending()); - DCHECK(!moves_.Get(index)->IsRedundant()); + MoveOperands* move = moves_.Get(index); + DCHECK(!move->IsPending()); + if (move->IsRedundant()) { + // Because we swap register pairs first, following, un-pending + // moves may become redundant. + move->Eliminate(); + return nullptr; + } // Clear this move's destination to indicate a pending move. The actual // destination is saved in a stack-allocated local. Recursion may allow // multiple moves to be pending. - DCHECK(!moves_.Get(index)->GetSource().IsInvalid()); - Location destination = moves_.Get(index)->MarkPending(); + DCHECK(!move->GetSource().IsInvalid()); + Location destination = move->MarkPending(); // Perform a depth-first traversal of the move graph to resolve // dependencies. Any unperformed, unpending move with a source the same // as this one's destination blocks this one so recursively perform all // such moves. + MoveOperands* required_swap = nullptr; for (size_t i = 0; i < moves_.Size(); ++i) { const MoveOperands& other_move = *moves_.Get(i); if (other_move.Blocks(destination) && !other_move.IsPending()) { // Though PerformMove can change any source operand in the move graph, - // this call cannot create a blocking move via a swap (this loop does - // not miss any). Assume there is a non-blocking move with source A + // calling `PerformMove` cannot create a blocking move via a swap + // (this loop does not miss any). + // For example, assume there is a non-blocking move with source A // and this move is blocked on source B and there is a swap of A and // B. Then A and B must be involved in the same cycle (or they would // not be swapped). Since this move's destination is B and there is // only a single incoming edge to an operand, this move must also be // involved in the same cycle. In that case, the blocking move will // be created but will be "pending" when we return from PerformMove. - PerformMove(i); + required_swap = PerformMove(i); + + if (required_swap == move) { + // If this move is required to swap, we do so without looking + // at the next moves. 
Swapping is not blocked by anything, it just + // updates other moves' sources. + break; + } else if (required_swap == moves_.Get(i)) { + // If `other_move` was swapped, we iterate again to find a new + // potential cycle. + required_swap = nullptr; + i = 0; + } else if (required_swap != nullptr) { + // A move is required to swap. We walk back the cycle to find the + // move by just returning from this `PerformMove`. + moves_.Get(index)->ClearPending(destination); + return required_swap; + } } } - MoveOperands* move = moves_.Get(index); // We are about to resolve this move and don't need it marked as // pending, so restore its destination. @@ -113,19 +169,30 @@ void ParallelMoveResolver::PerformMove(size_t index) { // so it may now be the last move in the cycle. If so remove it. if (move->GetSource().Equals(destination)) { move->Eliminate(); - return; + DCHECK(required_swap == nullptr); + return nullptr; } // The move may be blocked on a (at most one) pending move, in which case // we have a cycle. Search for such a blocking move and perform a swap to // resolve it. bool do_swap = false; - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); - if (other_move.Blocks(destination)) { - DCHECK(other_move.IsPending()); - do_swap = true; - break; + if (required_swap != nullptr) { + DCHECK_EQ(required_swap, move); + do_swap = true; + } else { + for (size_t i = 0; i < moves_.Size(); ++i) { + const MoveOperands& other_move = *moves_.Get(i); + if (other_move.Blocks(destination)) { + DCHECK(other_move.IsPending()); + if (!destination.IsPair() && other_move.GetSource().IsPair()) { + // We swap pairs before swapping non-pairs. Go back from the + // cycle by returning the pair that must be swapped. + return moves_.Get(i); + } + do_swap = true; + break; + } } } @@ -140,15 +207,21 @@ void ParallelMoveResolver::PerformMove(size_t index) { for (size_t i = 0; i < moves_.Size(); ++i) { const MoveOperands& other_move = *moves_.Get(i); if (other_move.Blocks(source)) { - moves_.Get(i)->SetSource(swap_destination); + UpdateSourceOf(moves_.Get(i), source, swap_destination); } else if (other_move.Blocks(swap_destination)) { - moves_.Get(i)->SetSource(source); + UpdateSourceOf(moves_.Get(i), swap_destination, source); } } + // If the swap was required because of a pair in the middle of a cycle, + // we return the swapped move, so that the caller knows it needs to re-iterate + // its dependency loop. + return required_swap; } else { // This move is not blocked. EmitMove(index); move->Eliminate(); + DCHECK(required_swap == nullptr); + return nullptr; } } diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 7ec1dd2deb..3fa1b37afd 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -83,7 +83,15 @@ class ParallelMoveResolver : public ValueObject { // Perform the move at the moves_ index in question (possibly requiring // other moves to satisfy dependencies). - void PerformMove(size_t index); + // + // Return whether another move in the dependency cycle needs to swap. This + // is to handle pair swaps, where we want the pair to swap first to avoid + // building pairs that are unexpected by the code generator. For example, if + // we were to swap R1 with R2, we would need to update all locations using + // R2 to R1. So a (R2,R3) pair register could become (R1,R3).
We could make + // the code generator understand such pairs, but it's easier and cleaner to + // just not create such pairs and exchange pairs in priority. + MoveOperands* PerformMove(size_t index); DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); }; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 28b5697bbd..44a3da2817 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -14,9 +14,9 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "nodes.h" #include "parallel_move_resolver.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" @@ -165,7 +165,7 @@ TEST(ParallelMoveTest, Pairs) { Location::RegisterPairLocation(2, 3), nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str()); + ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str()); } { @@ -180,7 +180,7 @@ TEST(ParallelMoveTest, Pairs) { Location::RegisterLocation(4), nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str()); + ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str()); } { @@ -195,7 +195,89 @@ TEST(ParallelMoveTest, Pairs) { Location::RegisterLocation(0), nullptr); resolver.EmitNativeCode(moves); - ASSERT_STREQ("(2 <-> 0) (1 -> 3)", resolver.GetMessage().c_str()); + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(7), + nullptr); + moves->AddMove( + Location::RegisterLocation(7), + Location::RegisterLocation(1), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(7), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + moves->AddMove( + Location::RegisterLocation(7), + Location::RegisterLocation(1), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + moves->AddMove( + Location::RegisterLocation(2), + Location::RegisterLocation(7), + nullptr); + moves->AddMove( + Location::RegisterLocation(7), + Location::RegisterLocation(1), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(2, 3), + Location::RegisterPairLocation(0, 1), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(2,3 <-> 0,1)", 
resolver.GetMessage().c_str()); + } + { + TestParallelMoveResolver resolver(&allocator); + HParallelMove* moves = new (&allocator) HParallelMove(&allocator); + moves->AddMove( + Location::RegisterPairLocation(2, 3), + Location::RegisterPairLocation(0, 1), + nullptr); + moves->AddMove( + Location::RegisterPairLocation(0, 1), + Location::RegisterPairLocation(2, 3), + nullptr); + resolver.EmitNativeCode(moves); + ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str()); } } diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 12acd0884a..2d9a2bf330 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -42,6 +42,11 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) { check->ReplaceWith(check->InputAt(0)); } +void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) { + bound_type->ReplaceWith(bound_type->InputAt(0)); + bound_type->GetBlock()->RemoveInstruction(bound_type); +} + void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { HLoadClass* cls = check->GetLoadClass(); check->ReplaceWith(cls); diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 0fdb65ffe0..0f697fbc25 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -36,6 +36,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { virtual void VisitNullCheck(HNullCheck* check) OVERRIDE; virtual void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE; virtual void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; + virtual void VisitBoundType(HBoundType* bound_type) OVERRIDE; virtual void VisitClinitCheck(HClinitCheck* check) OVERRIDE; virtual void VisitCondition(HCondition* condition) OVERRIDE; diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 9cf8235d85..293fde978e 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" #include "dex_file.h" @@ -21,7 +22,6 @@ #include "nodes.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc index 7e274f6ebf..fe23fcf326 100644 --- a/compiler/optimizing/primitive_type_propagation.cc +++ b/compiler/optimizing/primitive_type_propagation.cc @@ -40,6 +40,7 @@ static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_ // Re-compute and update the type of the instruction. Returns // whether or not the type was changed. bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { + DCHECK(phi->IsLive()); Primitive::Type existing = phi->GetType(); Primitive::Type new_type = existing; @@ -49,15 +50,20 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { } phi->SetType(new_type); - if (new_type == Primitive::kPrimDouble || new_type == Primitive::kPrimFloat) { + if (new_type == Primitive::kPrimDouble + || new_type == Primitive::kPrimFloat + || new_type == Primitive::kPrimNot) { // If the phi is of floating point type, we need to update its inputs to that // type. 
For inputs that are phis, we need to recompute their types. for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { HInstruction* input = phi->InputAt(i); if (input->GetType() != new_type) { - HInstruction* equivalent = SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); + HInstruction* equivalent = (new_type == Primitive::kPrimNot) + ? SsaBuilder::GetReferenceTypeEquivalent(input) + : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); phi->ReplaceInput(equivalent, i); if (equivalent->IsPhi()) { + equivalent->AsPhi()->SetLive(); AddToWorklist(equivalent->AsPhi()); } } @@ -78,15 +84,9 @@ void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) { if (block->IsLoopHeader()) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); - // Set the initial type for the phi. Use the non back edge input for reaching - // a fixed point faster. - Primitive::Type phi_type = phi->GetType(); - // We merge with the existing type, that has been set by the SSA builder. - DCHECK(phi_type == Primitive::kPrimVoid - || phi_type == Primitive::kPrimFloat - || phi_type == Primitive::kPrimDouble); - phi->SetType(MergeTypes(phi->InputAt(0)->GetType(), phi->GetType())); - AddToWorklist(phi); + if (phi->IsLive()) { + AddToWorklist(phi); + } } } else { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { @@ -95,7 +95,10 @@ void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) { // doing a reverse post-order visit, therefore either the phi users are // non-loop phi and will be visited later in the visit, or are loop-phis, // and they are already in the work list. - UpdateType(it.Current()->AsPhi()); + HPhi* phi = it.Current()->AsPhi(); + if (phi->IsLive()) { + UpdateType(phi); + } } } } @@ -110,13 +113,14 @@ void PrimitiveTypePropagation::ProcessWorklist() { } void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) { + DCHECK(instruction->IsLive()); worklist_.Add(instruction); } void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) { for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->GetUser()->AsPhi(); - if (phi != nullptr) { + if (phi != nullptr && phi->IsLive()) { AddToWorklist(phi); } } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 24e6837f45..76b8d7eacf 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -16,16 +16,17 @@ #include "reference_type_propagation.h" +#include "class_linker.h" +#include "mirror/class-inl.h" +#include "mirror/dex_cache.h" +#include "scoped_thread_state_change.h" + namespace art { -// TODO: Only do the analysis on reference types. We currently have to handle -// the `null` constant, that is represented as a `HIntConstant` and therefore -// has the Primitive::kPrimInt type. +// TODO: handle: a !=/== null. void ReferenceTypePropagation::Run() { - // Compute null status for instructions. - - // To properly propagate not-null info we need to visit in the dominator-based order. + // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. // We take advantage of this order in `VisitBasicBlock`. 
for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { @@ -34,9 +35,210 @@ void ReferenceTypePropagation::Run() { ProcessWorklist(); } +void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { + // TODO: handle other instructions that give type info + // (NewArray/Call/Field accesses/array accesses) + + // Initialize exact types first for faster convergence. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (instr->IsNewInstance()) { + VisitNewInstance(instr->AsNewInstance()); + } else if (instr->IsLoadClass()) { + VisitLoadClass(instr->AsLoadClass()); + } + } + + // Handle Phis. + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + VisitPhi(it.Current()->AsPhi()); + } + + // Add extra nodes to bound types. + BoundTypeForIfInstanceOf(block); +} + +// Detects if `block` is the True block for the pattern +// `if (x instanceof ClassX) { }` +// If that's the case insert an HBoundType instruction to bound the type of `x` +// to `ClassX` in the scope of the dominated blocks. +void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { + HInstruction* lastInstruction = block->GetLastInstruction(); + if (!lastInstruction->IsIf()) { + return; + } + HInstruction* ifInput = lastInstruction->InputAt(0); + // TODO: Handle more patterns here: HIf(bool) HIf(HNotEqual). + if (!ifInput->IsEqual()) { + return; + } + HInstruction* instanceOf = ifInput->InputAt(0); + HInstruction* comp_value = ifInput->InputAt(1); + if (!instanceOf->IsInstanceOf() || !comp_value->IsIntConstant()) { + return; + } + + HInstruction* obj = instanceOf->InputAt(0); + HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); + + ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + HBoundType* bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti); + + // Narrow the type as much as possible. + { + ScopedObjectAccess soa(Thread::Current()); + if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) { + bound_type->SetReferenceTypeInfo(obj_rti); + } else { + bound_type->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); + } + } + + block->InsertInstructionBefore(bound_type, lastInstruction); + // Pick the right successor based on the value we compare against. + HIntConstant* comp_value_int = comp_value->AsIntConstant(); + HBasicBlock* instanceOfTrueBlock = comp_value_int->GetValue() == 0 + ? lastInstruction->AsIf()->IfFalseSuccessor() + : lastInstruction->AsIf()->IfTrueSuccessor(); + + for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + if (instanceOfTrueBlock->Dominates(user->GetBlock())) { + user->ReplaceInput(bound_type, it.Current()->GetIndex()); + } + } +} + +void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) { + ScopedObjectAccess soa(Thread::Current()); + mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_); + // Get type from dex cache assuming it was populated by the verifier. 
+ mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex()); + if (resolved_class != nullptr) { + MutableHandle<mirror::Class> handle = handles_->NewHandle(resolved_class); + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, true)); + } +} + +void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) { + ScopedObjectAccess soa(Thread::Current()); + mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_); + // Get type from dex cache assuming it was populated by the verifier. + mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex()); + if (resolved_class != nullptr) { + Handle<mirror::Class> handle = handles_->NewHandle(resolved_class); + instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(handle, /* is_exact */ true)); + } + Handle<mirror::Class> class_handle = handles_->NewHandle(mirror::Class::GetJavaLangClass()); + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(class_handle, /* is_exact */ true)); +} + +void ReferenceTypePropagation::VisitPhi(HPhi* phi) { + if (phi->GetType() != Primitive::kPrimNot) { + return; + } + + if (phi->GetBlock()->IsLoopHeader()) { + // Set the initial type for the phi. Use the non back edge input for reaching + // a fixed point faster. + AddToWorklist(phi); + phi->SetCanBeNull(phi->InputAt(0)->CanBeNull()); + phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo()); + } else { + // Eagerly compute the type of the phi, for quicker convergence. Note + // that we don't need to add users to the worklist because we are + // doing a reverse post-order visit, therefore either the phi users are + // non-loop phi and will be visited later in the visit, or are loop-phis, + // and they are already in the work list. + UpdateNullability(phi); + UpdateReferenceTypeInfo(phi); + } +} + +ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& a, + const ReferenceTypeInfo& b) { + bool is_exact = a.IsExact() && b.IsExact(); + bool is_top = a.IsTop() || b.IsTop(); + Handle<mirror::Class> type_handle; + + if (!is_top) { + if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) { + type_handle = a.GetTypeHandle(); + } else if (a.IsSupertypeOf(b)) { + type_handle = a.GetTypeHandle(); + is_exact = false; + } else if (b.IsSupertypeOf(a)) { + type_handle = b.GetTypeHandle(); + is_exact = false; + } else { + // TODO: Find a common super class. + is_top = true; + is_exact = false; + } + } + + return is_top + ? ReferenceTypeInfo::CreateTop(is_exact) + : ReferenceTypeInfo::Create(type_handle, is_exact); +} + +bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { + ScopedObjectAccess soa(Thread::Current()); + + ReferenceTypeInfo previous_rti = instr->GetReferenceTypeInfo(); + if (instr->IsBoundType()) { + UpdateBoundType(instr->AsBoundType()); + } else if (instr->IsPhi()) { + UpdatePhi(instr->AsPhi()); + } else { + LOG(FATAL) << "Invalid instruction (should not get here)"; + } + + return !previous_rti.IsEqual(instr->GetReferenceTypeInfo()); +} + +void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { + ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); + // Be sure that we don't go over the bounded type. 
+ ReferenceTypeInfo bound_rti = instr->GetBoundType(); + if (!bound_rti.IsSupertypeOf(new_rti)) { + new_rti = bound_rti; + } + instr->SetReferenceTypeInfo(new_rti); +} + +void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { + ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); + if (new_rti.IsTop() && !new_rti.IsExact()) { + // Early return if we are Top and inexact. + instr->SetReferenceTypeInfo(new_rti); + return; + } + for (size_t i = 1; i < instr->InputCount(); i++) { + new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo()); + if (new_rti.IsTop()) { + if (!new_rti.IsExact()) { + break; + } else { + continue; + } + } + } + instr->SetReferenceTypeInfo(new_rti); +} + // Re-computes and updates the nullability of the instruction. Returns whether or // not the nullability was changed. -bool ReferenceTypePropagation::UpdateNullability(HPhi* phi) { +bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) { + DCHECK(instr->IsPhi() || instr->IsBoundType()); + + if (!instr->IsPhi()) { + return false; + } + + HPhi* phi = instr->AsPhi(); bool existing_can_be_null = phi->CanBeNull(); bool new_can_be_null = false; for (size_t i = 0; i < phi->InputCount(); i++) { @@ -47,48 +249,26 @@ bool ReferenceTypePropagation::UpdateNullability(HPhi* phi) { return existing_can_be_null != new_can_be_null; } - -void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { - if (block->IsLoopHeader()) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - // Set the initial type for the phi. Use the non back edge input for reaching - // a fixed point faster. - HPhi* phi = it.Current()->AsPhi(); - AddToWorklist(phi); - phi->SetCanBeNull(phi->InputAt(0)->CanBeNull()); - } - } else { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - // Eagerly compute the type of the phi, for quicker convergence. Note - // that we don't need to add users to the worklist because we are - // doing a reverse post-order visit, therefore either the phi users are - // non-loop phi and will be visited later in the visit, or are loop-phis, - // and they are already in the work list. 
- UpdateNullability(it.Current()->AsPhi()); - } - } -} - void ReferenceTypePropagation::ProcessWorklist() { while (!worklist_.IsEmpty()) { - HPhi* instruction = worklist_.Pop(); - if (UpdateNullability(instruction)) { + HInstruction* instruction = worklist_.Pop(); + if (UpdateNullability(instruction) || UpdateReferenceTypeInfo(instruction)) { AddDependentInstructionsToWorklist(instruction); } } } -void ReferenceTypePropagation::AddToWorklist(HPhi* instruction) { +void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) { + DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot) << instruction->GetType(); worklist_.Add(instruction); } -void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) { +void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->GetUser()->AsPhi(); - if (phi != nullptr) { - AddToWorklist(phi); + HInstruction* user = it.Current()->GetUser(); + if (user->IsPhi() || user->IsBoundType()) { + AddToWorklist(user); } } } - } // namespace art diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index a74319d0c5..e346dbfc6c 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -17,31 +17,57 @@ #ifndef ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ #define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ +#include "driver/dex_compilation_unit.h" +#include "handle_scope-inl.h" #include "nodes.h" #include "optimization.h" +#include "optimizing_compiler_stats.h" namespace art { /** * Propagates reference types to instructions. - * TODO: Currently only nullability is computed. 
*/ class ReferenceTypePropagation : public HOptimization { public: - explicit ReferenceTypePropagation(HGraph* graph) + ReferenceTypePropagation(HGraph* graph, + const DexFile& dex_file, + const DexCompilationUnit& dex_compilation_unit, + StackHandleScopeCollection* handles) : HOptimization(graph, true, "reference_type_propagation"), + dex_file_(dex_file), + dex_compilation_unit_(dex_compilation_unit), + handles_(handles), worklist_(graph->GetArena(), kDefaultWorklistSize) {} void Run() OVERRIDE; private: + void VisitNewInstance(HNewInstance* new_instance); + void VisitLoadClass(HLoadClass* load_class); + void VisitPhi(HPhi* phi); void VisitBasicBlock(HBasicBlock* block); + + void UpdateBoundType(HBoundType* bound_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void UpdatePhi(HPhi* phi) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + void BoundTypeForIfInstanceOf(HBasicBlock* block); + void ProcessWorklist(); - void AddToWorklist(HPhi* phi); - void AddDependentInstructionsToWorklist(HPhi* phi); - bool UpdateNullability(HPhi* phi); + void AddToWorklist(HInstruction* instr); + void AddDependentInstructionsToWorklist(HInstruction* instr); + + bool UpdateNullability(HInstruction* instr); + bool UpdateReferenceTypeInfo(HInstruction* instr); + + ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + const DexFile& dex_file_; + const DexCompilationUnit& dex_compilation_unit_; + StackHandleScopeCollection* handles_; - GrowableArray<HPhi*> worklist_; + GrowableArray<HInstruction*> worklist_; static constexpr size_t kDefaultWorklistSize = 8; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 3809720cb4..54e62a5b2c 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -48,7 +48,10 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()), physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()), temp_intervals_(allocator, 4), - spill_slots_(allocator, kDefaultNumberOfSpillSlots), + int_spill_slots_(allocator, kDefaultNumberOfSpillSlots), + long_spill_slots_(allocator, kDefaultNumberOfSpillSlots), + float_spill_slots_(allocator, kDefaultNumberOfSpillSlots), + double_spill_slots_(allocator, kDefaultNumberOfSpillSlots), safepoints_(allocator, 0), processing_core_registers_(false), number_of_registers_(-1), @@ -252,8 +255,13 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { && (instruction->GetType() != Primitive::kPrimFloat); if (locations->CanCall()) { - if (!instruction->IsSuspendCheck()) { - codegen_->MarkNotLeaf(); + if (codegen_->IsLeafMethod()) { + // TODO: We do this here because we do not want the suspend check to artificially + // create live registers. We should find another place, but this is currently the + // simplest. 
+ DCHECK(instruction->IsSuspendCheckEntry()); + instruction->GetBlock()->RemoveInstruction(instruction); + return; } safepoints_.Add(instruction); if (locations->OnlyCallsOnSlowPath()) { @@ -433,7 +441,7 @@ bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const { } } - return ValidateIntervals(intervals, spill_slots_.Size(), reserved_out_slots_, *codegen_, + return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_, allocator_, processing_core_registers_, log_fatal_on_failure); } @@ -1128,41 +1136,62 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) { } size_t end = last_sibling->GetEnd(); + GrowableArray<size_t>* spill_slots = nullptr; + switch (interval->GetType()) { + case Primitive::kPrimDouble: + spill_slots = &double_spill_slots_; + break; + case Primitive::kPrimLong: + spill_slots = &long_spill_slots_; + break; + case Primitive::kPrimFloat: + spill_slots = &float_spill_slots_; + break; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + case Primitive::kPrimShort: + spill_slots = &int_spill_slots_; + break; + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected type for interval " << interval->GetType(); + } + // Find an available spill slot. size_t slot = 0; - for (size_t e = spill_slots_.Size(); slot < e; ++slot) { - // We check if it is less rather than less or equal because the parallel move - // resolver does not work when a single spill slot needs to be exchanged with - // a double spill slot. The strict comparison avoids needing to exchange these - // locations at the same lifetime position. - if (spill_slots_.Get(slot) < parent->GetStart() - && (slot == (e - 1) || spill_slots_.Get(slot + 1) < parent->GetStart())) { + for (size_t e = spill_slots->Size(); slot < e; ++slot) { + if (spill_slots->Get(slot) <= parent->GetStart() + && (slot == (e - 1) || spill_slots->Get(slot + 1) <= parent->GetStart())) { break; } } if (parent->NeedsTwoSpillSlots()) { - if (slot == spill_slots_.Size()) { + if (slot == spill_slots->Size()) { // We need a new spill slot. - spill_slots_.Add(end); - spill_slots_.Add(end); - } else if (slot == spill_slots_.Size() - 1) { - spill_slots_.Put(slot, end); - spill_slots_.Add(end); + spill_slots->Add(end); + spill_slots->Add(end); + } else if (slot == spill_slots->Size() - 1) { + spill_slots->Put(slot, end); + spill_slots->Add(end); } else { - spill_slots_.Put(slot, end); - spill_slots_.Put(slot + 1, end); + spill_slots->Put(slot, end); + spill_slots->Put(slot + 1, end); } } else { - if (slot == spill_slots_.Size()) { + if (slot == spill_slots->Size()) { // We need a new spill slot. - spill_slots_.Add(end); + spill_slots->Add(end); } else { - spill_slots_.Put(slot, end); + spill_slots->Put(slot, end); } } - parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize); + // Note that the exact spill slot location will be computed when we resolve, + // that is when we know the number of spill slots for each type. 
+ parent->SetSpillSlot(slot); } static bool IsValidDestination(Location destination) { @@ -1511,7 +1540,7 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, } void RegisterAllocator::Resolve() { - codegen_->InitializeCodeGeneration(spill_slots_.Size(), + codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(), maximum_number_of_live_core_registers_, maximum_number_of_live_fp_registers_, reserved_out_slots_, @@ -1537,6 +1566,39 @@ void RegisterAllocator::Resolve() { } else if (current->HasSpillSlot()) { current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize()); } + } else if (current->HasSpillSlot()) { + // Adjust the stack slot, now that we know the number of them for each type. + // The way this implementation lays out the stack is the following: + // [parameter slots ] + // [double spill slots ] + // [long spill slots ] + // [float spill slots ] + // [int/ref values ] + // [maximum out values ] (number of arguments for calls) + // [art method ]. + uint32_t slot = current->GetSpillSlot(); + switch (current->GetType()) { + case Primitive::kPrimDouble: + slot += long_spill_slots_.Size(); + FALLTHROUGH_INTENDED; + case Primitive::kPrimLong: + slot += float_spill_slots_.Size(); + FALLTHROUGH_INTENDED; + case Primitive::kPrimFloat: + slot += int_spill_slots_.Size(); + FALLTHROUGH_INTENDED; + case Primitive::kPrimNot: + case Primitive::kPrimInt: + case Primitive::kPrimChar: + case Primitive::kPrimByte: + case Primitive::kPrimBoolean: + case Primitive::kPrimShort: + slot += reserved_out_slots_; + break; + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected type for interval " << current->GetType(); + } + current->SetSpillSlot(slot * kVRegSize); } Location source = current->ToLocation(); diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index b8f70bdc18..ff2f106b74 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -75,7 +75,10 @@ class RegisterAllocator { } size_t GetNumberOfSpillSlots() const { - return spill_slots_.Size(); + return int_spill_slots_.Size() + + long_spill_slots_.Size() + + float_spill_slots_.Size() + + double_spill_slots_.Size(); } private: @@ -171,8 +174,14 @@ class RegisterAllocator { // where an instruction requires a temporary. GrowableArray<LiveInterval*> temp_intervals_; - // The spill slots allocated for live intervals. - GrowableArray<size_t> spill_slots_; + // The spill slots allocated for live intervals. We ensure spill slots + // are typed to avoid (1) doing moves and swaps between two different kinds + // of registers, and (2) swapping between a single stack slot and a double + // stack slot. This simplifies the parallel move resolver. + GrowableArray<size_t> int_spill_slots_; + GrowableArray<size_t> long_spill_slots_; + GrowableArray<size_t> float_spill_slots_; + GrowableArray<size_t> double_spill_slots_; // Instructions that need a safepoint. GrowableArray<HInstruction*> safepoints_; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 0cc00c0fde..e5d06a9f8b 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -14,6 +14,7 @@ * limitations under the License. 
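With the layered frame described in Resolve(), a raw slot index only turns into a frame position once every pool size is known: a double slot sits above all long, float and int/reference slots, which in turn sit above the reserved out slots. A standalone sketch of that offset computation, mirroring the FALLTHROUGH_INTENDED chain (types simplified; kVRegSize is 4 bytes, so the returned index is multiplied by 4 for a byte offset):

#include <cstdint>
#include <iostream>

// Simplified pool sizes, mirroring the GrowableArray Size() calls in Resolve().
struct PoolSizes {
  uint32_t ints;
  uint32_t longs;
  uint32_t floats;
  uint32_t doubles;
  uint32_t reserved_out_slots;  // Outgoing argument slots plus the art method.
};

enum class Kind { kIntOrReference, kFloat, kLong, kDouble };

// Turn a per-pool slot index into a whole-frame slot index, exactly like the
// FALLTHROUGH_INTENDED chain: each type is offset by the pools laid out below it.
static uint32_t FinalSlot(uint32_t slot, Kind kind, const PoolSizes& sizes) {
  switch (kind) {
    case Kind::kDouble:
      slot += sizes.longs;
      [[fallthrough]];
    case Kind::kLong:
      slot += sizes.floats;
      [[fallthrough]];
    case Kind::kFloat:
      slot += sizes.ints;
      [[fallthrough]];
    case Kind::kIntOrReference:
      slot += sizes.reserved_out_slots;
      break;
  }
  return slot;  // Multiply by kVRegSize (4) for the byte offset.
}

int main() {
  PoolSizes sizes{3, 2, 1, 1, 2};  // 3 int, 2 long, 1 float, 1 double, 2 reserved.
  // First double slot: above 2 long + 1 float + 3 int slots and the 2 out slots.
  std::cout << FinalSlot(0, Kind::kDouble, sizes) << "\n";  // Prints 8.
  return 0;
}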
*/ +#include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" @@ -25,7 +26,6 @@ #include "register_allocator.h" #include "ssa_liveness_analysis.h" #include "ssa_phi_elimination.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index c9a21aa681..3dc75059b2 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -42,20 +42,33 @@ void SsaBuilder::BuildSsa() { } } - // 3) Remove dead phis. This will remove phis that are only used by environments: + // 3) Mark dead phis. This will mark phis that are only used by environments: // at the DEX level, the type of these phis does not need to be consistent, but // our code generator will complain if the inputs of a phi do not have the same - // type (modulo the special case of `null`). - SsaDeadPhiElimination dead_phis(GetGraph()); - dead_phis.Run(); + // type. The marking allows the type propagation to know which phis it needs + // to handle. We mark but do not eliminate: the elimination will be done in + // step 5). + { + SsaDeadPhiElimination dead_phis(GetGraph()); + dead_phis.MarkDeadPhis(); + } // 4) Propagate types of phis. At this point, phis are typed void in the general - // case, or float or double when we created a floating-point equivalent. So we + // case, or float/double/reference when we created an equivalent phi. So we // need to propagate the types across phis to give them a correct type. PrimitiveTypePropagation type_propagation(GetGraph()); type_propagation.Run(); - // 5) Clear locals. + // 5) Step 4) changes inputs of phis which may lead to dead phis again. We re-run + // the algorithm and this time elimimates them. + // TODO: Make this work with debug info and reference liveness. We currently + // eagerly remove phis used in environments. + { + SsaDeadPhiElimination dead_phis(GetGraph()); + dead_phis.Run(); + } + + // 6) Clear locals. // TODO: Move this to a dead code eliminator phase. for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions()); !it.Done(); @@ -185,15 +198,24 @@ static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant) { /** * Because of Dex format, we might end up having the same phi being - * used for non floating point operations and floating point operations. Because - * we want the graph to be correctly typed (and thereafter avoid moves between + * used for non floating point operations and floating point / reference operations. + * Because we want the graph to be correctly typed (and thereafter avoid moves between * floating point registers and core registers), we need to create a copy of the - * phi with a floating point type. + * phi with a floating point / reference type. */ -static HPhi* GetFloatOrDoubleEquivalentOfPhi(HPhi* phi, Primitive::Type type) { - // We place the floating point phi next to this phi. +static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) { + // We place the floating point /reference phi next to this phi. HInstruction* next = phi->GetNext(); - if (next == nullptr || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())) { + if (next != nullptr + && next->AsPhi()->GetRegNumber() == phi->GetRegNumber() + && next->GetType() != type) { + // Move to the next phi to see if it is the one we are looking for. 
+ next = next->GetNext(); + } + + if (next == nullptr + || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) + || (next->GetType() != type)) { ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena(); HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type); for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { @@ -223,7 +245,7 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, } else if (value->IsIntConstant()) { return GetFloatEquivalent(value->AsIntConstant()); } else if (value->IsPhi()) { - return GetFloatOrDoubleEquivalentOfPhi(value->AsPhi(), type); + return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), type); } else { // For other instructions, we assume the verifier has checked that the dex format is correctly // typed and the value in a dex register will not be used for both floating point and @@ -234,12 +256,25 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, } } +HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { + if (value->IsIntConstant()) { + DCHECK_EQ(value->AsIntConstant()->GetValue(), 0); + return value->GetBlock()->GetGraph()->GetNullConstant(); + } else { + DCHECK(value->IsPhi()); + return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot); + } +} + void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber()); - if (load->GetType() != value->GetType() - && (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble)) { - // If the operation requests a specific type, we make sure its input is of that type. - value = GetFloatOrDoubleEquivalent(load, value, load->GetType()); + // If the operation requests a specific type, we make sure its input is of that type. + if (load->GetType() != value->GetType()) { + if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) { + value = GetFloatOrDoubleEquivalent(load, value, load->GetType()); + } else if (load->GetType() == Primitive::kPrimNot) { + value = GetReferenceTypeEquivalent(value); + } } load->ReplaceWith(value); load->GetBlock()->RemoveInstruction(load); diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 2eec87b618..148e9590c3 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -58,6 +58,8 @@ class SsaBuilder : public HGraphVisitor { HInstruction* instruction, Primitive::Type type); + static HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction); + private: // Locals for the current block being visited. HEnvironment* current_locals_; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 1b06315fce..bebb73ba22 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -115,14 +115,13 @@ void SsaLivenessAnalysis::NumberInstructions() { // to differentiate between the start and end of an instruction. Adding 2 to // the lifetime position for each instruction ensures the start of an // instruction is different than the end of the previous instruction. 
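VisitLoadLocal above now dispatches on the requested type: float and double loads get the floating point equivalent, and reference loads either reuse the graph's single null constant (when the stored value is the integer constant 0, which is how Dex encodes null) or the reference-typed phi equivalent. A small illustration of that dispatch; the Value type and helper bodies are hypothetical stand-ins for the SsaBuilder/graph methods, not the real API:

#include <cassert>

enum class Type { kInt, kFloat, kDouble, kReference };

// Hypothetical stand-in for a graph value; real ART works on HInstruction.
struct Value {
  Type type = Type::kInt;
  bool is_int_constant = false;
  int constant = 0;   // Only meaningful when is_int_constant is true.
  bool is_phi = false;
};

// Placeholder helpers: the real SsaBuilder reuses an adjacent equivalent phi
// or an existing constant instead of allocating a fresh value.
static Value* GetNullConstant() {
  static Value null_constant{Type::kReference, false, 0, false};
  return &null_constant;  // The graph keeps one shared null constant.
}
static Value* MakeEquivalent(const Value& v, Type type) {
  Value* equivalent = new Value(v);
  equivalent->type = type;
  return equivalent;
}

// Pick what a LoadLocal resolves to when the requested type differs from the
// type of what the Dex register currently holds.
static Value* ResolveLoad(Value* value, Type requested) {
  if (value->type == requested) {
    return value;
  }
  if (requested == Type::kFloat || requested == Type::kDouble) {
    return MakeEquivalent(*value, requested);
  }
  if (requested == Type::kReference) {
    if (value->is_int_constant) {
      assert(value->constant == 0);  // Dex encodes null as the int constant 0.
      return GetNullConstant();
    }
    assert(value->is_phi);           // Only phis can need a reference equivalent.
    return MakeEquivalent(*value, Type::kReference);
  }
  return value;  // Other mismatches are ruled out by the verifier.
}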
- HGraphVisitor* location_builder = codegen_->GetLocationBuilder(); for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); block->SetLifetimeStart(lifetime_position); for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { HInstruction* current = inst_it.Current(); - current->Accept(location_builder); + codegen_->AllocateLocations(current); LocationSummary* locations = current->GetLocations(); if (locations != nullptr && locations->Out().IsValid()) { instructions_from_ssa_index_.Add(current); @@ -140,7 +139,7 @@ void SsaLivenessAnalysis::NumberInstructions() { for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { HInstruction* current = inst_it.Current(); - current->Accept(codegen_->GetLocationBuilder()); + codegen_->AllocateLocations(current); LocationSummary* locations = current->GetLocations(); if (locations != nullptr && locations->Out().IsValid()) { instructions_from_ssa_index_.Add(current); @@ -312,7 +311,12 @@ bool SsaLivenessAnalysis::UpdateLiveIn(const HBasicBlock& block) { return live_in->UnionIfNotIn(live_out, kill); } +static int RegisterOrLowRegister(Location location) { + return location.IsPair() ? location.low() : location.reg(); +} + int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { + DCHECK(!IsHighInterval()); if (GetParent() == this && defined_by_ != nullptr) { // This is the first interval for the instruction. Try to find // a register based on its definition. @@ -334,8 +338,12 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { if (user->IsPhi()) { // If the phi has a register, try to use the same. Location phi_location = user->GetLiveInterval()->ToLocation(); - if (SameRegisterKind(phi_location) && free_until[phi_location.reg()] >= use_position) { - return phi_location.reg(); + if (phi_location.IsRegisterKind()) { + DCHECK(SameRegisterKind(phi_location)); + int reg = RegisterOrLowRegister(phi_location); + if (free_until[reg] >= use_position) { + return reg; + } } const GrowableArray<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors(); // If the instruction dies at the phi assignment, we can try having the @@ -348,8 +356,11 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { HInstruction* input = user->InputAt(i); Location location = input->GetLiveInterval()->GetLocationAt( predecessors.Get(i)->GetLifetimeEnd() - 1); - if (location.IsRegister() && free_until[location.reg()] >= use_position) { - return location.reg(); + if (location.IsRegisterKind()) { + int reg = RegisterOrLowRegister(location); + if (free_until[reg] >= use_position) { + return reg; + } } } } @@ -360,8 +371,12 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const { // We use the user's lifetime position - 1 (and not `use_position`) because the // register is blocked at the beginning of the user. size_t position = user->GetLifetimePosition() - 1; - if (SameRegisterKind(expected) && free_until[expected.reg()] >= position) { - return expected.reg(); + if (expected.IsRegisterKind()) { + DCHECK(SameRegisterKind(expected)); + int reg = RegisterOrLowRegister(expected); + if (free_until[reg] >= position) { + return reg; + } } } } @@ -383,8 +398,9 @@ int LiveInterval::FindHintAtDefinition() const { // If the input dies at the end of the predecessor, we know its register can // be reused. 
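Register pairs mean a hint location may be either a single register or a pair, so every free_until check above is keyed on the pair's low register via RegisterOrLowRegister. A trimmed-down sketch of that check with a simplified Location stand-in (illustrative only):

#include <cstddef>

// Simplified stand-in for art::Location: a single register or a register pair.
struct Location {
  bool is_register_kind;
  bool is_pair;
  int reg;   // The single register, or the low register of a pair.

  bool IsRegisterKind() const { return is_register_kind; }
  int low() const { return reg; }
};

static int RegisterOrLowRegister(const Location& location) {
  return location.is_pair ? location.low() : location.reg;
}

// Return the hinted register if it stays free long enough, or -1 otherwise.
// free_until[r] is the first position at which register r becomes blocked.
static int HintIfFree(const Location& hint, const size_t* free_until, size_t use_position) {
  if (!hint.IsRegisterKind()) {
    return -1;
  }
  int reg = RegisterOrLowRegister(hint);
  return free_until[reg] >= use_position ? reg : -1;
}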
Location input_location = input_interval.ToLocation(); - if (SameRegisterKind(input_location)) { - return input_location.reg(); + if (input_location.IsRegisterKind()) { + DCHECK(SameRegisterKind(input_location)); + return RegisterOrLowRegister(input_location); } } } @@ -399,8 +415,9 @@ int LiveInterval::FindHintAtDefinition() const { // If the input dies at the start of this instruction, we know its register can // be reused. Location location = input_interval.ToLocation(); - if (SameRegisterKind(location)) { - return location.reg(); + if (location.IsRegisterKind()) { + DCHECK(SameRegisterKind(location)); + return RegisterOrLowRegister(location); } } } @@ -409,9 +426,19 @@ int LiveInterval::FindHintAtDefinition() const { } bool LiveInterval::SameRegisterKind(Location other) const { - return IsFloatingPoint() - ? other.IsFpuRegister() - : other.IsRegister(); + if (IsFloatingPoint()) { + if (IsLowInterval() || IsHighInterval()) { + return other.IsFpuRegisterPair(); + } else { + return other.IsFpuRegister(); + } + } else { + if (IsLowInterval() || IsHighInterval()) { + return other.IsRegisterPair(); + } else { + return other.IsRegister(); + } + } } bool LiveInterval::NeedsTwoSpillSlots() const { diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index fd30c1bc76..2f2e2d1fab 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -19,6 +19,11 @@ namespace art { void SsaDeadPhiElimination::Run() { + MarkDeadPhis(); + EliminateDeadPhis(); +} + +void SsaDeadPhiElimination::MarkDeadPhis() { // Add to the worklist phis referenced by non-phi instructions. for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); @@ -49,7 +54,9 @@ void SsaDeadPhiElimination::Run() { } } } +} +void SsaDeadPhiElimination::EliminateDeadPhis() { // Remove phis that are not live. Visit in post order so that phis // that are not inputs of loop phis can be removed when they have // no users left (dead phis might use dead phis). @@ -57,31 +64,33 @@ void SsaDeadPhiElimination::Run() { HBasicBlock* block = it.Current(); HInstruction* current = block->GetFirstPhi(); HInstruction* next = nullptr; + HPhi* phi; while (current != nullptr) { + phi = current->AsPhi(); next = current->GetNext(); - if (current->AsPhi()->IsDead()) { - if (current->HasUses()) { - for (HUseIterator<HInstruction*> use_it(current->GetUses()); !use_it.Done(); + if (phi->IsDead()) { + // Make sure the phi is only used by other dead phis. + if (kIsDebugBuild) { + for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { - HUseListNode<HInstruction*>* user_node = use_it.Current(); - HInstruction* user = user_node->GetUser(); + HInstruction* user = use_it.Current()->GetUser(); DCHECK(user->IsLoopHeaderPhi()) << user->GetId(); DCHECK(user->AsPhi()->IsDead()) << user->GetId(); - // Just put itself as an input. The phi will be removed in this loop anyway. - user->SetRawInputAt(user_node->GetIndex(), user); - current->RemoveUser(user, user_node->GetIndex()); } } - if (current->HasEnvironmentUses()) { - for (HUseIterator<HEnvironment*> use_it(current->GetEnvUses()); !use_it.Done(); - use_it.Advance()) { - HUseListNode<HEnvironment*>* user_node = use_it.Current(); - HEnvironment* user = user_node->GetUser(); - user->SetRawEnvAt(user_node->GetIndex(), nullptr); - current->RemoveEnvironmentUser(user_node); - } + // Remove the phi from use lists of its inputs. 
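The Run() split above separates marking from elimination so the SSA builder can mark early and delete later. The marking half is a liveness fixed point: phis start out dead, any phi with a real (non-phi) user is revived, and liveness then flows into the inputs of live phis. A rough standalone sketch, with std::vector in place of GrowableArray and a simplified phi type:

#include <vector>

struct Phi {
  bool is_dead = true;
  bool used_by_non_phi = false;   // Has at least one non-phi, non-environment user.
  std::vector<Phi*> inputs;       // Only the inputs that are themselves phis.
};

// Mirrors SsaDeadPhiElimination::MarkDeadPhis(): seed the worklist with phis
// that have real users, then propagate liveness into their phi inputs.
static void MarkDeadPhis(const std::vector<Phi*>& all_phis) {
  std::vector<Phi*> worklist;
  for (Phi* phi : all_phis) {
    if (phi->used_by_non_phi) {
      phi->is_dead = false;
      worklist.push_back(phi);
    }
  }
  while (!worklist.empty()) {
    Phi* live = worklist.back();
    worklist.pop_back();
    for (Phi* input : live->inputs) {
      if (input->is_dead) {
        // An input of a live phi is live as well.
        input->is_dead = false;
        worklist.push_back(input);
      }
    }
  }
}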
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + phi->RemoveAsUserOfInput(i); + } + // Remove the phi from environments that use it. + for (HUseIterator<HEnvironment*> use_it(phi->GetEnvUses()); !use_it.Done(); + use_it.Advance()) { + HUseListNode<HEnvironment*>* user_node = use_it.Current(); + HEnvironment* user = user_node->GetUser(); + user->SetRawEnvAt(user_node->GetIndex(), nullptr); } - block->RemovePhi(current->AsPhi()); + // Delete it from the instruction list. + block->RemovePhi(phi, /*ensure_safety=*/ false); } current = next; } diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h index b7899712d6..88a5279e14 100644 --- a/compiler/optimizing/ssa_phi_elimination.h +++ b/compiler/optimizing/ssa_phi_elimination.h @@ -34,6 +34,9 @@ class SsaDeadPhiElimination : public HOptimization { void Run() OVERRIDE; + void MarkDeadPhis(); + void EliminateDeadPhis(); + private: GrowableArray<HPhi*> worklist_; diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 7e90b37fe6..7fc1ec6dd1 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "base/arena_allocator.h" #include "base/stringprintf.h" #include "builder.h" #include "dex_file.h" @@ -22,7 +23,6 @@ #include "optimizing_unit_test.h" #include "pretty_printer.h" #include "ssa_builder.h" -#include "utils/arena_allocator.h" #include "gtest/gtest.h" diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 3974e53e6f..5283d5dcca 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -166,18 +166,23 @@ class StackMapStream : public ValueObject { stack_map.SetStackMask(*entry.sp_mask); } - // Set the register map. - MemoryRegion register_region = dex_register_maps_region.Subregion( - next_dex_register_map_offset, - DexRegisterMap::kFixedSize + entry.num_dex_registers * DexRegisterMap::SingleEntrySize()); - next_dex_register_map_offset += register_region.size(); - DexRegisterMap dex_register_map(register_region); - stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start); - - for (size_t j = 0; j < entry.num_dex_registers; ++j) { - DexRegisterEntry register_entry = - dex_register_maps_.Get(j + entry.dex_register_maps_start_index); - dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value); + if (entry.num_dex_registers != 0) { + // Set the register map. + MemoryRegion register_region = dex_register_maps_region.Subregion( + next_dex_register_map_offset, + DexRegisterMap::kFixedSize + + entry.num_dex_registers * DexRegisterMap::SingleEntrySize()); + next_dex_register_map_offset += register_region.size(); + DexRegisterMap dex_register_map(register_region); + stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start); + + for (size_t j = 0; j < entry.num_dex_registers; ++j) { + DexRegisterEntry register_entry = + dex_register_maps_.Get(j + entry.dex_register_maps_start_index); + dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value); + } + } else { + stack_map.SetDexRegisterMapOffset(StackMap::kNoDexRegisterMap); } // Set the inlining info. 
@@ -196,7 +201,7 @@ class StackMapStream : public ValueObject { inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index); } } else { - stack_map.SetInlineDescriptorOffset(InlineInfo::kNoInlineInfo); + stack_map.SetInlineDescriptorOffset(StackMap::kNoInlineInfo); } } } diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 5ee6ae049c..744fb45fff 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -61,6 +61,7 @@ TEST(StackMapTest, Test1) { MemoryRegion stack_mask = stack_map.GetStackMask(); ASSERT_TRUE(SameBits(stack_mask, sp_mask)); + ASSERT_TRUE(stack_map.HasDexRegisterMap()); DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0)); ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1)); @@ -107,6 +108,7 @@ TEST(StackMapTest, Test2) { MemoryRegion stack_mask = stack_map.GetStackMask(); ASSERT_TRUE(SameBits(stack_mask, sp_mask1)); + ASSERT_TRUE(stack_map.HasDexRegisterMap()); DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2); ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0)); ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1));
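Both stack map changes rely on the same convention: when a stack map has nothing to describe, the writer stores a reserved offset instead of emitting an empty table, and readers, like the updated tests, must check for it before decoding. A minimal sketch of that convention with a hypothetical, heavily simplified stack map entry (the sentinel values and field layout are illustrative, not ART's packed encoding):

#include <cstdint>

// Hypothetical, heavily simplified stack map entry; real ART packs these
// fields into a MemoryRegion rather than a plain struct.
struct StackMapEntry {
  static constexpr uint32_t kNoDexRegisterMap = ~0u;  // Sentinel: no map emitted.
  static constexpr uint32_t kNoInlineInfo = ~0u;      // Sentinel: no inline info.

  uint32_t dex_register_map_offset = kNoDexRegisterMap;
  uint32_t inline_info_offset = kNoInlineInfo;

  bool HasDexRegisterMap() const { return dex_register_map_offset != kNoDexRegisterMap; }
  bool HasInlineInfo() const { return inline_info_offset != kNoInlineInfo; }
};

// Writer side: only record an offset when there is something to encode.
inline void SetDexRegisterMap(StackMapEntry* map, uint32_t num_dex_registers, uint32_t offset) {
  map->dex_register_map_offset =
      (num_dex_registers != 0) ? offset : StackMapEntry::kNoDexRegisterMap;
}

Mirroring the new test assertions, a reader calls HasDexRegisterMap() before decoding the map itself; interpreting the sentinel as a real offset would read past the encoded data.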