Diffstat (limited to 'compiler/optimizing')
 -rw-r--r--  compiler/optimizing/bounds_check_elimination.cc       | 409
 -rw-r--r--  compiler/optimizing/bounds_check_elimination_test.cc  |  51
 -rw-r--r--  compiler/optimizing/code_generator.cc                 |  46
 -rw-r--r--  compiler/optimizing/code_generator.h                  |  41
 -rw-r--r--  compiler/optimizing/code_generator_arm.cc             | 129
 -rw-r--r--  compiler/optimizing/code_generator_arm.h              |  24
 -rw-r--r--  compiler/optimizing/code_generator_arm64.cc           |  74
 -rw-r--r--  compiler/optimizing/code_generator_arm64.h            |  30
 -rw-r--r--  compiler/optimizing/code_generator_x86.cc             |  11
 -rw-r--r--  compiler/optimizing/code_generator_x86_64.cc          |  73
 -rw-r--r--  compiler/optimizing/instruction_simplifier.cc         |  13
 -rw-r--r--  compiler/optimizing/intrinsics_arm.cc                 | 883
 -rw-r--r--  compiler/optimizing/intrinsics_arm.h                  |  88
 -rw-r--r--  compiler/optimizing/intrinsics_arm64.cc               |   2
 -rw-r--r--  compiler/optimizing/nodes.h                           |  12
 -rw-r--r--  compiler/optimizing/register_allocator.cc             |   9
 -rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc          |   5
17 files changed, 1558 insertions(+), 342 deletions(-)
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index d6c3515726..bcee5638fc 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -28,10 +28,10 @@ class MonotonicValueRange; */ class ValueBound : public ValueObject { public: - ValueBound(HInstruction* instruction, int constant) { + ValueBound(HInstruction* instruction, int32_t constant) { if (instruction != nullptr && instruction->IsIntConstant()) { - // Normalizing ValueBound with constant instruction. - int instr_const = instruction->AsIntConstant()->GetValue(); + // Normalize ValueBound with constant instruction. + int32_t instr_const = instruction->AsIntConstant()->GetValue(); if (constant >= 0 && (instr_const <= INT_MAX - constant)) { // No overflow. instruction_ = nullptr; @@ -49,6 +49,25 @@ class ValueBound : public ValueObject { constant_ = constant; } + static bool IsAddOrSubAConstant(HInstruction* instruction, + HInstruction** left_instruction, + int* right_constant) { + if (instruction->IsAdd() || instruction->IsSub()) { + HBinaryOperation* bin_op = instruction->AsBinaryOperation(); + HInstruction* left = bin_op->GetLeft(); + HInstruction* right = bin_op->GetRight(); + if (right->IsIntConstant()) { + *left_instruction = left; + int32_t c = right->AsIntConstant()->GetValue(); + *right_constant = instruction->IsAdd() ? c : -c; + return true; + } + } + *left_instruction = nullptr; + *right_constant = 0; + return false; + } + // Try to detect useful value bound format from an instruction, e.g. // a constant or array length related value. static ValueBound DetectValueBoundFromValue(HInstruction* instruction, bool* found) { @@ -63,13 +82,12 @@ class ValueBound : public ValueObject { return ValueBound(instruction, 0); } // Try to detect (array.length + c) format. - if (instruction->IsAdd()) { - HAdd* add = instruction->AsAdd(); - HInstruction* left = add->GetLeft(); - HInstruction* right = add->GetRight(); - if (left->IsArrayLength() && right->IsIntConstant()) { + HInstruction *left; + int32_t right; + if (IsAddOrSubAConstant(instruction, &left, &right)) { + if (left->IsArrayLength()) { *found = true; - return ValueBound(left, right->AsIntConstant()->GetValue()); + return ValueBound(left, right); } } @@ -79,10 +97,13 @@ class ValueBound : public ValueObject { } HInstruction* GetInstruction() const { return instruction_; } - int GetConstant() const { return constant_; } + int32_t GetConstant() const { return constant_; } - bool IsRelativeToArrayLength() const { - return instruction_ != nullptr && instruction_->IsArrayLength(); + bool IsRelatedToArrayLength() const { + // Some bounds are created with HNewArray* as the instruction instead + // of HArrayLength*. They are treated the same. + return (instruction_ != nullptr) && + (instruction_->IsArrayLength() || instruction_->IsNewArray()); } bool IsConstant() const { @@ -96,54 +117,45 @@ class ValueBound : public ValueObject { return instruction_ == bound.instruction_ && constant_ == bound.constant_; } - // Returns if it's certain bound1 >= bound2. - bool GreaterThanOrEqual(ValueBound bound) const { - if (instruction_ == bound.instruction_) { - if (instruction_ == nullptr) { - // Pure constant. - return constant_ >= bound.constant_; - } - // There might be overflow/underflow. Be conservative for now. 
- return false; + static HInstruction* FromArrayLengthToNewArrayIfPossible(HInstruction* instruction) { + // Null check on the NewArray should have been eliminated by instruction + // simplifier already. + if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { + return instruction->InputAt(0)->AsNewArray(); } - // Not comparable. Just return false. - return false; + return instruction; } - // Returns if it's certain bound1 <= bound2. - bool LessThanOrEqual(ValueBound bound) const { - if (instruction_ == bound.instruction_) { - if (instruction_ == nullptr) { - // Pure constant. - return constant_ <= bound.constant_; - } - if (IsRelativeToArrayLength()) { - // Array length is guaranteed to be no less than 0. - // No overflow/underflow can happen if both constants are negative. - if (constant_ <= 0 && bound.constant_ <= 0) { - return constant_ <= bound.constant_; - } - // There might be overflow/underflow. Be conservative for now. - return false; - } + static bool Equal(HInstruction* instruction1, HInstruction* instruction2) { + if (instruction1 == instruction2) { + return true; } - // In case the array length is some constant, we can - // still compare. - if (IsConstant() && bound.IsRelativeToArrayLength()) { - HInstruction* array = bound.GetInstruction()->AsArrayLength()->InputAt(0); - if (array->IsNullCheck()) { - array = array->AsNullCheck()->InputAt(0); - } - if (array->IsNewArray()) { - HInstruction* len = array->InputAt(0); - if (len->IsIntConstant()) { - int len_const = len->AsIntConstant()->GetValue(); - return constant_ <= len_const + bound.GetConstant(); - } - } + if (instruction1 == nullptr || instruction2 == nullptr) { + return false; } + // Some bounds are created with HNewArray* as the instruction instead + // of HArrayLength*. They are treated the same. + instruction1 = FromArrayLengthToNewArrayIfPossible(instruction1); + instruction2 = FromArrayLengthToNewArrayIfPossible(instruction2); + return instruction1 == instruction2; + } + + // Returns if it's certain this->bound >= `bound`. + bool GreaterThanOrEqualTo(ValueBound bound) const { + if (Equal(instruction_, bound.instruction_)) { + return constant_ >= bound.constant_; + } + // Not comparable. Just return false. + return false; + } + + // Returns if it's certain this->bound <= `bound`. + bool LessThanOrEqualTo(ValueBound bound) const { + if (Equal(instruction_, bound.instruction_)) { + return constant_ <= bound.constant_; + } // Not comparable. Just return false. return false; } @@ -151,10 +163,11 @@ class ValueBound : public ValueObject { // Try to narrow lower bound. Returns the greatest of the two if possible. // Pick one if they are not comparable. static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) { - if (bound1.instruction_ == bound2.instruction_) { - // Same instruction, compare the constant part. - return ValueBound(bound1.instruction_, - std::max(bound1.constant_, bound2.constant_)); + if (bound1.GreaterThanOrEqualTo(bound2)) { + return bound1; + } + if (bound2.GreaterThanOrEqualTo(bound1)) { + return bound2; } // Not comparable. Just pick one. We may lose some info, but that's ok. @@ -165,58 +178,71 @@ class ValueBound : public ValueObject { // Try to narrow upper bound. Returns the lowest of the two if possible. // Pick one if they are not comparable. static ValueBound NarrowUpperBound(ValueBound bound1, ValueBound bound2) { - if (bound1.instruction_ == bound2.instruction_) { - // Same instruction, compare the constant part. 
- return ValueBound(bound1.instruction_, - std::min(bound1.constant_, bound2.constant_)); + if (bound1.LessThanOrEqualTo(bound2)) { + return bound1; + } + if (bound2.LessThanOrEqualTo(bound1)) { + return bound2; } // Not comparable. Just pick one. We may lose some info, but that's ok. // Favor array length as upper bound. - return bound1.IsRelativeToArrayLength() ? bound1 : bound2; + return bound1.IsRelatedToArrayLength() ? bound1 : bound2; } - // Add a constant to a ValueBound. If the constant part of the ValueBound - // overflows/underflows, then we can't accurately represent it. For correctness, - // just return Max/Min() depending on whether the returned ValueBound is used for - // lower/upper bound. - ValueBound Add(int c, bool* overflow_or_underflow) const { - *overflow_or_underflow = false; + // Add a constant to a ValueBound. + // `overflow` or `underflow` will return whether the resulting bound may + // overflow or underflow an int. + ValueBound Add(int32_t c, bool* overflow, bool* underflow) const { + *overflow = *underflow = false; if (c == 0) { return *this; } - int new_constant; + int32_t new_constant; if (c > 0) { if (constant_ > INT_MAX - c) { - // Constant part overflows. - *overflow_or_underflow = true; + *overflow = true; return Max(); - } else { - new_constant = constant_ + c; } + + new_constant = constant_ + c; + // (array.length + non-positive-constant) won't overflow an int. + if (IsConstant() || (IsRelatedToArrayLength() && new_constant <= 0)) { + return ValueBound(instruction_, new_constant); + } + // Be conservative. + *overflow = true; + return Max(); } else { if (constant_ < INT_MIN - c) { - // Constant part underflows. - *overflow_or_underflow = true; - return Max(); - } else { - new_constant = constant_ + c; + *underflow = true; + return Min(); + } + + new_constant = constant_ + c; + // Regardless of the value new_constant, (array.length+new_constant) will + // never underflow since array.length is no less than 0. + if (IsConstant() || IsRelatedToArrayLength()) { + return ValueBound(instruction_, new_constant); } + // Be conservative. + *underflow = true; + return Min(); } return ValueBound(instruction_, new_constant); } private: HInstruction* instruction_; - int constant_; + int32_t constant_; }; /** * Represent a range of lower bound and upper bound, both being inclusive. * Currently a ValueRange may be generated as a result of the following: * comparisons related to array bounds, array bounds check, add/sub on top - * of an existing value range, or a loop phi corresponding to an + * of an existing value range, NewArray or a loop phi corresponding to an * incrementing/decrementing array index (MonotonicValueRange). */ class ValueRange : public ArenaObject<kArenaAllocMisc> { @@ -241,8 +267,8 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { return true; } DCHECK(!other_range->IsMonotonicValueRange()); - return lower_.GreaterThanOrEqual(other_range->lower_) && - upper_.LessThanOrEqual(other_range->upper_); + return lower_.GreaterThanOrEqualTo(other_range->lower_) && + upper_.LessThanOrEqualTo(other_range->upper_); } // Returns the intersection of this and range. @@ -263,29 +289,24 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { ValueBound::NarrowUpperBound(upper_, range->upper_)); } - // Shift a range by a constant. If either bound can't be represented - // as (instruction+c) format due to possible overflow/underflow, - // return the full integer range. 
- ValueRange* Add(int constant) const { - bool overflow_or_underflow; - ValueBound lower = lower_.Add(constant, &overflow_or_underflow); - if (overflow_or_underflow) { - // We can't accurately represent the bounds anymore. - return FullIntRange(); - } - ValueBound upper = upper_.Add(constant, &overflow_or_underflow); - if (overflow_or_underflow) { - // We can't accurately represent the bounds anymore. - return FullIntRange(); + // Shift a range by a constant. + ValueRange* Add(int32_t constant) const { + bool overflow, underflow; + ValueBound lower = lower_.Add(constant, &overflow, &underflow); + if (underflow) { + // Lower bound underflow will wrap around to positive values + // and invalidate the upper bound. + return nullptr; + } + ValueBound upper = upper_.Add(constant, &overflow, &underflow); + if (overflow) { + // Upper bound overflow will wrap around to negative values + // and invalidate the lower bound. + return nullptr; } return new (allocator_) ValueRange(allocator_, lower, upper); } - // Return [INT_MIN, INT_MAX]. - ValueRange* FullIntRange() const { - return new (allocator_) ValueRange(allocator_, ValueBound::Min(), ValueBound::Max()); - } - private: ArenaAllocator* const allocator_; const ValueBound lower_; // inclusive @@ -304,7 +325,7 @@ class MonotonicValueRange : public ValueRange { public: MonotonicValueRange(ArenaAllocator* allocator, HInstruction* initial, - int increment, + int32_t increment, ValueBound bound) // To be conservative, give it full range [INT_MIN, INT_MAX] in case it's // used as a regular value range, due to possible overflow/underflow. @@ -343,23 +364,17 @@ class MonotonicValueRange : public ValueRange { // make assumptions about the max array length, e.g. due to the max heap size, // divided by the element size (such as 4 bytes for each integer array), we can // lower this number and rule out some possible overflows. - int max_array_len = INT_MAX; - - int upper = INT_MAX; - if (range->GetUpper().IsConstant()) { - upper = range->GetUpper().GetConstant(); - } else if (range->GetUpper().IsRelativeToArrayLength()) { - int constant = range->GetUpper().GetConstant(); - if (constant <= 0) { - // Normal case. e.g. <= array.length - 1, <= array.length - 2, etc. - upper = max_array_len + constant; - } else { - // There might be overflow. Give up narrowing. - return this; - } - } else { - // There might be overflow. Give up narrowing. - return this; + int32_t max_array_len = INT_MAX; + + // max possible integer value of range's upper value. + int32_t upper = INT_MAX; + // Try to lower upper. + ValueBound upper_bound = range->GetUpper(); + if (upper_bound.IsConstant()) { + upper = upper_bound.GetConstant(); + } else if (upper_bound.IsRelatedToArrayLength() && upper_bound.GetConstant() <= 0) { + // Normal case. e.g. <= array.length - 1. + upper = max_array_len + upper_bound.GetConstant(); } // If we can prove for the last number in sequence of initial_, @@ -368,13 +383,13 @@ class MonotonicValueRange : public ValueRange { // then this MonoticValueRange is narrowed to a normal value range. // Be conservative first, assume last number in the sequence hits upper. - int last_num_in_sequence = upper; + int32_t last_num_in_sequence = upper; if (initial_->IsIntConstant()) { - int initial_constant = initial_->AsIntConstant()->GetValue(); + int32_t initial_constant = initial_->AsIntConstant()->GetValue(); if (upper <= initial_constant) { last_num_in_sequence = upper; } else { - // Cast to int64_t for the substraction part to avoid int overflow. 
+ // Cast to int64_t for the substraction part to avoid int32_t overflow. last_num_in_sequence = initial_constant + ((int64_t)upper - (int64_t)initial_constant) / increment_ * increment_; } @@ -392,23 +407,22 @@ class MonotonicValueRange : public ValueRange { ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper()); // Need to take care of underflow. Try to prove underflow won't happen - // for common cases. Basically need to be able to prove for any value - // that's >= range->GetLower(), it won't be positive with value+increment. + // for common cases. if (range->GetLower().IsConstant()) { - int constant = range->GetLower().GetConstant(); + int32_t constant = range->GetLower().GetConstant(); if (constant >= INT_MIN - increment_) { return new (GetAllocator()) ValueRange(GetAllocator(), range->GetLower(), upper); } } - // There might be underflow. Give up narrowing. + // For non-constant lower bound, just assume might be underflow. Give up narrowing. return this; } } private: HInstruction* const initial_; - const int increment_; + const int32_t increment_; ValueBound bound_; // Additional value bound info for initial_; DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange); @@ -446,8 +460,8 @@ class BCEVisitor : public HGraphVisitor { return nullptr; } - // Narrow the value range of 'instruction' at the end of 'basic_block' with 'range', - // and push the narrowed value range to 'successor'. + // Narrow the value range of `instruction` at the end of `basic_block` with `range`, + // and push the narrowed value range to `successor`. void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block, HBasicBlock* successor, ValueRange* range) { ValueRange* existing_range = LookupValueRange(instruction, basic_block); @@ -472,10 +486,12 @@ class BCEVisitor : public HGraphVisitor { bool found; ValueBound bound = ValueBound::DetectValueBoundFromValue(right, &found); + // Each comparison can establish a lower bound and an upper bound + // for the left hand side. ValueBound lower = bound; ValueBound upper = bound; if (!found) { - // No constant or array.length+c bound found. + // No constant or array.length+c format bound found. // For i<j, we can still use j's upper bound as i's upper bound. Same for lower. ValueRange* range = LookupValueRange(right, block); if (range != nullptr) { @@ -487,13 +503,13 @@ class BCEVisitor : public HGraphVisitor { } } - bool overflow_or_underflow; + bool overflow, underflow; if (cond == kCondLT || cond == kCondLE) { if (!upper.Equals(ValueBound::Max())) { - int compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive - ValueBound new_upper = upper.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_upper = ValueBound::Max(); + int32_t compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive + ValueBound new_upper = upper.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper); @@ -501,11 +517,11 @@ class BCEVisitor : public HGraphVisitor { } // array.length as a lower bound isn't considered useful. - if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) { - int compensation = (cond == kCondLE) ? 
1 : 0; // lower bound is inclusive - ValueBound new_lower = lower.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_lower = ValueBound::Min(); + if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) { + int32_t compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive + ValueBound new_lower = lower.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max()); @@ -513,11 +529,11 @@ class BCEVisitor : public HGraphVisitor { } } else if (cond == kCondGT || cond == kCondGE) { // array.length as a lower bound isn't considered useful. - if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) { - int compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive - ValueBound new_lower = lower.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_lower = ValueBound::Min(); + if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) { + int32_t compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive + ValueBound new_lower = lower.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max()); @@ -525,10 +541,10 @@ class BCEVisitor : public HGraphVisitor { } if (!upper.Equals(ValueBound::Max())) { - int compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive - ValueBound new_upper = upper.Add(compensation, &overflow_or_underflow); - if (overflow_or_underflow) { - new_upper = ValueBound::Max(); + int32_t compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive + ValueBound new_upper = upper.Add(compensation, &overflow, &underflow); + if (overflow || underflow) { + return; } ValueRange* new_range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper); @@ -541,41 +557,56 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); - ValueRange* index_range = LookupValueRange(index, block); - - if (index_range != nullptr) { - ValueBound lower = ValueBound(nullptr, 0); // constant 0 - ValueBound upper = ValueBound(array_length, -1); // array_length - 1 - ValueRange* array_range = new (GetGraph()->GetArena()) - ValueRange(GetGraph()->GetArena(), lower, upper); - if (index_range->FitsIn(array_range)) { - ReplaceBoundsCheck(bounds_check, index); + DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength()); + + if (!index->IsIntConstant()) { + ValueRange* index_range = LookupValueRange(index, block); + if (index_range != nullptr) { + ValueBound lower = ValueBound(nullptr, 0); // constant 0 + ValueBound upper = ValueBound(array_length, -1); // array_length - 1 + ValueRange* array_range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, upper); + if (index_range->FitsIn(array_range)) { + ReplaceBoundsCheck(bounds_check, index); + return; + } + } + } else { + int32_t constant = index->AsIntConstant()->GetValue(); + if (constant < 0) { + // Will always throw exception. 
+ return; + } + if (array_length->IsIntConstant()) { + if (constant < array_length->AsIntConstant()->GetValue()) { + ReplaceBoundsCheck(bounds_check, index); + } return; } - } - if (index->IsIntConstant()) { - ValueRange* array_length_range = LookupValueRange(array_length, block); - int constant = index->AsIntConstant()->GetValue(); - if (array_length_range != nullptr && - array_length_range->GetLower().IsConstant()) { - if (constant < array_length_range->GetLower().GetConstant()) { + DCHECK(array_length->IsArrayLength()); + ValueRange* existing_range = LookupValueRange(array_length, block); + if (existing_range != nullptr) { + ValueBound lower = existing_range->GetLower(); + DCHECK(lower.IsConstant()); + if (constant < lower.GetConstant()) { ReplaceBoundsCheck(bounds_check, index); return; + } else { + // Existing range isn't strong enough to eliminate the bounds check. + // Fall through to update the array_length range with info from this + // bounds check. } } // Once we have an array access like 'array[5] = 1', we record array.length >= 6. + // We currently don't do it for non-constant index since a valid array[i] can't prove + // a valid array[i-1] yet due to the lower bound side. ValueBound lower = ValueBound(nullptr, constant + 1); ValueBound upper = ValueBound::Max(); ValueRange* range = new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), lower, upper); - ValueRange* existing_range = LookupValueRange(array_length, block); - ValueRange* new_range = range; - if (existing_range != nullptr) { - new_range = range->Narrow(existing_range); - } - GetValueRangeMap(block)->Overwrite(array_length->GetId(), new_range); + GetValueRangeMap(block)->Overwrite(array_length->GetId(), range); } } @@ -588,14 +619,12 @@ class BCEVisitor : public HGraphVisitor { if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) { DCHECK_EQ(phi->InputCount(), 2U); HInstruction* instruction = phi->InputAt(1); - if (instruction->IsAdd()) { - HAdd* add = instruction->AsAdd(); - HInstruction* left = add->GetLeft(); - HInstruction* right = add->GetRight(); - if (left == phi && right->IsIntConstant()) { + HInstruction *left; + int32_t increment; + if (ValueBound::IsAddOrSubAConstant(instruction, &left, &increment)) { + if (left == phi) { HInstruction* initial_value = phi->InputAt(0); ValueRange* range = nullptr; - int increment = right->AsIntConstant()->GetValue(); if (increment == 0) { // Add constant 0. It's really a fixed value. range = new (GetGraph()->GetArena()) ValueRange( @@ -682,10 +711,10 @@ class BCEVisitor : public HGraphVisitor { if (right_range != nullptr) { ValueBound lower = right_range->GetLower(); ValueBound upper = right_range->GetUpper(); - if (lower.IsConstant() && upper.IsRelativeToArrayLength()) { + if (lower.IsConstant() && upper.IsRelatedToArrayLength()) { HInstruction* upper_inst = upper.GetInstruction(); - if (upper_inst->IsArrayLength() && - upper_inst->AsArrayLength() == array_length) { + // Make sure it's the same array. + if (ValueBound::Equal(array_length, upper_inst)) { // (array.length - v) where v is in [c1, array.length + c2] // gets [-c2, array.length - c1] as its value range. 
ValueRange* range = new (GetGraph()->GetArena()) ValueRange( @@ -699,6 +728,26 @@ class BCEVisitor : public HGraphVisitor { } } + void VisitNewArray(HNewArray* new_array) { + HInstruction* len = new_array->InputAt(0); + if (!len->IsIntConstant()) { + HInstruction *left; + int32_t right_const; + if (ValueBound::IsAddOrSubAConstant(len, &left, &right_const)) { + // (left + right_const) is used as size to new the array. + // We record "-right_const <= left <= new_array - right_const"; + ValueBound lower = ValueBound(nullptr, -right_const); + // We use new_array for the bound instead of new_array.length, + // which isn't available as an instruction yet. new_array will + // be treated the same as new_array.length when it's used in a ValueBound. + ValueBound upper = ValueBound(new_array, -right_const); + ValueRange* range = new (GetGraph()->GetArena()) + ValueRange(GetGraph()->GetArena(), lower, upper); + GetValueRangeMap(new_array->GetBlock())->Overwrite(left->GetId(), range); + } + } + } + std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_; DISALLOW_COPY_AND_ASSIGN(BCEVisitor); diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 3dcb08d195..662834a91c 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -17,6 +17,7 @@ #include "bounds_check_elimination.h" #include "builder.h" #include "gvn.h" +#include "instruction_simplifier.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" @@ -26,7 +27,9 @@ namespace art { -static void RunGvn(HGraph* graph) { +static void RunSimplifierAndGvn(HGraph* graph) { + InstructionSimplifier simplify(graph); + simplify.Run(); SideEffectsAnalysis side_effects(graph); side_effects.Run(); GVNOptimization(graph, side_effects).Run(); @@ -127,7 +130,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { block3->AddSuccessor(block4); // False successor graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check2)); @@ -202,7 +205,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { block3->AddSuccessor(exit); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -277,7 +280,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { block3->AddSuccessor(exit); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -351,7 +354,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { exit->AddInstruction(new (&allocator) HExit()); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); ASSERT_FALSE(IsRemoved(bounds_check5)); @@ -450,7 +453,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // HArrayLength which uses the null check as its input. 
graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -458,7 +461,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -466,7 +469,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -474,7 +477,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -483,7 +486,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // array[i] = 10; // Can't eliminate due to overflow concern. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2(graph); bounds_check_elimination_with_increment_2.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -491,7 +494,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph); bounds_check_elimination_with_increment_2_from_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -591,7 +594,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // HArrayLength which uses the null check as its input. graph = BuildSSAGraph2(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -599,7 +602,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. 
} graph = BuildSSAGraph2(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -607,7 +610,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, -1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -615,7 +618,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_less_than(graph); bounds_check_elimination_with_less_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -623,7 +626,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph); bounds_check_elimination_increment_minus_2.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -710,7 +713,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -719,7 +722,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -728,7 +731,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -737,7 +740,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. 
} graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_8(graph); bounds_check_elimination_increment_8.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -838,7 +841,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // HArrayLength which uses the null check as its input. graph = BuildSSAGraph4(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_after_gvn(graph); bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -846,7 +849,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); @@ -854,7 +857,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); ASSERT_FALSE(IsRemoved(bounds_check)); @@ -1030,7 +1033,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_body_add->AddSuccessor(outer_header); graph->BuildDominatorTree(); - RunGvn(graph); + RunSimplifierAndGvn(graph); // gvn should remove the same bounds check. ASSERT_FALSE(IsRemoved(bounds_check1)); ASSERT_FALSE(IsRemoved(bounds_check2)); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index fd4e391470..d0739a6de2 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -41,8 +41,6 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) { } void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { - DCHECK_EQ(frame_size_, kUninitializedFrameSize); - Initialize(); if (!is_leaf) { MarkNotLeaf(); @@ -59,7 +57,6 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) { } void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) { - HGraphVisitor* location_builder = GetLocationBuilder(); HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); GenerateFrameEntry(); @@ -69,8 +66,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); if (is_baseline) { - current->Accept(location_builder); - InitLocations(current); + InitLocationsBaseline(current); } current->Accept(instruction_visitor); } @@ -88,7 +84,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) void CodeGenerator::CompileOptimized(CodeAllocator* allocator) { // The register allocator already called `InitializeCodeGeneration`, // where the frame size has been computed. 
- DCHECK_NE(frame_size_, kUninitializedFrameSize); DCHECK(block_order_ != nullptr); Initialize(); CompileInternal(allocator, /* is_baseline */ false); @@ -138,13 +133,22 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, ComputeSpillMask(); first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; - SetFrameSize(RoundUp( - number_of_spill_slots * kVRegSize - + number_of_out_slots * kVRegSize - + maximum_number_of_live_core_registers * GetWordSize() - + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() - + FrameEntrySpillSize(), - kStackAlignment)); + if (number_of_spill_slots == 0 + && !HasAllocatedCalleeSaveRegisters() + && IsLeafMethod() + && !RequiresCurrentMethod()) { + DCHECK_EQ(maximum_number_of_live_core_registers, 0u); + DCHECK_EQ(maximum_number_of_live_fp_registers, 0u); + SetFrameSize(CallPushesPC() ? GetWordSize() : 0); + } else { + SetFrameSize(RoundUp( + number_of_spill_slots * kVRegSize + + number_of_out_slots * kVRegSize + + maximum_number_of_live_core_registers * GetWordSize() + + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize() + + FrameEntrySpillSize(), + kStackAlignment)); + } } Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const { @@ -294,7 +298,8 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const { } } -void CodeGenerator::InitLocations(HInstruction* instruction) { +void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) { + AllocateLocations(instruction); if (instruction->GetLocations() == nullptr) { if (instruction->IsTemporary()) { HInstruction* previous = instruction->GetPrevious(); @@ -320,6 +325,19 @@ void CodeGenerator::InitLocations(HInstruction* instruction) { } } +void CodeGenerator::AllocateLocations(HInstruction* instruction) { + instruction->Accept(GetLocationBuilder()); + LocationSummary* locations = instruction->GetLocations(); + if (!instruction->IsSuspendCheckEntry()) { + if (locations != nullptr && locations->CanCall()) { + MarkNotLeaf(); + } + if (instruction->NeedsCurrentMethod()) { + SetRequiresCurrentMethod(); + } + } +} + bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const { DCHECK_EQ(block_order_->Get(current_block_index_), current); return (current_block_index_ < block_order_->Size() - 1) diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index ab63b911b2..efd0c84797 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -30,7 +30,6 @@ namespace art { static size_t constexpr kVRegSize = 4; -static size_t constexpr kUninitializedFrameSize = 0; // Binary encoding of 2^32 for type double. 
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000); @@ -107,8 +106,6 @@ class CodeGenerator { virtual void GenerateFrameExit() = 0; virtual void Bind(HBasicBlock* block) = 0; virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0; - virtual HGraphVisitor* GetLocationBuilder() = 0; - virtual HGraphVisitor* GetInstructionVisitor() = 0; virtual Assembler* GetAssembler() = 0; virtual size_t GetWordSize() const = 0; virtual size_t GetFloatingPointSpillSlotSize() const = 0; @@ -196,6 +193,15 @@ class CodeGenerator { void MarkNotLeaf() { is_leaf_ = false; + requires_current_method_ = true; + } + + void SetRequiresCurrentMethod() { + requires_current_method_ = true; + } + + bool RequiresCurrentMethod() const { + return requires_current_method_; } // Clears the spill slots taken by loop phis in the `LocationSummary` of the @@ -228,6 +234,8 @@ class CodeGenerator { allocated_registers_.Add(location); } + void AllocateLocations(HInstruction* instruction); + protected: CodeGenerator(HGraph* graph, size_t number_of_core_registers, @@ -236,7 +244,7 @@ class CodeGenerator { uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options) - : frame_size_(kUninitializedFrameSize), + : frame_size_(0), core_spill_mask_(0), fpu_spill_mask_(0), first_register_slot_in_slow_path_(0), @@ -255,6 +263,7 @@ class CodeGenerator { block_order_(nullptr), current_block_index_(0), is_leaf_(true), + requires_current_method_(false), stack_map_stream_(graph->GetArena()) {} // Register allocation logic. @@ -269,11 +278,12 @@ class CodeGenerator { virtual Location GetStackLocation(HLoadLocal* load) const = 0; virtual ParallelMoveResolver* GetMoveResolver() = 0; + virtual HGraphVisitor* GetLocationBuilder() = 0; + virtual HGraphVisitor* GetInstructionVisitor() = 0; // Returns the location of the first spilled entry for floating point registers, // relative to the stack pointer. uint32_t GetFpuSpillStart() const { - DCHECK_NE(frame_size_, kUninitializedFrameSize); return GetFrameSize() - FrameEntrySpillSize(); } @@ -289,6 +299,21 @@ class CodeGenerator { return GetFpuSpillSize() + GetCoreSpillSize(); } + bool HasAllocatedCalleeSaveRegisters() const { + // We check the core registers against 1 because it always comprises the return PC. + return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1) + || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0); + } + + bool CallPushesPC() const { + InstructionSet instruction_set = GetInstructionSet(); + return instruction_set == kX86 || instruction_set == kX86_64; + } + + bool HasEmptyFrame() const { + return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0); + } + // Frame size required for this method. uint32_t frame_size_; uint32_t core_spill_mask_; @@ -311,7 +336,7 @@ class CodeGenerator { const uint32_t fpu_callee_save_mask_; private: - void InitLocations(HInstruction* instruction); + void InitLocationsBaseline(HInstruction* instruction); size_t GetStackOffsetOfSavedRegister(size_t index); void CompileInternal(CodeAllocator* allocator, bool is_baseline); @@ -328,8 +353,12 @@ class CodeGenerator { // we are generating code for. size_t current_block_index_; + // Whether the method is a leaf method. bool is_leaf_; + // Whether an instruction in the graph accesses the current method. 
+ bool requires_current_method_; + StackMapStream stack_map_stream_; DISALLOW_COPY_AND_ASSIGN(CodeGenerator); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 78fd181dcf..c6c1fd73f9 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -19,6 +19,8 @@ #include "arch/arm/instruction_set_features_arm.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "intrinsics.h" +#include "intrinsics_arm.h" #include "mirror/array-inl.h" #include "mirror/art_method.h" #include "mirror/class.h" @@ -32,11 +34,6 @@ namespace art { namespace arm { -static DRegister FromLowSToD(SRegister reg) { - DCHECK_EQ(reg % 2, 0); - return static_cast<DRegister>(reg / 2); -} - static bool ExpectedPairLayout(Location location) { // We expected this for both core and fpu register pairs. return ((location.low() & 1) == 0) && (location.low() + 1 == location.high()); @@ -73,20 +70,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegis #define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())-> #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value() -class SlowPathCodeARM : public SlowPathCode { - public: - SlowPathCodeARM() : entry_label_(), exit_label_() {} - - Label* GetEntryLabel() { return &entry_label_; } - Label* GetExitLabel() { return &exit_label_; } - - private: - Label entry_label_; - Label exit_label_; - - DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM); -}; - class NullCheckSlowPathARM : public SlowPathCodeARM { public: explicit NullCheckSlowPathARM(HNullCheck* instruction) : instruction_(instruction) {} @@ -396,10 +379,6 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(true), isa_features_(isa_features) { - // Save one extra register for baseline. Note that on thumb2, there is no easy - // instruction to restore just the PC, so this actually helps both baseline - // and non-baseline to save and restore at least two registers at entry and exit. - AddAllocatedRegister(Location::RegisterLocation(kCoreSavedRegisterForBaseline)); // Save the PC register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(PC)); } @@ -508,6 +487,10 @@ static uint32_t LeastSignificantBit(uint32_t mask) { void CodeGeneratorARM::ComputeSpillMask() { core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; + // Save one extra register for baseline. Note that on thumb2, there is no easy + // instruction to restore just the PC, so this actually helps both baseline + // and non-baseline to save and restore at least two registers at entry and exit. 
+ core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline); DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved"; fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; // We use vpush and vpop for saving and restoring floating point registers, which take @@ -529,6 +512,10 @@ void CodeGeneratorARM::GenerateFrameEntry() { DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); __ Bind(&frame_entry_label_); + if (HasEmptyFrame()) { + return; + } + if (!skip_overflow_check) { __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); __ LoadFromOffset(kLoadWord, IP, IP, 0); @@ -547,6 +534,10 @@ void CodeGeneratorARM::GenerateFrameEntry() { } void CodeGeneratorARM::GenerateFrameExit() { + if (HasEmptyFrame()) { + __ bx(LR); + return; + } __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize()); if (fpu_spill_mask_ != 0) { SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_)); @@ -1168,44 +1159,37 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) { } void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), + codegen_->GetInstructionSetFeatures()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } void CodeGeneratorARM::LoadCurrentMethod(Register reg) { + DCHECK(RequiresCurrentMethod()); __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); } -void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { - Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); - - // TODO: Implement all kinds of calls: - // 1) boot -> boot - // 2) app -> boot - // 3) app -> app - // - // Currently we implement the app -> app logic, which looks up in the resolve cache. 
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorARM intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; +} - // temp = method; - codegen_->LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset( - kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); - // temp = temp[index_in_cache] - __ LoadFromOffset( - kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); - // LR = temp[offset_of_quick_compiled_code] - __ LoadFromOffset(kLoadWord, LR, temp, - mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmWordSize).Int32Value()); - // LR() - __ blx(LR); - } else { - __ bl(codegen_->GetFrameEntryLabel()); +void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; } - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); - DCHECK(!codegen_->IsLeafMethod()); + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); + + codegen_->GenerateStaticOrDirectCall(invoke, temp); } void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { @@ -1223,10 +1207,20 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { } void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { + IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(), + codegen_->GetInstructionSetFeatures()); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() + invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry); @@ -3776,5 +3770,38 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } +void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp) { + DCHECK_EQ(temp, kArtMethodRegister); + + // TODO: Implement all kinds of calls: + // 1) boot -> boot + // 2) app -> boot + // 3) app -> app + // + // Currently we implement the app -> app logic, which looks up in the resolve cache. 
+ + // temp = method; + LoadCurrentMethod(temp); + if (!invoke->IsRecursive()) { + // temp = temp->dex_cache_resolved_methods_; + __ LoadFromOffset( + kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); + // temp = temp[index_in_cache] + __ LoadFromOffset( + kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); + // LR = temp[offset_of_quick_compiled_code] + __ LoadFromOffset(kLoadWord, LR, temp, + mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArmWordSize).Int32Value()); + // LR() + __ blx(LR); + } else { + __ bl(GetFrameEntryLabel()); + } + + RecordPcInfo(invoke, invoke->GetDexPc()); + DCHECK(!IsLeafMethod()); +} + } // namespace arm } // namespace art diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 4b03dffd38..47d81ff984 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -39,6 +39,14 @@ static constexpr SRegister kParameterFpuRegisters[] = { S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15 }; static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); +static constexpr Register kArtMethodRegister = R0; + +static constexpr DRegister FromLowSToD(SRegister reg) { + return DCHECK_CONSTEXPR(reg % 2 == 0, , D0) + static_cast<DRegister>(reg / 2); +} + + class InvokeDexCallingConvention : public CallingConvention<Register, SRegister> { public: InvokeDexCallingConvention() @@ -90,6 +98,20 @@ class ParallelMoveResolverARM : public ParallelMoveResolver { DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM); }; +class SlowPathCodeARM : public SlowPathCode { + public: + SlowPathCodeARM() : entry_label_(), exit_label_() {} + + Label* GetEntryLabel() { return &entry_label_; } + Label* GetExitLabel() { return &exit_label_; } + + private: + Label entry_label_; + Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM); +}; + class LocationsBuilderARM : public HGraphVisitor { public: LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen) @@ -249,6 +271,8 @@ class CodeGeneratorARM : public CodeGenerator { Label* GetFrameEntryLabel() { return &frame_entry_label_; } + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp); + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 3bc23fe4f3..46f1a9b51d 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -402,15 +402,15 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& com kNumberOfAllocatableRegisters, kNumberOfAllocatableFPRegisters, kNumberOfAllocatableRegisterPairs, - (1 << LR), - 0, + callee_saved_core_registers.list(), + callee_saved_fp_registers.list(), compiler_options), block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this) { // Save the link register (containing the return address) to mimic Quick. 
- AddAllocatedRegister(Location::RegisterLocation(LR)); + AddAllocatedRegister(LocationFrom(lr)); } #undef __ @@ -448,27 +448,32 @@ void CodeGeneratorARM64::GenerateFrameEntry() { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireX(); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); - __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); + __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); __ Ldr(wzr, MemOperand(temp, 0)); RecordPcInfo(nullptr, 0); } - int frame_size = GetFrameSize(); - __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); - __ PokeCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize()); - - // Stack layout: - // sp[frame_size - 8] : lr. - // ... : other preserved registers. - // sp[frame_size - regs_size]: first preserved register. - // ... : reserved frame space. - // sp[0] : current method. + if (!HasEmptyFrame()) { + int frame_size = GetFrameSize(); + // Stack layout: + // sp[frame_size - 8] : lr. + // ... : other preserved core registers. + // ... : other preserved fp registers. + // ... : reserved frame space. + // sp[0] : current method. + __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); + __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + } } void CodeGeneratorARM64::GenerateFrameExit() { - int frame_size = GetFrameSize(); - __ PeekCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize()); - __ Drop(frame_size); + if (!HasEmptyFrame()) { + int frame_size = GetFrameSize(); + __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize()); + __ Drop(frame_size); + } } void CodeGeneratorARM64::Bind(HBasicBlock* block) { @@ -555,26 +560,38 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value) { __ Bind(&done); } -void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const { - // Block reserved registers: - // ip0 (VIXL temporary) - // ip1 (VIXL temporary) - // tr - // lr - // sp is not part of the allocatable registers, so we don't need to block it. - // TODO: Avoid blocking callee-saved registers, and instead preserve them - // where necessary. +void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const { + // Blocked core registers: + // lr : Runtime reserved. + // tr : Runtime reserved. + // xSuspend : Runtime reserved. TODO: Unblock this when the runtime stops using it. + // ip1 : VIXL core temp. + // ip0 : VIXL core temp. + // + // Blocked fp registers: + // d31 : VIXL fp temp. 
CPURegList reserved_core_registers = vixl_reserved_core_registers; reserved_core_registers.Combine(runtime_reserved_core_registers); - reserved_core_registers.Combine(quick_callee_saved_registers); while (!reserved_core_registers.IsEmpty()) { blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true; } + CPURegList reserved_fp_registers = vixl_reserved_fp_registers; - reserved_fp_registers.Combine(CPURegList::GetCalleeSavedFP()); while (!reserved_core_registers.IsEmpty()) { blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true; } + + if (is_baseline) { + CPURegList reserved_core_baseline_registers = callee_saved_core_registers; + while (!reserved_core_baseline_registers.IsEmpty()) { + blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true; + } + + CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers; + while (!reserved_fp_baseline_registers.IsEmpty()) { + blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true; + } + } } Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const { @@ -947,6 +964,7 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type, } void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) { + DCHECK(RequiresCurrentMethod()); DCHECK(current_method.IsW()); __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset)); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 9a99dcccea..2e937e2c0f 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -50,14 +50,24 @@ static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegi const vixl::Register tr = vixl::x18; // Thread Register static const vixl::Register kArtMethodRegister = vixl::w0; // Method register on invoke. +const vixl::Register kQuickSuspendRegister = vixl::x19; const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1); const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31); -const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr); -const vixl::CPURegList quick_callee_saved_registers(vixl::CPURegister::kRegister, - vixl::kXRegSize, - kArm64CalleeSaveRefSpills); +// TODO: When the runtime does not use kQuickSuspendRegister as a suspend +// counter remove it from the reserved registers list. +const vixl::CPURegList runtime_reserved_core_registers(tr, kQuickSuspendRegister, vixl::lr); + +// Callee-saved registers defined by AAPCS64. 
+const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister, + vixl::kXRegSize, + vixl::x19.code(), + vixl::x30.code()); +const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister, + vixl::kDRegSize, + vixl::d8.code(), + vixl::d15.code()); Location ARM64ReturnLocation(Primitive::Type return_type); class SlowPathCodeARM64 : public SlowPathCode { @@ -191,10 +201,14 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; - static const vixl::CPURegList& GetFramePreservedRegisters() { - static const vixl::CPURegList frame_preserved_regs = - vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, vixl::lr.Bit()); - return frame_preserved_regs; + vixl::CPURegList GetFramePreservedCoreRegisters() const { + return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, + core_spill_mask_); + } + + vixl::CPURegList GetFramePreservedFPRegisters() const { + return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize, + fpu_spill_mask_); } void Bind(HBasicBlock* block) OVERRIDE; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 98f93a418a..1a95f418bc 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -470,12 +470,16 @@ void CodeGeneratorX86::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } - __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); - __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); + if (!HasEmptyFrame()) { + __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + __ movl(Address(ESP, kCurrentMethodStackOffset), EAX); + } } void CodeGeneratorX86::GenerateFrameExit() { - __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + if (!HasEmptyFrame()) { + __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize())); + } } void CodeGeneratorX86::Bind(HBasicBlock* block) { @@ -483,6 +487,7 @@ void CodeGeneratorX86::Bind(HBasicBlock* block) { } void CodeGeneratorX86::LoadCurrentMethod(Register reg) { + DCHECK(RequiresCurrentMethod()); __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 2ff53a0603..88f17533e8 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -487,6 +487,10 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { RecordPcInfo(nullptr, 0); } + if (HasEmptyFrame()) { + return; + } + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { Register reg = kCoreCalleeSaves[i]; if (allocated_registers_.ContainsCoreRegister(reg)) { @@ -509,6 +513,9 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { } void CodeGeneratorX86_64::GenerateFrameExit() { + if (HasEmptyFrame()) { + return; + } uint32_t xmm_spill_location = GetFpuSpillStart(); size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize(); for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { @@ -533,6 +540,7 @@ void CodeGeneratorX86_64::Bind(HBasicBlock* block) { } void CodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) { + DCHECK(RequiresCurrentMethod()); __ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); } @@ -790,7 +798,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { // Materialized condition, compare against 0. 
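// Editorial note, not part of the patch: the testl(reg, reg) forms introduced below set ZF and SF
// from the same value as cmpl(reg, Immediate(0)) and leave CF and OF clear in both cases, so the
// branch conditions behave identically; test simply needs no immediate byte and encodes shorter.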
Location lhs = if_instr->GetLocations()->InAt(0); if (lhs.IsRegister()) { - __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(0)); + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); } else { __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); @@ -806,8 +814,12 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { if (rhs.IsRegister()) { __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); } else if (rhs.IsConstant()) { - __ cmpl(lhs.AsRegister<CpuRegister>(), - Immediate(rhs.GetConstant()->AsIntConstant()->GetValue())); + int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue(); + if (constant == 0) { + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); + } else { + __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); + } } else { __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); @@ -883,15 +895,19 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) { CpuRegister reg = locations->Out().AsRegister<CpuRegister>(); // Clear register: setcc only sets the low byte. __ xorq(reg, reg); - if (locations->InAt(1).IsRegister()) { - __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), - locations->InAt(1).AsRegister<CpuRegister>()); - } else if (locations->InAt(1).IsConstant()) { - __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), - Immediate(locations->InAt(1).GetConstant()->AsIntConstant()->GetValue())); + Location lhs = locations->InAt(0); + Location rhs = locations->InAt(1); + if (rhs.IsRegister()) { + __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>()); + } else if (rhs.IsConstant()) { + int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue(); + if (constant == 0) { + __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); + } else { + __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant)); + } } else { - __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(), - Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex())); + __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } __ setcc(X86_64Condition(comp->GetCondition()), reg); } @@ -1840,8 +1856,8 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { switch (add->GetResultType()) { case Primitive::kPrimInt: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); - locations->SetOut(Location::SameAsFirstInput()); + locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -1869,16 +1885,27 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { LocationSummary* locations = add->GetLocations(); Location first = locations->InAt(0); Location second = locations->InAt(1); - DCHECK(first.Equals(locations->Out())); + Location out = locations->Out(); switch (add->GetResultType()) { case Primitive::kPrimInt: { if (second.IsRegister()) { - __ addl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + if (out.AsRegister<Register>() == first.AsRegister<Register>()) { + __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); + } else { + __ leal(out.AsRegister<CpuRegister>(), Address( + first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0)); + } } else if (second.IsConstant()) { - Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); - __ addl(first.AsRegister<CpuRegister>(), imm); + if 
(out.AsRegister<Register>() == first.AsRegister<Register>()) { + __ addl(out.AsRegister<CpuRegister>(), + Immediate(second.GetConstant()->AsIntConstant()->GetValue())); + } else { + __ leal(out.AsRegister<CpuRegister>(), Address( + first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue())); + } } else { + DCHECK(first.Equals(locations->Out())); __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); } break; @@ -2754,7 +2781,7 @@ void InstructionCodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instr Location obj = locations->InAt(0); if (obj.IsRegister()) { - __ cmpl(obj.AsRegister<CpuRegister>(), Immediate(0)); + __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>()); } else if (obj.IsStackSlot()) { __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0)); } else { @@ -3237,12 +3264,16 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); if (constant->IsIntConstant()) { - Immediate imm(constant->AsIntConstant()->GetValue()); + int32_t value = constant->AsIntConstant()->GetValue(); if (destination.IsRegister()) { - __ movl(destination.AsRegister<CpuRegister>(), imm); + if (value == 0) { + __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>()); + } else { + __ movl(destination.AsRegister<CpuRegister>(), Immediate(value)); + } } else { DCHECK(destination.IsStackSlot()) << destination; - __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); + __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); } } else if (constant->IsLongConstant()) { int64_t value = constant->AsLongConstant()->GetValue(); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 17c8f337ca..44dbb9d106 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -28,6 +28,7 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitArraySet(HArraySet* equal) OVERRIDE; void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; void VisitNullCheck(HNullCheck* instruction) OVERRIDE; + void VisitArrayLength(HArrayLength* instruction) OVERRIDE; }; void InstructionSimplifier::Run() { @@ -75,6 +76,18 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) { } } +void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) { + HInstruction* input = instruction->InputAt(0); + // If the array is a NewArray with constant size, replace the array length + // with the constant instruction. This helps the bounds check elimination phase. + if (input->IsNewArray()) { + input = input->InputAt(0); + if (input->IsIntConstant()) { + instruction->ReplaceWith(input); + } + } +} + void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { HInstruction* value = instruction->GetValue(); if (value->GetType() != Primitive::kPrimNot) return; diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc new file mode 100644 index 0000000000..a82d80af13 --- /dev/null +++ b/compiler/optimizing/intrinsics_arm.cc @@ -0,0 +1,883 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_arm.h"
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_arm.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/arm/assembler_arm.h"
+
+namespace art {
+
+namespace arm {
+
+ArmAssembler* IntrinsicCodeGeneratorARM::GetAssembler() {
+ return codegen_->GetAssembler();
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() {
+ return codegen_->GetGraph()->GetArena();
+}
+
+#define __ codegen->GetAssembler()->
+
+static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGeneratorARM* codegen) {
+ if (!trg.IsValid()) {
+ DCHECK(type == Primitive::kPrimVoid);
+ return;
+ }
+
+ DCHECK_NE(type, Primitive::kPrimVoid);
+
+ if (Primitive::IsIntegralType(type)) {
+ if (type == Primitive::kPrimLong) {
+ Register trg_reg_lo = trg.AsRegisterPairLow<Register>();
+ Register trg_reg_hi = trg.AsRegisterPairHigh<Register>();
+ Register res_reg_lo = R0;
+ Register res_reg_hi = R1;
+ if (trg_reg_lo != res_reg_hi) {
+ if (trg_reg_lo != res_reg_lo) {
+ __ mov(trg_reg_lo, ShifterOperand(res_reg_lo));
+ __ mov(trg_reg_hi, ShifterOperand(res_reg_hi));
+ } else {
+ DCHECK_EQ(trg_reg_lo + 1, trg_reg_hi);
+ }
+ } else {
+ __ mov(trg_reg_hi, ShifterOperand(res_reg_hi));
+ __ mov(trg_reg_lo, ShifterOperand(res_reg_lo));
+ }
+ } else {
+ Register trg_reg = trg.AsRegister<Register>();
+ Register res_reg = R0;
+ if (trg_reg != res_reg) {
+ __ mov(trg_reg, ShifterOperand(res_reg));
+ }
+ }
+ } else {
+ UNIMPLEMENTED(FATAL) << "Floating-point return.";
+ }
+}
+
+static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) {
+ if (invoke->InputCount() == 0) {
+ return;
+ }
+
+ LocationSummary* locations = invoke->GetLocations();
+ InvokeDexCallingConventionVisitor calling_convention_visitor;
+
+ // We're moving potentially two or more locations to locations that could overlap, so we need
+ // a parallel move resolver.
+ HParallelMove parallel_move(arena);
+
+ for (size_t i = 0; i < invoke->InputCount(); i++) {
+ HInstruction* input = invoke->InputAt(i);
+ Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
+ Location actual_loc = locations->InAt(i);
+
+ parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+ }
+
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+}
+
+// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
+// call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+// summary. If an intrinsic modifies those locations before a slowpath call, they must be
+// restored!
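// Editorial illustration, not part of the patch: MoveArguments() above needs HParallelMove
// because the argument shuffle can contain cycles. Assuming an invoke whose two inputs sit in
// R1 and R2 while the calling convention expects them in R2 and R1, two plain movs would clobber
// one of the values; the resolver breaks the cycle with a scratch register, roughly:
//   mov IP, R1
//   mov R1, R2
//   mov R2, IP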
+class IntrinsicSlowPathARM : public SlowPathCodeARM { + public: + explicit IntrinsicSlowPathARM(HInvoke* invoke) : invoke_(invoke) { } + + void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + CodeGeneratorARM* codegen = down_cast<CodeGeneratorARM*>(codegen_in); + __ Bind(GetEntryLabel()); + + codegen->SaveLiveRegisters(invoke_->GetLocations()); + + MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen); + + if (invoke_->IsInvokeStaticOrDirect()) { + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); + } else { + UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; + UNREACHABLE(); + } + + // Copy the result back to the expected output. + Location out = invoke_->GetLocations()->Out(); + if (out.IsValid()) { + DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. + DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + MoveFromReturnRegister(out, invoke_->GetType(), codegen); + } + + codegen->RestoreLiveRegisters(invoke_->GetLocations()); + __ b(GetExitLabel()); + } + + private: + // The instruction where this slow path is happening. + HInvoke* const invoke_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM); +}; + +#undef __ + +bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + return res != nullptr && res->Intrinsified(); +} + +#define __ assembler-> + +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + __ vmovrrd(output.AsRegisterPairLow<Register>(), + output.AsRegisterPairHigh<Register>(), + FromLowSToD(input.AsFpuRegisterPairLow<SRegister>())); + } else { + __ vmovrs(output.AsRegister<Register>(), input.AsFpuRegister<SRegister>()); + } +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { + Location input = locations->InAt(0); + Location output = locations->Out(); + if (is64bit) { + __ vmovdrr(FromLowSToD(output.AsFpuRegisterPairLow<SRegister>()), + input.AsRegisterPairLow<Register>(), + input.AsRegisterPairHigh<Register>()); + } else { + __ vmovsr(output.AsFpuRegister<SRegister>(), input.AsRegister<Register>()); + } +} + +void IntrinsicLocationsBuilderARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} +void IntrinsicCodeGeneratorARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +void 
IntrinsicLocationsBuilderARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} +void IntrinsicCodeGeneratorARM::VisitFloatIntBitsToFloat(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { + Location in = locations->InAt(0); + Location out = locations->Out(); + + if (is64bit) { + __ vabsd(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(in.AsFpuRegisterPairLow<SRegister>())); + } else { + __ vabss(out.AsFpuRegister<SRegister>(), in.AsFpuRegister<SRegister>()); + } +} + +void IntrinsicLocationsBuilderARM::VisitMathAbsDouble(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); +} + +static void GenAbsInteger(LocationSummary* locations, + bool is64bit, + ArmAssembler* assembler) { + Location in = locations->InAt(0); + Location output = locations->Out(); + + Register mask = locations->GetTemp(0).AsRegister<Register>(); + + if (is64bit) { + Register in_reg_lo = in.AsRegisterPairLow<Register>(); + Register in_reg_hi = in.AsRegisterPairHigh<Register>(); + Register out_reg_lo = output.AsRegisterPairLow<Register>(); + Register out_reg_hi = output.AsRegisterPairHigh<Register>(); + + DCHECK_NE(out_reg_lo, in_reg_hi) << "Diagonal overlap unexpected."; + + __ Asr(mask, in_reg_hi, 31); + __ adds(out_reg_lo, in_reg_lo, ShifterOperand(mask)); + __ adc(out_reg_hi, in_reg_hi, ShifterOperand(mask)); + __ eor(out_reg_lo, mask, ShifterOperand(out_reg_lo)); + __ eor(out_reg_hi, mask, ShifterOperand(out_reg_hi)); + } else { + Register in_reg = in.AsRegister<Register>(); + Register out_reg = output.AsRegister<Register>(); + + __ Asr(mask, in_reg, 31); + __ add(out_reg, in_reg, ShifterOperand(mask)); + __ eor(out_reg, mask, 
ShifterOperand(out_reg)); + } +} + +void IntrinsicLocationsBuilderARM::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), false, GetAssembler()); +} + + +void IntrinsicLocationsBuilderARM::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToIntPlusTemp(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenMinMax(LocationSummary* locations, + bool is_min, + ArmAssembler* assembler) { + Register op1 = locations->InAt(0).AsRegister<Register>(); + Register op2 = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + __ cmp(op1, ShifterOperand(op2)); + + __ it((is_min) ? Condition::LT : Condition::GT, kItElse); + __ mov(out, ShifterOperand(op1), is_min ? Condition::LT : Condition::GT); + __ mov(out, ShifterOperand(op2), is_min ? Condition::GE : Condition::LE); +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicLocationsBuilderARM::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMathSqrt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + ArmAssembler* assembler = GetAssembler(); + __ vsqrtd(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()), + FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekByte(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ ldrsb(invoke->GetLocations()->Out().AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekIntNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. 
+ __ ldr(invoke->GetLocations()->Out().AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekLongNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); + // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor + // exception. So we can't use ldrd as addr may be unaligned. + Register lo = invoke->GetLocations()->Out().AsRegisterPairLow<Register>(); + Register hi = invoke->GetLocations()->Out().AsRegisterPairHigh<Register>(); + if (addr == lo) { + __ ldr(hi, Address(addr, 4)); + __ ldr(lo, Address(addr, 0)); + } else { + __ ldr(lo, Address(addr, 0)); + __ ldr(hi, Address(addr, 4)); + } +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPeekShortNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + __ ldrsh(invoke->GetLocations()->Out().AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeByte(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeByte(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ strb(invoke->GetLocations()->InAt(1).AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeIntNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ str(invoke->GetLocations()->InAt(1).AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeLongNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + // Ignore upper 4B of long address. + Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>(); + // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor + // exception. So we can't use ldrd as addr may be unaligned. 
+ __ str(invoke->GetLocations()->InAt(1).AsRegisterPairLow<Register>(), Address(addr, 0)); + __ str(invoke->GetLocations()->InAt(1).AsRegisterPairHigh<Register>(), Address(addr, 4)); +} + +void IntrinsicLocationsBuilderARM::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitMemoryPokeShortNative(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ strh(invoke->GetLocations()->InAt(1).AsRegister<Register>(), + Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>())); +} + +void IntrinsicLocationsBuilderARM::VisitThreadCurrentThread(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitThreadCurrentThread(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + __ LoadFromOffset(kLoadWord, + invoke->GetLocations()->Out().AsRegister<Register>(), + TR, + Thread::PeerOffset<kArmPointerSize>().Int32Value()); +} + +static void GenUnsafeGet(HInvoke* invoke, + Primitive::Type type, + bool is_volatile, + CodeGeneratorARM* codegen) { + LocationSummary* locations = invoke->GetLocations(); + DCHECK((type == Primitive::kPrimInt) || + (type == Primitive::kPrimLong) || + (type == Primitive::kPrimNot)); + ArmAssembler* assembler = codegen->GetAssembler(); + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only. + + if (type == Primitive::kPrimLong) { + Register trg_lo = locations->Out().AsRegisterPairLow<Register>(); + __ add(IP, base, ShifterOperand(offset)); + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + Register trg_hi = locations->Out().AsRegisterPairHigh<Register>(); + __ ldrexd(trg_lo, trg_hi, IP); + } else { + __ ldrd(trg_lo, Address(IP)); + } + } else { + Register trg = locations->Out().AsRegister<Register>(); + __ ldr(trg, Address(base, offset)); + } + + if (is_volatile) { + __ dmb(ISH); + } +} + +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
+ locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + CreateIntIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetLong(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetObject(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { + GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_); +} + +static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, + const ArmInstructionSetFeatures& features, + Primitive::Type type, + bool is_volatile, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + + if (type == Primitive::kPrimLong) { + // Potentially need temps for ldrexd-strexd loop. + if (is_volatile && !features.HasAtomicLdrdAndStrd()) { + locations->AddTemp(Location::RequiresRegister()); // Temp_lo. + locations->AddTemp(Location::RequiresRegister()); // Temp_hi. + } + } else if (type == Primitive::kPrimNot) { + // Temps for card-marking. + locations->AddTemp(Location::RequiresRegister()); // Temp. + locations->AddTemp(Location::RequiresRegister()); // Card. 
+ } +} + +void IntrinsicLocationsBuilderARM::VisitUnsafePut(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, true, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutObject(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, true, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutLong(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutLongOrdered(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { + CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, true, invoke); +} + +static void GenUnsafePut(LocationSummary* locations, + Primitive::Type type, + bool is_volatile, + bool is_ordered, + CodeGeneratorARM* codegen) { + ArmAssembler* assembler = codegen->GetAssembler(); + + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only. 
+ Register value; + + if (is_volatile || is_ordered) { + __ dmb(ISH); + } + + if (type == Primitive::kPrimLong) { + Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>(); + value = value_lo; + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + Register temp_lo = locations->GetTemp(0).AsRegister<Register>(); + Register temp_hi = locations->GetTemp(1).AsRegister<Register>(); + Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>(); + + __ add(IP, base, ShifterOperand(offset)); + Label loop_head; + __ Bind(&loop_head); + __ ldrexd(temp_lo, temp_hi, IP); + __ strexd(temp_lo, value_lo, value_hi, IP); + __ cmp(temp_lo, ShifterOperand(0)); + __ b(&loop_head, NE); + } else { + __ add(IP, base, ShifterOperand(offset)); + __ strd(value_lo, Address(IP)); + } + } else { + value = locations->InAt(3).AsRegister<Register>(); + __ str(value, Address(base, offset)); + } + + if (is_volatile) { + __ dmb(ISH); + } + + if (type == Primitive::kPrimNot) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + Register card = locations->GetTemp(1).AsRegister<Register>(); + codegen->MarkGCCard(temp, card, base, value); + } +} + +void IntrinsicCodeGeneratorARM::VisitUnsafePut(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutObject(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutLong(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutLongOrdered(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { + GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_); +} + +static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, + HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::NoLocation()); // Unused receiver. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->SetInAt(4, Location::RequiresRegister()); + + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); // Pointer. + locations->AddTemp(Location::RequiresRegister()); // Temp 1. + locations->AddTemp(Location::RequiresRegister()); // Temp 2. 
+} + +static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) { + DCHECK_NE(type, Primitive::kPrimLong); + + ArmAssembler* assembler = codegen->GetAssembler(); + + Register out = locations->Out().AsRegister<Register>(); // Boolean result. + + Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer. + Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Offset (discard high 4B). + Register expected_lo = locations->InAt(3).AsRegister<Register>(); // Expected. + Register value_lo = locations->InAt(4).AsRegister<Register>(); // Value. + + Register tmp_ptr = locations->GetTemp(0).AsRegister<Register>(); // Pointer to actual memory. + Register tmp_lo = locations->GetTemp(1).AsRegister<Register>(); // Value in memory. + + if (type == Primitive::kPrimNot) { + // Mark card for object assuming new value is stored. Worst case we will mark an unchanged + // object and scan the receiver at the next GC for nothing. + codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo); + } + + // Prevent reordering with prior memory operations. + __ dmb(ISH); + + __ add(tmp_ptr, base, ShifterOperand(offset)); + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + // result = tmp != 0; + + Label loop_head; + __ Bind(&loop_head); + + __ ldrex(tmp_lo, tmp_ptr); + + __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo)); + + __ it(EQ, ItState::kItT); + __ strex(tmp_lo, value_lo, tmp_ptr, EQ); + __ cmp(tmp_lo, ShifterOperand(1), EQ); + + __ b(&loop_head, EQ); + + __ dmb(ISH); + + __ rsbs(out, tmp_lo, ShifterOperand(1)); + __ it(CC); + __ mov(out, ShifterOperand(0), CC); +} + +void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke ATTRIBUTE_UNUSED) { + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); +} +void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke ATTRIBUTE_UNUSED) { + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { + GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); +} +void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) { + GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); +} + +void IntrinsicLocationsBuilderARM::VisitStringCharAt(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) { + ArmAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + // Location of reference to data array + const MemberOffset value_offset = mirror::String::ValueOffset(); + // Location of count + const MemberOffset count_offset = mirror::String::CountOffset(); + // Starting offset within data array + const MemberOffset offset_offset = mirror::String::OffsetOffset(); + // Start of char data with array_ + const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t)); + + Register obj = locations->InAt(0).AsRegister<Register>(); // String object pointer. + Register idx = locations->InAt(1).AsRegister<Register>(); // Index of character. 
+ Register out = locations->Out().AsRegister<Register>(); // Result character.
+
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ Register array_temp = locations->GetTemp(1).AsRegister<Register>();
+
+ // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
+ // the cost.
+ // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
+ // we will not optimize the code for constants (which would save a register).
+
+ SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+ codegen_->AddSlowPath(slow_path);
+
+ __ ldr(temp, Address(obj, count_offset.Int32Value())); // temp = str.length.
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
+ __ cmp(idx, ShifterOperand(temp));
+ __ b(slow_path->GetEntryLabel(), CS);
+
+ // Index computation.
+ __ ldr(temp, Address(obj, offset_offset.Int32Value())); // temp := str.offset.
+ __ ldr(array_temp, Address(obj, value_offset.Int32Value())); // array_temp := str.value.
+ __ add(temp, temp, ShifterOperand(idx));
+ DCHECK_EQ(data_offset.Int32Value() % 2, 0); // We'll compensate by shifting.
+ __ add(temp, temp, ShifterOperand(data_offset.Int32Value() / 2));
+
+ // Load the value.
+ __ ldrh(out, Address(array_temp, temp, LSL, 1)); // out := array_temp[temp].
+
+ __ Bind(slow_path->GetExitLabel());
+}
+
+// Unimplemented intrinsics.
+
+#define UNIMPLEMENTED_INTRINSIC(Name) \
+void IntrinsicLocationsBuilderARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+} \
+void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+}
+
+UNIMPLEMENTED_INTRINSIC(IntegerReverse)
+UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
+UNIMPLEMENTED_INTRINSIC(LongReverse)
+UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
+UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
+UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat)
+UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(MathMaxFloatFloat)
+UNIMPLEMENTED_INTRINSIC(MathMinLongLong)
+UNIMPLEMENTED_INTRINSIC(MathMaxLongLong)
+UNIMPLEMENTED_INTRINSIC(MathCeil) // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(MathFloor) // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(MathRint)
+UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure.
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(StringCompareTo)
+UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should
+UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here.
+UNIMPLEMENTED_INTRINSIC(StringIndexOf)
+UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+
+} // namespace arm
+} // namespace art
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
new file mode 100644
index 0000000000..8bfb7d4686
--- /dev/null
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_ + +#include "intrinsics.h" + +namespace art { + +class ArenaAllocator; +class ArmInstructionSetFeatures; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace arm { + +class ArmAssembler; +class CodeGeneratorARM; + +class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderARM(ArenaAllocator* arena, + const ArmInstructionSetFeatures& features) + : arena_(arena), features_(features) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + + const ArmInstructionSetFeatures& features_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM); +}; + +class IntrinsicCodeGeneratorARM FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorARM(CodeGeneratorARM* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + ArmAssembler* GetAssembler(); + + ArenaAllocator* GetAllocator(); + + CodeGeneratorARM* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARM); +}; + +} // namespace arm +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 7a3d7d8389..8874edc341 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -300,7 +300,6 @@ void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) { } static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - // We only support FP registers here. LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -924,7 +923,6 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitStringCharAt(HInvoke* invoke) { - // The inputs plus one temp. 
LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 30d869d026..9bb91d208b 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1015,6 +1015,18 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> { void SetLiveInterval(LiveInterval* interval) { live_interval_ = interval; } bool HasLiveInterval() const { return live_interval_ != nullptr; } + bool IsSuspendCheckEntry() const { return IsSuspendCheck() && GetBlock()->IsEntryBlock(); } + + // Returns whether the code generation of the instruction will require to have access + // to the current method. Such instructions are: + // (1): Instructions that require an environment, as calling the runtime requires + // to walk the stack and have the current method stored at a specific stack address. + // (2): Object literals like classes and strings, that are loaded from the dex cache + // fields of the current method. + bool NeedsCurrentMethod() const { + return NeedsEnvironment() || IsLoadClass() || IsLoadString(); + } + private: HInstruction* previous_; HInstruction* next_; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 3809720cb4..bfbe63f6ce 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -252,8 +252,13 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) { && (instruction->GetType() != Primitive::kPrimFloat); if (locations->CanCall()) { - if (!instruction->IsSuspendCheck()) { - codegen_->MarkNotLeaf(); + if (codegen_->IsLeafMethod()) { + // TODO: We do this here because we do not want the suspend check to artificially + // create live registers. We should find another place, but this is currently the + // simplest. + DCHECK(instruction->IsSuspendCheckEntry()); + instruction->GetBlock()->RemoveInstruction(instruction); + return; } safepoints_.Add(instruction); if (locations->OnlyCallsOnSlowPath()) { diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 1b06315fce..2a8473524f 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -115,14 +115,13 @@ void SsaLivenessAnalysis::NumberInstructions() { // to differentiate between the start and end of an instruction. Adding 2 to // the lifetime position for each instruction ensures the start of an // instruction is different than the end of the previous instruction. 
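// Editorial illustration, not part of the patch: with this scheme, consecutive instructions in a
// block might be numbered 10, 12, 14, ...; the odd position 11 can then stand for the end of the
// instruction at 10 without ever colliding with the start (12) of the next one.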
- HGraphVisitor* location_builder = codegen_->GetLocationBuilder();
 for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) {
 HBasicBlock* block = it.Current();
 block->SetLifetimeStart(lifetime_position);
 for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
 HInstruction* current = inst_it.Current();
- current->Accept(location_builder);
+ codegen_->AllocateLocations(current);
 LocationSummary* locations = current->GetLocations();
 if (locations != nullptr && locations->Out().IsValid()) {
 instructions_from_ssa_index_.Add(current);
@@ -140,7 +139,7 @@ void SsaLivenessAnalysis::NumberInstructions() {
 for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) {
 HInstruction* current = inst_it.Current();
- current->Accept(codegen_->GetLocationBuilder());
+ codegen_->AllocateLocations(current);
 LocationSummary* locations = current->GetLocations();
 if (locations != nullptr && locations->Out().IsValid()) {
 instructions_from_ssa_index_.Add(current);
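The ldrex/strex loop emitted by GenCas in intrinsics_arm.cc earlier in this change, bracketed by
dmb(ISH) barriers, implements an atomic compare-and-swap that reports success as a boolean. As a
rough editorial sketch of that contract in portable C++ (the helper name and the use of
std::atomic are assumptions made for illustration; seq_cst only approximates the two full
barriers):

#include <atomic>
#include <cstdint>
#include <cstdio>

// Hypothetical helper mirroring the Unsafe.compareAndSwapInt semantics generated by GenCas:
// returns true iff *addr still held 'expected' and was atomically replaced by 'desired'.
bool CompareAndSwapInt(std::atomic<int32_t>* addr, int32_t expected, int32_t desired) {
  return addr->compare_exchange_strong(expected, desired, std::memory_order_seq_cst);
}

int main() {
  std::atomic<int32_t> field{0};
  std::printf("%d\n", CompareAndSwapInt(&field, 0, 1));  // 1: the swap succeeded, field is now 1.
  std::printf("%d\n", CompareAndSwapInt(&field, 0, 2));  // 0: field no longer holds 0.
  return 0;
}

On ARMv7 a compiler typically lowers compare_exchange_strong to the same ldrex/strex retry loop,
which is why the hand-written loop re-checks the store-exclusive result before reporting success.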