Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 540
-rw-r--r--  compiler/optimizing/bounds_check_elimination_test.cc | 75
-rw-r--r--  compiler/optimizing/builder.h | 2
-rw-r--r--  compiler/optimizing/code_generator.cc | 88
-rw-r--r--  compiler/optimizing/code_generator.h | 80
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 275
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 26
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 151
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 32
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 58
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 2
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 110
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 2
-rw-r--r--  compiler/optimizing/dominator_test.cc | 2
-rw-r--r--  compiler/optimizing/find_loops_test.cc | 2
-rw-r--r--  compiler/optimizing/graph_checker.cc | 52
-rw-r--r--  compiler/optimizing/graph_test.cc | 2
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 4
-rw-r--r--  compiler/optimizing/gvn.cc | 2
-rw-r--r--  compiler/optimizing/gvn_test.cc | 2
-rw-r--r--  compiler/optimizing/inliner.cc | 6
-rw-r--r--  compiler/optimizing/inliner.h | 4
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 46
-rw-r--r--  compiler/optimizing/instruction_simplifier.h | 7
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 883
-rw-r--r--  compiler/optimizing/intrinsics_arm.h | 88
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 2
-rw-r--r--  compiler/optimizing/licm.cc | 3
-rw-r--r--  compiler/optimizing/linearize_test.cc | 2
-rw-r--r--  compiler/optimizing/live_interval_test.cc | 2
-rw-r--r--  compiler/optimizing/live_ranges_test.cc | 2
-rw-r--r--  compiler/optimizing/liveness_test.cc | 2
-rw-r--r--  compiler/optimizing/locations.cc | 7
-rw-r--r--  compiler/optimizing/locations.h | 20
-rw-r--r--  compiler/optimizing/nodes.cc | 96
-rw-r--r--  compiler/optimizing/nodes.h | 373
-rw-r--r--  compiler/optimizing/nodes_test.cc | 2
-rw-r--r--  compiler/optimizing/optimization.cc | 6
-rw-r--r--  compiler/optimizing/optimization.h | 9
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 51
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h | 4
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc | 115
-rw-r--r--  compiler/optimizing/parallel_move_resolver.h | 10
-rw-r--r--  compiler/optimizing/parallel_move_test.cc | 90
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 5
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.h | 1
-rw-r--r--  compiler/optimizing/pretty_printer_test.cc | 2
-rw-r--r--  compiler/optimizing/primitive_type_propagation.cc | 30
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 254
-rw-r--r--  compiler/optimizing/reference_type_propagation.h | 38
-rw-r--r--  compiler/optimizing/register_allocator.cc | 110
-rw-r--r--  compiler/optimizing/register_allocator.h | 15
-rw-r--r--  compiler/optimizing/register_allocator_test.cc | 2
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 69
-rw-r--r--  compiler/optimizing/ssa_builder.h | 2
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc | 59
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.cc | 43
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.h | 3
-rw-r--r--  compiler/optimizing/ssa_test.cc | 2
-rw-r--r--  compiler/optimizing/stack_map_stream.h | 31
-rw-r--r--  compiler/optimizing/stack_map_test.cc | 2
61 files changed, 3185 insertions, 820 deletions
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index d6c3515726..811a3bdf0c 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -14,9 +14,9 @@
* limitations under the License.
*/
+#include "base/arena_containers.h"
#include "bounds_check_elimination.h"
#include "nodes.h"
-#include "utils/arena_containers.h"
namespace art {
@@ -28,18 +28,11 @@ class MonotonicValueRange;
*/
class ValueBound : public ValueObject {
public:
- ValueBound(HInstruction* instruction, int constant) {
+ ValueBound(HInstruction* instruction, int32_t constant) {
if (instruction != nullptr && instruction->IsIntConstant()) {
- // Normalizing ValueBound with constant instruction.
- int instr_const = instruction->AsIntConstant()->GetValue();
- if (constant >= 0 && (instr_const <= INT_MAX - constant)) {
- // No overflow.
- instruction_ = nullptr;
- constant_ = instr_const + constant;
- return;
- }
- if (constant < 0 && (instr_const >= INT_MIN - constant)) {
- // No underflow.
+ // Normalize ValueBound with constant instruction.
+ int32_t instr_const = instruction->AsIntConstant()->GetValue();
+ if (!WouldAddOverflowOrUnderflow(instr_const, constant)) {
instruction_ = nullptr;
constant_ = instr_const + constant;
return;
@@ -49,6 +42,41 @@ class ValueBound : public ValueObject {
constant_ = constant;
}
+ // Return whether (left + right) overflows or underflows.
+ static bool WouldAddOverflowOrUnderflow(int32_t left, int32_t right) {
+ if (right == 0) {
+ return false;
+ }
+ if ((right > 0) && (left <= INT_MAX - right)) {
+ // No overflow.
+ return false;
+ }
+ if ((right < 0) && (left >= INT_MIN - right)) {
+ // No underflow.
+ return false;
+ }
+ return true;
+ }
+
+ static bool IsAddOrSubAConstant(HInstruction* instruction,
+ HInstruction** left_instruction,
+ int* right_constant) {
+ if (instruction->IsAdd() || instruction->IsSub()) {
+ HBinaryOperation* bin_op = instruction->AsBinaryOperation();
+ HInstruction* left = bin_op->GetLeft();
+ HInstruction* right = bin_op->GetRight();
+ if (right->IsIntConstant()) {
+ *left_instruction = left;
+ int32_t c = right->AsIntConstant()->GetValue();
+ *right_constant = instruction->IsAdd() ? c : -c;
+ return true;
+ }
+ }
+ *left_instruction = nullptr;
+ *right_constant = 0;
+ return false;
+ }
+
// Try to detect useful value bound format from an instruction, e.g.
// a constant or array length related value.
static ValueBound DetectValueBoundFromValue(HInstruction* instruction, bool* found) {
@@ -63,13 +91,12 @@ class ValueBound : public ValueObject {
return ValueBound(instruction, 0);
}
// Try to detect (array.length + c) format.
- if (instruction->IsAdd()) {
- HAdd* add = instruction->AsAdd();
- HInstruction* left = add->GetLeft();
- HInstruction* right = add->GetRight();
- if (left->IsArrayLength() && right->IsIntConstant()) {
+ HInstruction *left;
+ int32_t right;
+ if (IsAddOrSubAConstant(instruction, &left, &right)) {
+ if (left->IsArrayLength()) {
*found = true;
- return ValueBound(left, right->AsIntConstant()->GetValue());
+ return ValueBound(left, right);
}
}
@@ -79,10 +106,13 @@ class ValueBound : public ValueObject {
}
HInstruction* GetInstruction() const { return instruction_; }
- int GetConstant() const { return constant_; }
+ int32_t GetConstant() const { return constant_; }
- bool IsRelativeToArrayLength() const {
- return instruction_ != nullptr && instruction_->IsArrayLength();
+ bool IsRelatedToArrayLength() const {
+ // Some bounds are created with HNewArray* as the instruction instead
+ // of HArrayLength*. They are treated the same.
+ return (instruction_ != nullptr) &&
+ (instruction_->IsArrayLength() || instruction_->IsNewArray());
}
bool IsConstant() const {
@@ -96,54 +126,45 @@ class ValueBound : public ValueObject {
return instruction_ == bound.instruction_ && constant_ == bound.constant_;
}
- // Returns if it's certain bound1 >= bound2.
- bool GreaterThanOrEqual(ValueBound bound) const {
- if (instruction_ == bound.instruction_) {
- if (instruction_ == nullptr) {
- // Pure constant.
- return constant_ >= bound.constant_;
- }
- // There might be overflow/underflow. Be conservative for now.
- return false;
+ static HInstruction* FromArrayLengthToNewArrayIfPossible(HInstruction* instruction) {
+ // Null check on the NewArray should have been eliminated by instruction
+ // simplifier already.
+ if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) {
+ return instruction->InputAt(0)->AsNewArray();
}
- // Not comparable. Just return false.
- return false;
+ return instruction;
}
- // Returns if it's certain bound1 <= bound2.
- bool LessThanOrEqual(ValueBound bound) const {
- if (instruction_ == bound.instruction_) {
- if (instruction_ == nullptr) {
- // Pure constant.
- return constant_ <= bound.constant_;
- }
- if (IsRelativeToArrayLength()) {
- // Array length is guaranteed to be no less than 0.
- // No overflow/underflow can happen if both constants are negative.
- if (constant_ <= 0 && bound.constant_ <= 0) {
- return constant_ <= bound.constant_;
- }
- // There might be overflow/underflow. Be conservative for now.
- return false;
- }
+ static bool Equal(HInstruction* instruction1, HInstruction* instruction2) {
+ if (instruction1 == instruction2) {
+ return true;
}
- // In case the array length is some constant, we can
- // still compare.
- if (IsConstant() && bound.IsRelativeToArrayLength()) {
- HInstruction* array = bound.GetInstruction()->AsArrayLength()->InputAt(0);
- if (array->IsNullCheck()) {
- array = array->AsNullCheck()->InputAt(0);
- }
- if (array->IsNewArray()) {
- HInstruction* len = array->InputAt(0);
- if (len->IsIntConstant()) {
- int len_const = len->AsIntConstant()->GetValue();
- return constant_ <= len_const + bound.GetConstant();
- }
- }
+ if (instruction1 == nullptr || instruction2 == nullptr) {
+ return false;
}
+ // Some bounds are created with HNewArray* as the instruction instead
+ // of HArrayLength*. They are treated the same.
+ instruction1 = FromArrayLengthToNewArrayIfPossible(instruction1);
+ instruction2 = FromArrayLengthToNewArrayIfPossible(instruction2);
+ return instruction1 == instruction2;
+ }
+
+ // Returns if it's certain this->bound >= `bound`.
+ bool GreaterThanOrEqualTo(ValueBound bound) const {
+ if (Equal(instruction_, bound.instruction_)) {
+ return constant_ >= bound.constant_;
+ }
+ // Not comparable. Just return false.
+ return false;
+ }
+
+ // Returns if it's certain this->bound <= `bound`.
+ bool LessThanOrEqualTo(ValueBound bound) const {
+ if (Equal(instruction_, bound.instruction_)) {
+ return constant_ <= bound.constant_;
+ }
// Not comparable. Just return false.
return false;
}
@@ -151,10 +172,11 @@ class ValueBound : public ValueObject {
// Try to narrow lower bound. Returns the greatest of the two if possible.
// Pick one if they are not comparable.
static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) {
- if (bound1.instruction_ == bound2.instruction_) {
- // Same instruction, compare the constant part.
- return ValueBound(bound1.instruction_,
- std::max(bound1.constant_, bound2.constant_));
+ if (bound1.GreaterThanOrEqualTo(bound2)) {
+ return bound1;
+ }
+ if (bound2.GreaterThanOrEqualTo(bound1)) {
+ return bound2;
}
// Not comparable. Just pick one. We may lose some info, but that's ok.
@@ -165,58 +187,71 @@ class ValueBound : public ValueObject {
// Try to narrow upper bound. Returns the lowest of the two if possible.
// Pick one if they are not comparable.
static ValueBound NarrowUpperBound(ValueBound bound1, ValueBound bound2) {
- if (bound1.instruction_ == bound2.instruction_) {
- // Same instruction, compare the constant part.
- return ValueBound(bound1.instruction_,
- std::min(bound1.constant_, bound2.constant_));
+ if (bound1.LessThanOrEqualTo(bound2)) {
+ return bound1;
+ }
+ if (bound2.LessThanOrEqualTo(bound1)) {
+ return bound2;
}
// Not comparable. Just pick one. We may lose some info, but that's ok.
// Favor array length as upper bound.
- return bound1.IsRelativeToArrayLength() ? bound1 : bound2;
+ return bound1.IsRelatedToArrayLength() ? bound1 : bound2;
}
- // Add a constant to a ValueBound. If the constant part of the ValueBound
- // overflows/underflows, then we can't accurately represent it. For correctness,
- // just return Max/Min() depending on whether the returned ValueBound is used for
- // lower/upper bound.
- ValueBound Add(int c, bool* overflow_or_underflow) const {
- *overflow_or_underflow = false;
+ // Add a constant to a ValueBound.
+ // `overflow` or `underflow` will return whether the resulting bound may
+ // overflow or underflow an int.
+ ValueBound Add(int32_t c, bool* overflow, bool* underflow) const {
+ *overflow = *underflow = false;
if (c == 0) {
return *this;
}
- int new_constant;
+ int32_t new_constant;
if (c > 0) {
if (constant_ > INT_MAX - c) {
- // Constant part overflows.
- *overflow_or_underflow = true;
+ *overflow = true;
return Max();
- } else {
- new_constant = constant_ + c;
}
+
+ new_constant = constant_ + c;
+ // (array.length + non-positive-constant) won't overflow an int.
+ if (IsConstant() || (IsRelatedToArrayLength() && new_constant <= 0)) {
+ return ValueBound(instruction_, new_constant);
+ }
+ // Be conservative.
+ *overflow = true;
+ return Max();
} else {
if (constant_ < INT_MIN - c) {
- // Constant part underflows.
- *overflow_or_underflow = true;
- return Max();
- } else {
- new_constant = constant_ + c;
+ *underflow = true;
+ return Min();
}
+
+ new_constant = constant_ + c;
+ // Regardless of the value new_constant, (array.length+new_constant) will
+ // never underflow since array.length is no less than 0.
+ if (IsConstant() || IsRelatedToArrayLength()) {
+ return ValueBound(instruction_, new_constant);
+ }
+ // Be conservative.
+ *underflow = true;
+ return Min();
}
return ValueBound(instruction_, new_constant);
}
private:
HInstruction* instruction_;
- int constant_;
+ int32_t constant_;
};
/**
* Represent a range of lower bound and upper bound, both being inclusive.
* Currently a ValueRange may be generated as a result of the following:
* comparisons related to array bounds, array bounds check, add/sub on top
- * of an existing value range, or a loop phi corresponding to an
+ * of an existing value range, NewArray or a loop phi corresponding to an
* incrementing/decrementing array index (MonotonicValueRange).
*/
class ValueRange : public ArenaObject<kArenaAllocMisc> {
@@ -241,8 +276,8 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> {
return true;
}
DCHECK(!other_range->IsMonotonicValueRange());
- return lower_.GreaterThanOrEqual(other_range->lower_) &&
- upper_.LessThanOrEqual(other_range->upper_);
+ return lower_.GreaterThanOrEqualTo(other_range->lower_) &&
+ upper_.LessThanOrEqualTo(other_range->upper_);
}
// Returns the intersection of this and range.
@@ -263,29 +298,24 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> {
ValueBound::NarrowUpperBound(upper_, range->upper_));
}
- // Shift a range by a constant. If either bound can't be represented
- // as (instruction+c) format due to possible overflow/underflow,
- // return the full integer range.
- ValueRange* Add(int constant) const {
- bool overflow_or_underflow;
- ValueBound lower = lower_.Add(constant, &overflow_or_underflow);
- if (overflow_or_underflow) {
- // We can't accurately represent the bounds anymore.
- return FullIntRange();
+ // Shift a range by a constant.
+ ValueRange* Add(int32_t constant) const {
+ bool overflow, underflow;
+ ValueBound lower = lower_.Add(constant, &overflow, &underflow);
+ if (underflow) {
+ // Lower bound underflow will wrap around to positive values
+ // and invalidate the upper bound.
+ return nullptr;
}
- ValueBound upper = upper_.Add(constant, &overflow_or_underflow);
- if (overflow_or_underflow) {
- // We can't accurately represent the bounds anymore.
- return FullIntRange();
+ ValueBound upper = upper_.Add(constant, &overflow, &underflow);
+ if (overflow) {
+ // Upper bound overflow will wrap around to negative values
+ // and invalidate the lower bound.
+ return nullptr;
}
return new (allocator_) ValueRange(allocator_, lower, upper);
}
- // Return [INT_MIN, INT_MAX].
- ValueRange* FullIntRange() const {
- return new (allocator_) ValueRange(allocator_, ValueBound::Min(), ValueBound::Max());
- }
-
private:
ArenaAllocator* const allocator_;
const ValueBound lower_; // inclusive
@@ -304,7 +334,7 @@ class MonotonicValueRange : public ValueRange {
public:
MonotonicValueRange(ArenaAllocator* allocator,
HInstruction* initial,
- int increment,
+ int32_t increment,
ValueBound bound)
// To be conservative, give it full range [INT_MIN, INT_MAX] in case it's
// used as a regular value range, due to possible overflow/underflow.
@@ -343,23 +373,17 @@ class MonotonicValueRange : public ValueRange {
// make assumptions about the max array length, e.g. due to the max heap size,
// divided by the element size (such as 4 bytes for each integer array), we can
// lower this number and rule out some possible overflows.
- int max_array_len = INT_MAX;
-
- int upper = INT_MAX;
- if (range->GetUpper().IsConstant()) {
- upper = range->GetUpper().GetConstant();
- } else if (range->GetUpper().IsRelativeToArrayLength()) {
- int constant = range->GetUpper().GetConstant();
- if (constant <= 0) {
- // Normal case. e.g. <= array.length - 1, <= array.length - 2, etc.
- upper = max_array_len + constant;
- } else {
- // There might be overflow. Give up narrowing.
- return this;
- }
- } else {
- // There might be overflow. Give up narrowing.
- return this;
+ int32_t max_array_len = INT_MAX;
+
+ // max possible integer value of range's upper value.
+ int32_t upper = INT_MAX;
+ // Try to lower upper.
+ ValueBound upper_bound = range->GetUpper();
+ if (upper_bound.IsConstant()) {
+ upper = upper_bound.GetConstant();
+ } else if (upper_bound.IsRelatedToArrayLength() && upper_bound.GetConstant() <= 0) {
+ // Normal case. e.g. <= array.length - 1.
+ upper = max_array_len + upper_bound.GetConstant();
}
// If we can prove for the last number in sequence of initial_,
@@ -368,13 +392,13 @@ class MonotonicValueRange : public ValueRange {
// then this MonotonicValueRange is narrowed to a normal value range.
// Be conservative first, assume last number in the sequence hits upper.
- int last_num_in_sequence = upper;
+ int32_t last_num_in_sequence = upper;
if (initial_->IsIntConstant()) {
- int initial_constant = initial_->AsIntConstant()->GetValue();
+ int32_t initial_constant = initial_->AsIntConstant()->GetValue();
if (upper <= initial_constant) {
last_num_in_sequence = upper;
} else {
- // Cast to int64_t for the subtraction part to avoid int overflow.
+ // Cast to int64_t for the subtraction part to avoid int32_t overflow.
last_num_in_sequence = initial_constant +
((int64_t)upper - (int64_t)initial_constant) / increment_ * increment_;
}
@@ -392,23 +416,22 @@ class MonotonicValueRange : public ValueRange {
ValueBound upper = ValueBound::NarrowUpperBound(bound_, range->GetUpper());
// Need to take care of underflow. Try to prove underflow won't happen
- // for common cases. Basically need to be able to prove for any value
- // that's >= range->GetLower(), it won't be positive with value+increment.
+ // for common cases.
if (range->GetLower().IsConstant()) {
- int constant = range->GetLower().GetConstant();
+ int32_t constant = range->GetLower().GetConstant();
if (constant >= INT_MIN - increment_) {
return new (GetAllocator()) ValueRange(GetAllocator(), range->GetLower(), upper);
}
}
- // There might be underflow. Give up narrowing.
+ // For non-constant lower bound, just assume might be underflow. Give up narrowing.
return this;
}
}
private:
HInstruction* const initial_;
- const int increment_;
+ const int32_t increment_;
ValueBound bound_; // Additional value bound info for initial_;
DISALLOW_COPY_AND_ASSIGN(MonotonicValueRange);
@@ -446,13 +469,26 @@ class BCEVisitor : public HGraphVisitor {
return nullptr;
}
- // Narrow the value range of 'instruction' at the end of 'basic_block' with 'range',
- // and push the narrowed value range to 'successor'.
+ // Narrow the value range of `instruction` at the end of `basic_block` with `range`,
+ // and push the narrowed value range to `successor`.
void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block,
- HBasicBlock* successor, ValueRange* range) {
+ HBasicBlock* successor, ValueRange* range) {
ValueRange* existing_range = LookupValueRange(instruction, basic_block);
- ValueRange* narrowed_range = (existing_range == nullptr) ?
- range : existing_range->Narrow(range);
+ if (existing_range == nullptr) {
+ if (range != nullptr) {
+ GetValueRangeMap(successor)->Overwrite(instruction->GetId(), range);
+ }
+ return;
+ }
+ if (existing_range->IsMonotonicValueRange()) {
+ DCHECK(instruction->IsLoopHeaderPhi());
+ // Make sure the comparison is in the loop header so each increment is
+ // checked with a comparison.
+ if (instruction->GetBlock() != basic_block) {
+ return;
+ }
+ }
+ ValueRange* narrowed_range = existing_range->Narrow(range);
if (narrowed_range != nullptr) {
GetValueRangeMap(successor)->Overwrite(instruction->GetId(), narrowed_range);
}
@@ -472,10 +508,12 @@ class BCEVisitor : public HGraphVisitor {
bool found;
ValueBound bound = ValueBound::DetectValueBoundFromValue(right, &found);
+ // Each comparison can establish a lower bound and an upper bound
+ // for the left hand side.
ValueBound lower = bound;
ValueBound upper = bound;
if (!found) {
- // No constant or array.length+c bound found.
+ // No constant or array.length+c format bound found.
// For i<j, we can still use j's upper bound as i's upper bound. Same for lower.
ValueRange* range = LookupValueRange(right, block);
if (range != nullptr) {
@@ -487,13 +525,13 @@ class BCEVisitor : public HGraphVisitor {
}
}
- bool overflow_or_underflow;
+ bool overflow, underflow;
if (cond == kCondLT || cond == kCondLE) {
if (!upper.Equals(ValueBound::Max())) {
- int compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive
- ValueBound new_upper = upper.Add(compensation, &overflow_or_underflow);
- if (overflow_or_underflow) {
- new_upper = ValueBound::Max();
+ int32_t compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive
+ ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
+ if (overflow || underflow) {
+ return;
}
ValueRange* new_range = new (GetGraph()->GetArena())
ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper);
@@ -501,11 +539,11 @@ class BCEVisitor : public HGraphVisitor {
}
// array.length as a lower bound isn't considered useful.
- if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) {
- int compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive
- ValueBound new_lower = lower.Add(compensation, &overflow_or_underflow);
- if (overflow_or_underflow) {
- new_lower = ValueBound::Min();
+ if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
+ int32_t compensation = (cond == kCondLE) ? 1 : 0; // lower bound is inclusive
+ ValueBound new_lower = lower.Add(compensation, &overflow, &underflow);
+ if (overflow || underflow) {
+ return;
}
ValueRange* new_range = new (GetGraph()->GetArena())
ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max());
@@ -513,11 +551,11 @@ class BCEVisitor : public HGraphVisitor {
}
} else if (cond == kCondGT || cond == kCondGE) {
// array.length as a lower bound isn't considered useful.
- if (!lower.Equals(ValueBound::Min()) && !lower.IsRelativeToArrayLength()) {
- int compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive
- ValueBound new_lower = lower.Add(compensation, &overflow_or_underflow);
- if (overflow_or_underflow) {
- new_lower = ValueBound::Min();
+ if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
+ int32_t compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive
+ ValueBound new_lower = lower.Add(compensation, &overflow, &underflow);
+ if (overflow || underflow) {
+ return;
}
ValueRange* new_range = new (GetGraph()->GetArena())
ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max());
@@ -525,10 +563,10 @@ class BCEVisitor : public HGraphVisitor {
}
if (!upper.Equals(ValueBound::Max())) {
- int compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive
- ValueBound new_upper = upper.Add(compensation, &overflow_or_underflow);
- if (overflow_or_underflow) {
- new_upper = ValueBound::Max();
+ int32_t compensation = (cond == kCondGE) ? -1 : 0; // upper bound is inclusive
+ ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
+ if (overflow || underflow) {
+ return;
}
ValueRange* new_range = new (GetGraph()->GetArena())
ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper);
@@ -541,41 +579,56 @@ class BCEVisitor : public HGraphVisitor {
HBasicBlock* block = bounds_check->GetBlock();
HInstruction* index = bounds_check->InputAt(0);
HInstruction* array_length = bounds_check->InputAt(1);
- ValueRange* index_range = LookupValueRange(index, block);
-
- if (index_range != nullptr) {
- ValueBound lower = ValueBound(nullptr, 0); // constant 0
- ValueBound upper = ValueBound(array_length, -1); // array_length - 1
- ValueRange* array_range = new (GetGraph()->GetArena())
- ValueRange(GetGraph()->GetArena(), lower, upper);
- if (index_range->FitsIn(array_range)) {
- ReplaceBoundsCheck(bounds_check, index);
+ DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength());
+
+ if (!index->IsIntConstant()) {
+ ValueRange* index_range = LookupValueRange(index, block);
+ if (index_range != nullptr) {
+ ValueBound lower = ValueBound(nullptr, 0); // constant 0
+ ValueBound upper = ValueBound(array_length, -1); // array_length - 1
+ ValueRange* array_range = new (GetGraph()->GetArena())
+ ValueRange(GetGraph()->GetArena(), lower, upper);
+ if (index_range->FitsIn(array_range)) {
+ ReplaceBoundsCheck(bounds_check, index);
+ return;
+ }
+ }
+ } else {
+ int32_t constant = index->AsIntConstant()->GetValue();
+ if (constant < 0) {
+ // Will always throw exception.
+ return;
+ }
+ if (array_length->IsIntConstant()) {
+ if (constant < array_length->AsIntConstant()->GetValue()) {
+ ReplaceBoundsCheck(bounds_check, index);
+ }
return;
}
- }
- if (index->IsIntConstant()) {
- ValueRange* array_length_range = LookupValueRange(array_length, block);
- int constant = index->AsIntConstant()->GetValue();
- if (array_length_range != nullptr &&
- array_length_range->GetLower().IsConstant()) {
- if (constant < array_length_range->GetLower().GetConstant()) {
+ DCHECK(array_length->IsArrayLength());
+ ValueRange* existing_range = LookupValueRange(array_length, block);
+ if (existing_range != nullptr) {
+ ValueBound lower = existing_range->GetLower();
+ DCHECK(lower.IsConstant());
+ if (constant < lower.GetConstant()) {
ReplaceBoundsCheck(bounds_check, index);
return;
+ } else {
+ // Existing range isn't strong enough to eliminate the bounds check.
+ // Fall through to update the array_length range with info from this
+ // bounds check.
}
}
// Once we have an array access like 'array[5] = 1', we record array.length >= 6.
+ // We currently don't do it for non-constant index since a valid array[i] can't prove
+ // a valid array[i-1] yet due to the lower bound side.
ValueBound lower = ValueBound(nullptr, constant + 1);
ValueBound upper = ValueBound::Max();
ValueRange* range = new (GetGraph()->GetArena())
ValueRange(GetGraph()->GetArena(), lower, upper);
- ValueRange* existing_range = LookupValueRange(array_length, block);
- ValueRange* new_range = range;
- if (existing_range != nullptr) {
- new_range = range->Narrow(existing_range);
- }
- GetValueRangeMap(block)->Overwrite(array_length->GetId(), new_range);
+ GetValueRangeMap(block)->Overwrite(array_length->GetId(), range);
}
}
@@ -588,14 +641,12 @@ class BCEVisitor : public HGraphVisitor {
if (phi->IsLoopHeaderPhi() && phi->GetType() == Primitive::kPrimInt) {
DCHECK_EQ(phi->InputCount(), 2U);
HInstruction* instruction = phi->InputAt(1);
- if (instruction->IsAdd()) {
- HAdd* add = instruction->AsAdd();
- HInstruction* left = add->GetLeft();
- HInstruction* right = add->GetRight();
- if (left == phi && right->IsIntConstant()) {
+ HInstruction *left;
+ int32_t increment;
+ if (ValueBound::IsAddOrSubAConstant(instruction, &left, &increment)) {
+ if (left == phi) {
HInstruction* initial_value = phi->InputAt(0);
ValueRange* range = nullptr;
- int increment = right->AsIntConstant()->GetValue();
if (increment == 0) {
// Add constant 0. It's really a fixed value.
range = new (GetGraph()->GetArena()) ValueRange(
@@ -676,29 +727,122 @@ class BCEVisitor : public HGraphVisitor {
// Here we are interested in the typical triangular case of nested loops,
// such as the inner loop 'for (int j=0; j<array.length-i; j++)' where i
// is the index for outer loop. In this case, we know j is bounded by array.length-1.
+
+ // Try to handle (array.length - i) or (array.length + c - i) format.
+ HInstruction* left_of_left; // left input of left.
+ int32_t right_const = 0;
+ if (ValueBound::IsAddOrSubAConstant(left, &left_of_left, &right_const)) {
+ left = left_of_left;
+ }
+ // The value of left input of the sub equals (left + right_const).
+
if (left->IsArrayLength()) {
HInstruction* array_length = left->AsArrayLength();
ValueRange* right_range = LookupValueRange(right, sub->GetBlock());
if (right_range != nullptr) {
ValueBound lower = right_range->GetLower();
ValueBound upper = right_range->GetUpper();
- if (lower.IsConstant() && upper.IsRelativeToArrayLength()) {
+ if (lower.IsConstant() && upper.IsRelatedToArrayLength()) {
HInstruction* upper_inst = upper.GetInstruction();
- if (upper_inst->IsArrayLength() &&
- upper_inst->AsArrayLength() == array_length) {
- // (array.length - v) where v is in [c1, array.length + c2]
- // gets [-c2, array.length - c1] as its value range.
- ValueRange* range = new (GetGraph()->GetArena()) ValueRange(
- GetGraph()->GetArena(),
- ValueBound(nullptr, - upper.GetConstant()),
- ValueBound(array_length, - lower.GetConstant()));
- GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range);
+ // Make sure it's the same array.
+ if (ValueBound::Equal(array_length, upper_inst)) {
+ int32_t c0 = right_const;
+ int32_t c1 = lower.GetConstant();
+ int32_t c2 = upper.GetConstant();
+ // (array.length + c0 - v) where v is in [c1, array.length + c2]
+ // gets [c0 - c2, array.length + c0 - c1] as its value range.
+ if (!ValueBound::WouldAddOverflowOrUnderflow(c0, -c2) &&
+ !ValueBound::WouldAddOverflowOrUnderflow(c0, -c1)) {
+ if ((c0 - c1) <= 0) {
+ // array.length + (c0 - c1) won't overflow/underflow.
+ ValueRange* range = new (GetGraph()->GetArena()) ValueRange(
+ GetGraph()->GetArena(),
+ ValueBound(nullptr, right_const - upper.GetConstant()),
+ ValueBound(array_length, right_const - lower.GetConstant()));
+ GetValueRangeMap(sub->GetBlock())->Overwrite(sub->GetId(), range);
+ }
+ }
}
}
}
}
}
+ void FindAndHandlePartialArrayLength(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsShr() || instruction->IsUShr());
+ HInstruction* right = instruction->GetRight();
+ int32_t right_const;
+ if (right->IsIntConstant()) {
+ right_const = right->AsIntConstant()->GetValue();
+ // Detect division by two or more.
+ if ((instruction->IsDiv() && right_const <= 1) ||
+ (instruction->IsShr() && right_const < 1) ||
+ (instruction->IsUShr() && right_const < 1)) {
+ return;
+ }
+ } else {
+ return;
+ }
+
+ // Try to handle array.length/2 or (array.length-1)/2 format.
+ HInstruction* left = instruction->GetLeft();
+ HInstruction* left_of_left; // left input of left.
+ int32_t c = 0;
+ if (ValueBound::IsAddOrSubAConstant(left, &left_of_left, &c)) {
+ left = left_of_left;
+ }
+ // The value of left input of instruction equals (left + c).
+
+ // (array_length + 1) or smaller divided by two or more
+ // always generate a value in [INT_MIN, array_length].
+ // This is true even if array_length is INT_MAX.
+ if (left->IsArrayLength() && c <= 1) {
+ if (instruction->IsUShr() && c < 0) {
+ // Make sure for unsigned shift, left side is not negative.
+ // e.g. if array_length is 2, ((array_length - 3) >>> 2) is way bigger
+ // than array_length.
+ return;
+ }
+ ValueRange* range = new (GetGraph()->GetArena()) ValueRange(
+ GetGraph()->GetArena(),
+ ValueBound(nullptr, INT_MIN),
+ ValueBound(left, 0));
+ GetValueRangeMap(instruction->GetBlock())->Overwrite(instruction->GetId(), range);
+ }
+ }
+
+ void VisitDiv(HDiv* div) {
+ FindAndHandlePartialArrayLength(div);
+ }
+
+ void VisitShr(HShr* shr) {
+ FindAndHandlePartialArrayLength(shr);
+ }
+
+ void VisitUShr(HUShr* ushr) {
+ FindAndHandlePartialArrayLength(ushr);
+ }
+
+ void VisitNewArray(HNewArray* new_array) {
+ HInstruction* len = new_array->InputAt(0);
+ if (!len->IsIntConstant()) {
+ HInstruction *left;
+ int32_t right_const;
+ if (ValueBound::IsAddOrSubAConstant(len, &left, &right_const)) {
+ // (left + right_const) is used as size to new the array.
+ // We record "-right_const <= left <= new_array - right_const";
+ ValueBound lower = ValueBound(nullptr, -right_const);
+ // We use new_array for the bound instead of new_array.length,
+ // which isn't available as an instruction yet. new_array will
+ // be treated the same as new_array.length when it's used in a ValueBound.
+ ValueBound upper = ValueBound(new_array, -right_const);
+ ValueRange* range = new (GetGraph()->GetArena())
+ ValueRange(GetGraph()->GetArena(), lower, upper);
+ GetValueRangeMap(new_array->GetBlock())->Overwrite(left->GetId(), range);
+ }
+ }
+ }
+
std::vector<std::unique_ptr<ArenaSafeMap<int, ValueRange*>>> maps_;
DISALLOW_COPY_AND_ASSIGN(BCEVisitor);
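The overflow guard introduced above, ValueBound::WouldAddOverflowOrUnderflow, is the check the rest of the pass leans on whenever it folds a constant into a bound or offsets a sub by a constant. A minimal standalone sketch of the same check, outside of ART's types (the main() driver and names here are illustrative only, not part of the patch):

#include <cassert>
#include <climits>
#include <cstdint>

// True if (left + right) would overflow or underflow a 32-bit int.
static bool WouldAddOverflowOrUnderflow(int32_t left, int32_t right) {
  if (right == 0) return false;
  if (right > 0 && left <= INT_MAX - right) return false;  // no overflow
  if (right < 0 && left >= INT_MIN - right) return false;  // no underflow
  return true;
}

int main() {
  assert(!WouldAddOverflowOrUnderflow(INT_MAX - 1, 1));  // fits exactly
  assert(WouldAddOverflowOrUnderflow(INT_MAX, 1));       // would wrap past INT_MAX
  assert(WouldAddOverflowOrUnderflow(INT_MIN, -1));      // would wrap past INT_MIN
  return 0;
}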
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 3dcb08d195..a298413d14 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -14,19 +14,22 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "bounds_check_elimination.h"
#include "builder.h"
#include "gvn.h"
+#include "instruction_simplifier.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "side_effects_analysis.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
namespace art {
-static void RunGvn(HGraph* graph) {
+static void RunSimplifierAndGvn(HGraph* graph) {
+ InstructionSimplifier simplify(graph);
+ simplify.Run();
SideEffectsAnalysis side_effects(graph);
side_effects.Run();
GVNOptimization(graph, side_effects).Run();
@@ -127,7 +130,7 @@ TEST(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) {
block3->AddSuccessor(block4); // False successor
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination(graph);
bounds_check_elimination.Run();
ASSERT_FALSE(IsRemoved(bounds_check2));
@@ -202,7 +205,7 @@ TEST(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) {
block3->AddSuccessor(exit);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination(graph);
bounds_check_elimination.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -277,7 +280,7 @@ TEST(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) {
block3->AddSuccessor(exit);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination(graph);
bounds_check_elimination.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -351,7 +354,7 @@ TEST(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) {
exit->AddInstruction(new (&allocator) HExit());
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination(graph);
bounds_check_elimination.Run();
ASSERT_FALSE(IsRemoved(bounds_check5));
@@ -397,7 +400,6 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator,
loop_body->AddSuccessor(loop_header);
HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
- phi->AddInput(constant_initial);
HInstruction* null_check = new (allocator) HNullCheck(parameter, 0);
HInstruction* array_length = new (allocator) HArrayLength(null_check);
HInstruction* cmp = nullptr;
@@ -413,6 +415,7 @@ static HGraph* BuildSSAGraph1(ArenaAllocator* allocator,
loop_header->AddInstruction(array_length);
loop_header->AddInstruction(cmp);
loop_header->AddInstruction(if_inst);
+ phi->AddInput(constant_initial);
null_check = new (allocator) HNullCheck(parameter, 0);
array_length = new (allocator) HArrayLength(null_check);
@@ -450,7 +453,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
// HArrayLength which uses the null check as its input.
graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
bounds_check_elimination_after_gvn.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -458,7 +461,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
// for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. }
graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
bounds_check_elimination_with_initial_1.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -466,7 +469,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
// for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. }
graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph);
bounds_check_elimination_with_initial_minus_1.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -474,7 +477,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
// for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. }
graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
bounds_check_elimination_with_greater_than.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -483,7 +486,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
// array[i] = 10; // Can't eliminate due to overflow concern. }
graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_increment_2(graph);
bounds_check_elimination_with_increment_2.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -491,7 +494,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) {
// for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. }
graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph);
bounds_check_elimination_with_increment_2_from_1.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -541,7 +544,6 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator,
loop_body->AddSuccessor(loop_header);
HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
- phi->AddInput(array_length);
HInstruction* cmp = nullptr;
if (cond == kCondLE) {
cmp = new (allocator) HLessThanOrEqual(phi, constant_initial);
@@ -553,6 +555,7 @@ static HGraph* BuildSSAGraph2(ArenaAllocator* allocator,
loop_header->AddPhi(phi);
loop_header->AddInstruction(cmp);
loop_header->AddInstruction(if_inst);
+ phi->AddInput(array_length);
HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_minus_1);
null_check = new (allocator) HNullCheck(parameter, 0);
@@ -591,7 +594,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) {
// HArrayLength which uses the null check as its input.
graph = BuildSSAGraph2(&allocator, &bounds_check, 0);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
bounds_check_elimination_after_gvn.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -599,7 +602,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) {
// for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. }
graph = BuildSSAGraph2(&allocator, &bounds_check, 1);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
bounds_check_elimination_with_initial_1.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -607,7 +610,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) {
// for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. }
graph = BuildSSAGraph2(&allocator, &bounds_check, -1);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph);
bounds_check_elimination_with_initial_minus_1.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -615,7 +618,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) {
// for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. }
graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_less_than(graph);
bounds_check_elimination_with_less_than.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -623,13 +626,13 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) {
// for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. }
graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph);
bounds_check_elimination_increment_minus_2.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
}
-// int[] array = new array[10];
+// int[] array = new int[10];
// for (int i=0; i<10; i+=increment) { array[i] = 10; }
static HGraph* BuildSSAGraph3(ArenaAllocator* allocator,
HInstruction** bounds_check,
@@ -669,7 +672,6 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator,
loop_body->AddSuccessor(loop_header);
HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
- phi->AddInput(constant_initial);
HInstruction* cmp = nullptr;
if (cond == kCondGE) {
cmp = new (allocator) HGreaterThanOrEqual(phi, constant_10);
@@ -681,6 +683,7 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator,
loop_header->AddPhi(phi);
loop_header->AddInstruction(cmp);
loop_header->AddInstruction(if_inst);
+ phi->AddInput(constant_initial);
HNullCheck* null_check = new (allocator) HNullCheck(new_array, 0);
HArrayLength* array_length = new (allocator) HArrayLength(null_check);
@@ -705,39 +708,39 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) {
ArenaPool pool;
ArenaAllocator allocator(&pool);
- // int[] array = new array[10];
+ // int[] array = new int[10];
// for (int i=0; i<10; i++) { array[i] = 10; // Can eliminate. }
HInstruction* bounds_check = nullptr;
HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
bounds_check_elimination_after_gvn.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
- // int[] array = new array[10];
+ // int[] array = new int[10];
// for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. }
graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
bounds_check_elimination_with_initial_1.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
- // int[] array = new array[10];
+ // int[] array = new int[10];
// for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. }
graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
bounds_check_elimination_with_greater_than.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
- // int[] array = new array[10];
+ // int[] array = new int[10];
// for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. }
graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_increment_8(graph);
bounds_check_elimination_increment_8.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -782,7 +785,6 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator,
loop_body->AddSuccessor(loop_header);
HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt);
- phi->AddInput(constant_initial);
HInstruction* null_check = new (allocator) HNullCheck(parameter, 0);
HInstruction* array_length = new (allocator) HArrayLength(null_check);
HInstruction* cmp = nullptr;
@@ -797,6 +799,7 @@ static HGraph* BuildSSAGraph4(ArenaAllocator* allocator,
loop_header->AddInstruction(array_length);
loop_header->AddInstruction(cmp);
loop_header->AddInstruction(if_inst);
+ phi->AddInput(constant_initial);
null_check = new (allocator) HNullCheck(parameter, 0);
array_length = new (allocator) HArrayLength(null_check);
@@ -838,7 +841,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) {
// HArrayLength which uses the null check as its input.
graph = BuildSSAGraph4(&allocator, &bounds_check, 0);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_after_gvn(graph);
bounds_check_elimination_after_gvn.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -846,7 +849,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) {
// for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. }
graph = BuildSSAGraph4(&allocator, &bounds_check, 1);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_initial_1(graph);
bounds_check_elimination_with_initial_1.Run();
ASSERT_TRUE(IsRemoved(bounds_check));
@@ -854,7 +857,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) {
// for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. }
graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
BoundsCheckElimination bounds_check_elimination_with_greater_than(graph);
bounds_check_elimination_with_greater_than.Run();
ASSERT_FALSE(IsRemoved(bounds_check));
@@ -901,7 +904,6 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
HBasicBlock* outer_header = new (&allocator) HBasicBlock(graph);
graph->AddBlock(outer_header);
HPhi* phi_i = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt);
- phi_i->AddInput(constant_0);
HNullCheck* null_check = new (&allocator) HNullCheck(parameter, 0);
HArrayLength* array_length = new (&allocator) HArrayLength(null_check);
HAdd* add = new (&allocator) HAdd(Primitive::kPrimInt, array_length, constant_minus_1);
@@ -913,11 +915,11 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
outer_header->AddInstruction(add);
outer_header->AddInstruction(cmp);
outer_header->AddInstruction(if_inst);
+ phi_i->AddInput(constant_0);
HBasicBlock* inner_header = new (&allocator) HBasicBlock(graph);
graph->AddBlock(inner_header);
HPhi* phi_j = new (&allocator) HPhi(&allocator, 0, 0, Primitive::kPrimInt);
- phi_j->AddInput(constant_0);
null_check = new (&allocator) HNullCheck(parameter, 0);
array_length = new (&allocator) HArrayLength(null_check);
HSub* sub = new (&allocator) HSub(Primitive::kPrimInt, array_length, phi_i);
@@ -931,6 +933,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
inner_header->AddInstruction(add);
inner_header->AddInstruction(cmp);
inner_header->AddInstruction(if_inst);
+ phi_j->AddInput(constant_0);
HBasicBlock* inner_body_compare = new (&allocator) HBasicBlock(graph);
graph->AddBlock(inner_body_compare);
@@ -1030,7 +1033,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
outer_body_add->AddSuccessor(outer_header);
graph->BuildDominatorTree();
- RunGvn(graph);
+ RunSimplifierAndGvn(graph);
// gvn should remove the same bounds check.
ASSERT_FALSE(IsRemoved(bounds_check1));
ASSERT_FALSE(IsRemoved(bounds_check2));
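FindAndHandlePartialArrayLength in bounds_check_elimination.cc above rests on an arithmetic claim: for any non-negative array length and any added constant c <= 1, (length + c) divided by two or more never exceeds the length, even when the length is INT_MAX. A small standalone check of that claim, done in 64-bit arithmetic to sidestep the INT_MAX + 1 wrap (sample values only; the pass itself also bails out for unsigned shifts with a negative c, as its comment explains):

#include <cassert>
#include <climits>
#include <cstdint>

int main() {
  const int64_t lengths[] = {0, 1, 2, 1000, INT_MAX};
  const int64_t constants[] = {-5, -1, 0, 1};  // c <= 1
  const int64_t divisors[] = {2, 3, 4, 16};    // divide by two or more
  for (int64_t len : lengths) {
    for (int64_t c : constants) {
      for (int64_t d : divisors) {
        // (len + c) / d must stay in [INT_MIN, len].
        assert((len + c) / d <= len);
      }
    }
  }
  return 0;
}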
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index c5101363ee..3e4a6169d9 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -17,13 +17,13 @@
#ifndef ART_COMPILER_OPTIMIZING_BUILDER_H_
#define ART_COMPILER_OPTIMIZING_BUILDER_H_
+#include "base/arena_object.h"
#include "dex_file.h"
#include "dex_file-inl.h"
#include "driver/compiler_driver.h"
#include "driver/dex_compilation_unit.h"
#include "optimizing_compiler_stats.h"
#include "primitive.h"
-#include "utils/arena_object.h"
#include "utils/growable_array.h"
#include "nodes.h"
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index fd4e391470..2a57fdc929 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -40,9 +40,17 @@ size_t CodeGenerator::GetCacheOffset(uint32_t index) {
return mirror::ObjectArray<mirror::Object>::OffsetOfElement(index).SizeValue();
}
-void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
- DCHECK_EQ(frame_size_, kUninitializedFrameSize);
+static bool IsSingleGoto(HBasicBlock* block) {
+ HLoopInformation* loop_info = block->GetLoopInformation();
+ // TODO: Remove the null check b/19084197.
+ return (block->GetFirstInstruction() != nullptr)
+ && (block->GetFirstInstruction() == block->GetLastInstruction())
+ && block->GetLastInstruction()->IsGoto()
+ // Back edges generate the suspend check.
+ && (loop_info == nullptr || !loop_info->IsBackEdge(block));
+}
+void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
Initialize();
if (!is_leaf) {
MarkNotLeaf();
@@ -58,19 +66,43 @@ void CodeGenerator::CompileBaseline(CodeAllocator* allocator, bool is_leaf) {
CompileInternal(allocator, /* is_baseline */ true);
}
+bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
+ DCHECK_EQ(block_order_->Get(current_block_index_), current);
+ return GetNextBlockToEmit() == FirstNonEmptyBlock(next);
+}
+
+HBasicBlock* CodeGenerator::GetNextBlockToEmit() const {
+ for (size_t i = current_block_index_ + 1; i < block_order_->Size(); ++i) {
+ HBasicBlock* block = block_order_->Get(i);
+ if (!IsSingleGoto(block)) {
+ return block;
+ }
+ }
+ return nullptr;
+}
+
+HBasicBlock* CodeGenerator::FirstNonEmptyBlock(HBasicBlock* block) const {
+ while (IsSingleGoto(block)) {
+ block = block->GetSuccessors().Get(0);
+ }
+ return block;
+}
+
void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
- HGraphVisitor* location_builder = GetLocationBuilder();
HGraphVisitor* instruction_visitor = GetInstructionVisitor();
DCHECK_EQ(current_block_index_, 0u);
GenerateFrameEntry();
for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) {
HBasicBlock* block = block_order_->Get(current_block_index_);
+ // Don't generate code for an empty block. Its predecessors will branch to its successor
+ // directly. Also, the label of that block will not be emitted, so this helps catch
+ // errors where we reference that label.
+ if (IsSingleGoto(block)) continue;
Bind(block);
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* current = it.Current();
if (is_baseline) {
- current->Accept(location_builder);
- InitLocations(current);
+ InitLocationsBaseline(current);
}
current->Accept(instruction_visitor);
}
@@ -88,7 +120,6 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline)
void CodeGenerator::CompileOptimized(CodeAllocator* allocator) {
// The register allocator already called `InitializeCodeGeneration`,
// where the frame size has been computed.
- DCHECK_NE(frame_size_, kUninitializedFrameSize);
DCHECK(block_order_ != nullptr);
Initialize();
CompileInternal(allocator, /* is_baseline */ false);
@@ -138,13 +169,22 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
ComputeSpillMask();
first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
- SetFrameSize(RoundUp(
- number_of_spill_slots * kVRegSize
- + number_of_out_slots * kVRegSize
- + maximum_number_of_live_core_registers * GetWordSize()
- + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
- + FrameEntrySpillSize(),
- kStackAlignment));
+ if (number_of_spill_slots == 0
+ && !HasAllocatedCalleeSaveRegisters()
+ && IsLeafMethod()
+ && !RequiresCurrentMethod()) {
+ DCHECK_EQ(maximum_number_of_live_core_registers, 0u);
+ DCHECK_EQ(maximum_number_of_live_fp_registers, 0u);
+ SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
+ } else {
+ SetFrameSize(RoundUp(
+ number_of_spill_slots * kVRegSize
+ + number_of_out_slots * kVRegSize
+ + maximum_number_of_live_core_registers * GetWordSize()
+ + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
+ + FrameEntrySpillSize(),
+ kStackAlignment));
+ }
}
Location CodeGenerator::GetTemporaryLocation(HTemporary* temp) const {
@@ -294,7 +334,8 @@ void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
}
}
-void CodeGenerator::InitLocations(HInstruction* instruction) {
+void CodeGenerator::InitLocationsBaseline(HInstruction* instruction) {
+ AllocateLocations(instruction);
if (instruction->GetLocations() == nullptr) {
if (instruction->IsTemporary()) {
HInstruction* previous = instruction->GetPrevious();
@@ -320,10 +361,17 @@ void CodeGenerator::InitLocations(HInstruction* instruction) {
}
}
-bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const {
- DCHECK_EQ(block_order_->Get(current_block_index_), current);
- return (current_block_index_ < block_order_->Size() - 1)
- && (block_order_->Get(current_block_index_ + 1) == next);
+void CodeGenerator::AllocateLocations(HInstruction* instruction) {
+ instruction->Accept(GetLocationBuilder());
+ LocationSummary* locations = instruction->GetLocations();
+ if (!instruction->IsSuspendCheckEntry()) {
+ if (locations != nullptr && locations->CanCall()) {
+ MarkNotLeaf();
+ }
+ if (instruction->NeedsCurrentMethod()) {
+ SetRequiresCurrentMethod();
+ }
+ }
}
CodeGenerator* CodeGenerator::Create(HGraph* graph,
@@ -572,7 +620,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
Location location = locations->GetEnvironmentAt(i);
switch (location.GetKind()) {
case Location::kConstant: {
- DCHECK(current == location.GetConstant());
+ DCHECK_EQ(current, location.GetConstant());
if (current->IsLongConstant()) {
int64_t value = current->AsLongConstant()->GetValue();
stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, Low32Bits(value));
@@ -588,6 +636,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc) {
} else if (current->IsIntConstant()) {
int32_t value = current->AsIntConstant()->GetValue();
stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, value);
+ } else if (current->IsNullConstant()) {
+ stack_map_stream_.AddDexRegisterEntry(DexRegisterMap::kConstant, 0);
} else {
DCHECK(current->IsFloatConstant());
int32_t value = bit_cast<float, int32_t>(current->AsFloatConstant()->GetValue());
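The InitializeCodeGeneration change above lets fully leaf methods skip building a stack frame: when there are no spill slots, no callee-save registers were allocated beyond the return PC, the method is a leaf, and nothing needs the current method, the frame shrinks to just the return address pushed by the call (or to nothing on architectures where calls do not push the PC). A simplified re-expression of that decision, with illustrative parameter names and an assumed 16-byte stack alignment:

#include <cstddef>

static constexpr size_t kVRegSize = 4;
static constexpr size_t kStackAlignment = 16;  // assumption for this sketch

static size_t RoundUpTo(size_t value, size_t alignment) {
  return (value + alignment - 1) / alignment * alignment;
}

// Mirrors the two branches of InitializeCodeGeneration (illustrative only).
static size_t ComputeFrameSize(size_t spill_slots,
                               size_t out_slots,
                               size_t live_core_regs,
                               size_t live_fp_regs,
                               size_t word_size,
                               size_t fp_slot_size,
                               size_t entry_spill_size,
                               bool has_callee_saves,
                               bool is_leaf,
                               bool needs_current_method,
                               bool call_pushes_pc /* x86 / x86-64 */) {
  if (spill_slots == 0 && !has_callee_saves && is_leaf && !needs_current_method) {
    // "Empty" frame: only the return PC that the call itself pushed, if any.
    return call_pushes_pc ? word_size : 0;
  }
  return RoundUpTo(spill_slots * kVRegSize +
                       out_slots * kVRegSize +
                       live_core_regs * word_size +
                       live_fp_regs * fp_slot_size +
                       entry_spill_size,
                   kStackAlignment);
}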
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ab63b911b2..f46a36d02f 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -30,7 +30,6 @@
namespace art {
static size_t constexpr kVRegSize = 4;
-static size_t constexpr kUninitializedFrameSize = 0;
// Binary encoding of 2^32 for type double.
static int64_t constexpr k2Pow32EncodingForDouble = INT64_C(0x41F0000000000000);
@@ -92,6 +91,8 @@ class CodeGenerator {
HGraph* GetGraph() const { return graph_; }
+ HBasicBlock* GetNextBlockToEmit() const;
+ HBasicBlock* FirstNonEmptyBlock(HBasicBlock* block) const;
bool GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const;
size_t GetStackSlotOfParameter(HParameterValue* parameter) const {
@@ -107,8 +108,6 @@ class CodeGenerator {
virtual void GenerateFrameExit() = 0;
virtual void Bind(HBasicBlock* block) = 0;
virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
- virtual HGraphVisitor* GetLocationBuilder() = 0;
- virtual HGraphVisitor* GetInstructionVisitor() = 0;
virtual Assembler* GetAssembler() = 0;
virtual size_t GetWordSize() const = 0;
virtual size_t GetFloatingPointSpillSlotSize() const = 0;
@@ -196,6 +195,15 @@ class CodeGenerator {
void MarkNotLeaf() {
is_leaf_ = false;
+ requires_current_method_ = true;
+ }
+
+ void SetRequiresCurrentMethod() {
+ requires_current_method_ = true;
+ }
+
+ bool RequiresCurrentMethod() const {
+ return requires_current_method_;
}
// Clears the spill slots taken by loop phis in the `LocationSummary` of the
@@ -228,6 +236,41 @@ class CodeGenerator {
allocated_registers_.Add(location);
}
+ void AllocateLocations(HInstruction* instruction);
+
+  // Tells whether the stack frame of the compiled method is
+  // considered "empty", that is, whether it actually has a size of zero,
+  // or only contains the saved return address register.
+ bool HasEmptyFrame() const {
+ return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0);
+ }
+
+ static int32_t GetInt32ValueOf(HConstant* constant) {
+ if (constant->IsIntConstant()) {
+ return constant->AsIntConstant()->GetValue();
+ } else if (constant->IsNullConstant()) {
+ return 0;
+ } else {
+ DCHECK(constant->IsFloatConstant());
+ return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+ }
+ }
+
+ static int64_t GetInt64ValueOf(HConstant* constant) {
+ if (constant->IsIntConstant()) {
+ return constant->AsIntConstant()->GetValue();
+ } else if (constant->IsNullConstant()) {
+ return 0;
+ } else if (constant->IsFloatConstant()) {
+ return bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
+ } else if (constant->IsLongConstant()) {
+ return constant->AsLongConstant()->GetValue();
+ } else {
+ DCHECK(constant->IsDoubleConstant());
+ return bit_cast<double, int64_t>(constant->AsDoubleConstant()->GetValue());
+ }
+ }
+
protected:
CodeGenerator(HGraph* graph,
size_t number_of_core_registers,
@@ -236,7 +279,7 @@ class CodeGenerator {
uint32_t core_callee_save_mask,
uint32_t fpu_callee_save_mask,
const CompilerOptions& compiler_options)
- : frame_size_(kUninitializedFrameSize),
+ : frame_size_(0),
core_spill_mask_(0),
fpu_spill_mask_(0),
first_register_slot_in_slow_path_(0),
@@ -255,6 +298,7 @@ class CodeGenerator {
block_order_(nullptr),
current_block_index_(0),
is_leaf_(true),
+ requires_current_method_(false),
stack_map_stream_(graph->GetArena()) {}
// Register allocation logic.
@@ -269,11 +313,12 @@ class CodeGenerator {
virtual Location GetStackLocation(HLoadLocal* load) const = 0;
virtual ParallelMoveResolver* GetMoveResolver() = 0;
+ virtual HGraphVisitor* GetLocationBuilder() = 0;
+ virtual HGraphVisitor* GetInstructionVisitor() = 0;
// Returns the location of the first spilled entry for floating point registers,
// relative to the stack pointer.
uint32_t GetFpuSpillStart() const {
- DCHECK_NE(frame_size_, kUninitializedFrameSize);
return GetFrameSize() - FrameEntrySpillSize();
}
@@ -289,6 +334,25 @@ class CodeGenerator {
return GetFpuSpillSize() + GetCoreSpillSize();
}
+ bool HasAllocatedCalleeSaveRegisters() const {
+    // We check against 1 because the allocated core registers always comprise the return PC.
+ return (POPCOUNT(allocated_registers_.GetCoreRegisters() & core_callee_save_mask_) != 1)
+ || (POPCOUNT(allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_) != 0);
+ }
+
+ bool CallPushesPC() const {
+ InstructionSet instruction_set = GetInstructionSet();
+ return instruction_set == kX86 || instruction_set == kX86_64;
+ }
+
+ // Arm64 has its own type for a label, so we need to templatize this method
+ // to share the logic.
+ template <typename T>
+ T* CommonGetLabelOf(T* raw_pointer_to_labels_array, HBasicBlock* block) const {
+ block = FirstNonEmptyBlock(block);
+ return raw_pointer_to_labels_array + block->GetBlockId();
+ }
+
// Frame size required for this method.
uint32_t frame_size_;
uint32_t core_spill_mask_;
@@ -311,7 +375,7 @@ class CodeGenerator {
const uint32_t fpu_callee_save_mask_;
private:
- void InitLocations(HInstruction* instruction);
+ void InitLocationsBaseline(HInstruction* instruction);
size_t GetStackOffsetOfSavedRegister(size_t index);
void CompileInternal(CodeAllocator* allocator, bool is_baseline);
@@ -328,8 +392,12 @@ class CodeGenerator {
// we are generating code for.
size_t current_block_index_;
+ // Whether the method is a leaf method.
bool is_leaf_;
+ // Whether an instruction in the graph accesses the current method.
+ bool requires_current_method_;
+
StackMapStream stack_map_stream_;
DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
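A short usage sketch for the two helpers added above, mirroring how the ARM/x86 Move and EmitMove hunks later in this change call them (the surrounding move logic is elided):

    HConstant* constant = source.GetConstant();
    if (constant->IsIntConstant() || constant->IsNullConstant()) {
      // Null folds to the 32-bit value 0, so int and null share one code path;
      // a float constant would come back as its raw bit pattern.
      int32_t value = CodeGenerator::GetInt32ValueOf(constant);
      __ LoadImmediate(destination.AsRegister<Register>(), value);
    }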
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 78fd181dcf..e864ae1cec 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -19,6 +19,8 @@
#include "arch/arm/instruction_set_features_arm.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
+#include "intrinsics.h"
+#include "intrinsics_arm.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/class.h"
@@ -32,11 +34,6 @@ namespace art {
namespace arm {
-static DRegister FromLowSToD(SRegister reg) {
- DCHECK_EQ(reg % 2, 0);
- return static_cast<DRegister>(reg / 2);
-}
-
static bool ExpectedPairLayout(Location location) {
  // We expect this for both core and fpu register pairs.
return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
@@ -58,6 +55,10 @@ static constexpr Register kCoreCalleeSaves[] =
static constexpr SRegister kFpuCalleeSaves[] =
{ S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31 };
+// D31 cannot be split into two S registers, and the register allocator only works on
+// S registers. Therefore there is no need to block it.
+static constexpr DRegister DTMP = D31;
+
class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> {
public:
InvokeRuntimeCallingConvention()
@@ -73,20 +74,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, SRegister> {
#define __ reinterpret_cast<ArmAssembler*>(codegen->GetAssembler())->
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
-class SlowPathCodeARM : public SlowPathCode {
- public:
- SlowPathCodeARM() : entry_label_(), exit_label_() {}
-
- Label* GetEntryLabel() { return &entry_label_; }
- Label* GetExitLabel() { return &exit_label_; }
-
- private:
- Label entry_label_;
- Label exit_label_;
-
- DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM);
-};
-
class NullCheckSlowPathARM : public SlowPathCodeARM {
public:
explicit NullCheckSlowPathARM(HNullCheck* instruction) : instruction_(instruction) {}
@@ -396,10 +383,6 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
move_resolver_(graph->GetArena(), this),
assembler_(true),
isa_features_(isa_features) {
- // Save one extra register for baseline. Note that on thumb2, there is no easy
- // instruction to restore just the PC, so this actually helps both baseline
- // and non-baseline to save and restore at least two registers at entry and exit.
- AddAllocatedRegister(Location::RegisterLocation(kCoreSavedRegisterForBaseline));
// Save the PC register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(PC));
}
@@ -508,6 +491,10 @@ static uint32_t LeastSignificantBit(uint32_t mask) {
void CodeGeneratorARM::ComputeSpillMask() {
core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
+ // Save one extra register for baseline. Note that on thumb2, there is no easy
+ // instruction to restore just the PC, so this actually helps both baseline
+ // and non-baseline to save and restore at least two registers at entry and exit.
+ core_spill_mask_ |= (1 << kCoreSavedRegisterForBaseline);
DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
// We use vpush and vpop for saving and restoring floating point registers, which take
@@ -529,6 +516,10 @@ void CodeGeneratorARM::GenerateFrameEntry() {
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
__ Bind(&frame_entry_label_);
+ if (HasEmptyFrame()) {
+ return;
+ }
+
if (!skip_overflow_check) {
__ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
__ LoadFromOffset(kLoadWord, IP, IP, 0);
@@ -547,6 +538,10 @@ void CodeGeneratorARM::GenerateFrameEntry() {
}
void CodeGeneratorARM::GenerateFrameExit() {
+ if (HasEmptyFrame()) {
+ __ bx(LR);
+ return;
+ }
__ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize());
if (fpu_spill_mask_ != 0) {
SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
@@ -784,8 +779,8 @@ void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
if (locations != nullptr && locations->Out().IsConstant()) {
HConstant* const_to_move = locations->Out().GetConstant();
- if (const_to_move->IsIntConstant()) {
- int32_t value = const_to_move->AsIntConstant()->GetValue();
+ if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
+ int32_t value = GetInt32ValueOf(const_to_move);
if (location.IsRegister()) {
__ LoadImmediate(location.AsRegister<Register>(), value);
} else {
@@ -952,8 +947,8 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
__ cmp(left, ShifterOperand(locations->InAt(1).AsRegister<Register>()));
} else {
DCHECK(locations->InAt(1).IsConstant());
- int32_t value =
- locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ HConstant* constant = locations->InAt(1).GetConstant();
+ int32_t value = CodeGenerator::GetInt32ValueOf(constant);
ShifterOperand operand;
if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, value, &operand)) {
__ cmp(left, operand);
@@ -1114,6 +1109,17 @@ void InstructionCodeGeneratorARM::VisitIntConstant(HIntConstant* constant) {
UNUSED(constant);
}
+void LocationsBuilderARM::VisitNullConstant(HNullConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARM::VisitNullConstant(HNullConstant* constant) {
+ // Will be generated at use site.
+ UNUSED(constant);
+}
+
void LocationsBuilderARM::VisitLongConstant(HLongConstant* constant) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -1168,44 +1174,37 @@ void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) {
}
void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
+ codegen_->GetInstructionSetFeatures());
+ if (intrinsic.TryDispatch(invoke)) {
+ return;
+ }
+
HandleInvoke(invoke);
}
void CodeGeneratorARM::LoadCurrentMethod(Register reg) {
+ DCHECK(RequiresCurrentMethod());
__ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset);
}
-void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
- Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
-
- // TODO: Implement all kinds of calls:
- // 1) boot -> boot
- // 2) app -> boot
- // 3) app -> app
- //
- // Currently we implement the app -> app logic, which looks up in the resolve cache.
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) {
+ if (invoke->GetLocations()->Intrinsified()) {
+ IntrinsicCodeGeneratorARM intrinsic(codegen);
+ intrinsic.Dispatch(invoke);
+ return true;
+ }
+ return false;
+}
- // temp = method;
- codegen_->LoadCurrentMethod(temp);
- if (!invoke->IsRecursive()) {
- // temp = temp->dex_cache_resolved_methods_;
- __ LoadFromOffset(
- kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
- // temp = temp[index_in_cache]
- __ LoadFromOffset(
- kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
- // LR = temp[offset_of_quick_compiled_code]
- __ LoadFromOffset(kLoadWord, LR, temp,
- mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArmWordSize).Int32Value());
- // LR()
- __ blx(LR);
- } else {
- __ bl(codegen_->GetFrameEntryLabel());
+void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+ if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ return;
}
- codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
- DCHECK(!codegen_->IsLeafMethod());
+ Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+
+ codegen_->GenerateStaticOrDirectCall(invoke, temp);
}
void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) {
@@ -1223,10 +1222,20 @@ void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) {
}
void LocationsBuilderARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ IntrinsicLocationsBuilderARM intrinsic(GetGraph()->GetArena(),
+ codegen_->GetInstructionSetFeatures());
+ if (intrinsic.TryDispatch(invoke)) {
+ return;
+ }
+
HandleInvoke(invoke);
}
void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ return;
+ }
+
Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
uint32_t method_offset = mirror::Class::EmbeddedVTableOffset().Uint32Value() +
invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
@@ -3366,16 +3375,44 @@ void ParallelMoveResolverARM::EmitMove(size_t index) {
__ StoreSToOffset(source.AsFpuRegister<SRegister>(), SP, destination.GetStackIndex());
}
} else if (source.IsDoubleStackSlot()) {
- DCHECK(destination.IsDoubleStackSlot()) << destination;
- __ LoadFromOffset(kLoadWord, IP, SP, source.GetStackIndex());
- __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
- __ LoadFromOffset(kLoadWord, IP, SP, source.GetHighStackIndex(kArmWordSize));
- __ StoreToOffset(kStoreWord, IP, SP, destination.GetHighStackIndex(kArmWordSize));
+ if (destination.IsDoubleStackSlot()) {
+ __ LoadDFromOffset(DTMP, SP, source.GetStackIndex());
+ __ StoreDToOffset(DTMP, SP, destination.GetStackIndex());
+ } else if (destination.IsRegisterPair()) {
+ DCHECK(ExpectedPairLayout(destination));
+ __ LoadFromOffset(
+ kLoadWordPair, destination.AsRegisterPairLow<Register>(), SP, source.GetStackIndex());
+ } else {
+ DCHECK(destination.IsFpuRegisterPair()) << destination;
+ __ LoadDFromOffset(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
+ SP,
+ source.GetStackIndex());
+ }
+ } else if (source.IsRegisterPair()) {
+ if (destination.IsRegisterPair()) {
+ __ Mov(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
+ __ Mov(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ DCHECK(ExpectedPairLayout(source));
+ __ StoreToOffset(
+ kStoreWordPair, source.AsRegisterPairLow<Register>(), SP, destination.GetStackIndex());
+ }
+ } else if (source.IsFpuRegisterPair()) {
+ if (destination.IsFpuRegisterPair()) {
+ __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
+ FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()));
+ } else {
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ __ StoreDToOffset(FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()),
+ SP,
+ destination.GetStackIndex());
+ }
} else {
DCHECK(source.IsConstant()) << source;
- HInstruction* constant = source.GetConstant();
- if (constant->IsIntConstant()) {
- int32_t value = constant->AsIntConstant()->GetValue();
+ HConstant* constant = source.GetConstant();
+ if (constant->IsIntConstant() || constant->IsNullConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(constant);
if (destination.IsRegister()) {
__ LoadImmediate(destination.AsRegister<Register>(), value);
} else {
@@ -3385,17 +3422,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) {
}
} else if (constant->IsLongConstant()) {
int64_t value = constant->AsLongConstant()->GetValue();
- if (destination.IsRegister()) {
- // In the presence of long or double constants, the parallel move resolver will
- // split the move into two, but keeps the same constant for both moves. Here,
- // we use the low or high part depending on which register this move goes to.
- if (destination.reg() % 2 == 0) {
- __ LoadImmediate(destination.AsRegister<Register>(), Low32Bits(value));
- } else {
- __ LoadImmediate(destination.AsRegister<Register>(), High32Bits(value));
- }
+ if (destination.IsRegisterPair()) {
+ __ LoadImmediate(destination.AsRegisterPairLow<Register>(), Low32Bits(value));
+ __ LoadImmediate(destination.AsRegisterPairHigh<Register>(), High32Bits(value));
} else {
- DCHECK(destination.IsDoubleStackSlot());
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
__ LoadImmediate(IP, Low32Bits(value));
__ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
__ LoadImmediate(IP, High32Bits(value));
@@ -3403,20 +3434,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) {
}
} else if (constant->IsDoubleConstant()) {
double value = constant->AsDoubleConstant()->GetValue();
- uint64_t int_value = bit_cast<uint64_t, double>(value);
- if (destination.IsFpuRegister()) {
- // In the presence of long or double constants, the parallel move resolver will
- // split the move into two, but keeps the same constant for both moves. Here,
- // we use the low or high part depending on which register this move goes to.
- if (destination.reg() % 2 == 0) {
- __ LoadSImmediate(destination.AsFpuRegister<SRegister>(),
- bit_cast<float, uint32_t>(Low32Bits(int_value)));
- } else {
- __ LoadSImmediate(destination.AsFpuRegister<SRegister>(),
- bit_cast<float, uint32_t>(High32Bits(int_value)));
- }
+ if (destination.IsFpuRegisterPair()) {
+ __ LoadDImmediate(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()), value);
} else {
- DCHECK(destination.IsDoubleStackSlot());
+ DCHECK(destination.IsDoubleStackSlot()) << destination;
+ uint64_t int_value = bit_cast<uint64_t, double>(value);
__ LoadImmediate(IP, Low32Bits(int_value));
__ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
__ LoadImmediate(IP, High32Bits(int_value));
@@ -3474,6 +3496,40 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) {
__ vmovrs(IP, source.AsFpuRegister<SRegister>());
__ vmovs(source.AsFpuRegister<SRegister>(), destination.AsFpuRegister<SRegister>());
__ vmovsr(destination.AsFpuRegister<SRegister>(), IP);
+ } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
+ __ vmovdrr(DTMP, source.AsRegisterPairLow<Register>(), source.AsRegisterPairHigh<Register>());
+ __ Mov(source.AsRegisterPairLow<Register>(), destination.AsRegisterPairLow<Register>());
+ __ Mov(source.AsRegisterPairHigh<Register>(), destination.AsRegisterPairHigh<Register>());
+ __ vmovrrd(destination.AsRegisterPairLow<Register>(),
+ destination.AsRegisterPairHigh<Register>(),
+ DTMP);
+ } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
+ Register low_reg = source.IsRegisterPair()
+ ? source.AsRegisterPairLow<Register>()
+ : destination.AsRegisterPairLow<Register>();
+ int mem = source.IsRegisterPair()
+ ? destination.GetStackIndex()
+ : source.GetStackIndex();
+ DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
+ __ vmovdrr(DTMP, low_reg, static_cast<Register>(low_reg + 1));
+ __ LoadFromOffset(kLoadWordPair, low_reg, SP, mem);
+ __ StoreDToOffset(DTMP, SP, mem);
+ } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
+ DRegister first = FromLowSToD(source.AsFpuRegisterPairLow<SRegister>());
+ DRegister second = FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>());
+ __ vmovd(DTMP, first);
+ __ vmovd(first, second);
+ __ vmovd(second, DTMP);
+ } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
+ DRegister reg = source.IsFpuRegisterPair()
+ ? FromLowSToD(source.AsFpuRegisterPairLow<SRegister>())
+ : FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>());
+ int mem = source.IsFpuRegisterPair()
+ ? destination.GetStackIndex()
+ : source.GetStackIndex();
+ __ vmovd(DTMP, reg);
+ __ LoadDFromOffset(reg, SP, mem);
+ __ StoreDToOffset(DTMP, SP, mem);
} else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
SRegister reg = source.IsFpuRegister() ? source.AsFpuRegister<SRegister>()
: destination.AsFpuRegister<SRegister>();
@@ -3482,7 +3538,7 @@ void ParallelMoveResolverARM::EmitSwap(size_t index) {
: source.GetStackIndex();
__ vmovrs(IP, reg);
- __ LoadFromOffset(kLoadWord, IP, SP, mem);
+ __ LoadSFromOffset(reg, SP, mem);
__ StoreToOffset(kStoreWord, IP, SP, mem);
} else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
Exchange(source.GetStackIndex(), destination.GetStackIndex());
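A small sketch of the invariant the pair moves above depend on (register choice is illustrative): ExpectedPairLayout guarantees an even low register with the high register right after it, which is what allows a single kLoadWordPair/kStoreWordPair access; DTMP (D31) is a safe 64-bit scratch because it has no S-register aliases and the allocator only works on S registers.

    Location pair = Location::RegisterPairLocation(R4, R5);
    DCHECK(ExpectedPairLayout(pair));  // Low register is even and high == low + 1.
    // Both halves are stored with one paired access instead of two word stores.
    __ StoreToOffset(kStoreWordPair, pair.AsRegisterPairLow<Register>(), SP, 0);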
@@ -3776,5 +3832,50 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr
}
}
+void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp) {
+ DCHECK_EQ(temp, kArtMethodRegister);
+
+ // TODO: Implement all kinds of calls:
+ // 1) boot -> boot
+ // 2) app -> boot
+ // 3) app -> app
+ //
+ // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+ // temp = method;
+ LoadCurrentMethod(temp);
+ if (!invoke->IsRecursive()) {
+ // temp = temp->dex_cache_resolved_methods_;
+ __ LoadFromOffset(
+ kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
+ // temp = temp[index_in_cache]
+ __ LoadFromOffset(
+ kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
+ // LR = temp[offset_of_quick_compiled_code]
+ __ LoadFromOffset(kLoadWord, LR, temp,
+ mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArmWordSize).Int32Value());
+ // LR()
+ __ blx(LR);
+ } else {
+ __ bl(GetFrameEntryLabel());
+ }
+
+ RecordPcInfo(invoke, invoke->GetDexPc());
+ DCHECK(!IsLeafMethod());
+}
+
+void LocationsBuilderARM::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorARM::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
} // namespace arm
} // namespace art
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 4b03dffd38..f1a3729c13 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -39,6 +39,14 @@ static constexpr SRegister kParameterFpuRegisters[] =
{ S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15 };
static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);
+static constexpr Register kArtMethodRegister = R0;
+
+static constexpr DRegister FromLowSToD(SRegister reg) {
+ return DCHECK_CONSTEXPR(reg % 2 == 0, , D0)
+ static_cast<DRegister>(reg / 2);
+}
+
+
class InvokeDexCallingConvention : public CallingConvention<Register, SRegister> {
public:
InvokeDexCallingConvention()
@@ -90,6 +98,20 @@ class ParallelMoveResolverARM : public ParallelMoveResolver {
DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM);
};
+class SlowPathCodeARM : public SlowPathCode {
+ public:
+ SlowPathCodeARM() : entry_label_(), exit_label_() {}
+
+ Label* GetEntryLabel() { return &entry_label_; }
+ Label* GetExitLabel() { return &exit_label_; }
+
+ private:
+ Label entry_label_;
+ Label exit_label_;
+
+ DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM);
+};
+
class LocationsBuilderARM : public HGraphVisitor {
public:
LocationsBuilderARM(HGraph* graph, CodeGeneratorARM* codegen)
@@ -230,7 +252,7 @@ class CodeGeneratorARM : public CodeGenerator {
void MarkGCCard(Register temp, Register card, Register object, Register value);
Label* GetLabelOf(HBasicBlock* block) const {
- return block_labels_.GetRawStorage() + block->GetBlockId();
+ return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
}
void Initialize() OVERRIDE {
@@ -249,6 +271,8 @@ class CodeGeneratorARM : public CodeGenerator {
Label* GetFrameEntryLabel() { return &frame_entry_label_; }
+ void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
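For clarity, a tiny usage sketch of the FromLowSToD helper moved into this header (per the VFP overlap rules, S2n and S2n+1 alias Dn):

    DRegister d = FromLowSToD(S4);  // S4/S5 alias D2, so d == D2.
    // Passing an odd register such as S5 would trip the DCHECK_CONSTEXPR.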
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 3bc23fe4f3..0d7864fa35 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -402,15 +402,15 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options)
kNumberOfAllocatableRegisters,
kNumberOfAllocatableFPRegisters,
kNumberOfAllocatableRegisterPairs,
- (1 << LR),
- 0,
+ callee_saved_core_registers.list(),
+ callee_saved_fp_registers.list(),
compiler_options),
block_labels_(nullptr),
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this) {
// Save the link register (containing the return address) to mimic Quick.
- AddAllocatedRegister(Location::RegisterLocation(LR));
+ AddAllocatedRegister(LocationFrom(lr));
}
#undef __
@@ -448,27 +448,32 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireX();
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
- __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
+ __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
__ Ldr(wzr, MemOperand(temp, 0));
RecordPcInfo(nullptr, 0);
}
- int frame_size = GetFrameSize();
- __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
- __ PokeCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize());
-
- // Stack layout:
- // sp[frame_size - 8] : lr.
- // ... : other preserved registers.
- // sp[frame_size - regs_size]: first preserved register.
- // ... : reserved frame space.
- // sp[0] : current method.
+ if (!HasEmptyFrame()) {
+ int frame_size = GetFrameSize();
+ // Stack layout:
+ // sp[frame_size - 8] : lr.
+ // ... : other preserved core registers.
+ // ... : other preserved fp registers.
+ // ... : reserved frame space.
+ // sp[0] : current method.
+ __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
+ __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+ __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+ }
}
void CodeGeneratorARM64::GenerateFrameExit() {
- int frame_size = GetFrameSize();
- __ PeekCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize());
- __ Drop(frame_size);
+ if (!HasEmptyFrame()) {
+ int frame_size = GetFrameSize();
+ __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+ __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+ __ Drop(frame_size);
+ }
}
void CodeGeneratorARM64::Bind(HBasicBlock* block) {
@@ -486,18 +491,21 @@ void CodeGeneratorARM64::Move(HInstruction* instruction,
Primitive::Type type = instruction->GetType();
DCHECK_NE(type, Primitive::kPrimVoid);
- if (instruction->IsIntConstant() || instruction->IsLongConstant()) {
- int64_t value = instruction->IsIntConstant() ? instruction->AsIntConstant()->GetValue()
- : instruction->AsLongConstant()->GetValue();
+ if (instruction->IsIntConstant()
+ || instruction->IsLongConstant()
+ || instruction->IsNullConstant()) {
+ int64_t value = GetInt64ValueOf(instruction->AsConstant());
if (location.IsRegister()) {
Register dst = RegisterFrom(location, type);
- DCHECK((instruction->IsIntConstant() && dst.Is32Bits()) ||
+ DCHECK(((instruction->IsIntConstant() || instruction->IsNullConstant()) && dst.Is32Bits()) ||
(instruction->IsLongConstant() && dst.Is64Bits()));
__ Mov(dst, value);
} else {
DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
UseScratchRegisterScope temps(GetVIXLAssembler());
- Register temp = instruction->IsIntConstant() ? temps.AcquireW() : temps.AcquireX();
+ Register temp = (instruction->IsIntConstant() || instruction->IsNullConstant())
+ ? temps.AcquireW()
+ : temps.AcquireX();
__ Mov(temp, value);
__ Str(temp, StackOperandFrom(location));
}
@@ -555,26 +563,38 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value) {
__ Bind(&done);
}
-void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
- // Block reserved registers:
- // ip0 (VIXL temporary)
- // ip1 (VIXL temporary)
- // tr
- // lr
- // sp is not part of the allocatable registers, so we don't need to block it.
- // TODO: Avoid blocking callee-saved registers, and instead preserve them
- // where necessary.
+void CodeGeneratorARM64::SetupBlockedRegisters(bool is_baseline) const {
+ // Blocked core registers:
+ // lr : Runtime reserved.
+ // tr : Runtime reserved.
+ // xSuspend : Runtime reserved. TODO: Unblock this when the runtime stops using it.
+ // ip1 : VIXL core temp.
+ // ip0 : VIXL core temp.
+ //
+ // Blocked fp registers:
+ // d31 : VIXL fp temp.
CPURegList reserved_core_registers = vixl_reserved_core_registers;
reserved_core_registers.Combine(runtime_reserved_core_registers);
- reserved_core_registers.Combine(quick_callee_saved_registers);
while (!reserved_core_registers.IsEmpty()) {
blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true;
}
+
CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
- reserved_fp_registers.Combine(CPURegList::GetCalleeSavedFP());
- while (!reserved_core_registers.IsEmpty()) {
+ while (!reserved_fp_registers.IsEmpty()) {
blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().code()] = true;
}
+
+ if (is_baseline) {
+ CPURegList reserved_core_baseline_registers = callee_saved_core_registers;
+ while (!reserved_core_baseline_registers.IsEmpty()) {
+ blocked_core_registers_[reserved_core_baseline_registers.PopLowestIndex().code()] = true;
+ }
+
+ CPURegList reserved_fp_baseline_registers = callee_saved_fp_registers;
+ while (!reserved_fp_baseline_registers.IsEmpty()) {
+ blocked_fpu_registers_[reserved_fp_baseline_registers.PopLowestIndex().code()] = true;
+ }
+ }
}
Location CodeGeneratorARM64::AllocateFreeRegister(Primitive::Type type) const {
@@ -626,10 +646,12 @@ void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg
}
void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
- if (constant->IsIntConstant() || constant->IsLongConstant()) {
- __ Mov(Register(destination),
- constant->IsIntConstant() ? constant->AsIntConstant()->GetValue()
- : constant->AsLongConstant()->GetValue());
+ if (constant->IsIntConstant()) {
+ __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
+ } else if (constant->IsLongConstant()) {
+ __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
+ } else if (constant->IsNullConstant()) {
+ __ Mov(Register(destination), 0);
} else if (constant->IsFloatConstant()) {
__ Fmov(FPRegister(destination), constant->AsFloatConstant()->GetValue());
} else {
@@ -643,6 +665,8 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
DCHECK(constant.IsConstant());
HConstant* cst = constant.GetConstant();
return (cst->IsIntConstant() && type == Primitive::kPrimInt) ||
+ // Null is mapped to a core W register, which we associate with kPrimInt.
+ (cst->IsNullConstant() && type == Primitive::kPrimInt) ||
(cst->IsLongConstant() && type == Primitive::kPrimLong) ||
(cst->IsFloatConstant() && type == Primitive::kPrimFloat) ||
(cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
@@ -663,7 +687,9 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri
if (unspecified_type) {
HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
if (source.IsStackSlot() ||
- (src_cst != nullptr && (src_cst->IsIntConstant() || src_cst->IsFloatConstant()))) {
+ (src_cst != nullptr && (src_cst->IsIntConstant()
+ || src_cst->IsFloatConstant()
+ || src_cst->IsNullConstant()))) {
        // For stack slots and 32bit constants, a 32bit type is appropriate.
type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat;
} else {
@@ -709,7 +735,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination, Location source, Pri
UseScratchRegisterScope temps(GetVIXLAssembler());
HConstant* src_cst = source.GetConstant();
CPURegister temp;
- if (src_cst->IsIntConstant()) {
+ if (src_cst->IsIntConstant() || src_cst->IsNullConstant()) {
temp = temps.AcquireW();
} else if (src_cst->IsLongConstant()) {
temp = temps.AcquireX();
@@ -947,6 +973,7 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type,
}
void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) {
+ DCHECK(RequiresCurrentMethod());
DCHECK(current_method.IsW());
__ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset));
}
@@ -1370,7 +1397,13 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ HInstruction* right = compare->InputAt(1);
+ if ((right->IsFloatConstant() && (right->AsFloatConstant()->GetValue() == 0.0f)) ||
+ (right->IsDoubleConstant() && (right->AsDoubleConstant()->GetValue() == 0.0))) {
+ locations->SetInAt(1, Location::ConstantLocation(right->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ }
locations->SetOut(Location::RequiresRegister());
break;
}
@@ -1400,9 +1433,17 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
case Primitive::kPrimDouble: {
Register result = OutputRegister(compare);
FPRegister left = InputFPRegisterAt(compare, 0);
- FPRegister right = InputFPRegisterAt(compare, 1);
-
- __ Fcmp(left, right);
+ if (compare->GetLocations()->InAt(1).IsConstant()) {
+ if (kIsDebugBuild) {
+ HInstruction* right = compare->GetLocations()->InAt(1).GetConstant();
+ DCHECK((right->IsFloatConstant() && (right->AsFloatConstant()->GetValue() == 0.0f)) ||
+ (right->IsDoubleConstant() && (right->AsDoubleConstant()->GetValue() == 0.0)));
+ }
+        // 0.0 is the only immediate that can be encoded directly in an FCMP instruction.
+ __ Fcmp(left, 0.0);
+ } else {
+ __ Fcmp(left, InputFPRegisterAt(compare, 1));
+ }
if (compare->IsGtBias()) {
__ Cset(result, ne);
} else {
@@ -1752,6 +1793,16 @@ void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant) {
UNUSED(constant);
}
+void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant) {
+ // Will be generated at use site.
+ UNUSED(constant);
+}
+
void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(invoke, LocationSummary::kCall);
@@ -2545,6 +2596,18 @@ void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
HandleBinaryOp(instruction);
}
+void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
#undef __
#undef QUICK_ENTRY_POINT
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 9a99dcccea..afb7fc3718 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -50,14 +50,24 @@ static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters);
const vixl::Register tr = vixl::x18; // Thread Register
static const vixl::Register kArtMethodRegister = vixl::w0; // Method register on invoke.
+const vixl::Register kQuickSuspendRegister = vixl::x19;
const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31);
-const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
-const vixl::CPURegList quick_callee_saved_registers(vixl::CPURegister::kRegister,
- vixl::kXRegSize,
- kArm64CalleeSaveRefSpills);
+// TODO: When the runtime no longer uses kQuickSuspendRegister as a suspend
+// counter, remove it from the reserved registers list.
+const vixl::CPURegList runtime_reserved_core_registers(tr, kQuickSuspendRegister, vixl::lr);
+
+// Callee-saved registers defined by AAPCS64.
+const vixl::CPURegList callee_saved_core_registers(vixl::CPURegister::kRegister,
+ vixl::kXRegSize,
+ vixl::x19.code(),
+ vixl::x30.code());
+const vixl::CPURegList callee_saved_fp_registers(vixl::CPURegister::kFPRegister,
+ vixl::kDRegSize,
+ vixl::d8.code(),
+ vixl::d15.code());
Location ARM64ReturnLocation(Primitive::Type return_type);
class SlowPathCodeARM64 : public SlowPathCode {
@@ -191,16 +201,20 @@ class CodeGeneratorARM64 : public CodeGenerator {
void GenerateFrameEntry() OVERRIDE;
void GenerateFrameExit() OVERRIDE;
- static const vixl::CPURegList& GetFramePreservedRegisters() {
- static const vixl::CPURegList frame_preserved_regs =
- vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, vixl::lr.Bit());
- return frame_preserved_regs;
+ vixl::CPURegList GetFramePreservedCoreRegisters() const {
+ return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
+ core_spill_mask_);
+ }
+
+ vixl::CPURegList GetFramePreservedFPRegisters() const {
+ return vixl::CPURegList(vixl::CPURegister::kFPRegister, vixl::kDRegSize,
+ fpu_spill_mask_);
}
void Bind(HBasicBlock* block) OVERRIDE;
vixl::Label* GetLabelOf(HBasicBlock* block) const {
- return block_labels_ + block->GetBlockId();
+ return CommonGetLabelOf<vixl::Label>(block_labels_, block);
}
void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
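A small sketch of what the two accessors above now produce: they translate the spill masks computed at register-allocation time into vixl lists, instead of the previous fixed {lr} list (the mask value below is hypothetical).

    // Suppose ComputeSpillMask() kept x19 and lr (x30):
    uint32_t core_spill_mask = (1u << 19) | (1u << 30);
    vixl::CPURegList preserved(vixl::CPURegister::kRegister, vixl::kXRegSize, core_spill_mask);
    // preserved == {x19, x30}; GenerateFrameEntry pokes exactly this list below the
    // frame top and GenerateFrameExit peeks it back.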
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 98f93a418a..1101569174 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -470,12 +470,16 @@ void CodeGeneratorX86::GenerateFrameEntry() {
RecordPcInfo(nullptr, 0);
}
- __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
- __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
+ if (!HasEmptyFrame()) {
+ __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
+ }
}
void CodeGeneratorX86::GenerateFrameExit() {
- __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ if (!HasEmptyFrame()) {
+ __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ }
}
void CodeGeneratorX86::Bind(HBasicBlock* block) {
@@ -483,6 +487,7 @@ void CodeGeneratorX86::Bind(HBasicBlock* block) {
}
void CodeGeneratorX86::LoadCurrentMethod(Register reg) {
+ DCHECK(RequiresCurrentMethod());
__ movl(reg, Address(ESP, kCurrentMethodStackOffset));
}
@@ -597,13 +602,7 @@ void CodeGeneratorX86::Move32(Location destination, Location source) {
__ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
- int32_t value;
- if (constant->IsIntConstant()) {
- value = constant->AsIntConstant()->GetValue();
- } else {
- DCHECK(constant->IsFloatConstant());
- value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
- }
+ int32_t value = GetInt32ValueOf(constant);
__ movl(Address(ESP, destination.GetStackIndex()), Immediate(value));
} else {
DCHECK(source.IsStackSlot());
@@ -669,8 +668,8 @@ void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
if (locations != nullptr && locations->Out().IsConstant()) {
HConstant* const_to_move = locations->Out().GetConstant();
- if (const_to_move->IsIntConstant()) {
- Immediate imm(const_to_move->AsIntConstant()->GetValue());
+ if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
+ Immediate imm(GetInt32ValueOf(const_to_move));
if (location.IsRegister()) {
__ movl(location.AsRegister<Register>(), imm);
} else if (location.IsStackSlot()) {
@@ -920,7 +919,7 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* comp) {
locations->InAt(1).AsRegister<Register>());
} else if (locations->InAt(1).IsConstant()) {
HConstant* instruction = locations->InAt(1).GetConstant();
- Immediate imm(instruction->AsIntConstant()->GetValue());
+ Immediate imm(CodeGenerator::GetInt32ValueOf(instruction));
__ cmpl(locations->InAt(0).AsRegister<Register>(), imm);
} else {
__ cmpl(locations->InAt(0).AsRegister<Register>(),
@@ -989,6 +988,17 @@ void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant) {
UNUSED(constant);
}
+void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant) {
+ // Will be generated at use site.
+ UNUSED(constant);
+}
+
void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -1799,7 +1809,7 @@ void LocationsBuilderX86::VisitAdd(HAdd* add) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::Any());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -1843,8 +1853,6 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
case Primitive::kPrimFloat: {
if (second.IsFpuRegister()) {
__ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
- } else {
- __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
}
break;
}
@@ -1852,8 +1860,6 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
case Primitive::kPrimDouble: {
if (second.IsFpuRegister()) {
__ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
- } else {
- __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
}
break;
}
@@ -3495,8 +3501,8 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
}
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
- if (constant->IsIntConstant()) {
- Immediate imm(constant->AsIntConstant()->GetValue());
+ if (constant->IsIntConstant() || constant->IsNullConstant()) {
+ Immediate imm(CodeGenerator::GetInt32ValueOf(constant));
if (destination.IsRegister()) {
__ movl(destination.AsRegister<Register>(), imm);
} else {
@@ -3904,5 +3910,17 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr
}
}
+void LocationsBuilderX86::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
} // namespace x86
} // namespace art
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 107ddafea4..f5a9b7d1f7 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -234,7 +234,7 @@ class CodeGeneratorX86 : public CodeGenerator {
void LoadCurrentMethod(Register reg);
Label* GetLabelOf(HBasicBlock* block) const {
- return block_labels_.GetRawStorage() + block->GetBlockId();
+ return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
}
void Initialize() OVERRIDE {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 2ff53a0603..41a19e11f0 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -487,6 +487,10 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
RecordPcInfo(nullptr, 0);
}
+ if (HasEmptyFrame()) {
+ return;
+ }
+
for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
@@ -509,6 +513,9 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
}
void CodeGeneratorX86_64::GenerateFrameExit() {
+ if (HasEmptyFrame()) {
+ return;
+ }
uint32_t xmm_spill_location = GetFpuSpillStart();
size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
@@ -533,6 +540,7 @@ void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
}
void CodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) {
+ DCHECK(RequiresCurrentMethod());
__ movl(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
}
@@ -599,13 +607,7 @@ void CodeGeneratorX86_64::Move(Location destination, Location source) {
source.AsFpuRegister<XmmRegister>());
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
- int32_t value;
- if (constant->IsFloatConstant()) {
- value = bit_cast<float, int32_t>(constant->AsFloatConstant()->GetValue());
- } else {
- DCHECK(constant->IsIntConstant());
- value = constant->AsIntConstant()->GetValue();
- }
+ int32_t value = GetInt32ValueOf(constant);
__ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
} else {
DCHECK(source.IsStackSlot()) << source;
@@ -649,8 +651,8 @@ void CodeGeneratorX86_64::Move(HInstruction* instruction,
if (locations != nullptr && locations->Out().IsConstant()) {
HConstant* const_to_move = locations->Out().GetConstant();
- if (const_to_move->IsIntConstant()) {
- Immediate imm(const_to_move->AsIntConstant()->GetValue());
+ if (const_to_move->IsIntConstant() || const_to_move->IsNullConstant()) {
+ Immediate imm(GetInt32ValueOf(const_to_move));
if (location.IsRegister()) {
__ movl(location.AsRegister<CpuRegister>(), imm);
} else if (location.IsStackSlot()) {
@@ -790,7 +792,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
// Materialized condition, compare against 0.
Location lhs = if_instr->GetLocations()->InAt(0);
if (lhs.IsRegister()) {
- __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(0));
+ __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
} else {
__ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()),
Immediate(0));
@@ -806,8 +808,12 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
if (rhs.IsRegister()) {
__ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
} else if (rhs.IsConstant()) {
- __ cmpl(lhs.AsRegister<CpuRegister>(),
- Immediate(rhs.GetConstant()->AsIntConstant()->GetValue()));
+ int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+ if (constant == 0) {
+ __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
+ } else {
+ __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
+ }
} else {
__ cmpl(lhs.AsRegister<CpuRegister>(),
Address(CpuRegister(RSP), rhs.GetStackIndex()));
@@ -883,15 +889,19 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* comp) {
CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
// Clear register: setcc only sets the low byte.
__ xorq(reg, reg);
- if (locations->InAt(1).IsRegister()) {
- __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(),
- locations->InAt(1).AsRegister<CpuRegister>());
- } else if (locations->InAt(1).IsConstant()) {
- __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(),
- Immediate(locations->InAt(1).GetConstant()->AsIntConstant()->GetValue()));
+ Location lhs = locations->InAt(0);
+ Location rhs = locations->InAt(1);
+ if (rhs.IsRegister()) {
+ __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
+ } else if (rhs.IsConstant()) {
+ int32_t constant = rhs.GetConstant()->AsIntConstant()->GetValue();
+ if (constant == 0) {
+ __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
+ } else {
+ __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
+ }
} else {
- __ cmpl(locations->InAt(0).AsRegister<CpuRegister>(),
- Address(CpuRegister(RSP), locations->InAt(1).GetStackIndex()));
+ __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex()));
}
__ setcc(X86_64Condition(comp->GetCondition()), reg);
}
@@ -1018,6 +1028,17 @@ void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant) {
UNUSED(constant);
}
+void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+ locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant) {
+ // Will be generated at use site.
+ UNUSED(constant);
+}
+
void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -1840,8 +1861,8 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
switch (add->GetResultType()) {
case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
- locations->SetOut(Location::SameAsFirstInput());
+ locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -1869,16 +1890,27 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
LocationSummary* locations = add->GetLocations();
Location first = locations->InAt(0);
Location second = locations->InAt(1);
- DCHECK(first.Equals(locations->Out()));
+ Location out = locations->Out();
switch (add->GetResultType()) {
case Primitive::kPrimInt: {
if (second.IsRegister()) {
- __ addl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+ if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
+ __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
+ } else {
+ __ leal(out.AsRegister<CpuRegister>(), Address(
+ first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
+ }
} else if (second.IsConstant()) {
- Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
- __ addl(first.AsRegister<CpuRegister>(), imm);
+ if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
+ __ addl(out.AsRegister<CpuRegister>(),
+ Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
+ } else {
+ __ leal(out.AsRegister<CpuRegister>(), Address(
+ first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
+ }
} else {
+ DCHECK(first.Equals(locations->Out()));
__ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
}
break;
@@ -2754,7 +2786,7 @@ void InstructionCodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instr
Location obj = locations->InAt(0);
if (obj.IsRegister()) {
- __ cmpl(obj.AsRegister<CpuRegister>(), Immediate(0));
+ __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
} else if (obj.IsStackSlot()) {
__ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
} else {
@@ -3236,13 +3268,17 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
}
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
- if (constant->IsIntConstant()) {
- Immediate imm(constant->AsIntConstant()->GetValue());
+ if (constant->IsIntConstant() || constant->IsNullConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(constant);
if (destination.IsRegister()) {
- __ movl(destination.AsRegister<CpuRegister>(), imm);
+ if (value == 0) {
+ __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
+ } else {
+ __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
+ }
} else {
DCHECK(destination.IsStackSlot()) << destination;
- __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
+ __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
}
} else if (constant->IsLongConstant()) {
int64_t value = constant->AsLongConstant()->GetValue();
@@ -3675,5 +3711,17 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in
}
}
+void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) {
+ // Nothing to do, this should be removed during prepare for register allocator.
+ UNUSED(instruction);
+ LOG(FATAL) << "Unreachable";
+}
+
} // namespace x86_64
} // namespace art
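Two of the local code-quality changes above, shown in isolation as the assembler calls they boil down to (register choices are illustrative):

    // Integer add with a non-overlapping output: LEA computes first + second directly into
    // the output register, leaving the first input intact (roughly "leal (%rdi,%rsi,1), %eax").
    __ leal(CpuRegister(RAX), Address(CpuRegister(RDI), CpuRegister(RSI), TIMES_1, 0));

    // Comparison against zero: TEST sets the same flags as CMP with 0 but needs no
    // immediate byte ("testl %eax, %eax" instead of "cmpl $0, %eax").
    __ testl(CpuRegister(RAX), CpuRegister(RAX));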
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index dbdbf869db..707c9992c0 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -232,7 +232,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void LoadCurrentMethod(CpuRegister reg);
Label* GetLabelOf(HBasicBlock* block) const {
- return block_labels_.GetRawStorage() + block->GetBlockId();
+ return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block);
}
void Initialize() OVERRIDE {
diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc
index b246c6f98d..7623e421fd 100644
--- a/compiler/optimizing/dominator_test.cc
+++ b/compiler/optimizing/dominator_test.cc
@@ -14,11 +14,11 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "builder.h"
#include "dex_instruction.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc
index e05d9b3b0f..2bfecc696a 100644
--- a/compiler/optimizing/find_loops_test.cc
+++ b/compiler/optimizing/find_loops_test.cc
@@ -14,13 +14,13 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "builder.h"
#include "dex_file.h"
#include "dex_instruction.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "ssa_liveness_analysis.h"
-#include "utils/arena_allocator.h"
#include "pretty_printer.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 4ebb1363cc..a7f1f74e27 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -160,6 +160,22 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
instruction->GetId()));
}
}
+
+ // Ensure 'instruction' has pointers to its inputs' use entries.
+ for (size_t i = 0, e = instruction->InputCount(); i < e; ++i) {
+ HUserRecord<HInstruction*> input_record = instruction->InputRecordAt(i);
+ HInstruction* input = input_record.GetInstruction();
+ HUseListNode<HInstruction*>* use_node = input_record.GetUseNode();
+ if (use_node == nullptr || !input->GetUses().Contains(use_node)) {
+ AddError(StringPrintf("Instruction %s:%d has an invalid pointer to use entry "
+ "at input %u (%s:%d).",
+ instruction->DebugName(),
+ instruction->GetId(),
+ static_cast<unsigned>(i),
+ input->DebugName(),
+ input->GetId()));
+ }
+ }
}
void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
@@ -285,6 +301,19 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) {
}
}
+static Primitive::Type PrimitiveKind(Primitive::Type type) {
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimInt:
+ return Primitive::kPrimInt;
+ default:
+ return type;
+ }
+}
+
void SSAChecker::VisitPhi(HPhi* phi) {
VisitInstruction(phi);
@@ -321,18 +350,17 @@ void SSAChecker::VisitPhi(HPhi* phi) {
}
}
}
-}
-
-static Primitive::Type PrimitiveKind(Primitive::Type type) {
- switch (type) {
- case Primitive::kPrimBoolean:
- case Primitive::kPrimByte:
- case Primitive::kPrimShort:
- case Primitive::kPrimChar:
- case Primitive::kPrimInt:
- return Primitive::kPrimInt;
- default:
- return type;
+ // Ensure that the inputs have the same primitive kind as the phi.
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+ HInstruction* input = phi->InputAt(i);
+ if (PrimitiveKind(input->GetType()) != PrimitiveKind(phi->GetType())) {
+ AddError(StringPrintf(
+ "Input %d at index %zu of phi %d from block %d does not have the "
+ "same type as the phi: %s versus %s",
+ input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(),
+ Primitive::PrettyDescriptor(input->GetType()),
+ Primitive::PrettyDescriptor(phi->GetType())));
+ }
}
}
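Note on the new phi check: inputs and phis are compared by primitive kind rather than exact type, so the sub-int types all count as int. A minimal sketch of the collapse (PrimitiveKind as defined in the hunk above; the example values are illustrative):

    static bool SameKind(Primitive::Type a, Primitive::Type b) {
      return PrimitiveKind(a) == PrimitiveKind(b);
    }
    // SameKind(Primitive::kPrimBoolean, Primitive::kPrimInt) -> true: not reported.
    // SameKind(Primitive::kPrimInt, Primitive::kPrimLong)    -> false: reported as a
    //                                                          phi/input type mismatch.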
diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc
index c59f8366fa..4742e4d073 100644
--- a/compiler/optimizing/graph_test.cc
+++ b/compiler/optimizing/graph_test.cc
@@ -14,12 +14,12 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "base/stringprintf.h"
#include "builder.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "pretty_printer.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 835bca688f..c59273753e 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -184,6 +184,10 @@ class HGraphVisualizerPrinter : public HGraphVisitor {
output_ << " " << instruction->GetValue();
}
+ void VisitPhi(HPhi* phi) OVERRIDE {
+ output_ << " " << phi->GetRegNumber();
+ }
+
void PrintInstruction(HInstruction* instruction) {
output_ << instruction->DebugName();
instruction->Accept(this);
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 89bba2d9f6..cb448c883f 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -270,7 +270,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
set = new (allocator_) ValueSet(allocator_);
} else {
HBasicBlock* dominator = block->GetDominator();
- set = sets_.Get(dominator->GetBlockId())->Copy();
+ set = sets_.Get(dominator->GetBlockId());
if (dominator->GetSuccessors().Size() != 1 || dominator->GetSuccessors().Get(0) != block) {
// We have to copy if the dominator has other successors, or `block` is not a successor
// of the dominator.
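The dominator's value set is now reused directly and only copied when other blocks could also observe it. A sketch of the resulting rule (same names as the hunk; the copy presumably happens in the guarded branch that follows):

    ValueSet* set = sets_.Get(dominator->GetBlockId());
    bool sole_successor = dominator->GetSuccessors().Size() == 1
        && dominator->GetSuccessors().Get(0) == block;
    if (!sole_successor) {
      // Sibling successors also read the dominator's set, so work on a private copy.
      set = set->Copy();
    }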
diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc
index 4a48fee2fb..a81d49aa0c 100644
--- a/compiler/optimizing/gvn_test.cc
+++ b/compiler/optimizing/gvn_test.cc
@@ -14,12 +14,12 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "builder.h"
#include "gvn.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "side_effects_analysis.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 32f6972c84..d55a3ca00b 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -159,7 +159,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
SsaDeadPhiElimination dead_phi(callee_graph);
HDeadCodeElimination dce(callee_graph);
HConstantFolding fold(callee_graph);
- InstructionSimplifier simplify(callee_graph);
+ InstructionSimplifier simplify(callee_graph, stats_);
HOptimization* optimizations[] = {
&redundant_phi,
@@ -176,7 +176,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
if (depth_ + 1 < kDepthLimit) {
HInliner inliner(
- callee_graph, outer_compilation_unit_, compiler_driver_, outer_stats_, depth_ + 1);
+ callee_graph, outer_compilation_unit_, compiler_driver_, stats_, depth_ + 1);
inliner.Run();
}
@@ -221,7 +221,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction,
// after optimizations get a unique id.
graph_->SetCurrentInstructionId(callee_graph->GetNextInstructionId());
VLOG(compiler) << "Successfully inlined " << PrettyMethod(method_index, outer_dex_file);
- outer_stats_->RecordStat(kInlinedInvoke);
+ MaybeRecordStat(kInlinedInvoke);
return true;
}
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 07d893e7c9..8e9cf837df 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -35,10 +35,9 @@ class HInliner : public HOptimization {
CompilerDriver* compiler_driver,
OptimizingCompilerStats* stats,
size_t depth = 0)
- : HOptimization(outer_graph, true, "inliner"),
+ : HOptimization(outer_graph, true, "inliner", stats),
outer_compilation_unit_(outer_compilation_unit),
compiler_driver_(compiler_driver),
- outer_stats_(stats),
depth_(depth) {}
void Run() OVERRIDE;
@@ -48,7 +47,6 @@ class HInliner : public HOptimization {
const DexCompilationUnit& outer_compilation_unit_;
CompilerDriver* const compiler_driver_;
- OptimizingCompilerStats* const outer_stats_;
const size_t depth_;
DISALLOW_COPY_AND_ASSIGN(HInliner);
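The inliner now records through the HOptimization base instead of its own outer_stats_. A sketch of the helper it relies on (assumed shape; the actual definition lives with HOptimization in optimization.h/cc):

    void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const {
      if (stats_ != nullptr) {
        stats_->RecordStat(compilation_stat);  // No-op when no stats object was supplied.
      }
    }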
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 17c8f337ca..fd99070780 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -16,11 +16,15 @@
#include "instruction_simplifier.h"
+#include "mirror/class-inl.h"
+#include "scoped_thread_state_change.h"
+
namespace art {
class InstructionSimplifierVisitor : public HGraphVisitor {
public:
- explicit InstructionSimplifierVisitor(HGraph* graph) : HGraphVisitor(graph) {}
+ InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+ : HGraphVisitor(graph), stats_(stats) {}
private:
void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
@@ -28,10 +32,14 @@ class InstructionSimplifierVisitor : public HGraphVisitor {
void VisitArraySet(HArraySet* equal) OVERRIDE;
void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
void VisitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void VisitArrayLength(HArrayLength* instruction) OVERRIDE;
+ void VisitCheckCast(HCheckCast* instruction) OVERRIDE;
+
+ OptimizingCompilerStats* stats_;
};
void InstructionSimplifier::Run() {
- InstructionSimplifierVisitor visitor(graph_);
+ InstructionSimplifierVisitor visitor(graph_, stats_);
visitor.VisitInsertionOrder();
}
@@ -40,6 +48,28 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) {
if (!obj->CanBeNull()) {
null_check->ReplaceWith(obj);
null_check->GetBlock()->RemoveInstruction(null_check);
+ if (stats_ != nullptr) {
+ stats_->RecordStat(MethodCompilationStat::kRemovedNullCheck);
+ }
+ }
+}
+
+void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
+ HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
+ if (!load_class->IsResolved()) {
+ // If the class couldn't be resolved it's not safe to compare against it. Its
+ // default type would be Top, which might be wider than the actual class type
+ // and thus produce wrong results.
+ return;
+ }
+ ReferenceTypeInfo obj_rti = check_cast->InputAt(0)->GetReferenceTypeInfo();
+ ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+ ScopedObjectAccess soa(Thread::Current());
+ if (class_rti.IsSupertypeOf(obj_rti)) {
+ check_cast->GetBlock()->RemoveInstruction(check_cast);
+ if (stats_ != nullptr) {
+ stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast);
+ }
}
}
@@ -75,6 +105,18 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) {
}
}
+void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) {
+ HInstruction* input = instruction->InputAt(0);
+ // If the array is a NewArray with constant size, replace the array length
+ // with the constant instruction. This helps the bounds check elimination phase.
+ if (input->IsNewArray()) {
+ input = input->InputAt(0);
+ if (input->IsIntConstant()) {
+ instruction->ReplaceWith(input);
+ }
+ }
+}
+
void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) {
HInstruction* value = instruction->GetValue();
if (value->GetType() != Primitive::kPrimNot) return;
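For reference, the ArrayLength rewrite earlier in this hunk in isolation, with the effect it aims for (same names as the hunk; sketch only):

    void SimplifyArrayLength(HArrayLength* instruction) {
      HInstruction* input = instruction->InputAt(0);
      if (input->IsNewArray()) {
        HInstruction* size = input->InputAt(0);  // The NewArray's size operand.
        if (size->IsIntConstant()) {
          // Length users (e.g. BoundsCheck) now see a constant upper bound, which
          // the bounds check elimination pass can reason about.
          instruction->ReplaceWith(size);
        }
      }
    }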
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index bca6697d05..a7ff755aed 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -19,6 +19,7 @@
#include "nodes.h"
#include "optimization.h"
+#include "optimizing_compiler_stats.h"
namespace art {
@@ -27,8 +28,10 @@ namespace art {
*/
class InstructionSimplifier : public HOptimization {
public:
- explicit InstructionSimplifier(HGraph* graph, const char* name = "instruction_simplifier")
- : HOptimization(graph, true, name) {}
+ InstructionSimplifier(HGraph* graph,
+ OptimizingCompilerStats* stats = nullptr,
+ const char* name = "instruction_simplifier")
+ : HOptimization(graph, true, name, stats) {}
void Run() OVERRIDE;
};
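Callers can keep the old single-argument form or thread a stats object through; a sketch of typical construction ('graph' and 'stats' stand for the caller's objects):

    InstructionSimplifier simplify(graph);        // Stats default to nullptr.
    InstructionSimplifier counted(graph, stats);  // Removed null checks / check-casts get counted.
    counted.Run();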
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
new file mode 100644
index 0000000000..a82d80af13
--- /dev/null
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -0,0 +1,883 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_arm.h"
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_arm.h"
+#include "entrypoints/quick/quick_entrypoints.h"
+#include "intrinsics.h"
+#include "mirror/array-inl.h"
+#include "mirror/art_method.h"
+#include "mirror/string.h"
+#include "thread.h"
+#include "utils/arm/assembler_arm.h"
+
+namespace art {
+
+namespace arm {
+
+ArmAssembler* IntrinsicCodeGeneratorARM::GetAssembler() {
+ return codegen_->GetAssembler();
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() {
+ return codegen_->GetGraph()->GetArena();
+}
+
+#define __ codegen->GetAssembler()->
+
+static void MoveFromReturnRegister(Location trg, Primitive::Type type, CodeGeneratorARM* codegen) {
+ if (!trg.IsValid()) {
+ DCHECK(type == Primitive::kPrimVoid);
+ return;
+ }
+
+ DCHECK_NE(type, Primitive::kPrimVoid);
+
+ if (Primitive::IsIntegralType(type)) {
+ if (type == Primitive::kPrimLong) {
+ Register trg_reg_lo = trg.AsRegisterPairLow<Register>();
+ Register trg_reg_hi = trg.AsRegisterPairHigh<Register>();
+ Register res_reg_lo = R0;
+ Register res_reg_hi = R1;
+ if (trg_reg_lo != res_reg_hi) {
+ if (trg_reg_lo != res_reg_lo) {
+ __ mov(trg_reg_lo, ShifterOperand(res_reg_lo));
+ __ mov(trg_reg_hi, ShifterOperand(res_reg_hi));
+ } else {
+ DCHECK_EQ(trg_reg_lo + 1, trg_reg_hi);
+ }
+ } else {
+ __ mov(trg_reg_hi, ShifterOperand(res_reg_hi));
+ __ mov(trg_reg_lo, ShifterOperand(res_reg_lo));
+ }
+ } else {
+ Register trg_reg = trg.AsRegister<Register>();
+ Register res_reg = R0;
+ if (trg_reg != res_reg) {
+ __ mov(trg_reg, ShifterOperand(res_reg));
+ }
+ }
+ } else {
+ UNIMPLEMENTED(FATAL) << "Floating-point return.";
+ }
+}
+
+static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorARM* codegen) {
+ if (invoke->InputCount() == 0) {
+ return;
+ }
+
+ LocationSummary* locations = invoke->GetLocations();
+ InvokeDexCallingConventionVisitor calling_convention_visitor;
+
+ // We're moving potentially two or more locations to locations that could overlap, so we need
+ // a parallel move resolver.
+ HParallelMove parallel_move(arena);
+
+ for (size_t i = 0; i < invoke->InputCount(); i++) {
+ HInstruction* input = invoke->InputAt(i);
+ Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
+ Location actual_loc = locations->InAt(i);
+
+ parallel_move.AddMove(actual_loc, cc_loc, nullptr);
+ }
+
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+}
+
+// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
+// call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+// summary. If an intrinsic modifies those locations before a slowpath call, they must be
+// restored!
+class IntrinsicSlowPathARM : public SlowPathCodeARM {
+ public:
+ explicit IntrinsicSlowPathARM(HInvoke* invoke) : invoke_(invoke) { }
+
+ void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ CodeGeneratorARM* codegen = down_cast<CodeGeneratorARM*>(codegen_in);
+ __ Bind(GetEntryLabel());
+
+ codegen->SaveLiveRegisters(invoke_->GetLocations());
+
+ MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
+
+ if (invoke_->IsInvokeStaticOrDirect()) {
+ codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister);
+ } else {
+ UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
+ UNREACHABLE();
+ }
+
+ // Copy the result back to the expected output.
+ Location out = invoke_->GetLocations()->Out();
+ if (out.IsValid()) {
+ DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory.
+ DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+ MoveFromReturnRegister(out, invoke_->GetType(), codegen);
+ }
+
+ codegen->RestoreLiveRegisters(invoke_->GetLocations());
+ __ b(GetExitLabel());
+ }
+
+ private:
+ // The instruction where this slow path is happening.
+ HInvoke* const invoke_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM);
+};
+
+#undef __
+
+bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
+ Dispatch(invoke);
+ LocationSummary* res = invoke->GetLocations();
+ return res != nullptr && res->Intrinsified();
+}
+
+#define __ assembler->
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
+ Location input = locations->InAt(0);
+ Location output = locations->Out();
+ if (is64bit) {
+ __ vmovrrd(output.AsRegisterPairLow<Register>(),
+ output.AsRegisterPairHigh<Register>(),
+ FromLowSToD(input.AsFpuRegisterPairLow<SRegister>()));
+ } else {
+ __ vmovrs(output.AsRegister<Register>(), input.AsFpuRegister<SRegister>());
+ }
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
+ Location input = locations->InAt(0);
+ Location output = locations->Out();
+ if (is64bit) {
+ __ vmovdrr(FromLowSToD(output.AsFpuRegisterPairLow<SRegister>()),
+ input.AsRegisterPairLow<Register>(),
+ input.AsRegisterPairHigh<Register>());
+ } else {
+ __ vmovsr(output.AsFpuRegister<SRegister>(), input.AsRegister<Register>());
+ }
+}
+
+void IntrinsicLocationsBuilderARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+void IntrinsicCodeGeneratorARM::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+void IntrinsicCodeGeneratorARM::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
+ Location in = locations->InAt(0);
+ Location out = locations->Out();
+
+ if (is64bit) {
+ __ vabsd(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()),
+ FromLowSToD(in.AsFpuRegisterPairLow<SRegister>()));
+ } else {
+ __ vabss(out.AsFpuRegister<SRegister>(), in.AsFpuRegister<SRegister>());
+ }
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathAbsDouble(HInvoke* invoke) {
+ CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathAbsDouble(HInvoke* invoke) {
+ MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathAbsFloat(HInvoke* invoke) {
+ CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathAbsFloat(HInvoke* invoke) {
+ MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenAbsInteger(LocationSummary* locations,
+ bool is64bit,
+ ArmAssembler* assembler) {
+ Location in = locations->InAt(0);
+ Location output = locations->Out();
+
+ Register mask = locations->GetTemp(0).AsRegister<Register>();
+
+ if (is64bit) {
+ Register in_reg_lo = in.AsRegisterPairLow<Register>();
+ Register in_reg_hi = in.AsRegisterPairHigh<Register>();
+ Register out_reg_lo = output.AsRegisterPairLow<Register>();
+ Register out_reg_hi = output.AsRegisterPairHigh<Register>();
+
+ DCHECK_NE(out_reg_lo, in_reg_hi) << "Diagonal overlap unexpected.";
+
+ __ Asr(mask, in_reg_hi, 31);
+ __ adds(out_reg_lo, in_reg_lo, ShifterOperand(mask));
+ __ adc(out_reg_hi, in_reg_hi, ShifterOperand(mask));
+ __ eor(out_reg_lo, mask, ShifterOperand(out_reg_lo));
+ __ eor(out_reg_hi, mask, ShifterOperand(out_reg_hi));
+ } else {
+ Register in_reg = in.AsRegister<Register>();
+ Register out_reg = output.AsRegister<Register>();
+
+ __ Asr(mask, in_reg, 31);
+ __ add(out_reg, in_reg, ShifterOperand(mask));
+ __ eor(out_reg, mask, ShifterOperand(out_reg));
+ }
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathAbsInt(HInvoke* invoke) {
+ CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathAbsInt(HInvoke* invoke) {
+ GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
+}
+
+
+void IntrinsicLocationsBuilderARM::VisitMathAbsLong(HInvoke* invoke) {
+ CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathAbsLong(HInvoke* invoke) {
+ GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
+}
+
+static void GenMinMax(LocationSummary* locations,
+ bool is_min,
+ ArmAssembler* assembler) {
+ Register op1 = locations->InAt(0).AsRegister<Register>();
+ Register op2 = locations->InAt(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ __ cmp(op1, ShifterOperand(op2));
+
+ __ it((is_min) ? Condition::LT : Condition::GT, kItElse);
+ __ mov(out, ShifterOperand(op1), is_min ? Condition::LT : Condition::GT);
+ __ mov(out, ShifterOperand(op2), is_min ? Condition::GE : Condition::LE);
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathMinIntInt(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathMinIntInt(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathMaxIntInt(HInvoke* invoke) {
+ CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathMaxIntInt(HInvoke* invoke) {
+ GenMinMax(invoke->GetLocations(), false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARM::VisitMathSqrt(HInvoke* invoke) {
+ CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMathSqrt(HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ ArmAssembler* assembler = GetAssembler();
+ __ vsqrtd(FromLowSToD(locations->Out().AsFpuRegisterPairLow<SRegister>()),
+ FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPeekByte(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPeekByte(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ // Ignore upper 4B of long address.
+ __ ldrsb(invoke->GetLocations()->Out().AsRegister<Register>(),
+ Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPeekIntNative(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPeekIntNative(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ // Ignore upper 4B of long address.
+ __ ldr(invoke->GetLocations()->Out().AsRegister<Register>(),
+ Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPeekLongNative(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPeekLongNative(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ // Ignore upper 4B of long address.
+ Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+ // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
+ // exception. So we can't use ldrd as addr may be unaligned.
+ Register lo = invoke->GetLocations()->Out().AsRegisterPairLow<Register>();
+ Register hi = invoke->GetLocations()->Out().AsRegisterPairHigh<Register>();
+ if (addr == lo) {
+ __ ldr(hi, Address(addr, 4));
+ __ ldr(lo, Address(addr, 0));
+ } else {
+ __ ldr(lo, Address(addr, 0));
+ __ ldr(hi, Address(addr, 4));
+ }
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPeekShortNative(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPeekShortNative(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ // Ignore upper 4B of long address.
+ __ ldrsh(invoke->GetLocations()->Out().AsRegister<Register>(),
+ Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPokeByte(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPokeByte(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ __ strb(invoke->GetLocations()->InAt(1).AsRegister<Register>(),
+ Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPokeIntNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPokeIntNative(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ __ str(invoke->GetLocations()->InAt(1).AsRegister<Register>(),
+ Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPokeLongNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPokeLongNative(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ // Ignore upper 4B of long address.
+ Register addr = invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>();
+ // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
+ // exception. So we can't use ldrd as addr may be unaligned.
+ __ str(invoke->GetLocations()->InAt(1).AsRegisterPairLow<Register>(), Address(addr, 0));
+ __ str(invoke->GetLocations()->InAt(1).AsRegisterPairHigh<Register>(), Address(addr, 4));
+}
+
+void IntrinsicLocationsBuilderARM::VisitMemoryPokeShortNative(HInvoke* invoke) {
+ CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitMemoryPokeShortNative(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ __ strh(invoke->GetLocations()->InAt(1).AsRegister<Register>(),
+ Address(invoke->GetLocations()->InAt(0).AsRegisterPairLow<Register>()));
+}
+
+void IntrinsicLocationsBuilderARM::VisitThreadCurrentThread(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitThreadCurrentThread(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ __ LoadFromOffset(kLoadWord,
+ invoke->GetLocations()->Out().AsRegister<Register>(),
+ TR,
+ Thread::PeerOffset<kArmPointerSize>().Int32Value());
+}
+
+static void GenUnsafeGet(HInvoke* invoke,
+ Primitive::Type type,
+ bool is_volatile,
+ CodeGeneratorARM* codegen) {
+ LocationSummary* locations = invoke->GetLocations();
+ DCHECK((type == Primitive::kPrimInt) ||
+ (type == Primitive::kPrimLong) ||
+ (type == Primitive::kPrimNot));
+ ArmAssembler* assembler = codegen->GetAssembler();
+ Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
+ Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only.
+
+ if (type == Primitive::kPrimLong) {
+ Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
+ __ add(IP, base, ShifterOperand(offset));
+ if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+ Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
+ __ ldrexd(trg_lo, trg_hi, IP);
+ } else {
+ __ ldrd(trg_lo, Address(IP));
+ }
+ } else {
+ Register trg = locations->Out().AsRegister<Register>();
+ __ ldr(trg, Address(base, offset));
+ }
+
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+}
+
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+ CreateIntIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
+ GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeGetLong(HInvoke* invoke) {
+ GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeGetObject(HInvoke* invoke) {
+ GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+ GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
+}
+
+static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
+ const ArmInstructionSetFeatures& features,
+ Primitive::Type type,
+ bool is_volatile,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RequiresRegister());
+
+ if (type == Primitive::kPrimLong) {
+ // Potentially need temps for ldrexd-strexd loop.
+ if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
+ locations->AddTemp(Location::RequiresRegister()); // Temp_lo.
+ locations->AddTemp(Location::RequiresRegister()); // Temp_hi.
+ }
+ } else if (type == Primitive::kPrimNot) {
+ // Temps for card-marking.
+ locations->AddTemp(Location::RequiresRegister()); // Temp.
+ locations->AddTemp(Location::RequiresRegister()); // Card.
+ }
+}
+
+void IntrinsicLocationsBuilderARM::VisitUnsafePut(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutOrdered(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, false, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutVolatile(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, true, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutObject(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, false, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, true, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutLong(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, false, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+ CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimLong, true, invoke);
+}
+
+static void GenUnsafePut(LocationSummary* locations,
+ Primitive::Type type,
+ bool is_volatile,
+ bool is_ordered,
+ CodeGeneratorARM* codegen) {
+ ArmAssembler* assembler = codegen->GetAssembler();
+
+ Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
+ Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only.
+ Register value;
+
+ if (is_volatile || is_ordered) {
+ __ dmb(ISH);
+ }
+
+ if (type == Primitive::kPrimLong) {
+ Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>();
+ value = value_lo;
+ if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+ Register temp_lo = locations->GetTemp(0).AsRegister<Register>();
+ Register temp_hi = locations->GetTemp(1).AsRegister<Register>();
+ Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>();
+
+ __ add(IP, base, ShifterOperand(offset));
+ Label loop_head;
+ __ Bind(&loop_head);
+ __ ldrexd(temp_lo, temp_hi, IP);
+ __ strexd(temp_lo, value_lo, value_hi, IP);
+ __ cmp(temp_lo, ShifterOperand(0));
+ __ b(&loop_head, NE);
+ } else {
+ __ add(IP, base, ShifterOperand(offset));
+ __ strd(value_lo, Address(IP));
+ }
+ } else {
+ value = locations->InAt(3).AsRegister<Register>();
+ __ str(value, Address(base, offset));
+ }
+
+ if (is_volatile) {
+ __ dmb(ISH);
+ }
+
+ if (type == Primitive::kPrimNot) {
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ Register card = locations->GetTemp(1).AsRegister<Register>();
+ codegen->MarkGCCard(temp, card, base, value);
+ }
+}
+
+void IntrinsicCodeGeneratorARM::VisitUnsafePut(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, true, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutObject(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, true, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutLong(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, false, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, true, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+ GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, false, codegen_);
+}
+
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
+ HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RequiresRegister());
+ locations->SetInAt(4, Location::RequiresRegister());
+
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+
+ locations->AddTemp(Location::RequiresRegister()); // Pointer.
+ locations->AddTemp(Location::RequiresRegister()); // Temp 1.
+ locations->AddTemp(Location::RequiresRegister()); // Temp 2.
+}
+
+static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) {
+ DCHECK_NE(type, Primitive::kPrimLong);
+
+ ArmAssembler* assembler = codegen->GetAssembler();
+
+ Register out = locations->Out().AsRegister<Register>(); // Boolean result.
+
+ Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
+ Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Offset (discard high 4B).
+ Register expected_lo = locations->InAt(3).AsRegister<Register>(); // Expected.
+ Register value_lo = locations->InAt(4).AsRegister<Register>(); // Value.
+
+ Register tmp_ptr = locations->GetTemp(0).AsRegister<Register>(); // Pointer to actual memory.
+ Register tmp_lo = locations->GetTemp(1).AsRegister<Register>(); // Value in memory.
+
+ if (type == Primitive::kPrimNot) {
+ // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+ // object and scan the receiver at the next GC for nothing.
+ codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo);
+ }
+
+ // Prevent reordering with prior memory operations.
+ __ dmb(ISH);
+
+ __ add(tmp_ptr, base, ShifterOperand(offset));
+
+ // do {
+ // tmp = [r_ptr] - expected;
+ // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+ // result = tmp == 0;
+
+ Label loop_head;
+ __ Bind(&loop_head);
+
+ __ ldrex(tmp_lo, tmp_ptr);
+
+ __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
+
+ __ it(EQ, ItState::kItT);
+ __ strex(tmp_lo, value_lo, tmp_ptr, EQ);
+ __ cmp(tmp_lo, ShifterOperand(1), EQ);
+
+ __ b(&loop_head, EQ);
+
+ __ dmb(ISH);
+
+ __ rsbs(out, tmp_lo, ShifterOperand(1));
+ __ it(CC);
+ __ mov(out, ShifterOperand(0), CC);
+}
+
+void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke ATTRIBUTE_UNUSED) {
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke ATTRIBUTE_UNUSED) {
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) {
+ GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+}
+void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) {
+ GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+}
+
+void IntrinsicLocationsBuilderARM::VisitStringCharAt(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARM::VisitStringCharAt(HInvoke* invoke) {
+ ArmAssembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ // Location of reference to data array
+ const MemberOffset value_offset = mirror::String::ValueOffset();
+ // Location of count
+ const MemberOffset count_offset = mirror::String::CountOffset();
+ // Starting offset within data array
+ const MemberOffset offset_offset = mirror::String::OffsetOffset();
+ // Start of char data within array_
+ const MemberOffset data_offset = mirror::Array::DataOffset(sizeof(uint16_t));
+
+ Register obj = locations->InAt(0).AsRegister<Register>(); // String object pointer.
+ Register idx = locations->InAt(1).AsRegister<Register>(); // Index of character.
+ Register out = locations->Out().AsRegister<Register>(); // Result character.
+
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ Register array_temp = locations->GetTemp(1).AsRegister<Register>();
+
+ // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
+ // the cost.
+ // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
+ // we will not optimize the code for constants (which would save a register).
+
+ SlowPathCodeARM* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
+ codegen_->AddSlowPath(slow_path);
+
+ __ ldr(temp, Address(obj, count_offset.Int32Value())); // temp = str.length.
+ codegen_->MaybeRecordImplicitNullCheck(invoke);
+ __ cmp(idx, ShifterOperand(temp));
+ __ b(slow_path->GetEntryLabel(), CS);
+
+ // Index computation.
+ __ ldr(temp, Address(obj, offset_offset.Int32Value())); // temp := str.offset.
+ __ ldr(array_temp, Address(obj, value_offset.Int32Value())); // array_temp := str.value.
+ __ add(temp, temp, ShifterOperand(idx));
+ DCHECK_EQ(data_offset.Int32Value() % 2, 0); // We'll compensate by shifting.
+ __ add(temp, temp, ShifterOperand(data_offset.Int32Value() / 2));
+
+ // Load the value.
+ __ ldrh(out, Address(array_temp, temp, LSL, 1)); // out := array_temp[temp].
+
+ __ Bind(slow_path->GetExitLabel());
+}
+
+// Unimplemented intrinsics.
+
+#define UNIMPLEMENTED_INTRINSIC(Name) \
+void IntrinsicLocationsBuilderARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+} \
+void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
+}
+
+UNIMPLEMENTED_INTRINSIC(IntegerReverse)
+UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
+UNIMPLEMENTED_INTRINSIC(LongReverse)
+UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
+UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
+UNIMPLEMENTED_INTRINSIC(MathMinDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(MathMinFloatFloat)
+UNIMPLEMENTED_INTRINSIC(MathMaxDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(MathMaxFloatFloat)
+UNIMPLEMENTED_INTRINSIC(MathMinLongLong)
+UNIMPLEMENTED_INTRINSIC(MathMaxLongLong)
+UNIMPLEMENTED_INTRINSIC(MathCeil) // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(MathFloor) // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(MathRint)
+UNIMPLEMENTED_INTRINSIC(MathRoundDouble) // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(MathRoundFloat) // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) // High register pressure.
+UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(StringCompareTo)
+UNIMPLEMENTED_INTRINSIC(StringIsEmpty) // Might not want to do these two anyways, inlining should
+UNIMPLEMENTED_INTRINSIC(StringLength) // be good enough here.
+UNIMPLEMENTED_INTRINSIC(StringIndexOf)
+UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
+UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+
+} // namespace arm
+} // namespace art
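The ldrex/strex loop in GenCas matches the pseudo-code comment above it. A C-level equivalent of what the emitted sequence computes (sketch for intuition only, not part of the patch):

    #include <stdint.h>

    // Returns 1 when *addr held 'expected' and was replaced by 'new_value', else 0.
    int Cas32(volatile int32_t* addr, int32_t expected, int32_t new_value) {
      int32_t observed;
      do {
        observed = *addr;            // ldrex
        if (observed != expected) {
          return 0;                  // tmp != 0: report failure without storing.
        }
        // strex can fail because another writer intervened (or spuriously); retry.
      } while (!__sync_bool_compare_and_swap(addr, expected, new_value));
      return 1;                      // tmp == 0 and the store-exclusive succeeded.
    }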
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
new file mode 100644
index 0000000000..8bfb7d4686
--- /dev/null
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_
+
+#include "intrinsics.h"
+
+namespace art {
+
+class ArenaAllocator;
+class ArmInstructionSetFeatures;
+class HInvokeStaticOrDirect;
+class HInvokeVirtual;
+
+namespace arm {
+
+class ArmAssembler;
+class CodeGeneratorARM;
+
+class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor {
+ public:
+ explicit IntrinsicLocationsBuilderARM(ArenaAllocator* arena,
+ const ArmInstructionSetFeatures& features)
+ : arena_(arena), features_(features) {}
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+ // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+ // the invoke.
+ bool TryDispatch(HInvoke* invoke);
+
+ private:
+ ArenaAllocator* arena_;
+
+ const ArmInstructionSetFeatures& features_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM);
+};
+
+class IntrinsicCodeGeneratorARM FINAL : public IntrinsicVisitor {
+ public:
+ explicit IntrinsicCodeGeneratorARM(CodeGeneratorARM* codegen) : codegen_(codegen) {}
+
+ // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic) \
+ void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+ ArmAssembler* GetAssembler();
+
+ ArenaAllocator* GetAllocator();
+
+ CodeGeneratorARM* codegen_;
+
+ DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARM);
+};
+
+} // namespace arm
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_H_
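Expected wiring from the ARM code generator, mirroring how the other back ends drive their intrinsic builders (sketch; field names such as 'isa_features_' are assumptions):

    IntrinsicLocationsBuilderARM intrinsic_builder(GetGraph()->GetArena(), isa_features_);
    if (intrinsic_builder.TryDispatch(invoke)) {
      return;  // An intrinsified LocationSummary is attached; skip the generic call setup.
    }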
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 7a3d7d8389..8874edc341 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -300,7 +300,6 @@ void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) {
}
static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
- // We only support FP registers here.
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
kIntrinsified);
@@ -924,7 +923,6 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitStringCharAt(HInvoke* invoke) {
- // The inputs plus one temp.
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kCallOnSlowPath,
kIntrinsified);
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index 10f24d8148..bf9b8e59c5 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -66,8 +66,7 @@ static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info)
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
HInstruction* input = environment->GetInstructionAt(i);
if (input != nullptr && IsPhiOf(input, info->GetHeader())) {
- HUseListNode<HEnvironment*>* env_use = environment->GetInstructionEnvUseAt(i);
- input->RemoveEnvironmentUser(env_use);
+ environment->RemoveAsUserOfInput(i);
HInstruction* incoming = input->InputAt(0);
environment->SetRawEnvAt(i, incoming);
incoming->AddEnvUseAt(environment, i);
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index eb27965c79..f22b7a7e82 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -16,6 +16,7 @@
#include <fstream>
+#include "base/arena_allocator.h"
#include "base/stringprintf.h"
#include "builder.h"
#include "code_generator.h"
@@ -29,7 +30,6 @@
#include "pretty_printer.h"
#include "ssa_builder.h"
#include "ssa_liveness_analysis.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/live_interval_test.cc b/compiler/optimizing/live_interval_test.cc
index ac8759c805..28000c18f8 100644
--- a/compiler/optimizing/live_interval_test.cc
+++ b/compiler/optimizing/live_interval_test.cc
@@ -14,9 +14,9 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "optimizing_unit_test.h"
#include "ssa_liveness_analysis.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 0558b85b47..17914e8206 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
#include "code_generator_x86.h"
@@ -24,7 +25,6 @@
#include "optimizing_unit_test.h"
#include "prepare_for_register_allocation.h"
#include "ssa_liveness_analysis.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index c9be570c73..907eff162f 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
#include "code_generator_x86.h"
@@ -24,7 +25,6 @@
#include "optimizing_unit_test.h"
#include "prepare_for_register_allocation.h"
#include "ssa_liveness_analysis.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index 990d662d86..4ac1fe8573 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -64,6 +64,13 @@ Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
std::ostream& operator<<(std::ostream& os, const Location& location) {
os << location.DebugString();
+ if (location.IsRegister() || location.IsFpuRegister()) {
+ os << location.reg();
+ } else if (location.IsPair()) {
+ os << location.low() << ":" << location.high();
+ } else if (location.IsStackSlot() || location.IsDoubleStackSlot()) {
+ os << location.GetStackIndex();
+ }
return os;
}
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index bf27c5cf7a..198cc15cce 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -17,10 +17,10 @@
#ifndef ART_COMPILER_OPTIMIZING_LOCATIONS_H_
#define ART_COMPILER_OPTIMIZING_LOCATIONS_H_
+#include "base/arena_object.h"
#include "base/bit_field.h"
#include "base/bit_vector.h"
#include "base/value_object.h"
-#include "utils/arena_object.h"
#include "utils/growable_array.h"
namespace art {
@@ -151,6 +151,10 @@ class Location : public ValueObject {
return GetKind() == kFpuRegisterPair;
}
+ bool IsRegisterKind() const {
+ return IsRegister() || IsFpuRegister() || IsRegisterPair() || IsFpuRegisterPair();
+ }
+
int reg() const {
DCHECK(IsRegister() || IsFpuRegister());
return GetPayload();
@@ -268,6 +272,20 @@ class Location : public ValueObject {
return value_ == other.value_;
}
+ bool Contains(Location other) const {
+ if (Equals(other)) {
+ return true;
+ } else if (IsFpuRegisterPair() && other.IsFpuRegister()) {
+ return other.reg() == low() || other.reg() == high();
+ } else if (IsRegisterPair() && other.IsRegister()) {
+ return other.reg() == low() || other.reg() == high();
+ } else if (IsDoubleStackSlot() && other.IsStackSlot()) {
+ return (GetStackIndex() == other.GetStackIndex())
+ || (GetStackIndex() + 4 == other.GetStackIndex());
+ }
+ return false;
+ }
+
const char* DebugString() const {
switch (GetKind()) {
case kInvalid: return "I";
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index cd36598171..93787b8bfd 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -18,6 +18,7 @@
#include "ssa_builder.h"
#include "utils/growable_array.h"
+#include "scoped_thread_state_change.h"
namespace art {
@@ -33,17 +34,14 @@ void HGraph::FindBackEdges(ArenaBitVector* visited) {
static void RemoveAsUser(HInstruction* instruction) {
for (size_t i = 0; i < instruction->InputCount(); i++) {
- instruction->InputAt(i)->RemoveUser(instruction, i);
+ instruction->RemoveAsUserOfInput(i);
}
HEnvironment* environment = instruction->GetEnvironment();
if (environment != nullptr) {
for (size_t i = 0, e = environment->Size(); i < e; ++i) {
- HUseListNode<HEnvironment*>* vreg_env_use = environment->GetInstructionEnvUseAt(i);
- if (vreg_env_use != nullptr) {
- HInstruction* vreg = environment->GetInstructionAt(i);
- DCHECK(vreg != nullptr);
- vreg->RemoveEnvironmentUser(vreg_env_use);
+ if (environment->GetInstructionAt(i) != nullptr) {
+ environment->RemoveAsUserOfInput(i);
}
}
}
@@ -63,22 +61,19 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit
}
}
-void HGraph::RemoveBlock(HBasicBlock* block) const {
- for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
- block->GetSuccessors().Get(j)->RemovePredecessor(block);
- }
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- block->RemovePhi(it.Current()->AsPhi());
- }
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- block->RemoveInstruction(it.Current());
- }
-}
-
void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) const {
for (size_t i = 0; i < blocks_.Size(); ++i) {
if (!visited.IsBitSet(i)) {
- RemoveBlock(blocks_.Get(i));
+ HBasicBlock* block = blocks_.Get(i);
+ for (size_t j = 0; j < block->GetSuccessors().Size(); ++j) {
+ block->GetSuccessors().Get(j)->RemovePredecessor(block);
+ }
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ block->RemovePhi(it.Current()->AsPhi(), /*ensure_safety=*/ false);
+ }
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ block->RemoveInstruction(it.Current(), /*ensure_safety=*/ false);
+ }
}
}
}
@@ -292,6 +287,15 @@ bool HGraph::AnalyzeNaturalLoops() const {
return true;
}
+HNullConstant* HGraph::GetNullConstant() {
+ if (cached_null_constant_ == nullptr) {
+ cached_null_constant_ = new (arena_) HNullConstant();
+ entry_block_->InsertInstructionBefore(cached_null_constant_,
+ entry_block_->GetLastInstruction());
+ }
+ return cached_null_constant_;
+}
+
void HLoopInformation::Add(HBasicBlock* block) {
blocks_.SetBit(block->GetBlockId());
}
@@ -429,22 +433,24 @@ void HBasicBlock::InsertPhiAfter(HPhi* phi, HPhi* cursor) {
static void Remove(HInstructionList* instruction_list,
HBasicBlock* block,
- HInstruction* instruction) {
+ HInstruction* instruction,
+ bool ensure_safety) {
DCHECK_EQ(block, instruction->GetBlock());
- DCHECK(instruction->GetUses().IsEmpty());
- DCHECK(instruction->GetEnvUses().IsEmpty());
instruction->SetBlock(nullptr);
instruction_list->RemoveInstruction(instruction);
-
- RemoveAsUser(instruction);
+ if (ensure_safety) {
+ DCHECK(instruction->GetUses().IsEmpty());
+ DCHECK(instruction->GetEnvUses().IsEmpty());
+ RemoveAsUser(instruction);
+ }
}
-void HBasicBlock::RemoveInstruction(HInstruction* instruction) {
- Remove(&instructions_, this, instruction);
+void HBasicBlock::RemoveInstruction(HInstruction* instruction, bool ensure_safety) {
+ Remove(&instructions_, this, instruction, ensure_safety);
}
-void HBasicBlock::RemovePhi(HPhi* phi) {
- Remove(&phis_, this, phi);
+void HBasicBlock::RemovePhi(HPhi* phi, bool ensure_safety) {
+ Remove(&phis_, this, phi, ensure_safety);
}
void HEnvironment::CopyFrom(HEnvironment* env) {
@@ -457,15 +463,9 @@ void HEnvironment::CopyFrom(HEnvironment* env) {
}
}
-template <typename T>
-static void RemoveFromUseList(T user, size_t input_index, HUseList<T>* list) {
- HUseListNode<T>* current;
- for (HUseIterator<HInstruction*> use_it(*list); !use_it.Done(); use_it.Advance()) {
- current = use_it.Current();
- if (current->GetUser() == user && current->GetIndex() == input_index) {
- list->Remove(current);
- }
- }
+void HEnvironment::RemoveAsUserOfInput(size_t index) const {
+ const HUserRecord<HEnvironment*> user_record = vregs_.Get(index);
+ user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode());
}
HInstruction* HInstruction::GetNextDisregardingMoves() const {
@@ -484,14 +484,6 @@ HInstruction* HInstruction::GetPreviousDisregardingMoves() const {
return previous;
}
-void HInstruction::RemoveUser(HInstruction* user, size_t input_index) {
- RemoveFromUseList(user, input_index, &uses_);
-}
-
-void HInstruction::RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use) {
- env_uses_.Remove(use);
-}
-
void HInstructionList::AddInstruction(HInstruction* instruction) {
if (first_instruction_ == nullptr) {
DCHECK(last_instruction_ == nullptr);
@@ -602,7 +594,7 @@ void HInstruction::ReplaceWith(HInstruction* other) {
}
void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
- InputAt(index)->RemoveUser(this, index);
+ RemoveAsUserOfInput(index);
SetRawInputAt(index, replacement);
replacement->AddUseAt(this, index);
}
@@ -613,7 +605,7 @@ size_t HInstruction::EnvironmentSize() const {
void HPhi::AddInput(HInstruction* input) {
DCHECK(input->GetBlock() != nullptr);
- inputs_.Add(input);
+ inputs_.Add(HUserRecord<HInstruction*>(input));
input->AddUseAt(this, inputs_.Size() - 1);
}
@@ -990,4 +982,14 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
invoke->GetBlock()->RemoveInstruction(invoke);
}
+std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) {
+ ScopedObjectAccess soa(Thread::Current());
+ os << "["
+ << " is_top=" << rhs.IsTop()
+ << " type=" << (rhs.IsTop() ? "?" : PrettyClass(rhs.GetTypeHandle().Get()))
+ << " is_exact=" << rhs.IsExact()
+ << " ]";
+ return os;
+}
+
} // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 30d869d026..de448cc483 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -17,23 +17,28 @@
#ifndef ART_COMPILER_OPTIMIZING_NODES_H_
#define ART_COMPILER_OPTIMIZING_NODES_H_
+#include "base/arena_object.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "handle.h"
+#include "handle_scope.h"
#include "invoke_type.h"
#include "locations.h"
+#include "mirror/class.h"
#include "offsets.h"
#include "primitive.h"
-#include "utils/arena_object.h"
#include "utils/arena_bit_vector.h"
#include "utils/growable_array.h"
namespace art {
+class GraphChecker;
class HBasicBlock;
class HEnvironment;
class HInstruction;
class HIntConstant;
class HInvoke;
class HGraphVisitor;
+class HNullConstant;
class HPhi;
class HSuspendCheck;
class LiveInterval;
@@ -194,6 +199,8 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
return reverse_post_order_;
}
+ HNullConstant* GetNullConstant();
+
private:
HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
void VisitBlockForDominatorTree(HBasicBlock* block,
@@ -205,7 +212,6 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
ArenaBitVector* visiting);
void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
void RemoveDeadBlocks(const ArenaBitVector& visited) const;
- void RemoveBlock(HBasicBlock* block) const;
ArenaAllocator* const arena_;
@@ -233,6 +239,9 @@ class HGraph : public ArenaObject<kArenaAllocMisc> {
// The current id to assign to a newly added instruction. See HInstruction.id_.
int32_t current_instruction_id_;
+ // Cached null constant that might be created when building SSA form.
+ HNullConstant* cached_null_constant_;
+
ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
DISALLOW_COPY_AND_ASSIGN(HGraph);
};
@@ -481,14 +490,17 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
void ReplaceWith(HBasicBlock* other);
void AddInstruction(HInstruction* instruction);
- void RemoveInstruction(HInstruction* instruction);
void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor);
// Replace instruction `initial` with `replacement` within this block.
void ReplaceAndRemoveInstructionWith(HInstruction* initial,
HInstruction* replacement);
void AddPhi(HPhi* phi);
void InsertPhiAfter(HPhi* instruction, HPhi* cursor);
- void RemovePhi(HPhi* phi);
+ // RemoveInstruction and RemovePhi delete a given instruction from the respective
+ // instruction list. With 'ensure_safety' set to true, they verify that the
+ // instruction is not in use and remove it from the use lists of its inputs.
+ void RemoveInstruction(HInstruction* instruction, bool ensure_safety = true);
+ void RemovePhi(HPhi* phi, bool ensure_safety = true);
bool IsLoopHeader() const {
return (loop_information_ != nullptr) && (loop_information_->GetHeader() == this);
@@ -574,6 +586,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
M(ArrayLength, Instruction) \
M(ArraySet, Instruction) \
M(BoundsCheck, Instruction) \
+ M(BoundType, Instruction) \
M(CheckCast, Instruction) \
M(ClinitCheck, Instruction) \
M(Compare, BinaryOperation) \
@@ -610,6 +623,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocMisc> {
M(NewInstance, Instruction) \
M(Not, UnaryOperation) \
M(NotEqual, Condition) \
+ M(NullConstant, Instruction) \
M(NullCheck, Instruction) \
M(Or, BinaryOperation) \
M(ParallelMove, Instruction) \
@@ -704,6 +718,9 @@ class HUseList : public ValueObject {
}
void Remove(HUseListNode<T>* node) {
+ DCHECK(node != nullptr);
+ DCHECK(Contains(node));
+
if (node->prev_ != nullptr) {
node->prev_->next_ = node->next_;
}
@@ -715,6 +732,18 @@ class HUseList : public ValueObject {
}
}
+ bool Contains(const HUseListNode<T>* node) const {
+ if (node == nullptr) {
+ return false;
+ }
+ for (HUseListNode<T>* current = first_; current != nullptr; current = current->GetNext()) {
+ if (current == node) {
+ return true;
+ }
+ }
+ return false;
+ }
+
bool IsEmpty() const {
return first_ == nullptr;
}
@@ -750,6 +779,33 @@ class HUseIterator : public ValueObject {
friend class HValue;
};
+// This class is used by HEnvironment and HInstruction to record the instructions
+// they use and, for each use, a pointer to the corresponding HUseListNode kept
+// by the used instruction.
+template <typename T>
+class HUserRecord : public ValueObject {
+ public:
+ HUserRecord() : instruction_(nullptr), use_node_(nullptr) {}
+ explicit HUserRecord(HInstruction* instruction) : instruction_(instruction), use_node_(nullptr) {}
+
+ HUserRecord(const HUserRecord<T>& old_record, HUseListNode<T>* use_node)
+ : instruction_(old_record.instruction_), use_node_(use_node) {
+ DCHECK(instruction_ != nullptr);
+ DCHECK(use_node_ != nullptr);
+ DCHECK(old_record.use_node_ == nullptr);
+ }
+
+ HInstruction* GetInstruction() const { return instruction_; }
+ HUseListNode<T>* GetUseNode() const { return use_node_; }
+
+ private:
+ // Instruction used by the user.
+ HInstruction* instruction_;
+
+ // Corresponding entry in the use list kept by 'instruction_'.
+ HUseListNode<T>* use_node_;
+};
+
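// Editorial sketch, not part of this change: the point of HUserRecord is that
// each input slot stores, next to the used instruction, a pointer to the node
// representing this use in the used instruction's intrusive use list, so that
// removing a use unlinks one known node instead of scanning the list. A
// minimal standalone analogue of the idea, with hypothetical names:

struct UseNode { UseNode* prev = nullptr; UseNode* next = nullptr; };
struct UseList { UseNode* first = nullptr; };
// Kept per input slot, mirroring HUserRecord<T>.
struct UserRecord { UseList* list = nullptr; UseNode* node = nullptr; };

void RemoveAsUser(const UserRecord& record) {
  UseNode* n = record.node;  // O(1): no scan of the use list.
  if (n->prev != nullptr) {
    n->prev->next = n->next;
  } else {
    record.list->first = n->next;
  }
  if (n->next != nullptr) {
    n->next->prev = n->prev;
  }
}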
// Represents the side effects an instruction may have.
class SideEffects : public ValueObject {
public:
@@ -820,50 +876,118 @@ class HEnvironment : public ArenaObject<kArenaAllocMisc> {
: vregs_(arena, number_of_vregs) {
vregs_.SetSize(number_of_vregs);
for (size_t i = 0; i < number_of_vregs; i++) {
- vregs_.Put(i, VRegInfo(nullptr, nullptr));
+ vregs_.Put(i, HUserRecord<HEnvironment*>());
}
}
void CopyFrom(HEnvironment* env);
void SetRawEnvAt(size_t index, HInstruction* instruction) {
- vregs_.Put(index, VRegInfo(instruction, nullptr));
+ vregs_.Put(index, HUserRecord<HEnvironment*>(instruction));
}
+ HInstruction* GetInstructionAt(size_t index) const {
+ return vregs_.Get(index).GetInstruction();
+ }
+
+ void RemoveAsUserOfInput(size_t index) const;
+
+ size_t Size() const { return vregs_.Size(); }
+
+ private:
// Record instructions' use entries of this environment for constant-time removal.
+ // It should only be called by HInstruction when a new environment use is added.
void RecordEnvUse(HUseListNode<HEnvironment*>* env_use) {
DCHECK(env_use->GetUser() == this);
size_t index = env_use->GetIndex();
- VRegInfo info = vregs_.Get(index);
- DCHECK(info.vreg_ != nullptr);
- DCHECK(info.node_ == nullptr);
- vregs_.Put(index, VRegInfo(info.vreg_, env_use));
+ vregs_.Put(index, HUserRecord<HEnvironment*>(vregs_.Get(index), env_use));
}
- HInstruction* GetInstructionAt(size_t index) const {
- return vregs_.Get(index).vreg_;
+ GrowableArray<HUserRecord<HEnvironment*> > vregs_;
+
+ friend HInstruction;
+
+ DISALLOW_COPY_AND_ASSIGN(HEnvironment);
+};
+
+class ReferenceTypeInfo : ValueObject {
+ public:
+ typedef Handle<mirror::Class> TypeHandle;
+
+ static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ if (type_handle->IsObjectClass()) {
+ // Override the type handle to be consistent with the case when we get to
+ // Top but don't have the Object class available. It avoids having to guess
+ // what value the type_handle has when it's Top.
+ return ReferenceTypeInfo(TypeHandle(), is_exact, true);
+ } else {
+ return ReferenceTypeInfo(type_handle, is_exact, false);
+ }
}
- HUseListNode<HEnvironment*>* GetInstructionEnvUseAt(size_t index) const {
- return vregs_.Get(index).node_;
+ static ReferenceTypeInfo CreateTop(bool is_exact) {
+ return ReferenceTypeInfo(TypeHandle(), is_exact, true);
}
- size_t Size() const { return vregs_.Size(); }
+ bool IsExact() const { return is_exact_; }
+ bool IsTop() const { return is_top_; }
- private:
- struct VRegInfo {
- HInstruction* vreg_;
- HUseListNode<HEnvironment*>* node_;
+ Handle<mirror::Class> GetTypeHandle() const { return type_handle_; }
- VRegInfo(HInstruction* instruction, HUseListNode<HEnvironment*>* env_use)
- : vreg_(instruction), node_(env_use) {}
- };
+ bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ if (IsTop()) {
+ // Top (the equivalent of java.lang.Object) is a supertype of anything.
+ return true;
+ }
+ if (rti.IsTop()) {
+ // If we get here, `this` is not Top(), so it cannot be a supertype of `rti` (which is Top).
+ return false;
+ }
+ return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
+ }
- GrowableArray<VRegInfo> vregs_;
+ // Returns true if the type information provides the same amount of detail.
+ // Note that it does not mean the instructions have the same actual type
+ // (e.g. two Tops compare equal even though they can result from different merges).
+ bool IsEqual(ReferenceTypeInfo rti) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ if (IsExact() != rti.IsExact()) {
+ return false;
+ }
+ if (IsTop() && rti.IsTop()) {
+ // `Top` means java.lang.Object, so the types are equivalent.
+ return true;
+ }
+ if (IsTop() || rti.IsTop()) {
+ // If only one of them is Top (i.e. java.lang.Object), they are not equivalent.
+ // NB: We need this extra check because the type_handle of `Top` is invalid
+ // and we cannot inspect its reference.
+ return false;
+ }
- DISALLOW_COPY_AND_ASSIGN(HEnvironment);
+ // Finally check the types.
+ return GetTypeHandle().Get() == rti.GetTypeHandle().Get();
+ }
+
+ private:
+ ReferenceTypeInfo() : ReferenceTypeInfo(TypeHandle(), false, true) {}
+ ReferenceTypeInfo(TypeHandle type_handle, bool is_exact, bool is_top)
+ : type_handle_(type_handle), is_exact_(is_exact), is_top_(is_top) {}
+
+ // The class of the object.
+ TypeHandle type_handle_;
+ // Whether we know the exact type of the object, as opposed to only a
+ // superclass of its actual type.
+ bool is_exact_;
+ // A true value here means that the object type should be java.lang.Object.
+ // We don't have access to the corresponding mirror object every time so this
+ // flag acts as a substitute. When true, the TypeHandle refers to a null
+ // pointer and should not be used.
+ bool is_top_;
};
+std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs);
+
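// Editorial usage sketch, not part of this change. It assumes `string_class`
// and `object_class` are resolved mirror::Class handles obtained elsewhere
// (e.g. from a StackHandleScopeCollection) and that the mutator lock is held;
// it only illustrates the intended lattice behaviour of ReferenceTypeInfo:
//
//   ReferenceTypeInfo top = ReferenceTypeInfo::CreateTop(/* is_exact */ false);
//   ReferenceTypeInfo str = ReferenceTypeInfo::Create(string_class, /* is_exact */ true);
//   // Creating an info for java.lang.Object collapses to Top on purpose:
//   ReferenceTypeInfo obj = ReferenceTypeInfo::Create(object_class, /* is_exact */ false);
//
//   // top.IsSupertypeOf(str) is true, str.IsSupertypeOf(top) is false,
//   // obj.IsTop() is true, and obj.IsEqual(top) holds since both are inexact Top.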
class HInstruction : public ArenaObject<kArenaAllocMisc> {
public:
explicit HInstruction(SideEffects side_effects)
@@ -876,7 +1000,8 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
locations_(nullptr),
live_interval_(nullptr),
lifetime_position_(kNoLifetime),
- side_effects_(side_effects) {}
+ side_effects_(side_effects),
+ reference_type_info_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {}
virtual ~HInstruction() {}
@@ -899,13 +1024,15 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
bool IsLoopHeaderPhi() { return IsPhi() && block_->IsLoopHeader(); }
virtual size_t InputCount() const = 0;
- virtual HInstruction* InputAt(size_t i) const = 0;
+ HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); }
virtual void Accept(HGraphVisitor* visitor) = 0;
virtual const char* DebugName() const = 0;
virtual Primitive::Type GetType() const { return Primitive::kPrimVoid; }
- virtual void SetRawInputAt(size_t index, HInstruction* input) = 0;
+ void SetRawInputAt(size_t index, HInstruction* input) {
+ SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input));
+ }
virtual bool NeedsEnvironment() const { return false; }
virtual bool IsControlFlow() const { return false; }
@@ -914,12 +1041,24 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
// Does not apply for all instructions, but having this at top level greatly
// simplifies the null check elimination.
- virtual bool CanBeNull() const { return true; }
+ virtual bool CanBeNull() const {
+ DCHECK_EQ(GetType(), Primitive::kPrimNot) << "CanBeNull only applies to reference types";
+ return true;
+ }
virtual bool CanDoImplicitNullCheck() const { return false; }
+ void SetReferenceTypeInfo(ReferenceTypeInfo reference_type_info) {
+ reference_type_info_ = reference_type_info;
+ }
+
+ ReferenceTypeInfo GetReferenceTypeInfo() const { return reference_type_info_; }
+
void AddUseAt(HInstruction* user, size_t index) {
- uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena());
+ DCHECK(user != nullptr);
+ HUseListNode<HInstruction*>* use =
+ uses_.AddUse(user, index, GetBlock()->GetGraph()->GetArena());
+ user->SetRawInputRecordAt(index, HUserRecord<HInstruction*>(user->InputRecordAt(index), use));
}
void AddEnvUseAt(HEnvironment* user, size_t index) {
@@ -929,11 +1068,13 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
user->RecordEnvUse(env_use);
}
- void RemoveUser(HInstruction* user, size_t index);
- void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use);
+ void RemoveAsUserOfInput(size_t input) {
+ HUserRecord<HInstruction*> input_use = InputRecordAt(input);
+ input_use.GetInstruction()->uses_.Remove(input_use.GetUseNode());
+ }
- const HUseList<HInstruction*>& GetUses() { return uses_; }
- const HUseList<HEnvironment*>& GetEnvUses() { return env_uses_; }
+ const HUseList<HInstruction*>& GetUses() const { return uses_; }
+ const HUseList<HEnvironment*>& GetEnvUses() const { return env_uses_; }
bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); }
bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); }
@@ -1015,7 +1156,25 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
void SetLiveInterval(LiveInterval* interval) { live_interval_ = interval; }
bool HasLiveInterval() const { return live_interval_ != nullptr; }
+ bool IsSuspendCheckEntry() const { return IsSuspendCheck() && GetBlock()->IsEntryBlock(); }
+
+ // Returns whether the code generation of the instruction requires access to the
+ // current method. Such instructions are:
+ // (1): Instructions that require an environment, as calling the runtime requires
+ //      walking the stack and having the current method stored at a specific stack address.
+ // (2): Object literals like classes and strings, which are loaded from the dex cache
+ //      fields of the current method.
+ bool NeedsCurrentMethod() const {
+ return NeedsEnvironment() || IsLoadClass() || IsLoadString();
+ }
+
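// Purely hypothetical usage sketch (editorial assumption, not from this change):
// a backend could consult this query when deciding whether the ArtMethod pointer
// must stay reachable while generating code for `instruction`, e.g.
//
//   if (instruction->NeedsCurrentMethod()) {
//     KeepCurrentMethodAccessible(instruction);  // hypothetical helper
//   }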
+ protected:
+ virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0;
+ virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0;
+
private:
+ void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); }
+
HInstruction* previous_;
HInstruction* next_;
HBasicBlock* block_;
@@ -1050,7 +1209,12 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
const SideEffects side_effects_;
+ // TODO: for primitive types this should be marked as invalid.
+ ReferenceTypeInfo reference_type_info_;
+
+ friend class GraphChecker;
friend class HBasicBlock;
+ friend class HEnvironment;
friend class HGraph;
friend class HInstructionList;
@@ -1170,15 +1334,16 @@ class HTemplateInstruction: public HInstruction {
virtual ~HTemplateInstruction() {}
virtual size_t InputCount() const { return N; }
- virtual HInstruction* InputAt(size_t i) const { return inputs_[i]; }
protected:
- virtual void SetRawInputAt(size_t i, HInstruction* instruction) {
- inputs_[i] = instruction;
+ const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_[i]; }
+
+ void SetRawInputRecordAt(size_t i, const HUserRecord<HInstruction*>& input) OVERRIDE {
+ inputs_[i] = input;
}
private:
- EmbeddedArray<HInstruction*, N> inputs_;
+ EmbeddedArray<HUserRecord<HInstruction*>, N> inputs_;
friend class SsaBuilder;
};
@@ -1663,6 +1828,22 @@ class HDoubleConstant : public HConstant {
DISALLOW_COPY_AND_ASSIGN(HDoubleConstant);
};
+class HNullConstant : public HConstant {
+ public:
+ HNullConstant() : HConstant(Primitive::kPrimNot) {}
+
+ bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ return true;
+ }
+
+ size_t ComputeHashCode() const OVERRIDE { return 0; }
+
+ DECLARE_INSTRUCTION(NullConstant);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HNullConstant);
+};
+
// Constants of the type int. Those can be from Dex instructions, or
// synthesized (for example with the if-eqz instruction).
class HIntConstant : public HConstant {
@@ -1718,7 +1899,6 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic);
class HInvoke : public HInstruction {
public:
virtual size_t InputCount() const { return inputs_.Size(); }
- virtual HInstruction* InputAt(size_t i) const { return inputs_.Get(i); }
// Runtime needs to walk the stack, so Dex -> Dex calls need to
// know their environment.
@@ -1728,10 +1908,6 @@ class HInvoke : public HInstruction {
SetRawInputAt(index, argument);
}
- virtual void SetRawInputAt(size_t index, HInstruction* input) {
- inputs_.Put(index, input);
- }
-
virtual Primitive::Type GetType() const { return return_type_; }
uint32_t GetDexPc() const { return dex_pc_; }
@@ -1763,7 +1939,12 @@ class HInvoke : public HInstruction {
inputs_.SetSize(number_of_arguments);
}
- GrowableArray<HInstruction*> inputs_;
+ const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); }
+ void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
+ inputs_.Put(index, input);
+ }
+
+ GrowableArray<HUserRecord<HInstruction*> > inputs_;
const Primitive::Type return_type_;
const uint32_t dex_pc_;
const uint32_t dex_method_index_;
@@ -2259,11 +2440,6 @@ class HPhi : public HInstruction {
}
size_t InputCount() const OVERRIDE { return inputs_.Size(); }
- HInstruction* InputAt(size_t i) const OVERRIDE { return inputs_.Get(i); }
-
- void SetRawInputAt(size_t index, HInstruction* input) OVERRIDE {
- inputs_.Put(index, input);
- }
void AddInput(HInstruction* input);
@@ -2282,8 +2458,15 @@ class HPhi : public HInstruction {
DECLARE_INSTRUCTION(Phi);
+ protected:
+ const HUserRecord<HInstruction*> InputRecordAt(size_t i) const OVERRIDE { return inputs_.Get(i); }
+
+ void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) OVERRIDE {
+ inputs_.Put(index, input);
+ }
+
private:
- GrowableArray<HInstruction*> inputs_;
+ GrowableArray<HUserRecord<HInstruction*> > inputs_;
const uint32_t reg_number_;
Primitive::Type type_;
bool is_live_;
@@ -2608,7 +2791,8 @@ class HLoadClass : public HExpression<0> {
type_index_(type_index),
is_referrers_class_(is_referrers_class),
dex_pc_(dex_pc),
- generate_clinit_check_(false) {}
+ generate_clinit_check_(false),
+ loaded_class_rti_(ReferenceTypeInfo::CreateTop(/* is_exact */ false)) {}
bool CanBeMoved() const OVERRIDE { return true; }
@@ -2646,6 +2830,20 @@ class HLoadClass : public HExpression<0> {
return !is_referrers_class_;
}
+ ReferenceTypeInfo GetLoadedClassRTI() {
+ return loaded_class_rti_;
+ }
+
+ void SetLoadedClassRTI(ReferenceTypeInfo rti) {
+ // Make sure we only set exact types (the loaded class should never be merged).
+ DCHECK(rti.IsExact());
+ loaded_class_rti_ = rti;
+ }
+
+ bool IsResolved() {
+ return loaded_class_rti_.IsExact();
+ }
+
DECLARE_INSTRUCTION(LoadClass);
private:
@@ -2656,6 +2854,8 @@ class HLoadClass : public HExpression<0> {
// Used for code generation.
bool generate_clinit_check_;
+ ReferenceTypeInfo loaded_class_rti_;
+
DISALLOW_COPY_AND_ASSIGN(HLoadClass);
};
@@ -2858,6 +3058,32 @@ class HInstanceOf : public HExpression<2> {
DISALLOW_COPY_AND_ASSIGN(HInstanceOf);
};
+class HBoundType : public HExpression<1> {
+ public:
+ HBoundType(HInstruction* input, ReferenceTypeInfo bound_type)
+ : HExpression(Primitive::kPrimNot, SideEffects::None()),
+ bound_type_(bound_type) {
+ SetRawInputAt(0, input);
+ }
+
+ const ReferenceTypeInfo& GetBoundType() const { return bound_type_; }
+
+ bool CanBeNull() const OVERRIDE {
+ // `null instanceof ClassX` always returns false, so the bound value cannot be null.
+ return false;
+ }
+
+ DECLARE_INSTRUCTION(BoundType);
+
+ private:
+ // Encodes an upper bound on the type this instruction can have. In other words,
+ // it is always the case that GetBoundType().IsSupertypeOf(GetReferenceTypeInfo()).
+ // It is used to bound the type in cases like `if (x instanceof ClassX) {}`.
+ const ReferenceTypeInfo bound_type_;
+
+ DISALLOW_COPY_AND_ASSIGN(HBoundType);
+};
+
class HCheckCast : public HTemplateInstruction<2> {
public:
HCheckCast(HInstruction* object,
@@ -2959,7 +3185,7 @@ class MoveOperands : public ArenaObject<kArenaAllocMisc> {
// True if this blocks a move from the given location.
bool Blocks(Location loc) const {
- return !IsEliminated() && source_.Equals(loc);
+ return !IsEliminated() && (source_.Contains(loc) || loc.Contains(source_));
}
// A move is redundant if it's been eliminated, if its source and
@@ -3000,46 +3226,19 @@ class HParallelMove : public HTemplateInstruction<0> {
void AddMove(Location source, Location destination, HInstruction* instruction) {
DCHECK(source.IsValid());
DCHECK(destination.IsValid());
- // The parallel move resolver does not handle pairs. So we decompose the
- // pair locations into two moves.
- if (source.IsPair() && destination.IsPair()) {
- AddMove(source.ToLow(), destination.ToLow(), instruction);
- AddMove(source.ToHigh(), destination.ToHigh(), nullptr);
- } else if (source.IsPair()) {
- DCHECK(destination.IsDoubleStackSlot()) << destination;
- AddMove(source.ToLow(), Location::StackSlot(destination.GetStackIndex()), instruction);
- AddMove(source.ToHigh(), Location::StackSlot(destination.GetHighStackIndex(4)), nullptr);
- } else if (destination.IsPair()) {
- if (source.IsConstant()) {
- // We put the same constant in the move. The code generator will handle which
- // low or high part to use.
- AddMove(source, destination.ToLow(), instruction);
- AddMove(source, destination.ToHigh(), nullptr);
- } else {
- DCHECK(source.IsDoubleStackSlot());
- AddMove(Location::StackSlot(source.GetStackIndex()), destination.ToLow(), instruction);
- // TODO: rewrite GetHighStackIndex to not require a word size. It's supposed to
- // always be 4.
- static constexpr int kHighOffset = 4;
- AddMove(Location::StackSlot(source.GetHighStackIndex(kHighOffset)),
- destination.ToHigh(),
- nullptr);
- }
- } else {
- if (kIsDebugBuild) {
- if (instruction != nullptr) {
- for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
- DCHECK_NE(moves_.Get(i).GetInstruction(), instruction)
- << "Doing parallel moves for the same instruction.";
- }
- }
+ if (kIsDebugBuild) {
+ if (instruction != nullptr) {
for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
- DCHECK(!destination.Equals(moves_.Get(i).GetDestination()))
- << "Same destination for two moves in a parallel move.";
+ DCHECK_NE(moves_.Get(i).GetInstruction(), instruction)
+ << "Doing parallel moves for the same instruction.";
}
}
- moves_.Add(MoveOperands(source, destination, instruction));
+ for (size_t i = 0, e = moves_.Size(); i < e; ++i) {
+ DCHECK(!destination.Equals(moves_.Get(i).GetDestination()))
+ << "Same destination for two moves in a parallel move.";
+ }
}
+ moves_.Add(MoveOperands(source, destination, instruction));
}
MoveOperands* MoveOperandsAt(size_t index) const {
diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc
index 5dbdc74924..4cf22d3b2e 100644
--- a/compiler/optimizing/nodes_test.cc
+++ b/compiler/optimizing/nodes_test.cc
@@ -14,8 +14,8 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "nodes.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index b99f6784f7..b13e07eb22 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -21,6 +21,12 @@
namespace art {
+void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat) const {
+ if (stats_ != nullptr) {
+ stats_->RecordStat(compilation_stat);
+ }
+}
+
void HOptimization::Check() {
if (kIsDebugBuild) {
if (is_in_ssa_form_) {
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index d9e082a7f3..af39e092c7 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -18,6 +18,7 @@
#define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_
#include "nodes.h"
+#include "optimizing_compiler_stats.h"
namespace art {
@@ -34,8 +35,10 @@ class HOptimization : public ValueObject {
public:
HOptimization(HGraph* graph,
bool is_in_ssa_form,
- const char* pass_name)
+ const char* pass_name,
+ OptimizingCompilerStats* stats = nullptr)
: graph_(graph),
+ stats_(stats),
is_in_ssa_form_(is_in_ssa_form),
pass_name_(pass_name) {}
@@ -51,7 +54,11 @@ class HOptimization : public ValueObject {
void Check();
protected:
+ void MaybeRecordStat(MethodCompilationStat compilation_stat) const;
+
HGraph* const graph_;
+ // Used to record stats about the optimization.
+ OptimizingCompilerStats* const stats_;
private:
// Does the analyzed graph use the SSA form?
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index c518f33f53..2fef8c7b3a 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -19,6 +19,7 @@
#include <fstream>
#include <stdint.h>
+#include "base/arena_allocator.h"
#include "base/dumpable.h"
#include "base/timing_logger.h"
#include "bounds_check_elimination.h"
@@ -47,7 +48,6 @@
#include "ssa_phi_elimination.h"
#include "ssa_liveness_analysis.h"
#include "reference_type_propagation.h"
-#include "utils/arena_allocator.h"
namespace art {
@@ -201,6 +201,7 @@ class OptimizingCompiler FINAL : public Compiler {
CompiledMethod* CompileOptimized(HGraph* graph,
CodeGenerator* codegen,
CompilerDriver* driver,
+ const DexFile& dex_file,
const DexCompilationUnit& dex_compilation_unit,
PassInfoPrinter* pass_info) const;
@@ -293,13 +294,15 @@ static void RunOptimizations(HOptimization* optimizations[],
static void RunOptimizations(HGraph* graph,
CompilerDriver* driver,
OptimizingCompilerStats* stats,
+ const DexFile& dex_file,
const DexCompilationUnit& dex_compilation_unit,
- PassInfoPrinter* pass_info_printer) {
+ PassInfoPrinter* pass_info_printer,
+ StackHandleScopeCollection* handles) {
SsaRedundantPhiElimination redundant_phi(graph);
SsaDeadPhiElimination dead_phi(graph);
HDeadCodeElimination dce(graph);
HConstantFolding fold1(graph);
- InstructionSimplifier simplify1(graph);
+ InstructionSimplifier simplify1(graph, stats);
HInliner inliner(graph, dex_compilation_unit, driver, stats);
@@ -308,8 +311,8 @@ static void RunOptimizations(HGraph* graph,
GVNOptimization gvn(graph, side_effects);
LICM licm(graph, side_effects);
BoundsCheckElimination bce(graph);
- ReferenceTypePropagation type_propagation(graph);
- InstructionSimplifier simplify2(graph, "instruction_simplifier_after_types");
+ ReferenceTypePropagation type_propagation(graph, dex_file, dex_compilation_unit, handles);
+ InstructionSimplifier simplify2(graph, stats, "instruction_simplifier_after_types");
IntrinsicsRecognizer intrinsics(graph, dex_compilation_unit.GetDexFile(), driver);
@@ -348,10 +351,12 @@ static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) {
CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
CodeGenerator* codegen,
CompilerDriver* compiler_driver,
+ const DexFile& dex_file,
const DexCompilationUnit& dex_compilation_unit,
PassInfoPrinter* pass_info_printer) const {
- RunOptimizations(
- graph, compiler_driver, &compilation_stats_, dex_compilation_unit, pass_info_printer);
+ StackHandleScopeCollection handles(Thread::Current());
+ RunOptimizations(graph, compiler_driver, &compilation_stats_,
+ dex_file, dex_compilation_unit, pass_info_printer, &handles);
PrepareForRegisterAllocation(graph).Run();
SsaLivenessAnalysis liveness(*graph, codegen);
@@ -376,7 +381,10 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
compiler_driver,
codegen->GetInstructionSet(),
ArrayRef<const uint8_t>(allocator.GetMemory()),
- codegen->GetFrameSize(),
+ // Follow Quick's behavior and set the frame size to zero if it is
+ // considered "empty" (see the definition of
+ // art::CodeGenerator::HasEmptyFrame).
+ codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
codegen->GetCoreSpillMask(),
codegen->GetFpuSpillMask(),
ArrayRef<const uint8_t>(stack_map));
@@ -400,17 +408,21 @@ CompiledMethod* OptimizingCompiler::CompileBaseline(
codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline);
- return CompiledMethod::SwapAllocCompiledMethod(compiler_driver,
- codegen->GetInstructionSet(),
- ArrayRef<const uint8_t>(allocator.GetMemory()),
- codegen->GetFrameSize(),
- codegen->GetCoreSpillMask(),
- codegen->GetFpuSpillMask(),
- &src_mapping_table,
- AlignVectorSize(mapping_table),
- AlignVectorSize(vmap_table),
- AlignVectorSize(gc_map),
- ArrayRef<const uint8_t>());
+ return CompiledMethod::SwapAllocCompiledMethod(
+ compiler_driver,
+ codegen->GetInstructionSet(),
+ ArrayRef<const uint8_t>(allocator.GetMemory()),
+ // Follow Quick's behavior and set the frame size to zero if it is
+ // considered "empty" (see the definition of
+ // art::CodeGenerator::HasEmptyFrame).
+ codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
+ codegen->GetCoreSpillMask(),
+ codegen->GetFpuSpillMask(),
+ &src_mapping_table,
+ AlignVectorSize(mapping_table),
+ AlignVectorSize(vmap_table),
+ AlignVectorSize(gc_map),
+ ArrayRef<const uint8_t>());
}
CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
@@ -508,6 +520,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
return CompileOptimized(graph,
codegen.get(),
compiler_driver,
+ dex_file,
dex_compilation_unit,
&pass_info_printer);
} else if (shouldOptimize && RegisterAllocator::Supports(instruction_set)) {
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index cc2723df99..3ebf0f8cd2 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -43,6 +43,8 @@ enum MethodCompilationStat {
kNotCompiledCantAccesType,
kNotOptimizedRegisterAllocator,
kNotCompiledUnhandledInstruction,
+ kRemovedCheckedCast,
+ kRemovedNullCheck,
kLastStat
};
@@ -96,6 +98,8 @@ class OptimizingCompilerStats {
case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
+ case kRemovedCheckedCast: return "kRemovedCheckedCast";
+ case kRemovedNullCheck: return "kRemovedNullCheck";
default: LOG(FATAL) << "invalid stat";
}
return "";
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index debe466560..7d0641ec13 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -57,17 +57,49 @@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) {
// unallocated, or the move was already eliminated).
for (size_t i = 0; i < parallel_move->NumMoves(); ++i) {
MoveOperands* move = parallel_move->MoveOperandsAt(i);
- // The parallel move resolver algorithm does not work with register pairs.
- DCHECK(!move->GetSource().IsPair());
- DCHECK(!move->GetDestination().IsPair());
if (!move->IsRedundant()) {
moves_.Add(move);
}
}
}
+// Updates the source of `move`, knowing that `updated_location` has been swapped
+// with `new_source`. Note that `updated_location` can be a pair; therefore, if
+// `move`'s source is not a pair, we need to extract which register of the pair to use.
+static void UpdateSourceOf(MoveOperands* move, Location updated_location, Location new_source) {
+ Location source = move->GetSource();
+ if (new_source.GetKind() == source.GetKind()) {
+ DCHECK(updated_location.Equals(source));
+ move->SetSource(new_source);
+ } else if (new_source.IsStackSlot()
+ || new_source.IsDoubleStackSlot()
+ || source.IsStackSlot()
+ || source.IsDoubleStackSlot()) {
+ // Stack slots never take part in a pair/non-pair swap.
+ DCHECK(updated_location.Equals(source));
+ move->SetSource(new_source);
+ } else if (source.IsRegister()) {
+ DCHECK(new_source.IsRegisterPair()) << new_source;
+ DCHECK(updated_location.IsRegisterPair()) << updated_location;
+ if (updated_location.low() == source.reg()) {
+ move->SetSource(Location::RegisterLocation(new_source.low()));
+ } else {
+ DCHECK_EQ(updated_location.high(), source.reg());
+ move->SetSource(Location::RegisterLocation(new_source.high()));
+ }
+ } else if (source.IsFpuRegister()) {
+ DCHECK(new_source.IsFpuRegisterPair()) << new_source;
+ DCHECK(updated_location.IsFpuRegisterPair()) << updated_location;
+ if (updated_location.low() == source.reg()) {
+ move->SetSource(Location::FpuRegisterLocation(new_source.low()));
+ } else {
+ DCHECK_EQ(updated_location.high(), source.reg());
+ move->SetSource(Location::FpuRegisterLocation(new_source.high()));
+ }
+ }
+}
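// Worked example (editorial, not part of this change): suppose the resolver
// swaps the register pair (0,1) with the pair (2,3), and a later move in the
// list still has the single register 2 as its source. UpdateSourceOf(move,
// updated_location = pair (2,3), new_source = pair (0,1)) takes the
// single-register branch, matches register 2 against the low half of the
// updated pair, and rewrites the move's source to register 0. This is what
// makes resolutions such as "(0,1 <-> 2,3) (7 -> 1) (0 -> 7)" in
// parallel_move_test.cc come out right.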
-void ParallelMoveResolver::PerformMove(size_t index) {
+MoveOperands* ParallelMoveResolver::PerformMove(size_t index) {
// Each call to this function performs a move and deletes it from the move
// graph. We first recursively perform any move blocking this one. We
// mark a move as "pending" on entry to PerformMove in order to detect
@@ -75,35 +107,59 @@ void ParallelMoveResolver::PerformMove(size_t index) {
// which means that a call to PerformMove could change any source operand
// in the move graph.
- DCHECK(!moves_.Get(index)->IsPending());
- DCHECK(!moves_.Get(index)->IsRedundant());
+ MoveOperands* move = moves_.Get(index);
+ DCHECK(!move->IsPending());
+ if (move->IsRedundant()) {
+ // Because we swap register pairs first, the remaining non-pending
+ // moves may become redundant.
+ move->Eliminate();
+ return nullptr;
+ }
// Clear this move's destination to indicate a pending move. The actual
// destination is saved in a stack-allocated local. Recursion may allow
// multiple moves to be pending.
- DCHECK(!moves_.Get(index)->GetSource().IsInvalid());
- Location destination = moves_.Get(index)->MarkPending();
+ DCHECK(!move->GetSource().IsInvalid());
+ Location destination = move->MarkPending();
// Perform a depth-first traversal of the move graph to resolve
// dependencies. Any unperformed, unpending move with a source the same
// as this one's destination blocks this one so recursively perform all
// such moves.
+ MoveOperands* required_swap = nullptr;
for (size_t i = 0; i < moves_.Size(); ++i) {
const MoveOperands& other_move = *moves_.Get(i);
if (other_move.Blocks(destination) && !other_move.IsPending()) {
// Though PerformMove can change any source operand in the move graph,
- // this call cannot create a blocking move via a swap (this loop does
- // not miss any). Assume there is a non-blocking move with source A
+ // calling `PerformMove` cannot create a blocking move via a swap
+ // (this loop does not miss any).
+ // For example, assume there is a non-blocking move with source A
// and this move is blocked on source B and there is a swap of A and
// B. Then A and B must be involved in the same cycle (or they would
// not be swapped). Since this move's destination is B and there is
// only a single incoming edge to an operand, this move must also be
// involved in the same cycle. In that case, the blocking move will
// be created but will be "pending" when we return from PerformMove.
- PerformMove(i);
+ required_swap = PerformMove(i);
+
+ if (required_swap == move) {
+ // If this move is required to swap, we do so without looking
+ // at the remaining moves. Swapping is not blocked by anything; it just
+ // updates the sources of the other moves.
+ break;
+ } else if (required_swap == moves_.Get(i)) {
+ // If `other_move` was swapped, we iterate again to find a new
+ // potential cycle.
+ required_swap = nullptr;
+ i = 0;
+ } else if (required_swap != nullptr) {
+ // A move is required to swap. We walk back the cycle to find that
+ // move by simply returning from this `PerformMove`.
+ moves_.Get(index)->ClearPending(destination);
+ return required_swap;
+ }
}
}
- MoveOperands* move = moves_.Get(index);
// We are about to resolve this move and don't need it marked as
// pending, so restore its destination.
@@ -113,19 +169,30 @@ void ParallelMoveResolver::PerformMove(size_t index) {
// so it may now be the last move in the cycle. If so remove it.
if (move->GetSource().Equals(destination)) {
move->Eliminate();
- return;
+ DCHECK(required_swap == nullptr);
+ return nullptr;
}
// The move may be blocked on a (at most one) pending move, in which case
// we have a cycle. Search for such a blocking move and perform a swap to
// resolve it.
bool do_swap = false;
- for (size_t i = 0; i < moves_.Size(); ++i) {
- const MoveOperands& other_move = *moves_.Get(i);
- if (other_move.Blocks(destination)) {
- DCHECK(other_move.IsPending());
- do_swap = true;
- break;
+ if (required_swap != nullptr) {
+ DCHECK_EQ(required_swap, move);
+ do_swap = true;
+ } else {
+ for (size_t i = 0; i < moves_.Size(); ++i) {
+ const MoveOperands& other_move = *moves_.Get(i);
+ if (other_move.Blocks(destination)) {
+ DCHECK(other_move.IsPending());
+ if (!destination.IsPair() && other_move.GetSource().IsPair()) {
+ // We swap pairs before swapping non-pairs. Walk back the cycle
+ // by returning the pair move that must be swapped.
+ return moves_.Get(i);
+ }
+ do_swap = true;
+ break;
+ }
}
}
@@ -140,15 +207,21 @@ void ParallelMoveResolver::PerformMove(size_t index) {
for (size_t i = 0; i < moves_.Size(); ++i) {
const MoveOperands& other_move = *moves_.Get(i);
if (other_move.Blocks(source)) {
- moves_.Get(i)->SetSource(swap_destination);
+ UpdateSourceOf(moves_.Get(i), source, swap_destination);
} else if (other_move.Blocks(swap_destination)) {
- moves_.Get(i)->SetSource(source);
+ UpdateSourceOf(moves_.Get(i), swap_destination, source);
}
}
+ // If the swap was required because of a pair in the middle of a cycle,
+ // we return the swapped move, so that the caller knows it needs to re-iterate
+ // its dependency loop.
+ return required_swap;
} else {
// This move is not blocked.
EmitMove(index);
move->Eliminate();
+ DCHECK(required_swap == nullptr);
+ return nullptr;
}
}
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 7ec1dd2deb..3fa1b37afd 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -83,7 +83,15 @@ class ParallelMoveResolver : public ValueObject {
// Perform the move at the moves_ index in question (possibly requiring
// other moves to satisfy dependencies).
- void PerformMove(size_t index);
+ //
+ // Returns the move (if any) in the dependency cycle that must perform the
+ // swap, or nullptr if no swap is required. This is used to handle pair swaps,
+ // where we want the pair to swap first so that we do not build register pairs
+ // the code generator does not expect. For example, if we were to swap R1 with
+ // R2, we would need to update all locations using R2 to use R1 instead, so a
+ // (R2,R3) register pair could become (R1,R3). We could teach the code
+ // generator to handle such pairs, but it is easier and cleaner to simply not
+ // create them and to give pair swaps priority.
+ MoveOperands* PerformMove(size_t index);
DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver);
};
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index 28b5697bbd..44a3da2817 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -14,9 +14,9 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
@@ -165,7 +165,7 @@ TEST(ParallelMoveTest, Pairs) {
Location::RegisterPairLocation(2, 3),
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str());
+ ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str());
}
{
@@ -180,7 +180,7 @@ TEST(ParallelMoveTest, Pairs) {
Location::RegisterLocation(4),
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(2 -> 4) (0 -> 2) (1 -> 3)", resolver.GetMessage().c_str());
+ ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str());
}
{
@@ -195,7 +195,89 @@ TEST(ParallelMoveTest, Pairs) {
Location::RegisterLocation(0),
nullptr);
resolver.EmitNativeCode(moves);
- ASSERT_STREQ("(2 <-> 0) (1 -> 3)", resolver.GetMessage().c_str());
+ ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
+ }
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(7),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(7),
+ Location::RegisterLocation(1),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ }
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(7),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(7),
+ Location::RegisterLocation(1),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ }
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(2),
+ Location::RegisterLocation(7),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterLocation(7),
+ Location::RegisterLocation(1),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(0,1 <-> 2,3) (7 -> 1) (0 -> 7)", resolver.GetMessage().c_str());
+ }
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(2, 3),
+ Location::RegisterPairLocation(0, 1),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(2,3 <-> 0,1)", resolver.GetMessage().c_str());
+ }
+ {
+ TestParallelMoveResolver resolver(&allocator);
+ HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
+ moves->AddMove(
+ Location::RegisterPairLocation(2, 3),
+ Location::RegisterPairLocation(0, 1),
+ nullptr);
+ moves->AddMove(
+ Location::RegisterPairLocation(0, 1),
+ Location::RegisterPairLocation(2, 3),
+ nullptr);
+ resolver.EmitNativeCode(moves);
+ ASSERT_STREQ("(0,1 <-> 2,3)", resolver.GetMessage().c_str());
}
}
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 12acd0884a..2d9a2bf330 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -42,6 +42,11 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
check->ReplaceWith(check->InputAt(0));
}
+void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) {
+ bound_type->ReplaceWith(bound_type->InputAt(0));
+ bound_type->GetBlock()->RemoveInstruction(bound_type);
+}
+
void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
HLoadClass* cls = check->GetLoadClass();
check->ReplaceWith(cls);
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 0fdb65ffe0..0f697fbc25 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -36,6 +36,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
virtual void VisitNullCheck(HNullCheck* check) OVERRIDE;
virtual void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE;
virtual void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
+ virtual void VisitBoundType(HBoundType* bound_type) OVERRIDE;
virtual void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
virtual void VisitCondition(HCondition* condition) OVERRIDE;
diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc
index 9cf8235d85..293fde978e 100644
--- a/compiler/optimizing/pretty_printer_test.cc
+++ b/compiler/optimizing/pretty_printer_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "base/stringprintf.h"
#include "builder.h"
#include "dex_file.h"
@@ -21,7 +22,6 @@
#include "nodes.h"
#include "optimizing_unit_test.h"
#include "pretty_printer.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
index 7e274f6ebf..fe23fcf326 100644
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ b/compiler/optimizing/primitive_type_propagation.cc
@@ -40,6 +40,7 @@ static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_
// Re-compute and update the type of the instruction. Returns
// whether or not the type was changed.
bool PrimitiveTypePropagation::UpdateType(HPhi* phi) {
+ DCHECK(phi->IsLive());
Primitive::Type existing = phi->GetType();
Primitive::Type new_type = existing;
@@ -49,15 +50,20 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) {
}
phi->SetType(new_type);
- if (new_type == Primitive::kPrimDouble || new_type == Primitive::kPrimFloat) {
+ if (new_type == Primitive::kPrimDouble
+ || new_type == Primitive::kPrimFloat
+ || new_type == Primitive::kPrimNot) {
// If the phi is of floating point type, we need to update its inputs to that
// type. For inputs that are phis, we need to recompute their types.
for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
HInstruction* input = phi->InputAt(i);
if (input->GetType() != new_type) {
- HInstruction* equivalent = SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
+ HInstruction* equivalent = (new_type == Primitive::kPrimNot)
+ ? SsaBuilder::GetReferenceTypeEquivalent(input)
+ : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
phi->ReplaceInput(equivalent, i);
if (equivalent->IsPhi()) {
+ equivalent->AsPhi()->SetLive();
AddToWorklist(equivalent->AsPhi());
}
}
@@ -78,15 +84,9 @@ void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) {
if (block->IsLoopHeader()) {
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->AsPhi();
- // Set the initial type for the phi. Use the non back edge input for reaching
- // a fixed point faster.
- Primitive::Type phi_type = phi->GetType();
- // We merge with the existing type, that has been set by the SSA builder.
- DCHECK(phi_type == Primitive::kPrimVoid
- || phi_type == Primitive::kPrimFloat
- || phi_type == Primitive::kPrimDouble);
- phi->SetType(MergeTypes(phi->InputAt(0)->GetType(), phi->GetType()));
- AddToWorklist(phi);
+ if (phi->IsLive()) {
+ AddToWorklist(phi);
+ }
}
} else {
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
@@ -95,7 +95,10 @@ void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) {
// doing a reverse post-order visit, therefore either the phi users are
// non-loop phi and will be visited later in the visit, or are loop-phis,
// and they are already in the work list.
- UpdateType(it.Current()->AsPhi());
+ HPhi* phi = it.Current()->AsPhi();
+ if (phi->IsLive()) {
+ UpdateType(phi);
+ }
}
}
}
@@ -110,13 +113,14 @@ void PrimitiveTypePropagation::ProcessWorklist() {
}
void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) {
+ DCHECK(instruction->IsLive());
worklist_.Add(instruction);
}
void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) {
for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->GetUser()->AsPhi();
- if (phi != nullptr) {
+ if (phi != nullptr && phi->IsLive()) {
AddToWorklist(phi);
}
}
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 24e6837f45..76b8d7eacf 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -16,16 +16,17 @@
#include "reference_type_propagation.h"
+#include "class_linker.h"
+#include "mirror/class-inl.h"
+#include "mirror/dex_cache.h"
+#include "scoped_thread_state_change.h"
+
namespace art {
-// TODO: Only do the analysis on reference types. We currently have to handle
-// the `null` constant, that is represented as a `HIntConstant` and therefore
-// has the Primitive::kPrimInt type.
+// TODO: handle comparisons with null (`a == null`, `a != null`).
void ReferenceTypePropagation::Run() {
- // Compute null status for instructions.
-
- // To properly propagate not-null info we need to visit in the dominator-based order.
+ // To properly propagate type info we need to visit in the dominator-based order.
// Reverse post order guarantees a node's dominators are visited first.
// We take advantage of this order in `VisitBasicBlock`.
for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
@@ -34,9 +35,210 @@ void ReferenceTypePropagation::Run() {
ProcessWorklist();
}
+void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
+ // TODO: handle other instructions that give type info
+ // (NewArray/Call/Field accesses/array accesses)
+
+ // Initialize exact types first for faster convergence.
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ if (instr->IsNewInstance()) {
+ VisitNewInstance(instr->AsNewInstance());
+ } else if (instr->IsLoadClass()) {
+ VisitLoadClass(instr->AsLoadClass());
+ }
+ }
+
+ // Handle Phis.
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ VisitPhi(it.Current()->AsPhi());
+ }
+
+ // Add extra nodes to bound types.
+ BoundTypeForIfInstanceOf(block);
+}
+
+// Detects whether `block` ends with an `if (x instanceof ClassX)` check.
+// If it does, inserts an HBoundType instruction that bounds the type of `x`
+// to `ClassX` for the uses of `x` dominated by the check's true successor.
+void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) {
+ HInstruction* lastInstruction = block->GetLastInstruction();
+ if (!lastInstruction->IsIf()) {
+ return;
+ }
+ HInstruction* ifInput = lastInstruction->InputAt(0);
+ // TODO: Handle more patterns here: HIf(bool) HIf(HNotEqual).
+ if (!ifInput->IsEqual()) {
+ return;
+ }
+ HInstruction* instanceOf = ifInput->InputAt(0);
+ HInstruction* comp_value = ifInput->InputAt(1);
+ if (!instanceOf->IsInstanceOf() || !comp_value->IsIntConstant()) {
+ return;
+ }
+
+ HInstruction* obj = instanceOf->InputAt(0);
+ HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
+
+ ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo();
+ ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+ HBoundType* bound_type = new (graph_->GetArena()) HBoundType(obj, class_rti);
+
+ // Narrow the type as much as possible.
+ {
+ ScopedObjectAccess soa(Thread::Current());
+ if (!load_class->IsResolved() || class_rti.IsSupertypeOf(obj_rti)) {
+ bound_type->SetReferenceTypeInfo(obj_rti);
+ } else {
+ bound_type->SetReferenceTypeInfo(
+ ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false));
+ }
+ }
+
+ block->InsertInstructionBefore(bound_type, lastInstruction);
+ // Pick the right successor based on the value we compare against.
+ HIntConstant* comp_value_int = comp_value->AsIntConstant();
+ HBasicBlock* instanceOfTrueBlock = comp_value_int->GetValue() == 0
+ ? lastInstruction->AsIf()->IfFalseSuccessor()
+ : lastInstruction->AsIf()->IfTrueSuccessor();
+
+ for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) {
+ HInstruction* user = it.Current()->GetUser();
+ if (instanceOfTrueBlock->Dominates(user->GetBlock())) {
+ user->ReplaceInput(bound_type, it.Current()->GetIndex());
+ }
+ }
+}
+
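// Editorial example (not part of this change) of the pattern targeted above:
//
//   if (x instanceof ClassX) {
//     useOf(x);   // this use is dominated by the true branch
//   }
//   useOf(x);     // this use is not, and keeps seeing the original `x`
//
// The dominated use is rewritten to consume the new HBoundType, whose
// reference type info is narrowed to ClassX (kept inexact), or left as `x`'s
// own type when that is already at least as precise as ClassX.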
+void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) {
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_);
+ // Get type from dex cache assuming it was populated by the verifier.
+ mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex());
+ if (resolved_class != nullptr) {
+ MutableHandle<mirror::Class> handle = handles_->NewHandle(resolved_class);
+ instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, true));
+ }
+}
+
+void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) {
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::DexCache* dex_cache = dex_compilation_unit_.GetClassLinker()->FindDexCache(dex_file_);
+ // Get type from dex cache assuming it was populated by the verifier.
+ mirror::Class* resolved_class = dex_cache->GetResolvedType(instr->GetTypeIndex());
+ if (resolved_class != nullptr) {
+ Handle<mirror::Class> handle = handles_->NewHandle(resolved_class);
+ instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(handle, /* is_exact */ true));
+ }
+ Handle<mirror::Class> class_handle = handles_->NewHandle(mirror::Class::GetJavaLangClass());
+ instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(class_handle, /* is_exact */ true));
+}
+
+void ReferenceTypePropagation::VisitPhi(HPhi* phi) {
+ if (phi->GetType() != Primitive::kPrimNot) {
+ return;
+ }
+
+ if (phi->GetBlock()->IsLoopHeader()) {
+ // Set the initial type for the phi. Use the non-back-edge input to reach
+ // a fixed point faster.
+ AddToWorklist(phi);
+ phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
+ phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo());
+ } else {
+ // Eagerly compute the type of the phi, for quicker convergence. Note
+ // that we don't need to add users to the worklist because we are
+ // doing a reverse post-order visit: either the phi's users are non-loop
+ // phis and will be visited later in this visit, or they are loop phis
+ // and are already in the work list.
+ UpdateNullability(phi);
+ UpdateReferenceTypeInfo(phi);
+ }
+}
+
+ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& a,
+ const ReferenceTypeInfo& b) {
+ bool is_exact = a.IsExact() && b.IsExact();
+ bool is_top = a.IsTop() || b.IsTop();
+ Handle<mirror::Class> type_handle;
+
+ if (!is_top) {
+ if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) {
+ type_handle = a.GetTypeHandle();
+ } else if (a.IsSupertypeOf(b)) {
+ type_handle = a.GetTypeHandle();
+ is_exact = false;
+ } else if (b.IsSupertypeOf(a)) {
+ type_handle = b.GetTypeHandle();
+ is_exact = false;
+ } else {
+ // TODO: Find a common super class.
+ is_top = true;
+ is_exact = false;
+ }
+ }
+
+ return is_top
+ ? ReferenceTypeInfo::CreateTop(is_exact)
+ : ReferenceTypeInfo::Create(type_handle, is_exact);
+}
+
+bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) {
+ ScopedObjectAccess soa(Thread::Current());
+
+ ReferenceTypeInfo previous_rti = instr->GetReferenceTypeInfo();
+ if (instr->IsBoundType()) {
+ UpdateBoundType(instr->AsBoundType());
+ } else if (instr->IsPhi()) {
+ UpdatePhi(instr->AsPhi());
+ } else {
+ LOG(FATAL) << "Invalid instruction (should not get here)";
+ }
+
+ return !previous_rti.IsEqual(instr->GetReferenceTypeInfo());
+}
+
+void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) {
+ ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo();
+ // Make sure that we do not widen beyond the bound type.
+ ReferenceTypeInfo bound_rti = instr->GetBoundType();
+ if (!bound_rti.IsSupertypeOf(new_rti)) {
+ new_rti = bound_rti;
+ }
+ instr->SetReferenceTypeInfo(new_rti);
+}
+
+void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
+ ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo();
+ if (new_rti.IsTop() && !new_rti.IsExact()) {
+ // Early return if we are Top and inexact.
+ instr->SetReferenceTypeInfo(new_rti);
+ return;
+ }
+ for (size_t i = 1; i < instr->InputCount(); i++) {
+ new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo());
+ if (new_rti.IsTop()) {
+ if (!new_rti.IsExact()) {
+ break;
+ } else {
+ continue;
+ }
+ }
+ }
+ instr->SetReferenceTypeInfo(new_rti);
+}
+
// Re-computes and updates the nullability of the instruction. Returns whether or
// not the nullability was changed.
-bool ReferenceTypePropagation::UpdateNullability(HPhi* phi) {
+bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) {
+ DCHECK(instr->IsPhi() || instr->IsBoundType());
+
+ if (!instr->IsPhi()) {
+ return false;
+ }
+
+ HPhi* phi = instr->AsPhi();
bool existing_can_be_null = phi->CanBeNull();
bool new_can_be_null = false;
for (size_t i = 0; i < phi->InputCount(); i++) {
@@ -47,48 +249,26 @@ bool ReferenceTypePropagation::UpdateNullability(HPhi* phi) {
return existing_can_be_null != new_can_be_null;
}
-
-void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
- if (block->IsLoopHeader()) {
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- // Set the initial type for the phi. Use the non back edge input for reaching
- // a fixed point faster.
- HPhi* phi = it.Current()->AsPhi();
- AddToWorklist(phi);
- phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
- }
- } else {
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- // Eagerly compute the type of the phi, for quicker convergence. Note
- // that we don't need to add users to the worklist because we are
- // doing a reverse post-order visit, therefore either the phi users are
- // non-loop phi and will be visited later in the visit, or are loop-phis,
- // and they are already in the work list.
- UpdateNullability(it.Current()->AsPhi());
- }
- }
-}
-
void ReferenceTypePropagation::ProcessWorklist() {
while (!worklist_.IsEmpty()) {
- HPhi* instruction = worklist_.Pop();
- if (UpdateNullability(instruction)) {
+ HInstruction* instruction = worklist_.Pop();
+ if (UpdateNullability(instruction) || UpdateReferenceTypeInfo(instruction)) {
AddDependentInstructionsToWorklist(instruction);
}
}
}
-void ReferenceTypePropagation::AddToWorklist(HPhi* instruction) {
+void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) {
+ DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot) << instruction->GetType();
worklist_.Add(instruction);
}
-void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HPhi* instruction) {
+void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) {
for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) {
- HPhi* phi = it.Current()->GetUser()->AsPhi();
- if (phi != nullptr) {
- AddToWorklist(phi);
+ HInstruction* user = it.Current()->GetUser();
+ if (user->IsPhi() || user->IsBoundType()) {
+ AddToWorklist(user);
}
}
}
-
} // namespace art
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index a74319d0c5..e346dbfc6c 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -17,31 +17,57 @@
#ifndef ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_
#define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_
+#include "driver/dex_compilation_unit.h"
+#include "handle_scope-inl.h"
#include "nodes.h"
#include "optimization.h"
+#include "optimizing_compiler_stats.h"
namespace art {
/**
* Propagates reference types to instructions.
- * TODO: Currently only nullability is computed.
*/
class ReferenceTypePropagation : public HOptimization {
public:
- explicit ReferenceTypePropagation(HGraph* graph)
+ ReferenceTypePropagation(HGraph* graph,
+ const DexFile& dex_file,
+ const DexCompilationUnit& dex_compilation_unit,
+ StackHandleScopeCollection* handles)
: HOptimization(graph, true, "reference_type_propagation"),
+ dex_file_(dex_file),
+ dex_compilation_unit_(dex_compilation_unit),
+ handles_(handles),
worklist_(graph->GetArena(), kDefaultWorklistSize) {}
void Run() OVERRIDE;
private:
+ void VisitNewInstance(HNewInstance* new_instance);
+ void VisitLoadClass(HLoadClass* load_class);
+ void VisitPhi(HPhi* phi);
void VisitBasicBlock(HBasicBlock* block);
+
+ void UpdateBoundType(HBoundType* bound_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ void UpdatePhi(HPhi* phi) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ void BoundTypeForIfInstanceOf(HBasicBlock* block);
+
void ProcessWorklist();
- void AddToWorklist(HPhi* phi);
- void AddDependentInstructionsToWorklist(HPhi* phi);
- bool UpdateNullability(HPhi* phi);
+ void AddToWorklist(HInstruction* instr);
+ void AddDependentInstructionsToWorklist(HInstruction* instr);
+
+ bool UpdateNullability(HInstruction* instr);
+ bool UpdateReferenceTypeInfo(HInstruction* instr);
+
+ ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+ const DexFile& dex_file_;
+ const DexCompilationUnit& dex_compilation_unit_;
+ StackHandleScopeCollection* handles_;
- GrowableArray<HPhi*> worklist_;
+ GrowableArray<HInstruction*> worklist_;
static constexpr size_t kDefaultWorklistSize = 8;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index 3809720cb4..54e62a5b2c 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -48,7 +48,10 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()),
physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()),
temp_intervals_(allocator, 4),
- spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+ int_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+ long_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+ float_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+ double_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
safepoints_(allocator, 0),
processing_core_registers_(false),
number_of_registers_(-1),
@@ -252,8 +255,13 @@ void RegisterAllocator::ProcessInstruction(HInstruction* instruction) {
&& (instruction->GetType() != Primitive::kPrimFloat);
if (locations->CanCall()) {
- if (!instruction->IsSuspendCheck()) {
- codegen_->MarkNotLeaf();
+ if (codegen_->IsLeafMethod()) {
+ // TODO: We do this here because we do not want the suspend check to artificially
+ // create live registers. We should find another place, but this is currently the
+ // simplest.
+ DCHECK(instruction->IsSuspendCheckEntry());
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
}
safepoints_.Add(instruction);
if (locations->OnlyCallsOnSlowPath()) {
@@ -433,7 +441,7 @@ bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const {
}
}
- return ValidateIntervals(intervals, spill_slots_.Size(), reserved_out_slots_, *codegen_,
+ return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
allocator_, processing_core_registers_, log_fatal_on_failure);
}
@@ -1128,41 +1136,62 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
}
size_t end = last_sibling->GetEnd();
+ GrowableArray<size_t>* spill_slots = nullptr;
+ switch (interval->GetType()) {
+ case Primitive::kPrimDouble:
+ spill_slots = &double_spill_slots_;
+ break;
+ case Primitive::kPrimLong:
+ spill_slots = &long_spill_slots_;
+ break;
+ case Primitive::kPrimFloat:
+ spill_slots = &float_spill_slots_;
+ break;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimShort:
+ spill_slots = &int_spill_slots_;
+ break;
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
+ }
+
// Find an available spill slot.
size_t slot = 0;
- for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
- // We check if it is less rather than less or equal because the parallel move
- // resolver does not work when a single spill slot needs to be exchanged with
- // a double spill slot. The strict comparison avoids needing to exchange these
- // locations at the same lifetime position.
- if (spill_slots_.Get(slot) < parent->GetStart()
- && (slot == (e - 1) || spill_slots_.Get(slot + 1) < parent->GetStart())) {
+ for (size_t e = spill_slots->Size(); slot < e; ++slot) {
+ if (spill_slots->Get(slot) <= parent->GetStart()
+ && (slot == (e - 1) || spill_slots->Get(slot + 1) <= parent->GetStart())) {
break;
}
}
if (parent->NeedsTwoSpillSlots()) {
- if (slot == spill_slots_.Size()) {
+ if (slot == spill_slots->Size()) {
// We need a new spill slot.
- spill_slots_.Add(end);
- spill_slots_.Add(end);
- } else if (slot == spill_slots_.Size() - 1) {
- spill_slots_.Put(slot, end);
- spill_slots_.Add(end);
+ spill_slots->Add(end);
+ spill_slots->Add(end);
+ } else if (slot == spill_slots->Size() - 1) {
+ spill_slots->Put(slot, end);
+ spill_slots->Add(end);
} else {
- spill_slots_.Put(slot, end);
- spill_slots_.Put(slot + 1, end);
+ spill_slots->Put(slot, end);
+ spill_slots->Put(slot + 1, end);
}
} else {
- if (slot == spill_slots_.Size()) {
+ if (slot == spill_slots->Size()) {
// We need a new spill slot.
- spill_slots_.Add(end);
+ spill_slots->Add(end);
} else {
- spill_slots_.Put(slot, end);
+ spill_slots->Put(slot, end);
}
}
- parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize);
+  // Note that the exact spill slot location will be computed when we resolve,
+  // that is, once we know the number of spill slots of each type.
+ parent->SetSpillSlot(slot);
}
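One way to read the relaxed comparison in the slot-search loop above: now that slots are typed, an interval may reuse a slot whose previous occupant ends exactly where the new interval starts, since the parallel move resolver no longer has to exchange a single slot with a double slot. A rough standalone sketch, simplified to single-slot intervals (not ART code):

#include <cassert>
#include <cstddef>
#include <vector>

// Each entry records the end position of the interval last stored in that slot.
// Returns the first reusable slot index, or size() if a new slot is needed.
static size_t FindSlot(const std::vector<size_t>& slots, size_t start) {
  for (size_t i = 0; i < slots.size(); ++i) {
    if (slots[i] <= start) return i;  // Was a strict '<' before this change.
  }
  return slots.size();
}

int main() {
  std::vector<size_t> slots = {10, 30};
  assert(FindSlot(slots, 10) == 0);  // Reused now; '<' would have appended a slot.
  assert(FindSlot(slots, 5) == 2);   // Everything still live: append a new slot.
  return 0;
}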
static bool IsValidDestination(Location destination) {
@@ -1511,7 +1540,7 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
}
void RegisterAllocator::Resolve() {
- codegen_->InitializeCodeGeneration(spill_slots_.Size(),
+ codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(),
maximum_number_of_live_core_registers_,
maximum_number_of_live_fp_registers_,
reserved_out_slots_,
@@ -1537,6 +1566,39 @@ void RegisterAllocator::Resolve() {
} else if (current->HasSpillSlot()) {
current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
}
+ } else if (current->HasSpillSlot()) {
+      // Adjust the stack slot, now that we know the number of slots of each type.
+ // The way this implementation lays out the stack is the following:
+ // [parameter slots ]
+ // [double spill slots ]
+ // [long spill slots ]
+ // [float spill slots ]
+ // [int/ref values ]
+ // [maximum out values ] (number of arguments for calls)
+ // [art method ].
+ uint32_t slot = current->GetSpillSlot();
+ switch (current->GetType()) {
+ case Primitive::kPrimDouble:
+ slot += long_spill_slots_.Size();
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimLong:
+ slot += float_spill_slots_.Size();
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimFloat:
+ slot += int_spill_slots_.Size();
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimNot:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimShort:
+ slot += reserved_out_slots_;
+ break;
+ case Primitive::kPrimVoid:
+ LOG(FATAL) << "Unexpected type for interval " << current->GetType();
+ }
+ current->SetSpillSlot(slot * kVRegSize);
}
Location source = current->ToLocation();
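To illustrate the fall-through arithmetic above, here is a standalone sketch with made-up pool sizes (2 long, 4 float, 5 int/reference spill slots, 2 reserved out slots; none of these numbers come from the change) showing how a typed slot index becomes a frame slot:

#include <cstdio>

enum class Kind { kDouble, kLong, kFloat, kIntOrRef };

// Mirrors the FALLTHROUGH_INTENDED chain: each type's slots sit above the
// pools listed after it in the layout comment, plus the reserved out slots.
static int AdjustSlot(int slot, Kind kind) {
  const int longs = 2, floats = 4, ints = 5, outs = 2;  // Hypothetical sizes.
  switch (kind) {
    case Kind::kDouble:   slot += longs;   [[fallthrough]];
    case Kind::kLong:     slot += floats;  [[fallthrough]];
    case Kind::kFloat:    slot += ints;    [[fallthrough]];
    case Kind::kIntOrRef: slot += outs;    break;
  }
  return slot;  // Multiplied by kVRegSize to get the byte offset.
}

int main() {
  std::printf("%d %d %d\n",
              AdjustSlot(0, Kind::kDouble),     // 0 + 2 + 4 + 5 + 2 = 13
              AdjustSlot(1, Kind::kFloat),      // 1 + 5 + 2 = 8
              AdjustSlot(3, Kind::kIntOrRef));  // 3 + 2 = 5
  return 0;
}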
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index b8f70bdc18..ff2f106b74 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -75,7 +75,10 @@ class RegisterAllocator {
}
size_t GetNumberOfSpillSlots() const {
- return spill_slots_.Size();
+ return int_spill_slots_.Size()
+ + long_spill_slots_.Size()
+ + float_spill_slots_.Size()
+ + double_spill_slots_.Size();
}
private:
@@ -171,8 +174,14 @@ class RegisterAllocator {
// where an instruction requires a temporary.
GrowableArray<LiveInterval*> temp_intervals_;
- // The spill slots allocated for live intervals.
- GrowableArray<size_t> spill_slots_;
+ // The spill slots allocated for live intervals. We ensure spill slots
+ // are typed to avoid (1) doing moves and swaps between two different kinds
+ // of registers, and (2) swapping between a single stack slot and a double
+ // stack slot. This simplifies the parallel move resolver.
+ GrowableArray<size_t> int_spill_slots_;
+ GrowableArray<size_t> long_spill_slots_;
+ GrowableArray<size_t> float_spill_slots_;
+ GrowableArray<size_t> double_spill_slots_;
// Instructions that need a safepoint.
GrowableArray<HInstruction*> safepoints_;
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 0cc00c0fde..e5d06a9f8b 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
#include "code_generator_x86.h"
@@ -25,7 +26,6 @@
#include "register_allocator.h"
#include "ssa_liveness_analysis.h"
#include "ssa_phi_elimination.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index c9a21aa681..3dc75059b2 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -42,20 +42,33 @@ void SsaBuilder::BuildSsa() {
}
}
- // 3) Remove dead phis. This will remove phis that are only used by environments:
+ // 3) Mark dead phis. This will mark phis that are only used by environments:
// at the DEX level, the type of these phis does not need to be consistent, but
// our code generator will complain if the inputs of a phi do not have the same
- // type (modulo the special case of `null`).
- SsaDeadPhiElimination dead_phis(GetGraph());
- dead_phis.Run();
+ // type. The marking allows the type propagation to know which phis it needs
+ // to handle. We mark but do not eliminate: the elimination will be done in
+ // step 5).
+ {
+ SsaDeadPhiElimination dead_phis(GetGraph());
+ dead_phis.MarkDeadPhis();
+ }
// 4) Propagate types of phis. At this point, phis are typed void in the general
- // case, or float or double when we created a floating-point equivalent. So we
+ // case, or float/double/reference when we created an equivalent phi. So we
// need to propagate the types across phis to give them a correct type.
PrimitiveTypePropagation type_propagation(GetGraph());
type_propagation.Run();
- // 5) Clear locals.
+  // 5) Step 4) changes the inputs of phis, which may create new dead phis. We re-run
+  //    the algorithm, and this time eliminate them.
+ // TODO: Make this work with debug info and reference liveness. We currently
+ // eagerly remove phis used in environments.
+ {
+ SsaDeadPhiElimination dead_phis(GetGraph());
+ dead_phis.Run();
+ }
+
+ // 6) Clear locals.
// TODO: Move this to a dead code eliminator phase.
for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions());
!it.Done();
@@ -185,15 +198,24 @@ static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant) {
/**
* Because of Dex format, we might end up having the same phi being
- * used for non floating point operations and floating point operations. Because
- * we want the graph to be correctly typed (and thereafter avoid moves between
+ * used for non-floating-point operations and floating point / reference operations.
+ * Because we want the graph to be correctly typed (and thereby avoid moves between
* floating point registers and core registers), we need to create a copy of the
- * phi with a floating point type.
+ * phi with a floating point / reference type.
*/
-static HPhi* GetFloatOrDoubleEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
- // We place the floating point phi next to this phi.
+static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) {
+  // We place the floating point / reference phi next to this phi.
HInstruction* next = phi->GetNext();
- if (next == nullptr || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())) {
+ if (next != nullptr
+ && next->AsPhi()->GetRegNumber() == phi->GetRegNumber()
+ && next->GetType() != type) {
+ // Move to the next phi to see if it is the one we are looking for.
+ next = next->GetNext();
+ }
+
+ if (next == nullptr
+ || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber())
+ || (next->GetType() != type)) {
ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena();
HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type);
for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
@@ -223,7 +245,7 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
} else if (value->IsIntConstant()) {
return GetFloatEquivalent(value->AsIntConstant());
} else if (value->IsPhi()) {
- return GetFloatOrDoubleEquivalentOfPhi(value->AsPhi(), type);
+ return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), type);
} else {
// For other instructions, we assume the verifier has checked that the dex format is correctly
// typed and the value in a dex register will not be used for both floating point and
@@ -234,12 +256,25 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user,
}
}
+HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) {
+ if (value->IsIntConstant()) {
+ DCHECK_EQ(value->AsIntConstant()->GetValue(), 0);
+ return value->GetBlock()->GetGraph()->GetNullConstant();
+ } else {
+ DCHECK(value->IsPhi());
+ return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot);
+ }
+}
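The dispatch above exists because Dex bytecode can reuse one register for both the integer constant 0 and null. A minimal standalone model of that dispatch (not ART code; the string tags stand in for instruction kinds):

#include <cassert>
#include <string>

// The only integer constant that may flow into a reference slot is 0 (null);
// any other producer must be a phi, which gets a kPrimNot-typed twin.
struct Value { std::string kind; int int_value; };

static std::string ReferenceEquivalent(const Value& v) {
  if (v.kind == "IntConstant") {
    assert(v.int_value == 0);        // Mirrors the DCHECK_EQ above.
    return "NullConstant";
  }
  return "ReferencePhiEquivalent";   // Copy of the phi, typed kPrimNot.
}

int main() {
  assert(ReferenceEquivalent({"IntConstant", 0}) == "NullConstant");
  assert(ReferenceEquivalent({"Phi", 0}) == "ReferencePhiEquivalent");
  return 0;
}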
+
void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
HInstruction* value = current_locals_->GetInstructionAt(load->GetLocal()->GetRegNumber());
- if (load->GetType() != value->GetType()
- && (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble)) {
- // If the operation requests a specific type, we make sure its input is of that type.
- value = GetFloatOrDoubleEquivalent(load, value, load->GetType());
+ // If the operation requests a specific type, we make sure its input is of that type.
+ if (load->GetType() != value->GetType()) {
+ if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) {
+ value = GetFloatOrDoubleEquivalent(load, value, load->GetType());
+ } else if (load->GetType() == Primitive::kPrimNot) {
+ value = GetReferenceTypeEquivalent(value);
+ }
}
load->ReplaceWith(value);
load->GetBlock()->RemoveInstruction(load);
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 2eec87b618..148e9590c3 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -58,6 +58,8 @@ class SsaBuilder : public HGraphVisitor {
HInstruction* instruction,
Primitive::Type type);
+ static HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction);
+
private:
// Locals for the current block being visited.
HEnvironment* current_locals_;
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 1b06315fce..bebb73ba22 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -115,14 +115,13 @@ void SsaLivenessAnalysis::NumberInstructions() {
// to differentiate between the start and end of an instruction. Adding 2 to
// the lifetime position for each instruction ensures the start of an
// instruction is different than the end of the previous instruction.
- HGraphVisitor* location_builder = codegen_->GetLocationBuilder();
for (HLinearOrderIterator it(*this); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
block->SetLifetimeStart(lifetime_position);
for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
HInstruction* current = inst_it.Current();
- current->Accept(location_builder);
+ codegen_->AllocateLocations(current);
LocationSummary* locations = current->GetLocations();
if (locations != nullptr && locations->Out().IsValid()) {
instructions_from_ssa_index_.Add(current);
@@ -140,7 +139,7 @@ void SsaLivenessAnalysis::NumberInstructions() {
for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
inst_it.Advance()) {
HInstruction* current = inst_it.Current();
- current->Accept(codegen_->GetLocationBuilder());
+ codegen_->AllocateLocations(current);
LocationSummary* locations = current->GetLocations();
if (locations != nullptr && locations->Out().IsValid()) {
instructions_from_ssa_index_.Add(current);
@@ -312,7 +311,12 @@ bool SsaLivenessAnalysis::UpdateLiveIn(const HBasicBlock& block) {
return live_in->UnionIfNotIn(live_out, kill);
}
+static int RegisterOrLowRegister(Location location) {
+ return location.IsPair() ? location.low() : location.reg();
+}
+
int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
+ DCHECK(!IsHighInterval());
if (GetParent() == this && defined_by_ != nullptr) {
// This is the first interval for the instruction. Try to find
// a register based on its definition.
@@ -334,8 +338,12 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
if (user->IsPhi()) {
// If the phi has a register, try to use the same.
Location phi_location = user->GetLiveInterval()->ToLocation();
- if (SameRegisterKind(phi_location) && free_until[phi_location.reg()] >= use_position) {
- return phi_location.reg();
+ if (phi_location.IsRegisterKind()) {
+ DCHECK(SameRegisterKind(phi_location));
+ int reg = RegisterOrLowRegister(phi_location);
+ if (free_until[reg] >= use_position) {
+ return reg;
+ }
}
const GrowableArray<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors();
// If the instruction dies at the phi assignment, we can try having the
@@ -348,8 +356,11 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
HInstruction* input = user->InputAt(i);
Location location = input->GetLiveInterval()->GetLocationAt(
predecessors.Get(i)->GetLifetimeEnd() - 1);
- if (location.IsRegister() && free_until[location.reg()] >= use_position) {
- return location.reg();
+ if (location.IsRegisterKind()) {
+ int reg = RegisterOrLowRegister(location);
+ if (free_until[reg] >= use_position) {
+ return reg;
+ }
}
}
}
@@ -360,8 +371,12 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until) const {
// We use the user's lifetime position - 1 (and not `use_position`) because the
// register is blocked at the beginning of the user.
size_t position = user->GetLifetimePosition() - 1;
- if (SameRegisterKind(expected) && free_until[expected.reg()] >= position) {
- return expected.reg();
+ if (expected.IsRegisterKind()) {
+ DCHECK(SameRegisterKind(expected));
+ int reg = RegisterOrLowRegister(expected);
+ if (free_until[reg] >= position) {
+ return reg;
+ }
}
}
}
@@ -383,8 +398,9 @@ int LiveInterval::FindHintAtDefinition() const {
// If the input dies at the end of the predecessor, we know its register can
// be reused.
Location input_location = input_interval.ToLocation();
- if (SameRegisterKind(input_location)) {
- return input_location.reg();
+ if (input_location.IsRegisterKind()) {
+ DCHECK(SameRegisterKind(input_location));
+ return RegisterOrLowRegister(input_location);
}
}
}
@@ -399,8 +415,9 @@ int LiveInterval::FindHintAtDefinition() const {
// If the input dies at the start of this instruction, we know its register can
// be reused.
Location location = input_interval.ToLocation();
- if (SameRegisterKind(location)) {
- return location.reg();
+ if (location.IsRegisterKind()) {
+ DCHECK(SameRegisterKind(location));
+ return RegisterOrLowRegister(location);
}
}
}
@@ -409,9 +426,19 @@ int LiveInterval::FindHintAtDefinition() const {
}
bool LiveInterval::SameRegisterKind(Location other) const {
- return IsFloatingPoint()
- ? other.IsFpuRegister()
- : other.IsRegister();
+ if (IsFloatingPoint()) {
+ if (IsLowInterval() || IsHighInterval()) {
+ return other.IsFpuRegisterPair();
+ } else {
+ return other.IsFpuRegister();
+ }
+ } else {
+ if (IsLowInterval() || IsHighInterval()) {
+ return other.IsRegisterPair();
+ } else {
+ return other.IsRegister();
+ }
+ }
}
bool LiveInterval::NeedsTwoSpillSlots() const {
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index fd30c1bc76..2f2e2d1fab 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -19,6 +19,11 @@
namespace art {
void SsaDeadPhiElimination::Run() {
+ MarkDeadPhis();
+ EliminateDeadPhis();
+}
+
+void SsaDeadPhiElimination::MarkDeadPhis() {
// Add to the worklist phis referenced by non-phi instructions.
for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
@@ -49,7 +54,9 @@ void SsaDeadPhiElimination::Run() {
}
}
}
+}
+void SsaDeadPhiElimination::EliminateDeadPhis() {
// Remove phis that are not live. Visit in post order so that phis
// that are not inputs of loop phis can be removed when they have
// no users left (dead phis might use dead phis).
@@ -57,31 +64,33 @@ void SsaDeadPhiElimination::Run() {
HBasicBlock* block = it.Current();
HInstruction* current = block->GetFirstPhi();
HInstruction* next = nullptr;
+ HPhi* phi;
while (current != nullptr) {
+ phi = current->AsPhi();
next = current->GetNext();
- if (current->AsPhi()->IsDead()) {
- if (current->HasUses()) {
- for (HUseIterator<HInstruction*> use_it(current->GetUses()); !use_it.Done();
+ if (phi->IsDead()) {
+ // Make sure the phi is only used by other dead phis.
+ if (kIsDebugBuild) {
+ for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done();
use_it.Advance()) {
- HUseListNode<HInstruction*>* user_node = use_it.Current();
- HInstruction* user = user_node->GetUser();
+ HInstruction* user = use_it.Current()->GetUser();
DCHECK(user->IsLoopHeaderPhi()) << user->GetId();
DCHECK(user->AsPhi()->IsDead()) << user->GetId();
- // Just put itself as an input. The phi will be removed in this loop anyway.
- user->SetRawInputAt(user_node->GetIndex(), user);
- current->RemoveUser(user, user_node->GetIndex());
}
}
- if (current->HasEnvironmentUses()) {
- for (HUseIterator<HEnvironment*> use_it(current->GetEnvUses()); !use_it.Done();
- use_it.Advance()) {
- HUseListNode<HEnvironment*>* user_node = use_it.Current();
- HEnvironment* user = user_node->GetUser();
- user->SetRawEnvAt(user_node->GetIndex(), nullptr);
- current->RemoveEnvironmentUser(user_node);
- }
+ // Remove the phi from use lists of its inputs.
+ for (size_t i = 0, e = phi->InputCount(); i < e; ++i) {
+ phi->RemoveAsUserOfInput(i);
+ }
+ // Remove the phi from environments that use it.
+ for (HUseIterator<HEnvironment*> use_it(phi->GetEnvUses()); !use_it.Done();
+ use_it.Advance()) {
+ HUseListNode<HEnvironment*>* user_node = use_it.Current();
+ HEnvironment* user = user_node->GetUser();
+ user->SetRawEnvAt(user_node->GetIndex(), nullptr);
}
- block->RemovePhi(current->AsPhi());
+ // Delete it from the instruction list.
+ block->RemovePhi(phi, /*ensure_safety=*/ false);
}
current = next;
}
diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h
index b7899712d6..88a5279e14 100644
--- a/compiler/optimizing/ssa_phi_elimination.h
+++ b/compiler/optimizing/ssa_phi_elimination.h
@@ -34,6 +34,9 @@ class SsaDeadPhiElimination : public HOptimization {
void Run() OVERRIDE;
+ void MarkDeadPhis();
+ void EliminateDeadPhis();
+
private:
GrowableArray<HPhi*> worklist_;
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 7e90b37fe6..7fc1ec6dd1 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -14,6 +14,7 @@
* limitations under the License.
*/
+#include "base/arena_allocator.h"
#include "base/stringprintf.h"
#include "builder.h"
#include "dex_file.h"
@@ -22,7 +23,6 @@
#include "optimizing_unit_test.h"
#include "pretty_printer.h"
#include "ssa_builder.h"
-#include "utils/arena_allocator.h"
#include "gtest/gtest.h"
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 3974e53e6f..5283d5dcca 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -166,18 +166,23 @@ class StackMapStream : public ValueObject {
stack_map.SetStackMask(*entry.sp_mask);
}
- // Set the register map.
- MemoryRegion register_region = dex_register_maps_region.Subregion(
- next_dex_register_map_offset,
- DexRegisterMap::kFixedSize + entry.num_dex_registers * DexRegisterMap::SingleEntrySize());
- next_dex_register_map_offset += register_region.size();
- DexRegisterMap dex_register_map(register_region);
- stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start);
-
- for (size_t j = 0; j < entry.num_dex_registers; ++j) {
- DexRegisterEntry register_entry =
- dex_register_maps_.Get(j + entry.dex_register_maps_start_index);
- dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value);
+ if (entry.num_dex_registers != 0) {
+ // Set the register map.
+ MemoryRegion register_region = dex_register_maps_region.Subregion(
+ next_dex_register_map_offset,
+ DexRegisterMap::kFixedSize
+ + entry.num_dex_registers * DexRegisterMap::SingleEntrySize());
+ next_dex_register_map_offset += register_region.size();
+ DexRegisterMap dex_register_map(register_region);
+ stack_map.SetDexRegisterMapOffset(register_region.start() - memory_start);
+
+ for (size_t j = 0; j < entry.num_dex_registers; ++j) {
+ DexRegisterEntry register_entry =
+ dex_register_maps_.Get(j + entry.dex_register_maps_start_index);
+ dex_register_map.SetRegisterInfo(j, register_entry.kind, register_entry.value);
+ }
+ } else {
+ stack_map.SetDexRegisterMapOffset(StackMap::kNoDexRegisterMap);
}
// Set the inlining info.
@@ -196,7 +201,7 @@ class StackMapStream : public ValueObject {
inline_info.SetMethodReferenceIndexAtDepth(j, inline_entry.method_index);
}
} else {
- stack_map.SetInlineDescriptorOffset(InlineInfo::kNoInlineInfo);
+ stack_map.SetInlineDescriptorOffset(StackMap::kNoInlineInfo);
}
}
}
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 5ee6ae049c..744fb45fff 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -61,6 +61,7 @@ TEST(StackMapTest, Test1) {
MemoryRegion stack_mask = stack_map.GetStackMask();
ASSERT_TRUE(SameBits(stack_mask, sp_mask));
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2);
ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0));
ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1));
@@ -107,6 +108,7 @@ TEST(StackMapTest, Test2) {
MemoryRegion stack_mask = stack_map.GetStackMask();
ASSERT_TRUE(SameBits(stack_mask, sp_mask1));
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
DexRegisterMap dex_registers = code_info.GetDexRegisterMapOf(stack_map, 2);
ASSERT_EQ(DexRegisterMap::kInStack, dex_registers.GetLocationKind(0));
ASSERT_EQ(DexRegisterMap::kConstant, dex_registers.GetLocationKind(1));