Improved subscript and data dependence analysis.
Rationale:
We missed vectorizing a simple stencil operation
due to inaccurate unit stride analysis and failure
to detect single runtime data dependence test.
Test: test-art-host, test-art-target
Change-Id: I07ba03455bfb1c0aff371c1244a1328f885d0916
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index c0ec58f..f35aace 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -373,21 +373,23 @@
bool InductionVarRange::IsUnitStride(HInstruction* context,
HInstruction* instruction,
+ HGraph* graph,
/*out*/ HInstruction** offset) const {
HLoopInformation* loop = nullptr;
HInductionVarAnalysis::InductionInfo* info = nullptr;
HInductionVarAnalysis::InductionInfo* trip = nullptr;
if (HasInductionInfo(context, instruction, &loop, &info, &trip)) {
if (info->induction_class == HInductionVarAnalysis::kLinear &&
- info->op_b->operation == HInductionVarAnalysis::kFetch &&
!HInductionVarAnalysis::IsNarrowingLinear(info)) {
int64_t stride_value = 0;
if (IsConstant(info->op_a, kExact, &stride_value) && stride_value == 1) {
int64_t off_value = 0;
- if (IsConstant(info->op_b, kExact, &off_value) && off_value == 0) {
- *offset = nullptr;
- } else {
+ if (IsConstant(info->op_b, kExact, &off_value)) {
+ *offset = graph->GetConstant(info->op_b->type, off_value);
+ } else if (info->op_b->operation == HInductionVarAnalysis::kFetch) {
*offset = info->op_b->fetch;
+ } else {
+ return false;
}
return true;
}
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index a8ee829..ab1772b 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -163,6 +163,7 @@
*/
bool IsUnitStride(HInstruction* context,
HInstruction* instruction,
+ HGraph* graph,
/*out*/ HInstruction** offset) const;
/**
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index d01d314..67d2093 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -770,8 +770,8 @@
EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
EXPECT_EQ(1000, tc);
HInstruction* offset = nullptr;
- EXPECT_TRUE(range_.IsUnitStride(phi, phi, &offset));
- EXPECT_TRUE(offset == nullptr);
+ EXPECT_TRUE(range_.IsUnitStride(phi, phi, graph_, &offset));
+ ExpectInt(0, offset);
HInstruction* tce = range_.GenerateTripCount(
loop_header_->GetLoopInformation(), graph_, loop_preheader_);
ASSERT_TRUE(tce != nullptr);
@@ -826,7 +826,7 @@
EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
EXPECT_EQ(1000, tc);
HInstruction* offset = nullptr;
- EXPECT_FALSE(range_.IsUnitStride(phi, phi, &offset));
+ EXPECT_FALSE(range_.IsUnitStride(phi, phi, graph_, &offset));
HInstruction* tce = range_.GenerateTripCount(
loop_header_->GetLoopInformation(), graph_, loop_preheader_);
ASSERT_TRUE(tce != nullptr);
@@ -908,8 +908,8 @@
EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
EXPECT_EQ(0, tc); // unknown
HInstruction* offset = nullptr;
- EXPECT_TRUE(range_.IsUnitStride(phi, phi, &offset));
- EXPECT_TRUE(offset == nullptr);
+ EXPECT_TRUE(range_.IsUnitStride(phi, phi, graph_, &offset));
+ ExpectInt(0, offset);
HInstruction* tce = range_.GenerateTripCount(
loop_header_->GetLoopInformation(), graph_, loop_preheader_);
ASSERT_TRUE(tce != nullptr);
@@ -994,7 +994,7 @@
EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
EXPECT_EQ(0, tc); // unknown
HInstruction* offset = nullptr;
- EXPECT_FALSE(range_.IsUnitStride(phi, phi, &offset));
+ EXPECT_FALSE(range_.IsUnitStride(phi, phi, graph_, &offset));
HInstruction* tce = range_.GenerateTripCount(
loop_header_->GetLoopInformation(), graph_, loop_preheader_);
ASSERT_TRUE(tce != nullptr);
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 32f4002..b61d7b8 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -620,12 +620,15 @@
// Conservatively assume a potential loop-carried data dependence otherwise, avoided by
// generating an explicit a != b disambiguation runtime test on the two references.
if (x != y) {
- // For now, we reject after one test to avoid excessive overhead.
- if (vector_runtime_test_a_ != nullptr) {
- return false;
+ // To avoid excessive overhead, we only accept one a != b test.
+ if (vector_runtime_test_a_ == nullptr) {
+ // First test found.
+ vector_runtime_test_a_ = a;
+ vector_runtime_test_b_ = b;
+ } else if ((vector_runtime_test_a_ != a || vector_runtime_test_b_ != b) &&
+ (vector_runtime_test_a_ != b || vector_runtime_test_b_ != a)) {
+ return false; // second test would be needed
}
- vector_runtime_test_a_ = a;
- vector_runtime_test_b_ = b;
}
}
}
@@ -842,7 +845,7 @@
HInstruction* offset = nullptr;
if (TrySetVectorType(type, &restrictions) &&
node->loop_info->IsDefinedOutOfTheLoop(base) &&
- induction_range_.IsUnitStride(instruction, index, &offset) &&
+ induction_range_.IsUnitStride(instruction, index, graph_, &offset) &&
VectorizeUse(node, value, generate_code, type, restrictions)) {
if (generate_code) {
GenerateVecSub(index, offset);
@@ -900,7 +903,7 @@
HInstruction* offset = nullptr;
if (type == instruction->GetType() &&
node->loop_info->IsDefinedOutOfTheLoop(base) &&
- induction_range_.IsUnitStride(instruction, index, &offset)) {
+ induction_range_.IsUnitStride(instruction, index, graph_, &offset)) {
if (generate_code) {
GenerateVecSub(index, offset);
GenerateVecMem(instruction, vector_map_->Get(index), nullptr, offset, type);
@@ -1216,7 +1219,8 @@
void HLoopOptimization::GenerateVecSub(HInstruction* org, HInstruction* offset) {
if (vector_map_->find(org) == vector_map_->end()) {
HInstruction* subscript = vector_index_;
- if (offset != nullptr) {
+ int64_t value = 0;
+ if (!IsInt64AndGet(offset, &value) || value != 0) {
subscript = new (global_allocator_) HAdd(Primitive::kPrimInt, subscript, offset);
if (org->IsPhi()) {
Insert(vector_body_, subscript); // lacks layout placeholder