diff options
author | 2017-06-28 14:08:00 -0700 | |
---|---|---|
committer | 2017-06-29 11:20:56 -0700 | |
commit | 37dc4df47fec811ea52f7180880961565f013434 (patch) | |
tree | eac308a6c7ef8b7d53f64889ff0a93740a2dc62a | |
parent | 76754cc816af46b41a8d1f419a38334b5db59b6e (diff) |
Improved subscript and data dependence analysis.
Rationale:
We missed vectorizing a simple stencil operation
due to inaccurate unit stride analysis and failure
to detect single runtime data dependence test.
Test: test-art-host, test-art-target
Change-Id: I07ba03455bfb1c0aff371c1244a1328f885d0916
-rw-r--r-- | compiler/optimizing/induction_var_range.cc | 10 | ||||
-rw-r--r-- | compiler/optimizing/induction_var_range.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/induction_var_range_test.cc | 12 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 20 | ||||
-rw-r--r-- | test/656-checker-simd-opt/src/Main.java | 43 |
5 files changed, 68 insertions, 18 deletions
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index c0ec58f824..f35aace3a9 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -373,21 +373,23 @@ bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) co bool InductionVarRange::IsUnitStride(HInstruction* context, HInstruction* instruction, + HGraph* graph, /*out*/ HInstruction** offset) const { HLoopInformation* loop = nullptr; HInductionVarAnalysis::InductionInfo* info = nullptr; HInductionVarAnalysis::InductionInfo* trip = nullptr; if (HasInductionInfo(context, instruction, &loop, &info, &trip)) { if (info->induction_class == HInductionVarAnalysis::kLinear && - info->op_b->operation == HInductionVarAnalysis::kFetch && !HInductionVarAnalysis::IsNarrowingLinear(info)) { int64_t stride_value = 0; if (IsConstant(info->op_a, kExact, &stride_value) && stride_value == 1) { int64_t off_value = 0; - if (IsConstant(info->op_b, kExact, &off_value) && off_value == 0) { - *offset = nullptr; - } else { + if (IsConstant(info->op_b, kExact, &off_value)) { + *offset = graph->GetConstant(info->op_b->type, off_value); + } else if (info->op_b->operation == HInductionVarAnalysis::kFetch) { *offset = info->op_b->fetch; + } else { + return false; } return true; } diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index a8ee829d08..ab1772bf15 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -163,6 +163,7 @@ class InductionVarRange { */ bool IsUnitStride(HInstruction* context, HInstruction* instruction, + HGraph* graph, /*out*/ HInstruction** offset) const; /** diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index d01d3146fc..67d2093829 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -770,8 +770,8 @@ TEST_F(InductionVarRangeTest, ConstantTripCountUp) { EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc)); EXPECT_EQ(1000, tc); HInstruction* offset = nullptr; - EXPECT_TRUE(range_.IsUnitStride(phi, phi, &offset)); - EXPECT_TRUE(offset == nullptr); + EXPECT_TRUE(range_.IsUnitStride(phi, phi, graph_, &offset)); + ExpectInt(0, offset); HInstruction* tce = range_.GenerateTripCount( loop_header_->GetLoopInformation(), graph_, loop_preheader_); ASSERT_TRUE(tce != nullptr); @@ -826,7 +826,7 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) { EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc)); EXPECT_EQ(1000, tc); HInstruction* offset = nullptr; - EXPECT_FALSE(range_.IsUnitStride(phi, phi, &offset)); + EXPECT_FALSE(range_.IsUnitStride(phi, phi, graph_, &offset)); HInstruction* tce = range_.GenerateTripCount( loop_header_->GetLoopInformation(), graph_, loop_preheader_); ASSERT_TRUE(tce != nullptr); @@ -908,8 +908,8 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc)); EXPECT_EQ(0, tc); // unknown HInstruction* offset = nullptr; - EXPECT_TRUE(range_.IsUnitStride(phi, phi, &offset)); - EXPECT_TRUE(offset == nullptr); + EXPECT_TRUE(range_.IsUnitStride(phi, phi, graph_, &offset)); + ExpectInt(0, offset); HInstruction* tce = range_.GenerateTripCount( loop_header_->GetLoopInformation(), graph_, loop_preheader_); ASSERT_TRUE(tce != nullptr); @@ -994,7 +994,7 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc)); EXPECT_EQ(0, tc); // unknown HInstruction* offset = nullptr; - EXPECT_FALSE(range_.IsUnitStride(phi, phi, &offset)); + EXPECT_FALSE(range_.IsUnitStride(phi, phi, graph_, &offset)); HInstruction* tce = range_.GenerateTripCount( loop_header_->GetLoopInformation(), graph_, loop_preheader_); ASSERT_TRUE(tce != nullptr); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 32f40024d3..b61d7b80d1 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -620,12 +620,15 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 // Conservatively assume a potential loop-carried data dependence otherwise, avoided by // generating an explicit a != b disambiguation runtime test on the two references. if (x != y) { - // For now, we reject after one test to avoid excessive overhead. - if (vector_runtime_test_a_ != nullptr) { - return false; + // To avoid excessive overhead, we only accept one a != b test. + if (vector_runtime_test_a_ == nullptr) { + // First test found. + vector_runtime_test_a_ = a; + vector_runtime_test_b_ = b; + } else if ((vector_runtime_test_a_ != a || vector_runtime_test_b_ != b) && + (vector_runtime_test_a_ != b || vector_runtime_test_b_ != a)) { + return false; // second test would be needed } - vector_runtime_test_a_ = a; - vector_runtime_test_b_ = b; } } } @@ -842,7 +845,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, HInstruction* offset = nullptr; if (TrySetVectorType(type, &restrictions) && node->loop_info->IsDefinedOutOfTheLoop(base) && - induction_range_.IsUnitStride(instruction, index, &offset) && + induction_range_.IsUnitStride(instruction, index, graph_, &offset) && VectorizeUse(node, value, generate_code, type, restrictions)) { if (generate_code) { GenerateVecSub(index, offset); @@ -900,7 +903,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, HInstruction* offset = nullptr; if (type == instruction->GetType() && node->loop_info->IsDefinedOutOfTheLoop(base) && - induction_range_.IsUnitStride(instruction, index, &offset)) { + induction_range_.IsUnitStride(instruction, index, graph_, &offset)) { if (generate_code) { GenerateVecSub(index, offset); GenerateVecMem(instruction, vector_map_->Get(index), nullptr, offset, type); @@ -1216,7 +1219,8 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type) void HLoopOptimization::GenerateVecSub(HInstruction* org, HInstruction* offset) { if (vector_map_->find(org) == vector_map_->end()) { HInstruction* subscript = vector_index_; - if (offset != nullptr) { + int64_t value = 0; + if (!IsInt64AndGet(offset, &value) || value != 0) { subscript = new (global_allocator_) HAdd(Primitive::kPrimInt, subscript, offset); if (org->IsPhi()) { Insert(vector_body_, subscript); // lacks layout placeholder diff --git a/test/656-checker-simd-opt/src/Main.java b/test/656-checker-simd-opt/src/Main.java index 0d0885c85a..794c9b6c0d 100644 --- a/test/656-checker-simd-opt/src/Main.java +++ b/test/656-checker-simd-opt/src/Main.java @@ -46,6 +46,37 @@ public class Main { } } + /// CHECK-START: void Main.stencil(int[], int[], int) loop_optimization (before) + /// CHECK-DAG: <<CP1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<CM1:i\d+>> IntConstant -1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Add1:i\d+>> Add [<<Phi>>,<<CM1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Add1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add2:i\d+>> Add [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add3:i\d+>> Add [<<Phi>>,<<CP1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get3:i\d+>> ArrayGet [{{l\d+}},<<Add3>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add4:i\d+>> Add [<<Add2>>,<<Get3>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Add4>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.stencil(int[], int[], int) loop_optimization (after) + /// CHECK-DAG: <<CP1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<CP2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Add1:i\d+>> Add [<<Phi>>,<<CP1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:d\d+>> VecLoad [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:d\d+>> VecLoad [{{l\d+}},<<Add1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add2:d\d+>> VecAdd [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add3:i\d+>> Add [<<Phi>>,<<CP2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get3:d\d+>> VecLoad [{{l\d+}},<<Add3>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add4:d\d+>> VecAdd [<<Add2>>,<<Get3>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Add1>>,<<Add4>>] loop:<<Loop>> outer_loop:none + private static void stencil(int[] a, int[] b, int n) { + for (int i = 1; i < n - 1; i++) { + a[i] = b[i - 1] + b[i] + b[i + 1]; + } + } + public static void main(String[] args) { float[] x = new float[100]; float[] y = new float[100]; @@ -58,6 +89,18 @@ public class Main { expectEquals(5.0f, x[i]); expectEquals(2.0f, y[i]); } + int[] a = new int[100]; + int[] b = new int[100]; + for (int i = 0; i < 100; i++) { + a[i] = 0; + b[i] = i; + } + stencil(a, b, 100); + for (int i = 1; i < 99; i++) { + int e = i + i + i; + expectEquals(e, a[i]); + expectEquals(i, b[i]); + } System.out.println("passed"); } |