ART: Fix a compiler crash for VectorizeDef() idioms.
SAD vectorization idiom (and DotProduct which was based on it)
had a bug when some instruction in the loop was visited twice
causing a compiler crash. GenerateVecOp() was called for both
the idiom root instruction and its argument in the idiom
vectorization routine; however the argument could have been
already processed by that time. It happened when two vectorization
idioms' matched patterns had a common sub-expression.
Test: test-art-target.
Test: 623-checker-loop-regressions.
Change-Id: I8823c52f8ef62377c29310f0e335b9728d11068a
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 5784707..770a011 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -2044,13 +2044,13 @@
(reduction_type != DataType::Type::kInt32 && reduction_type != DataType::Type::kInt64)) {
return false;
}
- HInstruction* q = instruction->InputAt(0);
- HInstruction* v = instruction->InputAt(1);
+ HInstruction* acc = instruction->InputAt(0);
+ HInstruction* abs = instruction->InputAt(1);
HInstruction* a = nullptr;
HInstruction* b = nullptr;
- if (v->IsAbs() &&
- v->GetType() == reduction_type &&
- IsSubConst2(graph_, v->InputAt(0), /*out*/ &a, /*out*/ &b)) {
+ if (abs->IsAbs() &&
+ abs->GetType() == reduction_type &&
+ IsSubConst2(graph_, abs->InputAt(0), /*out*/ &a, /*out*/ &b)) {
DCHECK(a != nullptr && b != nullptr);
} else {
return false;
@@ -2076,16 +2076,16 @@
// idiomatic operation. Sequential code uses the original scalar expressions.
DCHECK(r != nullptr && s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
- r = s = v->InputAt(0);
+ r = s = abs->InputAt(0);
}
- if (VectorizeUse(node, q, generate_code, sub_type, restrictions) &&
+ if (VectorizeUse(node, acc, generate_code, sub_type, restrictions) &&
VectorizeUse(node, r, generate_code, sub_type, restrictions) &&
VectorizeUse(node, s, generate_code, sub_type, restrictions)) {
if (generate_code) {
if (vector_mode_ == kVector) {
vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate(
global_allocator_,
- vector_map_->Get(q),
+ vector_map_->Get(acc),
vector_map_->Get(r),
vector_map_->Get(s),
HVecOperation::ToProperType(reduction_type, is_unsigned),
@@ -2093,8 +2093,14 @@
kNoDexPc));
MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
} else {
- GenerateVecOp(v, vector_map_->Get(r), nullptr, reduction_type);
- GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type);
+ // "GenerateVecOp()" must not be called more than once for each original loop body
+ // instruction. As the SAD idiom processes both "current" instruction ("instruction")
+ // and its ABS input in one go, we must check that for the scalar case the ABS instruction
+ // has not yet been processed.
+ if (vector_map_->find(abs) == vector_map_->end()) {
+ GenerateVecOp(abs, vector_map_->Get(r), nullptr, reduction_type);
+ }
+ GenerateVecOp(instruction, vector_map_->Get(acc), vector_map_->Get(abs), reduction_type);
}
}
return true;
@@ -2116,20 +2122,20 @@
return false;
}
- HInstruction* q = instruction->InputAt(0);
- HInstruction* v = instruction->InputAt(1);
- if (!v->IsMul() || v->GetType() != reduction_type) {
+ HInstruction* const acc = instruction->InputAt(0);
+ HInstruction* const mul = instruction->InputAt(1);
+ if (!mul->IsMul() || mul->GetType() != reduction_type) {
return false;
}
- HInstruction* a = v->InputAt(0);
- HInstruction* b = v->InputAt(1);
- HInstruction* r = a;
- HInstruction* s = b;
- DataType::Type op_type = GetNarrowerType(a, b);
+ HInstruction* const mul_left = mul->InputAt(0);
+ HInstruction* const mul_right = mul->InputAt(1);
+ HInstruction* r = mul_left;
+ HInstruction* s = mul_right;
+ DataType::Type op_type = GetNarrowerType(mul_left, mul_right);
bool is_unsigned = false;
- if (!IsNarrowerOperands(a, b, op_type, &r, &s, &is_unsigned)) {
+ if (!IsNarrowerOperands(mul_left, mul_right, op_type, &r, &s, &is_unsigned)) {
return false;
}
op_type = HVecOperation::ToProperType(op_type, is_unsigned);
@@ -2143,17 +2149,17 @@
// Accept dot product idiom for vectorizable operands. Vectorized code uses the shorthand
// idiomatic operation. Sequential code uses the original scalar expressions.
if (generate_code && vector_mode_ != kVector) { // de-idiom
- r = a;
- s = b;
+ r = mul_left;
+ s = mul_right;
}
- if (VectorizeUse(node, q, generate_code, op_type, restrictions) &&
+ if (VectorizeUse(node, acc, generate_code, op_type, restrictions) &&
VectorizeUse(node, r, generate_code, op_type, restrictions) &&
VectorizeUse(node, s, generate_code, op_type, restrictions)) {
if (generate_code) {
if (vector_mode_ == kVector) {
vector_map_->Put(instruction, new (global_allocator_) HVecDotProd(
global_allocator_,
- vector_map_->Get(q),
+ vector_map_->Get(acc),
vector_map_->Get(r),
vector_map_->Get(s),
reduction_type,
@@ -2162,8 +2168,14 @@
kNoDexPc));
MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
} else {
- GenerateVecOp(v, vector_map_->Get(r), vector_map_->Get(s), reduction_type);
- GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type);
+ // "GenerateVecOp()" must not be called more than once for each original loop body
+ // instruction. As the DotProd idiom processes both "current" instruction ("instruction")
+ // and its MUL input in one go, we must check that for the scalar case the MUL instruction
+ // has not yet been processed.
+ if (vector_map_->find(mul) == vector_map_->end()) {
+ GenerateVecOp(mul, vector_map_->Get(r), vector_map_->Get(s), reduction_type);
+ }
+ GenerateVecOp(instruction, vector_map_->Get(acc), vector_map_->Get(mul), reduction_type);
}
}
return true;
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 6fa38f8..36dfc84 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -589,6 +589,205 @@
return a[3];
}
+ // Dot product and SAD vectorization idioms used to have a bug when some
+ // instruction in the loop was visited twice causing a compiler crash.
+ // It happened when two vectorization idioms' matched patterns had a common
+ // sub-expression.
+
+ // Idioms common sub-expression bug: DotProduct and ArraySet.
+ //
+ /// CHECK-START-ARM64: int Main.testDotProdAndSet(byte[], byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: VecDotProd
+ /// CHECK-DAG: VecStore
+ public static final int testDotProdAndSet(byte[] a, byte[] b, byte[] c) {
+ int s = 1;
+ for (int i = 0; i < b.length; i++) {
+ int temp = a[i] * b[i];
+ c[i]= (byte)temp;
+ s += temp;
+ }
+ return s - 1;
+ }
+
+ // Idioms common sub-expression bug: DotProduct and DotProduct.
+ //
+ /// CHECK-START-ARM64: int Main.testDotProdAndDotProd(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: VecDotProd
+ /// CHECK-DAG: VecDotProd
+ public static final int testDotProdAndDotProd(byte[] a, byte[] b) {
+ int s0 = 1;
+ int s1 = 1;
+ for (int i = 0; i < b.length; i++) {
+ int temp = a[i] * b[i];
+ s0 += temp;
+ s1 += temp;
+ }
+ return s0 + s1;
+ }
+
+ // Idioms common sub-expression bug: SAD and ArraySet.
+ //
+ /// CHECK-START-{ARM,ARM64}: int Main.testSADAndSet(int[], int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: VecSADAccumulate
+ /// CHECK-DAG: VecStore
+ public static int testSADAndSet(int[] x, int[] y, int[] z) {
+ int min_length = Math.min(x.length, y.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ int temp = Math.abs(x[i] - y[i]);
+ z[i] = temp;
+ sad += temp;
+ }
+ return sad;
+ }
+
+ // Idioms common sub-expression bug: SAD and SAD.
+ /// CHECK-START-{ARM,ARM64}: int Main.testSADAndSAD(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: VecSADAccumulate
+ /// CHECK-DAG: VecSADAccumulate
+ public static final int testSADAndSAD(int[] x, int[] y) {
+ int s0 = 1;
+ int s1 = 1;
+ for (int i = 0; i < x.length; i++) {
+ int temp = Math.abs(x[i] - y[i]);
+ s0 += temp;
+ s1 += temp;
+ }
+ return s0 + s1;
+ }
+
+ // Idioms common sub-expression bug: DotProd and DotProd with extra mul.
+ //
+ /// CHECK-START-ARM64: int Main.testDotProdAndDotProdExtraMul0(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: VecMul
+ /// CHECK-DAG: VecDotProd
+ /// CHECK-DAG: VecDotProd
+ public static final int testDotProdAndDotProdExtraMul0(byte[] a, byte[] b) {
+ int s0 = 1;
+ int s1 = 1;
+ for (int i = 0; i < b.length; i++) {
+ int temp0 = a[i] * b[i];
+ int temp1 = (byte)(temp0) * a[i];
+ s0 += temp1;
+ s1 += temp0;
+ }
+ return s0 + s1;
+ }
+
+ // Idioms common sub-expression bug: DotProd and DotProd with extra mul (reversed order).
+ //
+ /// CHECK-START-ARM64: int Main.testDotProdAndDotProdExtraMul1(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: VecMul
+ /// CHECK-DAG: VecDotProd
+ /// CHECK-DAG: VecDotProd
+ public static final int testDotProdAndDotProdExtraMul1(byte[] a, byte[] b) {
+ int s0 = 1;
+ int s1 = 1;
+ for (int i = 0; i < b.length; i++) {
+ int temp0 = a[i] * b[i];
+ int temp1 = (byte)(temp0) * a[i];
+ s0 += temp0;
+ s1 += temp1;
+ }
+ return s0 + s1;
+ }
+
+ // Idioms common sub-expression bug: SAD and SAD with extra abs.
+ //
+ /// CHECK-START-{ARM,ARM64}: int Main.testSADAndSADExtraAbs0(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: VecSub
+ /// CHECK-DAG: VecAbs
+ /// CHECK-DAG: VecSADAccumulate
+ /// CHECK-DAG: VecSADAccumulate
+ public static final int testSADAndSADExtraAbs0(int[] x, int[] y) {
+ int s0 = 1;
+ int s1 = 1;
+ for (int i = 0; i < x.length; i++) {
+ int temp0 = Math.abs(x[i] - y[i]);
+ int temp1 = Math.abs(temp0 - y[i]);
+ s0 += temp1;
+ s1 += temp0;
+ }
+ return s0 + s1;
+ }
+
+ // Idioms common sub-expression bug: SAD and SAD with extra abs (reversed order).
+ //
+ /// CHECK-START-{ARM,ARM64}: int Main.testSADAndSADExtraAbs1(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: VecSub
+ /// CHECK-DAG: VecAbs
+ /// CHECK-DAG: VecSADAccumulate
+ /// CHECK-DAG: VecSADAccumulate
+ public static final int testSADAndSADExtraAbs1(int[] x, int[] y) {
+ int s0 = 1;
+ int s1 = 1;
+ for (int i = 0; i < x.length; i++) {
+ int temp0 = Math.abs(x[i] - y[i]);
+ int temp1 = Math.abs(temp0 - y[i]);
+ s0 += temp0;
+ s1 += temp1;
+ }
+ return s0 + s1;
+ }
+
+
+ // Idioms common sub-expression bug: SAD and DotProd combined.
+ //
+ /// CHECK-START-ARM64: int Main.testSADAndDotProdCombined0(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: VecSub
+ /// CHECK-DAG: VecSADAccumulate
+ /// CHECK-DAG: VecDotProd
+ public static final int testSADAndDotProdCombined0(byte[] x, byte[] y) {
+ int s0 = 1;
+ int s1 = 1;
+ for (int i = 0; i < x.length; i++) {
+ int temp0 = x[i] - y[i];
+ int temp1 = Math.abs(temp0);
+ int temp2 = x[i] * (byte)(temp0);
+
+ s0 += temp1;
+ s1 += temp2;
+ }
+ return s0 + s1;
+ }
+
+ // Idioms common sub-expression bug: SAD and DotProd combined (reversed order).
+ /// CHECK-START-ARM64: int Main.testSADAndDotProdCombined1(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: VecSub
+ /// CHECK-DAG: VecSADAccumulate
+ /// CHECK-DAG: VecDotProd
+ public static final int testSADAndDotProdCombined1(byte[] x, byte[] y) {
+ int s0 = 1;
+ int s1 = 1;
+ for (int i = 0; i < x.length; i++) {
+ int temp0 = x[i] - y[i];
+ int temp1 = Math.abs(temp0);
+ int temp2 = x[i] * (byte)(temp0);
+
+ s0 += temp2;
+ s1 += temp1;
+ }
+ return s0 + s1;
+ }
+
+ public static final int ARRAY_SIZE = 512;
+
+ private static byte[] createAndInitByteArray(int x) {
+ byte[] a = new byte[ARRAY_SIZE];
+ for (int i = 0; i < a.length; i++) {
+ a[i] = (byte)((~i) + x);
+ }
+ return a;
+ }
+
+ private static int[] createAndInitIntArray(int x) {
+ int[] a = new int[ARRAY_SIZE];
+ for (int i = 0; i < a.length; i++) {
+ a[i] = (~i) + x;
+ }
+ return a;
+ }
+
public static void main(String[] args) {
System.loadLibrary(args[0]);
@@ -780,6 +979,59 @@
expectEquals(10, reductionIntoReplication());
+ {
+ byte[] b_a = createAndInitByteArray(1);
+ byte[] b_b = createAndInitByteArray(2);
+ byte[] b_c = createAndInitByteArray(3);
+ expectEquals(2731008, testDotProdAndSet(b_a, b_b, b_c));
+ }
+ {
+ byte[] b_a = createAndInitByteArray(1);
+ byte[] b_b = createAndInitByteArray(2);
+ expectEquals(5462018, testDotProdAndDotProd(b_a, b_b));
+ }
+ {
+ int[] i_a = createAndInitIntArray(1);
+ int[] i_b = createAndInitIntArray(2);
+ int[] i_c = createAndInitIntArray(3);
+ expectEquals(512, testSADAndSet(i_a, i_b, i_c));
+ }
+ {
+ int[] i_a = createAndInitIntArray(1);
+ int[] i_b = createAndInitIntArray(2);
+ expectEquals(1026, testSADAndSAD(i_a, i_b));
+ }
+ {
+ byte[] b_a = createAndInitByteArray(1);
+ byte[] b_b = createAndInitByteArray(2);
+ expectEquals(2731266, testDotProdAndDotProdExtraMul0(b_a, b_b));
+ }
+ {
+ byte[] b_a = createAndInitByteArray(1);
+ byte[] b_b = createAndInitByteArray(2);
+ expectEquals(2731266, testDotProdAndDotProdExtraMul1(b_a, b_b));
+ }
+ {
+ int[] i_a = createAndInitIntArray(1);
+ int[] i_b = createAndInitIntArray(2);
+ expectEquals(131330, testSADAndSADExtraAbs0(i_a, i_b));
+ }
+ {
+ int[] i_a = createAndInitIntArray(1);
+ int[] i_b = createAndInitIntArray(2);
+ expectEquals(131330, testSADAndSADExtraAbs1(i_a, i_b));
+ }
+ {
+ byte[] b_a = createAndInitByteArray(1);
+ byte[] b_b = createAndInitByteArray(2);
+ expectEquals(1278, testSADAndDotProdCombined0(b_a, b_b));
+ }
+ {
+ byte[] b_a = createAndInitByteArray(1);
+ byte[] b_b = createAndInitByteArray(2);
+ expectEquals(1278, testSADAndDotProdCombined1(b_a, b_b));
+ }
+
System.out.println("passed");
}