diff options
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 62 | ||||
-rw-r--r-- | test/623-checker-loop-regressions/src/Main.java | 252 |
2 files changed, 289 insertions, 25 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 5784707d0e..770a011550 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -2044,13 +2044,13 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, (reduction_type != DataType::Type::kInt32 && reduction_type != DataType::Type::kInt64)) { return false; } - HInstruction* q = instruction->InputAt(0); - HInstruction* v = instruction->InputAt(1); + HInstruction* acc = instruction->InputAt(0); + HInstruction* abs = instruction->InputAt(1); HInstruction* a = nullptr; HInstruction* b = nullptr; - if (v->IsAbs() && - v->GetType() == reduction_type && - IsSubConst2(graph_, v->InputAt(0), /*out*/ &a, /*out*/ &b)) { + if (abs->IsAbs() && + abs->GetType() == reduction_type && + IsSubConst2(graph_, abs->InputAt(0), /*out*/ &a, /*out*/ &b)) { DCHECK(a != nullptr && b != nullptr); } else { return false; @@ -2076,16 +2076,16 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, // idiomatic operation. Sequential code uses the original scalar expressions. DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom - r = s = v->InputAt(0); + r = s = abs->InputAt(0); } - if (VectorizeUse(node, q, generate_code, sub_type, restrictions) && + if (VectorizeUse(node, acc, generate_code, sub_type, restrictions) && VectorizeUse(node, r, generate_code, sub_type, restrictions) && VectorizeUse(node, s, generate_code, sub_type, restrictions)) { if (generate_code) { if (vector_mode_ == kVector) { vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate( global_allocator_, - vector_map_->Get(q), + vector_map_->Get(acc), vector_map_->Get(r), vector_map_->Get(s), HVecOperation::ToProperType(reduction_type, is_unsigned), @@ -2093,8 +2093,14 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, kNoDexPc)); MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); } else { - GenerateVecOp(v, vector_map_->Get(r), nullptr, reduction_type); - GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type); + // "GenerateVecOp()" must not be called more than once for each original loop body + // instruction. As the SAD idiom processes both "current" instruction ("instruction") + // and its ABS input in one go, we must check that for the scalar case the ABS instruction + // has not yet been processed. + if (vector_map_->find(abs) == vector_map_->end()) { + GenerateVecOp(abs, vector_map_->Get(r), nullptr, reduction_type); + } + GenerateVecOp(instruction, vector_map_->Get(acc), vector_map_->Get(abs), reduction_type); } } return true; @@ -2116,20 +2122,20 @@ bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, return false; } - HInstruction* q = instruction->InputAt(0); - HInstruction* v = instruction->InputAt(1); - if (!v->IsMul() || v->GetType() != reduction_type) { + HInstruction* const acc = instruction->InputAt(0); + HInstruction* const mul = instruction->InputAt(1); + if (!mul->IsMul() || mul->GetType() != reduction_type) { return false; } - HInstruction* a = v->InputAt(0); - HInstruction* b = v->InputAt(1); - HInstruction* r = a; - HInstruction* s = b; - DataType::Type op_type = GetNarrowerType(a, b); + HInstruction* const mul_left = mul->InputAt(0); + HInstruction* const mul_right = mul->InputAt(1); + HInstruction* r = mul_left; + HInstruction* s = mul_right; + DataType::Type op_type = GetNarrowerType(mul_left, mul_right); bool is_unsigned = false; - if (!IsNarrowerOperands(a, b, op_type, &r, &s, &is_unsigned)) { + if (!IsNarrowerOperands(mul_left, mul_right, op_type, &r, &s, &is_unsigned)) { return false; } op_type = HVecOperation::ToProperType(op_type, is_unsigned); @@ -2143,17 +2149,17 @@ bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, // Accept dot product idiom for vectorizable operands. Vectorized code uses the shorthand // idiomatic operation. Sequential code uses the original scalar expressions. if (generate_code && vector_mode_ != kVector) { // de-idiom - r = a; - s = b; + r = mul_left; + s = mul_right; } - if (VectorizeUse(node, q, generate_code, op_type, restrictions) && + if (VectorizeUse(node, acc, generate_code, op_type, restrictions) && VectorizeUse(node, r, generate_code, op_type, restrictions) && VectorizeUse(node, s, generate_code, op_type, restrictions)) { if (generate_code) { if (vector_mode_ == kVector) { vector_map_->Put(instruction, new (global_allocator_) HVecDotProd( global_allocator_, - vector_map_->Get(q), + vector_map_->Get(acc), vector_map_->Get(r), vector_map_->Get(s), reduction_type, @@ -2162,8 +2168,14 @@ bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, kNoDexPc)); MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); } else { - GenerateVecOp(v, vector_map_->Get(r), vector_map_->Get(s), reduction_type); - GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type); + // "GenerateVecOp()" must not be called more than once for each original loop body + // instruction. As the DotProd idiom processes both "current" instruction ("instruction") + // and its MUL input in one go, we must check that for the scalar case the MUL instruction + // has not yet been processed. + if (vector_map_->find(mul) == vector_map_->end()) { + GenerateVecOp(mul, vector_map_->Get(r), vector_map_->Get(s), reduction_type); + } + GenerateVecOp(instruction, vector_map_->Get(acc), vector_map_->Get(mul), reduction_type); } } return true; diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java index 6fa38f82b8..36dfc84a72 100644 --- a/test/623-checker-loop-regressions/src/Main.java +++ b/test/623-checker-loop-regressions/src/Main.java @@ -589,6 +589,205 @@ public class Main { return a[3]; } + // Dot product and SAD vectorization idioms used to have a bug when some + // instruction in the loop was visited twice causing a compiler crash. + // It happened when two vectorization idioms' matched patterns had a common + // sub-expression. + + // Idioms common sub-expression bug: DotProduct and ArraySet. + // + /// CHECK-START-ARM64: int Main.testDotProdAndSet(byte[], byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd + /// CHECK-DAG: VecStore + public static final int testDotProdAndSet(byte[] a, byte[] b, byte[] c) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + c[i]= (byte)temp; + s += temp; + } + return s - 1; + } + + // Idioms common sub-expression bug: DotProduct and DotProduct. + // + /// CHECK-START-ARM64: int Main.testDotProdAndDotProd(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd + /// CHECK-DAG: VecDotProd + public static final int testDotProdAndDotProd(byte[] a, byte[] b) { + int s0 = 1; + int s1 = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s0 += temp; + s1 += temp; + } + return s0 + s1; + } + + // Idioms common sub-expression bug: SAD and ArraySet. + // + /// CHECK-START-{ARM,ARM64}: int Main.testSADAndSet(int[], int[], int[]) loop_optimization (after) + /// CHECK-DAG: VecSADAccumulate + /// CHECK-DAG: VecStore + public static int testSADAndSet(int[] x, int[] y, int[] z) { + int min_length = Math.min(x.length, y.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + int temp = Math.abs(x[i] - y[i]); + z[i] = temp; + sad += temp; + } + return sad; + } + + // Idioms common sub-expression bug: SAD and SAD. + /// CHECK-START-{ARM,ARM64}: int Main.testSADAndSAD(int[], int[]) loop_optimization (after) + /// CHECK-DAG: VecSADAccumulate + /// CHECK-DAG: VecSADAccumulate + public static final int testSADAndSAD(int[] x, int[] y) { + int s0 = 1; + int s1 = 1; + for (int i = 0; i < x.length; i++) { + int temp = Math.abs(x[i] - y[i]); + s0 += temp; + s1 += temp; + } + return s0 + s1; + } + + // Idioms common sub-expression bug: DotProd and DotProd with extra mul. + // + /// CHECK-START-ARM64: int Main.testDotProdAndDotProdExtraMul0(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: VecMul + /// CHECK-DAG: VecDotProd + /// CHECK-DAG: VecDotProd + public static final int testDotProdAndDotProdExtraMul0(byte[] a, byte[] b) { + int s0 = 1; + int s1 = 1; + for (int i = 0; i < b.length; i++) { + int temp0 = a[i] * b[i]; + int temp1 = (byte)(temp0) * a[i]; + s0 += temp1; + s1 += temp0; + } + return s0 + s1; + } + + // Idioms common sub-expression bug: DotProd and DotProd with extra mul (reversed order). + // + /// CHECK-START-ARM64: int Main.testDotProdAndDotProdExtraMul1(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: VecMul + /// CHECK-DAG: VecDotProd + /// CHECK-DAG: VecDotProd + public static final int testDotProdAndDotProdExtraMul1(byte[] a, byte[] b) { + int s0 = 1; + int s1 = 1; + for (int i = 0; i < b.length; i++) { + int temp0 = a[i] * b[i]; + int temp1 = (byte)(temp0) * a[i]; + s0 += temp0; + s1 += temp1; + } + return s0 + s1; + } + + // Idioms common sub-expression bug: SAD and SAD with extra abs. + // + /// CHECK-START-{ARM,ARM64}: int Main.testSADAndSADExtraAbs0(int[], int[]) loop_optimization (after) + /// CHECK-DAG: VecSub + /// CHECK-DAG: VecAbs + /// CHECK-DAG: VecSADAccumulate + /// CHECK-DAG: VecSADAccumulate + public static final int testSADAndSADExtraAbs0(int[] x, int[] y) { + int s0 = 1; + int s1 = 1; + for (int i = 0; i < x.length; i++) { + int temp0 = Math.abs(x[i] - y[i]); + int temp1 = Math.abs(temp0 - y[i]); + s0 += temp1; + s1 += temp0; + } + return s0 + s1; + } + + // Idioms common sub-expression bug: SAD and SAD with extra abs (reversed order). + // + /// CHECK-START-{ARM,ARM64}: int Main.testSADAndSADExtraAbs1(int[], int[]) loop_optimization (after) + /// CHECK-DAG: VecSub + /// CHECK-DAG: VecAbs + /// CHECK-DAG: VecSADAccumulate + /// CHECK-DAG: VecSADAccumulate + public static final int testSADAndSADExtraAbs1(int[] x, int[] y) { + int s0 = 1; + int s1 = 1; + for (int i = 0; i < x.length; i++) { + int temp0 = Math.abs(x[i] - y[i]); + int temp1 = Math.abs(temp0 - y[i]); + s0 += temp0; + s1 += temp1; + } + return s0 + s1; + } + + + // Idioms common sub-expression bug: SAD and DotProd combined. + // + /// CHECK-START-ARM64: int Main.testSADAndDotProdCombined0(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: VecSub + /// CHECK-DAG: VecSADAccumulate + /// CHECK-DAG: VecDotProd + public static final int testSADAndDotProdCombined0(byte[] x, byte[] y) { + int s0 = 1; + int s1 = 1; + for (int i = 0; i < x.length; i++) { + int temp0 = x[i] - y[i]; + int temp1 = Math.abs(temp0); + int temp2 = x[i] * (byte)(temp0); + + s0 += temp1; + s1 += temp2; + } + return s0 + s1; + } + + // Idioms common sub-expression bug: SAD and DotProd combined (reversed order). + /// CHECK-START-ARM64: int Main.testSADAndDotProdCombined1(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: VecSub + /// CHECK-DAG: VecSADAccumulate + /// CHECK-DAG: VecDotProd + public static final int testSADAndDotProdCombined1(byte[] x, byte[] y) { + int s0 = 1; + int s1 = 1; + for (int i = 0; i < x.length; i++) { + int temp0 = x[i] - y[i]; + int temp1 = Math.abs(temp0); + int temp2 = x[i] * (byte)(temp0); + + s0 += temp2; + s1 += temp1; + } + return s0 + s1; + } + + public static final int ARRAY_SIZE = 512; + + private static byte[] createAndInitByteArray(int x) { + byte[] a = new byte[ARRAY_SIZE]; + for (int i = 0; i < a.length; i++) { + a[i] = (byte)((~i) + x); + } + return a; + } + + private static int[] createAndInitIntArray(int x) { + int[] a = new int[ARRAY_SIZE]; + for (int i = 0; i < a.length; i++) { + a[i] = (~i) + x; + } + return a; + } + public static void main(String[] args) { System.loadLibrary(args[0]); @@ -780,6 +979,59 @@ public class Main { expectEquals(10, reductionIntoReplication()); + { + byte[] b_a = createAndInitByteArray(1); + byte[] b_b = createAndInitByteArray(2); + byte[] b_c = createAndInitByteArray(3); + expectEquals(2731008, testDotProdAndSet(b_a, b_b, b_c)); + } + { + byte[] b_a = createAndInitByteArray(1); + byte[] b_b = createAndInitByteArray(2); + expectEquals(5462018, testDotProdAndDotProd(b_a, b_b)); + } + { + int[] i_a = createAndInitIntArray(1); + int[] i_b = createAndInitIntArray(2); + int[] i_c = createAndInitIntArray(3); + expectEquals(512, testSADAndSet(i_a, i_b, i_c)); + } + { + int[] i_a = createAndInitIntArray(1); + int[] i_b = createAndInitIntArray(2); + expectEquals(1026, testSADAndSAD(i_a, i_b)); + } + { + byte[] b_a = createAndInitByteArray(1); + byte[] b_b = createAndInitByteArray(2); + expectEquals(2731266, testDotProdAndDotProdExtraMul0(b_a, b_b)); + } + { + byte[] b_a = createAndInitByteArray(1); + byte[] b_b = createAndInitByteArray(2); + expectEquals(2731266, testDotProdAndDotProdExtraMul1(b_a, b_b)); + } + { + int[] i_a = createAndInitIntArray(1); + int[] i_b = createAndInitIntArray(2); + expectEquals(131330, testSADAndSADExtraAbs0(i_a, i_b)); + } + { + int[] i_a = createAndInitIntArray(1); + int[] i_b = createAndInitIntArray(2); + expectEquals(131330, testSADAndSADExtraAbs1(i_a, i_b)); + } + { + byte[] b_a = createAndInitByteArray(1); + byte[] b_b = createAndInitByteArray(2); + expectEquals(1278, testSADAndDotProdCombined0(b_a, b_b)); + } + { + byte[] b_a = createAndInitByteArray(1); + byte[] b_b = createAndInitByteArray(2); + expectEquals(1278, testSADAndDotProdCombined1(b_a, b_b)); + } + System.out.println("passed"); } |