diff options
| -rw-r--r-- | compiler/optimizing/loop_optimization.cc | 19 | ||||
| -rw-r--r-- | compiler/optimizing/loop_optimization.h | 7 | ||||
| -rw-r--r-- | test/623-checker-loop-regressions/src/Main.java | 44 |
3 files changed, 65 insertions, 5 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 14e6683cf8..9372e116bb 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -1283,6 +1283,8 @@ void HLoopOptimization::VectorizePredicated(LoopNode* node, HBasicBlock* exit) { DCHECK(IsInPredicatedVectorizationMode()); + vector_external_set_->clear(); + HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -1369,6 +1371,8 @@ void HLoopOptimization::VectorizeTraditional(LoopNode* node, int64_t trip_count) { DCHECK(!IsInPredicatedVectorizationMode()); + vector_external_set_->clear(); + HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -1566,7 +1570,6 @@ HPhi* HLoopOptimization::InitializeForNewLoop(HBasicBlock* new_preheader, HInstr vector_header_->AddPhi(phi); vector_index_ = phi; vector_permanent_map_->clear(); - vector_external_set_->clear(); predicate_info_map_->clear(); return phi; @@ -2208,7 +2211,7 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, DataType::Type type) { vector = new (global_allocator_) HVecReplicateScalar(global_allocator_, input, type, vector_length_, kNoDexPc); vector_permanent_map_->Put(org, Insert(vector_preheader_, vector)); - vector_external_set_->insert(vector); + MaybeInsertInVectorExternalSet(vector); } vector_map_->Put(org, vector); } @@ -2337,7 +2340,7 @@ void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* r vector_length, kNoDexPc)); } - vector_external_set_->insert(new_init); + MaybeInsertInVectorExternalSet(new_init); } else { new_init = ReduceAndExtractIfNeeded(new_init); } @@ -2366,12 +2369,12 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct HVecReduce* reduce = new (global_allocator_) HVecReduce( global_allocator_, instruction, type, vector_length, kind, kNoDexPc); 
exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction()); - vector_external_set_->insert(reduce); + MaybeInsertInVectorExternalSet(reduce); instruction = new (global_allocator_) HVecExtractScalar( global_allocator_, reduce, type, vector_length, 0, kNoDexPc); exit->InsertInstructionAfter(instruction, reduce); - vector_external_set_->insert(instruction); + MaybeInsertInVectorExternalSet(instruction); } } return instruction; @@ -3167,6 +3170,12 @@ void HLoopOptimization::InitPredicateInfoMap(LoopNode* node, back_edge_info->SetControlPredicate(header_info->GetTruePredicate()); } +void HLoopOptimization::MaybeInsertInVectorExternalSet(HInstruction* instruction) { + if (IsInPredicatedVectorizationMode()) { + vector_external_set_->insert(instruction); + } +} + std::ostream& operator<<(std::ostream& os, const HLoopOptimization::VectorMode& mode) { switch (mode) { case HLoopOptimization::VectorMode::kSequential: diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 4817060dfa..b6998c7f90 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -480,6 +480,7 @@ class HLoopOptimization : public HOptimization { bool CanRemoveCycle(); // Whether the current 'iset_' is removable. bool IsInPredicatedVectorizationMode() const { return predicated_vectorization_mode_; } + void MaybeInsertInVectorExternalSet(HInstruction* instruction); // Compiler options (to query ISA features). const CompilerOptions* compiler_options_; @@ -547,6 +548,12 @@ class HLoopOptimization : public HOptimization { // Tracks vector operations that are inserted outside of the loop (preheader, exit) // as part of vectorization (e.g. replicate scalar for loop invariants and reduce ops // for loop reductions). + // + // The instructions in the set are live for the whole vectorization process of the current + // loop, not just during generation of a particular loop version (as the sets above). 
+ // + // Currently the set is only used in the predicated mode - for assigning governing + // predicates. ScopedArenaSet<HInstruction*>* vector_external_set_; // A mapping between a basic block of the original loop and its associated PredicateInfo. diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java index 134e90c6c5..5e9b3d7e15 100644 --- a/test/623-checker-loop-regressions/src/Main.java +++ b/test/623-checker-loop-regressions/src/Main.java @@ -977,6 +977,39 @@ public class Main { return s0 + s1; } + // Regression test for the case where a loop is vectorized in predicated mode, and there is + // a disambiguation scalar loop added. Make sure that the set, which records instructions + // inserted outside of new loops, is not reset until the full vectorization process has + // happened. + // + // Based on void android.util.Spline$MonotoneCubicSpline.<init>(float[], float[]). + // + /// CHECK-START-ARM64: void Main.$noinline$testExternalSetForLoopWithDisambiguation(int[], int[]) loop_optimization (after) + /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true' + // + /// CHECK-DAG: <<Pred:j\d+>> VecPredSetAll loop:none + /// CHECK-DAG: VecReplicateScalar [{{i\d+}},<<Pred>>] loop:none + // + /// CHECK-ELSE: + // + /// CHECK-DAG: VecReplicateScalar loop:none + // + /// CHECK-FI: + // + // Vector loop. + /// CHECK-DAG: Phi loop:<<VectorLoop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<VectorLoop>> outer_loop:none + // + // Backup scalar loop.
+ /// CHECK-DAG: Phi loop:<<ScalarLoop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<ScalarLoop>> outer_loop:none + public static void $noinline$testExternalSetForLoopWithDisambiguation(int[] d, int[] m) { + m[0] = d[0]; + for (int i = 1; i < m.length; i++) { + m[i] = (d[i - 1] + d[i]) * 53; + } + } + public static final int ARRAY_SIZE = 512; private static byte[] createAndInitByteArray(int x) { @@ -1253,6 +1286,17 @@ public class Main { byte[] b_b = createAndInitByteArray(2); expectEquals(1278, testSADAndDotProdCombined1(b_a, b_b)); } + { + int[] i_a = createAndInitIntArray(1); + int[] i_b = createAndInitIntArray(2); + $noinline$testExternalSetForLoopWithDisambiguation(i_a, i_b); + + int sum = 0; + for (int i = 0; i < i_b.length; i++) { + sum += i_b[i]; + } + expectEquals(-13839413, sum); + } System.out.println("passed"); } |