diff options
author | 2024-05-21 14:28:56 +0100 | |
---|---|---|
committer | 2024-05-28 09:14:04 +0000 | |
commit | 1d116a904a41f969b549cac33a4baaf5a123c3ec (patch) | |
tree | aeb6f8944186f9b7e977e1d9c127a0980c3ef06a | |
parent | 6af68f471b6c1f638033f8261a7bfb5b9ad5354d (diff) |
[SVE] Fix a bug with vectorization external set.
This patch fixes a bug where HLoopOptimization::vector_external_set_
(which tracks vector instructions inserted outside of the newly
generated loops) was reset more than once during vectorization. Instead,
it should only be reset when vectorization of the loop has finished.
It also adds a regression test for this situation: a loop is vectorized
in predicated mode, and both a disambiguation test and a backup scalar
loop are needed and inserted.
Note: This bug only affected predicated vectorization (e.g. using Arm
SVE) and didn't impact any production devices.
Test: run with and without ART_FORCE_TRY_PREDICATED_SIMD=true
Test: 623-checker-loop-regressions
Test: ./art/test.py --host --optimizing --jit
Test: ./art/test.py --target --optimizing --jit
Change-Id: I19fabd4d7034e5aa6421df79b2519d8f7e6ef43a
-rw-r--r-- | compiler/optimizing/loop_optimization.cc | 19 | ||||
-rw-r--r-- | compiler/optimizing/loop_optimization.h | 7 | ||||
-rw-r--r-- | test/623-checker-loop-regressions/src/Main.java | 44 |
3 files changed, 65 insertions, 5 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 14e6683cf8..9372e116bb 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -1283,6 +1283,8 @@ void HLoopOptimization::VectorizePredicated(LoopNode* node, HBasicBlock* exit) { DCHECK(IsInPredicatedVectorizationMode()); + vector_external_set_->clear(); + HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -1369,6 +1371,8 @@ void HLoopOptimization::VectorizeTraditional(LoopNode* node, int64_t trip_count) { DCHECK(!IsInPredicatedVectorizationMode()); + vector_external_set_->clear(); + HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -1566,7 +1570,6 @@ HPhi* HLoopOptimization::InitializeForNewLoop(HBasicBlock* new_preheader, HInstr vector_header_->AddPhi(phi); vector_index_ = phi; vector_permanent_map_->clear(); - vector_external_set_->clear(); predicate_info_map_->clear(); return phi; @@ -2208,7 +2211,7 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, DataType::Type type) { vector = new (global_allocator_) HVecReplicateScalar(global_allocator_, input, type, vector_length_, kNoDexPc); vector_permanent_map_->Put(org, Insert(vector_preheader_, vector)); - vector_external_set_->insert(vector); + MaybeInsertInVectorExternalSet(vector); } vector_map_->Put(org, vector); } @@ -2337,7 +2340,7 @@ void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* r vector_length, kNoDexPc)); } - vector_external_set_->insert(new_init); + MaybeInsertInVectorExternalSet(new_init); } else { new_init = ReduceAndExtractIfNeeded(new_init); } @@ -2366,12 +2369,12 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct HVecReduce* reduce = new (global_allocator_) HVecReduce( global_allocator_, instruction, type, vector_length, kind, kNoDexPc); 
exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction()); - vector_external_set_->insert(reduce); + MaybeInsertInVectorExternalSet(reduce); instruction = new (global_allocator_) HVecExtractScalar( global_allocator_, reduce, type, vector_length, 0, kNoDexPc); exit->InsertInstructionAfter(instruction, reduce); - vector_external_set_->insert(instruction); + MaybeInsertInVectorExternalSet(instruction); } } return instruction; @@ -3167,6 +3170,12 @@ void HLoopOptimization::InitPredicateInfoMap(LoopNode* node, back_edge_info->SetControlPredicate(header_info->GetTruePredicate()); } +void HLoopOptimization::MaybeInsertInVectorExternalSet(HInstruction* instruction) { + if (IsInPredicatedVectorizationMode()) { + vector_external_set_->insert(instruction); + } +} + std::ostream& operator<<(std::ostream& os, const HLoopOptimization::VectorMode& mode) { switch (mode) { case HLoopOptimization::VectorMode::kSequential: diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 4817060dfa..b6998c7f90 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -480,6 +480,7 @@ class HLoopOptimization : public HOptimization { bool CanRemoveCycle(); // Whether the current 'iset_' is removable. bool IsInPredicatedVectorizationMode() const { return predicated_vectorization_mode_; } + void MaybeInsertInVectorExternalSet(HInstruction* instruction); // Compiler options (to query ISA features). const CompilerOptions* compiler_options_; @@ -547,6 +548,12 @@ class HLoopOptimization : public HOptimization { // Tracks vector operations that are inserted outside of the loop (preheader, exit) // as part of vectorization (e.g. replicate scalar for loop invariants and reduce ops // for loop reductions). + // + // The instructions in the set are live for the whole vectorization process of the current + // loop, not just during generation of a particular loop version (as the sets above). 
+ // + // Currently the set is being only used in the predicated mode - for assigning governing + // predicates. ScopedArenaSet<HInstruction*>* vector_external_set_; // A mapping between a basic block of the original loop and its associated PredicateInfo. diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java index 134e90c6c5..5e9b3d7e15 100644 --- a/test/623-checker-loop-regressions/src/Main.java +++ b/test/623-checker-loop-regressions/src/Main.java @@ -977,6 +977,39 @@ public class Main { return s0 + s1; } + // Regression test for the case, where a loop is vectorized in predicated mode, and there is + // a disambiguation scalar loop added. Make sure that the set, which records instructions + // inserted outside of new loops, is not reset until the full vectorization process has + // happened. + // + // Based on void android.util.Spline$MonotoneCubicSpline.<init>(float[], float[]). + // + /// CHECK-START-ARM64: void Main.$noinline$testExternalSetForLoopWithDisambiguation(int[], int[]) loop_optimization (after) + /// CHECK-IF: hasIsaFeature("sve") and os.environ.get('ART_FORCE_TRY_PREDICATED_SIMD') == 'true' + // + /// CHECK-DAG: <<Pred:j\d+>> VecPredSetAll loop:none + /// CHECK-DAG: VecReplicateScalar [{{i\d+}},<<Pred>>] loop:none + // + /// CHECK-ELSE: + // + /// CHECK-DAG: VecReplicateScalar loop:none + // + /// CHECK-FI: + // + // Vector loop. + /// CHECK-DAG: Phi loop:<<VectorLoop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<VectorLoop>> outer_loop:none + // + // Backup scalar loop. 
+ /// CHECK-DAG: Phi loop:<<ScalarLoop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<ScalarLoop>> outer_loop:none + public static void $noinline$testExternalSetForLoopWithDisambiguation(int[] d, int[] m) { + m[0] = d[0]; + for (int i = 1; i < m.length; i++) { + m[i] = (d[i - 1] + d[i]) * 53; + } + } + public static final int ARRAY_SIZE = 512; private static byte[] createAndInitByteArray(int x) { @@ -1253,6 +1286,17 @@ public class Main { byte[] b_b = createAndInitByteArray(2); expectEquals(1278, testSADAndDotProdCombined1(b_a, b_b)); } + { + int[] i_a = createAndInitIntArray(1); + int[] i_b = createAndInitIntArray(2); + $noinline$testExternalSetForLoopWithDisambiguation(i_a, i_b); + + int sum = 0; + for (int i = 0; i < i_b.length; i++) { + sum += i_b[i]; + } + expectEquals(-13839413, sum); + } System.out.println("passed"); } |