Refactor vectorization data flow methods.
As part of vectorization some instructions are inserted outside
the loop (preheader, exit); in predicated mode they must be
assigned a governing predicate. Currently the corresponding
data flow analysis and generation methods contain multiple scattered
'IsInPredicatedVectorizationMode()'-guarded actions for this.
This patch introduces vector_external_set_ to track such
instructions; in predicated mode it updates the governing
predicates separately using that set, making the methods
agnostic to vectorization type - predicated or traditional.
It also properly guards the StringCharAt case of ArrayGet for
predicated mode via vector restrictions (kNoStringCharAt) and
renames a few variables for better readability.
Original author: Artem Serov <Artem.Serov@linaro.org>
Test: ./art/test/testrunner/testrunner.py --host --optimizing --jit
Test: ./art/test/testrunner/testrunner.py --target --optimizing --jit
Test: target tests on arm64 with SVE (for details see
art/test/README.arm_fvp).
Change-Id: I7fba731e6f4e8dd5cd4490cdfc95cb4ae8b2e99e
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index d5e3463..716ea19 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -482,6 +482,7 @@
vector_runtime_test_b_(nullptr),
vector_map_(nullptr),
vector_permanent_map_(nullptr),
+ vector_external_set_(nullptr),
vector_mode_(kSequential),
vector_preheader_(nullptr),
vector_header_(nullptr),
@@ -542,12 +543,14 @@
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
ScopedArenaSafeMap<HInstruction*, HInstruction*> perm(
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ScopedArenaSet<HInstruction*> ext_set(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
// Attach.
iset_ = &iset;
reductions_ = &reds;
vector_refs_ = &refs;
vector_map_ = &map;
vector_permanent_map_ = &perm;
+ vector_external_set_ = &ext_set;
// Traverse.
const bool did_loop_opt = TraverseLoopsInnerToOuter(top_loop_);
// Detach.
@@ -556,6 +559,8 @@
vector_refs_ = nullptr;
vector_map_ = nullptr;
vector_permanent_map_ = nullptr;
+ vector_external_set_ = nullptr;
+
return did_loop_opt;
}
@@ -1290,6 +1295,23 @@
// Remove the original loop by disconnecting the body block
// and removing all instructions from the header.
block->DisconnectAndDelete();
+
+ if (IsInPredicatedVectorizationMode()) {
+ // Assigns governing predicates (all true) to the vector operations inserted outside the loop.
+ //
+ // TODO: Adjust GVN to support VecPredSetAll sharing.
+ for (auto it : *vector_external_set_) {
+ HVecOperation* vec_op = it->AsVecOperation();
+ HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
+ graph_->GetIntConstant(1),
+ vec_op->GetPackedType(),
+ vec_op->GetVectorLength(),
+ 0u);
+ vec_op->GetBlock()->InsertInstructionBefore(set_pred, vec_op);
+ vec_op->SetMergingGoverningPredicate(set_pred);
+ }
+ }
+
while (!header->GetFirstInstruction()->IsGoto()) {
header->RemoveInstruction(header->GetFirstInstruction());
}
@@ -1353,6 +1375,8 @@
vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
vector_index_ = phi;
vector_permanent_map_->clear(); // preserved over unrolling
+ vector_external_set_->clear();
+
for (uint32_t u = 0; u < unroll; u++) {
// Generate instruction map.
vector_map_->clear();
@@ -1442,10 +1466,13 @@
VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) ||
(TrySetVectorType(type, &restrictions) &&
VectorizeUse(node, instruction, generate_code, type, restrictions))) {
+ DCHECK(!instruction->IsPhi());
if (generate_code) {
- HInstruction* new_red = vector_map_->Get(instruction);
- vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second));
- vector_permanent_map_->Overwrite(redit->second, new_red);
+ HInstruction* new_red_vec_op = vector_map_->Get(instruction);
+ HInstruction* original_phi = redit->second;
+ DCHECK(original_phi->IsPhi());
+ vector_permanent_map_->Put(new_red_vec_op, vector_map_->Get(original_phi));
+ vector_permanent_map_->Overwrite(original_phi, new_red_vec_op);
}
return true;
}
@@ -1485,9 +1512,7 @@
// Deal with vector restrictions.
bool is_string_char_at = instruction->AsArrayGet()->IsStringCharAt();
- if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt) ||
- IsInPredicatedVectorizationMode())) {
- // TODO: Support CharAt for predicated mode.
+ if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt))) {
return false;
}
// Accept a right-hand-side array base[index] for
@@ -1712,6 +1737,7 @@
case DataType::Type::kUint16:
case DataType::Type::kInt16:
*restrictions |= kNoDiv |
+ kNoStringCharAt | // TODO: support in predicated mode.
kNoSignedHAdd |
kNoUnsignedHAdd |
kNoUnroundedHAdd |
@@ -1855,15 +1881,7 @@
vector = new (global_allocator_)
HVecReplicateScalar(global_allocator_, input, type, vector_length_, kNoDexPc);
vector_permanent_map_->Put(org, Insert(vector_preheader_, vector));
- if (IsInPredicatedVectorizationMode()) {
- HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
- graph_->GetIntConstant(1),
- type,
- vector_length_,
- 0u);
- vector_preheader_->InsertInstructionBefore(set_pred, vector);
- vector->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- }
+ vector_external_set_->insert(vector);
}
vector_map_->Put(org, vector);
}
@@ -1936,18 +1954,18 @@
vector_map_->Put(org, vector);
}
-void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
- DCHECK(reductions_->find(phi) != reductions_->end());
- DCHECK(reductions_->Get(phi->InputAt(1)) == phi);
+void HLoopOptimization::GenerateVecReductionPhi(HPhi* orig_phi) {
+ DCHECK(reductions_->find(orig_phi) != reductions_->end());
+ DCHECK(reductions_->Get(orig_phi->InputAt(1)) == orig_phi);
HInstruction* vector = nullptr;
if (vector_mode_ == kSequential) {
HPhi* new_phi = new (global_allocator_) HPhi(
- global_allocator_, kNoRegNumber, 0, phi->GetType());
+ global_allocator_, kNoRegNumber, 0, orig_phi->GetType());
vector_header_->AddPhi(new_phi);
vector = new_phi;
} else {
// Link vector reduction back to prior unrolled update, or a first phi.
- auto it = vector_permanent_map_->find(phi);
+ auto it = vector_permanent_map_->find(orig_phi);
if (it != vector_permanent_map_->end()) {
vector = it->second;
} else {
@@ -1957,7 +1975,7 @@
vector = new_phi;
}
}
- vector_map_->Put(phi, vector);
+ vector_map_->Put(orig_phi, vector);
}
void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) {
@@ -1992,15 +2010,7 @@
vector_length,
kNoDexPc));
}
- if (IsInPredicatedVectorizationMode()) {
- HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
- graph_->GetIntConstant(1),
- type,
- vector_length,
- 0u);
- vector_preheader_->InsertInstructionBefore(set_pred, new_init);
- new_init->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- }
+ vector_external_set_->insert(new_init);
} else {
new_init = ReduceAndExtractIfNeeded(new_init);
}
@@ -2029,20 +2039,12 @@
HVecReduce* reduce = new (global_allocator_) HVecReduce(
global_allocator_, instruction, type, vector_length, kind, kNoDexPc);
exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction());
+ vector_external_set_->insert(reduce);
instruction = new (global_allocator_) HVecExtractScalar(
global_allocator_, reduce, type, vector_length, 0, kNoDexPc);
exit->InsertInstructionAfter(instruction, reduce);
- if (IsInPredicatedVectorizationMode()) {
- HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
- graph_->GetIntConstant(1),
- type,
- vector_length,
- 0u);
- exit->InsertInstructionBefore(set_pred, reduce);
- reduce->SetMergingGoverningPredicate(set_pred);
- instruction->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- }
+ vector_external_set_->insert(instruction);
}
}
return instruction;
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 6dd778b..27afc07 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -369,6 +369,11 @@
// Contents reside in phase-local heap memory.
ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_;
+ // Tracks vector operations that are inserted outside of the loop (preheader, exit)
+ // as part of vectorization (e.g. replicate scalar for loop invariants and reduce ops
+ // for loop reductions).
+ ScopedArenaSet<HInstruction*>* vector_external_set_;
+
// Temporary vectorization bookkeeping.
VectorMode vector_mode_; // synthesis mode
HBasicBlock* vector_preheader_; // preheader of the new loop
diff --git a/test/661-checker-simd-reduc/src/Main.java b/test/661-checker-simd-reduc/src/Main.java
index 379e83e..b480884 100644
--- a/test/661-checker-simd-reduc/src/Main.java
+++ b/test/661-checker-simd-reduc/src/Main.java
@@ -84,7 +84,7 @@
/// CHECK-DAG: Add [<<I>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<PostLoopP:j\d+>> VecPredSetAll [<<TrueC>>] loop:none
/// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>,<<PostLoopP>>] loop:none
- /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>,<<PostLoopP>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>,{{j\d+}}] loop:none
//
/// CHECK-ELSE:
//