summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
author Artem Serov <artem.serov@arm.com> 2022-09-13 23:05:17 +0100
committer Santiago Aboy Solanes <solanes@google.com> 2023-06-09 15:41:40 +0000
commit a3610a1a121e2ec2b4130dba3b9878fab3cfd192 (patch)
tree 9547ff225ef7fea6d8aee14d6efc2574693b00da /compiler
parent fbf26acf186d4809045652a4d21996913c04388f (diff)
Refactor vectorization data flow methods.
As part of vectorization some instructions are inserted outside the loop (preheader, exit); in predicated mode they must be assigned a governing predicate. Currently the corresponding data flow analysis and generation methods have multiple patchy 'InPredicatedMode()' guarded actions for this. This patch introduces vector_external_set_ to track such instructions; in predicated mode it updates the governing predicates separately using that set, making the methods agnostic to vectorization type - predicated or traditional. It also properly guards StringCharAt case for predicated mode via vector restrictions and renames a few variables for better readability. Original author: Artem Serov <Artem.Serov@linaro.org> Test: ./art/test/testrunner/testrunner.py --host --optimizing --jit Test: ./art/test/testrunner/testrunner.py --target --optimizing --jit Test: target tests on arm64 with SVE (for details see art/test/README.arm_fvp). Change-Id: I7fba731e6f4e8dd5cd4490cdfc95cb4ae8b2e99e
Diffstat (limited to 'compiler')
-rw-r--r-- compiler/optimizing/loop_optimization.cc 82
-rw-r--r-- compiler/optimizing/loop_optimization.h 5
2 files changed, 47 insertions, 40 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index d5e34634c8..716ea19657 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -482,6 +482,7 @@ HLoopOptimization::HLoopOptimization(HGraph* graph,
vector_runtime_test_b_(nullptr),
vector_map_(nullptr),
vector_permanent_map_(nullptr),
+ vector_external_set_(nullptr),
vector_mode_(kSequential),
vector_preheader_(nullptr),
vector_header_(nullptr),
@@ -542,12 +543,14 @@ bool HLoopOptimization::LocalRun() {
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
ScopedArenaSafeMap<HInstruction*, HInstruction*> perm(
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ScopedArenaSet<HInstruction*> ext_set(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
// Attach.
iset_ = &iset;
reductions_ = &reds;
vector_refs_ = &refs;
vector_map_ = &map;
vector_permanent_map_ = &perm;
+ vector_external_set_ = &ext_set;
// Traverse.
const bool did_loop_opt = TraverseLoopsInnerToOuter(top_loop_);
// Detach.
@@ -556,6 +559,8 @@ bool HLoopOptimization::LocalRun() {
vector_refs_ = nullptr;
vector_map_ = nullptr;
vector_permanent_map_ = nullptr;
+ vector_external_set_ = nullptr;
+
return did_loop_opt;
}
@@ -1290,6 +1295,23 @@ void HLoopOptimization::Vectorize(LoopNode* node,
// Remove the original loop by disconnecting the body block
// and removing all instructions from the header.
block->DisconnectAndDelete();
+
+ if (IsInPredicatedVectorizationMode()) {
+ // Assigns governing predicates (all true) to the vector operations inserted outside the loop.
+ //
+ // TODO: Adjust GVN to support VecPredSetAll sharing.
+ for (auto it : *vector_external_set_) {
+ HVecOperation* vec_op = it->AsVecOperation();
+ HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
+ graph_->GetIntConstant(1),
+ vec_op->GetPackedType(),
+ vec_op->GetVectorLength(),
+ 0u);
+ vec_op->GetBlock()->InsertInstructionBefore(set_pred, vec_op);
+ vec_op->SetMergingGoverningPredicate(set_pred);
+ }
+ }
+
while (!header->GetFirstInstruction()->IsGoto()) {
header->RemoveInstruction(header->GetFirstInstruction());
}
@@ -1353,6 +1375,8 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node,
vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
vector_index_ = phi;
vector_permanent_map_->clear(); // preserved over unrolling
+ vector_external_set_->clear();
+
for (uint32_t u = 0; u < unroll; u++) {
// Generate instruction map.
vector_map_->clear();
@@ -1442,10 +1466,13 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) ||
(TrySetVectorType(type, &restrictions) &&
VectorizeUse(node, instruction, generate_code, type, restrictions))) {
+ DCHECK(!instruction->IsPhi());
if (generate_code) {
- HInstruction* new_red = vector_map_->Get(instruction);
- vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second));
- vector_permanent_map_->Overwrite(redit->second, new_red);
+ HInstruction* new_red_vec_op = vector_map_->Get(instruction);
+ HInstruction* original_phi = redit->second;
+ DCHECK(original_phi->IsPhi());
+ vector_permanent_map_->Put(new_red_vec_op, vector_map_->Get(original_phi));
+ vector_permanent_map_->Overwrite(original_phi, new_red_vec_op);
}
return true;
}
@@ -1485,9 +1512,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
// Deal with vector restrictions.
bool is_string_char_at = instruction->AsArrayGet()->IsStringCharAt();
- if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt) ||
- IsInPredicatedVectorizationMode())) {
- // TODO: Support CharAt for predicated mode.
+ if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt))) {
return false;
}
// Accept a right-hand-side array base[index] for
@@ -1712,6 +1737,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case DataType::Type::kUint16:
case DataType::Type::kInt16:
*restrictions |= kNoDiv |
+ kNoStringCharAt | // TODO: support in predicated mode.
kNoSignedHAdd |
kNoUnsignedHAdd |
kNoUnroundedHAdd |
@@ -1855,15 +1881,7 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, DataType::Type type) {
vector = new (global_allocator_)
HVecReplicateScalar(global_allocator_, input, type, vector_length_, kNoDexPc);
vector_permanent_map_->Put(org, Insert(vector_preheader_, vector));
- if (IsInPredicatedVectorizationMode()) {
- HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
- graph_->GetIntConstant(1),
- type,
- vector_length_,
- 0u);
- vector_preheader_->InsertInstructionBefore(set_pred, vector);
- vector->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- }
+ vector_external_set_->insert(vector);
}
vector_map_->Put(org, vector);
}
@@ -1936,18 +1954,18 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org,
vector_map_->Put(org, vector);
}
-void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
- DCHECK(reductions_->find(phi) != reductions_->end());
- DCHECK(reductions_->Get(phi->InputAt(1)) == phi);
+void HLoopOptimization::GenerateVecReductionPhi(HPhi* orig_phi) {
+ DCHECK(reductions_->find(orig_phi) != reductions_->end());
+ DCHECK(reductions_->Get(orig_phi->InputAt(1)) == orig_phi);
HInstruction* vector = nullptr;
if (vector_mode_ == kSequential) {
HPhi* new_phi = new (global_allocator_) HPhi(
- global_allocator_, kNoRegNumber, 0, phi->GetType());
+ global_allocator_, kNoRegNumber, 0, orig_phi->GetType());
vector_header_->AddPhi(new_phi);
vector = new_phi;
} else {
// Link vector reduction back to prior unrolled update, or a first phi.
- auto it = vector_permanent_map_->find(phi);
+ auto it = vector_permanent_map_->find(orig_phi);
if (it != vector_permanent_map_->end()) {
vector = it->second;
} else {
@@ -1957,7 +1975,7 @@ void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
vector = new_phi;
}
}
- vector_map_->Put(phi, vector);
+ vector_map_->Put(orig_phi, vector);
}
void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) {
@@ -1992,15 +2010,7 @@ void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* r
vector_length,
kNoDexPc));
}
- if (IsInPredicatedVectorizationMode()) {
- HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
- graph_->GetIntConstant(1),
- type,
- vector_length,
- 0u);
- vector_preheader_->InsertInstructionBefore(set_pred, new_init);
- new_init->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- }
+ vector_external_set_->insert(new_init);
} else {
new_init = ReduceAndExtractIfNeeded(new_init);
}
@@ -2029,20 +2039,12 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct
HVecReduce* reduce = new (global_allocator_) HVecReduce(
global_allocator_, instruction, type, vector_length, kind, kNoDexPc);
exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction());
+ vector_external_set_->insert(reduce);
instruction = new (global_allocator_) HVecExtractScalar(
global_allocator_, reduce, type, vector_length, 0, kNoDexPc);
exit->InsertInstructionAfter(instruction, reduce);
- if (IsInPredicatedVectorizationMode()) {
- HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_,
- graph_->GetIntConstant(1),
- type,
- vector_length,
- 0u);
- exit->InsertInstructionBefore(set_pred, reduce);
- reduce->SetMergingGoverningPredicate(set_pred);
- instruction->AsVecOperation()->SetMergingGoverningPredicate(set_pred);
- }
+ vector_external_set_->insert(instruction);
}
}
return instruction;
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 6dd778ba74..27afc07f4e 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -369,6 +369,11 @@ class HLoopOptimization : public HOptimization {
// Contents reside in phase-local heap memory.
ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_;
+ // Tracks vector operations that are inserted outside of the loop (preheader, exit)
+ // as part of vectorization (e.g. replicate scalar for loop invariants and reduce ops
+ // for loop reductions).
+ ScopedArenaSet<HInstruction*>* vector_external_set_;
+
// Temporary vectorization bookkeeping.
VectorMode vector_mode_; // synthesis mode
HBasicBlock* vector_preheader_; // preheader of the new loop