summaryrefslogtreecommitdiff
path: root/compiler/optimizing/loop_optimization.cc
diff options
context:
space:
mode:
author Chris Jones <christopher.jones@arm.com> 2024-06-03 16:25:38 +0100
committer Treehugger Robot <android-test-infra-autosubmit@system.gserviceaccount.com> 2024-11-06 10:48:31 +0000
commit 740ae3479b54495d2dda92a000107325b08faf35 (patch)
tree 66db8e0034b1d901988d2b4c67371f238bad88b7 /compiler/optimizing/loop_optimization.cc
parent 8cec104ae64bd45e1377d799ac4653fbce7fb631 (diff)
Support all conditions in predicated vectorization
Support all condition types inside the condition when performing diamond loop auto-vectorization. This allows diamond loop auto-vectorization to be performed on a greater variety of loops. To support this change, new vector condition nodes are added to mirror the scalar condition nodes. Also add a new gtest class to test whether predicated vectorization can be performed on different combinations of condition types and data types. Authors: Chris Jones <christopher.jones@arm.com>, Konstantin Baladurin <konstantin.baladurin@arm.com> Test: export ART_FORCE_TRY_PREDICATED_SIMD=true && \ art/test.py --target --optimizing Test: art/test.py --target --host --optimizing Test: 661-checker-simd-cf-loops Test: art/test.py --gtest art_compiler_tests Change-Id: Ic9c925f1a58ada13d9031de3b445dcd4f77764b7
Diffstat (limited to 'compiler/optimizing/loop_optimization.cc')
-rw-r--r-- compiler/optimizing/loop_optimization.cc 65
1 files changed, 48 insertions, 17 deletions
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 215986910b..2f1aea68aa 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -875,13 +875,6 @@ static HBasicBlock* GetInnerLoopFiniteSingleExit(HLoopInformation* loop_info) {
return exit;
}
-// Determines whether predicated loop vectorization should be tried for ALL loops.
-#ifdef ART_FORCE_TRY_PREDICATED_SIMD
- static constexpr bool kForceTryPredicatedSIMD = true;
-#else
- static constexpr bool kForceTryPredicatedSIMD = false;
-#endif
-
bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
@@ -2049,7 +2042,6 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case InstructionSet::kArm64:
if (IsInPredicatedVectorizationMode()) {
// SVE vectorization.
- CHECK(features->AsArm64InstructionSetFeatures()->HasSVE());
size_t vector_length = simd_register_size_ / DataType::Size(type);
DCHECK_EQ(simd_register_size_ % DataType::Size(type), 0u);
switch (type) {
@@ -2396,6 +2388,13 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct
} \
break;
+// Some instructions in the scalar loop body can only occur in loops with control flow; for such
+// loops we don't support cleanup loops (generated via kSequential); see TryVectorizePredicated.
+#define GENERATE_PRED_VEC(x) \
+ DCHECK_EQ(synthesis_mode_, LoopSynthesisMode::kVector); \
+ vector = (x); \
+ break;
+
HInstruction* HLoopOptimization::GenerateVecOp(HInstruction* org,
HInstruction* opa,
HInstruction* opb,
@@ -2469,13 +2468,46 @@ HInstruction* HLoopOptimization::GenerateVecOp(HInstruction* org,
GENERATE_VEC(
new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc),
new (global_allocator_) HAbs(org_type, opa, dex_pc));
- case HInstruction::kEqual: {
- // Special case.
- DCHECK_EQ(synthesis_mode_, LoopSynthesisMode::kVector);
- vector = new (global_allocator_)
- HVecCondition(global_allocator_, opa, opb, type, vector_length_, dex_pc);
- }
- break;
+ case HInstruction::kEqual:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecEqual(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kNotEqual:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecNotEqual(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kLessThan:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecLessThan(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kLessThanOrEqual:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecLessThanOrEqual(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kGreaterThan:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecGreaterThan(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kGreaterThanOrEqual:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecGreaterThanOrEqual(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kBelow:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecBelow(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kBelowOrEqual:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecBelowOrEqual(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kAbove:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecAbove(global_allocator_, opa, opb, type, vector_length_, dex_pc));
+ case HInstruction::kAboveOrEqual:
+ GENERATE_PRED_VEC(
+ new (global_allocator_)
+ HVecAboveOrEqual(global_allocator_, opa, opb, type, vector_length_, dex_pc));
default:
break;
} // switch
@@ -2733,8 +2765,7 @@ bool HLoopOptimization::VectorizeIfCondition(LoopNode* node,
return false;
}
- if (!if_input->IsEqual()) {
- // TODO: Support other condition types.
+ if (!if_input->IsCondition()) {
return false;
}