ART vectorizer.
Rationale:
Make SIMD great again with a retargetable and easily extendable vectorizer.
Provides a full x86/x86_64 and a proof-of-concept ARM implementation. Sample
improvement (without any perf tuning yet) for Linpack on x86 is about 20% to 50%.
Test: test-art-host, test-art-target (angler)
Bug: 34083438, 30933338
Change-Id: Ifb77a0f25f690a87cd65bf3d5e9f6be7ea71d6c1
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 5f5a28c..52a02c2 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -400,6 +400,12 @@
// put deoptimization instructions, etc.
void TransformLoopHeaderForBCE(HBasicBlock* header);
+ // Adds a new loop directly after the loop with the given header and exit.
+ // Returns the new preheader.
+ HBasicBlock* TransformLoopForVectorization(HBasicBlock* header,
+ HBasicBlock* body,
+ HBasicBlock* exit);
+
// Removes `block` from the graph. Assumes `block` has been disconnected from
// other blocks and has no instructions or phis.
void DeleteDeadEmptyBlock(HBasicBlock* block);
@@ -1363,6 +1369,25 @@
M(TypeConversion, Instruction) \
M(UShr, BinaryOperation) \
M(Xor, BinaryOperation) \
+ M(VecReplicateScalar, VecUnaryOperation) \
+ M(VecSetScalars, VecUnaryOperation) \
+ M(VecSumReduce, VecUnaryOperation) \
+ M(VecCnv, VecUnaryOperation) \
+ M(VecNeg, VecUnaryOperation) \
+ M(VecNot, VecUnaryOperation) \
+ M(VecAdd, VecBinaryOperation) \
+ M(VecSub, VecBinaryOperation) \
+ M(VecMul, VecBinaryOperation) \
+ M(VecDiv, VecBinaryOperation) \
+ M(VecAnd, VecBinaryOperation) \
+ M(VecAndNot, VecBinaryOperation) \
+ M(VecOr, VecBinaryOperation) \
+ M(VecXor, VecBinaryOperation) \
+ M(VecShl, VecBinaryOperation) \
+ M(VecShr, VecBinaryOperation) \
+ M(VecUShr, VecBinaryOperation) \
+ M(VecLoad, VecMemoryOperation) \
+ M(VecStore, VecMemoryOperation) \
/*
* Instructions, shared across several (not all) architectures.
@@ -1424,7 +1449,11 @@
M(Constant, Instruction) \
M(UnaryOperation, Instruction) \
M(BinaryOperation, Instruction) \
- M(Invoke, Instruction)
+ M(Invoke, Instruction) \
+ M(VecOperation, Instruction) \
+ M(VecUnaryOperation, VecOperation) \
+ M(VecBinaryOperation, VecOperation) \
+ M(VecMemoryOperation, VecOperation)
#define FOR_EACH_INSTRUCTION(M) \
FOR_EACH_CONCRETE_INSTRUCTION(M) \
@@ -6689,6 +6718,8 @@
} // namespace art
+#include "nodes_vector.h"
+
#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
#include "nodes_shared.h"
#endif