ART: Introduce predicated vector instructions.
This CL introduces a minimal changes to the IR to support
autovectorization with use of predicated execution of SIMD
instructions (e.g. Arm SVE).
Test: test-art-target, test-art-host.
Change-Id: Ibb7c5520fec6b858fb29f0dde19ec65501831a3a
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index e817048..9c6b422 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -87,9 +87,64 @@
kArenaAllocVectorNode),
vector_length_(vector_length) {
SetPackedField<PackedTypeField>(packed_type);
+ // By default vector operations are not predicated.
+ SetPackedField<PredicationKindField>(PredicationKind::kNotPredicated);
DCHECK_LT(1u, vector_length);
}
+ // Predicated instructions execute a corresponding operation only on vector elements which are
+ // active (governing predicate is true for that element); the following modes determine what
+ // is happening with inactive elements.
+ //
+ // See HVecPredSetOperation.
+ enum class PredicationKind {
+ kNotPredicated, // Instruction doesn't take any predicate as an input.
+ kZeroingForm, // Inactive elements are reset to zero.
+ kMergingForm, // Inactive elements keep their value.
+ kLast = kMergingForm,
+ };
+
+ PredicationKind GetPredicationKind() const { return GetPackedField<PredicationKindField>(); }
+
+ // Returns whether the vector operation must be predicated in predicated SIMD mode
+ // (see CodeGenerator::SupportsPredicatedSIMD). The method reflects semantics of
+ // the instruction class rather than the state of a particular instruction instance.
+ //
+ // This property is introduced for robustness purpose - to maintain and check the invariant:
+ // all instructions of the same vector operation class must be either all predicated or all
+ // not predicated (depending on the predicated SIMD support) in a correct graph.
+ virtual bool MustBePredicatedInPredicatedSIMDMode() {
+ return true;
+ }
+
+ bool IsPredicated() const {
+ return GetPredicationKind() != PredicationKind::kNotPredicated;
+ }
+
+ // See HVecPredSetOperation.
+ void SetGoverningPredicate(HInstruction* input, PredicationKind pred_kind) {
+ DCHECK(!IsPredicated());
+ DCHECK(input->IsVecPredSetOperation());
+ AddInput(input);
+ SetPackedField<PredicationKindField>(pred_kind);
+ DCHECK(IsPredicated());
+ }
+
+ void SetMergingGoverningPredicate(HInstruction* input) {
+ SetGoverningPredicate(input, PredicationKind::kMergingForm);
+ }
+ void SetZeroingGoverningPredicate(HInstruction* input) {
+ SetGoverningPredicate(input, PredicationKind::kZeroingForm);
+ }
+
+ // See HVecPredSetOperation.
+ HVecPredSetOperation* GetGoverningPredicate() const {
+ DCHECK(IsPredicated());
+ HInstruction* pred_input = InputAt(InputCount() - 1);
+ DCHECK(pred_input->IsVecPredSetOperation());
+ return pred_input->AsVecPredSetOperation();
+ }
+
// Returns the number of elements packed in a vector.
size_t GetVectorLength() const {
return vector_length_;
@@ -181,12 +236,16 @@
protected:
// Additional packed bits.
- static constexpr size_t kFieldPackedType = HInstruction::kNumberOfGenericPackedBits;
+ static constexpr size_t kPredicationKind = HInstruction::kNumberOfGenericPackedBits;
+ static constexpr size_t kPredicationKindSize =
+ MinimumBitsToStore(static_cast<size_t>(PredicationKind::kLast));
+ static constexpr size_t kFieldPackedType = kPredicationKind + kPredicationKindSize;
static constexpr size_t kFieldPackedTypeSize =
MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast));
static constexpr size_t kNumberOfVectorOpPackedBits = kFieldPackedType + kFieldPackedTypeSize;
static_assert(kNumberOfVectorOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
using PackedTypeField = BitField<DataType::Type, kFieldPackedType, kFieldPackedTypeSize>;
+ using PredicationKindField = BitField<PredicationKind, kPredicationKind, kPredicationKindSize>;
DEFAULT_COPY_CONSTRUCTOR(VecOperation);
@@ -1163,6 +1222,237 @@
DEFAULT_COPY_CONSTRUCTOR(VecStore)
};
+//
+// 'Predicate-setting' instructions.
+//
+
+// An abstract class for instructions for which the output value is a vector predicate -
+// a special kind of vector value:
+//
+// viz. [ p1, .. , pn ], where p_i is from { 0, 1 }.
+//
+// A VecOperation OP executes the same operation (e.g. ADD) on multiple elements of the vector.
+// It can be either unpredicated (operation is done on ALL of the elements) or predicated (only
+// on SOME elements, determined by a special extra input - vector predicate).
+// Implementations can vary depending on the ISA; the general idea is that for each element of the
+// regular vector a vector predicate has a corresponding element with either 0 or 1.
+// The value determines whether a vector element will be involved in OP calculations or not
+// (active or inactive). A vector predicate is referred as governing one if it is used to
+// control the execution of a predicated instruction.
+//
+// Note: vector predicate value type is introduced alongside existing vectors of booleans and
+// vectors of bytes to reflect their special semantics.
+//
+// TODO: we could introduce SIMD types in HIR.
+class HVecPredSetOperation : public HVecOperation {
+ public:
+ // A vector predicate-setting operation looks like a Int64 location.
+ // TODO: we could introduce vector types in HIR.
+ static constexpr DataType::Type kSIMDPredType = DataType::Type::kInt64;
+
+ HVecPredSetOperation(InstructionKind kind,
+ ArenaAllocator* allocator,
+ DataType::Type packed_type,
+ SideEffects side_effects,
+ size_t number_of_inputs,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecOperation(kind,
+ allocator,
+ packed_type,
+ side_effects,
+ number_of_inputs,
+ vector_length,
+ dex_pc) {
+ // Overrides the kSIMDType set by the VecOperation constructor.
+ SetPackedField<TypeField>(kSIMDPredType);
+ }
+
+ bool CanBeMoved() const override { return true; }
+
+ DECLARE_ABSTRACT_INSTRUCTION(VecPredSetOperation);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecPredSetOperation);
+};
+
+// Sets all the vector predicate elements as active or inactive.
+//
+// viz. [ p1, .. , pn ] = [ val, .. , val ] where val is from { 1, 0 }.
+class HVecPredSetAll final : public HVecPredSetOperation {
+ public:
+ HVecPredSetAll(ArenaAllocator* allocator,
+ HInstruction* input,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc) :
+ HVecPredSetOperation(kVecPredSetAll,
+ allocator,
+ packed_type,
+ SideEffects::None(),
+ /* number_of_inputs= */ 1,
+ vector_length,
+ dex_pc) {
+ DCHECK(input->IsIntConstant());
+ SetRawInputAt(0, input);
+ MarkEmittedAtUseSite();
+ }
+
+ // Having governing predicate doesn't make sense for set all TRUE/FALSE instruction.
+ bool MustBePredicatedInPredicatedSIMDMode() override { return false; }
+
+ bool IsSetTrue() const { return InputAt(0)->AsIntConstant()->IsTrue(); }
+
+ // Vector predicates are not kept alive across vector loop boundaries.
+ bool CanBeMoved() const override { return false; }
+
+ DECLARE_INSTRUCTION(VecPredSetAll);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecPredSetAll);
+};
+
+//
+// Arm64 SVE-specific instructions.
+//
+// Classes of instructions which are specific to Arm64 SVE (though could be adopted
+// by other targets, possibly being lowered to a number of ISA instructions) and
+// implement SIMD loop predicated execution idiom.
+//
+
+// Takes two scalar values x and y, creates a vector S: s(n) = x + n, compares (OP) each s(n)
+// with y and set the corresponding element of the predicate register to the result of the
+// comparison.
+//
+// viz. [ p1, .. , pn ] = [ x OP y , (x + 1) OP y, .. , (x + n) OP y ] where OP is CondKind
+// condition.
+class HVecPredWhile final : public HVecPredSetOperation {
+ public:
+ enum class CondKind {
+ kLE, // signed less than or equal.
+ kLO, // unsigned lower.
+ kLS, // unsigned lower or same.
+ kLT, // signed less.
+ kLast = kLT,
+ };
+
+ HVecPredWhile(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ CondKind cond,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc) :
+ HVecPredSetOperation(kVecPredWhile,
+ allocator,
+ packed_type,
+ SideEffects::None(),
+ /* number_of_inputs= */ 2,
+ vector_length,
+ dex_pc) {
+ DCHECK(!left->IsVecOperation());
+ DCHECK(!left->IsVecPredSetOperation());
+ DCHECK(!right->IsVecOperation());
+ DCHECK(!right->IsVecPredSetOperation());
+ DCHECK(DataType::IsIntegralType(left->GetType()));
+ DCHECK(DataType::IsIntegralType(right->GetType()));
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ SetPackedField<CondKindField>(cond);
+ }
+
+ // This is a special loop control instruction which must not be predicated.
+ bool MustBePredicatedInPredicatedSIMDMode() override { return false; }
+
+ CondKind GetCondKind() const {
+ return GetPackedField<CondKindField>();
+ }
+
+ DECLARE_INSTRUCTION(VecPredWhile);
+
+ protected:
+ // Additional packed bits.
+ static constexpr size_t kCondKind = HVecOperation::kNumberOfVectorOpPackedBits;
+ static constexpr size_t kCondKindSize =
+ MinimumBitsToStore(static_cast<size_t>(CondKind::kLast));
+ static constexpr size_t kNumberOfVecPredConditionPackedBits = kCondKind + kCondKindSize;
+ static_assert(kNumberOfVecPredConditionPackedBits <= kMaxNumberOfPackedBits,
+ "Too many packed fields.");
+ using CondKindField = BitField<CondKind, kCondKind, kCondKindSize>;
+
+ DEFAULT_COPY_CONSTRUCTOR(VecPredWhile);
+};
+
+// Evaluates the predicate condition (PCondKind) for a vector predicate; outputs
+// a scalar boolean value result.
+//
+// Note: as VecPredCondition can be also predicated, only active elements (determined by the
+// instruction's governing predicate) of the input vector predicate are used for condition
+// evaluation.
+//
+// Note: this instruction is currently used as a workaround for the fact that IR instructions
+// can't have more than one output.
+class HVecPredCondition final : public HVecOperation {
+ public:
+ // To get more info on the condition kinds please see "2.2 Process state, PSTATE" section of
+ // "ARM Architecture Reference Manual Supplement. The Scalable Vector Extension (SVE),
+ // for ARMv8-A".
+ enum class PCondKind {
+ kNone, // No active elements were TRUE.
+ kAny, // An active element was TRUE.
+ kNLast, // The last active element was not TRUE.
+ kLast, // The last active element was TRUE.
+ kFirst, // The first active element was TRUE.
+ kNFirst, // The first active element was not TRUE.
+ kPMore, // An active element was TRUE but not the last active element.
+ kPLast, // The last active element was TRUE or no active elements were TRUE.
+ kEnumLast = kPLast
+ };
+
+ HVecPredCondition(ArenaAllocator* allocator,
+ HInstruction* input,
+ PCondKind pred_cond,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecOperation(kVecPredCondition,
+ allocator,
+ packed_type,
+ SideEffects::None(),
+ /* number_of_inputs */ 1,
+ vector_length,
+ dex_pc) {
+ DCHECK(input->IsVecPredSetOperation());
+ SetRawInputAt(0, input);
+ // Overrides the kSIMDType set by the VecOperation constructor.
+ SetPackedField<TypeField>(DataType::Type::kBool);
+ SetPackedField<CondKindField>(pred_cond);
+ }
+
+ // This instruction is currently used only as a special loop control instruction
+ // which must not be predicated.
+ // TODO: Remove the constraint.
+ bool MustBePredicatedInPredicatedSIMDMode() override { return false; }
+
+ PCondKind GetPCondKind() const {
+ return GetPackedField<CondKindField>();
+ }
+
+ DECLARE_INSTRUCTION(VecPredCondition);
+
+ protected:
+ // Additional packed bits.
+ static constexpr size_t kCondKind = HVecOperation::kNumberOfVectorOpPackedBits;
+ static constexpr size_t kCondKindSize =
+ MinimumBitsToStore(static_cast<size_t>(PCondKind::kEnumLast));
+ static constexpr size_t kNumberOfVecPredConditionPackedBits = kCondKind + kCondKindSize;
+ static_assert(kNumberOfVecPredConditionPackedBits <= kMaxNumberOfPackedBits,
+ "Too many packed fields.");
+ using CondKindField = BitField<PCondKind, kCondKind, kCondKindSize>;
+
+ DEFAULT_COPY_CONSTRUCTOR(VecPredCondition);
+};
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_