diff options
author | 2017-09-01 13:06:08 -0700 | |
---|---|---|
committer | 2017-09-21 10:20:55 -0700 | |
commit | dbbac8f812a866b1b53f3007721f66038d208549 (patch) | |
tree | 05cecd927afccd33fc1c14b39ada47e86873f560 /compiler/optimizing/nodes_vector.h | |
parent | 2406bf17998e15bd40677a907beb3e9c41facce4 (diff) |
Implement Sum-of-Abs-Differences idiom recognition.
Rationale:
Currently implemented only for ARM64 (x86 lacks proper support),
using the SAD idiom yields a great speedup on loops
that compute the sum-of-absolute-differences operation.
Also includes some refinements around type conversions.
Speedup ExoPlayerAudio (golem run):
1.3x on ARM64
1.1x on x86
Test: test-art-host test-art-target
Bug: 64091002
Change-Id: Ia2b711d2bc23609a2ed50493dfe6719eedfe0130
Diffstat (limited to 'compiler/optimizing/nodes_vector.h')
-rw-r--r-- | compiler/optimizing/nodes_vector.h | 59 |
1 files changed, 45 insertions, 14 deletions
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index c5e75a7ca4..1488b7086a 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -461,8 +461,8 @@ class HVecAdd FINAL : public HVecBinaryOperation { }; // Performs halving add on every component in the two vectors, viz. -// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] -// or [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ] +// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] +// truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ] // for signed operands x, y (sign extension) or unsigned operands x, y (zero extension). class HVecHalvingAdd FINAL : public HVecBinaryOperation { public: @@ -810,8 +810,8 @@ class HVecUShr FINAL : public HVecBinaryOperation { // // Assigns the given scalar elements to a vector, -// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ] if n == m, -// set( array(x1, .., xm) ) = [ x1, .. , xm, 0, .., 0 ] if m < n. +// viz. set( array(x1, .. , xn) ) = [ x1, .. , xn ] if n == m, +// set( array(x1, .. , xm) ) = [ x1, .. , xm, 0, .. , 0 ] if m < n. class HVecSetScalars FINAL : public HVecOperation { public: HVecSetScalars(ArenaAllocator* arena, @@ -842,9 +842,8 @@ class HVecSetScalars FINAL : public HVecOperation { DISALLOW_COPY_AND_ASSIGN(HVecSetScalars); }; -// Multiplies every component in the two vectors, adds the result vector to the accumulator vector. -// viz. [ acc1, .., accn ] + [ x1, .. , xn ] * [ y1, .. , yn ] = -// [ acc1 + x1 * y1, .. , accn + xn * yn ]. +// Multiplies every component in the two vectors, adds the result vector to the accumulator vector, +// viz. [ a1, .. , an ] + [ x1, .. , xn ] * [ y1, .. , yn ] = [ a1 + x1 * y1, .. , an + xn * yn ]. 
class HVecMultiplyAccumulate FINAL : public HVecOperation { public: HVecMultiplyAccumulate(ArenaAllocator* arena, @@ -866,15 +865,11 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); DCHECK(HasConsistentPackedTypes(mul_left, packed_type)); DCHECK(HasConsistentPackedTypes(mul_right, packed_type)); - SetRawInputAt(kInputAccumulatorIndex, accumulator); - SetRawInputAt(kInputMulLeftIndex, mul_left); - SetRawInputAt(kInputMulRightIndex, mul_right); + SetRawInputAt(0, accumulator); + SetRawInputAt(1, mul_left); + SetRawInputAt(2, mul_right); } - static constexpr int kInputAccumulatorIndex = 0; - static constexpr int kInputMulLeftIndex = 1; - static constexpr int kInputMulRightIndex = 2; - bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { @@ -894,6 +889,42 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { DISALLOW_COPY_AND_ASSIGN(HVecMultiplyAccumulate); }; +// Takes the absolute difference of two vectors, and adds the results to +// same-precision or wider-precision components in the accumulator, +// viz. SAD([ a1, .. , am ], [ x1, .. , xn ], [ y1, .. , yn ]) = +// [ a1 + sum abs(xi-yi), .. , am + sum abs(xj-yj) ], +// for m <= n and non-overlapping sums. 
+class HVecSADAccumulate FINAL : public HVecOperation { + public: + HVecSADAccumulate(ArenaAllocator* arena, + HInstruction* accumulator, + HInstruction* sad_left, + HInstruction* sad_right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecOperation(arena, + packed_type, + SideEffects::None(), + /* number_of_inputs */ 3, + vector_length, + dex_pc) { + DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); + DCHECK(sad_left->IsVecOperation()); + DCHECK(sad_right->IsVecOperation()); + DCHECK_EQ(sad_left->AsVecOperation()->GetPackedType(), + sad_right->AsVecOperation()->GetPackedType()); + SetRawInputAt(0, accumulator); + SetRawInputAt(1, sad_left); + SetRawInputAt(2, sad_right); + } + + DECLARE_INSTRUCTION(VecSADAccumulate); + + private: + DISALLOW_COPY_AND_ASSIGN(HVecSADAccumulate); +}; + // Loads a vector from memory, viz. load(mem, 1) // yield the vector [ mem(1), .. , mem(n) ]. class HVecLoad FINAL : public HVecMemoryOperation { |