summaryrefslogtreecommitdiff
path: root/compiler/optimizing/nodes_vector.h
diff options
context:
space:
mode:
author Aart Bik <ajcbik@google.com> 2017-09-01 13:06:08 -0700
committer Aart Bik <ajcbik@google.com> 2017-09-21 10:20:55 -0700
commitdbbac8f812a866b1b53f3007721f66038d208549 (patch)
tree05cecd927afccd33fc1c14b39ada47e86873f560 /compiler/optimizing/nodes_vector.h
parent2406bf17998e15bd40677a907beb3e9c41facce4 (diff)
Implement Sum-of-Abs-Differences idiom recognition.
Rationale: Currently just on ARM64 (x86 lacks proper support), using the SAD idiom yields great speedup on loops that compute the sum-of-abs-difference operation. Also includes some refinements around type conversions. Speedup ExoPlayerAudio (golem run): 1.3x on ARM64 1.1x on x86 Test: test-art-host test-art-target Bug: 64091002 Change-Id: Ia2b711d2bc23609a2ed50493dfe6719eedfe0130
Diffstat (limited to 'compiler/optimizing/nodes_vector.h')
-rw-r--r--compiler/optimizing/nodes_vector.h59
1 files changed, 45 insertions, 14 deletions
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index c5e75a7ca4..1488b7086a 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -461,8 +461,8 @@ class HVecAdd FINAL : public HVecBinaryOperation {
};
// Performs halving add on every component in the two vectors, viz.
-// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
-// or [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ]
+// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
+// truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ]
// for signed operands x, y (sign extension) or unsigned operands x, y (zero extension).
class HVecHalvingAdd FINAL : public HVecBinaryOperation {
public:
@@ -810,8 +810,8 @@ class HVecUShr FINAL : public HVecBinaryOperation {
//
// Assigns the given scalar elements to a vector,
-// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ] if n == m,
-// set( array(x1, .., xm) ) = [ x1, .. , xm, 0, .., 0 ] if m < n.
+// viz. set( array(x1, .. , xn) ) = [ x1, .. , xn ] if n == m,
+// set( array(x1, .. , xm) ) = [ x1, .. , xm, 0, .. , 0 ] if m < n.
class HVecSetScalars FINAL : public HVecOperation {
public:
HVecSetScalars(ArenaAllocator* arena,
@@ -842,9 +842,8 @@ class HVecSetScalars FINAL : public HVecOperation {
DISALLOW_COPY_AND_ASSIGN(HVecSetScalars);
};
-// Multiplies every component in the two vectors, adds the result vector to the accumulator vector.
-// viz. [ acc1, .., accn ] + [ x1, .. , xn ] * [ y1, .. , yn ] =
-// [ acc1 + x1 * y1, .. , accn + xn * yn ].
+// Multiplies every component in the two vectors, adds the result vector to the accumulator vector,
+// viz. [ a1, .. , an ] + [ x1, .. , xn ] * [ y1, .. , yn ] = [ a1 + x1 * y1, .. , an + xn * yn ].
class HVecMultiplyAccumulate FINAL : public HVecOperation {
public:
HVecMultiplyAccumulate(ArenaAllocator* arena,
@@ -866,15 +865,11 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation {
DCHECK(HasConsistentPackedTypes(accumulator, packed_type));
DCHECK(HasConsistentPackedTypes(mul_left, packed_type));
DCHECK(HasConsistentPackedTypes(mul_right, packed_type));
- SetRawInputAt(kInputAccumulatorIndex, accumulator);
- SetRawInputAt(kInputMulLeftIndex, mul_left);
- SetRawInputAt(kInputMulRightIndex, mul_right);
+ SetRawInputAt(0, accumulator);
+ SetRawInputAt(1, mul_left);
+ SetRawInputAt(2, mul_right);
}
- static constexpr int kInputAccumulatorIndex = 0;
- static constexpr int kInputMulLeftIndex = 1;
- static constexpr int kInputMulRightIndex = 2;
-
bool CanBeMoved() const OVERRIDE { return true; }
bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
@@ -894,6 +889,42 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation {
DISALLOW_COPY_AND_ASSIGN(HVecMultiplyAccumulate);
};
+// Takes the absolute difference of two vectors, and adds the results to
+// same-precision or wider-precision components in the accumulator,
+// viz. SAD([ a1, .. , am ], [ x1, .. , xn ], [ y1, .. , yn ]) =
+// [ a1 + sum abs(xi-yi), .. , am + sum abs(xj-yj) ],
+// for m <= n and non-overlapping sums.
+class HVecSADAccumulate FINAL : public HVecOperation {
+ public:
+ HVecSADAccumulate(ArenaAllocator* arena,
+ HInstruction* accumulator,
+ HInstruction* sad_left,
+ HInstruction* sad_right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecOperation(arena,
+ packed_type,
+ SideEffects::None(),
+ /* number_of_inputs */ 3,
+ vector_length,
+ dex_pc) {
+ DCHECK(HasConsistentPackedTypes(accumulator, packed_type));
+ DCHECK(sad_left->IsVecOperation());
+ DCHECK(sad_right->IsVecOperation());
+ DCHECK_EQ(sad_left->AsVecOperation()->GetPackedType(),
+ sad_right->AsVecOperation()->GetPackedType());
+ SetRawInputAt(0, accumulator);
+ SetRawInputAt(1, sad_left);
+ SetRawInputAt(2, sad_right);
+ }
+
+ DECLARE_INSTRUCTION(VecSADAccumulate);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecSADAccumulate);
+};
+
// Loads a vector from memory, viz. load(mem, 1)
// yields the vector [ mem(1), .. , mem(n) ].
class HVecLoad FINAL : public HVecMemoryOperation {