diff options
author | 2017-09-01 10:59:03 +0100 | |
---|---|---|
committer | 2017-09-07 21:29:41 +0100 | |
commit | f26bb6c74a973fde3d2783ac35324d5ce8def814 (patch) | |
tree | 70149908a20503dfaf1276d04d561024f3441c6f /runtime/base/bit_utils.h | |
parent | 66e3af9ce5b3aaa43e5ce3bce8233235af139072 (diff) |
ARM64: Tune SIMD loop unrolling factor heuristic.
Improve SIMD loop unrolling factor heuristic for ARM64 by
accounting for max desired loop size, trip_count, etc. The
following example shows 21% perf increase:
    for (int i = 0; i < LENGTH; i++) {
      bc[i] = ba[i]; // Byte arrays
    }
Test: test-art-host, test-art-target.
Change-Id: Ic587759c51aa4354df621ffb1c7ce4ebd798dfc1
Diffstat (limited to 'runtime/base/bit_utils.h')
-rw-r--r-- | runtime/base/bit_utils.h | 8 |
1 files changed, 8 insertions, 0 deletions
```diff
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 0844678b74..87dac0261e 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -127,6 +127,14 @@ constexpr T RoundUpToPowerOfTwo(T x) {
   return (x < 2u) ? x : static_cast<T>(1u) << (std::numeric_limits<T>::digits - CLZ(x - 1u));
 }
 
+// Return highest possible N - a power of two - such that val >= N.
+template <typename T>
+constexpr T TruncToPowerOfTwo(T val) {
+  static_assert(std::is_integral<T>::value, "T must be integral");
+  static_assert(std::is_unsigned<T>::value, "T must be unsigned");
+  return (val != 0) ? static_cast<T>(1u) << (BitSizeOf<T>() - CLZ(val) - 1u) : 0;
+}
+
 template<typename T>
 constexpr bool IsPowerOfTwo(T x) {
   static_assert(std::is_integral<T>::value, "T must be integral");
```