summary | refs | log | tree | commit | diff
path: root/runtime/base/bit_utils.h
diff options
context:
space:
mode:
author Artem Serov <artem.serov@linaro.org> 2017-09-01 10:59:03 +0100
committer Artem Serov <artem.serov@linaro.org> 2017-09-07 21:29:41 +0100
commit f26bb6c74a973fde3d2783ac35324d5ce8def814 (patch)
tree 70149908a20503dfaf1276d04d561024f3441c6f /runtime/base/bit_utils.h
parent 66e3af9ce5b3aaa43e5ce3bce8233235af139072 (diff)
ARM64: Tune SIMD loop unrolling factor heuristic.
Improve the SIMD loop unrolling factor heuristic for ARM64 by accounting for the maximum desired loop size, trip_count, etc.

The following example shows a 21% performance increase:

  for (int i = 0; i < LENGTH; i++) {
    bc[i] = ba[i];  // Byte arrays
  }

Test: test-art-host, test-art-target.
Change-Id: Ic587759c51aa4354df621ffb1c7ce4ebd798dfc1
Diffstat (limited to 'runtime/base/bit_utils.h')
-rw-r--r-- runtime/base/bit_utils.h 8
1 files changed, 8 insertions, 0 deletions
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 0844678b74..87dac0261e 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -127,6 +127,14 @@ constexpr T RoundUpToPowerOfTwo(T x) {
return (x < 2u) ? x : static_cast<T>(1u) << (std::numeric_limits<T>::digits - CLZ(x - 1u));
}
+// Returns the largest power of two that does not exceed val; yields 0 when val is 0.
+template <typename T>
+constexpr T TruncToPowerOfTwo(T val) {
+  static_assert(std::is_integral<T>::value, "T must be integral");
+  static_assert(std::is_unsigned<T>::value, "T must be unsigned");
+  return (val == 0) ? 0 : static_cast<T>(1u) << ((BitSizeOf<T>() - 1u) - CLZ(val));
+}
+
template<typename T>
constexpr bool IsPowerOfTwo(T x) {
static_assert(std::is_integral<T>::value, "T must be integral");