From f26bb6c74a973fde3d2783ac35324d5ce8def814 Mon Sep 17 00:00:00 2001
From: Artem Serov
Date: Fri, 1 Sep 2017 10:59:03 +0100
Subject: ARM64: Tune SIMD loop unrolling factor heuristic.

Improve SIMD loop unrolling factor heuristic for ARM64 by
accounting for max desired loop size, trip_count, etc.

The following example shows 21% perf increase:
  for (int i = 0; i < LENGTH; i++) {
    bc[i] = ba[i];  // Byte arrays
  }

Test: test-art-host, test-art-target.

Change-Id: Ic587759c51aa4354df621ffb1c7ce4ebd798dfc1
---
 runtime/base/bit_utils.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'runtime/base/bit_utils.h')

diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index 0844678b74..87dac0261e 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -127,6 +127,14 @@ constexpr T RoundUpToPowerOfTwo(T x) {
   return (x < 2u) ? x : static_cast<T>(1u) << (std::numeric_limits<T>::digits - CLZ(x - 1u));
 }
 
+// Return highest possible N - a power of two - such that val >= N.
+template <typename T>
+constexpr T TruncToPowerOfTwo(T val) {
+  static_assert(std::is_integral<T>::value, "T must be integral");
+  static_assert(std::is_unsigned<T>::value, "T must be unsigned");
+  return (val != 0) ? static_cast<T>(1u) << (BitSizeOf<T>() - CLZ(val) - 1u) : 0;
+}
+
 template <typename T>
 constexpr bool IsPowerOfTwo(T x) {
   static_assert(std::is_integral<T>::value, "T must be integral");
-- 
cgit v1.2.3-59-g8ed1b