diff options
Diffstat (limited to 'src/native/java_lang_System.cc')
-rw-r--r-- | src/native/java_lang_System.cc | 142 |
1 file changed, 102 insertions, 40 deletions
diff --git a/src/native/java_lang_System.cc b/src/native/java_lang_System.cc index 79614aeab4..5572623a0c 100644 --- a/src/native/java_lang_System.cc +++ b/src/native/java_lang_System.cc @@ -48,61 +48,123 @@ * appropriately for the element type, and that n is a multiple of the * element size. */ -#ifdef __BIONIC__ -#define HAVE_MEMMOVE_WORDS -#endif - -#ifdef HAVE_MEMMOVE_WORDS -extern "C" void _memmove_words(void* dst, const void* src, size_t n); -#define move16 _memmove_words -#define move32 _memmove_words -#else -static void move16(void* dst, const void* src, size_t n) { - DCHECK_EQ((((uintptr_t) dst | (uintptr_t) src | n) & 0x01), 0U); - uint16_t* d = reinterpret_cast<uint16_t*>(dst); - const uint16_t* s = reinterpret_cast<const uint16_t*>(src); +/* + * Works like memmove(), except: + * - if all arguments are at least 32-bit aligned, we guarantee that we + * will use operations that preserve atomicity of 32-bit values + * - if not, we guarantee atomicity of 16-bit values + * + * If all three arguments are not at least 16-bit aligned, the behavior + * of this function is undefined. (We could remove this restriction by + * testing for unaligned values and punting to memmove(), but that's + * not currently useful.) + * + * TODO: add loop for 64-bit alignment + * TODO: use __builtin_prefetch + * TODO: write ARM/MIPS/x86 optimized versions + */ +void MemmoveWords(void* dst, const void* src, size_t n) { + DCHECK_EQ((((uintptr_t) dst | (uintptr_t) src | n) & 0x01), 0U); - n /= sizeof(uint16_t); + char* d = reinterpret_cast<char*>(dst); + const char* s = reinterpret_cast<const char*>(src); + size_t copyCount; - if (d < s) { - // Copy forwards. - while (n--) { - *d++ = *s++; - } - } else { - // Copy backwards. - d += n; - s += n; - while (n--) { - *--d = *--s; - } + // If the source and destination pointers are the same, this is + // an expensive no-op. Testing for an empty move now allows us + // to skip a check later. 
+ if (n == 0 || d == s) { + return; } -} -static void move32(void* dst, const void* src, size_t n) { - DCHECK_EQ((((uintptr_t) dst | (uintptr_t) src | n) & 0x03), 0U); + // Determine if the source and destination buffers will overlap if + // we copy data forward (i.e. *dst++ = *src++). + // + // It's okay if the destination buffer starts before the source and + // there is some overlap, because the reader is always ahead of the + // writer. + if (LIKELY((d < s) || ((size_t)(d - s) >= n))) { + // Copy forward. We prefer 32-bit loads and stores even for 16-bit + // data, so sort that out. + if (((reinterpret_cast<uintptr_t>(d) | reinterpret_cast<uintptr_t>(s)) & 0x03) != 0) { + // Not 32-bit aligned. Two possibilities: + // (1) Congruent, we can align to 32-bit by copying one 16-bit val + // (2) Non-congruent, we can do one of: + // a. copy whole buffer as a series of 16-bit values + // b. load/store 32 bits, using shifts to ensure alignment + // c. just copy the as 32-bit values and assume the CPU + // will do a reasonable job + // + // We're currently using (a), which is suboptimal. + if (((reinterpret_cast<uintptr_t>(d) ^ reinterpret_cast<uintptr_t>(s)) & 0x03) != 0) { + copyCount = n; + } else { + copyCount = 2; + } + n -= copyCount; + copyCount /= sizeof(uint16_t); - uint32_t* d = reinterpret_cast<uint32_t*>(dst); - const uint32_t* s = reinterpret_cast<const uint32_t*>(src); + while (copyCount--) { + *reinterpret_cast<uint16_t*>(d) = *reinterpret_cast<const uint16_t*>(s); + d += sizeof(uint16_t); + s += sizeof(uint16_t); + } + } - n /= sizeof(uint32_t); + // Copy 32-bit aligned words. + copyCount = n / sizeof(uint32_t); + while (copyCount--) { + *reinterpret_cast<uint32_t*>(d) = *reinterpret_cast<const uint32_t*>(s); + d += sizeof(uint32_t); + s += sizeof(uint32_t); + } - if (d < s) { - // Copy forwards. - while (n--) { - *d++ = *s++; + // Check for leftovers. Either we finished exactly, or we have one remaining 16-bit chunk. 
+ if ((n & 0x02) != 0) { + *(uint16_t*)d = *(uint16_t*)s; } } else { - // Copy backwards. + // Copy backward, starting at the end. d += n; s += n; - while (n--) { - *--d = *--s; + + if (((reinterpret_cast<uintptr_t>(d) | reinterpret_cast<uintptr_t>(s)) & 0x03) != 0) { + // try for 32-bit alignment. + if (((reinterpret_cast<uintptr_t>(d) ^ reinterpret_cast<uintptr_t>(s)) & 0x03) != 0) { + copyCount = n; + } else { + copyCount = 2; + } + n -= copyCount; + copyCount /= sizeof(uint16_t); + + while (copyCount--) { + d -= sizeof(uint16_t); + s -= sizeof(uint16_t); + *reinterpret_cast<uint16_t*>(d) = *reinterpret_cast<const uint16_t*>(s); + } + } + + // Copy 32-bit aligned words. + copyCount = n / sizeof(uint32_t); + while (copyCount--) { + d -= sizeof(uint32_t); + s -= sizeof(uint32_t); + *reinterpret_cast<uint32_t*>(d) = *reinterpret_cast<const uint32_t*>(s); + } + + // Copy leftovers. + if ((n & 0x02) != 0) { + d -= sizeof(uint16_t); + s -= sizeof(uint16_t); + *reinterpret_cast<uint16_t*>(d) = *reinterpret_cast<const uint16_t*>(s); } } } -#endif // HAVE_MEMMOVE_WORDS + +#define move16 MemmoveWords +#define move32 MemmoveWords namespace art { |