Stack maps: Micro-optimize ReadInterleavedVarints
Doing both operations in the same loop seems to trigger
some pathological case in clang's register allocator.
Splitting the loops fixes it.
This speeds up CodeInfo decoding by 5%, and "maps" startup by 0.05%.
Test: test.py -b --host --64 --optimizing
Change-Id: If0b88373069a755a82f0c9969a1f461f223fb62f
diff --git a/libartbase/base/bit_memory_region.h b/libartbase/base/bit_memory_region.h
index 9f0d546..8532a64 100644
--- a/libartbase/base/bit_memory_region.h
+++ b/libartbase/base/bit_memory_region.h
@@ -261,8 +261,13 @@
// StackMap BitTable uses over 8 varints in the header, so we need uint64_t.
uint64_t data = ReadBits<uint64_t>(N * kVarintBits);
for (size_t i = 0; i < N; i++) {
- uint32_t x = BitFieldExtract(data, i * kVarintBits, kVarintBits);
- values[i] = LIKELY(x <= kVarintMax) ? x : ReadBits((x - kVarintMax) * kBitsPerByte);
+ values[i] = BitFieldExtract(data, i * kVarintBits, kVarintBits);
+ }
+ // Do the second part in its own loop as that seems to produce better code in clang.
+ for (size_t i = 0; i < N; i++) {
+ if (UNLIKELY(values[i] > kVarintMax)) {
+ values[i] = ReadBits((values[i] - kVarintMax) * kBitsPerByte);
+ }
}
return values;
}