Stack maps: Micro-optimize ReadInterleavedVarints

Doing both operations in the same loop seems to trigger
a pathological case in clang's register allocator.
Splitting the work into two loops fixes it.

This speeds up CodeInfo by 5%, and speeds up maps startup by 0.05%.

Test: test.py -b --host --64 --optimizing
Change-Id: If0b88373069a755a82f0c9969a1f461f223fb62f
diff --git a/libartbase/base/bit_memory_region.h b/libartbase/base/bit_memory_region.h
index 9f0d546..8532a64 100644
--- a/libartbase/base/bit_memory_region.h
+++ b/libartbase/base/bit_memory_region.h
@@ -261,8 +261,13 @@
     // StackMap BitTable uses over 8 varints in the header, so we need uint64_t.
     uint64_t data = ReadBits<uint64_t>(N * kVarintBits);
     for (size_t i = 0; i < N; i++) {
-      uint32_t x = BitFieldExtract(data, i * kVarintBits, kVarintBits);
-      values[i] = LIKELY(x <= kVarintMax) ? x : ReadBits((x - kVarintMax) * kBitsPerByte);
+      values[i] = BitFieldExtract(data, i * kVarintBits, kVarintBits);
+    }
+    // Do the second part in its own loop as that seems to produce better code in clang.
+    for (size_t i = 0; i < N; i++) {
+      if (UNLIKELY(values[i] > kVarintMax)) {
+        values[i] = ReadBits((values[i] - kVarintMax) * kBitsPerByte);
+      }
     }
     return values;
   }