Stack maps: Micro-optimize LoadBits
Avoid branching on whether the loaded bits span two words.
This speeds up CodeInfo by 5%, and maps startup by 0.05%.
Test: test.py -b --host --64 --optimizing
Change-Id: Ifb4418e1e6670d8f6c8901c8088d009a31844132
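
For context, a minimal standalone sketch of the branchless load this change introduces; the function name load_bits and the fixed uint32_t word type are illustrative assumptions, not the actual BitMemoryRegion API:

  #include <cstddef>
  #include <cstdint>
  #include <limits>

  // Illustrative branchless bit-field load over naturally-aligned 32-bit words.
  // Preconditions: 1 <= bit_length <= 32 and the addressed words are in bounds.
  uint32_t load_bits(const uint32_t* data, size_t bit_offset, size_t bit_length) {
    constexpr size_t kWidth = 32;
    size_t index = bit_offset / kWidth;
    size_t shift = bit_offset % kWidth;
    uint32_t value = data[index] >> shift;
    // Word holding the most significant requested bit. If the field does not
    // cross a word boundary this is the same word, so no branch is needed and
    // no out-of-bounds word is ever touched.
    uint32_t extra = data[index + (shift + (bit_length - 1)) / kWidth];
    // Mask clearing bits above the field; the shift is split so that
    // bit_length == kWidth never shifts by the full width (undefined behaviour).
    uint32_t clear = (std::numeric_limits<uint32_t>::max() << 1) << (bit_length - 1);
    // '& (kWidth - 1)' keeps the shift defined when shift == 0; in that case
    // value == extra and the OR contributes nothing new.
    return (value | (extra << ((kWidth - shift) & (kWidth - 1)))) & ~clear;
  }
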
diff --git a/libartbase/base/bit_memory_region.h b/libartbase/base/bit_memory_region.h
index 8532a64..e98da24 100644
--- a/libartbase/base/bit_memory_region.h
+++ b/libartbase/base/bit_memory_region.h
@@ -115,17 +115,22 @@
if (bit_length == 0) {
return 0;
}
+ // Load the naturally-aligned value which contains the least significant bit.
Result* data = reinterpret_cast<Result*>(data_);
size_t width = BitSizeOf<Result>();
- Result clear = (std::numeric_limits<Result>::max() << 1) << (bit_length - 1);
size_t index = (bit_start_ + bit_offset) / width;
size_t shift = (bit_start_ + bit_offset) % width;
Result value = data[index] >> shift;
- size_t finished_bits = width - shift;
- if (finished_bits < bit_length) {
- value |= data[index + 1] << finished_bits;
- }
- return value & ~clear;
+ // Load extra value containing the most significant bit (it might be the same one).
+ // We cannot just load the following value as that could potentially cause SIGSEGV.
+ Result extra = data[index + (shift + (bit_length - 1)) / width];
+ // Mask to clear unwanted bits (the shift is split in two to avoid an undefined shift by the full width).
+ Result clear = (std::numeric_limits<Result>::max() << 1) << (bit_length - 1);
+ // Prepend the extra value. The explicit '& (width - 1)' keeps the shift defined.
+ // It is a no-op for `shift != 0`; if `shift == 0`, then `value == extra`, since
+ // `bit_length <= width` means `value` and `extra` are read from the same location.
+ // The '& (width - 1)' is implied by the shift instruction on ARM and is removed by the compiler.
+ return (value | (extra << ((width - shift) & (width - 1)))) & ~clear;
}
// Store `bit_length` bits in `data` starting at given `bit_offset`.
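
As a quick, hedged sanity check of the boundary cases called out in the new comments (a field spanning two words, and `shift == 0` with `bit_length == width`), the following compares the load_bits sketch above against a naive bit-by-bit reader; it is a test sketch assuming that function is in the same file, not the ART test harness:

  #include <cassert>
  #include <cstddef>
  #include <cstdint>

  uint32_t load_bits(const uint32_t* data, size_t bit_offset, size_t bit_length);

  // Naive reference: assemble the field one bit at a time.
  uint32_t load_bits_naive(const uint32_t* data, size_t bit_offset, size_t bit_length) {
    uint32_t result = 0;
    for (size_t i = 0; i < bit_length; ++i) {
      size_t bit = bit_offset + i;
      result |= ((data[bit / 32] >> (bit % 32)) & 1u) << i;
    }
    return result;
  }

  int main() {
    const uint32_t data[2] = {0xDEADBEEFu, 0x12345678u};
    // Field crossing the word boundary (bits 28..35): exercises the 'extra' word.
    assert(load_bits(data, 28, 8) == load_bits_naive(data, 28, 8));
    // shift == 0 with the maximum width: value and extra come from the same word.
    assert(load_bits(data, 0, 32) == 0xDEADBEEFu);
    // Narrow field entirely inside the second word.
    assert(load_bits(data, 40, 5) == load_bits_naive(data, 40, 5));
    return 0;
  }
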