Refactor and optimize memory region bit functions
Move the optimized bit reading from FieldEncoding to MemoryRegion
and add an optimized StoreBits to MemoryRegion.
Compilation of a large app on host:
Before:
Time -j1: 31.897s
2.00% art::MemoryRegion::StoreBits(unsigned long, unsigned int, unsigned long)
After:
Time -j1: 29.620s
0.39% art::MemoryRegion::StoreBits(unsigned long, unsigned int, unsigned long)
Bug: 34621054
Test: test-art-host
Change-Id: I0509613da83cc5741d5cfada3f8a8af503784e9e
diff --git a/runtime/memory_region.cc b/runtime/memory_region.cc
index a5c70c3..5bf0f40 100644
--- a/runtime/memory_region.cc
+++ b/runtime/memory_region.cc
@@ -33,4 +33,36 @@
from.pointer(), from.size());
}
+void MemoryRegion::StoreBits(uintptr_t bit_offset, uint32_t value, size_t length) {
+ DCHECK_LE(value, MaxInt<uint32_t>(length));
+ DCHECK_LE(length, BitSizeOf<uint32_t>());
+ DCHECK_LE(bit_offset + length, size_in_bits());
+ if (length == 0) {
+ return;
+ }
+ // Within each byte, bits are numbered {7 6 5 4 3 2 1 0}; bit 0 is the least significant.
+ // The number of bits left in the current byte is kBitsPerByte - (bit_offset % kBitsPerByte).
+ uint8_t* out = ComputeInternalPointer<uint8_t>(bit_offset >> kBitsPerByteLog2);
+ auto orig_len = length;
+ auto orig_value = value;
+ uintptr_t bit_remainder = bit_offset % kBitsPerByte;
+ while (true) {
+ const uintptr_t remaining_bits = kBitsPerByte - bit_remainder;
+ if (length <= remaining_bits) {
+ // The rest of the value fits in the current byte: write it under a mask and finish.
+ size_t mask = ((1 << length) - 1) << bit_remainder;
+ *out = (*out & ~mask) | (value << bit_remainder);
+ break;
+ }
+ // Fill the rest of the current byte with the low bits of value, then advance.
+ size_t value_mask = (1 << remaining_bits) - 1;
+ *out = (*out & ~(value_mask << bit_remainder)) | ((value & value_mask) << bit_remainder);
+ value >>= remaining_bits;
+ bit_remainder = 0;
+ length -= remaining_bits;
+ ++out;
+ }
+ DCHECK_EQ(LoadBits(bit_offset, orig_len), orig_value) << bit_offset << " " << orig_len;
+}
+
} // namespace art
diff --git a/runtime/memory_region.h b/runtime/memory_region.h
index fe3f917..f55dff7 100644
--- a/runtime/memory_region.h
+++ b/runtime/memory_region.h
@@ -124,11 +124,35 @@
// The bit at the smallest offset is the least significant bit in the
// loaded value. `length` must not be larger than the number of bits
// contained in the return value (32).
- uint32_t LoadBits(uintptr_t bit_offset, size_t length) const {
- CHECK_LE(length, sizeof(uint32_t) * kBitsPerByte);
- uint32_t value = 0u;
+ ALWAYS_INLINE uint32_t LoadBits(uintptr_t bit_offset, size_t length) const {
+ DCHECK_LE(length, BitSizeOf<uint32_t>());
+ DCHECK_LE(bit_offset + length, size_in_bits());
+ if (UNLIKELY(length == 0)) {
+ // An empty range yields 0 without touching any memory.
+ return 0;
+ }
+ const uint8_t* address = start() + bit_offset / kBitsPerByte;
+ const uint32_t shift = bit_offset & (kBitsPerByte - 1);
+ // Load the value, reading only the bytes the range touches (at most five).
+ const uint32_t load_bit_count = shift + length;
+ uint32_t value = address[0] >> shift;
+ if (load_bit_count > 8) {
+ value |= static_cast<uint32_t>(address[1]) << (8 - shift);
+ if (load_bit_count > 16) {
+ value |= static_cast<uint32_t>(address[2]) << (16 - shift);
+ if (load_bit_count > 24) {
+ value |= static_cast<uint32_t>(address[3]) << (24 - shift);
+ if (load_bit_count > 32) {
+ value |= static_cast<uint32_t>(address[4]) << (32 - shift);
+ }
+ }
+ }
+ }
+ // Clear the unwanted high bits; length > 0 here keeps the shift below BitSizeOf(value).
+ uint32_t clear_bit_count = BitSizeOf(value) - length;
+ value = (value << clear_bit_count) >> clear_bit_count;
for (size_t i = 0; i < length; ++i) {
- value |= LoadBit(bit_offset + i) << i;
+ DCHECK_EQ((value >> i) & 1, LoadBit(bit_offset + i));
}
return value;
}
@@ -137,13 +161,7 @@
// `bit_offset`. The bit at the smallest offset is the least significant
// bit of the stored `value`. `value` must not be larger than `length`
// bits.
- void StoreBits(uintptr_t bit_offset, uint32_t value, size_t length) {
- CHECK_LE(value, MaxInt<uint32_t>(length));
- for (size_t i = 0; i < length; ++i) {
- bool ith_bit = value & (1 << i);
- StoreBit(bit_offset + i, ith_bit);
- }
- }
+ void StoreBits(uintptr_t bit_offset, uint32_t value, size_t length);
void CopyFrom(size_t offset, const MemoryRegion& from) const;
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index cd9a3f0..5782521 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -667,32 +667,7 @@
ALWAYS_INLINE int32_t Load(const MemoryRegion& region) const {
DCHECK_LE(end_offset_, region.size_in_bits());
- const size_t bit_count = BitSize();
- if (bit_count == 0) {
- // Do not touch any memory if the range is empty.
- return min_value_;
- }
- uint8_t* address = region.start() + start_offset_ / kBitsPerByte;
- const uint32_t shift = start_offset_ & (kBitsPerByte - 1);
- // Load the value (reading only the strictly needed bytes).
- const uint32_t load_bit_count = shift + bit_count;
- uint32_t value = *address++ >> shift;
- if (load_bit_count > 8) {
- value |= static_cast<uint32_t>(*address++) << (8 - shift);
- if (load_bit_count > 16) {
- value |= static_cast<uint32_t>(*address++) << (16 - shift);
- if (load_bit_count > 24) {
- value |= static_cast<uint32_t>(*address++) << (24 - shift);
- if (load_bit_count > 32) {
- value |= static_cast<uint32_t>(*address++) << (32 - shift);
- }
- }
- }
- }
- // Clear unwanted most significant bits.
- uint32_t clear_bit_count = 32 - bit_count;
- value = (value << clear_bit_count) >> clear_bit_count;
- return value + min_value_;
+ return static_cast<int32_t>(region.LoadBits(start_offset_, BitSize())) + min_value_;
}
ALWAYS_INLINE void Store(MemoryRegion region, int32_t value) const {