Revert "Stack maps: Interleave consecutive varints."
This reverts commit a2b34561a7faca95d0a4f8194ad155798e238e37.
Reason for revert: Suspected cause of the regression tracked in b/135638469; reverting to the pre-interleaving varint encoding while that bug is investigated. (NOTE(review): the template placeholder was left unfilled — confirm the exact failure before landing.)
Change-Id: Ie5b220e429e101bb5fa2606665a9c8cb64308ad3
Bug: 135638469
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 87e15ba..87702cc 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -307,14 +307,12 @@
ScopedArenaVector<uint8_t> buffer(allocator_->Adapter(kArenaAllocStackMapStream));
BitMemoryWriter<ScopedArenaVector<uint8_t>> out(&buffer);
- out.WriteInterleavedVarints(std::array<uint32_t, CodeInfo::kNumHeaders>{
- flags,
- packed_frame_size_,
- core_spill_mask_,
- fp_spill_mask_,
- num_dex_registers_,
- bit_table_flags,
- });
+ out.WriteVarint(flags);
+ out.WriteVarint(packed_frame_size_);
+ out.WriteVarint(core_spill_mask_);
+ out.WriteVarint(fp_spill_mask_);
+ out.WriteVarint(num_dex_registers_);
+ out.WriteVarint(bit_table_flags);
ForEachBitTable([&out](size_t, auto bit_table) {
if (bit_table->size() != 0) { // Skip empty bit-tables.
bit_table->Encode(out);
diff --git a/libartbase/base/bit_memory_region.h b/libartbase/base/bit_memory_region.h
index f19bcf1..50d132e 100644
--- a/libartbase/base/bit_memory_region.h
+++ b/libartbase/base/bit_memory_region.h
@@ -22,8 +22,6 @@
#include "bit_utils.h"
#include "memory_tool.h"
-#include <array>
-
namespace art {
// Bit memory region is a bit offset subregion of a normal memoryregion. This is useful for
@@ -39,9 +37,11 @@
BitMemoryRegion() = default;
ALWAYS_INLINE BitMemoryRegion(uint8_t* data, ssize_t bit_start, size_t bit_size) {
// Normalize the data pointer. Note that bit_start may be negative.
- data_ = AlignDown(data + (bit_start >> kBitsPerByteLog2), kPageSize);
- bit_start_ = bit_start + kBitsPerByte * (data - data_);
+ uint8_t* aligned_data = AlignDown(data + (bit_start >> kBitsPerByteLog2), sizeof(uintptr_t));
+ data_ = reinterpret_cast<uintptr_t*>(aligned_data);
+ bit_start_ = bit_start + kBitsPerByte * (data - aligned_data);
bit_size_ = bit_size;
+ DCHECK_LT(bit_start_, static_cast<size_t>(kBitsPerIntPtrT));
}
ALWAYS_INLINE explicit BitMemoryRegion(MemoryRegion region)
: BitMemoryRegion(region.begin(), /* bit_start */ 0, region.size_in_bits()) {
@@ -55,7 +55,7 @@
const uint8_t* data() const {
DCHECK_ALIGNED(bit_start_, kBitsPerByte);
- return data_ + bit_start_ / kBitsPerByte;
+ return reinterpret_cast<const uint8_t*>(data_) + bit_start_ / kBitsPerByte;
}
size_t size_in_bits() const {
@@ -87,45 +87,42 @@
// significant bit in the first byte.
ALWAYS_INLINE bool LoadBit(size_t bit_offset) const {
DCHECK_LT(bit_offset, bit_size_);
+ uint8_t* data = reinterpret_cast<uint8_t*>(data_);
size_t index = (bit_start_ + bit_offset) / kBitsPerByte;
size_t shift = (bit_start_ + bit_offset) % kBitsPerByte;
- return ((data_[index] >> shift) & 1) != 0;
+ return ((data[index] >> shift) & 1) != 0;
}
ALWAYS_INLINE void StoreBit(size_t bit_offset, bool value) {
DCHECK_LT(bit_offset, bit_size_);
+ uint8_t* data = reinterpret_cast<uint8_t*>(data_);
size_t index = (bit_start_ + bit_offset) / kBitsPerByte;
size_t shift = (bit_start_ + bit_offset) % kBitsPerByte;
- data_[index] &= ~(1 << shift); // Clear bit.
- data_[index] |= (value ? 1 : 0) << shift; // Set bit.
+ data[index] &= ~(1 << shift); // Clear bit.
+ data[index] |= (value ? 1 : 0) << shift; // Set bit.
DCHECK_EQ(value, LoadBit(bit_offset));
}
// Load `bit_length` bits from `data` starting at given `bit_offset`.
// The least significant bit is stored in the smallest memory offset.
ATTRIBUTE_NO_SANITIZE_ADDRESS // We might touch extra bytes due to the alignment.
- template<typename Result = size_t>
- ALWAYS_INLINE Result LoadBits(size_t bit_offset, size_t bit_length) const {
- static_assert(std::is_integral<Result>::value, "Result must be integral");
- static_assert(std::is_unsigned<Result>::value, "Result must be unsigned");
- DCHECK(IsAligned<sizeof(Result)>(data_));
+ ALWAYS_INLINE uint32_t LoadBits(size_t bit_offset, size_t bit_length) const {
+ DCHECK(IsAligned<sizeof(uintptr_t)>(data_));
DCHECK_LE(bit_offset, bit_size_);
DCHECK_LE(bit_length, bit_size_ - bit_offset);
- DCHECK_LE(bit_length, BitSizeOf<Result>());
+ DCHECK_LE(bit_length, BitSizeOf<uint32_t>());
if (bit_length == 0) {
return 0;
}
- Result* data = reinterpret_cast<Result*>(data_);
- size_t width = BitSizeOf<Result>();
- Result clear = (std::numeric_limits<Result>::max() << 1) << (bit_length - 1);
- size_t index = (bit_start_ + bit_offset) / width;
- size_t shift = (bit_start_ + bit_offset) % width;
- Result value = data[index] >> shift;
- size_t finished_bits = width - shift;
+ uintptr_t mask = std::numeric_limits<uintptr_t>::max() >> (kBitsPerIntPtrT - bit_length);
+ size_t index = (bit_start_ + bit_offset) / kBitsPerIntPtrT;
+ size_t shift = (bit_start_ + bit_offset) % kBitsPerIntPtrT;
+ uintptr_t value = data_[index] >> shift;
+ size_t finished_bits = kBitsPerIntPtrT - shift;
if (finished_bits < bit_length) {
- value |= data[index + 1] << finished_bits;
+ value |= data_[index + 1] << finished_bits;
}
- return value & ~clear;
+ return value & mask;
}
// Store `bit_length` bits in `data` starting at given `bit_offset`.
@@ -140,15 +137,16 @@
}
// Write data byte by byte to avoid races with other threads
// on bytes that do not overlap with this region.
+ uint8_t* data = reinterpret_cast<uint8_t*>(data_);
uint32_t mask = std::numeric_limits<uint32_t>::max() >> (BitSizeOf<uint32_t>() - bit_length);
size_t index = (bit_start_ + bit_offset) / kBitsPerByte;
size_t shift = (bit_start_ + bit_offset) % kBitsPerByte;
- data_[index] &= ~(mask << shift); // Clear bits.
- data_[index] |= (value << shift); // Set bits.
+ data[index] &= ~(mask << shift); // Clear bits.
+ data[index] |= (value << shift); // Set bits.
size_t finished_bits = kBitsPerByte - shift;
for (int i = 1; finished_bits < bit_length; i++, finished_bits += kBitsPerByte) {
- data_[index + i] &= ~(mask >> finished_bits); // Clear bits.
- data_[index + i] |= (value >> finished_bits); // Set bits.
+ data[index + i] &= ~(mask >> finished_bits); // Clear bits.
+ data[index + i] |= (value >> finished_bits); // Set bits.
}
DCHECK_EQ(value, LoadBits(bit_offset, bit_length));
}
@@ -203,13 +201,14 @@
}
private:
- uint8_t* data_ = nullptr; // The pointer is page aligned.
+ // The data pointer must be naturally aligned. This makes loading code faster.
+ uintptr_t* data_ = nullptr;
size_t bit_start_ = 0;
size_t bit_size_ = 0;
};
-constexpr uint32_t kVarintBits = 4; // Minimum number of bits used for varint.
-constexpr uint32_t kVarintMax = 11; // Maximum value which is stored "inline".
+constexpr uint32_t kVarintHeaderBits = 4;
+constexpr uint32_t kVarintSmallValue = 11; // Maximum value which is stored as-is.
class BitMemoryReader {
public:
@@ -233,9 +232,8 @@
return finished_region_.Subregion(bit_offset, bit_length);
}
- template<typename Result = size_t>
- ALWAYS_INLINE Result ReadBits(size_t bit_length) {
- return ReadRegion(bit_length).LoadBits<Result>(/* bit_offset */ 0, bit_length);
+ ALWAYS_INLINE uint32_t ReadBits(size_t bit_length) {
+ return ReadRegion(bit_length).LoadBits(/* bit_offset */ 0, bit_length);
}
ALWAYS_INLINE bool ReadBit() {
@@ -247,24 +245,35 @@
// Values 0..11 represent the result as-is, with no further following bits.
// Values 12..15 mean the result is in the next 8/16/24/32-bits respectively.
ALWAYS_INLINE uint32_t ReadVarint() {
- uint32_t x = ReadBits(kVarintBits);
- return (x <= kVarintMax) ? x : ReadBits((x - kVarintMax) * kBitsPerByte);
+ uint32_t x = ReadBits(kVarintHeaderBits);
+ if (x > kVarintSmallValue) {
+ x = ReadBits((x - kVarintSmallValue) * kBitsPerByte);
+ }
+ return x;
}
- // Read N 'interleaved' varints (different to just reading consecutive varints).
- // All small values are stored first and the large values are stored after them.
- // This requires fewer bit-reads compared to indidually storing the varints.
- template<size_t N>
- ALWAYS_INLINE std::array<uint32_t, N> ReadInterleavedVarints() {
- static_assert(N * kVarintBits <= sizeof(uint64_t) * kBitsPerByte, "N too big");
- std::array<uint32_t, N> values;
- // StackMap BitTable uses over 8 varints in the header, so we need uint64_t.
- uint64_t data = ReadBits<uint64_t>(N * kVarintBits);
- for (size_t i = 0; i < N; i++) {
- uint32_t x = BitFieldExtract(data, i * kVarintBits, kVarintBits);
- values[i] = LIKELY(x <= kVarintMax) ? x : ReadBits((x - kVarintMax) * kBitsPerByte);
+ // Optimized version to read several consecutive varints.
+ // It reads all the headers at once in a single bit read.
+ template<size_t N> // Inference works only with ref-arrays.
+ ALWAYS_INLINE void ReadVarints(uint32_t (&varints)[N]) {
+ constexpr size_t kBatch = std::min(N, sizeof(uint32_t) * kBitsPerByte / kVarintHeaderBits);
+ uint32_t headers = ReadBits(kBatch * kVarintHeaderBits);
+ uint32_t* out = varints;
+ for (size_t i = 0; i < kBatch; out++) {
+ uint32_t header = BitFieldExtract(headers, (i++) * kVarintHeaderBits, kVarintHeaderBits);
+ if (LIKELY(header <= kVarintSmallValue)) {
+ // Fast-path: consume one of the headers and continue to the next varint.
+ *out = header;
+ } else {
        // Slow-path: rollback reader, read large value, and read remaining headers.
+ finished_region_.Resize(finished_region_.size_in_bits() - (kBatch-i) * kVarintHeaderBits);
+ *out = ReadBits((header - kVarintSmallValue) * kBitsPerByte);
+ headers = ReadBits((kBatch-i) * kVarintHeaderBits) << (i * kVarintHeaderBits);
+ }
}
- return values;
+ for (size_t i = kBatch; i < N; i++, out++) {
+ *out = ReadVarint();
+ }
}
private:
@@ -311,26 +320,16 @@
Allocate(1).StoreBit(/* bit_offset */ 0, value);
}
- template<size_t N>
- ALWAYS_INLINE void WriteInterleavedVarints(std::array<uint32_t, N> values) {
- // Write small values (or the number of bytes needed for the large values).
- for (uint32_t value : values) {
- if (value > kVarintMax) {
- WriteBits(kVarintMax + BitsToBytesRoundUp(MinimumBitsToStore(value)), kVarintBits);
- } else {
- WriteBits(value, kVarintBits);
- }
- }
- // Write large values.
- for (uint32_t value : values) {
- if (value > kVarintMax) {
- WriteBits(value, BitsToBytesRoundUp(MinimumBitsToStore(value)) * kBitsPerByte);
- }
- }
- }
-
+ // Write variable-length bit-packed integer.
ALWAYS_INLINE void WriteVarint(uint32_t value) {
- WriteInterleavedVarints<1>({value});
+ if (value <= kVarintSmallValue) {
+ WriteBits(value, kVarintHeaderBits);
+ } else {
+ uint32_t num_bits = RoundUp(MinimumBitsToStore(value), kBitsPerByte);
+ uint32_t header = kVarintSmallValue + num_bits / kBitsPerByte;
+ WriteBits(header, kVarintHeaderBits);
+ WriteBits(value, num_bits);
+ }
}
ALWAYS_INLINE void ByteAlign() {
diff --git a/libartbase/base/bit_memory_region_test.cc b/libartbase/base/bit_memory_region_test.cc
index bf9c6d6..02623bf 100644
--- a/libartbase/base/bit_memory_region_test.cc
+++ b/libartbase/base/bit_memory_region_test.cc
@@ -43,7 +43,7 @@
BitMemoryReader reader(buffer.data(), start_bit_offset);
uint32_t result = reader.ReadVarint();
- uint32_t upper_bound = RoundUp(MinimumBitsToStore(value), kBitsPerByte) + kVarintBits;
+ uint32_t upper_bound = RoundUp(MinimumBitsToStore(value), kBitsPerByte) + kVarintHeaderBits;
EXPECT_EQ(writer.NumberOfWrittenBits(), reader.NumberOfReadBits());
EXPECT_EQ(value, result);
EXPECT_GE(upper_bound, writer.NumberOfWrittenBits());
diff --git a/libartbase/base/bit_table.h b/libartbase/base/bit_table.h
index 5ec162d..1984265 100644
--- a/libartbase/base/bit_table.h
+++ b/libartbase/base/bit_table.h
@@ -49,7 +49,8 @@
ALWAYS_INLINE void Decode(BitMemoryReader& reader) {
// Decode row count and column sizes from the table header.
- std::array<uint32_t, 1+kNumColumns> header = reader.ReadInterleavedVarints<1+kNumColumns>();
+ uint32_t header[1 + kNumColumns];
+ reader.ReadVarints(header);
num_rows_ = header[0];
column_offset_[0] = 0;
for (uint32_t i = 0; i < kNumColumns; i++) {
@@ -334,7 +335,7 @@
}
// Calculate the column bit widths based on the current data.
- void Measure(/*out*/ uint32_t* column_bits) const {
+ void Measure(/*out*/ std::array<uint32_t, kNumColumns>* column_bits) const {
uint32_t max_column_value[kNumColumns];
std::fill_n(max_column_value, kNumColumns, 0);
for (uint32_t r = 0; r < size(); r++) {
@@ -343,7 +344,7 @@
}
}
for (uint32_t c = 0; c < kNumColumns; c++) {
- column_bits[c] = MinimumBitsToStore(max_column_value[c]);
+ (*column_bits)[c] = MinimumBitsToStore(max_column_value[c]);
}
}
@@ -352,12 +353,14 @@
void Encode(BitMemoryWriter<Vector>& out) const {
size_t initial_bit_offset = out.NumberOfWrittenBits();
+ std::array<uint32_t, kNumColumns> column_bits;
+ Measure(&column_bits);
+
// Write table header.
- std::array<uint32_t, 1 + kNumColumns> header;
- header[0] = size();
- uint32_t* column_bits = header.data() + 1;
- Measure(column_bits);
- out.WriteInterleavedVarints(header);
+ out.WriteVarint(size());
+ for (uint32_t c = 0; c < kNumColumns; c++) {
+ out.WriteVarint(column_bits[c]);
+ }
// Write table data.
for (uint32_t r = 0; r < size(); r++) {
@@ -441,10 +444,8 @@
size_t initial_bit_offset = out.NumberOfWrittenBits();
// Write table header.
- out.WriteInterleavedVarints(std::array<uint32_t, 2>{
- dchecked_integral_cast<uint32_t>(size()),
- dchecked_integral_cast<uint32_t>(max_num_bits_),
- });
+ out.WriteVarint(size());
+ out.WriteVarint(max_num_bits_);
// Write table data.
for (MemoryRegion row : rows_) {
diff --git a/runtime/oat.h b/runtime/oat.h
index 54d111c..c02ac0b 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
class PACKED(4) OatHeader {
public:
static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
- // Last oat version changed reason: Optimize stack map decoding - interleave varints.
- static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '3', '\0' } };
+ // Last oat version changed reason: Stack maps: Handle special cases using flags.
+ static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '2', '\0' } };
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index eebca85..2300d1f 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -33,13 +33,14 @@
void CodeInfo::Decode(const uint8_t* data, DecodeFlags flags) {
BitMemoryReader reader(data);
- std::array<uint32_t, kNumHeaders> header = reader.ReadInterleavedVarints<kNumHeaders>();
+ uint32_t header[kNumHeaders];
+ reader.ReadVarints(header);
ForEachHeaderField([this, &header](size_t i, auto member_pointer) {
this->*member_pointer = header[i];
});
ForEachBitTableField([this, &reader](size_t i, auto member_pointer) {
auto& table = this->*member_pointer;
- if (LIKELY(HasBitTable(i))) {
+ if (HasBitTable(i)) {
if (UNLIKELY(IsBitTableDeduped(i))) {
ssize_t bit_offset = reader.NumberOfReadBits() - reader.ReadVarint();
BitMemoryReader reader2(reader.data(), bit_offset); // The offset is negative.
@@ -59,16 +60,12 @@
writer_.ByteAlign();
size_t deduped_offset = writer_.NumberOfWrittenBits() / kBitsPerByte;
- // The back-reference offset takes space so dedupe is not worth it for tiny tables.
- constexpr size_t kMinDedupSize = 32; // Assume 32-bit offset on average.
-
// Read the existing code info and find (and keep) dedup-map iterator for each table.
// The iterator stores BitMemoryRegion and bit_offset of previous identical BitTable.
BitMemoryReader reader(code_info_data);
CodeInfo code_info; // Temporary storage for decoded data.
- std::array<uint32_t, kNumHeaders> header = reader.ReadInterleavedVarints<kNumHeaders>();
- ForEachHeaderField([&code_info, &header](size_t i, auto member_pointer) {
- code_info.*member_pointer = header[i];
+ ForEachHeaderField([&reader, &code_info](size_t, auto member_pointer) {
+ code_info.*member_pointer = reader.ReadVarint();
});
std::map<BitMemoryRegion, uint32_t, BitMemoryRegion::Less>::iterator it[kNumBitTables];
ForEachBitTableField([this, &reader, &code_info, &it](size_t i, auto member_pointer) {
@@ -78,17 +75,16 @@
(code_info.*member_pointer).Decode(reader);
BitMemoryRegion region = reader.GetReadRegion().Subregion(bit_table_start);
it[i] = dedupe_map_.emplace(region, /* default bit_offset */ 0).first;
- if (it[i]->second != 0 && region.size_in_bits() > kMinDedupSize) { // Seen before and large?
+ if (it[i]->second != 0 && region.size_in_bits() > 32) { // Seen before and large?
code_info.SetBitTableDeduped(i); // Mark as deduped before we write header.
}
}
});
// Write the code info back, but replace deduped tables with relative offsets.
- ForEachHeaderField([&code_info, &header](size_t i, auto member_pointer) {
- header[i] = code_info.*member_pointer;
+ ForEachHeaderField([this, &code_info](size_t, auto member_pointer) {
+ writer_.WriteVarint(code_info.*member_pointer);
});
- writer_.WriteInterleavedVarints(header);
ForEachBitTableField([this, &code_info, &it](size_t i, auto) {
if (code_info.HasBitTable(i)) {
uint32_t& bit_offset = it[i]->second;
@@ -110,8 +106,7 @@
DCHECK_EQ(old_code_info.*member_pointer, new_code_info.*member_pointer);
}
});
- ForEachBitTableField([&old_code_info, &new_code_info](size_t i, auto member_pointer) {
- DCHECK_EQ(old_code_info.HasBitTable(i), new_code_info.HasBitTable(i));
+ ForEachBitTableField([&old_code_info, &new_code_info](size_t, auto member_pointer) {
DCHECK((old_code_info.*member_pointer).Equals(new_code_info.*member_pointer));
});
}
@@ -208,9 +203,8 @@
Stats* codeinfo_stats = parent->Child("CodeInfo");
BitMemoryReader reader(code_info_data);
CodeInfo code_info; // Temporary storage for decoded tables.
- std::array<uint32_t, kNumHeaders> header = reader.ReadInterleavedVarints<kNumHeaders>();
- ForEachHeaderField([&code_info, &header](size_t i, auto member_pointer) {
- code_info.*member_pointer = header[i];
+ ForEachHeaderField([&reader, &code_info](size_t, auto member_pointer) {
+ code_info.*member_pointer = reader.ReadVarint();
});
codeinfo_stats->Child("Header")->AddBits(reader.NumberOfReadBits());
ForEachBitTableField([codeinfo_stats, &reader, &code_info](size_t i, auto member_pointer) {
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index b438074..c088eb6 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -443,7 +443,8 @@
ALWAYS_INLINE static QuickMethodFrameInfo DecodeFrameInfo(const uint8_t* code_info_data) {
BitMemoryReader reader(code_info_data);
- std::array<uint32_t, kNumHeaders> header = reader.ReadInterleavedVarints<kNumHeaders>();
+ uint32_t header[4]; // flags, packed_frame_size, core_spill_mask, fp_spill_mask.
+ reader.ReadVarints(header);
return QuickMethodFrameInfo(header[1] * kStackAlignment, header[2], header[3]);
}