-rw-r--r--  compiler/optimizing/stack_map_stream.cc | 101
-rw-r--r--  compiler/optimizing/stack_map_stream.h  |  12
-rw-r--r--  compiler/optimizing/stack_map_test.cc   |   4
-rw-r--r--  oatdump/oatdump.cc                       |   2
-rw-r--r--  runtime/oat.h                            |   4
-rw-r--r--  runtime/quick_exception_handler.cc       |   2
-rw-r--r--  runtime/stack_map.cc                     |   2
-rw-r--r--  runtime/stack_map.h                      |  39
-rw-r--r--  runtime/thread.cc                        |   3
9 files changed, 106 insertions(+), 63 deletions(-)
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index b40ea3768a..c6e375a1b2 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -48,6 +48,10 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
         ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
     current_entry_.dex_register_entry.live_dex_registers_mask->ClearAllBits();
   }
+  if (sp_mask != nullptr) {
+    stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
+  }
+
   current_dex_register_ = 0;
 }
 
@@ -213,32 +217,11 @@ size_t StackMapStream::PrepareForFillIn() {
   PrepareMethodIndices();
 
   // Dedup stack masks. Needs to be done first as it modifies the stack map entry.
-  BitmapTableBuilder stack_mask_builder(allocator_);
-  for (StackMapEntry& stack_map : stack_maps_) {
-    BitVector* mask = stack_map.sp_mask;
-    size_t num_bits = (mask != nullptr) ? mask->GetNumberOfBits() : 0;
-    if (num_bits != 0) {
-      stack_map.stack_mask_index = stack_mask_builder.Dedup(mask->GetRawStorage(), num_bits);
-    } else {
-      stack_map.stack_mask_index = StackMap::kNoValue;
-    }
-  }
+  size_t stack_mask_bits = stack_mask_max_ + 1;  // Need room for max element too.
+  size_t num_stack_masks = PrepareStackMasks(stack_mask_bits);
 
   // Dedup register masks. Needs to be done first as it modifies the stack map entry.
-  BitTableBuilder<std::array<uint32_t, RegisterMask::kCount>> register_mask_builder(allocator_);
-  for (StackMapEntry& stack_map : stack_maps_) {
-    uint32_t register_mask = stack_map.register_mask;
-    if (register_mask != 0) {
-      uint32_t shift = LeastSignificantBit(register_mask);
-      std::array<uint32_t, RegisterMask::kCount> entry = {
-        register_mask >> shift,
-        shift,
-      };
-      stack_map.register_mask_index = register_mask_builder.Dedup(&entry);
-    } else {
-      stack_map.register_mask_index = StackMap::kNoValue;
-    }
-  }
+  size_t num_register_masks = PrepareRegisterMasks();
 
   // Write dex register maps.
   MemoryRegion dex_register_map_region =
@@ -318,8 +301,31 @@ size_t StackMapStream::PrepareForFillIn() {
   stack_map_builder.Encode(&out_, &bit_offset);
   invoke_info_builder.Encode(&out_, &bit_offset);
   inline_info_builder.Encode(&out_, &bit_offset);
+
+  // Write register masks table.
+  BitTableBuilder<uint32_t> register_mask_builder(allocator_);
+  for (size_t i = 0; i < num_register_masks; ++i) {
+    register_mask_builder.Add(register_masks_[i]);
+  }
   register_mask_builder.Encode(&out_, &bit_offset);
-  stack_mask_builder.Encode(&out_, &bit_offset);
+
+  // Write stack masks table.
+  EncodeVarintBits(&out_, &bit_offset, stack_mask_bits);
+  out_.resize(BitsToBytesRoundUp(bit_offset + stack_mask_bits * num_stack_masks));
+  BitMemoryRegion stack_mask_region(MemoryRegion(out_.data(), out_.size()),
+                                    bit_offset,
+                                    stack_mask_bits * num_stack_masks);
+  if (stack_mask_bits > 0) {
+    for (size_t i = 0; i < num_stack_masks; ++i) {
+      size_t stack_mask_bytes = BitsToBytesRoundUp(stack_mask_bits);
+      BitMemoryRegion src(MemoryRegion(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes));
+      BitMemoryRegion dst = stack_mask_region.Subregion(i * stack_mask_bits, stack_mask_bits);
+      for (size_t bit_index = 0; bit_index < stack_mask_bits; bit_index += BitSizeOf<uint32_t>()) {
+        size_t num_bits = std::min<size_t>(stack_mask_bits - bit_index, BitSizeOf<uint32_t>());
+        dst.StoreBits(bit_index, src.LoadBits(bit_index, num_bits), num_bits);
+      }
+    }
+  }
 
   return UnsignedLeb128Size(out_.size()) + out_.size();
 }
@@ -442,6 +448,17 @@ void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info,
   }
 }
 
+size_t StackMapStream::PrepareRegisterMasks() {
+  register_masks_.resize(stack_maps_.size(), 0u);
+  ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream));
+  for (StackMapEntry& stack_map : stack_maps_) {
+    const size_t index = dedupe.size();
+    stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second;
+    register_masks_[index] = stack_map.register_mask;
+  }
+  return dedupe.size();
+}
+
 void StackMapStream::PrepareMethodIndices() {
   CHECK(method_indices_.empty());
   method_indices_.resize(stack_maps_.size() + inline_infos_.size());
@@ -464,10 +481,35 @@ void StackMapStream::PrepareMethodIndices() {
   method_indices_.resize(dedupe.size());
 }
 
+size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
+  // Preallocate memory since we do not want it to move (the dedup map will point into it).
+  const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
+  stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u);
+  // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later
+  // when copying out from stack_masks_.
+  ScopedArenaUnorderedMap<MemoryRegion,
+                          size_t,
+                          FNVHash<MemoryRegion>,
+                          MemoryRegion::ContentEquals> dedup(
+      stack_maps_.size(), allocator_->Adapter(kArenaAllocStackMapStream));
+  for (StackMapEntry& stack_map : stack_maps_) {
+    size_t index = dedup.size();
+    MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size);
+    BitMemoryRegion stack_mask_bits(stack_mask);
+    for (size_t i = 0; i < entry_size_in_bits; i++) {
+      stack_mask_bits.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i));
+    }
+    stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second;
+  }
+  return dedup.size();
+}
+
 // Check that all StackMapStream inputs are correctly encoded by trying to read them back.
 void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
   CodeInfo code_info(region);
   DCHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size());
+  DCHECK_EQ(code_info.GetNumberOfStackMaskBits(), static_cast<uint32_t>(stack_mask_max_ + 1));
   DCHECK_EQ(code_info.GetNumberOfLocationCatalogEntries(), location_catalog_entries_.size());
   size_t invoke_info_index = 0;
   for (size_t s = 0; s < stack_maps_.size(); ++s) {
@@ -480,15 +522,18 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
     DCHECK_EQ(stack_map.GetDexPc(), entry.dex_pc);
     DCHECK_EQ(stack_map.GetRegisterMaskIndex(), entry.register_mask_index);
     DCHECK_EQ(code_info.GetRegisterMaskOf(stack_map), entry.register_mask);
+    const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits();
     DCHECK_EQ(stack_map.GetStackMaskIndex(), entry.stack_mask_index);
     BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map);
     if (entry.sp_mask != nullptr) {
       DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
-      for (size_t b = 0; b < stack_mask.size_in_bits(); b++) {
-        DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b)) << b;
+      for (size_t b = 0; b < num_stack_mask_bits; b++) {
+        DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
       }
     } else {
-      DCHECK_EQ(stack_mask.size_in_bits(), 0u);
+      for (size_t b = 0; b < num_stack_mask_bits; b++) {
+        DCHECK_EQ(stack_mask.LoadBit(b), 0u);
+      }
     }
     if (entry.dex_method_index != dex::kDexNoIndex) {
       InvokeInfo invoke_info = code_info.GetInvokeInfo(invoke_info_index);
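The two Prepare*() helpers added above share one deduplication pattern: each stack map's mask is looked up in a hash map whose value is the index the mask would occupy if it were new, and emplace() leaves an existing key untouched, so duplicates resolve to the index of their first occurrence. Below is a minimal standalone sketch of that pattern; DeduplicateMasks and the driver in main() are illustrative names, not ART code.

#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

// Dedup pattern used by PrepareRegisterMasks()/PrepareStackMasks(): each entry
// gets an index into a table of unique values; a repeated value resolves to the
// index recorded at its first occurrence, because emplace() does not overwrite
// an existing key. (Illustrative sketch, not ART code.)
size_t DeduplicateMasks(const std::vector<uint32_t>& masks,
                        std::vector<uint32_t>* table,
                        std::vector<size_t>* indices) {
  table->resize(masks.size(), 0u);  // Worst case: every mask is unique.
  std::unordered_map<uint32_t, size_t> dedupe;
  for (uint32_t mask : masks) {
    const size_t index = dedupe.size();  // Slot this mask gets if it is new.
    indices->push_back(dedupe.emplace(mask, index).first->second);
    table->at(index) = mask;  // Harmless write to an unused slot on a duplicate.
  }
  table->resize(dedupe.size());  // Trim to the number of unique masks.
  return dedupe.size();
}

int main() {
  std::vector<uint32_t> table;
  std::vector<size_t> indices;
  size_t unique = DeduplicateMasks({0x30u, 0u, 0x30u, 0xff0u}, &table, &indices);
  std::printf("%zu unique\n", unique);              // Prints: 3 unique
  for (size_t i : indices) std::printf("%zu ", i);  // Prints: 0 1 0 2
  std::printf("\n");
  return 0;
}

Preallocating the table to the worst case mirrors the real code: PrepareStackMasks() must not let the storage reallocate, because the dedup map's MemoryRegion keys point into it.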
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 19863d882a..ea97cf6530 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -68,8 +68,11 @@ class StackMapStream : public ValueObject {
         location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
         inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
+        stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
+        register_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
         method_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
+        stack_mask_max_(-1),
         out_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(),
                                            allocator->Adapter(kArenaAllocStackMapStream)),
@@ -168,6 +171,12 @@ class StackMapStream : public ValueObject {
  private:
   size_t ComputeDexRegisterLocationCatalogSize() const;
 
+  // Returns the number of unique stack masks.
+  size_t PrepareStackMasks(size_t entry_size_in_bits);
+
+  // Returns the number of unique register masks.
+  size_t PrepareRegisterMasks();
+
   // Prepare and deduplicate method indices.
   void PrepareMethodIndices();
 
@@ -208,8 +217,11 @@ class StackMapStream : public ValueObject {
   // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
   ScopedArenaVector<size_t> dex_register_locations_;
   ScopedArenaVector<InlineInfoEntry> inline_infos_;
+  ScopedArenaVector<uint8_t> stack_masks_;
+  ScopedArenaVector<uint32_t> register_masks_;
   ScopedArenaVector<uint32_t> method_indices_;
   ScopedArenaVector<DexRegisterMapEntry> dex_register_entries_;
+  int stack_mask_max_;
   ScopedArenaVector<uint8_t> out_;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index c372bb9b22..9db7588b3a 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -32,10 +32,10 @@ static bool CheckStackMask(
     const StackMap& stack_map,
     const BitVector& bit_vector) {
   BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map);
-  if (bit_vector.GetNumberOfBits() > stack_mask.size_in_bits()) {
+  if (bit_vector.GetNumberOfBits() > code_info.GetNumberOfStackMaskBits()) {
     return false;
   }
-  for (size_t i = 0; i < stack_mask.size_in_bits(); ++i) {
+  for (size_t i = 0; i < code_info.GetNumberOfStackMaskBits(); ++i) {
     if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) {
       return false;
     }
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index b080f92689..fcd6bfd46c 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1731,7 +1731,7 @@ class OatDumper {
     // Stack masks
     stats_.AddBits(
         Stats::kByteKindCodeInfoStackMasks,
-        code_info.stack_masks_.DataBitSize());
+        code_info.stack_masks_.size_in_bits());
 
     // Register masks
     stats_.AddBits(
diff --git a/runtime/oat.h b/runtime/oat.h
index 8069a15661..7b8f71a3f3 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@ class InstructionSetFeatures;
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  // Last oat version changed reason: Optimize masks in stack maps.
-  static constexpr uint8_t kOatVersion[] = { '1', '4', '5', '\0' };
+  // Last oat version changed reason: Refactor stackmap encoding.
+  static constexpr uint8_t kOatVersion[] = { '1', '4', '4', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
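Returning to PrepareForFillIn() in stack_map_stream.cc above: the staging masks produced by PrepareStackMasks() are byte-aligned, while the final output is bit-packed, so the copy loop moves up to 32 bits per iteration. Here is a self-contained sketch of that chunked repack, with simplified LoadBits/StoreBits stand-ins for ART's BitMemoryRegion; all names are illustrative.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Simplified stand-ins for BitMemoryRegion::LoadBits()/StoreBits(): operate on
// a little-endian bit stream stored in a byte vector, up to 32 bits at a time.
uint32_t LoadBits(const std::vector<uint8_t>& bits, size_t pos, size_t count) {
  uint32_t value = 0;
  for (size_t i = 0; i < count; ++i) {
    value |= static_cast<uint32_t>((bits[(pos + i) / 8] >> ((pos + i) % 8)) & 1u) << i;
  }
  return value;
}

void StoreBits(std::vector<uint8_t>* bits, size_t pos, uint32_t value, size_t count) {
  for (size_t i = 0; i < count; ++i) {
    uint8_t& byte = (*bits)[(pos + i) / 8];
    const uint8_t mask = static_cast<uint8_t>(1u << ((pos + i) % 8));
    byte = ((value >> i) & 1u) ? (byte | mask) : (byte & ~mask);
  }
}

// Copy `mask_bits` bits of mask `i` from a byte-aligned staging buffer into a
// bit-packed destination, in chunks of at most 32 bits, mirroring the loop in
// PrepareForFillIn(). (Illustrative sketch, not ART code.)
void PackMask(const std::vector<uint8_t>& staging, size_t i, size_t mask_bits,
              std::vector<uint8_t>* packed, size_t packed_base_bit) {
  const size_t mask_bytes = (mask_bits + 7) / 8;           // Byte-aligned stride.
  const size_t src_bit = i * mask_bytes * 8;               // Staging is byte-padded.
  const size_t dst_bit = packed_base_bit + i * mask_bits;  // Output is bit-packed.
  for (size_t bit = 0; bit < mask_bits; bit += 32) {
    const size_t n = std::min<size_t>(mask_bits - bit, 32);
    StoreBits(packed, dst_bit + bit, LoadBits(staging, src_bit + bit, n), n);
  }
}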
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 26489209b8..de613d3b20 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -439,7 +439,7 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor {
           const uint8_t* addr = reinterpret_cast<const uint8_t*>(GetCurrentQuickFrame()) + offset;
           value = *reinterpret_cast<const uint32_t*>(addr);
           uint32_t bit = (offset >> 2);
-          if (bit < stack_mask.size_in_bits() && stack_mask.LoadBit(bit)) {
+          if (bit < code_info.GetNumberOfStackMaskBits() && stack_mask.LoadBit(bit)) {
             is_reference = true;
           }
           break;
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index fd0e28d9ac..2b7e8dd748 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -200,7 +200,7 @@ void StackMap::Dump(VariableIndentationOutputStream* vios,
       << std::dec
       << ", stack_mask=0b";
   BitMemoryRegion stack_mask = code_info.GetStackMaskOf(*this);
-  for (size_t i = 0, e = stack_mask.size_in_bits(); i < e; ++i) {
+  for (size_t i = 0, e = code_info.GetNumberOfStackMaskBits(); i < e; ++i) {
     vios->Stream() << stack_mask.LoadBit(e - i - 1);
   }
   vios->Stream() << ")\n";
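The quick_exception_handler.cc hunk translates a frame byte offset into a stack-mask bit with offset >> 2, since each mask bit describes one 4-byte stack slot; with the fixed-width masks the bounds check is now against GetNumberOfStackMaskBits() rather than the mask region's own size. A hypothetical helper spelling out that mapping (IsReferenceSlot is not an ART function):

#include <cstddef>
#include <vector>

// Each stack mask bit describes one 4-byte stack slot, so a byte offset within
// the quick frame maps to bit (offset / 4). Bits beyond the encoded mask width
// mean "not a reference". (Hypothetical helper; the diff inlines this logic.)
bool IsReferenceSlot(size_t offset_in_frame_bytes, const std::vector<bool>& stack_mask) {
  const size_t bit = offset_in_frame_bytes >> 2;  // One bit per 4-byte slot.
  return bit < stack_mask.size() && stack_mask[bit];
}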
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 02d87130ec..91cecf0690 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -799,24 +799,6 @@ class InvokeInfo : public BitTable<3>::Accessor {
   }
 };
 
-// Register masks tend to have many tailing zero bits,
-// therefore it is worth encoding them as value+shift.
-class RegisterMask : public BitTable<2>::Accessor {
- public:
-  enum Field {
-    kValue,
-    kShift,
-    kCount,
-  };
-
-  RegisterMask(const BitTable<kCount>* table, uint32_t row)
-      : BitTable<kCount>::Accessor(table, row) {}
-
-  ALWAYS_INLINE uint32_t GetMask() const {
-    return Get<kValue>() << Get<kShift>();
-  }
-};
-
 /**
  * Wrapper around all compiler information collected for a method.
  * The information is of the form:
@@ -851,22 +833,24 @@ class CodeInfo {
     return DexRegisterLocationCatalog(location_catalog_);
   }
 
+  ALWAYS_INLINE size_t GetNumberOfStackMaskBits() const {
+    return stack_mask_bits_;
+  }
+
   ALWAYS_INLINE StackMap GetStackMapAt(size_t index) const {
     return StackMap(&stack_maps_, index);
   }
 
   BitMemoryRegion GetStackMask(size_t index) const {
-    return stack_masks_.GetBitMemoryRegion(index);
+    return stack_masks_.Subregion(index * stack_mask_bits_, stack_mask_bits_);
   }
 
   BitMemoryRegion GetStackMaskOf(const StackMap& stack_map) const {
-    uint32_t index = stack_map.GetStackMaskIndex();
-    return (index == StackMap::kNoValue) ? BitMemoryRegion() : GetStackMask(index);
+    return GetStackMask(stack_map.GetStackMaskIndex());
   }
 
   uint32_t GetRegisterMaskOf(const StackMap& stack_map) const {
-    uint32_t index = stack_map.GetRegisterMaskIndex();
-    return (index == StackMap::kNoValue) ? 0 : RegisterMask(&register_masks_, index).GetMask();
+    return register_masks_.Get(stack_map.GetRegisterMaskIndex());
   }
 
   uint32_t GetNumberOfLocationCatalogEntries() const {
@@ -1061,8 +1045,8 @@ class CodeInfo {
     invoke_infos_.Decode(bit_region, &bit_offset);
     inline_infos_.Decode(bit_region, &bit_offset);
     register_masks_.Decode(bit_region, &bit_offset);
-    stack_masks_.Decode(bit_region, &bit_offset);
-    CHECK_EQ(BitsToBytesRoundUp(bit_offset), non_header_size);
+    stack_mask_bits_ = DecodeVarintBits(bit_region, &bit_offset);
+    stack_masks_ = bit_region.Subregion(bit_offset, non_header_size * kBitsPerByte - bit_offset);
   }
 
   size_t size_;
@@ -1072,8 +1056,9 @@ class CodeInfo {
   BitTable<StackMap::Field::kCount> stack_maps_;
   BitTable<InvokeInfo::Field::kCount> invoke_infos_;
   BitTable<InlineInfo::Field::kCount> inline_infos_;
-  BitTable<RegisterMask::Field::kCount> register_masks_;
-  BitTable<1> stack_masks_;
+  BitTable<1> register_masks_;
+  uint32_t stack_mask_bits_ = 0;
+  BitMemoryRegion stack_masks_;
 
   friend class OatDumper;
 };
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 2e737f5258..129bae6d9a 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3568,8 +3568,9 @@ class ReferenceMapVisitor : public StackVisitor {
       T vreg_info(m, code_info, map, visitor_);
 
       // Visit stack entries that hold pointers.
+      const size_t number_of_bits = code_info.GetNumberOfStackMaskBits();
       BitMemoryRegion stack_mask = code_info.GetStackMaskOf(map);
-      for (size_t i = 0; i < stack_mask.size_in_bits(); ++i) {
+      for (size_t i = 0; i < number_of_bits; ++i) {
         if (stack_mask.LoadBit(i)) {
           StackReference<mirror::Object>* ref_addr = vreg_base + i;
           mirror::Object* ref = ref_addr->AsMirrorPtr();
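Taken together, the CodeInfo changes replace a BitTable of variable-length stack masks with a varint-encoded shared bit width followed by all masks back to back, so mask i occupies bits [i * width, (i + 1) * width) and lookup is pure arithmetic. A minimal reader for that layout, assuming the width has already been decoded; PackedStackMasks is an illustrative class, and ART's DecodeVarintBits/BitMemoryRegion are replaced by a plain byte vector.

#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>

// Fixed-width, bit-packed stack masks as introduced in CodeInfo above: one
// shared bit width, then all masks stored back to back with no per-entry
// header. (Illustrative sketch, not ART code.)
class PackedStackMasks {
 public:
  PackedStackMasks(std::vector<uint8_t> data, size_t bits_per_mask)
      : data_(std::move(data)), bits_per_mask_(bits_per_mask) {}

  size_t GetNumberOfStackMaskBits() const { return bits_per_mask_; }

  // Returns bit `bit` of mask `mask_index` (LSB-first within the stream).
  bool LoadBit(size_t mask_index, size_t bit) const {
    const size_t pos = mask_index * bits_per_mask_ + bit;
    return (data_[pos / 8] >> (pos % 8)) & 1u;
  }

 private:
  std::vector<uint8_t> data_;  // Bit-packed mask storage.
  size_t bits_per_mask_;       // Shared width of every mask.
};

One consequence visible in the diff: GetStackMaskOf() no longer needs a kNoValue special case, because PrepareStackMasks() assigns every stack map a real index (an all-zero mask simply deduplicates to a table entry), and a method with no live stack slots encodes a width of zero.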