Diffstat (limited to 'compiler/optimizing')
 compiler/optimizing/code_generator.cc              |   7
 compiler/optimizing/code_generator_mips.cc         |   3
 compiler/optimizing/code_generator_mips64.cc       |   3
 compiler/optimizing/inliner.cc                     |   8
 compiler/optimizing/instruction_simplifier.cc      |   4
 compiler/optimizing/intrinsics_arm_vixl.cc         |   2
 compiler/optimizing/nodes.cc                       |  21
 compiler/optimizing/nodes.h                        |   5
 compiler/optimizing/optimizing_compiler.cc         |   4
 compiler/optimizing/reference_type_propagation.cc  |  23
 compiler/optimizing/stack_map_stream.cc            | 197
 compiler/optimizing/stack_map_stream.h             |  27
 compiler/optimizing/stack_map_test.cc              | 271
 compiler/optimizing/superblock_cloner.cc           | 154
 compiler/optimizing/superblock_cloner.h            |  31
15 files changed, 406 insertions(+), 354 deletions(-)
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index de1be5b871..b358bfabe0 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1161,8 +1161,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
     // last emitted is different than the native pc of the stack map just emitted.
     size_t number_of_stack_maps = stack_map_stream->GetNumberOfStackMaps();
     if (number_of_stack_maps > 1) {
-      DCHECK_NE(stack_map_stream->GetStackMap(number_of_stack_maps - 1).native_pc_code_offset,
-                stack_map_stream->GetStackMap(number_of_stack_maps - 2).native_pc_code_offset);
+      DCHECK_NE(stack_map_stream->GetStackMapNativePcOffset(number_of_stack_maps - 1),
+                stack_map_stream->GetStackMapNativePcOffset(number_of_stack_maps - 2));
     }
   }
 }
@@ -1174,8 +1174,7 @@ bool CodeGenerator::HasStackMapAtCurrentPc() {
   if (count == 0) {
     return false;
   }
-  CodeOffset native_pc_offset = stack_map_stream->GetStackMap(count - 1).native_pc_code_offset;
-  return (native_pc_offset.Uint32Value(GetInstructionSet()) == pc);
+  return stack_map_stream->GetStackMapNativePcOffset(count - 1) == pc;
 }
 
 void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction,
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 7f3441fdf4..8be84a15bd 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1042,8 +1042,7 @@ void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) {
   // Adjust native pc offsets in stack maps.
   StackMapStream* stack_map_stream = GetStackMapStream();
   for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) {
-    uint32_t old_position =
-        stack_map_stream->GetStackMap(i).native_pc_code_offset.Uint32Value(InstructionSet::kMips);
+    uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i);
     uint32_t new_position = __ GetAdjustedPosition(old_position);
     DCHECK_GE(new_position, old_position);
     stack_map_stream->SetStackMapNativePcOffset(i, new_position);
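Aside (not part of the patch): with the new accessor pair, the MIPS Finalize() loop never touches the packed representation directly. A self-contained toy model of that loop; ToyStream and the offset arithmetic are stand-ins, not ART's StackMapStream:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    class ToyStream {
     public:
      explicit ToyStream(std::vector<uint32_t> pcs) : pcs_(std::move(pcs)) {}
      size_t GetNumberOfStackMaps() const { return pcs_.size(); }
      uint32_t GetStackMapNativePcOffset(size_t i) const { return pcs_[i]; }
      void SetStackMapNativePcOffset(size_t i, uint32_t pc) { pcs_[i] = pc; }

     private:
      std::vector<uint32_t> pcs_;  // Stored packed in the real implementation.
    };

    int main() {
      ToyStream stream({0, 8, 16});
      // Branch relaxation can only grow offsets, mirroring the DCHECK_GE above.
      for (size_t i = 0, num = stream.GetNumberOfStackMaps(); i != num; ++i) {
        uint32_t old_position = stream.GetStackMapNativePcOffset(i);
        uint32_t new_position = old_position + 4 * static_cast<uint32_t>(i);
        assert(new_position >= old_position);
        stream.SetStackMapNativePcOffset(i, new_position);
      }
      assert(stream.GetStackMapNativePcOffset(2) == 24);
      return 0;
    }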
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index ee32b96daf..cd9e0e521e 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -988,8 +988,7 @@ void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
   // Adjust native pc offsets in stack maps.
   StackMapStream* stack_map_stream = GetStackMapStream();
   for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) {
-    uint32_t old_position =
-        stack_map_stream->GetStackMap(i).native_pc_code_offset.Uint32Value(InstructionSet::kMips64);
+    uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i);
     uint32_t new_position = __ GetAdjustedPosition(old_position);
     DCHECK_GE(new_position, old_position);
     stack_map_stream->SetStackMapNativePcOffset(i, new_position);
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index ffa000e34e..6900cd883a 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -20,6 +20,7 @@
 #include "base/enums.h"
 #include "builder.h"
 #include "class_linker.h"
+#include "class_root.h"
 #include "constant_folding.h"
 #include "data_type-inl.h"
 #include "dead_code_elimination.h"
@@ -537,7 +538,7 @@ static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder(
   Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs->NewHandle(
       mirror::ObjectArray<mirror::Class>::Alloc(
           self,
-          class_linker->GetClassRoot(ClassLinker::kClassArrayClass),
+          GetClassRoot<mirror::ObjectArray<mirror::Class>>(class_linker),
          InlineCache::kIndividualCacheSize));
   if (inline_cache == nullptr) {
     // We got an OOME. Just clear the exception, and don't inline.
@@ -777,7 +778,7 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile(
 HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
                                                    HInstruction* receiver,
                                                    uint32_t dex_pc) const {
-  ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+  ArtField* field = GetClassRoot<mirror::Object>(class_linker)->GetInstanceField(0);
   DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
   HInstanceFieldGet* result = new (graph_->GetAllocator()) HInstanceFieldGet(
       receiver,
@@ -2120,9 +2121,8 @@ bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction,
     return true;
   } else if (return_replacement->IsInstanceFieldGet()) {
     HInstanceFieldGet* field_get = return_replacement->AsInstanceFieldGet();
-    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
     if (field_get->GetFieldInfo().GetField() ==
-        class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0)) {
+        GetClassRoot<mirror::Object>()->GetInstanceField(0)) {
       return true;
     }
   }
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index ca84d421a7..63704a470e 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -18,6 +18,7 @@
 
 #include "art_method-inl.h"
 #include "class_linker-inl.h"
+#include "class_root.h"
 #include "data_type-inl.h"
 #include "escape.h"
 #include "intrinsics.h"
@@ -1563,8 +1564,7 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) {
 
   {
     ScopedObjectAccess soa(Thread::Current());
-    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+    ArtField* field = GetClassRoot<mirror::Object>()->GetInstanceField(0);
     DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
     if (field_get->GetFieldInfo().GetField() != field) {
       return false;
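Aside (not part of the patch): these hunks migrate enum-based ClassLinker::GetClassRoot(ClassLinker::k...) calls to the templated GetClassRoot() from the new class_root.h, where the mirror type selects the root at compile time. A minimal runnable sketch of that pattern; every name below is an illustrative stand-in, not ART's actual declaration:

    #include <cstddef>
    #include <cstdio>

    struct Class { const char* descriptor; };

    static Class gRoots[] = {{"Ljava/lang/Object;"}, {"Ljava/lang/String;"}};

    enum class ClassRoot : size_t { kJavaLangObject = 0, kJavaLangString = 1 };

    // Enum-indexed lookup (analogue of the non-template GetClassRoot overload).
    Class* GetClassRoot(ClassRoot root) {
      return &gRoots[static_cast<size_t>(root)];
    }

    struct Object {};  // Stand-in for mirror::Object.
    struct String {};  // Stand-in for mirror::String.

    // Maps a mirror type to its root index at compile time.
    template <typename MirrorType> struct RootOf;
    template <> struct RootOf<Object> {
      static constexpr ClassRoot value = ClassRoot::kJavaLangObject;
    };
    template <> struct RootOf<String> {
      static constexpr ClassRoot value = ClassRoot::kJavaLangString;
    };

    // Type-indexed front end: callers no longer spell out the enum.
    template <typename MirrorType>
    Class* GetClassRoot() {
      return GetClassRoot(RootOf<MirrorType>::value);
    }

    int main() {
      std::printf("%s\n", GetClassRoot<Object>()->descriptor);
      return 0;
    }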
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 5287b4b2fa..fecf1ccbfa 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -25,7 +25,7 @@
 #include "mirror/array-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/reference.h"
-#include "mirror/string.h"
+#include "mirror/string-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-current-inl.h"
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 7f78dc257e..ef8a757ad0 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -22,6 +22,7 @@
 #include "base/bit_vector-inl.h"
 #include "base/stl_util.h"
 #include "class_linker-inl.h"
+#include "class_root.h"
 #include "code_generator.h"
 #include "common_dominator.h"
 #include "intrinsics.h"
@@ -40,9 +41,8 @@ static constexpr bool kEnableFloatingPointStaticEvaluation = (FLT_EVAL_METHOD ==
 void HGraph::InitializeInexactObjectRTI(VariableSizedHandleScope* handles) {
   ScopedObjectAccess soa(Thread::Current());
   // Create the inexact Object reference type and store it in the HGraph.
-  ClassLinker* linker = Runtime::Current()->GetClassLinker();
   inexact_object_rti_ = ReferenceTypeInfo::Create(
-      handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)),
+      handles->NewHandle(GetClassRoot<mirror::Object>()),
       /* is_exact */ false);
 }
 
@@ -1121,6 +1121,23 @@ void HEnvironment::RemoveAsUserOfInput(size_t index) const {
   user->FixUpUserRecordsAfterEnvUseRemoval(before_env_use_node);
 }
 
+void HEnvironment::ReplaceInput(HInstruction* replacement, size_t index) {
+  const HUserRecord<HEnvironment*>& env_use_record = vregs_[index];
+  HInstruction* orig_instr = env_use_record.GetInstruction();
+
+  DCHECK(orig_instr != replacement);
+
+  HUseList<HEnvironment*>::iterator before_use_node = env_use_record.GetBeforeUseNode();
+  // Note: fixup_end remains valid across splice_after().
+  auto fixup_end = replacement->env_uses_.empty() ? replacement->env_uses_.begin()
+                                                  : ++replacement->env_uses_.begin();
+  replacement->env_uses_.splice_after(replacement->env_uses_.before_begin(),
+                                      env_use_record.GetInstruction()->env_uses_,
+                                      before_use_node);
+  replacement->FixUpUserRecordsAfterEnvUseInsertion(fixup_end);
+  orig_instr->FixUpUserRecordsAfterEnvUseRemoval(before_use_node);
+}
+
 HInstruction* HInstruction::GetNextDisregardingMoves() const {
   HInstruction* next = GetNext();
   while (next != nullptr && next->IsParallelMove()) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 09d9c57a33..3fd5b6b02d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1909,6 +1909,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
 
   void RemoveAsUserOfInput(size_t index) const;
 
+  // Replaces the input at the position 'index' with the replacement; the replacement and old
+  // input instructions' env_uses_ lists are adjusted. The function works similarly to
+  // HInstruction::ReplaceInput.
+  void ReplaceInput(HInstruction* replacement, size_t index);
+
   size_t Size() const { return vregs_.size(); }
 
   HEnvironment* GetParent() const { return parent_; }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index c4977decd9..79ac6b9b9d 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -31,6 +31,7 @@
 #include "base/scoped_arena_allocator.h"
 #include "base/timing_logger.h"
 #include "builder.h"
+#include "class_root.h"
 #include "code_generator.h"
 #include "compiled_method.h"
 #include "compiler.h"
@@ -1309,13 +1310,12 @@ bool OptimizingCompiler::JitCompile(Thread* self,
   size_t method_info_size = 0;
   codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size);
   size_t number_of_roots = codegen->GetNumberOfJitRoots();
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   // We allocate an object array to ensure the JIT roots that we will collect in EmitJitRoots
   // will be visible by the GC between EmitLiterals and CommitCode. Once CommitCode is
   // executed, this array is not needed.
   Handle<mirror::ObjectArray<mirror::Object>> roots(
       hs.NewHandle(mirror::ObjectArray<mirror::Object>::Alloc(
-          self, class_linker->GetClassRoot(ClassLinker::kObjectArrayClass), number_of_roots)));
+          self, GetClassRoot<mirror::ObjectArray<mirror::Object>>(), number_of_roots)));
   if (roots == nullptr) {
     // Out of memory, just clear the exception to avoid any Java exception uncaught problems.
     MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit);
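Aside (not part of the patch): the new HEnvironment::ReplaceInput() moves exactly one use node between intrusive singly-linked lists. The same single-node splice, shown with plain std::forward_list rather than ART's HUseList: splice_after() relinks the node following `before` in O(1), and iterators to the moved node stay valid, which is what the fixup_end comment in the hunk relies on.

    #include <cassert>
    #include <forward_list>

    int main() {
      std::forward_list<int> from{1, 2, 3};
      std::forward_list<int> to{9};

      auto before = from.begin();  // Points at 1; the node after it (2) moves.
      to.splice_after(to.before_begin(), from, before);

      assert((from == std::forward_list<int>{1, 3}));
      assert((to == std::forward_list<int>{2, 9}));
      return 0;
    }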
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index ecfa790b91..f3fe62561f 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -22,6 +22,7 @@
 #include "base/scoped_arena_containers.h"
 #include "base/enums.h"
 #include "class_linker-inl.h"
+#include "class_root.h"
 #include "handle_scope-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
@@ -40,43 +41,40 @@ static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint(
 }
 
 static inline ReferenceTypeInfo::TypeHandle GetRootHandle(VariableSizedHandleScope* handles,
-                                                          ClassLinker::ClassRoot class_root,
+                                                          ClassRoot class_root,
                                                           ReferenceTypeInfo::TypeHandle* cache) {
   if (!ReferenceTypeInfo::IsValidHandle(*cache)) {
     // Mutator lock is required for NewHandle.
-    ClassLinker* linker = Runtime::Current()->GetClassLinker();
     ScopedObjectAccess soa(Thread::Current());
-    *cache = handles->NewHandle(linker->GetClassRoot(class_root));
+    *cache = handles->NewHandle(GetClassRoot(class_root));
   }
   return *cache;
 }
 
 ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() {
-  return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_);
+  return GetRootHandle(handles_, ClassRoot::kJavaLangObject, &object_class_handle_);
 }
 
 ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetClassClassHandle() {
-  return GetRootHandle(handles_, ClassLinker::kJavaLangClass, &class_class_handle_);
+  return GetRootHandle(handles_, ClassRoot::kJavaLangClass, &class_class_handle_);
 }
 
 ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetMethodHandleClassHandle() {
   return GetRootHandle(handles_,
-                       ClassLinker::kJavaLangInvokeMethodHandleImpl,
+                       ClassRoot::kJavaLangInvokeMethodHandleImpl,
                        &method_handle_class_handle_);
 }
 
 ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetMethodTypeClassHandle() {
-  return GetRootHandle(handles_,
-                       ClassLinker::kJavaLangInvokeMethodType,
-                       &method_type_class_handle_);
+  return GetRootHandle(handles_, ClassRoot::kJavaLangInvokeMethodType, &method_type_class_handle_);
 }
 
 ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetStringClassHandle() {
-  return GetRootHandle(handles_, ClassLinker::kJavaLangString, &string_class_handle_);
+  return GetRootHandle(handles_, ClassRoot::kJavaLangString, &string_class_handle_);
 }
 
 ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetThrowableClassHandle() {
-  return GetRootHandle(handles_, ClassLinker::kJavaLangThrowable, &throwable_class_handle_);
+  return GetRootHandle(handles_, ClassRoot::kJavaLangThrowable, &throwable_class_handle_);
 }
 
 class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
@@ -341,8 +339,7 @@ static void BoundTypeForClassCheck(HInstruction* check) {
 
   {
     ScopedObjectAccess soa(Thread::Current());
-    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-    ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+    ArtField* field = GetClassRoot<mirror::Object>()->GetInstanceField(0);
     DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
     if (field_get->GetFieldInfo().GetField() != field) {
       return;
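Aside (not part of the patch): GetRootHandle() resolves a root once and memoizes it in the caller-provided cache slot, so later calls are a plain load. The bare caching shape, with raw pointers standing in for ART handles; Resolve and GetCached are hypothetical stand-ins:

    #include <cstdio>

    struct Klass { const char* name; };

    Klass* Resolve(const char* name) {
      static Klass object_root{"java.lang.Object"};
      (void)name;                    // Pretend lookup in the runtime's root table.
      return &object_root;
    }

    Klass* GetCached(const char* name, Klass** cache) {
      if (*cache == nullptr) {       // First use: resolve and remember.
        *cache = Resolve(name);
      }
      return *cache;                 // Every later call skips resolution.
    }

    int main() {
      Klass* object_cache = nullptr;
      std::printf("%s\n", GetCached("java.lang.Object", &object_cache)->name);
      std::printf("%s\n", GetCached("java.lang.Object", &object_cache)->name);
      return 0;
    }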
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index aa28c8b500..b1dcb68415 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -25,6 +25,14 @@
 
 namespace art {
 
+uint32_t StackMapStream::GetStackMapNativePcOffset(size_t i) {
+  return StackMap::UnpackNativePc(stack_maps_[i].packed_native_pc, instruction_set_);
+}
+
+void StackMapStream::SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
+  stack_maps_[i].packed_native_pc = StackMap::PackNativePc(native_pc_offset, instruction_set_);
+}
+
 void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
                                         uint32_t native_pc_offset,
                                         uint32_t register_mask,
@@ -33,7 +41,7 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
                                         uint8_t inlining_depth) {
   DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
   current_entry_.dex_pc = dex_pc;
-  current_entry_.native_pc_code_offset = CodeOffset::FromOffset(native_pc_offset, instruction_set_);
+  current_entry_.packed_native_pc = StackMap::PackNativePc(native_pc_offset, instruction_set_);
   current_entry_.register_mask = register_mask;
   current_entry_.sp_mask = sp_mask;
   current_entry_.inlining_depth = inlining_depth;
@@ -48,10 +56,6 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
         ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
     current_entry_.dex_register_entry.live_dex_registers_mask->ClearAllBits();
   }
-  if (sp_mask != nullptr) {
-    stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
-  }
-
   current_dex_register_ = 0;
 }
 
@@ -199,9 +203,6 @@ static MemoryRegion EncodeMemoryRegion(Vector* out, size_t* bit_offset, uint32_t
   return region;
 }
 
-template<uint32_t NumColumns>
-using ScopedBitTableBuilder = BitTableBuilder<NumColumns, ScopedArenaAllocatorAdapter<uint32_t>>;
-
 size_t StackMapStream::PrepareForFillIn() {
   size_t bit_offset = 0;
   out_.clear();
@@ -220,27 +221,36 @@ size_t StackMapStream::PrepareForFillIn() {
   PrepareMethodIndices();
 
   // Dedup stack masks. Needs to be done first as it modifies the stack map entry.
-  size_t stack_mask_bits = stack_mask_max_ + 1;  // Need room for max element too.
-  size_t num_stack_masks = PrepareStackMasks(stack_mask_bits);
+  BitmapTableBuilder stack_mask_builder(allocator_);
+  for (StackMapEntry& stack_map : stack_maps_) {
+    BitVector* mask = stack_map.sp_mask;
+    size_t num_bits = (mask != nullptr) ? mask->GetNumberOfBits() : 0;
+    if (num_bits != 0) {
+      stack_map.stack_mask_index = stack_mask_builder.Dedup(mask->GetRawStorage(), num_bits);
+    } else {
+      stack_map.stack_mask_index = StackMap::kNoValue;
+    }
+  }
 
   // Dedup register masks. Needs to be done first as it modifies the stack map entry.
-  size_t num_register_masks = PrepareRegisterMasks();
-
-  // Write dex register maps.
-  MemoryRegion dex_register_map_region =
-      EncodeMemoryRegion(&out_, &bit_offset, dex_register_map_bytes * kBitsPerByte);
-  for (DexRegisterMapEntry& entry : dex_register_entries_) {
-    size_t entry_size = entry.ComputeSize(location_catalog_entries_.size());
-    if (entry_size != 0) {
-      DexRegisterMap dex_register_map(
-          dex_register_map_region.Subregion(entry.offset, entry_size));
-      FillInDexRegisterMap(dex_register_map,
-                           entry.num_dex_registers,
-                           *entry.live_dex_registers_mask,
-                           entry.locations_start_index);
+  BitTableBuilder<std::array<uint32_t, RegisterMask::kCount>> register_mask_builder(allocator_);
+  for (StackMapEntry& stack_map : stack_maps_) {
+    uint32_t register_mask = stack_map.register_mask;
+    if (register_mask != 0) {
+      uint32_t shift = LeastSignificantBit(register_mask);
+      std::array<uint32_t, RegisterMask::kCount> entry = {
+          register_mask >> shift,
+          shift,
+      };
+      stack_map.register_mask_index = register_mask_builder.Dedup(&entry);
+    } else {
+      stack_map.register_mask_index = StackMap::kNoValue;
     }
   }
 
+  // Allocate space for dex register maps.
+  EncodeMemoryRegion(&out_, &bit_offset, dex_register_map_bytes * kBitsPerByte);
+
   // Write dex register catalog.
   EncodeVarintBits(&out_, &bit_offset, location_catalog_entries_.size());
   size_t location_catalog_bytes = ComputeDexRegisterLocationCatalogSize();
@@ -258,20 +268,21 @@ size_t StackMapStream::PrepareForFillIn() {
   DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size());
 
   // Write stack maps.
-  ScopedArenaAllocatorAdapter<void> adapter = allocator_->Adapter(kArenaAllocStackMapStream);
-  ScopedBitTableBuilder<StackMap::Field::kCount> stack_map_builder((adapter));
-  ScopedBitTableBuilder<InvokeInfo::Field::kCount> invoke_info_builder((adapter));
-  ScopedBitTableBuilder<InlineInfo::Field::kCount> inline_info_builder((adapter));
+  BitTableBuilder<std::array<uint32_t, StackMap::kCount>> stack_map_builder(allocator_);
+  BitTableBuilder<std::array<uint32_t, InvokeInfo::kCount>> invoke_info_builder(allocator_);
+  BitTableBuilder<std::array<uint32_t, InlineInfo::kCount>> inline_info_builder(allocator_);
   for (const StackMapEntry& entry : stack_maps_) {
     if (entry.dex_method_index != dex::kDexNoIndex) {
-      invoke_info_builder.AddRow(
-          entry.native_pc_code_offset.CompressedValue(),
+      std::array<uint32_t, InvokeInfo::kCount> invoke_info_entry {
+          entry.packed_native_pc,
           entry.invoke_type,
-          entry.dex_method_index_idx);
+          entry.dex_method_index_idx
+      };
+      invoke_info_builder.Add(invoke_info_entry);
     }
 
     // Set the inlining info.
-    uint32_t inline_info_index = StackMap::kNoValue;
+    uint32_t inline_info_index = inline_info_builder.size();
     DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
     for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
       InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index];
@@ -281,52 +292,30 @@ size_t StackMapStream::PrepareForFillIn() {
         method_index_idx = High32Bits(reinterpret_cast<uintptr_t>(inline_entry.method));
         extra_data = Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method));
       }
-      uint32_t index = inline_info_builder.AddRow(
+      std::array<uint32_t, InlineInfo::kCount> inline_info_entry {
           (depth == entry.inlining_depth - 1) ? InlineInfo::kLast : InlineInfo::kMore,
           method_index_idx,
           inline_entry.dex_pc,
           extra_data,
-          dex_register_entries_[inline_entry.dex_register_map_index].offset);
-      if (depth == 0) {
-        inline_info_index = index;
-      }
+          dex_register_entries_[inline_entry.dex_register_map_index].offset,
+      };
+      inline_info_builder.Add(inline_info_entry);
     }
-    stack_map_builder.AddRow(
-        entry.native_pc_code_offset.CompressedValue(),
+    std::array<uint32_t, StackMap::kCount> stack_map_entry {
+        entry.packed_native_pc,
         entry.dex_pc,
         dex_register_entries_[entry.dex_register_map_index].offset,
-        inline_info_index,
+        entry.inlining_depth != 0 ? inline_info_index : InlineInfo::kNoValue,
         entry.register_mask_index,
-        entry.stack_mask_index);
+        entry.stack_mask_index,
+    };
+    stack_map_builder.Add(stack_map_entry);
   }
   stack_map_builder.Encode(&out_, &bit_offset);
   invoke_info_builder.Encode(&out_, &bit_offset);
   inline_info_builder.Encode(&out_, &bit_offset);
-
-  // Write register masks table.
-  ScopedBitTableBuilder<1> register_mask_builder((adapter));
-  for (size_t i = 0; i < num_register_masks; ++i) {
-    register_mask_builder.AddRow(register_masks_[i]);
-  }
   register_mask_builder.Encode(&out_, &bit_offset);
-
-  // Write stack masks table.
-  EncodeVarintBits(&out_, &bit_offset, stack_mask_bits);
-  out_.resize(BitsToBytesRoundUp(bit_offset + stack_mask_bits * num_stack_masks));
-  BitMemoryRegion stack_mask_region(MemoryRegion(out_.data(), out_.size()),
-                                    bit_offset,
-                                    stack_mask_bits * num_stack_masks);
-  if (stack_mask_bits > 0) {
-    for (size_t i = 0; i < num_stack_masks; ++i) {
-      size_t stack_mask_bytes = BitsToBytesRoundUp(stack_mask_bits);
-      BitMemoryRegion src(MemoryRegion(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes));
-      BitMemoryRegion dst = stack_mask_region.Subregion(i * stack_mask_bits, stack_mask_bits);
-      for (size_t bit_index = 0; bit_index < stack_mask_bits; bit_index += BitSizeOf<uint32_t>()) {
-        size_t num_bits = std::min<size_t>(stack_mask_bits - bit_index, BitSizeOf<uint32_t>());
-        dst.StoreBits(bit_index, src.LoadBits(bit_index, num_bits), num_bits);
-      }
-    }
-  }
+  stack_mask_builder.Encode(&out_, &bit_offset);
 
   return UnsignedLeb128Size(out_.size()) + out_.size();
 }
@@ -339,6 +328,22 @@ void StackMapStream::FillInCodeInfo(MemoryRegion region) {
   uint8_t* ptr = EncodeUnsignedLeb128(region.begin(), out_.size());
   region.CopyFromVector(ptr - region.begin(), out_);
 
+  // Write dex register maps.
+  CodeInfo code_info(region);
+  for (DexRegisterMapEntry& entry : dex_register_entries_) {
+    size_t entry_size = entry.ComputeSize(location_catalog_entries_.size());
+    if (entry_size != 0) {
+      DexRegisterMap dex_register_map(
+          code_info.dex_register_maps_.Subregion(entry.offset, entry_size),
+          entry.num_dex_registers,
+          code_info);
+      FillInDexRegisterMap(dex_register_map,
+                           entry.num_dex_registers,
+                           *entry.live_dex_registers_mask,
+                           entry.locations_start_index);
+    }
+  }
+
   // Verify all written data in debug build.
   if (kIsDebugBuild) {
     CheckCodeInfo(region);
@@ -363,7 +368,6 @@ void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map,
       dex_register_map.SetLocationCatalogEntryIndex(
           index_in_dex_register_locations,
           location_catalog_entry_index,
-          num_dex_registers,
           location_catalog_entries_.size());
     }
   }
@@ -420,8 +424,7 @@ bool StackMapStream::DexRegisterMapEntryEquals(const DexRegisterMapEntry& a,
 }
 
 // Helper for CheckCodeInfo - check that register map has the expected content.
-void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info,
-                                         const DexRegisterMap& dex_register_map,
+void StackMapStream::CheckDexRegisterMap(const DexRegisterMap& dex_register_map,
                                          size_t num_dex_registers,
                                          BitVector* live_dex_registers_mask,
                                          size_t dex_register_locations_index) const {
@@ -438,8 +441,7 @@ void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info,
             << dex_register_map.IsValid() << " " << dex_register_map.IsDexRegisterLive(reg);
       } else {
         DCHECK(dex_register_map.IsDexRegisterLive(reg));
-        DexRegisterLocation seen = dex_register_map.GetDexRegisterLocation(
-            reg, num_dex_registers, code_info);
+        DexRegisterLocation seen = dex_register_map.GetDexRegisterLocation(reg);
         DCHECK_EQ(expected.GetKind(), seen.GetKind());
         DCHECK_EQ(expected.GetValue(), seen.GetValue());
       }
@@ -449,17 +451,6 @@ void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info,
   }
 }
 
-size_t StackMapStream::PrepareRegisterMasks() {
-  register_masks_.resize(stack_maps_.size(), 0u);
-  ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream));
-  for (StackMapEntry& stack_map : stack_maps_) {
-    const size_t index = dedupe.size();
-    stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second;
-    register_masks_[index] = stack_map.register_mask;
-  }
-  return dedupe.size();
-}
-
 void StackMapStream::PrepareMethodIndices() {
   CHECK(method_indices_.empty());
   method_indices_.resize(stack_maps_.size() + inline_infos_.size());
@@ -482,35 +473,10 @@ void StackMapStream::PrepareMethodIndices() {
   method_indices_.resize(dedupe.size());
 }
 
-
-size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
-  // Preallocate memory since we do not want it to move (the dedup map will point into it).
-  const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
-  stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u);
-  // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later
-  // when copying out from stack_masks_.
-  ScopedArenaUnorderedMap<MemoryRegion,
-                          size_t,
-                          FNVHash<MemoryRegion>,
-                          MemoryRegion::ContentEquals> dedup(
-      stack_maps_.size(), allocator_->Adapter(kArenaAllocStackMapStream));
-  for (StackMapEntry& stack_map : stack_maps_) {
-    size_t index = dedup.size();
-    MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size);
-    BitMemoryRegion stack_mask_bits(stack_mask);
-    for (size_t i = 0; i < entry_size_in_bits; i++) {
-      stack_mask_bits.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i));
-    }
-    stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second;
-  }
-  return dedup.size();
-}
-
 // Check that all StackMapStream inputs are correctly encoded by trying to read them back.
 void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
   CodeInfo code_info(region);
   DCHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size());
-  DCHECK_EQ(code_info.GetNumberOfStackMaskBits(), static_cast<uint32_t>(stack_mask_max_ + 1));
   DCHECK_EQ(code_info.GetNumberOfLocationCatalogEntries(), location_catalog_entries_.size());
   size_t invoke_info_index = 0;
   for (size_t s = 0; s < stack_maps_.size(); ++s) {
@@ -519,33 +485,29 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
 
     // Check main stack map fields.
     DCHECK_EQ(stack_map.GetNativePcOffset(instruction_set_),
-              entry.native_pc_code_offset.Uint32Value(instruction_set_));
+              StackMap::UnpackNativePc(entry.packed_native_pc, instruction_set_));
     DCHECK_EQ(stack_map.GetDexPc(), entry.dex_pc);
     DCHECK_EQ(stack_map.GetRegisterMaskIndex(), entry.register_mask_index);
     DCHECK_EQ(code_info.GetRegisterMaskOf(stack_map), entry.register_mask);
-    const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits();
     DCHECK_EQ(stack_map.GetStackMaskIndex(), entry.stack_mask_index);
     BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map);
     if (entry.sp_mask != nullptr) {
       DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
-      for (size_t b = 0; b < num_stack_mask_bits; b++) {
-        DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
+      for (size_t b = 0; b < stack_mask.size_in_bits(); b++) {
+        DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b)) << b;
       }
     } else {
-      for (size_t b = 0; b < num_stack_mask_bits; b++) {
-        DCHECK_EQ(stack_mask.LoadBit(b), 0u);
-      }
+      DCHECK_EQ(stack_mask.size_in_bits(), 0u);
     }
     if (entry.dex_method_index != dex::kDexNoIndex) {
       InvokeInfo invoke_info = code_info.GetInvokeInfo(invoke_info_index);
       DCHECK_EQ(invoke_info.GetNativePcOffset(instruction_set_),
-                entry.native_pc_code_offset.Uint32Value(instruction_set_));
+                StackMap::UnpackNativePc(entry.packed_native_pc, instruction_set_));
       DCHECK_EQ(invoke_info.GetInvokeType(), entry.invoke_type);
      DCHECK_EQ(invoke_info.GetMethodIndexIdx(), entry.dex_method_index_idx);
       invoke_info_index++;
     }
-    CheckDexRegisterMap(code_info,
-                        code_info.GetDexRegisterMapOf(
+    CheckDexRegisterMap(code_info.GetDexRegisterMapOf(
                             stack_map, entry.dex_register_entry.num_dex_registers),
                         entry.dex_register_entry.num_dex_registers,
                         entry.dex_register_entry.live_dex_registers_mask,
@@ -571,8 +533,7 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
           DCHECK_EQ(method_indices_[method_index_idx], inline_entry.method_index);
         }
 
-        CheckDexRegisterMap(code_info,
-                            code_info.GetDexRegisterMapAtDepth(
+        CheckDexRegisterMap(code_info.GetDexRegisterMapAtDepth(
                                 d,
                                 inline_info,
                                 inline_entry.dex_register_entry.num_dex_registers),
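Aside (not part of the patch): both dedup loops above lean on a builder whose Dedup() returns a stable row index, so identical stack masks or register masks share one table row; register masks are additionally split into (value, shift) so trailing zero bits are never stored. A toy analogue of both ideas, using std::map rather than ART's BitTableBuilder:

    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <vector>

    struct DedupTable {
      std::vector<uint64_t> rows;
      std::map<uint64_t, uint32_t> index;

      uint32_t Dedup(uint64_t row) {
        auto it = index.find(row);
        if (it != index.end()) {
          return it->second;                  // Seen before: reuse its index.
        }
        uint32_t i = static_cast<uint32_t>(rows.size());
        rows.push_back(row);
        index.emplace(row, i);
        return i;
      }
    };

    int main() {
      DedupTable table;
      assert(table.Dedup(0xf0) == 0);
      assert(table.Dedup(0x0f) == 1);
      assert(table.Dedup(0xf0) == 0);         // Duplicate collapses to row 0.

      // Register mask packing: mask == value << shift.
      uint32_t mask = 0xf0;
      uint32_t shift = __builtin_ctz(mask);   // GCC/Clang analogue of LeastSignificantBit().
      uint32_t value = mask >> shift;
      assert((value << shift) == mask);
      return 0;
    }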
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index ea97cf6530..6d505b95db 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -68,11 +68,8 @@ class StackMapStream : public ValueObject {
         location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
         inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
-        stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
-        register_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
         method_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
-        stack_mask_max_(-1),
         out_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(),
                                            allocator->Adapter(kArenaAllocStackMapStream)),
@@ -106,7 +103,7 @@ class StackMapStream : public ValueObject {
   // See runtime/stack_map.h to know what these fields contain.
   struct StackMapEntry {
     uint32_t dex_pc;
-    CodeOffset native_pc_code_offset;
+    uint32_t packed_native_pc;
     uint32_t register_mask;
     BitVector* sp_mask;
     uint32_t inlining_depth;
@@ -151,14 +148,8 @@ class StackMapStream : public ValueObject {
     return stack_maps_.size();
   }
 
-  const StackMapEntry& GetStackMap(size_t i) const {
-    return stack_maps_[i];
-  }
-
-  void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
-    stack_maps_[i].native_pc_code_offset =
-        CodeOffset::FromOffset(native_pc_offset, instruction_set_);
-  }
+  uint32_t GetStackMapNativePcOffset(size_t i);
+  void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset);
 
   // Prepares the stream to fill in a memory region. Must be called before FillIn.
   // Returns the size (in bytes) needed to store this stream.
@@ -171,12 +162,6 @@ class StackMapStream : public ValueObject {
  private:
   size_t ComputeDexRegisterLocationCatalogSize() const;
 
-  // Returns the number of unique stack masks.
-  size_t PrepareStackMasks(size_t entry_size_in_bits);
-
-  // Returns the number of unique register masks.
-  size_t PrepareRegisterMasks();
-
   // Prepare and deduplicate method indices.
   void PrepareMethodIndices();
 
@@ -193,8 +178,7 @@ class StackMapStream : public ValueObject {
                             const BitVector& live_dex_registers_mask,
                             uint32_t start_index_in_dex_register_locations) const;
 
-  void CheckDexRegisterMap(const CodeInfo& code_info,
-                           const DexRegisterMap& dex_register_map,
+  void CheckDexRegisterMap(const DexRegisterMap& dex_register_map,
                            size_t num_dex_registers,
                            BitVector* live_dex_registers_mask,
                            size_t dex_register_locations_index) const;
@@ -217,11 +201,8 @@ class StackMapStream : public ValueObject {
   // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
   ScopedArenaVector<size_t> dex_register_locations_;
   ScopedArenaVector<InlineInfoEntry> inline_infos_;
-  ScopedArenaVector<uint8_t> stack_masks_;
-  ScopedArenaVector<uint32_t> register_masks_;
   ScopedArenaVector<uint32_t> method_indices_;
   ScopedArenaVector<DexRegisterMapEntry> dex_register_entries_;
-  int stack_mask_max_;
 
   ScopedArenaVector<uint8_t> out_;
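Aside (not part of the patch): StackMapEntry now stores a plain uint32_t packed_native_pc. Judging by the PackedNativePcTest round-trip in the next file, packing appears to divide the offset by the ISA's instruction alignment; a minimal sketch under that assumption (the real StackMap::PackNativePc may carry additional ISA-specific details):

    #include <cassert>
    #include <cstdint>

    enum class Isa { kThumb2, kArm64 };

    constexpr uint32_t InstructionAlignment(Isa isa) {
      return isa == Isa::kThumb2 ? 2u : 4u;
    }

    uint32_t PackNativePc(uint32_t native_pc, Isa isa) {
      assert(native_pc % InstructionAlignment(isa) == 0);
      return native_pc / InstructionAlignment(isa);  // Drop always-zero low bits.
    }

    uint32_t UnpackNativePc(uint32_t packed, Isa isa) {
      return packed * InstructionAlignment(isa);
    }

    int main() {
      assert(UnpackNativePc(PackNativePc(2, Isa::kThumb2), Isa::kThumb2) == 2u);
      assert(UnpackNativePc(PackNativePc(4, Isa::kArm64), Isa::kArm64) == 4u);
      return 0;
    }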
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 9db7588b3a..112771847c 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -32,10 +32,10 @@ static bool CheckStackMask(
     const StackMap& stack_map,
     const BitVector& bit_vector) {
   BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map);
-  if (bit_vector.GetNumberOfBits() > code_info.GetNumberOfStackMaskBits()) {
+  if (bit_vector.GetNumberOfBits() > stack_mask.size_in_bits()) {
     return false;
   }
-  for (size_t i = 0; i < code_info.GetNumberOfStackMaskBits(); ++i) {
+  for (size_t i = 0; i < stack_mask.size_in_bits(); ++i) {
     if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) {
       return false;
     }
@@ -96,22 +96,15 @@ TEST(StackMapTest, Test1) {
   size_t expected_dex_register_map_size = 1u + 1u;
   ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
 
-  ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(
-                1, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(
-                1, number_of_dex_registers, code_info));
-  ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info));
-
-  size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-      0, number_of_dex_registers, number_of_catalog_entries);
-  size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-      1, number_of_dex_registers, number_of_catalog_entries);
+  ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind(0));
+  ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(1));
+  ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind(0));
+  ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(1));
+  ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0));
+  ASSERT_EQ(-2, dex_register_map.GetConstant(1));
+
+  size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+  size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
   ASSERT_EQ(0u, index0);
   ASSERT_EQ(1u, index1);
   DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -211,22 +204,15 @@ TEST(StackMapTest, Test2) {
   size_t expected_dex_register_map_size = 1u + 1u;
   ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
 
-  ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(
-                1, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(
-                1, number_of_dex_registers, code_info));
-  ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info));
-
-  size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-      0, number_of_dex_registers, number_of_catalog_entries);
-  size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-      1, number_of_dex_registers, number_of_catalog_entries);
+  ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind(0));
+  ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(1));
+  ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind(0));
+  ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(1));
+  ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0));
+  ASSERT_EQ(-2, dex_register_map.GetConstant(1));
+
+  size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+  size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
   ASSERT_EQ(0u, index0);
   ASSERT_EQ(1u, index1);
   DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -270,23 +256,15 @@ TEST(StackMapTest, Test2) {
   size_t expected_dex_register_map_size = 1u + 1u;
   ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
 
-  ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind(
-                1, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind(
-                1, number_of_dex_registers, code_info));
-  ASSERT_EQ(18, dex_register_map.GetMachineRegister(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(3, dex_register_map.GetMachineRegister(
-                1, number_of_dex_registers, code_info));
-
-  size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-      0, number_of_dex_registers, number_of_catalog_entries);
-  size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-      1, number_of_dex_registers, number_of_catalog_entries);
+  ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind(0));
+  ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind(1));
+  ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind(0));
+  ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind(1));
+  ASSERT_EQ(18, dex_register_map.GetMachineRegister(0));
+  ASSERT_EQ(3, dex_register_map.GetMachineRegister(1));
+
+  size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+  size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
   ASSERT_EQ(2u, index0);
   ASSERT_EQ(3u, index1);
   DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -324,23 +302,15 @@ TEST(StackMapTest, Test2) {
   size_t expected_dex_register_map_size = 1u + 1u;
   ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
 
-  ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationKind(
-                1, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationInternalKind(
-                1, number_of_dex_registers, code_info));
-  ASSERT_EQ(6, dex_register_map.GetMachineRegister(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(8, dex_register_map.GetMachineRegister(
-                1, number_of_dex_registers, code_info));
-
-  size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-      0, number_of_dex_registers, number_of_catalog_entries);
-  size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-      1, number_of_dex_registers, number_of_catalog_entries);
+  ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind(0));
+  ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationKind(1));
+  ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind(0));
+  ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationInternalKind(1));
+  ASSERT_EQ(6, dex_register_map.GetMachineRegister(0));
+  ASSERT_EQ(8, dex_register_map.GetMachineRegister(1));
+
+  size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+  size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
   ASSERT_EQ(4u, index0);
   ASSERT_EQ(5u, index1);
   DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -378,23 +348,15 @@ TEST(StackMapTest, Test2) {
   size_t expected_dex_register_map_size = 1u + 1u;
   ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
 
-  ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationKind(
-                1, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationInternalKind(
-                1, number_of_dex_registers, code_info));
-  ASSERT_EQ(3, dex_register_map.GetMachineRegister(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(1, dex_register_map.GetMachineRegister(
-                1, number_of_dex_registers, code_info));
-
-  size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-      0, number_of_dex_registers, number_of_catalog_entries);
-  size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-      1, number_of_dex_registers, number_of_catalog_entries);
+  ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind(0));
+  ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationKind(1));
+  ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind(0));
+  ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationInternalKind(1));
+  ASSERT_EQ(3, dex_register_map.GetMachineRegister(0));
+  ASSERT_EQ(1, dex_register_map.GetMachineRegister(1));
+
+  size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+  size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
   ASSERT_EQ(3u, index0);  // Shared with second stack map.
   ASSERT_EQ(6u, index1);
   DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -470,20 +432,15 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) {
   size_t expected_map_size = 1u + 1u;
   ASSERT_EQ(expected_map_size, map.Size());
 
-  ASSERT_EQ(Kind::kInStack, map.GetLocationKind(0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kConstant,
-            map.GetLocationKind(1, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kInStack,
-            map.GetLocationInternalKind(0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kConstantLargeValue,
-            map.GetLocationInternalKind(1, number_of_dex_registers, code_info));
-  ASSERT_EQ(0, map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info));
-  ASSERT_EQ(-2, map.GetConstant(1, number_of_dex_registers, code_info));
-
-  const size_t index0 =
-      map.GetLocationCatalogEntryIndex(0, number_of_dex_registers, number_of_catalog_entries);
-  const size_t index1 =
-      map.GetLocationCatalogEntryIndex(1, number_of_dex_registers, number_of_catalog_entries);
+  ASSERT_EQ(Kind::kInStack, map.GetLocationKind(0));
+  ASSERT_EQ(Kind::kConstant, map.GetLocationKind(1));
+  ASSERT_EQ(Kind::kInStack, map.GetLocationInternalKind(0));
+  ASSERT_EQ(Kind::kConstantLargeValue, map.GetLocationInternalKind(1));
+  ASSERT_EQ(0, map.GetStackOffsetInBytes(0));
+  ASSERT_EQ(-2, map.GetConstant(1));
+
+  const size_t index0 = map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+  const size_t index1 = map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
   ASSERT_EQ(0u, index0);
   ASSERT_EQ(1u, index1);
   DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -552,20 +509,14 @@ TEST(StackMapTest, TestNonLiveDexRegisters) {
   size_t expected_dex_register_map_size = 1u + 0u;
   ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
 
-  ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationKind(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(
-                1, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationInternalKind(
-                0, number_of_dex_registers, code_info));
-  ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(
-                1, number_of_dex_registers, code_info));
-  ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info));
-
-  size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
-      0, number_of_dex_registers, number_of_catalog_entries);
-  size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
-      1, number_of_dex_registers, number_of_catalog_entries);
+  ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationKind(0));
+  ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(1));
+  ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationInternalKind(0));
+  ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(1));
+  ASSERT_EQ(-2, dex_register_map.GetConstant(1));
+
+  size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+  size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
   ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0);
   ASSERT_EQ(0u, index1);
   DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -632,8 +583,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
   StackMap stack_map0 = code_info.GetStackMapAt(0);
   DexRegisterMap dex_register_map0 =
       code_info.GetDexRegisterMapOf(stack_map0, number_of_dex_registers);
-  ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers,
-                                                               number_of_catalog_entries));
+  ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_catalog_entries));
   ASSERT_EQ(255u, dex_register_map0.Size());
 
   StackMap stack_map1 = code_info.GetStackMapAt(1);
@@ -680,20 +630,20 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
   // Verify first stack map.
   StackMap sm0 = ci.GetStackMapAt(0);
   DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, number_of_dex_registers);
-  ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers, ci));
-  ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers, ci));
+  ASSERT_EQ(0, dex_registers0.GetMachineRegister(0));
+  ASSERT_EQ(-2, dex_registers0.GetConstant(1));
 
   // Verify second stack map.
   StackMap sm1 = ci.GetStackMapAt(1);
   DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, number_of_dex_registers);
-  ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers, ci));
-  ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers, ci));
+  ASSERT_EQ(0, dex_registers1.GetMachineRegister(0));
+  ASSERT_EQ(-2, dex_registers1.GetConstant(1));
 
   // Verify third stack map.
   StackMap sm2 = ci.GetStackMapAt(2);
   DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, number_of_dex_registers);
-  ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers, ci));
-  ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci));
+  ASSERT_EQ(2, dex_registers2.GetMachineRegister(0));
+  ASSERT_EQ(-2, dex_registers2.GetConstant(1));
 
   // Verify dex register map offsets.
   ASSERT_EQ(sm0.GetDexRegisterMapOffset(),
@@ -833,8 +783,8 @@ TEST(StackMapTest, InlineTest) {
     StackMap sm0 = ci.GetStackMapAt(0);
 
     DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, 2);
-    ASSERT_EQ(0, dex_registers0.GetStackOffsetInBytes(0, 2, ci));
-    ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci));
+    ASSERT_EQ(0, dex_registers0.GetStackOffsetInBytes(0));
+    ASSERT_EQ(4, dex_registers0.GetConstant(1));
 
     InlineInfo if0 = ci.GetInlineInfoOf(sm0);
     ASSERT_EQ(2u, if0.GetDepth());
@@ -844,12 +794,12 @@ TEST(StackMapTest, InlineTest) {
     ASSERT_TRUE(if0.EncodesArtMethodAtDepth(1));
 
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if0, 1);
-    ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0, 1, ci));
+    ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0));
 
     DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if0, 3);
-    ASSERT_EQ(16, dex_registers2.GetStackOffsetInBytes(0, 3, ci));
-    ASSERT_EQ(20, dex_registers2.GetConstant(1, 3, ci));
-    ASSERT_EQ(15, dex_registers2.GetMachineRegister(2, 3, ci));
+    ASSERT_EQ(16, dex_registers2.GetStackOffsetInBytes(0));
+    ASSERT_EQ(20, dex_registers2.GetConstant(1));
+    ASSERT_EQ(15, dex_registers2.GetMachineRegister(2));
   }
 
   {
@@ -857,8 +807,8 @@ TEST(StackMapTest, InlineTest) {
     StackMap sm1 = ci.GetStackMapAt(1);
 
     DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm1, 2);
-    ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci));
-    ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci));
+    ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0));
+    ASSERT_EQ(0, dex_registers0.GetConstant(1));
 
     InlineInfo if1 = ci.GetInlineInfoOf(sm1);
     ASSERT_EQ(3u, if1.GetDepth());
@@ -870,12 +820,12 @@ TEST(StackMapTest, InlineTest) {
     ASSERT_TRUE(if1.EncodesArtMethodAtDepth(2));
 
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if1, 1);
-    ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0, 1, ci));
+    ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0));
 
     DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if1, 3);
-    ASSERT_EQ(80, dex_registers2.GetStackOffsetInBytes(0, 3, ci));
-    ASSERT_EQ(10, dex_registers2.GetConstant(1, 3, ci));
-    ASSERT_EQ(5, dex_registers2.GetMachineRegister(2, 3, ci));
+    ASSERT_EQ(80, dex_registers2.GetStackOffsetInBytes(0));
+    ASSERT_EQ(10, dex_registers2.GetConstant(1));
+    ASSERT_EQ(5, dex_registers2.GetMachineRegister(2));
 
     ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(2));
   }
@@ -886,7 +836,7 @@ TEST(StackMapTest, InlineTest) {
 
     DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2, 2);
     ASSERT_FALSE(dex_registers0.IsDexRegisterLive(0));
-    ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci));
+    ASSERT_EQ(4, dex_registers0.GetConstant(1));
     ASSERT_FALSE(sm2.HasInlineInfo());
   }
 
@@ -895,8 +845,8 @@ TEST(StackMapTest, InlineTest) {
     StackMap sm3 = ci.GetStackMapAt(3);
 
     DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm3, 2);
-    ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci));
-    ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci));
+    ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0));
+    ASSERT_EQ(0, dex_registers0.GetConstant(1));
 
     InlineInfo if2 = ci.GetInlineInfoOf(sm3);
     ASSERT_EQ(3u, if2.GetDepth());
@@ -910,34 +860,39 @@ TEST(StackMapTest, InlineTest) {
     ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(0));
 
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, if2, 1);
-    ASSERT_EQ(2, dex_registers1.GetMachineRegister(0, 1, ci));
+    ASSERT_EQ(2, dex_registers1.GetMachineRegister(0));
 
     DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(2, if2, 2);
     ASSERT_FALSE(dex_registers2.IsDexRegisterLive(0));
-    ASSERT_EQ(3, dex_registers2.GetMachineRegister(1, 2, ci));
+    ASSERT_EQ(3, dex_registers2.GetMachineRegister(1));
   }
 }
 
-TEST(StackMapTest, CodeOffsetTest) {
-  // Test minimum alignments, and decoding.
-  CodeOffset offset_thumb2 =
-      CodeOffset::FromOffset(kThumb2InstructionAlignment, InstructionSet::kThumb2);
-  CodeOffset offset_arm64 =
-      CodeOffset::FromOffset(kArm64InstructionAlignment, InstructionSet::kArm64);
-  CodeOffset offset_x86 =
-      CodeOffset::FromOffset(kX86InstructionAlignment, InstructionSet::kX86);
-  CodeOffset offset_x86_64 =
-      CodeOffset::FromOffset(kX86_64InstructionAlignment, InstructionSet::kX86_64);
-  CodeOffset offset_mips =
-      CodeOffset::FromOffset(kMipsInstructionAlignment, InstructionSet::kMips);
-  CodeOffset offset_mips64 =
-      CodeOffset::FromOffset(kMips64InstructionAlignment, InstructionSet::kMips64);
-  EXPECT_EQ(offset_thumb2.Uint32Value(InstructionSet::kThumb2), kThumb2InstructionAlignment);
-  EXPECT_EQ(offset_arm64.Uint32Value(InstructionSet::kArm64), kArm64InstructionAlignment);
-  EXPECT_EQ(offset_x86.Uint32Value(InstructionSet::kX86), kX86InstructionAlignment);
-  EXPECT_EQ(offset_x86_64.Uint32Value(InstructionSet::kX86_64), kX86_64InstructionAlignment);
-  EXPECT_EQ(offset_mips.Uint32Value(InstructionSet::kMips), kMipsInstructionAlignment);
-  EXPECT_EQ(offset_mips64.Uint32Value(InstructionSet::kMips64), kMips64InstructionAlignment);
+TEST(StackMapTest, PackedNativePcTest) {
+  uint32_t packed_thumb2 =
+      StackMap::PackNativePc(kThumb2InstructionAlignment, InstructionSet::kThumb2);
+  uint32_t packed_arm64 =
+      StackMap::PackNativePc(kArm64InstructionAlignment, InstructionSet::kArm64);
+  uint32_t packed_x86 =
+      StackMap::PackNativePc(kX86InstructionAlignment, InstructionSet::kX86);
+  uint32_t packed_x86_64 =
+      StackMap::PackNativePc(kX86_64InstructionAlignment, InstructionSet::kX86_64);
+  uint32_t packed_mips =
+      StackMap::PackNativePc(kMipsInstructionAlignment, InstructionSet::kMips);
+  uint32_t packed_mips64 =
+      StackMap::PackNativePc(kMips64InstructionAlignment, InstructionSet::kMips64);
+  EXPECT_EQ(StackMap::UnpackNativePc(packed_thumb2, InstructionSet::kThumb2),
+            kThumb2InstructionAlignment);
+  EXPECT_EQ(StackMap::UnpackNativePc(packed_arm64, InstructionSet::kArm64),
+            kArm64InstructionAlignment);
+  EXPECT_EQ(StackMap::UnpackNativePc(packed_x86, InstructionSet::kX86),
+            kX86InstructionAlignment);
+  EXPECT_EQ(StackMap::UnpackNativePc(packed_x86_64, InstructionSet::kX86_64),
+            kX86_64InstructionAlignment);
+  EXPECT_EQ(StackMap::UnpackNativePc(packed_mips, InstructionSet::kMips),
+            kMipsInstructionAlignment);
+  EXPECT_EQ(StackMap::UnpackNativePc(packed_mips64, InstructionSet::kMips64),
+            kMips64InstructionAlignment);
 }
 
 TEST(StackMapTest, TestDeduplicateStackMask) {
+// + +bool SuperblockCloner::CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs) const { + DCHECK(live_outs->empty()); + for (uint32_t idx : orig_bb_set_.Indexes()) { + HBasicBlock* block = GetBlockById(idx); + + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + DCHECK(instr->IsClonable()); + + if (IsUsedOutsideRegion(instr, orig_bb_set_)) { + live_outs->FindOrAdd(instr, instr); + } + } + + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (!instr->IsClonable()) { + return false; + } + + if (IsUsedOutsideRegion(instr, orig_bb_set_)) { + // TODO: Investigate why HNewInstance, HCheckCast has a requirement for the input. + if (instr->IsLoadClass()) { + return false; + } + live_outs->FindOrAdd(instr, instr); + } + } + } + return true; +} + +void SuperblockCloner::ConstructSubgraphClosedSSA() { + if (live_outs_.empty()) { + return; + } + + ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner)); + SearchForSubgraphExits(&exits); + if (exits.empty()) { + DCHECK(live_outs_.empty()); + return; + } + + DCHECK_EQ(exits.size(), 1u); + HBasicBlock* exit_block = exits[0]; + // There should be no critical edges. + DCHECK_EQ(exit_block->GetPredecessors().size(), 1u); + DCHECK(exit_block->GetPhis().IsEmpty()); + + // For each live-out value insert a phi into the loop exit and replace all the value's uses + // external to the loop with this phi. The phi will have the original value as its only input; + // after copying is done FixSubgraphClosedSSAAfterCloning will add a corresponding copy of the + // original value as the second input thus merging data flow from the original and copy parts of + // the subgraph. Also update the record in the live_outs_ map from (value, value) to + // (value, new_phi). + for (auto live_out_it = live_outs_.begin(); live_out_it != live_outs_.end(); ++live_out_it) { + HInstruction* value = live_out_it->first; + HPhi* phi = new (arena_) HPhi(arena_, kNoRegNumber, 0, value->GetType()); + + if (value->GetType() == DataType::Type::kReference) { + phi->SetReferenceTypeInfo(value->GetReferenceTypeInfo()); + } + + exit_block->AddPhi(phi); + live_out_it->second = phi; + + const HUseList<HInstruction*>& uses = value->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + if (!IsInOrigBBSet(user->GetBlock())) { + user->ReplaceInput(phi, index); + } + } + + const HUseList<HEnvironment*>& env_uses = value->GetEnvUses(); + for (auto it = env_uses.begin(), e = env_uses.end(); it != e; /* ++it below */) { + HEnvironment* env = it->GetUser(); + size_t index = it->GetIndex(); + ++it; + if (!IsInOrigBBSet(env->GetHolder()->GetBlock())) { + env->ReplaceInput(phi, index); + } + } + + phi->AddInput(value); + } +} + +void SuperblockCloner::FixSubgraphClosedSSAAfterCloning() { + for (auto it : live_outs_) { + DCHECK(it.first != it.second); + HInstruction* orig_value = it.first; + HPhi* phi = it.second->AsPhi(); + HInstruction* copy_value = GetInstrCopy(orig_value); + // Copy edges are inserted after the original so we can just add new input to the phi. + phi->AddInput(copy_value); + } +} + +// // Debug and logging methods. 
@@ -644,7 +751,6 @@ void DumpBBSet(const ArenaBitVector* set) {
 }
 
 void SuperblockCloner::DumpInputSets() {
-  std::cout << graph_->PrettyMethod() << "\n";
   std::cout << "orig_bb_set:\n";
   for (uint32_t idx : orig_bb_set_.Indexes()) {
     std::cout << idx << "\n";
   }
@@ -680,7 +786,9 @@ SuperblockCloner::SuperblockCloner(HGraph* graph,
       bb_map_(bb_map),
       hir_map_(hir_map),
       outer_loop_(nullptr),
-      outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) {
+      outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
+      live_outs_(std::less<HInstruction*>(),
+                 graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)) {
   orig_bb_set_.Copy(orig_bb_set);
 }
 
@@ -699,26 +807,19 @@ bool SuperblockCloner::IsSubgraphClonable() const {
     return false;
   }
 
-  // Check that there are no instructions defined in the subgraph and used outside.
-  // TODO: Improve this by accepting graph with such uses but only one exit.
-  for (uint32_t idx : orig_bb_set_.Indexes()) {
-    HBasicBlock* block = GetBlockById(idx);
+  HInstructionMap live_outs(
+      std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
 
-    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-      HInstruction* instr = it.Current();
-      if (!instr->IsClonable() ||
-          IsUsedOutsideRegion(instr, orig_bb_set_)) {
-        return false;
-      }
-    }
+  if (!CollectLiveOutsAndCheckClonable(&live_outs)) {
+    return false;
+  }
 
-    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
-      HInstruction* instr = it.Current();
-      if (!instr->IsClonable() ||
-          IsUsedOutsideRegion(instr, orig_bb_set_)) {
-        return false;
-      }
-    }
+  ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+  SearchForSubgraphExits(&exits);
+
+  // The only loops with live-outs which are currently supported are loops with a single exit.
+  if (!live_outs.empty() && exits.size() != 1) {
+    return false;
   }
 
   return true;
@@ -794,8 +895,10 @@ void SuperblockCloner::Run() {
     DumpInputSets();
   }
 
+  CollectLiveOutsAndCheckClonable(&live_outs_);
   // Find an area in the graph for which control flow information should be adjusted.
   FindAndSetLocalAreaForAdjustments();
+  ConstructSubgraphClosedSSA();
   // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be
   // adjusted.
   CloneBasicBlocks();
@@ -819,6 +922,7 @@ void SuperblockCloner::Run() {
   AdjustControlFlowInfo();
   // Fix data flow of the graph.
   ResolveDataFlow();
+  FixSubgraphClosedSSAAfterCloning();
 }
 
 void SuperblockCloner::CleanUp() {
@@ -985,8 +1089,14 @@ HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) {
   HBasicBlock* loop_header = loop_info_->GetHeader();
   // Check that loop info is up-to-date.
   DCHECK(loop_info_ == loop_header->GetLoopInformation());
-
   HGraph* graph = loop_header->GetGraph();
+
+  if (kSuperblockClonerLogging) {
+    std::cout << "Method: " << graph->PrettyMethod() << std::endl;
+    std::cout << "Scalar loop " << (to_unroll ? "unrolling" : "peeling") <<
+                 " was applied to the loop <" << loop_header->GetBlockId() << ">." << std::endl;
+  }
+
   ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool());
 
   HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
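Note: the reworked IsSubgraphClonable() above relaxes the old rule that rejected any region defining values used outside: live-outs are now accepted, but only when the region has exactly one exit. A compact, hypothetical model of that gating decision (not ART code):

    // clonable_gating_sketch.cc: hypothetical helper mirroring the new check.
    #include <cassert>
    #include <cstddef>

    bool IsCloneable(bool all_instructions_clonable,
                     size_t live_out_count,
                     size_t exit_count) {
      if (!all_instructions_clonable) {
        return false;  // CollectLiveOutsAndCheckClonable() failed.
      }
      // Live-outs require the single-exit shape that the exit phi relies on.
      if (live_out_count != 0 && exit_count != 1) {
        return false;
      }
      return true;
    }

    int main() {
      assert(IsCloneable(true, 0, 2));    // No live-outs: multiple exits fine.
      assert(IsCloneable(true, 3, 1));    // Live-outs with a single exit: fine.
      assert(!IsCloneable(true, 3, 2));   // Live-outs with two exits: rejected.
      assert(!IsCloneable(false, 0, 1));  // Unclonable instruction: rejected.
      return 0;
    }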
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index e0931674cb..f21172131b 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -218,7 +218,7 @@ class SuperblockCloner : public ValueObject {
 
  private:
   // Fills the 'exits' vector with the subgraph exits.
-  void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
+  void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const;
 
   // Finds and records information about the area in the graph for which control flow (back edges,
   // loops, dominators) needs to be adjusted.
@@ -240,6 +240,33 @@ class SuperblockCloner : public ValueObject {
   void ResolveDataFlow();
 
   //
+  // Helpers for live-outs processing and Subgraph-closed SSA.
+  //
+  // - live-outs - values which are defined inside the subgraph and have uses outside.
+  // - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph
+  //   have no outside uses except for the phi-nodes in the subgraph exits.
+  //
+  // Note: for now, if the subgraph has live-outs it is clonable only if it has a single exit;
+  // this makes the subgraph-closed SSA form construction much easier.
+  //
+  // TODO: Support subgraphs with live-outs and multiple exits.
+  //
+
+  // For each live-out value 'val' in the region, puts a record <val, val> into the map.
+  // Returns whether all of the instructions in the subgraph are clonable.
+  bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const;
+
+  // Constructs Subgraph-closed SSA; precondition: the subgraph has a single exit.
+  //
+  // For each live-out 'val' in the 'live_outs_' map, inserts an HPhi 'phi' into the exit node,
+  // updates the record in the map to <val, phi> and replaces all outside uses with this phi.
+  void ConstructSubgraphClosedSSA();
+
+  // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding
+  // (<val, phi>) phi after the cloning is done.
+  void FixSubgraphClosedSSAAfterCloning();
+
+  //
   // Helpers for CloneBasicBlock.
   //
@@ -316,6 +343,8 @@ class SuperblockCloner : public ValueObject {
   HLoopInformation* outer_loop_;
   HBasicBlockSet outer_loop_bb_set_;
 
+  HInstructionMap live_outs_;
+
   ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
   ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);
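Note: one last reading aid, on the loops marked /* ++it below */ in ConstructSubgraphClosedSSA above. The iterator is advanced before the mutation because ReplaceInput() unlinks the current use node from the very list being walked. The same advance-before-mutate pattern, demonstrated on a plain std::list for illustration only:

    // use_list_iteration_sketch.cc: the advance-before-mutate pattern.
    #include <iostream>
    #include <list>

    int main() {
      // Stand-in for a value's use list; "retargeting" a use removes it.
      std::list<int> uses = {10, 21, 32, 43};

      for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
        int use = *it;
        // Advance first: the removal below would otherwise invalidate `it`.
        ++it;
        if (use % 2 == 0) {
          uses.remove(use);  // Mutates the list we are iterating over.
        }
      }

      for (int u : uses) {
        std::cout << u << "\n";  // Prints 21 and 43.
      }
      return 0;
    }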