Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator.cc              |    7
-rw-r--r--  compiler/optimizing/code_generator_mips.cc         |    3
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc       |    3
-rw-r--r--  compiler/optimizing/inliner.cc                     |    8
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc      |    4
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc         |    2
-rw-r--r--  compiler/optimizing/nodes.cc                       |   21
-rw-r--r--  compiler/optimizing/nodes.h                        |    5
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc         |    4
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc  |   23
-rw-r--r--  compiler/optimizing/stack_map_stream.cc            |  197
-rw-r--r--  compiler/optimizing/stack_map_stream.h             |   27
-rw-r--r--  compiler/optimizing/stack_map_test.cc              |  271
-rw-r--r--  compiler/optimizing/superblock_cloner.cc           |  154
-rw-r--r--  compiler/optimizing/superblock_cloner.h            |   31
15 files changed, 406 insertions(+), 354 deletions(-)
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index de1be5b871..b358bfabe0 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1161,8 +1161,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
// last emitted is different than the native pc of the stack map just emitted.
size_t number_of_stack_maps = stack_map_stream->GetNumberOfStackMaps();
if (number_of_stack_maps > 1) {
- DCHECK_NE(stack_map_stream->GetStackMap(number_of_stack_maps - 1).native_pc_code_offset,
- stack_map_stream->GetStackMap(number_of_stack_maps - 2).native_pc_code_offset);
+ DCHECK_NE(stack_map_stream->GetStackMapNativePcOffset(number_of_stack_maps - 1),
+ stack_map_stream->GetStackMapNativePcOffset(number_of_stack_maps - 2));
}
}
}
@@ -1174,8 +1174,7 @@ bool CodeGenerator::HasStackMapAtCurrentPc() {
if (count == 0) {
return false;
}
- CodeOffset native_pc_offset = stack_map_stream->GetStackMap(count - 1).native_pc_code_offset;
- return (native_pc_offset.Uint32Value(GetInstructionSet()) == pc);
+ return stack_map_stream->GetStackMapNativePcOffset(count - 1) == pc;
}
void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction,
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 7f3441fdf4..8be84a15bd 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1042,8 +1042,7 @@ void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) {
// Adjust native pc offsets in stack maps.
StackMapStream* stack_map_stream = GetStackMapStream();
for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) {
- uint32_t old_position =
- stack_map_stream->GetStackMap(i).native_pc_code_offset.Uint32Value(InstructionSet::kMips);
+ uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i);
uint32_t new_position = __ GetAdjustedPosition(old_position);
DCHECK_GE(new_position, old_position);
stack_map_stream->SetStackMapNativePcOffset(i, new_position);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index ee32b96daf..cd9e0e521e 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -988,8 +988,7 @@ void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
// Adjust native pc offsets in stack maps.
StackMapStream* stack_map_stream = GetStackMapStream();
for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) {
- uint32_t old_position =
- stack_map_stream->GetStackMap(i).native_pc_code_offset.Uint32Value(InstructionSet::kMips64);
+ uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i);
uint32_t new_position = __ GetAdjustedPosition(old_position);
DCHECK_GE(new_position, old_position);
stack_map_stream->SetStackMapNativePcOffset(i, new_position);
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index ffa000e34e..6900cd883a 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -20,6 +20,7 @@
#include "base/enums.h"
#include "builder.h"
#include "class_linker.h"
+#include "class_root.h"
#include "constant_folding.h"
#include "data_type-inl.h"
#include "dead_code_elimination.h"
@@ -537,7 +538,7 @@ static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder(
Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs->NewHandle(
mirror::ObjectArray<mirror::Class>::Alloc(
self,
- class_linker->GetClassRoot(ClassLinker::kClassArrayClass),
+ GetClassRoot<mirror::ObjectArray<mirror::Class>>(class_linker),
InlineCache::kIndividualCacheSize));
if (inline_cache == nullptr) {
// We got an OOME. Just clear the exception, and don't inline.
@@ -777,7 +778,7 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile(
HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
HInstruction* receiver,
uint32_t dex_pc) const {
- ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+ ArtField* field = GetClassRoot<mirror::Object>(class_linker)->GetInstanceField(0);
DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
HInstanceFieldGet* result = new (graph_->GetAllocator()) HInstanceFieldGet(
receiver,
@@ -2120,9 +2121,8 @@ bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction,
return true;
} else if (return_replacement->IsInstanceFieldGet()) {
HInstanceFieldGet* field_get = return_replacement->AsInstanceFieldGet();
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
if (field_get->GetFieldInfo().GetField() ==
- class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0)) {
+ GetClassRoot<mirror::Object>()->GetInstanceField(0)) {
return true;
}
}
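
Note: the class-root lookups in this file, and the analogous ones in instruction_simplifier.cc, nodes.cc, optimizing_compiler.cc and reference_type_propagation.cc below, all move from the ClassLinker::ClassRoot enum to the GetClassRoot<> helpers from the newly included class_root.h. A minimal sketch of the two call forms the patch uses; that the argument-less overload resolves the roots through Runtime::Current()'s ClassLinker is an assumption, not something this diff shows:

    // Sketch only; both forms are called under the mutator lock at the call sites above.
    ObjPtr<mirror::Class> object_class = GetClassRoot<mirror::Object>();  // assumed: via Runtime::Current()
    ObjPtr<mirror::Class> class_array_class =
        GetClassRoot<mirror::ObjectArray<mirror::Class>>(class_linker);   // explicit ClassLinker, as in AllocateInlineCacheHolder
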
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index ca84d421a7..63704a470e 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -18,6 +18,7 @@
#include "art_method-inl.h"
#include "class_linker-inl.h"
+#include "class_root.h"
#include "data_type-inl.h"
#include "escape.h"
#include "intrinsics.h"
@@ -1563,8 +1564,7 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) {
{
ScopedObjectAccess soa(Thread::Current());
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
- ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+ ArtField* field = GetClassRoot<mirror::Object>()->GetInstanceField(0);
DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
if (field_get->GetFieldInfo().GetField() != field) {
return false;
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 5287b4b2fa..fecf1ccbfa 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -25,7 +25,7 @@
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
-#include "mirror/string.h"
+#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 7f78dc257e..ef8a757ad0 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -22,6 +22,7 @@
#include "base/bit_vector-inl.h"
#include "base/stl_util.h"
#include "class_linker-inl.h"
+#include "class_root.h"
#include "code_generator.h"
#include "common_dominator.h"
#include "intrinsics.h"
@@ -40,9 +41,8 @@ static constexpr bool kEnableFloatingPointStaticEvaluation = (FLT_EVAL_METHOD ==
void HGraph::InitializeInexactObjectRTI(VariableSizedHandleScope* handles) {
ScopedObjectAccess soa(Thread::Current());
// Create the inexact Object reference type and store it in the HGraph.
- ClassLinker* linker = Runtime::Current()->GetClassLinker();
inexact_object_rti_ = ReferenceTypeInfo::Create(
- handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)),
+ handles->NewHandle(GetClassRoot<mirror::Object>()),
/* is_exact */ false);
}
@@ -1121,6 +1121,23 @@ void HEnvironment::RemoveAsUserOfInput(size_t index) const {
user->FixUpUserRecordsAfterEnvUseRemoval(before_env_use_node);
}
+void HEnvironment::ReplaceInput(HInstruction* replacement, size_t index) {
+ const HUserRecord<HEnvironment*>& env_use_record = vregs_[index];
+ HInstruction* orig_instr = env_use_record.GetInstruction();
+
+ DCHECK(orig_instr != replacement);
+
+ HUseList<HEnvironment*>::iterator before_use_node = env_use_record.GetBeforeUseNode();
+ // Note: fixup_end remains valid across splice_after().
+ auto fixup_end = replacement->env_uses_.empty() ? replacement->env_uses_.begin()
+ : ++replacement->env_uses_.begin();
+ replacement->env_uses_.splice_after(replacement->env_uses_.before_begin(),
+ env_use_record.GetInstruction()->env_uses_,
+ before_use_node);
+ replacement->FixUpUserRecordsAfterEnvUseInsertion(fixup_end);
+ orig_instr->FixUpUserRecordsAfterEnvUseRemoval(before_use_node);
+}
+
HInstruction* HInstruction::GetNextDisregardingMoves() const {
HInstruction* next = GetNext();
while (next != nullptr && next->IsParallelMove()) {
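
Note: the "fixup_end remains valid across splice_after()" comment in the new HEnvironment::ReplaceInput relies on the usual forward-list splicing guarantee: nodes are moved between lists without being copied, so iterators to them (including the stored before-use node) keep referring to the same nodes. env_uses_ is ART's intrusive HUseList rather than std::forward_list, but the property is the same; a small standalone check of it, illustration only:

    #include <cassert>
    #include <forward_list>

    int main() {
      std::forward_list<int> dst{7, 8};
      std::forward_list<int> src{1, 2, 3};
      auto fixup_end = ++dst.begin();    // taken before the splice, points at 8
      auto before = src.before_begin();  // the node after it (the 1) will be moved
      // Move one node from src to the front of dst; nothing is copied or reallocated.
      dst.splice_after(dst.before_begin(), src, before);
      assert(dst.front() == 1);          // dst is now {1, 7, 8}
      assert(*fixup_end == 8);           // the earlier iterator is still valid
      assert(src.front() == 2);          // src is now {2, 3}
      return 0;
    }
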
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 09d9c57a33..3fd5b6b02d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1909,6 +1909,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
void RemoveAsUserOfInput(size_t index) const;
+ // Replaces the input at the position 'index' with the replacement; the replacement and old
+ // input instructions' env_uses_ lists are adjusted. The function works similarly to
+ // HInstruction::ReplaceInput.
+ void ReplaceInput(HInstruction* replacement, size_t index);
+
size_t Size() const { return vregs_.size(); }
HEnvironment* GetParent() const { return parent_; }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index c4977decd9..79ac6b9b9d 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -31,6 +31,7 @@
#include "base/scoped_arena_allocator.h"
#include "base/timing_logger.h"
#include "builder.h"
+#include "class_root.h"
#include "code_generator.h"
#include "compiled_method.h"
#include "compiler.h"
@@ -1309,13 +1310,12 @@ bool OptimizingCompiler::JitCompile(Thread* self,
size_t method_info_size = 0;
codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size);
size_t number_of_roots = codegen->GetNumberOfJitRoots();
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
// We allocate an object array to ensure the JIT roots that we will collect in EmitJitRoots
// will be visible by the GC between EmitLiterals and CommitCode. Once CommitCode is
// executed, this array is not needed.
Handle<mirror::ObjectArray<mirror::Object>> roots(
hs.NewHandle(mirror::ObjectArray<mirror::Object>::Alloc(
- self, class_linker->GetClassRoot(ClassLinker::kObjectArrayClass), number_of_roots)));
+ self, GetClassRoot<mirror::ObjectArray<mirror::Object>>(), number_of_roots)));
if (roots == nullptr) {
// Out of memory, just clear the exception to avoid any Java exception uncaught problems.
MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit);
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index ecfa790b91..f3fe62561f 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -22,6 +22,7 @@
#include "base/scoped_arena_containers.h"
#include "base/enums.h"
#include "class_linker-inl.h"
+#include "class_root.h"
#include "handle_scope-inl.h"
#include "mirror/class-inl.h"
#include "mirror/dex_cache.h"
@@ -40,43 +41,40 @@ static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint(
}
static inline ReferenceTypeInfo::TypeHandle GetRootHandle(VariableSizedHandleScope* handles,
- ClassLinker::ClassRoot class_root,
+ ClassRoot class_root,
ReferenceTypeInfo::TypeHandle* cache) {
if (!ReferenceTypeInfo::IsValidHandle(*cache)) {
// Mutator lock is required for NewHandle.
- ClassLinker* linker = Runtime::Current()->GetClassLinker();
ScopedObjectAccess soa(Thread::Current());
- *cache = handles->NewHandle(linker->GetClassRoot(class_root));
+ *cache = handles->NewHandle(GetClassRoot(class_root));
}
return *cache;
}
ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() {
- return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_);
+ return GetRootHandle(handles_, ClassRoot::kJavaLangObject, &object_class_handle_);
}
ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetClassClassHandle() {
- return GetRootHandle(handles_, ClassLinker::kJavaLangClass, &class_class_handle_);
+ return GetRootHandle(handles_, ClassRoot::kJavaLangClass, &class_class_handle_);
}
ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetMethodHandleClassHandle() {
return GetRootHandle(handles_,
- ClassLinker::kJavaLangInvokeMethodHandleImpl,
+ ClassRoot::kJavaLangInvokeMethodHandleImpl,
&method_handle_class_handle_);
}
ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetMethodTypeClassHandle() {
- return GetRootHandle(handles_,
- ClassLinker::kJavaLangInvokeMethodType,
- &method_type_class_handle_);
+ return GetRootHandle(handles_, ClassRoot::kJavaLangInvokeMethodType, &method_type_class_handle_);
}
ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetStringClassHandle() {
- return GetRootHandle(handles_, ClassLinker::kJavaLangString, &string_class_handle_);
+ return GetRootHandle(handles_, ClassRoot::kJavaLangString, &string_class_handle_);
}
ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetThrowableClassHandle() {
- return GetRootHandle(handles_, ClassLinker::kJavaLangThrowable, &throwable_class_handle_);
+ return GetRootHandle(handles_, ClassRoot::kJavaLangThrowable, &throwable_class_handle_);
}
class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
@@ -341,8 +339,7 @@ static void BoundTypeForClassCheck(HInstruction* check) {
{
ScopedObjectAccess soa(Thread::Current());
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
- ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+ ArtField* field = GetClassRoot<mirror::Object>()->GetInstanceField(0);
DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
if (field_get->GetFieldInfo().GetField() != field) {
return;
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index aa28c8b500..b1dcb68415 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -25,6 +25,14 @@
namespace art {
+uint32_t StackMapStream::GetStackMapNativePcOffset(size_t i) {
+ return StackMap::UnpackNativePc(stack_maps_[i].packed_native_pc, instruction_set_);
+}
+
+void StackMapStream::SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
+ stack_maps_[i].packed_native_pc = StackMap::PackNativePc(native_pc_offset, instruction_set_);
+}
+
void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
uint32_t native_pc_offset,
uint32_t register_mask,
@@ -33,7 +41,7 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
uint8_t inlining_depth) {
DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
current_entry_.dex_pc = dex_pc;
- current_entry_.native_pc_code_offset = CodeOffset::FromOffset(native_pc_offset, instruction_set_);
+ current_entry_.packed_native_pc = StackMap::PackNativePc(native_pc_offset, instruction_set_);
current_entry_.register_mask = register_mask;
current_entry_.sp_mask = sp_mask;
current_entry_.inlining_depth = inlining_depth;
@@ -48,10 +56,6 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
current_entry_.dex_register_entry.live_dex_registers_mask->ClearAllBits();
}
- if (sp_mask != nullptr) {
- stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
- }
-
current_dex_register_ = 0;
}
@@ -199,9 +203,6 @@ static MemoryRegion EncodeMemoryRegion(Vector* out, size_t* bit_offset, uint32_t
return region;
}
-template<uint32_t NumColumns>
-using ScopedBitTableBuilder = BitTableBuilder<NumColumns, ScopedArenaAllocatorAdapter<uint32_t>>;
-
size_t StackMapStream::PrepareForFillIn() {
size_t bit_offset = 0;
out_.clear();
@@ -220,27 +221,36 @@ size_t StackMapStream::PrepareForFillIn() {
PrepareMethodIndices();
// Dedup stack masks. Needs to be done first as it modifies the stack map entry.
- size_t stack_mask_bits = stack_mask_max_ + 1; // Need room for max element too.
- size_t num_stack_masks = PrepareStackMasks(stack_mask_bits);
+ BitmapTableBuilder stack_mask_builder(allocator_);
+ for (StackMapEntry& stack_map : stack_maps_) {
+ BitVector* mask = stack_map.sp_mask;
+ size_t num_bits = (mask != nullptr) ? mask->GetNumberOfBits() : 0;
+ if (num_bits != 0) {
+ stack_map.stack_mask_index = stack_mask_builder.Dedup(mask->GetRawStorage(), num_bits);
+ } else {
+ stack_map.stack_mask_index = StackMap::kNoValue;
+ }
+ }
// Dedup register masks. Needs to be done first as it modifies the stack map entry.
- size_t num_register_masks = PrepareRegisterMasks();
-
- // Write dex register maps.
- MemoryRegion dex_register_map_region =
- EncodeMemoryRegion(&out_, &bit_offset, dex_register_map_bytes * kBitsPerByte);
- for (DexRegisterMapEntry& entry : dex_register_entries_) {
- size_t entry_size = entry.ComputeSize(location_catalog_entries_.size());
- if (entry_size != 0) {
- DexRegisterMap dex_register_map(
- dex_register_map_region.Subregion(entry.offset, entry_size));
- FillInDexRegisterMap(dex_register_map,
- entry.num_dex_registers,
- *entry.live_dex_registers_mask,
- entry.locations_start_index);
+ BitTableBuilder<std::array<uint32_t, RegisterMask::kCount>> register_mask_builder(allocator_);
+ for (StackMapEntry& stack_map : stack_maps_) {
+ uint32_t register_mask = stack_map.register_mask;
+ if (register_mask != 0) {
+ uint32_t shift = LeastSignificantBit(register_mask);
+ std::array<uint32_t, RegisterMask::kCount> entry = {
+ register_mask >> shift,
+ shift,
+ };
+ stack_map.register_mask_index = register_mask_builder.Dedup(&entry);
+ } else {
+ stack_map.register_mask_index = StackMap::kNoValue;
}
}
+ // Allocate space for dex register maps.
+ EncodeMemoryRegion(&out_, &bit_offset, dex_register_map_bytes * kBitsPerByte);
+
// Write dex register catalog.
EncodeVarintBits(&out_, &bit_offset, location_catalog_entries_.size());
size_t location_catalog_bytes = ComputeDexRegisterLocationCatalogSize();
@@ -258,20 +268,21 @@ size_t StackMapStream::PrepareForFillIn() {
DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size());
// Write stack maps.
- ScopedArenaAllocatorAdapter<void> adapter = allocator_->Adapter(kArenaAllocStackMapStream);
- ScopedBitTableBuilder<StackMap::Field::kCount> stack_map_builder((adapter));
- ScopedBitTableBuilder<InvokeInfo::Field::kCount> invoke_info_builder((adapter));
- ScopedBitTableBuilder<InlineInfo::Field::kCount> inline_info_builder((adapter));
+ BitTableBuilder<std::array<uint32_t, StackMap::kCount>> stack_map_builder(allocator_);
+ BitTableBuilder<std::array<uint32_t, InvokeInfo::kCount>> invoke_info_builder(allocator_);
+ BitTableBuilder<std::array<uint32_t, InlineInfo::kCount>> inline_info_builder(allocator_);
for (const StackMapEntry& entry : stack_maps_) {
if (entry.dex_method_index != dex::kDexNoIndex) {
- invoke_info_builder.AddRow(
- entry.native_pc_code_offset.CompressedValue(),
+ std::array<uint32_t, InvokeInfo::kCount> invoke_info_entry {
+ entry.packed_native_pc,
entry.invoke_type,
- entry.dex_method_index_idx);
+ entry.dex_method_index_idx
+ };
+ invoke_info_builder.Add(invoke_info_entry);
}
// Set the inlining info.
- uint32_t inline_info_index = StackMap::kNoValue;
+ uint32_t inline_info_index = inline_info_builder.size();
DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index];
@@ -281,52 +292,30 @@ size_t StackMapStream::PrepareForFillIn() {
method_index_idx = High32Bits(reinterpret_cast<uintptr_t>(inline_entry.method));
extra_data = Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method));
}
- uint32_t index = inline_info_builder.AddRow(
+ std::array<uint32_t, InlineInfo::kCount> inline_info_entry {
(depth == entry.inlining_depth - 1) ? InlineInfo::kLast : InlineInfo::kMore,
method_index_idx,
inline_entry.dex_pc,
extra_data,
- dex_register_entries_[inline_entry.dex_register_map_index].offset);
- if (depth == 0) {
- inline_info_index = index;
- }
+ dex_register_entries_[inline_entry.dex_register_map_index].offset,
+ };
+ inline_info_builder.Add(inline_info_entry);
}
- stack_map_builder.AddRow(
- entry.native_pc_code_offset.CompressedValue(),
+ std::array<uint32_t, StackMap::kCount> stack_map_entry {
+ entry.packed_native_pc,
entry.dex_pc,
dex_register_entries_[entry.dex_register_map_index].offset,
- inline_info_index,
+ entry.inlining_depth != 0 ? inline_info_index : InlineInfo::kNoValue,
entry.register_mask_index,
- entry.stack_mask_index);
+ entry.stack_mask_index,
+ };
+ stack_map_builder.Add(stack_map_entry);
}
stack_map_builder.Encode(&out_, &bit_offset);
invoke_info_builder.Encode(&out_, &bit_offset);
inline_info_builder.Encode(&out_, &bit_offset);
-
- // Write register masks table.
- ScopedBitTableBuilder<1> register_mask_builder((adapter));
- for (size_t i = 0; i < num_register_masks; ++i) {
- register_mask_builder.AddRow(register_masks_[i]);
- }
register_mask_builder.Encode(&out_, &bit_offset);
-
- // Write stack masks table.
- EncodeVarintBits(&out_, &bit_offset, stack_mask_bits);
- out_.resize(BitsToBytesRoundUp(bit_offset + stack_mask_bits * num_stack_masks));
- BitMemoryRegion stack_mask_region(MemoryRegion(out_.data(), out_.size()),
- bit_offset,
- stack_mask_bits * num_stack_masks);
- if (stack_mask_bits > 0) {
- for (size_t i = 0; i < num_stack_masks; ++i) {
- size_t stack_mask_bytes = BitsToBytesRoundUp(stack_mask_bits);
- BitMemoryRegion src(MemoryRegion(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes));
- BitMemoryRegion dst = stack_mask_region.Subregion(i * stack_mask_bits, stack_mask_bits);
- for (size_t bit_index = 0; bit_index < stack_mask_bits; bit_index += BitSizeOf<uint32_t>()) {
- size_t num_bits = std::min<size_t>(stack_mask_bits - bit_index, BitSizeOf<uint32_t>());
- dst.StoreBits(bit_index, src.LoadBits(bit_index, num_bits), num_bits);
- }
- }
- }
+ stack_mask_builder.Encode(&out_, &bit_offset);
return UnsignedLeb128Size(out_.size()) + out_.size();
}
@@ -339,6 +328,22 @@ void StackMapStream::FillInCodeInfo(MemoryRegion region) {
uint8_t* ptr = EncodeUnsignedLeb128(region.begin(), out_.size());
region.CopyFromVector(ptr - region.begin(), out_);
+ // Write dex register maps.
+ CodeInfo code_info(region);
+ for (DexRegisterMapEntry& entry : dex_register_entries_) {
+ size_t entry_size = entry.ComputeSize(location_catalog_entries_.size());
+ if (entry_size != 0) {
+ DexRegisterMap dex_register_map(
+ code_info.dex_register_maps_.Subregion(entry.offset, entry_size),
+ entry.num_dex_registers,
+ code_info);
+ FillInDexRegisterMap(dex_register_map,
+ entry.num_dex_registers,
+ *entry.live_dex_registers_mask,
+ entry.locations_start_index);
+ }
+ }
+
// Verify all written data in debug build.
if (kIsDebugBuild) {
CheckCodeInfo(region);
@@ -363,7 +368,6 @@ void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map,
dex_register_map.SetLocationCatalogEntryIndex(
index_in_dex_register_locations,
location_catalog_entry_index,
- num_dex_registers,
location_catalog_entries_.size());
}
}
@@ -420,8 +424,7 @@ bool StackMapStream::DexRegisterMapEntryEquals(const DexRegisterMapEntry& a,
}
// Helper for CheckCodeInfo - check that register map has the expected content.
-void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info,
- const DexRegisterMap& dex_register_map,
+void StackMapStream::CheckDexRegisterMap(const DexRegisterMap& dex_register_map,
size_t num_dex_registers,
BitVector* live_dex_registers_mask,
size_t dex_register_locations_index) const {
@@ -438,8 +441,7 @@ void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info,
<< dex_register_map.IsValid() << " " << dex_register_map.IsDexRegisterLive(reg);
} else {
DCHECK(dex_register_map.IsDexRegisterLive(reg));
- DexRegisterLocation seen = dex_register_map.GetDexRegisterLocation(
- reg, num_dex_registers, code_info);
+ DexRegisterLocation seen = dex_register_map.GetDexRegisterLocation(reg);
DCHECK_EQ(expected.GetKind(), seen.GetKind());
DCHECK_EQ(expected.GetValue(), seen.GetValue());
}
@@ -449,17 +451,6 @@ void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info,
}
}
-size_t StackMapStream::PrepareRegisterMasks() {
- register_masks_.resize(stack_maps_.size(), 0u);
- ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream));
- for (StackMapEntry& stack_map : stack_maps_) {
- const size_t index = dedupe.size();
- stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second;
- register_masks_[index] = stack_map.register_mask;
- }
- return dedupe.size();
-}
-
void StackMapStream::PrepareMethodIndices() {
CHECK(method_indices_.empty());
method_indices_.resize(stack_maps_.size() + inline_infos_.size());
@@ -482,35 +473,10 @@ void StackMapStream::PrepareMethodIndices() {
method_indices_.resize(dedupe.size());
}
-
-size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
- // Preallocate memory since we do not want it to move (the dedup map will point into it).
- const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
- stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u);
- // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later
- // when copying out from stack_masks_.
- ScopedArenaUnorderedMap<MemoryRegion,
- size_t,
- FNVHash<MemoryRegion>,
- MemoryRegion::ContentEquals> dedup(
- stack_maps_.size(), allocator_->Adapter(kArenaAllocStackMapStream));
- for (StackMapEntry& stack_map : stack_maps_) {
- size_t index = dedup.size();
- MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size);
- BitMemoryRegion stack_mask_bits(stack_mask);
- for (size_t i = 0; i < entry_size_in_bits; i++) {
- stack_mask_bits.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i));
- }
- stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second;
- }
- return dedup.size();
-}
-
// Check that all StackMapStream inputs are correctly encoded by trying to read them back.
void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
CodeInfo code_info(region);
DCHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size());
- DCHECK_EQ(code_info.GetNumberOfStackMaskBits(), static_cast<uint32_t>(stack_mask_max_ + 1));
DCHECK_EQ(code_info.GetNumberOfLocationCatalogEntries(), location_catalog_entries_.size());
size_t invoke_info_index = 0;
for (size_t s = 0; s < stack_maps_.size(); ++s) {
@@ -519,33 +485,29 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
// Check main stack map fields.
DCHECK_EQ(stack_map.GetNativePcOffset(instruction_set_),
- entry.native_pc_code_offset.Uint32Value(instruction_set_));
+ StackMap::UnpackNativePc(entry.packed_native_pc, instruction_set_));
DCHECK_EQ(stack_map.GetDexPc(), entry.dex_pc);
DCHECK_EQ(stack_map.GetRegisterMaskIndex(), entry.register_mask_index);
DCHECK_EQ(code_info.GetRegisterMaskOf(stack_map), entry.register_mask);
- const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits();
DCHECK_EQ(stack_map.GetStackMaskIndex(), entry.stack_mask_index);
BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map);
if (entry.sp_mask != nullptr) {
DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
- for (size_t b = 0; b < num_stack_mask_bits; b++) {
- DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
+ for (size_t b = 0; b < stack_mask.size_in_bits(); b++) {
+ DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b)) << b;
}
} else {
- for (size_t b = 0; b < num_stack_mask_bits; b++) {
- DCHECK_EQ(stack_mask.LoadBit(b), 0u);
- }
+ DCHECK_EQ(stack_mask.size_in_bits(), 0u);
}
if (entry.dex_method_index != dex::kDexNoIndex) {
InvokeInfo invoke_info = code_info.GetInvokeInfo(invoke_info_index);
DCHECK_EQ(invoke_info.GetNativePcOffset(instruction_set_),
- entry.native_pc_code_offset.Uint32Value(instruction_set_));
+ StackMap::UnpackNativePc(entry.packed_native_pc, instruction_set_));
DCHECK_EQ(invoke_info.GetInvokeType(), entry.invoke_type);
DCHECK_EQ(invoke_info.GetMethodIndexIdx(), entry.dex_method_index_idx);
invoke_info_index++;
}
- CheckDexRegisterMap(code_info,
- code_info.GetDexRegisterMapOf(
+ CheckDexRegisterMap(code_info.GetDexRegisterMapOf(
stack_map, entry.dex_register_entry.num_dex_registers),
entry.dex_register_entry.num_dex_registers,
entry.dex_register_entry.live_dex_registers_mask,
@@ -571,8 +533,7 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
DCHECK_EQ(method_indices_[method_index_idx], inline_entry.method_index);
}
- CheckDexRegisterMap(code_info,
- code_info.GetDexRegisterMapAtDepth(
+ CheckDexRegisterMap(code_info.GetDexRegisterMapAtDepth(
d,
inline_info,
inline_entry.dex_register_entry.num_dex_registers),
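
Note: with the tables above, a register mask is no longer stored verbatim; PrepareForFillIn records it as the column pair {register_mask >> shift, shift} with shift = LeastSignificantBit(register_mask), and the CheckCodeInfo round trip (GetRegisterMaskOf() == entry.register_mask) implies a reader reconstructs it as value << shift. A standalone worked example of that encoding, illustration only, with __builtin_ctz standing in for LeastSignificantBit():

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t register_mask = 0xF0u;                  // say, r4-r7 live
      const uint32_t shift = __builtin_ctz(register_mask);   // 4 trailing zero bits
      const uint32_t value = register_mask >> shift;         // 0xF; stored columns are {value, shift}
      assert(value == 0xFu && shift == 4u);
      assert((value << shift) == register_mask);             // reconstruction recovers the mask
      return 0;
    }

Dropping the trailing zero bits keeps the BitTableBuilder column narrow when the live registers are clustered at high register numbers.
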
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index ea97cf6530..6d505b95db 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -68,11 +68,8 @@ class StackMapStream : public ValueObject {
location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
- stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
- register_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
method_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
- stack_mask_max_(-1),
out_(allocator->Adapter(kArenaAllocStackMapStream)),
dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(),
allocator->Adapter(kArenaAllocStackMapStream)),
@@ -106,7 +103,7 @@ class StackMapStream : public ValueObject {
// See runtime/stack_map.h to know what these fields contain.
struct StackMapEntry {
uint32_t dex_pc;
- CodeOffset native_pc_code_offset;
+ uint32_t packed_native_pc;
uint32_t register_mask;
BitVector* sp_mask;
uint32_t inlining_depth;
@@ -151,14 +148,8 @@ class StackMapStream : public ValueObject {
return stack_maps_.size();
}
- const StackMapEntry& GetStackMap(size_t i) const {
- return stack_maps_[i];
- }
-
- void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
- stack_maps_[i].native_pc_code_offset =
- CodeOffset::FromOffset(native_pc_offset, instruction_set_);
- }
+ uint32_t GetStackMapNativePcOffset(size_t i);
+ void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset);
// Prepares the stream to fill in a memory region. Must be called before FillIn.
// Returns the size (in bytes) needed to store this stream.
@@ -171,12 +162,6 @@ class StackMapStream : public ValueObject {
private:
size_t ComputeDexRegisterLocationCatalogSize() const;
- // Returns the number of unique stack masks.
- size_t PrepareStackMasks(size_t entry_size_in_bits);
-
- // Returns the number of unique register masks.
- size_t PrepareRegisterMasks();
-
// Prepare and deduplicate method indices.
void PrepareMethodIndices();
@@ -193,8 +178,7 @@ class StackMapStream : public ValueObject {
const BitVector& live_dex_registers_mask,
uint32_t start_index_in_dex_register_locations) const;
- void CheckDexRegisterMap(const CodeInfo& code_info,
- const DexRegisterMap& dex_register_map,
+ void CheckDexRegisterMap(const DexRegisterMap& dex_register_map,
size_t num_dex_registers,
BitVector* live_dex_registers_mask,
size_t dex_register_locations_index) const;
@@ -217,11 +201,8 @@ class StackMapStream : public ValueObject {
// A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
ScopedArenaVector<size_t> dex_register_locations_;
ScopedArenaVector<InlineInfoEntry> inline_infos_;
- ScopedArenaVector<uint8_t> stack_masks_;
- ScopedArenaVector<uint32_t> register_masks_;
ScopedArenaVector<uint32_t> method_indices_;
ScopedArenaVector<DexRegisterMapEntry> dex_register_entries_;
- int stack_mask_max_;
ScopedArenaVector<uint8_t> out_;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 9db7588b3a..112771847c 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -32,10 +32,10 @@ static bool CheckStackMask(
const StackMap& stack_map,
const BitVector& bit_vector) {
BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map);
- if (bit_vector.GetNumberOfBits() > code_info.GetNumberOfStackMaskBits()) {
+ if (bit_vector.GetNumberOfBits() > stack_mask.size_in_bits()) {
return false;
}
- for (size_t i = 0; i < code_info.GetNumberOfStackMaskBits(); ++i) {
+ for (size_t i = 0; i < stack_mask.size_in_bits(); ++i) {
if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) {
return false;
}
@@ -96,22 +96,15 @@ TEST(StackMapTest, Test1) {
size_t expected_dex_register_map_size = 1u + 1u;
ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
- ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(
- 1, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(
- 1, number_of_dex_registers, code_info));
- ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info));
-
- size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
- 0, number_of_dex_registers, number_of_catalog_entries);
- size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
- 1, number_of_dex_registers, number_of_catalog_entries);
+ ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind(0));
+ ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(1));
+ ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind(0));
+ ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(1));
+ ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0));
+ ASSERT_EQ(-2, dex_register_map.GetConstant(1));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
ASSERT_EQ(0u, index0);
ASSERT_EQ(1u, index1);
DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -211,22 +204,15 @@ TEST(StackMapTest, Test2) {
size_t expected_dex_register_map_size = 1u + 1u;
ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
- ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(
- 1, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(
- 1, number_of_dex_registers, code_info));
- ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info));
-
- size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
- 0, number_of_dex_registers, number_of_catalog_entries);
- size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
- 1, number_of_dex_registers, number_of_catalog_entries);
+ ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind(0));
+ ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(1));
+ ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind(0));
+ ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(1));
+ ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(0));
+ ASSERT_EQ(-2, dex_register_map.GetConstant(1));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
ASSERT_EQ(0u, index0);
ASSERT_EQ(1u, index1);
DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -270,23 +256,15 @@ TEST(StackMapTest, Test2) {
size_t expected_dex_register_map_size = 1u + 1u;
ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
- ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind(
- 1, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind(
- 1, number_of_dex_registers, code_info));
- ASSERT_EQ(18, dex_register_map.GetMachineRegister(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(3, dex_register_map.GetMachineRegister(
- 1, number_of_dex_registers, code_info));
-
- size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
- 0, number_of_dex_registers, number_of_catalog_entries);
- size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
- 1, number_of_dex_registers, number_of_catalog_entries);
+ ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind(0));
+ ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind(1));
+ ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind(0));
+ ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind(1));
+ ASSERT_EQ(18, dex_register_map.GetMachineRegister(0));
+ ASSERT_EQ(3, dex_register_map.GetMachineRegister(1));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
ASSERT_EQ(2u, index0);
ASSERT_EQ(3u, index1);
DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -324,23 +302,15 @@ TEST(StackMapTest, Test2) {
size_t expected_dex_register_map_size = 1u + 1u;
ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
- ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationKind(
- 1, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationInternalKind(
- 1, number_of_dex_registers, code_info));
- ASSERT_EQ(6, dex_register_map.GetMachineRegister(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(8, dex_register_map.GetMachineRegister(
- 1, number_of_dex_registers, code_info));
-
- size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
- 0, number_of_dex_registers, number_of_catalog_entries);
- size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
- 1, number_of_dex_registers, number_of_catalog_entries);
+ ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind(0));
+ ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationKind(1));
+ ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind(0));
+ ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationInternalKind(1));
+ ASSERT_EQ(6, dex_register_map.GetMachineRegister(0));
+ ASSERT_EQ(8, dex_register_map.GetMachineRegister(1));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
ASSERT_EQ(4u, index0);
ASSERT_EQ(5u, index1);
DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -378,23 +348,15 @@ TEST(StackMapTest, Test2) {
size_t expected_dex_register_map_size = 1u + 1u;
ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
- ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationKind(
- 1, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationInternalKind(
- 1, number_of_dex_registers, code_info));
- ASSERT_EQ(3, dex_register_map.GetMachineRegister(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(1, dex_register_map.GetMachineRegister(
- 1, number_of_dex_registers, code_info));
-
- size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
- 0, number_of_dex_registers, number_of_catalog_entries);
- size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
- 1, number_of_dex_registers, number_of_catalog_entries);
+ ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind(0));
+ ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationKind(1));
+ ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind(0));
+ ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationInternalKind(1));
+ ASSERT_EQ(3, dex_register_map.GetMachineRegister(0));
+ ASSERT_EQ(1, dex_register_map.GetMachineRegister(1));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
ASSERT_EQ(3u, index0); // Shared with second stack map.
ASSERT_EQ(6u, index1);
DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -470,20 +432,15 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) {
size_t expected_map_size = 1u + 1u;
ASSERT_EQ(expected_map_size, map.Size());
- ASSERT_EQ(Kind::kInStack, map.GetLocationKind(0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kConstant,
- map.GetLocationKind(1, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kInStack,
- map.GetLocationInternalKind(0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kConstantLargeValue,
- map.GetLocationInternalKind(1, number_of_dex_registers, code_info));
- ASSERT_EQ(0, map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info));
- ASSERT_EQ(-2, map.GetConstant(1, number_of_dex_registers, code_info));
-
- const size_t index0 =
- map.GetLocationCatalogEntryIndex(0, number_of_dex_registers, number_of_catalog_entries);
- const size_t index1 =
- map.GetLocationCatalogEntryIndex(1, number_of_dex_registers, number_of_catalog_entries);
+ ASSERT_EQ(Kind::kInStack, map.GetLocationKind(0));
+ ASSERT_EQ(Kind::kConstant, map.GetLocationKind(1));
+ ASSERT_EQ(Kind::kInStack, map.GetLocationInternalKind(0));
+ ASSERT_EQ(Kind::kConstantLargeValue, map.GetLocationInternalKind(1));
+ ASSERT_EQ(0, map.GetStackOffsetInBytes(0));
+ ASSERT_EQ(-2, map.GetConstant(1));
+
+ const size_t index0 = map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+ const size_t index1 = map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
ASSERT_EQ(0u, index0);
ASSERT_EQ(1u, index1);
DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -552,20 +509,14 @@ TEST(StackMapTest, TestNonLiveDexRegisters) {
size_t expected_dex_register_map_size = 1u + 0u;
ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
- ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationKind(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(
- 1, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationInternalKind(
- 0, number_of_dex_registers, code_info));
- ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(
- 1, number_of_dex_registers, code_info));
- ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info));
-
- size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
- 0, number_of_dex_registers, number_of_catalog_entries);
- size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
- 1, number_of_dex_registers, number_of_catalog_entries);
+ ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationKind(0));
+ ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(1));
+ ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationInternalKind(0));
+ ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(1));
+ ASSERT_EQ(-2, dex_register_map.GetConstant(1));
+
+ size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(0, number_of_catalog_entries);
+ size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(1, number_of_catalog_entries);
ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0);
ASSERT_EQ(0u, index1);
DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
@@ -632,8 +583,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
StackMap stack_map0 = code_info.GetStackMapAt(0);
DexRegisterMap dex_register_map0 =
code_info.GetDexRegisterMapOf(stack_map0, number_of_dex_registers);
- ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers,
- number_of_catalog_entries));
+ ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_catalog_entries));
ASSERT_EQ(255u, dex_register_map0.Size());
StackMap stack_map1 = code_info.GetStackMapAt(1);
@@ -680,20 +630,20 @@ TEST(StackMapTest, TestShareDexRegisterMap) {
// Verify first stack map.
StackMap sm0 = ci.GetStackMapAt(0);
DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, number_of_dex_registers);
- ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers, ci));
- ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers, ci));
+ ASSERT_EQ(0, dex_registers0.GetMachineRegister(0));
+ ASSERT_EQ(-2, dex_registers0.GetConstant(1));
// Verify second stack map.
StackMap sm1 = ci.GetStackMapAt(1);
DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, number_of_dex_registers);
- ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers, ci));
- ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers, ci));
+ ASSERT_EQ(0, dex_registers1.GetMachineRegister(0));
+ ASSERT_EQ(-2, dex_registers1.GetConstant(1));
// Verify third stack map.
StackMap sm2 = ci.GetStackMapAt(2);
DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, number_of_dex_registers);
- ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers, ci));
- ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci));
+ ASSERT_EQ(2, dex_registers2.GetMachineRegister(0));
+ ASSERT_EQ(-2, dex_registers2.GetConstant(1));
// Verify dex register map offsets.
ASSERT_EQ(sm0.GetDexRegisterMapOffset(),
@@ -833,8 +783,8 @@ TEST(StackMapTest, InlineTest) {
StackMap sm0 = ci.GetStackMapAt(0);
DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, 2);
- ASSERT_EQ(0, dex_registers0.GetStackOffsetInBytes(0, 2, ci));
- ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci));
+ ASSERT_EQ(0, dex_registers0.GetStackOffsetInBytes(0));
+ ASSERT_EQ(4, dex_registers0.GetConstant(1));
InlineInfo if0 = ci.GetInlineInfoOf(sm0);
ASSERT_EQ(2u, if0.GetDepth());
@@ -844,12 +794,12 @@ TEST(StackMapTest, InlineTest) {
ASSERT_TRUE(if0.EncodesArtMethodAtDepth(1));
DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if0, 1);
- ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0, 1, ci));
+ ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0));
DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if0, 3);
- ASSERT_EQ(16, dex_registers2.GetStackOffsetInBytes(0, 3, ci));
- ASSERT_EQ(20, dex_registers2.GetConstant(1, 3, ci));
- ASSERT_EQ(15, dex_registers2.GetMachineRegister(2, 3, ci));
+ ASSERT_EQ(16, dex_registers2.GetStackOffsetInBytes(0));
+ ASSERT_EQ(20, dex_registers2.GetConstant(1));
+ ASSERT_EQ(15, dex_registers2.GetMachineRegister(2));
}
{
@@ -857,8 +807,8 @@ TEST(StackMapTest, InlineTest) {
StackMap sm1 = ci.GetStackMapAt(1);
DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm1, 2);
- ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci));
- ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci));
+ ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0));
+ ASSERT_EQ(0, dex_registers0.GetConstant(1));
InlineInfo if1 = ci.GetInlineInfoOf(sm1);
ASSERT_EQ(3u, if1.GetDepth());
@@ -870,12 +820,12 @@ TEST(StackMapTest, InlineTest) {
ASSERT_TRUE(if1.EncodesArtMethodAtDepth(2));
DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if1, 1);
- ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0, 1, ci));
+ ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0));
DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if1, 3);
- ASSERT_EQ(80, dex_registers2.GetStackOffsetInBytes(0, 3, ci));
- ASSERT_EQ(10, dex_registers2.GetConstant(1, 3, ci));
- ASSERT_EQ(5, dex_registers2.GetMachineRegister(2, 3, ci));
+ ASSERT_EQ(80, dex_registers2.GetStackOffsetInBytes(0));
+ ASSERT_EQ(10, dex_registers2.GetConstant(1));
+ ASSERT_EQ(5, dex_registers2.GetMachineRegister(2));
ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(2));
}
@@ -886,7 +836,7 @@ TEST(StackMapTest, InlineTest) {
DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2, 2);
ASSERT_FALSE(dex_registers0.IsDexRegisterLive(0));
- ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci));
+ ASSERT_EQ(4, dex_registers0.GetConstant(1));
ASSERT_FALSE(sm2.HasInlineInfo());
}
@@ -895,8 +845,8 @@ TEST(StackMapTest, InlineTest) {
StackMap sm3 = ci.GetStackMapAt(3);
DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm3, 2);
- ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci));
- ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci));
+ ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0));
+ ASSERT_EQ(0, dex_registers0.GetConstant(1));
InlineInfo if2 = ci.GetInlineInfoOf(sm3);
ASSERT_EQ(3u, if2.GetDepth());
@@ -910,34 +860,39 @@ TEST(StackMapTest, InlineTest) {
ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(0));
DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, if2, 1);
- ASSERT_EQ(2, dex_registers1.GetMachineRegister(0, 1, ci));
+ ASSERT_EQ(2, dex_registers1.GetMachineRegister(0));
DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(2, if2, 2);
ASSERT_FALSE(dex_registers2.IsDexRegisterLive(0));
- ASSERT_EQ(3, dex_registers2.GetMachineRegister(1, 2, ci));
+ ASSERT_EQ(3, dex_registers2.GetMachineRegister(1));
}
}
-TEST(StackMapTest, CodeOffsetTest) {
- // Test minimum alignments, and decoding.
- CodeOffset offset_thumb2 =
- CodeOffset::FromOffset(kThumb2InstructionAlignment, InstructionSet::kThumb2);
- CodeOffset offset_arm64 =
- CodeOffset::FromOffset(kArm64InstructionAlignment, InstructionSet::kArm64);
- CodeOffset offset_x86 =
- CodeOffset::FromOffset(kX86InstructionAlignment, InstructionSet::kX86);
- CodeOffset offset_x86_64 =
- CodeOffset::FromOffset(kX86_64InstructionAlignment, InstructionSet::kX86_64);
- CodeOffset offset_mips =
- CodeOffset::FromOffset(kMipsInstructionAlignment, InstructionSet::kMips);
- CodeOffset offset_mips64 =
- CodeOffset::FromOffset(kMips64InstructionAlignment, InstructionSet::kMips64);
- EXPECT_EQ(offset_thumb2.Uint32Value(InstructionSet::kThumb2), kThumb2InstructionAlignment);
- EXPECT_EQ(offset_arm64.Uint32Value(InstructionSet::kArm64), kArm64InstructionAlignment);
- EXPECT_EQ(offset_x86.Uint32Value(InstructionSet::kX86), kX86InstructionAlignment);
- EXPECT_EQ(offset_x86_64.Uint32Value(InstructionSet::kX86_64), kX86_64InstructionAlignment);
- EXPECT_EQ(offset_mips.Uint32Value(InstructionSet::kMips), kMipsInstructionAlignment);
- EXPECT_EQ(offset_mips64.Uint32Value(InstructionSet::kMips64), kMips64InstructionAlignment);
+TEST(StackMapTest, PackedNativePcTest) {
+ uint32_t packed_thumb2 =
+ StackMap::PackNativePc(kThumb2InstructionAlignment, InstructionSet::kThumb2);
+ uint32_t packed_arm64 =
+ StackMap::PackNativePc(kArm64InstructionAlignment, InstructionSet::kArm64);
+ uint32_t packed_x86 =
+ StackMap::PackNativePc(kX86InstructionAlignment, InstructionSet::kX86);
+ uint32_t packed_x86_64 =
+ StackMap::PackNativePc(kX86_64InstructionAlignment, InstructionSet::kX86_64);
+ uint32_t packed_mips =
+ StackMap::PackNativePc(kMipsInstructionAlignment, InstructionSet::kMips);
+ uint32_t packed_mips64 =
+ StackMap::PackNativePc(kMips64InstructionAlignment, InstructionSet::kMips64);
+ EXPECT_EQ(StackMap::UnpackNativePc(packed_thumb2, InstructionSet::kThumb2),
+ kThumb2InstructionAlignment);
+ EXPECT_EQ(StackMap::UnpackNativePc(packed_arm64, InstructionSet::kArm64),
+ kArm64InstructionAlignment);
+ EXPECT_EQ(StackMap::UnpackNativePc(packed_x86, InstructionSet::kX86),
+ kX86InstructionAlignment);
+ EXPECT_EQ(StackMap::UnpackNativePc(packed_x86_64, InstructionSet::kX86_64),
+ kX86_64InstructionAlignment);
+ EXPECT_EQ(StackMap::UnpackNativePc(packed_mips, InstructionSet::kMips),
+ kMipsInstructionAlignment);
+ EXPECT_EQ(StackMap::UnpackNativePc(packed_mips64, InstructionSet::kMips64),
+ kMips64InstructionAlignment);
}
TEST(StackMapTest, TestDeduplicateStackMask) {
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index fad7729956..1b43618538 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -409,7 +409,7 @@ void SuperblockCloner::ResolvePhi(HPhi* phi) {
// Main algorithm methods.
//
-void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) {
+void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const {
DCHECK(exits->empty());
for (uint32_t block_id : orig_bb_set_.Indexes()) {
HBasicBlock* block = GetBlockById(block_id);
@@ -521,6 +521,113 @@ void SuperblockCloner::ResolveDataFlow() {
}
//
+// Helpers for live-outs processing and Subgraph-closed SSA.
+//
+
+bool SuperblockCloner::CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs) const {
+ DCHECK(live_outs->empty());
+ for (uint32_t idx : orig_bb_set_.Indexes()) {
+ HBasicBlock* block = GetBlockById(idx);
+
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ DCHECK(instr->IsClonable());
+
+ if (IsUsedOutsideRegion(instr, orig_bb_set_)) {
+ live_outs->FindOrAdd(instr, instr);
+ }
+ }
+
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ if (!instr->IsClonable()) {
+ return false;
+ }
+
+ if (IsUsedOutsideRegion(instr, orig_bb_set_)) {
+ // TODO: Investigate why HNewInstance and HCheckCast have a requirement for the input.
+ if (instr->IsLoadClass()) {
+ return false;
+ }
+ live_outs->FindOrAdd(instr, instr);
+ }
+ }
+ }
+ return true;
+}
+
+void SuperblockCloner::ConstructSubgraphClosedSSA() {
+ if (live_outs_.empty()) {
+ return;
+ }
+
+ ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+ SearchForSubgraphExits(&exits);
+ if (exits.empty()) {
+ DCHECK(live_outs_.empty());
+ return;
+ }
+
+ DCHECK_EQ(exits.size(), 1u);
+ HBasicBlock* exit_block = exits[0];
+ // There should be no critical edges.
+ DCHECK_EQ(exit_block->GetPredecessors().size(), 1u);
+ DCHECK(exit_block->GetPhis().IsEmpty());
+
+ // For each live-out value insert a phi into the loop exit and replace all the value's uses
+ // external to the loop with this phi. The phi will have the original value as its only input;
+ // after copying is done FixSubgraphClosedSSAAfterCloning will add a corresponding copy of the
+ // original value as the second input thus merging data flow from the original and copy parts of
+ // the subgraph. Also update the record in the live_outs_ map from (value, value) to
+ // (value, new_phi).
+ for (auto live_out_it = live_outs_.begin(); live_out_it != live_outs_.end(); ++live_out_it) {
+ HInstruction* value = live_out_it->first;
+ HPhi* phi = new (arena_) HPhi(arena_, kNoRegNumber, 0, value->GetType());
+
+ if (value->GetType() == DataType::Type::kReference) {
+ phi->SetReferenceTypeInfo(value->GetReferenceTypeInfo());
+ }
+
+ exit_block->AddPhi(phi);
+ live_out_it->second = phi;
+
+ const HUseList<HInstruction*>& uses = value->GetUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+ HInstruction* user = it->GetUser();
+ size_t index = it->GetIndex();
+ // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+ ++it;
+ if (!IsInOrigBBSet(user->GetBlock())) {
+ user->ReplaceInput(phi, index);
+ }
+ }
+
+ const HUseList<HEnvironment*>& env_uses = value->GetEnvUses();
+ for (auto it = env_uses.begin(), e = env_uses.end(); it != e; /* ++it below */) {
+ HEnvironment* env = it->GetUser();
+ size_t index = it->GetIndex();
+ ++it;
+ if (!IsInOrigBBSet(env->GetHolder()->GetBlock())) {
+ env->ReplaceInput(phi, index);
+ }
+ }
+
+ phi->AddInput(value);
+ }
+}
+
+void SuperblockCloner::FixSubgraphClosedSSAAfterCloning() {
+ for (auto it : live_outs_) {
+ DCHECK(it.first != it.second);
+ HInstruction* orig_value = it.first;
+ HPhi* phi = it.second->AsPhi();
+ HInstruction* copy_value = GetInstrCopy(orig_value);
+ // Copy edges are inserted after the original so we can just add a new input to the phi.
+ phi->AddInput(copy_value);
+ }
+}
+
+//
// Debug and logging methods.
//
@@ -644,7 +751,6 @@ void DumpBBSet(const ArenaBitVector* set) {
}
void SuperblockCloner::DumpInputSets() {
- std::cout << graph_->PrettyMethod() << "\n";
std::cout << "orig_bb_set:\n";
for (uint32_t idx : orig_bb_set_.Indexes()) {
std::cout << idx << "\n";
@@ -680,7 +786,9 @@ SuperblockCloner::SuperblockCloner(HGraph* graph,
bb_map_(bb_map),
hir_map_(hir_map),
outer_loop_(nullptr),
- outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) {
+ outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
+ live_outs_(std::less<HInstruction*>(),
+ graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)) {
orig_bb_set_.Copy(orig_bb_set);
}
@@ -699,26 +807,19 @@ bool SuperblockCloner::IsSubgraphClonable() const {
return false;
}
- // Check that there are no instructions defined in the subgraph and used outside.
- // TODO: Improve this by accepting graph with such uses but only one exit.
- for (uint32_t idx : orig_bb_set_.Indexes()) {
- HBasicBlock* block = GetBlockById(idx);
+ HInstructionMap live_outs(
+ std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- HInstruction* instr = it.Current();
- if (!instr->IsClonable() ||
- IsUsedOutsideRegion(instr, orig_bb_set_)) {
- return false;
- }
- }
+ if (!CollectLiveOutsAndCheckClonable(&live_outs)) {
+ return false;
+ }
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- HInstruction* instr = it.Current();
- if (!instr->IsClonable() ||
- IsUsedOutsideRegion(instr, orig_bb_set_)) {
- return false;
- }
- }
+ ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+ SearchForSubgraphExits(&exits);
+
+ // The only loops with live-outs which are currently supported are loops with a single exit.
+ if (!live_outs.empty() && exits.size() != 1) {
+ return false;
}
return true;
@@ -794,8 +895,10 @@ void SuperblockCloner::Run() {
DumpInputSets();
}
+ CollectLiveOutsAndCheckClonable(&live_outs_);
// Find an area in the graph for which control flow information should be adjusted.
FindAndSetLocalAreaForAdjustments();
+ ConstructSubgraphClosedSSA();
// Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be
// adjusted.
CloneBasicBlocks();
@@ -819,6 +922,7 @@ void SuperblockCloner::Run() {
AdjustControlFlowInfo();
// Fix data flow of the graph.
ResolveDataFlow();
+ FixSubgraphClosedSSAAfterCloning();
}
void SuperblockCloner::CleanUp() {
@@ -985,8 +1089,14 @@ HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) {
HBasicBlock* loop_header = loop_info_->GetHeader();
// Check that loop info is up-to-date.
DCHECK(loop_info_ == loop_header->GetLoopInformation());
-
HGraph* graph = loop_header->GetGraph();
+
+ if (kSuperblockClonerLogging) {
+ std::cout << "Method: " << graph->PrettyMethod() << std::endl;
+ std::cout << "Scalar loop " << (to_unroll ? "unrolling" : "peeling") <<
+ " was applied to the loop <" << loop_header->GetBlockId() << ">." << std::endl;
+ }
+
ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool());
HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
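
Note: an illustrative sketch (not part of the patch) of what ConstructSubgraphClosedSSA and FixSubgraphClosedSSAAfterCloning above do for a single-exit loop L with one live-out v1 used in the exit block E:

    //  Before:                          After ConstructSubgraphClosedSSA:
    //    L: v1 = ...                      L: v1 = ...
    //    E: use(v1)                       E: p = Phi(v1)     // new exit phi, sole input for now
    //                                        use(p)          // uses outside the subgraph now go through p
    //
    //  After cloning and FixSubgraphClosedSSAAfterCloning:
    //    L:  v1  = ...                    (original subgraph)
    //    L': v1' = ...                    (cloned subgraph)
    //    E:  p = Phi(v1, v1')             // the copy is appended as the phi's second input
    //        use(p)
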
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index e0931674cb..f21172131b 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -218,7 +218,7 @@ class SuperblockCloner : public ValueObject {
private:
// Fills the 'exits' vector with the subgraph exits.
- void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
+ void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const;
// Finds and records information about the area in the graph for which control flow (back edges,
// loops, dominators) needs to be adjusted.
@@ -240,6 +240,33 @@ class SuperblockCloner : public ValueObject {
void ResolveDataFlow();
//
+ // Helpers for live-outs processing and Subgraph-closed SSA.
+ //
+ // - live-outs - values which are defined inside the subgraph and have uses outside.
+ // - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph
+ // have no outside uses except for the phi-nodes in the subgraph exits.
+ //
+ // Note: currently, if the subgraph has live-outs it is only clonable if it has a single exit; this
+ // makes the subgraph-closed SSA form construction much easier.
+ //
+ // TODO: Support subgraphs with live-outs and multiple exits.
+ //
+
+ // For each live-out value 'val' in the region, puts a record <val, val> into the map.
+ // Returns whether all of the instructions in the subgraph are clonable.
+ bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const;
+
+ // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit.
+ //
+ // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates
+ // the record in the map to <val, phi> and replaces all outside uses with this phi.
+ void ConstructSubgraphClosedSSA();
+
+ // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding
+ // (<val, phi>) phi after the cloning is done.
+ void FixSubgraphClosedSSAAfterCloning();
+
+ //
// Helpers for CloneBasicBlock.
//
@@ -316,6 +343,8 @@ class SuperblockCloner : public ValueObject {
HLoopInformation* outer_loop_;
HBasicBlockSet outer_loop_bb_set_;
+ HInstructionMap live_outs_;
+
ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);