Diffstat (limited to 'compiler'): 55 files changed, 3446 insertions, 1362 deletions
diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc index 6d8a7dab2b..b1f5d870d4 100644 --- a/compiler/dex/gvn_dead_code_elimination.cc +++ b/compiler/dex/gvn_dead_code_elimination.cc @@ -1003,7 +1003,6 @@ bool GvnDeadCodeElimination::BackwardPassTryToKillLastMIR() { vreg_chains_.GetMIRData(kill_heads_[v_reg])->PrevChange(v_reg)); } } - unused_vregs_->Union(vregs_to_kill_); for (auto it = changes_to_kill_.rbegin(), end = changes_to_kill_.rend(); it != end; ++it) { MIRData* data = vreg_chains_.GetMIRData(*it); DCHECK(!data->must_keep); @@ -1012,6 +1011,10 @@ bool GvnDeadCodeElimination::BackwardPassTryToKillLastMIR() { KillMIR(data); } + // Each dependent register not in vregs_to_kill_ is either already marked unused or + // it's one word of a wide register where the other word has been overwritten. + unused_vregs_->UnionIfNotIn(dependent_vregs_, vregs_to_kill_); + vreg_chains_.RemoveTrailingNops(); return true; } diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc index de591d0edb..461c844a60 100644 --- a/compiler/dex/gvn_dead_code_elimination_test.cc +++ b/compiler/dex/gvn_dead_code_elimination_test.cc @@ -137,6 +137,8 @@ class GvnDeadCodeEliminationTest : public testing::Test { { bb, opcode, 0u, 0u, 1, { src1 }, 1, { result } } #define DEF_BINOP(bb, opcode, result, src1, src2) \ { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } } +#define DEF_BINOP_WIDE(bb, opcode, result, src1, src2) \ + { bb, opcode, 0u, 0u, 4, { src1, src1 + 1, src2, src2 + 1 }, 2, { result, result + 1 } } void DoPrepareIFields(const IFieldDef* defs, size_t count) { cu_.mir_graph->ifield_lowering_infos_.clear(); @@ -1936,7 +1938,7 @@ TEST_F(GvnDeadCodeEliminationTestSimple, MixedOverlaps1) { DEF_CONST(3, Instruction::CONST, 0u, 1000u), DEF_MOVE(3, Instruction::MOVE, 1u, 0u), DEF_CONST(3, Instruction::CONST, 2u, 2000u), - { 3, Instruction::INT_TO_LONG, 0, 0u, 1, { 2u }, 2, { 3u, 4u} }, + { 3, Instruction::INT_TO_LONG, 0, 0u, 1, { 2u }, 2, { 3u, 4u } }, DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 5u, 3u), DEF_CONST(3, Instruction::CONST, 7u, 3000u), DEF_CONST(3, Instruction::CONST, 8u, 4000u), @@ -1983,4 +1985,85 @@ TEST_F(GvnDeadCodeEliminationTestSimple, MixedOverlaps1) { EXPECT_EQ(0u, int_to_long->dalvikInsn.vB); } +TEST_F(GvnDeadCodeEliminationTestSimple, UnusedRegs1) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000u), + DEF_CONST(3, Instruction::CONST, 1u, 2000u), + DEF_BINOP(3, Instruction::ADD_INT, 2u, 1u, 0u), + DEF_CONST(3, Instruction::CONST, 3u, 1000u), // NOT killed (b/21702651). 
+ DEF_BINOP(3, Instruction::ADD_INT, 4u, 1u, 3u), // Killed (RecordPass) + DEF_CONST(3, Instruction::CONST, 5u, 2000u), // Killed with 9u (BackwardPass) + DEF_BINOP(3, Instruction::ADD_INT, 6u, 5u, 0u), // Killed (RecordPass) + DEF_CONST(3, Instruction::CONST, 7u, 4000u), + DEF_MOVE(3, Instruction::MOVE, 8u, 0u), // Killed with 6u (BackwardPass) + }; + + static const int32_t sreg_to_vreg_map[] = { 1, 2, 3, 0, 3, 0, 3, 4, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 7 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[3]); + EXPECT_EQ(value_names_[2], value_names_[4]); + EXPECT_EQ(value_names_[1], value_names_[5]); + EXPECT_EQ(value_names_[2], value_names_[6]); + EXPECT_EQ(value_names_[0], value_names_[8]); + + static const bool eliminated[] = { + false, false, false, false, true, true, true, false, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestSimple, UnusedRegs2) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000u), + DEF_CONST(3, Instruction::CONST, 1u, 2000u), + DEF_BINOP(3, Instruction::ADD_INT, 2u, 1u, 0u), + DEF_CONST(3, Instruction::CONST, 3u, 1000u), // Killed (BackwardPass; b/21702651) + DEF_BINOP(3, Instruction::ADD_INT, 4u, 1u, 3u), // Killed (RecordPass) + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 5u, 4000u), + { 3, Instruction::LONG_TO_INT, 0, 0u, 2, { 5u, 6u }, 1, { 7u } }, + DEF_BINOP(3, Instruction::ADD_INT, 8u, 7u, 0u), + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 9u, 4000u), // Killed with 12u (BackwardPass) + DEF_CONST(3, Instruction::CONST, 11u, 6000u), + { 3, Instruction::LONG_TO_INT, 0, 0u, 2, { 9u, 10u }, 1, { 12u } }, // Killed with 9u (BP) + }; + + static const int32_t sreg_to_vreg_map[] = { + 2, 3, 4, 1, 4, 5, 6 /* high word */, 0, 7, 0, 1 /* high word */, 8, 0 + }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + static const int32_t wide_sregs[] = { 5, 9 }; + MarkAsWideSRegs(wide_sregs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 5, 6, 7, 9 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[3]); + EXPECT_EQ(value_names_[2], value_names_[4]); + EXPECT_EQ(value_names_[5], value_names_[8]); + EXPECT_EQ(value_names_[6], value_names_[10]); + + static const bool eliminated[] = { + false, false, false, true, true, false, false, false, true, false, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + } // namespace art diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index cc1ba35b96..38342420ac 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -398,12 +398,13 @@ bool MIRGraph::IsBadMonitorExitCatch(NarrowDexOffset monitor_exit_offset, DCHECK(monitor_exit->Opcode() == Instruction::MONITOR_EXIT); int monitor_reg = monitor_exit->VRegA_11x(); const Instruction* check_insn = 
Instruction::At(current_code_item_->insns_ + catch_offset); - DCHECK(check_insn->Opcode() == Instruction::MOVE_EXCEPTION); - if (check_insn->VRegA_11x() == monitor_reg) { - // Unexpected move-exception to the same register. Probably not the pattern we're looking for. - return false; + if (check_insn->Opcode() == Instruction::MOVE_EXCEPTION) { + if (check_insn->VRegA_11x() == monitor_reg) { + // Unexpected move-exception to the same register. Probably not the pattern we're looking for. + return false; + } + check_insn = check_insn->Next(); } - check_insn = check_insn->Next(); while (true) { int dest = -1; bool wide = false; diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h index b25e967609..e0c56fcc82 100644 --- a/compiler/driver/compiler_driver-inl.h +++ b/compiler/driver/compiler_driver-inl.h @@ -233,11 +233,32 @@ inline bool CompilerDriver::IsStaticFieldInReferrerClass(mirror::Class* referrer return referrer_class == fields_class; } +inline bool CompilerDriver::CanAssumeClassIsInitialized(mirror::Class* klass) { + // Being loaded is a pre-requisite for being initialized but let's do the cheap check first. + // + // NOTE: When AOT compiling an app, we eagerly initialize app classes (and potentially their + // super classes in the boot image) but only those that have a trivial initialization, i.e. + // without <clinit>() or static values in the dex file for that class or any of its super + // classes. So while we could see the klass as initialized during AOT compilation and have + // it only loaded at runtime, the needed initialization would have to be trivial and + // unobservable from Java, so we may as well treat it as initialized. + if (!klass->IsInitialized()) { + return false; + } + return CanAssumeClassIsLoaded(klass); +} + +inline bool CompilerDriver::CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, + mirror::Class* klass) { + return (referrer_class != nullptr && referrer_class->IsSubClass(klass)) || + CanAssumeClassIsInitialized(klass); +} + inline bool CompilerDriver::IsStaticFieldsClassInitialized(mirror::Class* referrer_class, ArtField* resolved_field) { DCHECK(resolved_field->IsStatic()); mirror::Class* fields_class = resolved_field->GetDeclaringClass(); - return fields_class == referrer_class || fields_class->IsInitialized(); + return CanReferrerAssumeClassIsInitialized(referrer_class, fields_class); } inline ArtMethod* CompilerDriver::ResolveMethod( @@ -394,7 +415,7 @@ inline bool CompilerDriver::IsMethodsClassInitialized(mirror::Class* referrer_cl return true; } mirror::Class* methods_class = resolved_method->GetDeclaringClass(); - return methods_class == referrer_class || methods_class->IsInitialized(); + return CanReferrerAssumeClassIsInitialized(referrer_class, methods_class); } } // namespace art diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 22fcf87524..84b6a52bda 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -659,7 +659,8 @@ void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const De bool CompilerDriver::IsImageClass(const char* descriptor) const { if (!IsImage()) { - return true; + // NOTE: Currently unreachable, all callers check IsImage(). 
+ return false; } else { return image_classes_->find(descriptor) != image_classes_->end(); } @@ -992,6 +993,24 @@ void CompilerDriver::UpdateImageClasses(TimingLogger* timings) { } } +bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) { + Runtime* runtime = Runtime::Current(); + if (!runtime->IsAotCompiler()) { + DCHECK(runtime->UseJit()); + // Having the klass reference here implies that the klass is already loaded. + return true; + } + if (!IsImage()) { + // Assume loaded only if klass is in the boot image. App classes cannot be assumed + // loaded because we don't even know what class loader will be used to load them. + bool class_in_image = runtime->GetHeap()->FindSpaceFromObject(klass, false)->IsImageSpace(); + return class_in_image; + } + std::string temp; + const char* descriptor = klass->GetDescriptor(&temp); + return IsImageClass(descriptor); +} + bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) { if (IsImage() && IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) { diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 68c905eb22..f737007308 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -501,6 +501,16 @@ class CompilerDriver { uint32_t field_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // Can we assume that the klass is initialized? + bool CanAssumeClassIsInitialized(mirror::Class* klass) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + // Can we assume that the klass is loaded? + bool CanAssumeClassIsLoaded(mirror::Class* klass) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics. // The only external contract is that unresolved method has flags 0 and resolved non-0. enum { diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc index 4971f0ef10..4d423d007f 100644 --- a/compiler/dwarf/dwarf_test.cc +++ b/compiler/dwarf/dwarf_test.cc @@ -26,11 +26,11 @@ namespace art { namespace dwarf { -constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT; - // Run the tests only on host since we need objdump. #ifndef HAVE_ANDROID_OS +constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT; + TEST_F(DwarfTest, DebugFrame) { const bool is64bit = false; diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 32bde8e3b4..73e121f1cd 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -110,10 +110,6 @@ bool ImageWriter::PrepareImageAddressSpace() { CheckNoDexObjects(); } - if (!AllocMemory()) { - return false; - } - if (kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); CheckNonImageClassesRemoved(); @@ -123,6 +119,12 @@ bool ImageWriter::PrepareImageAddressSpace() { CalculateNewObjectOffsets(); Thread::Current()->TransitionFromRunnableToSuspended(kNative); + // This needs to happen after CalculateNewObjectOffsets since it relies on intern_table_bytes_ and + // bin size sums being calculated. + if (!AllocMemory()) { + return false; + } + return true; } @@ -205,7 +207,7 @@ bool ImageWriter::Write(const std::string& image_filename, } // Write out the image bitmap at the page aligned start of the image end. 
- const auto& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap); + const ImageSection& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap); CHECK_ALIGNED(bitmap_section.Offset(), kPageSize); if (!image_file->Write(reinterpret_cast<char*>(image_bitmap_->Begin()), bitmap_section.Size(), bitmap_section.Offset())) { @@ -222,26 +224,10 @@ bool ImageWriter::Write(const std::string& image_filename, return true; } -void ImageWriter::SetImageOffset(mirror::Object* object, - ImageWriter::BinSlot bin_slot, - size_t offset) { +void ImageWriter::SetImageOffset(mirror::Object* object, size_t offset) { DCHECK(object != nullptr); DCHECK_NE(offset, 0U); - mirror::Object* obj = reinterpret_cast<mirror::Object*>(image_->Begin() + offset); - DCHECK_ALIGNED(obj, kObjectAlignment); - static size_t max_offset = 0; - max_offset = std::max(max_offset, offset); - image_bitmap_->Set(obj); // Mark the obj as mutated, since we will end up changing it. - { - // Remember the object-inside-of-the-image's hash code so we can restore it after the copy. - auto hash_it = saved_hashes_map_.find(bin_slot); - if (hash_it != saved_hashes_map_.end()) { - std::pair<BinSlot, uint32_t> slot_hash = *hash_it; - saved_hashes_.push_back(std::make_pair(obj, slot_hash.second)); - saved_hashes_map_.erase(hash_it); - } - } // The object is already deflated from when we set the bin slot. Just overwrite the lock word. object->SetLockWord(LockWord::FromForwardingAddress(offset), false); DCHECK_EQ(object->GetLockWord(false).ReadBarrierState(), 0u); @@ -262,7 +248,7 @@ void ImageWriter::AssignImageOffset(mirror::Object* object, ImageWriter::BinSlot size_t new_offset = image_objects_offset_begin_ + previous_bin_sizes + bin_slot.GetIndex(); DCHECK_ALIGNED(new_offset, kObjectAlignment); - SetImageOffset(object, bin_slot, new_offset); + SetImageOffset(object, new_offset); DCHECK_LT(new_offset, image_end_); } @@ -302,14 +288,14 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) { // No hash, don't need to save it. break; case LockWord::kHashCode: - saved_hashes_map_[bin_slot] = lw.GetHashCode(); + DCHECK(saved_hashcode_map_.find(object) == saved_hashcode_map_.end()); + saved_hashcode_map_.emplace(object, lw.GetHashCode()); break; default: LOG(FATAL) << "Unreachable."; UNREACHABLE(); } - object->SetLockWord(LockWord::FromForwardingAddress(static_cast<uint32_t>(bin_slot)), - false); + object->SetLockWord(LockWord::FromForwardingAddress(bin_slot.Uint32Value()), false); DCHECK_EQ(object->GetLockWord(false).ReadBarrierState(), 0u); DCHECK(IsImageBinSlotAssigned(object)); } @@ -487,11 +473,8 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { ++bin_slot_count_[bin]; - DCHECK_LT(GetBinSizeSum(), image_->Size()); - // Grow the image closer to the end by the object we just assigned. image_end_ += offset_delta; - DCHECK_LT(image_end_, image_->Size()); } bool ImageWriter::WillMethodBeDirty(ArtMethod* m) const { @@ -535,10 +518,8 @@ ImageWriter::BinSlot ImageWriter::GetImageBinSlot(mirror::Object* object) const } bool ImageWriter::AllocMemory() { - auto* runtime = Runtime::Current(); - const size_t heap_size = runtime->GetHeap()->GetTotalMemory(); - // Add linear alloc usage since we need to have room for the ArtFields. 
- const size_t length = RoundUp(heap_size + runtime->GetLinearAlloc()->GetUsedMemory(), kPageSize); + const size_t length = RoundUp(image_objects_offset_begin_ + GetBinSizeSum() + intern_table_bytes_, + kPageSize); std::string error_msg; image_.reset(MemMap::MapAnonymous("image writer image", nullptr, length, PROT_READ | PROT_WRITE, false, false, &error_msg)); @@ -547,9 +528,10 @@ bool ImageWriter::AllocMemory() { return false; } - // Create the image bitmap. - image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create("image bitmap", image_->Begin(), - RoundUp(length, kPageSize))); + // Create the image bitmap, only needs to cover mirror object section which is up to image_end_. + CHECK_LE(image_end_, length); + image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create( + "image bitmap", image_->Begin(), RoundUp(image_end_, kPageSize))); if (image_bitmap_.get() == nullptr) { LOG(ERROR) << "Failed to allocate memory for image bitmap"; return false; @@ -569,42 +551,6 @@ bool ImageWriter::ComputeLazyFieldsForClassesVisitor(Class* c, void* /*arg*/) { return true; } -// Collect all the java.lang.String in the heap and put them in the output strings_ array. -class StringCollector { - public: - StringCollector(Handle<mirror::ObjectArray<mirror::String>> strings, size_t index) - : strings_(strings), index_(index) { - } - static void Callback(Object* obj, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - auto* collector = reinterpret_cast<StringCollector*>(arg); - if (obj->GetClass()->IsStringClass()) { - collector->strings_->SetWithoutChecks<false>(collector->index_++, obj->AsString()); - } - } - size_t GetIndex() const { - return index_; - } - - private: - Handle<mirror::ObjectArray<mirror::String>> strings_; - size_t index_; -}; - -// Compare strings based on length, used for sorting strings by length / reverse length. -class LexicographicalStringComparator { - public: - bool operator()(const mirror::HeapReference<mirror::String>& lhs, - const mirror::HeapReference<mirror::String>& rhs) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - mirror::String* lhs_s = lhs.AsMirrorPtr(); - mirror::String* rhs_s = rhs.AsMirrorPtr(); - uint16_t* lhs_begin = lhs_s->GetValue(); - uint16_t* rhs_begin = rhs_s->GetValue(); - return std::lexicographical_compare(lhs_begin, lhs_begin + lhs_s->GetLength(), - rhs_begin, rhs_begin + rhs_s->GetLength()); - } -}; - void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) { if (!obj->GetClass()->IsStringClass()) { return; @@ -769,7 +715,8 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) { DCHECK_EQ(obj, obj->AsString()->Intern()); return; } - mirror::String* const interned = obj->AsString()->Intern(); + mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrong( + obj->AsString()->Intern()); if (obj != interned) { if (!IsImageBinSlotAssigned(interned)) { // interned obj is after us, allocate its location early @@ -965,7 +912,6 @@ void ImageWriter::CalculateNewObjectOffsets() { // know where image_roots is going to end up image_end_ += RoundUp(sizeof(ImageHeader), kObjectAlignment); // 64-bit-alignment - DCHECK_LT(image_end_, image_->Size()); image_objects_offset_begin_ = image_end_; // Prepare bin slots for dex cache arrays. PrepareDexCacheArraySlots(); @@ -997,7 +943,6 @@ void ImageWriter::CalculateNewObjectOffsets() { // Transform each object's bin slot into an offset which will be used to do the final copy. 
heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this); - DCHECK(saved_hashes_map_.empty()); // All binslot hashes should've been put into vector by now. DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_); @@ -1010,6 +955,11 @@ void ImageWriter::CalculateNewObjectOffsets() { bin_slot_previous_sizes_[native_reloc.bin_type]; } + // Calculate how big the intern table will be after being serialized. + auto* const intern_table = Runtime::Current()->GetInternTable(); + CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings"; + intern_table_bytes_ = intern_table->WriteToMemory(nullptr); + // Note that image_end_ is left at end of used mirror object section. } @@ -1039,6 +989,10 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { CHECK_EQ(image_objects_offset_begin_ + bin_slot_previous_sizes_[kBinArtMethodClean], methods_section->Offset()); cur_pos = methods_section->End(); + // Calculate the size of the interned strings. + auto* interned_strings_section = §ions[ImageHeader::kSectionInternedStrings]; + *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_); + cur_pos = interned_strings_section->End(); // Finally bitmap section. const size_t bitmap_bytes = image_bitmap_->Size(); auto* bitmap_section = §ions[ImageHeader::kSectionImageBitmap]; @@ -1046,16 +1000,19 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { cur_pos = bitmap_section->End(); if (kIsDebugBuild) { size_t idx = 0; - for (auto& section : sections) { + for (const ImageSection& section : sections) { LOG(INFO) << static_cast<ImageHeader::ImageSections>(idx) << " " << section; ++idx; } LOG(INFO) << "Methods: clean=" << clean_methods_ << " dirty=" << dirty_methods_; } + const size_t image_end = static_cast<uint32_t>(interned_strings_section->End()); + CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) << + "Oat file should be right after the image."; // Create the header. 
new (image_->Begin()) ImageHeader( - PointerToLowMemUInt32(image_begin_), static_cast<uint32_t>(methods_section->End()), sections, - image_roots_address_, oat_file_->GetOatHeader().GetChecksum(), + PointerToLowMemUInt32(image_begin_), image_end, + sections, image_roots_address_, oat_file_->GetOatHeader().GetChecksum(), PointerToLowMemUInt32(oat_file_begin), PointerToLowMemUInt32(oat_data_begin_), PointerToLowMemUInt32(oat_data_end), PointerToLowMemUInt32(oat_file_end), target_ptr_size_, compile_pic_); @@ -1068,6 +1025,37 @@ ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) { return reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); } +class FixupRootVisitor : public RootVisitor { + public: + explicit FixupRootVisitor(ImageWriter* image_writer) : image_writer_(image_writer) { + } + + void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + *roots[i] = ImageAddress(*roots[i]); + } + } + + void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, + const RootInfo& info ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + roots[i]->Assign(ImageAddress(roots[i]->AsMirrorPtr())); + } + } + + private: + ImageWriter* const image_writer_; + + mirror::Object* ImageAddress(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + const size_t offset = image_writer_->GetImageOffset(obj); + auto* const dest = reinterpret_cast<Object*>(image_writer_->image_begin_ + offset); + VLOG(compiler) << "Update root from " << obj << " to " << dest; + return dest; + } +}; + void ImageWriter::CopyAndFixupNativeData() { // Copy ArtFields and methods to their locations and update the array for convenience. for (auto& pair : native_object_reloc_) { @@ -1088,7 +1076,7 @@ void ImageWriter::CopyAndFixupNativeData() { } // Fixup the image method roots. auto* image_header = reinterpret_cast<ImageHeader*>(image_->Begin()); - const auto& methods_section = image_header->GetMethodsSection(); + const ImageSection& methods_section = image_header->GetMethodsSection(); for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) { auto* m = image_methods_[i]; CHECK(m != nullptr); @@ -1101,18 +1089,35 @@ void ImageWriter::CopyAndFixupNativeData() { auto* dest = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), dest); } + // Write the intern table into the image. + const ImageSection& intern_table_section = image_header->GetImageSection( + ImageHeader::kSectionInternedStrings); + InternTable* const intern_table = Runtime::Current()->GetInternTable(); + uint8_t* const memory_ptr = image_->Begin() + intern_table_section.Offset(); + const size_t intern_table_bytes = intern_table->WriteToMemory(memory_ptr); + // Fixup the pointers in the newly written intern table to contain image addresses. + InternTable temp_table; + // Note that we require that ReadFromMemory does not make an internal copy of the elements so that + // the VisitRoots() will update the memory directly rather than the copies. + // This also relies on visit roots not doing any verification which could fail after we update + // the roots to be the image addresses. 
+ temp_table.ReadFromMemory(memory_ptr); + CHECK_EQ(temp_table.Size(), intern_table->Size()); + FixupRootVisitor visitor(this); + temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots); + CHECK_EQ(intern_table_bytes, intern_table_bytes_); } void ImageWriter::CopyAndFixupObjects() { gc::Heap* heap = Runtime::Current()->GetHeap(); heap->VisitObjects(CopyAndFixupObjectsCallback, this); // Fix up the object previously had hash codes. - for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) { + for (const auto& hash_pair : saved_hashcode_map_) { Object* obj = hash_pair.first; DCHECK_EQ(obj->GetLockWord<kVerifyNone>(false).ReadBarrierState(), 0U); obj->SetLockWord<kVerifyNone>(LockWord::FromHashCode(hash_pair.second, 0U), false); } - saved_hashes_.clear(); + saved_hashcode_map_.clear(); } void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) { @@ -1155,18 +1160,22 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a } void ImageWriter::CopyAndFixupObject(Object* obj) { - // see GetLocalAddress for similar computation size_t offset = GetImageOffset(obj); auto* dst = reinterpret_cast<Object*>(image_->Begin() + offset); - const uint8_t* src = reinterpret_cast<const uint8_t*>(obj); + DCHECK_LT(offset, image_end_); + const auto* src = reinterpret_cast<const uint8_t*>(obj); + + image_bitmap_->Set(dst); // Mark the obj as live. - size_t n = obj->SizeOf(); + const size_t n = obj->SizeOf(); DCHECK_LE(offset + n, image_->Size()); memcpy(dst, src, n); // Write in a hash code of objects which have inflated monitors or a hash code in their monitor // word. - dst->SetLockWord(LockWord::Default(), false); + const auto it = saved_hashcode_map_.find(obj); + dst->SetLockWord(it != saved_hashcode_map_.end() ? + LockWord::FromHashCode(it->second, 0u) : LockWord::Default(), false); FixupObject(obj, dst); } @@ -1176,7 +1185,7 @@ class FixupVisitor { FixupVisitor(ImageWriter* image_writer, Object* copy) : image_writer_(image_writer), copy_(copy) { } - void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const + void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) { Object* ref = obj->GetFieldObject<Object, kVerifyNone>(offset); // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the @@ -1186,7 +1195,7 @@ class FixupVisitor { } // java.lang.ref.Reference visitor. 
- void operator()(mirror::Class* /*klass*/, mirror::Reference* ref) const + void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) { copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>( @@ -1490,4 +1499,11 @@ uint32_t ImageWriter::BinSlot::GetIndex() const { return lockword_ & ~kBinMask; } +uint8_t* ImageWriter::GetOatFileBegin() const { + DCHECK_GT(intern_table_bytes_, 0u); + return image_begin_ + RoundUp( + image_end_ + bin_slot_sizes_[kBinArtField] + bin_slot_sizes_[kBinArtMethodDirty] + + bin_slot_sizes_[kBinArtMethodClean] + intern_table_bytes_, kPageSize); +} + } // namespace art diff --git a/compiler/image_writer.h b/compiler/image_writer.h index a35d6ad9c9..9d45ce2bd4 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -54,7 +54,7 @@ class ImageWriter FINAL { quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic), target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), bin_slot_sizes_(), bin_slot_previous_sizes_(), bin_slot_count_(), - dirty_methods_(0u), clean_methods_(0u) { + intern_table_bytes_(0u), dirty_methods_(0u), clean_methods_(0u) { CHECK_NE(image_begin, 0U); std::fill(image_methods_, image_methods_ + arraysize(image_methods_), nullptr); } @@ -84,11 +84,7 @@ class ImageWriter FINAL { image_begin_ + RoundUp(sizeof(ImageHeader), kObjectAlignment) + it->second + offset); } - uint8_t* GetOatFileBegin() const { - return image_begin_ + RoundUp( - image_end_ + bin_slot_sizes_[kBinArtField] + bin_slot_sizes_[kBinArtMethodDirty] + - bin_slot_sizes_[kBinArtMethodClean], kPageSize); - } + uint8_t* GetOatFileBegin() const; bool Write(const std::string& image_filename, const std::string& oat_filename, const std::string& oat_location) @@ -158,7 +154,7 @@ class ImageWriter FINAL { // The offset in bytes from the beginning of the bin. Aligned to object size. uint32_t GetIndex() const; // Pack into a single uint32_t, for storing into a lock word. - explicit operator uint32_t() const { return lockword_; } + uint32_t Uint32Value() const { return lockword_; } // Comparison operator for map support bool operator<(const BinSlot& other) const { return lockword_ < other.lockword_; } @@ -170,7 +166,7 @@ class ImageWriter FINAL { // We use the lock word to store the offset of the object in the image. void AssignImageOffset(mirror::Object* object, BinSlot bin_slot) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void SetImageOffset(mirror::Object* object, BinSlot bin_slot, size_t offset) + void SetImageOffset(mirror::Object* object, size_t offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool IsImageOffsetAssigned(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -330,11 +326,9 @@ class ImageWriter FINAL { // The start offsets of the dex cache arrays. SafeMap<const DexFile*, size_t> dex_cache_array_starts_; - // Saved hashes (objects are inside of the image so that they don't move). - std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_; - - // Saved hashes (objects are bin slots to inside of the image, not yet allocated an address). - std::map<BinSlot, uint32_t> saved_hashes_map_; + // Saved hash codes. We use these to restore lockwords which were temporarily used to have + // forwarding addresses as well as copying over hash codes. 
+ std::unordered_map<mirror::Object*, uint32_t> saved_hashcode_map_; // Beginning target oat address for the pointers from the output image to its oat file. const uint8_t* oat_data_begin_; @@ -360,6 +354,9 @@ class ImageWriter FINAL { size_t bin_slot_previous_sizes_[kBinSize]; // Number of bytes in previous bins. size_t bin_slot_count_[kBinSize]; // Number of objects in a bin + // Cached size of the intern table for when we allocate memory. + size_t intern_table_bytes_; + // ArtField, ArtMethod relocating map. These are allocated as array of structs but we want to // have one entry per art field for convenience. ArtFields are placed right after the end of the // image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields. @@ -376,8 +373,9 @@ class ImageWriter FINAL { uint64_t dirty_methods_; uint64_t clean_methods_; - friend class FixupVisitor; friend class FixupClassVisitor; + friend class FixupRootVisitor; + friend class FixupVisitor; DISALLOW_COPY_AND_ASSIGN(ImageWriter); }; diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 3a0d520e47..016f28ef1e 100644 --- a/compiler/jni/jni_cfi_test.cc +++ b/compiler/jni/jni_cfi_test.cc @@ -56,7 +56,7 @@ class JNICFITest : public CFITest { jni_asm->IncreaseFrameSize(32); jni_asm->DecreaseFrameSize(32); jni_asm->RemoveFrame(frame_size, callee_save_regs); - jni_asm->EmitSlowPaths(); + jni_asm->FinalizeCode(); std::vector<uint8_t> actual_asm(jni_asm->CodeSize()); MemoryRegion code(&actual_asm[0], actual_asm.size()); jni_asm->FinalizeInstructions(code); diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 4d7d86cce6..85fd6962fa 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -474,7 +474,7 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size)); // 17. Finalize code generation - __ EmitSlowPaths(); + __ FinalizeCode(); size_t cs = __ CodeSize(); std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index d0104300d3..a3e889f0f6 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -82,6 +82,7 @@ std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() { arm::kLoadWord, arm::PC, arm::R0, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); assembler.bkpt(0); + assembler.FinalizeCode(); std::vector<uint8_t> thunk_code(assembler.CodeSize()); MemoryRegion code(thunk_code.data(), thunk_code.size()); assembler.FinalizeInstructions(code); diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index ee48789ad2..29355d6968 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -233,7 +233,7 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() { kArm64PointerSize).Int32Value()); assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); // Ensure we emit the literal pool. 
- assembler.EmitSlowPaths(); + assembler.FinalizeCode(); std::vector<uint8_t> thunk_code(assembler.CodeSize()); MemoryRegion code(thunk_code.data(), thunk_code.size()); assembler.FinalizeInstructions(code); diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index b2b54965b5..97b3725da1 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -126,11 +126,14 @@ class ValueBound : public ValueObject { return instruction_ == bound.instruction_ && constant_ == bound.constant_; } - static HInstruction* FromArrayLengthToNewArrayIfPossible(HInstruction* instruction) { - // Null check on the NewArray should have been eliminated by instruction - // simplifier already. - if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { - return instruction->InputAt(0)->AsNewArray(); + static HInstruction* FromArrayLengthToArray(HInstruction* instruction) { + DCHECK(instruction->IsArrayLength() || instruction->IsNewArray()); + if (instruction->IsArrayLength()) { + HInstruction* input = instruction->InputAt(0); + if (input->IsNullCheck()) { + input = input->AsNullCheck()->InputAt(0); + } + return input; } return instruction; } @@ -146,8 +149,9 @@ class ValueBound : public ValueObject { // Some bounds are created with HNewArray* as the instruction instead // of HArrayLength*. They are treated the same. - instruction1 = FromArrayLengthToNewArrayIfPossible(instruction1); - instruction2 = FromArrayLengthToNewArrayIfPossible(instruction2); + // HArrayLength with the same array input are considered equal also. + instruction1 = FromArrayLengthToArray(instruction1); + instruction2 = FromArrayLengthToArray(instruction2); return instruction1 == instruction2; } @@ -271,7 +275,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // Loop header of loop_info. Exiting loop is normal. return false; } - const GrowableArray<HBasicBlock*> successors = block->GetSuccessors(); + const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors(); for (size_t i = 0; i < successors.Size(); i++) { if (!loop_info->Contains(*successors.Get(i))) { // One of the successors exits the loop. @@ -293,8 +297,14 @@ class ArrayAccessInsideLoopFinder : public ValueObject { void Run() { HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation(); - for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) { - HBasicBlock* block = it_loop.Current(); + HBlocksInLoopReversePostOrderIterator it_loop(*loop_info); + HBasicBlock* block = it_loop.Current(); + DCHECK(block == induction_variable_->GetBlock()); + // Skip loop header. Since narrowed value range of a MonotonicValueRange only + // applies to the loop body (after the test at the end of the loop header). + it_loop.Advance(); + for (; !it_loop.Done(); it_loop.Advance()) { + block = it_loop.Current(); DCHECK(block->IsInLoop()); if (!DominatesAllBackEdges(block, loop_info)) { // In order not to trigger deoptimization unnecessarily, make sure @@ -308,30 +318,35 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // that the loop will loop through the full monotonic value range from // initial_ to end_. So adding deoptimization might be too aggressive and can // trigger deoptimization unnecessarily even if the loop won't actually throw - // AIOOBE. 
Otherwise, the loop induction variable is going to cover the full - // monotonic value range from initial_ to end_, and deoptimizations are added - // iff the loop will throw AIOOBE. + // AIOOBE. found_array_length_ = nullptr; return; } for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr; instruction = instruction->GetNext()) { - if (!instruction->IsArrayGet() && !instruction->IsArraySet()) { + if (!instruction->IsBoundsCheck()) { continue; } - HInstruction* index = instruction->InputAt(1); - if (!index->IsBoundsCheck()) { + + HInstruction* length_value = instruction->InputAt(1); + if (length_value->IsIntConstant()) { + // TODO: may optimize for constant case. continue; } - HArrayLength* array_length = index->InputAt(1)->AsArrayLength(); - if (array_length == nullptr) { - DCHECK(index->InputAt(1)->IsIntConstant()); - // TODO: may optimize for constant case. + if (length_value->IsPhi()) { + // When adding deoptimizations in outer loops, we might create + // a phi for the array length, and update all uses of the + // length in the loop to that phi. Therefore, inner loops having + // bounds checks on the same array will use that phi. + // TODO: handle these cases. continue; } + DCHECK(length_value->IsArrayLength()); + HArrayLength* array_length = length_value->AsArrayLength(); + HInstruction* array = array_length->InputAt(0); if (array->IsNullCheck()) { array = array->AsNullCheck()->InputAt(0); @@ -347,7 +362,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { continue; } - index = index->AsBoundsCheck()->InputAt(0); + HInstruction* index = instruction->AsBoundsCheck()->InputAt(0); HInstruction* left = index; int32_t right = 0; if (left == induction_variable_ || @@ -375,7 +390,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // The instruction that corresponds to a MonotonicValueRange. HInstruction* induction_variable_; - // The array length of the array that's accessed inside the loop. + // The array length of the array that's accessed inside the loop body. HArrayLength* found_array_length_; // The lowest and highest constant offsets relative to induction variable @@ -411,6 +426,8 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { ValueBound GetLower() const { return lower_; } ValueBound GetUpper() const { return upper_; } + bool IsConstantValueRange() { return lower_.IsConstant() && upper_.IsConstant(); } + // If it's certain that this value range fits in other_range. virtual bool FitsIn(ValueRange* other_range) const { if (other_range == nullptr) { @@ -495,13 +512,30 @@ class MonotonicValueRange : public ValueRange { ValueBound GetBound() const { return bound_; } void SetEnd(HInstruction* end) { end_ = end; } void SetInclusive(bool inclusive) { inclusive_ = inclusive; } - HBasicBlock* GetLoopHead() const { + HBasicBlock* GetLoopHeader() const { DCHECK(induction_variable_->GetBlock()->IsLoopHeader()); return induction_variable_->GetBlock(); } MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; } + HBasicBlock* GetLoopHeaderSuccesorInLoop() { + HBasicBlock* header = GetLoopHeader(); + HInstruction* instruction = header->GetLastInstruction(); + DCHECK(instruction->IsIf()); + HIf* h_if = instruction->AsIf(); + HLoopInformation* loop_info = header->GetLoopInformation(); + bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor()); + bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor()); + + // Just in case it's some strange loop structure. 
+ if (true_successor_in_loop && false_successor_in_loop) { + return nullptr; + } + DCHECK(true_successor_in_loop || false_successor_in_loop); + return false_successor_in_loop ? h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor(); + } + // If it's certain that this value range fits in other_range. bool FitsIn(ValueRange* other_range) const OVERRIDE { if (other_range == nullptr) { @@ -593,12 +627,114 @@ class MonotonicValueRange : public ValueRange { } } + // Try to add HDeoptimize's in the loop pre-header first to narrow this range. + // For example, this loop: + // + // for (int i = start; i < end; i++) { + // array[i - 1] = array[i] + array[i + 1]; + // } + // + // will be transformed to: + // + // int array_length_in_loop_body_if_needed; + // if (start >= end) { + // array_length_in_loop_body_if_needed = 0; + // } else { + // if (start < 1) deoptimize(); + // if (array == null) deoptimize(); + // array_length = array.length; + // if (end > array_length - 1) deoptimize; + // array_length_in_loop_body_if_needed = array_length; + // } + // for (int i = start; i < end; i++) { + // // No more null check and bounds check. + // // array.length value is replaced with array_length_in_loop_body_if_needed + // // in the loop body. + // array[i - 1] = array[i] + array[i + 1]; + // } + // + // We basically first go through the loop body and find those array accesses whose + // index is at a constant offset from the induction variable ('i' in the above example), + // and update offset_low and offset_high along the way. We then add the following + // deoptimizations in the loop pre-header (suppose end is not inclusive). + // if (start < -offset_low) deoptimize(); + // if (end >= array.length - offset_high) deoptimize(); + // It might be necessary to first hoist array.length (and the null check on it) out of + // the loop with another deoptimization. + // + // In order not to trigger deoptimization unnecessarily, we want to make a strong + // guarantee that no deoptimization is triggered if the loop body itself doesn't + // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop + // body must throw AIOOBE). + // This is achieved by the following: + // 1) We only process loops that iterate through the full monotonic range from + // initial_ to end_. We do the following checks to make sure that's the case: + // a) The loop doesn't have early exit (via break, return, etc.) + // b) The increment_ is 1/-1. An increment of 2, for example, may skip end_. + // 2) We only collect array accesses of blocks in the loop body that dominate + // all loop back edges, these array accesses are guaranteed to happen + // at each loop iteration. + // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses + // when the induction variable is at initial_ and end_ must be in a legal range. + // Since the added deoptimizations are basically checking the induction variable + // at initial_ and end_ values, no deoptimization will be triggered either. + // + // A special case is the loop body isn't entered at all. In that case, we may still + // add deoptimization due to the analysis described above. In order not to trigger + // deoptimization, we do a test between initial_ and end_ first and skip over + // the added deoptimization. 
+ ValueRange* NarrowWithDeoptimization() { + if (increment_ != 1 && increment_ != -1) { + // In order not to trigger deoptimization unnecessarily, we want to + // make sure the loop iterates through the full range from initial_ to + // end_ so that boundaries are covered by the loop. An increment of 2, + // for example, may skip end_. + return this; + } + + if (end_ == nullptr) { + // No full info to add deoptimization. + return this; + } + + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + if (!initial_->GetBlock()->Dominates(pre_header) || + !end_->GetBlock()->Dominates(pre_header)) { + // Can't add a check in loop pre-header if the value isn't available there. + return this; + } + + ArrayAccessInsideLoopFinder finder(induction_variable_); + + if (!finder.HasFoundArrayLength()) { + // No array access was found inside the loop that can benefit + // from deoptimization. + return this; + } + + if (!AddDeoptimization(finder)) { + return this; + } + + // After added deoptimizations, induction variable fits in + // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. + ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); + ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); + // We've narrowed the range after added deoptimizations. + return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); + } + // Returns true if adding a (constant >= value) check for deoptimization // is allowed and will benefit compiled code. - bool CanAddDeoptimizationConstant(HInstruction* value, - int32_t constant, - bool* is_proven) { + bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) { *is_proven = false; + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + DCHECK(value->GetBlock()->Dominates(pre_header)); + // See if we can prove the relationship first. if (value->IsIntConstant()) { if (value->AsIntConstant()->GetValue() >= constant) { @@ -615,22 +751,118 @@ class MonotonicValueRange : public ValueRange { return true; } + // Try to filter out cases that the loop entry test will never be true. + bool LoopEntryTestUseful() { + if (initial_->IsIntConstant() && end_->IsIntConstant()) { + int32_t initial_val = initial_->AsIntConstant()->GetValue(); + int32_t end_val = end_->AsIntConstant()->GetValue(); + if (increment_ == 1) { + if (inclusive_) { + return initial_val > end_val; + } else { + return initial_val >= end_val; + } + } else { + DCHECK_EQ(increment_, -1); + if (inclusive_) { + return initial_val < end_val; + } else { + return initial_val <= end_val; + } + } + } + return true; + } + + // Returns the block for adding deoptimization. + HBasicBlock* TransformLoopForDeoptimizationIfNeeded() { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + // Deoptimization is only added when both initial_ and end_ are defined + // before the loop. + DCHECK(initial_->GetBlock()->Dominates(pre_header)); + DCHECK(end_->GetBlock()->Dominates(pre_header)); + + // If it can be proven the loop body is definitely entered (unless exception + // is thrown in the loop header for which triggering deoptimization is fine), + // there is no need for tranforming the loop. 
In that case, deoptimization + // will just be added in the loop pre-header. + if (!LoopEntryTestUseful()) { + return pre_header; + } + + HGraph* graph = header->GetGraph(); + graph->TransformLoopHeaderForBCE(header); + HBasicBlock* new_pre_header = header->GetDominator(); + DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader()); + HBasicBlock* if_block = new_pre_header->GetDominator(); + HBasicBlock* dummy_block = if_block->GetSuccessors().Get(0); // True successor. + HBasicBlock* deopt_block = if_block->GetSuccessors().Get(1); // False successor. + + dummy_block->AddInstruction(new (graph->GetArena()) HGoto()); + deopt_block->AddInstruction(new (graph->GetArena()) HGoto()); + new_pre_header->AddInstruction(new (graph->GetArena()) HGoto()); + return deopt_block; + } + + // Adds a test between initial_ and end_ to see if the loop body is entered. + // If the loop body isn't entered at all, it jumps to the loop pre-header (after + // transformation) to avoid any deoptimization. + void AddLoopBodyEntryTest() { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + HBasicBlock* if_block = pre_header->GetDominator(); + HGraph* graph = header->GetGraph(); + + HCondition* cond; + if (increment_ == 1) { + if (inclusive_) { + cond = new (graph->GetArena()) HGreaterThan(initial_, end_); + } else { + cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_); + } + } else { + DCHECK_EQ(increment_, -1); + if (inclusive_) { + cond = new (graph->GetArena()) HLessThan(initial_, end_); + } else { + cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_); + } + } + HIf* h_if = new (graph->GetArena()) HIf(cond); + if_block->AddInstruction(cond); + if_block->AddInstruction(h_if); + } + // Adds a check that (value >= constant), and HDeoptimize otherwise. 
void AddDeoptimizationConstant(HInstruction* value, - int32_t constant) { - HBasicBlock* block = induction_variable_->GetBlock(); - DCHECK(block->IsLoopHeader()); - HGraph* graph = block->GetGraph(); - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + int32_t constant, + HBasicBlock* deopt_block, + bool loop_entry_test_block_added) { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + if (loop_entry_test_block_added) { + DCHECK(deopt_block->GetSuccessors().Get(0) == pre_header); + } else { + DCHECK(deopt_block == pre_header); + } + HGraph* graph = header->GetGraph(); + HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); + if (loop_entry_test_block_added) { + DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors().Get(1)); + } + HIntConstant* const_instr = graph->GetIntConstant(constant); HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr); HDeoptimize* deoptimize = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); + deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction()); deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + suspend_check->GetEnvironment(), header); } // Returns true if adding a (value <= array_length + offset) check for deoptimization @@ -640,6 +872,26 @@ class MonotonicValueRange : public ValueRange { int32_t offset, bool* is_proven) { *is_proven = false; + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + DCHECK(value->GetBlock()->Dominates(pre_header)); + + if (array_length->GetBlock() == header) { + // array_length_in_loop_body_if_needed only has correct value when the loop + // body is entered. We bail out in this case. Usually array_length defined + // in the loop header is already hoisted by licm. + return false; + } else { + // array_length is defined either before the loop header already, or in + // the loop body since it's used in the loop body. If it's defined in the loop body, + // a phi array_length_in_loop_body_if_needed is used to replace it. In that case, + // all the uses of array_length must be dominated by its definition in the loop + // body. array_length_in_loop_body_if_needed is guaranteed to be the same as + // array_length once the loop body is entered so all the uses of the phi will + // use the correct value. + } + if (offset > 0) { // There might be overflow issue. // TODO: handle this, possibly with some distance relationship between @@ -667,56 +919,99 @@ class MonotonicValueRange : public ValueRange { // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise. 
void AddDeoptimizationArrayLength(HInstruction* value, HArrayLength* array_length, - int32_t offset) { - HBasicBlock* block = induction_variable_->GetBlock(); - DCHECK(block->IsLoopHeader()); - HGraph* graph = block->GetGraph(); - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + int32_t offset, + HBasicBlock* deopt_block, + bool loop_entry_test_block_added) { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + if (loop_entry_test_block_added) { + DCHECK(deopt_block->GetSuccessors().Get(0) == pre_header); + } else { + DCHECK(deopt_block == pre_header); + } + HGraph* graph = header->GetGraph(); + HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); // We may need to hoist null-check and array_length out of loop first. - if (!array_length->GetBlock()->Dominates(pre_header)) { + if (!array_length->GetBlock()->Dominates(deopt_block)) { + // array_length must be defined in the loop body. + DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock())); + DCHECK(array_length->GetBlock() != header); + HInstruction* array = array_length->InputAt(0); HNullCheck* null_check = array->AsNullCheck(); if (null_check != nullptr) { array = null_check->InputAt(0); } - // We've already made sure array is defined before the loop when collecting + // We've already made sure the array is defined before the loop when collecting // array accesses for the loop. - DCHECK(array->GetBlock()->Dominates(pre_header)); - if (null_check != nullptr && !null_check->GetBlock()->Dominates(pre_header)) { + DCHECK(array->GetBlock()->Dominates(deopt_block)); + if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) { // Hoist null check out of loop with a deoptimization. HNullConstant* null_constant = graph->GetNullConstant(); HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant); // TODO: for one dex_pc, share the same deoptimization slow path. HDeoptimize* null_check_deoptimize = new (graph->GetArena()) HDeoptimize(null_check_cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(null_check_cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore( - null_check_deoptimize, pre_header->GetLastInstruction()); + deopt_block->InsertInstructionBefore( + null_check_cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore( + null_check_deoptimize, deopt_block->GetLastInstruction()); // Eliminate null check in the loop. null_check->ReplaceWith(array); null_check->GetBlock()->RemoveInstruction(null_check); null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + suspend_check->GetEnvironment(), header); } - // Hoist array_length out of loop. - array_length->MoveBefore(pre_header->GetLastInstruction()); + + HArrayLength* new_array_length = new (graph->GetArena()) HArrayLength(array); + deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction()); + + if (loop_entry_test_block_added) { + // Replace array_length defined inside the loop body with a phi + // array_length_in_loop_body_if_needed. This is a synthetic phi so there is + // no vreg number for it. + HPhi* phi = new (graph->GetArena()) HPhi( + graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt); + // Set to 0 if the loop body isn't entered. 
+ phi->SetRawInputAt(0, graph->GetIntConstant(0)); + // Set to array.length if the loop body is entered. + phi->SetRawInputAt(1, new_array_length); + pre_header->AddPhi(phi); + array_length->ReplaceWith(phi); + // Make sure phi is only used after the loop body is entered. + if (kIsDebugBuild) { + for (HUseIterator<HInstruction*> it(phi->GetUses()); + !it.Done(); + it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock())); + } + } + } else { + array_length->ReplaceWith(new_array_length); + } + + array_length->GetBlock()->RemoveInstruction(array_length); + // Use new_array_length for deopt. + array_length = new_array_length; } - HIntConstant* offset_instr = graph->GetIntConstant(offset); - HAdd* add = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); - HCondition* cond = new (graph->GetArena()) HGreaterThan(value, add); - HDeoptimize* deoptimize = new (graph->GetArena()) - HDeoptimize(cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(add, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); - deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + HInstruction* added = array_length; + if (offset != 0) { + HIntConstant* offset_instr = graph->GetIntConstant(offset); + added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); + deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction()); + } + HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added); + HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc()); + deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction()); + deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header); } - // Add deoptimizations in loop pre-header with the collected array access + // Adds deoptimizations in loop pre-header with the collected array access // data so that value ranges can be established in loop body. // Returns true if deoptimizations are successfully added, or if it's proven // it's not necessary. @@ -733,70 +1028,60 @@ class MonotonicValueRange : public ValueRange { return false; } + HBasicBlock* deopt_block; + bool loop_entry_test_block_added = false; bool is_constant_proven, is_length_proven; + + HInstruction* const_comparing_instruction; + int32_t const_compared_to; + HInstruction* array_length_comparing_instruction; + int32_t array_length_offset; if (increment_ == 1) { // Increasing from initial_ to end_. - int32_t offset = inclusive_ ? -offset_high - 1 : -offset_high; - if (CanAddDeoptimizationConstant(initial_, -offset_low, &is_constant_proven) && - CanAddDeoptimizationArrayLength(end_, array_length, offset, &is_length_proven)) { - if (!is_constant_proven) { - AddDeoptimizationConstant(initial_, -offset_low); - } - if (!is_length_proven) { - AddDeoptimizationArrayLength(end_, array_length, offset); + const_comparing_instruction = initial_; + const_compared_to = -offset_low; + array_length_comparing_instruction = end_; + array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high; + } else { + const_comparing_instruction = end_; + const_compared_to = inclusive_ ? 
-offset_low : -offset_low - 1; + array_length_comparing_instruction = initial_; + array_length_offset = -offset_high - 1; + } + + if (CanAddDeoptimizationConstant(const_comparing_instruction, + const_compared_to, + &is_constant_proven) && + CanAddDeoptimizationArrayLength(array_length_comparing_instruction, + array_length, + array_length_offset, + &is_length_proven)) { + if (!is_constant_proven || !is_length_proven) { + deopt_block = TransformLoopForDeoptimizationIfNeeded(); + loop_entry_test_block_added = (deopt_block != pre_header); + if (loop_entry_test_block_added) { + // Loop body may be entered. + AddLoopBodyEntryTest(); } - return true; } - } else if (increment_ == -1) { - // Decreasing from initial_ to end_. - int32_t constant = inclusive_ ? -offset_low : -offset_low - 1; - if (CanAddDeoptimizationConstant(end_, constant, &is_constant_proven) && - CanAddDeoptimizationArrayLength( - initial_, array_length, -offset_high - 1, &is_length_proven)) { - if (!is_constant_proven) { - AddDeoptimizationConstant(end_, constant); - } - if (!is_length_proven) { - AddDeoptimizationArrayLength(initial_, array_length, -offset_high - 1); - } - return true; + if (!is_constant_proven) { + AddDeoptimizationConstant(const_comparing_instruction, + const_compared_to, + deopt_block, + loop_entry_test_block_added); + } + if (!is_length_proven) { + AddDeoptimizationArrayLength(array_length_comparing_instruction, + array_length, + array_length_offset, + deopt_block, + loop_entry_test_block_added); } + return true; } return false; } - // Try to add HDeoptimize's in the loop pre-header first to narrow this range. - ValueRange* NarrowWithDeoptimization() { - if (increment_ != 1 && increment_ != -1) { - // TODO: possibly handle overflow/underflow issues with deoptimization. - return this; - } - - if (end_ == nullptr) { - // No full info to add deoptimization. - return this; - } - - ArrayAccessInsideLoopFinder finder(induction_variable_); - - if (!finder.HasFoundArrayLength()) { - // No array access was found inside the loop that can benefit - // from deoptimization. - return this; - } - - if (!AddDeoptimization(finder)) { - return this; - } - - // After added deoptimizations, induction variable fits in - // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. - ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); - ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); - // We've narrowed the range after added deoptimizations. - return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); - } - private: HPhi* const induction_variable_; // Induction variable for this monotonic value range. HInstruction* const initial_; // Initial value. @@ -819,12 +1104,17 @@ class BCEVisitor : public HGraphVisitor { // it's likely some AIOOBE will be thrown. static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024; + // Added blocks for loop body entry test. 
+ bool IsAddedBlock(HBasicBlock* block) const { + return block->GetBlockId() >= initial_block_size_; + } + explicit BCEVisitor(HGraph* graph) - : HGraphVisitor(graph), - maps_(graph->GetBlocks().Size()), - need_to_revisit_block_(false) {} + : HGraphVisitor(graph), maps_(graph->GetBlocks().Size()), + need_to_revisit_block_(false), initial_block_size_(graph->GetBlocks().Size()) {} void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + DCHECK(!IsAddedBlock(block)); first_constant_index_bounds_check_map_.clear(); HGraphVisitor::VisitBasicBlock(block); if (need_to_revisit_block_) { @@ -839,6 +1129,10 @@ class BCEVisitor : public HGraphVisitor { private: // Return the map of proven value ranges at the beginning of a basic block. ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) { + if (IsAddedBlock(basic_block)) { + // Added blocks don't keep value ranges. + return nullptr; + } int block_id = basic_block->GetBlockId(); if (maps_.at(block_id) == nullptr) { std::unique_ptr<ArenaSafeMap<int, ValueRange*>> map( @@ -853,8 +1147,12 @@ class BCEVisitor : public HGraphVisitor { ValueRange* LookupValueRange(HInstruction* instruction, HBasicBlock* basic_block) { while (basic_block != nullptr) { ArenaSafeMap<int, ValueRange*>* map = GetValueRangeMap(basic_block); - if (map->find(instruction->GetId()) != map->end()) { - return map->Get(instruction->GetId()); + if (map != nullptr) { + if (map->find(instruction->GetId()) != map->end()) { + return map->Get(instruction->GetId()); + } + } else { + DCHECK(IsAddedBlock(basic_block)); } basic_block = basic_block->GetDominator(); } @@ -971,7 +1269,7 @@ class BCEVisitor : public HGraphVisitor { if (left_range != nullptr) { left_monotonic_range = left_range->AsMonotonicValueRange(); if (left_monotonic_range != nullptr) { - HBasicBlock* loop_head = left_monotonic_range->GetLoopHead(); + HBasicBlock* loop_head = left_monotonic_range->GetLoopHeader(); if (instruction->GetBlock() != loop_head) { // For monotonic value range, don't handle `instruction` // if it's not defined in the loop header. @@ -1013,7 +1311,7 @@ class BCEVisitor : public HGraphVisitor { // Update the info for monotonic value range. if (left_monotonic_range->GetInductionVariable() == left && left_monotonic_range->GetIncrement() < 0 && - block == left_monotonic_range->GetLoopHead() && + block == left_monotonic_range->GetLoopHeader() && instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { left_monotonic_range->SetEnd(right); left_monotonic_range->SetInclusive(cond == kCondLT); @@ -1047,7 +1345,7 @@ class BCEVisitor : public HGraphVisitor { // Update the info for monotonic value range. 
if (left_monotonic_range->GetInductionVariable() == left && left_monotonic_range->GetIncrement() > 0 && - block == left_monotonic_range->GetLoopHead() && + block == left_monotonic_range->GetLoopHeader() && instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { left_monotonic_range->SetEnd(right); left_monotonic_range->SetInclusive(cond == kCondGT); @@ -1083,7 +1381,16 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); - DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength()); + DCHECK(array_length->IsIntConstant() || + array_length->IsArrayLength() || + array_length->IsPhi()); + + if (array_length->IsPhi()) { + // Input 1 of the phi contains the real array.length once the loop body is + // entered. That value will be used for bound analysis. The graph is still + // strictly in SSA form. + array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength(); + } if (!index->IsIntConstant()) { ValueRange* index_range = LookupValueRange(index, block); @@ -1238,25 +1545,26 @@ class BCEVisitor : public HGraphVisitor { } if (left_range->IsMonotonicValueRange() && - block == left_range->AsMonotonicValueRange()->GetLoopHead()) { + block == left_range->AsMonotonicValueRange()->GetLoopHeader()) { // The comparison is for an induction variable in the loop header. DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable()); - HBasicBlock* loop_body_successor; - if (LIKELY(block->GetLoopInformation()-> - Contains(*instruction->IfFalseSuccessor()))) { - loop_body_successor = instruction->IfFalseSuccessor(); - } else { - loop_body_successor = instruction->IfTrueSuccessor(); + HBasicBlock* loop_body_successor = + left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop(); + if (loop_body_successor == nullptr) { + // In case it's some strange loop structure. + return; + } ValueRange* new_left_range = LookupValueRange(left, loop_body_successor); - if (new_left_range == left_range) { + if ((new_left_range == left_range) || + // Range narrowed with deoptimization is usually more useful than + // a constant range. + new_left_range->IsConstantValueRange()) { // We are not successful in narrowing the monotonic value range to // a regular value range. Try using deoptimization. new_left_range = left_range->AsMonotonicValueRange()-> NarrowWithDeoptimization(); if (new_left_range != left_range) { - GetValueRangeMap(instruction->IfFalseSuccessor())-> - Overwrite(left->GetId(), new_left_range); + GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range); } } } @@ -1511,6 +1819,9 @@ class BCEVisitor : public HGraphVisitor { // eliminate those bounds checks. bool need_to_revisit_block_; + // Initial number of blocks. + int32_t initial_block_size_; + DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; @@ -1527,7 +1838,22 @@ void BoundsCheckElimination::Run() { // value can be narrowed further down in the dominator tree. // // TODO: only visit blocks that dominate some array accesses. - visitor.VisitReversePostOrder(); + HBasicBlock* last_visited_block = nullptr; + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* current = it.Current(); + if (current == last_visited_block) { + // We may insert blocks into the reverse post order list when processing + // a loop header. Don't process it again.
+ DCHECK(current->IsLoopHeader()); + continue; + } + if (visitor.IsAddedBlock(current)) { + // Skip added blocks. Their effects are already taken care of. + continue; + } + visitor.VisitBasicBlock(current); + last_visited_block = current; + } } } // namespace art diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index e383ec664b..4701bddd48 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -440,22 +440,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -464,6 +458,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); @@ -472,6 +467,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -481,6 +477,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // array[i] = 10; // Can't eliminate due to overflow concern. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2(graph); bounds_check_elimination_with_increment_2.Run(); @@ -489,6 +486,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. 
} graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph); bounds_check_elimination_with_increment_2_from_1.Run(); @@ -579,22 +577,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph2(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph2(&allocator, &bounds_check, 0); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -603,6 +595,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, -1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); @@ -611,6 +604,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_less_than(graph); bounds_check_elimination_with_less_than.Run(); @@ -619,6 +613,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph); bounds_check_elimination_increment_minus_2.Run(); @@ -710,15 +705,17 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // int[] array = new int[10]; // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. 
} graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -728,6 +725,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -737,6 +735,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_8(graph); bounds_check_elimination_increment_8.Run(); @@ -828,22 +827,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph4(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph4(&allocator, &bounds_check, 0); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -852,6 +845,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -1027,6 +1021,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_body_add->AddSuccessor(outer_header); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); // gvn should remove the same bounds check. ASSERT_FALSE(IsRemoved(bounds_check1)); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index cbd042901d..946c0602cf 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -603,7 +603,12 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, const char* descriptor = dex_file_->StringDataByIdx(proto_id.shorty_idx_); Primitive::Type return_type = Primitive::GetType(descriptor[0]); bool is_instance_call = invoke_type != kStatic; - size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 
0 : 1); + // Remove the return type from the 'proto'. + size_t number_of_arguments = strlen(descriptor) - 1; + if (is_instance_call) { + // One extra argument for 'this'. + ++number_of_arguments; + } MethodReference target_method(dex_file_, method_idx); uintptr_t direct_code; @@ -614,7 +619,8 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, true, true, &optimized_invoke_type, &target_method, &table_index, &direct_code, &direct_method)) { - VLOG(compiler) << "Did not compile " << PrettyMethod(method_idx, *dex_file_) + VLOG(compiler) << "Did not compile " + << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) << " because a method call could not be resolved"; MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedMethod); return false; @@ -723,10 +729,16 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, } } - invoke = new (arena_) HInvokeStaticOrDirect( - arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index, - is_recursive, string_init_offset, invoke_type, optimized_invoke_type, - clinit_check_requirement); + invoke = new (arena_) HInvokeStaticOrDirect(arena_, + number_of_arguments, + return_type, + dex_pc, + target_method.dex_method_index, + is_recursive, + string_init_offset, + invoke_type, + optimized_invoke_type, + clinit_check_requirement); } size_t start_index = 0; @@ -740,19 +752,29 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, start_index = 1; } - uint32_t descriptor_index = 1; + uint32_t descriptor_index = 1; // Skip the return type. uint32_t argument_index = start_index; if (is_string_init) { start_index = 1; } - for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) { + for (size_t i = start_index; + // Make sure we don't go over the expected arguments or over the number of + // dex registers given. If the instruction was seen as dead by the verifier, + // it hasn't been properly checked. + (i < number_of_vreg_arguments) && (argument_index < number_of_arguments); + i++, argument_index++) { Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]); bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble); - if (!is_range && is_wide && args[i] + 1 != args[i + 1]) { - LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol() - << " at " << dex_pc; - // We do not implement non sequential register pair. - MaybeRecordStat(MethodCompilationStat::kNotCompiledNonSequentialRegPair); + if (!is_range + && is_wide + && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) { + // Longs and doubles should be in pairs, that is, sequential registers. The verifier should + // reject any class where this is violated. However, the verifier only does these checks + // on non trivially dead instructions, so we just bailout the compilation. + VLOG(compiler) << "Did not compile " + << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) + << " because of non-sequential dex register pair in wide argument"; + MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode); return false; } HInstruction* arg = LoadLocal(is_range ? 
register_index + i : args[i], type); @@ -761,7 +783,14 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, i++; } } - DCHECK_EQ(argument_index, number_of_arguments); + + if (argument_index != number_of_arguments) { + VLOG(compiler) << "Did not compile " + << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) + << " because of wrong number of arguments in invoke instruction"; + MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode); + return false; + } if (invoke->IsInvokeStaticOrDirect()) { invoke->SetArgumentAt(argument_index, graph_->GetCurrentMethod()); @@ -1206,14 +1235,20 @@ bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index) const { } void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc) { + // Verifier guarantees that the payload for PackedSwitch contains: + // (a) number of entries (may be zero) + // (b) first and lowest switch case value (entry 0, always present) + // (c) list of target pcs (entries 1 <= i <= N) SwitchTable table(instruction, dex_pc, false); // Value to test against. HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); + // Retrieve number of entries. uint16_t num_entries = table.GetNumEntries(); - // There should be at least one entry here. - DCHECK_GT(num_entries, 0U); + if (num_entries == 0) { + return; + } // Chained cmp-and-branch, starting from starting_key. int32_t starting_key = table.GetEntryAt(0); @@ -1225,6 +1260,10 @@ void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t d } void HGraphBuilder::BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc) { + // Verifier guarantees that the payload for SparseSwitch contains: + // (a) number of entries (may be zero) + // (b) sorted key values (entries 0 <= i < N) + // (c) target pcs corresponding to the switch values (entries N <= i < 2*N) SwitchTable table(instruction, dex_pc, true); // Value to test against. 
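For reference, a minimal standalone sketch of the chained cmp-and-branch lowering that the new BuildPackedSwitch/BuildSparseSwitch comments describe. PackedSwitchPayload and LowerPackedSwitch are hypothetical illustration names, not ART's SwitchTable/HEqual machinery; the empty-payload early return mirrors the num_entries == 0 check added above.

#include <cstdint>
#include <utility>
#include <vector>

// Hypothetical mirror of the packed-switch payload layout described above:
// an entry count (possibly zero), the lowest case key, and one target per entry.
struct PackedSwitchPayload {
  uint16_t num_entries;
  int32_t first_key;
  std::vector<int32_t> targets;  // targets[i] is the target pc for key first_key + i.
};

// Produces the (key, target) pairs that a builder would lower into chained
// compare-and-branch form: if (value == first_key + i) goto targets[i].
std::vector<std::pair<int32_t, int32_t>> LowerPackedSwitch(const PackedSwitchPayload& payload) {
  std::vector<std::pair<int32_t, int32_t>> cases;
  if (payload.num_entries == 0) {
    return cases;  // Empty switch: nothing to test against, control falls through.
  }
  int32_t key = payload.first_key;
  for (uint16_t i = 0; i < payload.num_entries; ++i, ++key) {
    cases.emplace_back(key, payload.targets[i]);
  }
  return cases;
}

A sparse switch differs only in that each case key is read from the sorted key array in the payload instead of being derived from first_key.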
@@ -1424,21 +1463,16 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 } case Instruction::RETURN: { - DCHECK_NE(return_type_, Primitive::kPrimNot); - DCHECK_NE(return_type_, Primitive::kPrimLong); - DCHECK_NE(return_type_, Primitive::kPrimDouble); BuildReturn(instruction, return_type_); break; } case Instruction::RETURN_OBJECT: { - DCHECK(return_type_ == Primitive::kPrimNot); BuildReturn(instruction, return_type_); break; } case Instruction::RETURN_WIDE: { - DCHECK(return_type_ == Primitive::kPrimDouble || return_type_ == Primitive::kPrimLong); BuildReturn(instruction, return_type_); break; } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 049b3e3a40..09f7d86605 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -236,7 +236,6 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, const GrowableArray<HBasicBlock*>& block_order) { block_order_ = &block_order; DCHECK(block_order_->Get(0) == GetGraph()->GetEntryBlock()); - DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), block_order_->Get(1))); ComputeSpillMask(); first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; @@ -508,19 +507,14 @@ void CodeGenerator::BuildNativeGCMap( dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap(); verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]); - uint32_t max_native_offset = 0; - for (size_t i = 0; i < pc_infos_.Size(); i++) { - uint32_t native_offset = pc_infos_.Get(i).native_pc; - if (native_offset > max_native_offset) { - max_native_offset = native_offset; - } - } + uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset(); - GcMapBuilder builder(data, pc_infos_.Size(), max_native_offset, dex_gc_map.RegWidth()); - for (size_t i = 0; i < pc_infos_.Size(); i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - uint32_t native_offset = pc_info.native_pc; - uint32_t dex_pc = pc_info.dex_pc; + size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps(); + GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth()); + for (size_t i = 0; i != num_stack_maps; ++i) { + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + uint32_t native_offset = stack_map_entry.native_pc_offset; + uint32_t dex_pc = stack_map_entry.dex_pc; const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false); CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc; builder.AddEntry(native_offset, references); @@ -528,17 +522,17 @@ void CodeGenerator::BuildNativeGCMap( } void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const { - for (size_t i = 0; i < pc_infos_.Size(); i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - uint32_t pc2dex_offset = pc_info.native_pc; - int32_t pc2dex_dalvik_offset = pc_info.dex_pc; + for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + uint32_t pc2dex_offset = stack_map_entry.native_pc_offset; + int32_t pc2dex_dalvik_offset = stack_map_entry.dex_pc; src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset})); } } void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { uint32_t pc2dex_data_size = 0u; - uint32_t pc2dex_entries = pc_infos_.Size(); + uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps(); uint32_t pc2dex_offset = 
0u; int32_t pc2dex_dalvik_offset = 0; uint32_t dex2pc_data_size = 0u; @@ -547,11 +541,11 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { int32_t dex2pc_dalvik_offset = 0; for (size_t i = 0; i < pc2dex_entries; i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset); - pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset); - pc2dex_offset = pc_info.native_pc; - pc2dex_dalvik_offset = pc_info.dex_pc; + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset); + pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset); + pc2dex_offset = stack_map_entry.native_pc_offset; + pc2dex_dalvik_offset = stack_map_entry.dex_pc; } // Walk over the blocks and find which ones correspond to catch block entries. @@ -586,12 +580,12 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { dex2pc_dalvik_offset = 0u; for (size_t i = 0; i < pc2dex_entries; i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - DCHECK(pc2dex_offset <= pc_info.native_pc); - write_pos = EncodeUnsignedLeb128(write_pos, pc_info.native_pc - pc2dex_offset); - write_pos = EncodeSignedLeb128(write_pos, pc_info.dex_pc - pc2dex_dalvik_offset); - pc2dex_offset = pc_info.native_pc; - pc2dex_dalvik_offset = pc_info.dex_pc; + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset); + write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset); + write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset); + pc2dex_offset = stack_map_entry.native_pc_offset; + pc2dex_dalvik_offset = stack_map_entry.dex_pc; } for (size_t i = 0; i < graph_->GetBlocks().Size(); ++i) { @@ -617,9 +611,9 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { auto it = table.PcToDexBegin(); auto it2 = table.DexToPcBegin(); for (size_t i = 0; i < pc2dex_entries; i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - CHECK_EQ(pc_info.native_pc, it.NativePcOffset()); - CHECK_EQ(pc_info.dex_pc, it.DexPc()); + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset()); + CHECK_EQ(stack_map_entry.dex_pc, it.DexPc()); ++it; } for (size_t i = 0; i < graph_->GetBlocks().Size(); ++i) { @@ -695,14 +689,11 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } // Collect PC infos for the mapping table. - struct PcInfo pc_info; - pc_info.dex_pc = outer_dex_pc; - pc_info.native_pc = GetAssembler()->CodeSize(); - pc_infos_.Add(pc_info); + uint32_t native_pc = GetAssembler()->CodeSize(); if (instruction == nullptr) { // For stack overflow checks. - stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc, pc_info.native_pc, 0, 0, 0, 0); + stack_map_stream_.BeginStackMapEntry(outer_dex_pc, native_pc, 0, 0, 0, 0); stack_map_stream_.EndStackMapEntry(); return; } @@ -719,8 +710,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } // The register mask must be a subset of callee-save registers. 
DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); - stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc, - pc_info.native_pc, + stack_map_stream_.BeginStackMapEntry(outer_dex_pc, + native_pc, register_mask, locations->GetStackMask(), outer_environment_size, diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index c6ebf6dbd8..5b0abd76b3 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -64,11 +64,6 @@ class CodeAllocator { DISALLOW_COPY_AND_ASSIGN(CodeAllocator); }; -struct PcInfo { - uint32_t dex_pc; - uintptr_t native_pc; -}; - class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { public: SlowPathCode() { @@ -363,16 +358,15 @@ class CodeGenerator { number_of_register_pairs_(number_of_register_pairs), core_callee_save_mask_(core_callee_save_mask), fpu_callee_save_mask_(fpu_callee_save_mask), + stack_map_stream_(graph->GetArena()), + block_order_(nullptr), is_baseline_(false), graph_(graph), compiler_options_(compiler_options), - pc_infos_(graph->GetArena(), 32), slow_paths_(graph->GetArena(), 8), - block_order_(nullptr), current_block_index_(0), is_leaf_(true), - requires_current_method_(false), - stack_map_stream_(graph->GetArena()) {} + requires_current_method_(false) {} // Register allocation logic. void AllocateRegistersLocally(HInstruction* instruction) const; @@ -442,6 +436,11 @@ class CodeGenerator { const uint32_t core_callee_save_mask_; const uint32_t fpu_callee_save_mask_; + StackMapStream stack_map_stream_; + + // The order to use for code generation. + const GrowableArray<HBasicBlock*>* block_order_; + // Whether we are using baseline. bool is_baseline_; @@ -455,12 +454,8 @@ class CodeGenerator { HGraph* const graph_; const CompilerOptions& compiler_options_; - GrowableArray<PcInfo> pc_infos_; GrowableArray<SlowPathCode*> slow_paths_; - // The order to use for code generation. - const GrowableArray<HBasicBlock*>* block_order_; - // The current block index in `block_order_` of the block // we are generating code for. size_t current_block_index_; @@ -471,8 +466,6 @@ class CodeGenerator { // Whether an instruction in the graph accesses the current method. bool requires_current_method_; - StackMapStream stack_map_stream_; - friend class OptimizingCFITest; DISALLOW_COPY_AND_ASSIGN(CodeGenerator); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 3d3e35d0fc..f6ae45238c 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -392,12 +392,38 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(false /* can_relocate_branches */), + assembler_(), isa_features_(isa_features) { // Save the PC register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(PC)); } +void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { + // Ensure that we fix up branches and literal loads and emit the literal pool. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. + for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset; + uint32_t new_position = __ GetAdjustedPosition(old_position); + stack_map_stream_.SetStackMapNativePcOffset(i, new_position); + } + // Adjust native pc offsets of block labels. 
+ for (size_t block_idx = 0u, end = block_order_->Size(); block_idx != end; ++block_idx) { + HBasicBlock* block = block_order_->Get(block_idx); + // Get the label directly from block_labels_ rather than through GetLabelOf() to avoid + // FirstNonEmptyBlock() which could lead to adjusting a label more than once. + DCHECK_LT(static_cast<size_t>(block->GetBlockId()), block_labels_.Size()); + Label* block_label = &block_labels_.GetRawStorage()[block->GetBlockId()]; + DCHECK_EQ(block_label->IsBound(), !block->IsSingleGoto()); + if (block_label->IsBound()) { + __ AdjustLabelPosition(block_label); + } + } + + CodeGenerator::Finalize(allocator); +} + Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const { switch (type) { case Primitive::kPrimLong: { @@ -2831,7 +2857,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { Location left = locations->InAt(0); Location right = locations->InAt(1); - NearLabel less, greater, done; + Label less, greater, done; Primitive::Type type = compare->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: { @@ -2927,7 +2953,7 @@ void InstructionCodeGeneratorARM::GenerateWideAtomicStore(Register addr, Register temp1, Register temp2, HInstruction* instruction) { - NearLabel fail; + Label fail; if (offset != 0) { __ LoadImmediate(temp1, offset); __ add(IP, addr, ShifterOperand(temp1)); @@ -3607,7 +3633,7 @@ void CodeGeneratorARM::MarkGCCard(Register temp, Register object, Register value, bool can_be_null) { - NearLabel is_null; + Label is_null; if (can_be_null) { __ CompareAndBranchIfZero(value, &is_null); } @@ -4036,7 +4062,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { Register cls = locations->InAt(1).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - NearLabel done, zero; + Label done, zero; SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. @@ -4093,19 +4119,15 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); - NearLabel done; // avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &done); + __ CompareAndBranchIfZero(obj, slow_path->GetExitLabel()); } // Compare the class of `obj` with `cls`. 
__ LoadFromOffset(kLoadWord, temp, obj, class_offset); __ cmp(temp, ShifterOperand(cls)); __ b(slow_path->GetEntryLabel(), NE); __ Bind(slow_path->GetExitLabel()); - if (instruction->MustDoNullCheck()) { - __ Bind(&done); - } } void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index af2481661a..1599a23568 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -139,10 +139,16 @@ class LocationsBuilderARM : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr); - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); @@ -163,10 +169,16 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr); - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + ArmAssembler* GetAssembler() const { return assembler_; } private: @@ -286,6 +298,8 @@ class CodeGeneratorARM : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + void Finalize(CodeAllocator* allocator) OVERRIDE; + const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 2d2419a284..f96810ff80 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -147,9 +147,17 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + Arm64Assembler* GetAssembler() const { return assembler_; } vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; } @@ -188,9 +196,17 @@ class LocationsBuilderARM64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable 
instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleBinaryOp(HBinaryOperation* instr); void HandleFieldSet(HInstruction* instruction); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index faf3cf9ffa..696d8d549e 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -124,10 +124,16 @@ class LocationsBuilderX86 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); @@ -148,10 +154,16 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + X86Assembler* GetAssembler() const { return assembler_; } private: diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index e46994c79e..215754cd46 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -134,10 +134,16 @@ class LocationsBuilderX86_64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); @@ -158,10 +164,16 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + X86_64Assembler* GetAssembler() const { return assembler_; } private: diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 17a006cc3a..fdfe518e95 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ 
b/compiler/optimizing/dead_code_elimination.cc @@ -122,10 +122,6 @@ void HDeadCodeElimination::RemoveDeadInstructions() { if (!inst->HasSideEffects() && !inst->CanThrow() && !inst->IsSuspendCheck() - // The current method needs to stay in the graph in case of inlining. - // It is always passed anyway, and keeping it in the graph does not - // affect the generated code. - && !inst->IsCurrentMethod() // If we added an explicit barrier then we should keep it. && !inst->IsMemoryBarrier() && !inst->HasUses()) { diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index fd2e4e81df..b64791788d 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -21,6 +21,7 @@ #include "licm.h" #include "nodes.h" #include "optimization.h" +#include "reference_type_propagation.h" #include "register_allocator.h" #include "ssa_liveness_analysis.h" @@ -354,6 +355,24 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } else { StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId(); } + } else if (IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName) + && is_after_pass_) { + if (instruction->GetType() == Primitive::kPrimNot) { + if (instruction->IsLoadClass()) { + ScopedObjectAccess soa(Thread::Current()); + StartAttributeStream("klass") + << PrettyClass(instruction->AsLoadClass()->GetLoadedClassRTI().GetTypeHandle().Get()); + } else { + ReferenceTypeInfo info = instruction->GetReferenceTypeInfo(); + if (info.IsTop()) { + StartAttributeStream("klass") << "java.lang.Object"; + } else { + ScopedObjectAccess soa(Thread::Current()); + StartAttributeStream("klass") << PrettyClass(info.GetTypeHandle().Get()); + } + StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; + } + } } } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index c3fc33735a..92ebf060eb 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -27,6 +27,7 @@ #include "mirror/class_loader.h" #include "mirror/dex_cache.h" #include "nodes.h" +#include "reference_type_propagation.h" #include "register_allocator.h" #include "ssa_phi_elimination.h" #include "scoped_thread_state_change.h" @@ -57,7 +58,7 @@ void HInliner::Run() { next_block = (i == blocks.Size() - 1) ? nullptr : blocks.Get(i + 1); for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) { HInstruction* next = instruction->GetNext(); - HInvokeStaticOrDirect* call = instruction->AsInvokeStaticOrDirect(); + HInvoke* call = instruction->AsInvoke(); // As long as the call is not intrinsified, it is worth trying to inline. if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { // We use the original invoke type to ensure the resolution of the called method @@ -83,6 +84,93 @@ void HInliner::Run() { } } +static bool IsMethodOrDeclaringClassFinal(ArtMethod* method) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return method->IsFinal() || method->GetDeclaringClass()->IsFinal(); +} + +/** + * Given the `resolved_method` looked up in the dex cache, try to find + * the actual runtime target of an interface or virtual call. + * Return nullptr if the runtime target cannot be proven. + */ +static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resolved_method) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (IsMethodOrDeclaringClassFinal(resolved_method)) { + // No need to lookup further, the resolved method will be the target. 
+ return resolved_method; + } + + HInstruction* receiver = invoke->InputAt(0); + if (receiver->IsNullCheck()) { + // Due to multiple levels of inlining within the same pass, it might be that + // null check does not have the reference type of the actual receiver. + receiver = receiver->InputAt(0); + } + ReferenceTypeInfo info = receiver->GetReferenceTypeInfo(); + if (info.IsTop()) { + // We have no information on the receiver. + return nullptr; + } else if (!info.IsExact()) { + // We currently only support inlining with known receivers. + // TODO: Remove this check, we should be able to inline final methods + // on unknown receivers. + return nullptr; + } else if (info.GetTypeHandle()->IsInterface()) { + // Statically knowing that the receiver has an interface type cannot + // help us find what is the target method. + return nullptr; + } else if (!resolved_method->GetDeclaringClass()->IsAssignableFrom(info.GetTypeHandle().Get())) { + // The method that we're trying to call is not in the receiver's class or super classes. + return nullptr; + } + + ClassLinker* cl = Runtime::Current()->GetClassLinker(); + size_t pointer_size = cl->GetImagePointerSize(); + if (invoke->IsInvokeInterface()) { + resolved_method = info.GetTypeHandle()->FindVirtualMethodForInterface( + resolved_method, pointer_size); + } else { + DCHECK(invoke->IsInvokeVirtual()); + resolved_method = info.GetTypeHandle()->FindVirtualMethodForVirtual( + resolved_method, pointer_size); + } + + if (resolved_method == nullptr) { + // The information we had on the receiver was not enough to find + // the target method. Since we check above the exact type of the receiver, + // the only reason this can happen is an IncompatibleClassChangeError. + return nullptr; + } else if (resolved_method->IsAbstract()) { + // The information we had on the receiver was not enough to find + // the target method. Since we check above the exact type of the receiver, + // the only reason this can happen is an IncompatibleClassChangeError. + return nullptr; + } else if (IsMethodOrDeclaringClassFinal(resolved_method)) { + // A final method has to be the target method. + return resolved_method; + } else if (info.IsExact()) { + // If we found a method and the receiver's concrete type is statically + // known, we know for sure the target. + return resolved_method; + } else { + // Even if we did find a method, the receiver type was not enough to + // statically find the runtime target. 
+ return nullptr; + } +} + +static uint32_t FindMethodIndexIn(ArtMethod* method, + const DexFile& dex_file, + uint32_t referrer_index) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) { + return method->GetDexMethodIndex(); + } else { + return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index); + } +} + bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) const { ScopedObjectAccess soa(Thread::Current()); const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); @@ -99,6 +187,25 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) con return false; } + if (!invoke_instruction->IsInvokeStaticOrDirect()) { + resolved_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method); + if (resolved_method == nullptr) { + VLOG(compiler) << "Interface or virtual call to " + << PrettyMethod(method_index, caller_dex_file) + << " could not be statically determined"; + return false; + } + // We have found a method, but we need to find where that method is for the caller's + // dex file. + method_index = FindMethodIndexIn(resolved_method, caller_dex_file, method_index); + if (method_index == DexFile::kDexNoIndex) { + VLOG(compiler) << "Interface or virtual call to " + << PrettyMethod(resolved_method) + << " cannot be inlined because unaccessible to caller"; + return false; + } + } + bool same_dex_file = true; const DexFile& outer_dex_file = *outer_compilation_unit_.GetDexFile(); if (resolved_method->GetDexFile()->GetLocation().compare(outer_dex_file.GetLocation()) != 0) { @@ -149,7 +256,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) con return false; } - if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, same_dex_file)) { + if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) { return false; } @@ -160,11 +267,11 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) con bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, - uint32_t method_index, bool same_dex_file) const { ScopedObjectAccess soa(Thread::Current()); const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); - const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + const DexFile& callee_dex_file = *resolved_method->GetDexFile(); + uint32_t method_index = resolved_method->GetDexMethodIndex(); DexCompilationUnit dex_compilation_unit( nullptr, @@ -204,7 +311,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, } HGraph* callee_graph = new (graph_->GetArena()) HGraph( graph_->GetArena(), - caller_dex_file, + callee_dex_file, method_index, requires_ctor_barrier, compiler_driver_->GetInstructionSet(), @@ -221,7 +328,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, &inline_stats); if (!builder.BuildGraph(*code_item)) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be built, so cannot be inlined"; // There could be multiple reasons why the graph could not be built, including // unaccessible methods/fields due to using a different dex cache. 
We do not mark @@ -231,14 +338,14 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, compiler_driver_->GetInstructionSet())) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " cannot be inlined because of the register allocator"; resolved_method->SetShouldNotInline(); return false; } if (!callee_graph->TryBuildingSsa()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be transformed to SSA"; resolved_method->SetShouldNotInline(); return false; @@ -247,11 +354,13 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, // Run simple optimizations on the graph. HDeadCodeElimination dce(callee_graph, stats_); HConstantFolding fold(callee_graph); + ReferenceTypePropagation type_propagation(callee_graph, handles_); InstructionSimplifier simplify(callee_graph, stats_); HOptimization* optimizations[] = { &dce, &fold, + &type_propagation, &simplify, }; @@ -265,6 +374,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, outer_compilation_unit_, dex_compilation_unit, compiler_driver_, + handles_, stats_, depth_ + 1); inliner.Run(); @@ -275,7 +385,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, // a throw predecessor. HBasicBlock* exit_block = callee_graph->GetExitBlock(); if (exit_block == nullptr) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it has an infinite loop"; resolved_method->SetShouldNotInline(); return false; @@ -289,7 +399,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, } } if (has_throw_predecessor) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because one branch always throws"; resolved_method->SetShouldNotInline(); return false; @@ -300,7 +410,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, for (; !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (block->IsLoopHeader()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it contains a loop"; resolved_method->SetShouldNotInline(); return false; @@ -314,21 +424,21 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, if (current->IsInvokeInterface()) { // Disable inlining of interface calls. The cost in case of entering the // resolution conflict is currently too high. 
- VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it has an interface call."; resolved_method->SetShouldNotInline(); return false; } if (!same_dex_file && current->NeedsEnvironment()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because " << current->DebugName() << " needs an environment and is in a different dex file"; return false; } if (!same_dex_file && current->NeedsDexCache()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because " << current->DebugName() << " it is in a different dex file and requires access to the dex cache"; // Do not flag the method as not-inlineable. A caller within the same diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index f7d8cf8715..24044b73a1 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -34,13 +34,15 @@ class HInliner : public HOptimization { const DexCompilationUnit& outer_compilation_unit, const DexCompilationUnit& caller_compilation_unit, CompilerDriver* compiler_driver, + StackHandleScopeCollection* handles, OptimizingCompilerStats* stats, size_t depth = 0) : HOptimization(outer_graph, true, kInlinerPassName, stats), outer_compilation_unit_(outer_compilation_unit), caller_compilation_unit_(caller_compilation_unit), compiler_driver_(compiler_driver), - depth_(depth) {} + depth_(depth), + handles_(handles) {} void Run() OVERRIDE; @@ -50,13 +52,13 @@ class HInliner : public HOptimization { bool TryInline(HInvoke* invoke_instruction, uint32_t method_index) const; bool TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, - uint32_t method_index, bool same_dex_file) const; const DexCompilationUnit& outer_compilation_unit_; const DexCompilationUnit& caller_compilation_unit_; CompilerDriver* const compiler_driver_; const size_t depth_; + StackHandleScopeCollection* const handles_; DISALLOW_COPY_AND_ASSIGN(HInliner); }; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index fcb3471821..2daeeb3c0c 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -186,33 +186,94 @@ bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* ins return false; } -void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - if (!check_cast->InputAt(0)->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { - check_cast->ClearMustDoNullCheck(); - } - - if (!load_class->IsResolved()) { +// Returns whether doing a type test between the class of `object` against `klass` has +// a statically known outcome. The result of the test is stored in `outcome`. +static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { + if (!klass->IsResolved()) { // If the class couldn't be resolve it's not safe to compare against it. It's // default type would be Top which might be wider that the actual class type // and thus producing wrong results. 
- return; + return false; } - ReferenceTypeInfo obj_rti = check_cast->InputAt(0)->GetReferenceTypeInfo(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + + ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); ScopedObjectAccess soa(Thread::Current()); if (class_rti.IsSupertypeOf(obj_rti)) { + *outcome = true; + return true; + } else if (obj_rti.IsExact()) { + // The test failed at compile time so will also fail at runtime. + *outcome = false; + return true; + } else if (!class_rti.IsInterface() + && !obj_rti.IsInterface() + && !obj_rti.IsSupertypeOf(class_rti)) { + // Different type hierarchy. The test will fail. + *outcome = false; + return true; + } + return false; +} + +void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { + HInstruction* object = check_cast->InputAt(0); + if (!object->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { + check_cast->ClearMustDoNullCheck(); + } + + if (object->IsNullConstant()) { check_cast->GetBlock()->RemoveInstruction(check_cast); if (stats_ != nullptr) { stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); } + return; + } + + bool outcome; + if (TypeCheckHasKnownOutcome(check_cast->InputAt(1)->AsLoadClass(), object, &outcome)) { + if (outcome) { + check_cast->GetBlock()->RemoveInstruction(check_cast); + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); + } + } else { + // Don't do anything for exceptional cases for now. Ideally we should remove + // all instructions and blocks this instruction dominates. + } } } void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { - if (!instruction->InputAt(0)->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + HInstruction* object = instruction->InputAt(0); + bool can_be_null = true; + if (!object->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + can_be_null = false; instruction->ClearMustDoNullCheck(); } + + HGraph* graph = GetGraph(); + if (object->IsNullConstant()) { + instruction->ReplaceWith(graph->GetIntConstant(0)); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + + bool outcome; + if (TypeCheckHasKnownOutcome(instruction->InputAt(1)->AsLoadClass(), object, &outcome)) { + if (outcome && can_be_null) { + // Type test will succeed, we just need a null test. + HNotEqual* test = new (graph->GetArena()) HNotEqual(graph->GetNullConstant(), object); + instruction->GetBlock()->InsertInstructionBefore(test, instruction); + instruction->ReplaceWith(test); + } else { + // We've statically determined the result of the instanceof. 
+ instruction->ReplaceWith(graph->GetIntConstant(outcome)); + } + RecordSimplification(); + instruction->GetBlock()->RemoveInstruction(instruction); + } } void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index 024462081f..668956a614 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -36,6 +36,9 @@ class InstructionSimplifier : public HOptimization { static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; void Run() OVERRIDE; + + private: + DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier); }; } // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cd91d2c87b..68c197e607 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -288,7 +288,10 @@ void HGraph::InsertConstant(HConstant* constant) { } HNullConstant* HGraph::GetNullConstant() { - if (cached_null_constant_ == nullptr) { + // For simplicity, don't bother reviving the cached null constant if it is + // not null and not in a block. Otherwise, we need to clear the instruction + // id and/or any invariants the graph is assuming when adding new instructions. + if ((cached_null_constant_ == nullptr) || (cached_null_constant_->GetBlock() == nullptr)) { cached_null_constant_ = new (arena_) HNullConstant(); InsertConstant(cached_null_constant_); } @@ -296,7 +299,10 @@ HNullConstant* HGraph::GetNullConstant() { } HCurrentMethod* HGraph::GetCurrentMethod() { - if (cached_current_method_ == nullptr) { + // For simplicity, don't bother reviving the cached current method if it is + // not null and not in a block. Otherwise, we need to clear the instruction + // id and/or any invariants the graph is assuming when adding new instructions. + if ((cached_current_method_ == nullptr) || (cached_current_method_->GetBlock() == nullptr)) { cached_current_method_ = new (arena_) HCurrentMethod( Is64BitInstructionSet(instruction_set_) ? Primitive::kPrimLong : Primitive::kPrimInt); if (entry_block_->GetFirstInstruction() == nullptr) { @@ -1510,6 +1516,81 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { invoke->GetBlock()->RemoveInstruction(invoke); } +/* + * Loop will be transformed to: + * old_pre_header + * | + * if_block + * / \ + * dummy_block deopt_block + * \ / + * new_pre_header + * | + * header + */ +void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + + // Need this to avoid critical edge. + HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + // Need this to avoid critical edge. 
+ HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); + AddBlock(if_block); + AddBlock(dummy_block); + AddBlock(deopt_block); + AddBlock(new_pre_header); + + header->ReplacePredecessor(pre_header, new_pre_header); + pre_header->successors_.Reset(); + pre_header->dominated_blocks_.Reset(); + + pre_header->AddSuccessor(if_block); + if_block->AddSuccessor(dummy_block); // True successor + if_block->AddSuccessor(deopt_block); // False successor + dummy_block->AddSuccessor(new_pre_header); + deopt_block->AddSuccessor(new_pre_header); + + pre_header->dominated_blocks_.Add(if_block); + if_block->SetDominator(pre_header); + if_block->dominated_blocks_.Add(dummy_block); + dummy_block->SetDominator(if_block); + if_block->dominated_blocks_.Add(deopt_block); + deopt_block->SetDominator(if_block); + if_block->dominated_blocks_.Add(new_pre_header); + new_pre_header->SetDominator(if_block); + new_pre_header->dominated_blocks_.Add(header); + header->SetDominator(new_pre_header); + + size_t index_of_header = 0; + while (reverse_post_order_.Get(index_of_header) != header) { + index_of_header++; + } + MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1); + reverse_post_order_.Put(index_of_header++, if_block); + reverse_post_order_.Put(index_of_header++, dummy_block); + reverse_post_order_.Put(index_of_header++, deopt_block); + reverse_post_order_.Put(index_of_header++, new_pre_header); + + HLoopInformation* info = pre_header->GetLoopInformation(); + if (info != nullptr) { + if_block->SetLoopInformation(info); + dummy_block->SetLoopInformation(info); + deopt_block->SetLoopInformation(info); + new_pre_header->SetLoopInformation(info); + for (HLoopInformationOutwardIterator loop_it(*pre_header); + !loop_it.Done(); + loop_it.Advance()) { + loop_it.Current()->Add(if_block); + loop_it.Current()->Add(dummy_block); + loop_it.Current()->Add(deopt_block); + loop_it.Current()->Add(new_pre_header); + } + } +} + std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { ScopedObjectAccess soa(Thread::Current()); os << "[" diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index f87775e195..9443653db7 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -195,6 +195,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // Inline this graph in `outer_graph`, replacing the given `invoke` instruction. void InlineInto(HGraph* outer_graph, HInvoke* invoke); + // Need to add a couple of blocks to test if the loop body is entered and + // put deoptimization instructions, etc. + void TransformLoopHeaderForBCE(HBasicBlock* header); + // Removes `block` from the graph. void DeleteDeadBlock(HBasicBlock* block); @@ -331,6 +335,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { } // If not found or previously deleted, create and cache a new instruction. + // Don't bother reviving a previously deleted instruction, for simplicity. 
if (constant == nullptr || constant->GetBlock() == nullptr) { constant = new (arena_) InstructionType(value); cache->Overwrite(value, constant); @@ -824,7 +829,7 @@ class HLoopInformationOutwardIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HLoopInformationOutwardIterator); }; -#define FOR_EACH_CONCRETE_INSTRUCTION(M) \ +#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ M(Add, BinaryOperation) \ M(And, BinaryOperation) \ M(ArrayGet, Instruction) \ @@ -894,6 +899,21 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) + #define FOR_EACH_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION(M) \ M(Constant, Instruction) \ @@ -1281,6 +1301,9 @@ class ReferenceTypeInfo : ValueObject { bool IsExact() const { return is_exact_; } bool IsTop() const { return is_top_; } + bool IsInterface() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return !IsTop() && GetTypeHandle()->IsInterface(); + } Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } @@ -2461,7 +2484,7 @@ class HInvoke : public HInstruction { intrinsic_ = intrinsic; } - bool IsInlined() const { + bool IsFromInlinedInvoke() const { return GetEnvironment()->GetParent() != nullptr; } @@ -3581,7 +3604,7 @@ class HLoadClass : public HExpression<1> { bool CanThrow() const OVERRIDE { // May call runtime and therefore can throw. // TODO: finer grain decision. - return !is_referrers_class_; + return CanCallRuntime(); } ReferenceTypeInfo GetLoadedClassRTI() { @@ -4246,6 +4269,39 @@ class HBlocksInLoopIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator); }; +// Iterator over the blocks that are part of the loop. Includes blocks part +// of an inner loop. The order in which the blocks are iterated is reverse +// post order. +class HBlocksInLoopReversePostOrderIterator : public ValueObject { + public: + explicit HBlocksInLoopReversePostOrderIterator(const HLoopInformation& info) + : blocks_in_loop_(info.GetBlocks()), + blocks_(info.GetHeader()->GetGraph()->GetReversePostOrder()), + index_(0) { + if (!blocks_in_loop_.IsBitSet(blocks_.Get(index_)->GetBlockId())) { + Advance(); + } + } + + bool Done() const { return index_ == blocks_.Size(); } + HBasicBlock* Current() const { return blocks_.Get(index_); } + void Advance() { + ++index_; + for (size_t e = blocks_.Size(); index_ < e; ++index_) { + if (blocks_in_loop_.IsBitSet(blocks_.Get(index_)->GetBlockId())) { + break; + } + } + } + + private: + const BitVector& blocks_in_loop_; + const GrowableArray<HBasicBlock*>& blocks_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator); +}; + inline int64_t Int64FromConstant(HConstant* constant) { DCHECK(constant->IsIntConstant() || constant->IsLongConstant()); return constant->IsIntConstant() ?
constant->AsIntConstant()->GetValue() diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index ccf8de9f6a..2d1c0ba9f9 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ +#include "base/arena_object.h" #include "nodes.h" #include "optimizing_compiler_stats.h" @@ -25,7 +26,7 @@ namespace art { /** * Abstraction to implement an optimization pass. */ -class HOptimization : public ValueObject { +class HOptimization : public ArenaObject<kArenaAllocMisc> { public: HOptimization(HGraph* graph, bool is_in_ssa_form, diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index b0d1433667..fe3bb1a2b4 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -71,6 +71,8 @@ class OptimizingCFITest : public CFITest { } } } + GrowableArray<HBasicBlock*> blocks(&allocator, 0); + code_gen->block_order_ = &blocks; code_gen->ComputeSpillMask(); code_gen->SetFrameSize(frame_size); code_gen->GenerateFrameEntry(); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index f6ef2f7e82..8d43adaada 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -318,43 +318,61 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - HDeadCodeElimination dce1(graph, stats, - HDeadCodeElimination::kInitialDeadCodeEliminationPassName); - HDeadCodeElimination dce2(graph, stats, - HDeadCodeElimination::kFinalDeadCodeEliminationPassName); - HConstantFolding fold1(graph); - InstructionSimplifier simplify1(graph, stats); - HBooleanSimplifier boolean_simplify(graph); - - HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, stats); - - HConstantFolding fold2(graph, "constant_folding_after_inlining"); - SideEffectsAnalysis side_effects(graph); - GVNOptimization gvn(graph, side_effects); - LICM licm(graph, side_effects); - BoundsCheckElimination bce(graph); - ReferenceTypePropagation type_propagation(graph, handles); - InstructionSimplifier simplify2(graph, stats, "instruction_simplifier_after_types"); - - IntrinsicsRecognizer intrinsics(graph, driver); + ArenaAllocator* arena = graph->GetArena(); + HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( + graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName); + HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination( + graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName); + HConstantFolding* fold1 = new (arena) HConstantFolding(graph); + InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); + HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph); + + HInliner* inliner = new (arena) HInliner( + graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); + + HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining"); + SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); + GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects); + LICM* licm = new (arena) LICM(graph, *side_effects); + BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph); + ReferenceTypePropagation* 
type_propagation = + new (arena) ReferenceTypePropagation(graph, handles); + InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_after_types"); + InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_after_bce"); + ReferenceTypePropagation* type_propagation2 = + new (arena) ReferenceTypePropagation(graph, handles); + InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_before_codegen"); + + IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver); HOptimization* optimizations[] = { - &intrinsics, - &dce1, - &fold1, - &simplify1, - &inliner, + intrinsics, + fold1, + simplify1, + type_propagation, + dce1, + simplify2, + inliner, + // Run another type propagation phase: inlining will open up more opportunities + // to remove checkcast/instanceof and null checks. + type_propagation2, // BooleanSimplifier depends on the InstructionSimplifier removing redundant // suspend checks to recognize empty blocks. - &boolean_simplify, - &fold2, - &side_effects, - &gvn, - &licm, - &bce, - &type_propagation, - &simplify2, - &dce2, + boolean_simplify, + fold2, + side_effects, + gvn, + licm, + bce, + simplify3, + dce2, + // The codegen has a few assumptions that only the instruction simplifier can + // satisfy. For example, the code generator does not expect to see a + // HTypeConversion from a type to the same type. + simplify4, }; RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index b6b1bb1cad..53d052b2bc 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -19,6 +19,7 @@ #include <sstream> #include <string> +#include <type_traits> #include "atomic.h" @@ -37,8 +38,8 @@ enum MethodCompilationStat { kNotCompiledClassNotVerified, kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, + kNotCompiledMalformedOpcode, kNotCompiledNoCodegen, - kNotCompiledNonSequentialRegPair, kNotCompiledPathological, kNotCompiledSpaceFilter, kNotCompiledUnhandledInstruction, @@ -84,14 +85,15 @@ class OptimizingCompilerStats { for (int i = 0; i < kLastStat; i++) { if (compile_stats_[i] != 0) { - LOG(INFO) << PrintMethodCompilationStat(i) << ": " << compile_stats_[i]; + LOG(INFO) << PrintMethodCompilationStat(static_cast<MethodCompilationStat>(i)) << ": " + << compile_stats_[i]; } } } } private: - std::string PrintMethodCompilationStat(int stat) const { + std::string PrintMethodCompilationStat(MethodCompilationStat stat) const { switch (stat) { case kAttemptCompilation : return "kAttemptCompilation"; case kCompiledBaseline : return "kCompiledBaseline"; @@ -105,8 +107,8 @@ class OptimizingCompilerStats { case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches"; + case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode"; case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen"; - case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair"; case kNotCompiledPathological : return "kNotCompiledPathological"; case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter"; case kNotCompiledUnhandledInstruction : return
"kNotCompiledUnhandledInstruction"; @@ -120,9 +122,12 @@ class OptimizingCompilerStats { case kRemovedCheckedCast: return "kRemovedCheckedCast"; case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; case kRemovedNullCheck: return "kRemovedNullCheck"; - default: LOG(FATAL) << "invalid stat"; + + case kLastStat: break; // Invalid to print out. } - return ""; + LOG(FATAL) << "invalid stat " + << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat); + UNREACHABLE(); } AtomicInteger compile_stats_[kLastStat]; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index a249aa9711..ca928ae0f2 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -86,16 +86,6 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire DCHECK(last_input != nullptr) << "Last input is not HLoadClass. It is " << last_input->DebugName(); - // The static call will initialize the class so there's no need for a clinit check if - // it's the first user. - // There is one special case where we still need the clinit check, when inlining. Because - // currently the callee is responsible for reporting parameters to the GC, the code - // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. - // Therefore we cannot allocate any object in that code, including loading a new class. - if (last_input == invoke->GetPrevious() && !invoke->IsInlined()) { - last_input->SetMustGenerateClinitCheck(false); - } - // Remove a load class instruction as last input of a static // invoke, which has been added (along with a clinit check, // removed by PrepareForRegisterAllocation::VisitClinitCheck @@ -104,10 +94,20 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire // stage (i.e., after inlining has been performed). invoke->RemoveLoadClassAsLastInput(); - // If the load class instruction is no longer used, remove it from - // the graph. - if (!last_input->HasUses() && !(last_input->MustGenerateClinitCheck() && invoke->IsInlined())) { - last_input->GetBlock()->RemoveInstruction(last_input); + // The static call will initialize the class so there's no need for a clinit check if + // it's the first user. + // There is one special case where we still need the clinit check, when inlining. Because + // currently the callee is responsible for reporting parameters to the GC, the code + // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. + // Therefore we cannot allocate any object in that code, including loading a new class. + if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) { + last_input->SetMustGenerateClinitCheck(false); + + // If the load class instruction is no longer used, remove it from + // the graph. 
+ if (!last_input->HasUses()) { + last_input->GetBlock()->RemoveInstruction(last_input); + } } } } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 4f1f45769d..a048c856c5 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -23,6 +23,30 @@ namespace art { +class RTPVisitor : public HGraphDelegateVisitor { + public: + RTPVisitor(HGraph* graph, StackHandleScopeCollection* handles) + : HGraphDelegateVisitor(graph), + handles_(handles) {} + + void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; + void VisitLoadClass(HLoadClass* load_class) OVERRIDE; + void VisitNewArray(HNewArray* instr) OVERRIDE; + void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); + void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact); + void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE; + void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE; + void VisitInvoke(HInvoke* instr) OVERRIDE; + void VisitArrayGet(HArrayGet* instr) OVERRIDE; + void UpdateReferenceTypeInfo(HInstruction* instr, + uint16_t type_idx, + const DexFile& dex_file, + bool is_exact); + + private: + StackHandleScopeCollection* handles_; +}; + void ReferenceTypePropagation::Run() { // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. @@ -35,23 +59,13 @@ void ReferenceTypePropagation::Run() { void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { // TODO: handle other instructions that give type info - // (Call/array accesses) + // (array accesses) + RTPVisitor visitor(graph_, handles_); // Initialize exact types first for faster convergence. for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instr = it.Current(); - // TODO: Make ReferenceTypePropagation a visitor or create a new one. - if (instr->IsNewInstance()) { - VisitNewInstance(instr->AsNewInstance()); - } else if (instr->IsLoadClass()) { - VisitLoadClass(instr->AsLoadClass()); - } else if (instr->IsNewArray()) { - VisitNewArray(instr->AsNewArray()); - } else if (instr->IsInstanceFieldGet()) { - VisitInstanceFieldGet(instr->AsInstanceFieldGet()); - } else if (instr->IsStaticFieldGet()) { - VisitStaticFieldGet(instr->AsStaticFieldGet()); - } + instr->Accept(&visitor); } // Handle Phis. 
@@ -166,35 +180,39 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { } } -void ReferenceTypePropagation::SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass) { +void RTPVisitor::SetClassAsTypeInfo(HInstruction* instr, + mirror::Class* klass, + bool is_exact) { if (klass != nullptr) { ScopedObjectAccess soa(Thread::Current()); MutableHandle<mirror::Class> handle = handles_->NewHandle(klass); - instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, true)); + is_exact = is_exact || klass->IsFinal(); + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact)); } } -void ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr, - uint16_t type_idx, - const DexFile& dex_file) { +void RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr, + uint16_t type_idx, + const DexFile& dex_file, + bool is_exact) { DCHECK_EQ(instr->GetType(), Primitive::kPrimNot); ScopedObjectAccess soa(Thread::Current()); mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file); // Get type from dex cache assuming it was populated by the verifier. - SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx)); + SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact); } -void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) { - UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile()); +void RTPVisitor::VisitNewInstance(HNewInstance* instr) { + UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } -void ReferenceTypePropagation::VisitNewArray(HNewArray* instr) { - UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile()); +void RTPVisitor::VisitNewArray(HNewArray* instr) { + UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } -void ReferenceTypePropagation::UpdateFieldAccessTypeInfo(HInstruction* instr, - const FieldInfo& info) { +void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr, + const FieldInfo& info) { // The field index is unknown only during tests. 
if (instr->GetType() != Primitive::kPrimNot || info.GetFieldIndex() == kUnknownFieldIndex) { return; @@ -206,18 +224,18 @@ void ReferenceTypePropagation::UpdateFieldAccessTypeInfo(HInstruction* instr, ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), dex_cache); DCHECK(field != nullptr); mirror::Class* klass = field->GetType<false>(); - SetClassAsTypeInfo(instr, klass); + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); } -void ReferenceTypePropagation::VisitInstanceFieldGet(HInstanceFieldGet* instr) { +void RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) { UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); } -void ReferenceTypePropagation::VisitStaticFieldGet(HStaticFieldGet* instr) { +void RTPVisitor::VisitStaticFieldGet(HStaticFieldGet* instr) { UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); } -void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) { +void RTPVisitor::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile()); @@ -295,6 +313,34 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { return !previous_rti.IsEqual(instr->GetReferenceTypeInfo()); } +void RTPVisitor::VisitInvoke(HInvoke* instr) { + if (instr->GetType() != Primitive::kPrimNot) { + return; + } + + ScopedObjectAccess soa(Thread::Current()); + ClassLinker* cl = Runtime::Current()->GetClassLinker(); + mirror::DexCache* dex_cache = cl->FindDexCache(instr->GetDexFile()); + ArtMethod* method = dex_cache->GetResolvedMethod( + instr->GetDexMethodIndex(), cl->GetImagePointerSize()); + DCHECK(method != nullptr); + mirror::Class* klass = method->GetReturnType(false); + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); +} + +void RTPVisitor::VisitArrayGet(HArrayGet* instr) { + if (instr->GetType() != Primitive::kPrimNot) { + return; + } + + HInstruction* parent = instr->InputAt(0); + ScopedObjectAccess soa(Thread::Current()); + Handle<mirror::Class> handle = parent->GetReferenceTypeInfo().GetTypeHandle(); + if (handle.GetReference() != nullptr && handle->IsObjectArrayClass()) { + SetClassAsTypeInfo(instr, handle->GetComponentType(), /* is_exact */ false); + } +} + void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); // Be sure that we don't go over the bounded type. 
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 74e425fb3e..0d687d25cb 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -40,23 +40,12 @@ class ReferenceTypePropagation : public HOptimization { static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation"; private: - void VisitNewInstance(HNewInstance* new_instance); - void VisitLoadClass(HLoadClass* load_class); - void VisitNewArray(HNewArray* instr); void VisitPhi(HPhi* phi); void VisitBasicBlock(HBasicBlock* block); - void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); - void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass); - void UpdateBoundType(HBoundType* bound_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void UpdatePhi(HPhi* phi) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void BoundTypeForIfNotNull(HBasicBlock* block); void BoundTypeForIfInstanceOf(HBasicBlock* block); - void UpdateReferenceTypeInfo(HInstruction* instr, uint16_t type_idx, const DexFile& dex_file); - void VisitInstanceFieldGet(HInstanceFieldGet* instr); - void VisitStaticFieldGet(HStaticFieldGet* instr); - void ProcessWorklist(); void AddToWorklist(HInstruction* instr); void AddDependentInstructionsToWorklist(HInstruction* instr); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index c4612af393..2a86e60e14 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -184,22 +184,24 @@ void SsaBuilder::FixNullConstantType() { } HInstruction* left = equality_instr->InputAt(0); HInstruction* right = equality_instr->InputAt(1); - HInstruction* null_instr = nullptr; + HInstruction* int_operand = nullptr; - if ((left->GetType() == Primitive::kPrimNot) && right->IsIntConstant()) { - null_instr = right; - } else if ((right->GetType() == Primitive::kPrimNot) && left->IsIntConstant()) { - null_instr = left; + if ((left->GetType() == Primitive::kPrimNot) && (right->GetType() == Primitive::kPrimInt)) { + int_operand = right; + } else if ((right->GetType() == Primitive::kPrimNot) + && (left->GetType() == Primitive::kPrimInt)) { + int_operand = left; } else { continue; } // If we got here, we are comparing against a reference and the int constant // should be replaced with a null constant. - if (null_instr->IsIntConstant()) { - DCHECK_EQ(0, null_instr->AsIntConstant()->GetValue()); - equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), null_instr == right ? 1 : 0); - } + // Both type propagation and redundant phi elimination ensure `int_operand` + // can only be the 0 constant. + DCHECK(int_operand->IsIntConstant()); + DCHECK_EQ(0, int_operand->AsIntConstant()->GetValue()); + equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), int_operand == right ? 1 : 0); } } } @@ -255,21 +257,18 @@ void SsaBuilder::BuildSsa() { PrimitiveTypePropagation type_propagation(GetGraph()); type_propagation.Run(); - // 5) Fix the type for null constants which are part of an equality comparison. - FixNullConstantType(); - - // 6) When creating equivalent phis we copy the inputs of the original phi which - // may be improperly typed. This will be fixed during the type propagation but + // 5) When creating equivalent phis we copy the inputs of the original phi which + // may be improperly typed. 
This was fixed during the type propagation in 4) but // as a result we may end up with two equivalent phis with the same type for // the same dex register. This pass cleans them up. EquivalentPhisCleanup(); - // 7) Mark dead phis again. Step 4) may have introduced new phis. - // Step 6) might enable the death of new phis. + // 6) Mark dead phis again. Step 4) may have introduced new phis. + // Step 5) might enable the death of new phis. SsaDeadPhiElimination dead_phis(GetGraph()); dead_phis.MarkDeadPhis(); - // 8) Now that the graph is correctly typed, we can get rid of redundant phis. + // 7) Now that the graph is correctly typed, we can get rid of redundant phis. // Note that we cannot do this phase before type propagation, otherwise // we could get rid of phi equivalents, whose presence is a requirement for the // type propagation phase. Note that this is to satisfy statement (a) of the @@ -277,6 +276,13 @@ void SsaBuilder::BuildSsa() { SsaRedundantPhiElimination redundant_phi(GetGraph()); redundant_phi.Run(); + // 8) Fix the type for null constants which are part of an equality comparison. + // We need to do this after redundant phi elimination, to ensure the only cases + // that we can see are reference comparison against 0. The redundant phi + // elimination ensures we do not see a phi taking two 0 constants in a HEqual + // or HNotEqual. + FixNullConstantType(); + // 9) Make sure environments use the right phi "equivalent": a phi marked dead // can have a phi equivalent that is not dead. We must therefore update // all environment uses of the dead phi to use its equivalent. Note that there diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 42b9182d55..65610d54a6 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -49,7 +49,6 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, } dex_pc_max_ = std::max(dex_pc_max_, dex_pc); - native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset); register_mask_max_ = std::max(register_mask_max_, register_mask); current_dex_register_ = 0; } @@ -128,16 +127,25 @@ void StackMapStream::EndInlineInfoEntry() { current_inline_info_ = InlineInfoEntry(); } +uint32_t StackMapStream::ComputeMaxNativePcOffset() const { + uint32_t max_native_pc_offset = 0u; + for (size_t i = 0, size = stack_maps_.Size(); i != size; ++i) { + max_native_pc_offset = std::max(max_native_pc_offset, stack_maps_.Get(i).native_pc_offset); + } + return max_native_pc_offset; +} + size_t StackMapStream::PrepareForFillIn() { int stack_mask_number_of_bits = stack_mask_max_ + 1; // Need room for max element too. 
stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte; inline_info_size_ = ComputeInlineInfoSize(); dex_register_maps_size_ = ComputeDexRegisterMapsSize(); + uint32_t max_native_pc_offset = ComputeMaxNativePcOffset(); stack_map_encoding_ = StackMapEncoding::CreateFromSizes(stack_mask_size_, inline_info_size_, dex_register_maps_size_, dex_pc_max_, - native_pc_offset_max_, + max_native_pc_offset, register_mask_max_); stack_maps_size_ = stack_maps_.Size() * stack_map_encoding_.ComputeStackMapSize(); dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize(); diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 274d573350..550ed70e0f 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -67,7 +67,6 @@ class StackMapStream : public ValueObject { inline_infos_(allocator, 2), stack_mask_max_(-1), dex_pc_max_(0), - native_pc_offset_max_(0), register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()), @@ -126,6 +125,22 @@ class StackMapStream : public ValueObject { uint32_t num_dex_registers); void EndInlineInfoEntry(); + size_t GetNumberOfStackMaps() const { + return stack_maps_.Size(); + } + + const StackMapEntry& GetStackMap(size_t i) const { + DCHECK_LT(i, stack_maps_.Size()); + return stack_maps_.GetRawStorage()[i]; + } + + void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) { + DCHECK_LT(i, stack_maps_.Size()); + stack_maps_.GetRawStorage()[i].native_pc_offset = native_pc_offset; + } + + uint32_t ComputeMaxNativePcOffset() const; + // Prepares the stream to fill in a memory region. Must be called before FillIn. // Returns the size (in bytes) needed to store this stream. size_t PrepareForFillIn(); @@ -163,7 +178,6 @@ class StackMapStream : public ValueObject { GrowableArray<InlineInfoEntry> inline_infos_; int stack_mask_max_; uint32_t dex_pc_max_; - uint32_t native_pc_offset_max_; uint32_t register_mask_max_; size_t number_of_stack_maps_with_inline_info_; diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc index cb51ed8fc8..facc6304e5 100644 --- a/compiler/trampolines/trampoline_compiler.cc +++ b/compiler/trampolines/trampoline_compiler.cc @@ -17,21 +17,21 @@ #include "trampoline_compiler.h" #include "jni_env_ext.h" -#include "utils/arm/assembler_arm.h" +#include "utils/arm/assembler_thumb2.h" #include "utils/arm64/assembler_arm64.h" #include "utils/mips/assembler_mips.h" #include "utils/mips64/assembler_mips64.h" #include "utils/x86/assembler_x86.h" #include "utils/x86_64/assembler_x86_64.h" -#define __ assembler-> +#define __ assembler. namespace art { namespace arm { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<4> offset) { - std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kThumb2))); + Thumb2Assembler assembler; switch (abi) { case kInterpreterAbi: // Thread* is first argument (R0) in interpreter ABI. 
@@ -46,10 +46,11 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention } __ bkpt(0); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -58,7 +59,7 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention namespace arm64 { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<8> offset) { - std::unique_ptr<Arm64Assembler> assembler(static_cast<Arm64Assembler*>(Assembler::Create(kArm64))); + Arm64Assembler assembler; switch (abi) { case kInterpreterAbi: // Thread* is first argument (X0) in interpreter ABI. @@ -82,11 +83,11 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention break; } - assembler->EmitSlowPaths(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -95,7 +96,7 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention namespace mips { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<4> offset) { - std::unique_ptr<MipsAssembler> assembler(static_cast<MipsAssembler*>(Assembler::Create(kMips))); + MipsAssembler assembler; switch (abi) { case kInterpreterAbi: // Thread* is first argument (A0) in interpreter ABI. @@ -112,10 +113,11 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention __ Nop(); __ Break(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -124,7 +126,7 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention namespace mips64 { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<8> offset) { - std::unique_ptr<Mips64Assembler> assembler(static_cast<Mips64Assembler*>(Assembler::Create(kMips64))); + Mips64Assembler assembler; switch (abi) { case kInterpreterAbi: // Thread* is first argument (A0) in interpreter ABI. @@ -141,10 +143,11 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention __ Nop(); __ Break(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -152,16 +155,17 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention namespace x86 { static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) { - std::unique_ptr<X86Assembler> assembler(static_cast<X86Assembler*>(Assembler::Create(kX86))); + X86Assembler assembler; // All x86 trampolines call via the Thread* held in fs. 
__ fs()->jmp(Address::Absolute(offset)); __ int3(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -169,17 +173,17 @@ static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) { namespace x86_64 { static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<8> offset) { - std::unique_ptr<x86_64::X86_64Assembler> - assembler(static_cast<x86_64::X86_64Assembler*>(Assembler::Create(kX86_64))); + x86_64::X86_64Assembler assembler; // All x86 trampolines call via the Thread* held in gs. __ gs()->jmp(x86_64::Address::Absolute(offset, true)); __ int3(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 350efca3e2..f8ca48ef57 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_H_ #define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_H_ +#include <type_traits> #include <vector> #include "base/bit_utils.h" @@ -33,14 +34,47 @@ namespace arm { class Arm32Assembler; class Thumb2Assembler; -// This class indicates that the label and its uses -// will fall into a range that is encodable in 16bits on thumb2. -class NearLabel : public Label { +// Assembler literal is a value embedded in code, retrieved using a PC-relative load. +class Literal { public: - NearLabel() {} + static constexpr size_t kMaxSize = 8; + + Literal(uint32_t size, const uint8_t* data) + : label_(), size_(size) { + DCHECK_LE(size, Literal::kMaxSize); + memcpy(data_, data, size); + } + + template <typename T> + T GetValue() const { + DCHECK_EQ(size_, sizeof(T)); + T value; + memcpy(&value, data_, sizeof(T)); + return value; + } + + uint32_t GetSize() const { + return size_; + } + + const uint8_t* GetData() const { + return data_; + } + + Label* GetLabel() { + return &label_; + } + + const Label* GetLabel() const { + return &label_; + } private: - DISALLOW_COPY_AND_ASSIGN(NearLabel); + Label label_; + const uint32_t size_; + uint8_t data_[kMaxSize]; + + DISALLOW_COPY_AND_ASSIGN(Literal); }; class ShifterOperand { @@ -529,9 +563,6 @@ class ArmAssembler : public Assembler { // Branch instructions. virtual void b(Label* label, Condition cond = AL) = 0; - virtual void b(NearLabel* label, Condition cond = AL) { - b(static_cast<Label*>(label), cond); - } virtual void bl(Label* label, Condition cond = AL) = 0; virtual void blx(Register rm, Condition cond = AL) = 0; virtual void bx(Register rm, Condition cond = AL) = 0; @@ -541,9 +572,41 @@ class ArmAssembler : public Assembler { void Pad(uint32_t bytes); + // Adjust label position. 
+ void AdjustLabelPosition(Label* label) { + DCHECK(label->IsBound()); + uint32_t old_position = static_cast<uint32_t>(label->Position()); + uint32_t new_position = GetAdjustedPosition(old_position); + label->Reinitialize(); + DCHECK_GE(static_cast<int>(new_position), 0); + label->BindTo(static_cast<int>(new_position)); + } + + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + virtual uint32_t GetAdjustedPosition(uint32_t old_position) = 0; + // Macros. // Most of these are pure virtual as they need to be implemented per instruction set. + // Create a new literal with a given value. + // NOTE: Force the template parameter to be explicitly specified. In the absence of + // std::omit_from_type_deduction<T> or std::identity<T>, use std::decay<T>. + template <typename T> + Literal* NewLiteral(typename std::decay<T>::type value) { + static_assert(std::is_integral<T>::value, "T must be an integral type."); + return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); + } + + // Create a new literal with the given data. + virtual Literal* NewLiteral(size_t size, const uint8_t* data) = 0; + + // Load literal. + virtual void LoadLiteral(Register rt, Literal* literal) = 0; + virtual void LoadLiteral(Register rt, Register rt2, Literal* literal) = 0; + virtual void LoadLiteral(SRegister sd, Literal* literal) = 0; + virtual void LoadLiteral(DRegister dd, Literal* literal) = 0; + // Add signed constant value to rd. May clobber IP. virtual void AddConstant(Register rd, int32_t value, Condition cond = AL) = 0; virtual void AddConstant(Register rd, Register rn, int32_t value, @@ -667,9 +730,6 @@ class ArmAssembler : public Assembler { virtual void Bind(Label* label) = 0; virtual void CompareAndBranchIfZero(Register r, Label* label) = 0; - virtual void CompareAndBranchIfZero(Register r, NearLabel* label) { - CompareAndBranchIfZero(r, static_cast<Label*>(label)); - } virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0; // diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index cdf62bf885..6e60ddc260 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -1354,6 +1354,41 @@ int Arm32Assembler::DecodeBranchOffset(int32_t inst) { } +uint32_t Arm32Assembler::GetAdjustedPosition(uint32_t old_position ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +Literal* Arm32Assembler::NewLiteral(size_t size ATTRIBUTE_UNUSED, + const uint8_t* data ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +void Arm32Assembler::LoadLiteral(Register rt ATTRIBUTE_UNUSED, + Literal* literal ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +void Arm32Assembler::LoadLiteral(Register rt ATTRIBUTE_UNUSED, Register rt2 ATTRIBUTE_UNUSED, + Literal* literal ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +void Arm32Assembler::LoadLiteral(SRegister sd ATTRIBUTE_UNUSED, + Literal* literal ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +void Arm32Assembler::LoadLiteral(DRegister dd ATTRIBUTE_UNUSED, + Literal* literal ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + void Arm32Assembler::AddConstant(Register rd, int32_t value, Condition cond) { AddConstant(rd, rd, value, cond); } diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 3164623fd9..1c38eec12c 100644 
--- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -238,7 +238,16 @@ class Arm32Assembler FINAL : public ArmAssembler { // Memory barriers. void dmb(DmbOptions flavor) OVERRIDE; - // Macros. + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE; + + Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE; + void LoadLiteral(Register rt, Literal* literal) OVERRIDE; + void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE; + void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE; + void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE; + // Add signed constant value to rd. May clobber IP. void AddConstant(Register rd, int32_t value, Condition cond = AL) OVERRIDE; void AddConstant(Register rd, Register rn, int32_t value, diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 26cb6c3739..f9e1ac672e 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -25,6 +25,309 @@ namespace art { namespace arm { +void Thumb2Assembler::BindLabel(Label* label, uint32_t bound_pc) { + CHECK(!label->IsBound()); + + while (label->IsLinked()) { + FixupId fixup_id = label->Position(); // The id for linked Fixup. + Fixup* fixup = GetFixup(fixup_id); // Get the Fixup at this id. + fixup->Resolve(bound_pc); // Fixup can be resolved now. + // Add this fixup as a dependency of all later fixups. + for (FixupId id = fixup_id + 1u, end = fixups_.size(); id != end; ++id) { + GetFixup(id)->AddDependent(fixup_id); + } + uint32_t fixup_location = fixup->GetLocation(); + uint16_t next = buffer_.Load<uint16_t>(fixup_location); // Get next in chain. + buffer_.Store<int16_t>(fixup_location, 0); + label->position_ = next; // Move to next. + } + label->BindTo(bound_pc); +} + +void Thumb2Assembler::BindLiterals() { + // We don't add the padding here, that's done only after adjusting the Fixup sizes. + uint32_t code_size = buffer_.Size(); + for (Literal& lit : literals_) { + Label* label = lit.GetLabel(); + BindLabel(label, code_size); + code_size += lit.GetSize(); + } +} + +void Thumb2Assembler::AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size, + std::deque<FixupId>* fixups_to_recalculate) { + uint32_t adjustment = fixup->AdjustSizeIfNeeded(*current_code_size); + if (adjustment != 0u) { + *current_code_size += adjustment; + for (FixupId dependent_id : fixup->Dependents()) { + Fixup* dependent = GetFixup(dependent_id); + dependent->IncreaseAdjustment(adjustment); + if (buffer_.Load<int16_t>(dependent->GetLocation()) == 0) { + buffer_.Store<int16_t>(dependent->GetLocation(), 1); + fixups_to_recalculate->push_back(dependent_id); + } + } + } +} + +uint32_t Thumb2Assembler::AdjustFixups() { + uint32_t current_code_size = buffer_.Size(); + std::deque<FixupId> fixups_to_recalculate; + if (kIsDebugBuild) { + // We will use the placeholders in the buffer_ to mark whether the fixup has + // been added to the fixups_to_recalculate. Make sure we start with zeros. + for (Fixup& fixup : fixups_) { + CHECK_EQ(buffer_.Load<int16_t>(fixup.GetLocation()), 0); + } + } + for (Fixup& fixup : fixups_) { + AdjustFixupIfNeeded(&fixup, ¤t_code_size, &fixups_to_recalculate); + } + while (!fixups_to_recalculate.empty()) { + // Pop the fixup. 
+ FixupId fixup_id = fixups_to_recalculate.front(); + fixups_to_recalculate.pop_front(); + Fixup* fixup = GetFixup(fixup_id); + DCHECK_NE(buffer_.Load<int16_t>(fixup->GetLocation()), 0); + buffer_.Store<int16_t>(fixup->GetLocation(), 0); + // See if it needs adjustment. + AdjustFixupIfNeeded(fixup, ¤t_code_size, &fixups_to_recalculate); + } + if (kIsDebugBuild) { + // Check that no fixup is marked as being in fixups_to_recalculate anymore. + for (Fixup& fixup : fixups_) { + CHECK_EQ(buffer_.Load<int16_t>(fixup.GetLocation()), 0); + } + } + + // Adjust literal pool labels for padding. + DCHECK_EQ(current_code_size & 1u, 0u); + uint32_t literals_adjustment = current_code_size + (current_code_size & 2) - buffer_.Size(); + if (literals_adjustment != 0u) { + for (Literal& literal : literals_) { + Label* label = literal.GetLabel(); + DCHECK(label->IsBound()); + int old_position = label->Position(); + label->Reinitialize(); + label->BindTo(old_position + literals_adjustment); + } + } + + return current_code_size; +} + +void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) { + // Move non-fixup code to its final place and emit fixups. + // Process fixups in reverse order so that we don't repeatedly move the same data. + size_t src_end = buffer_.Size(); + size_t dest_end = adjusted_code_size; + buffer_.Resize(dest_end); + DCHECK_GE(dest_end, src_end); + for (auto i = fixups_.rbegin(), end = fixups_.rend(); i != end; ++i) { + Fixup* fixup = &*i; + if (fixup->GetOriginalSize() == fixup->GetSize()) { + // The size of this Fixup didn't change. To avoid moving the data + // in small chunks, emit the code to its original position. + fixup->Emit(&buffer_, adjusted_code_size); + fixup->Finalize(dest_end - src_end); + } else { + // Move the data between the end of the fixup and src_end to its final location. + size_t old_fixup_location = fixup->GetLocation(); + size_t src_begin = old_fixup_location + fixup->GetOriginalSizeInBytes(); + size_t data_size = src_end - src_begin; + size_t dest_begin = dest_end - data_size; + buffer_.Move(dest_begin, src_begin, data_size); + src_end = old_fixup_location; + dest_end = dest_begin - fixup->GetSizeInBytes(); + // Finalize the Fixup and emit the data to the new location. + fixup->Finalize(dest_end - src_end); + fixup->Emit(&buffer_, adjusted_code_size); + } + } + CHECK_EQ(src_end, dest_end); +} + +void Thumb2Assembler::EmitLiterals() { + if (!literals_.empty()) { + // Load literal instructions (LDR, LDRD, VLDR) require 4-byte alignment. + // We don't support byte and half-word literals. 
+ uint32_t code_size = buffer_.Size(); + DCHECK_EQ(code_size & 1u, 0u); + if ((code_size & 2u) != 0u) { + Emit16(0); + } + for (Literal& literal : literals_) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(static_cast<size_t>(literal.GetLabel()->Position()), buffer_.Size()); + DCHECK(literal.GetSize() == 4u || literal.GetSize() == 8u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } +} + +inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) { + DCHECK_EQ(offset & 1, 0); + int16_t encoding = B15 | B14; + if (cond != AL) { + DCHECK(IsInt<9>(offset)); + encoding |= B12 | (static_cast<int32_t>(cond) << 8) | ((offset >> 1) & 0xff); + } else { + DCHECK(IsInt<12>(offset)); + encoding |= B13 | ((offset >> 1) & 0x7ff); + } + return encoding; +} + +inline int32_t Thumb2Assembler::BEncoding32(int32_t offset, Condition cond) { + DCHECK_EQ(offset & 1, 0); + int32_t s = (offset >> 31) & 1; // Sign bit. + int32_t encoding = B31 | B30 | B29 | B28 | B15 | + (s << 26) | // Sign bit goes to bit 26. + ((offset >> 1) & 0x7ff); // imm11 goes to bits 0-10. + if (cond != AL) { + DCHECK(IsInt<21>(offset)); + // Encode cond, move imm6 from bits 12-17 to bits 16-21 and move J1 and J2. + encoding |= (static_cast<int32_t>(cond) << 22) | ((offset & 0x3f000) << (16 - 12)) | + ((offset & (1 << 19)) >> (19 - 13)) | // Extract J1 from bit 19 to bit 13. + ((offset & (1 << 18)) >> (18 - 11)); // Extract J2 from bit 18 to bit 11. + } else { + DCHECK(IsInt<25>(offset)); + int32_t j1 = ((offset >> 23) ^ s ^ 1) & 1; // Calculate J1 from I1 extracted from bit 23. + int32_t j2 = ((offset >> 22)^ s ^ 1) & 1; // Calculate J2 from I2 extracted from bit 22. + // Move imm10 from bits 12-21 to bits 16-25 and add J1 and J2. + encoding |= B12 | ((offset & 0x3ff000) << (16 - 12)) | + (j1 << 13) | (j2 << 11); + } + return encoding; +} + +inline int16_t Thumb2Assembler::CbxzEncoding16(Register rn, int32_t offset, Condition cond) { + DCHECK(!IsHighRegister(rn)); + DCHECK_EQ(offset & 1, 0); + DCHECK(IsUint<7>(offset)); + DCHECK(cond == EQ || cond == NE); + return B15 | B13 | B12 | B8 | (cond == NE ? B11 : 0) | static_cast<int32_t>(rn) | + ((offset & 0x3e) << (3 - 1)) | // Move imm5 from bits 1-5 to bits 3-7. + ((offset & 0x40) << (9 - 6)); // Move i from bit 6 to bit 11 +} + +inline int16_t Thumb2Assembler::CmpRnImm8Encoding16(Register rn, int32_t value) { + DCHECK(!IsHighRegister(rn)); + DCHECK(IsUint<8>(value)); + return B13 | B11 | (rn << 8) | value; +} + +inline int16_t Thumb2Assembler::AddRdnRmEncoding16(Register rdn, Register rm) { + // The high bit of rn is moved across 4-bit rm. + return B14 | B10 | (static_cast<int32_t>(rm) << 3) | + (static_cast<int32_t>(rdn) & 7) | ((static_cast<int32_t>(rdn) & 8) << 4); +} + +inline int32_t Thumb2Assembler::MovwEncoding32(Register rd, int32_t value) { + DCHECK(IsUint<16>(value)); + return B31 | B30 | B29 | B28 | B25 | B22 | + (static_cast<int32_t>(rd) << 8) | + ((value & 0xf000) << (16 - 12)) | // Move imm4 from bits 12-15 to bits 16-19. + ((value & 0x0800) << (26 - 11)) | // Move i from bit 11 to bit 26. + ((value & 0x0700) << (12 - 8)) | // Move imm3 from bits 8-10 to bits 12-14. + (value & 0xff); // Keep imm8 in bits 0-7. 
+} + +inline int32_t Thumb2Assembler::MovtEncoding32(Register rd, int32_t value) { + DCHECK_EQ(value & 0xffff, 0); + int32_t movw_encoding = MovwEncoding32(rd, (value >> 16) & 0xffff); + return movw_encoding | B25 | B23; +} + +inline int32_t Thumb2Assembler::MovModImmEncoding32(Register rd, int32_t value) { + uint32_t mod_imm = ModifiedImmediate(value); + DCHECK_NE(mod_imm, kInvalidModifiedImmediate); + return B31 | B30 | B29 | B28 | B22 | B19 | B18 | B17 | B16 | + (static_cast<int32_t>(rd) << 8) | static_cast<int32_t>(mod_imm); +} + +inline int16_t Thumb2Assembler::LdrLitEncoding16(Register rt, int32_t offset) { + DCHECK(!IsHighRegister(rt)); + DCHECK_EQ(offset & 3, 0); + DCHECK(IsUint<10>(offset)); + return B14 | B11 | (static_cast<int32_t>(rt) << 8) | (offset >> 2); +} + +inline int32_t Thumb2Assembler::LdrLitEncoding32(Register rt, int32_t offset) { + // NOTE: We don't support negative offset, i.e. U=0 (B23). + return LdrRtRnImm12Encoding(rt, PC, offset); +} + +inline int32_t Thumb2Assembler::LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset) { + DCHECK_EQ(offset & 3, 0); + CHECK(IsUint<10>(offset)); + return B31 | B30 | B29 | B27 | + B24 /* P = 1 */ | B23 /* U = 1 */ | B22 | 0 /* W = 0 */ | B20 | + (static_cast<int32_t>(rn) << 16) | (static_cast<int32_t>(rt) << 12) | + (static_cast<int32_t>(rt2) << 8) | (offset >> 2); +} + +inline int32_t Thumb2Assembler::VldrsEncoding32(SRegister sd, Register rn, int32_t offset) { + DCHECK_EQ(offset & 3, 0); + CHECK(IsUint<10>(offset)); + return B31 | B30 | B29 | B27 | B26 | B24 | + B23 /* U = 1 */ | B20 | B11 | B9 | + (static_cast<int32_t>(rn) << 16) | + ((static_cast<int32_t>(sd) & 0x01) << (22 - 0)) | // Move D from bit 0 to bit 22. + ((static_cast<int32_t>(sd) & 0x1e) << (12 - 1)) | // Move Vd from bits 1-4 to bits 12-15. + (offset >> 2); +} + +inline int32_t Thumb2Assembler::VldrdEncoding32(DRegister dd, Register rn, int32_t offset) { + DCHECK_EQ(offset & 3, 0); + CHECK(IsUint<10>(offset)); + return B31 | B30 | B29 | B27 | B26 | B24 | + B23 /* U = 1 */ | B20 | B11 | B9 | B8 | + (rn << 16) | + ((static_cast<int32_t>(dd) & 0x10) << (22 - 4)) | // Move D from bit 4 to bit 22. + ((static_cast<int32_t>(dd) & 0x0f) << (12 - 0)) | // Move Vd from bits 0-3 to bits 12-15. + (offset >> 2); +} + +inline int16_t Thumb2Assembler::LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset) { + DCHECK(!IsHighRegister(rt)); + DCHECK(!IsHighRegister(rn)); + DCHECK_EQ(offset & 3, 0); + DCHECK(IsUint<7>(offset)); + return B14 | B13 | B11 | + (static_cast<int32_t>(rn) << 3) | static_cast<int32_t>(rt) | + (offset << (6 - 2)); // Move imm5 from bits 2-6 to bits 6-10. 
+} + +int32_t Thumb2Assembler::Fixup::LoadWideOrFpEncoding(Register rbase, int32_t offset) const { + switch (type_) { + case kLoadLiteralWide: + return LdrdEncoding32(rn_, rt2_, rbase, offset); + case kLoadFPLiteralSingle: + return VldrsEncoding32(sd_, rbase, offset); + case kLoadFPLiteralDouble: + return VldrdEncoding32(dd_, rbase, offset); + default: + LOG(FATAL) << "Unexpected type: " << static_cast<int>(type_); + UNREACHABLE(); + } +} + +inline int32_t Thumb2Assembler::LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset) { + DCHECK(IsUint<12>(offset)); + return B31 | B30 | B29 | B28 | B27 | B23 | B22 | B20 | (rn << 16) | (rt << 12) | offset; +} + +void Thumb2Assembler::FinalizeCode() { + ArmAssembler::FinalizeCode(); + BindLiterals(); + uint32_t adjusted_code_size = AdjustFixups(); + EmitFixups(adjusted_code_size); + EmitLiterals(); +} + bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED, Register rn ATTRIBUTE_UNUSED, Opcode opcode, @@ -671,17 +974,11 @@ void Thumb2Assembler::vcmpdz(DRegister dd, Condition cond) { EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0); } - void Thumb2Assembler::b(Label* label, Condition cond) { EmitBranch(cond, label, false, false); } -void Thumb2Assembler::b(NearLabel* label, Condition cond) { - EmitBranch(cond, label, false, false, /* is_near */ true); -} - - void Thumb2Assembler::bl(Label* label, Condition cond) { CheckCondition(cond); EmitBranch(cond, label, true, false); @@ -1308,80 +1605,359 @@ void Thumb2Assembler::EmitShift(Register rd, Register rn, Shift shift, Register } } +inline size_t Thumb2Assembler::Fixup::SizeInBytes(Size size) { + switch (size) { + case kBranch16Bit: + return 2u; + case kBranch32Bit: + return 4u; + + case kCbxz16Bit: + return 2u; + case kCbxz32Bit: + return 4u; + case kCbxz48Bit: + return 6u; + + case kLiteral1KiB: + return 2u; + case kLiteral4KiB: + return 4u; + case kLiteral64KiB: + return 8u; + case kLiteral1MiB: + return 10u; + case kLiteralFar: + return 14u; + + case kLongOrFPLiteral1KiB: + return 4u; + case kLongOrFPLiteral256KiB: + return 10u; + case kLongOrFPLiteralFar: + return 14u; + } + LOG(FATAL) << "Unexpected size: " << static_cast<int>(size); + UNREACHABLE(); +} + +inline uint32_t Thumb2Assembler::Fixup::GetOriginalSizeInBytes() const { + return SizeInBytes(original_size_); +} + +inline uint32_t Thumb2Assembler::Fixup::GetSizeInBytes() const { + return SizeInBytes(size_); +} + +inline size_t Thumb2Assembler::Fixup::LiteralPoolPaddingSize(uint32_t current_code_size) { + // The code size must be a multiple of 2. + DCHECK_EQ(current_code_size & 1u, 0u); + // If it isn't a multiple of 4, we need to add a 2-byte padding before the literal pool. 
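+  // A quick illustration with hypothetical sizes: a code size of 0x100 is already
+  // 4-byte aligned, so LiteralPoolPaddingSize(0x100) == 0u, while a code size of
+  // 0x102 needs 2 bytes of padding before the pool, so LiteralPoolPaddingSize(0x102) == 2u.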
+ return current_code_size & 2; +} + +inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) const { + static constexpr int32_t int32_min = std::numeric_limits<int32_t>::min(); + static constexpr int32_t int32_max = std::numeric_limits<int32_t>::max(); + DCHECK_LE(target_, static_cast<uint32_t>(int32_max)); + DCHECK_LE(location_, static_cast<uint32_t>(int32_max)); + DCHECK_LE(adjustment_, static_cast<uint32_t>(int32_max)); + int32_t diff = static_cast<int32_t>(target_) - static_cast<int32_t>(location_); + if (target_ > location_) { + DCHECK_LE(adjustment_, static_cast<uint32_t>(int32_max - diff)); + diff += static_cast<int32_t>(adjustment_); + } else { + DCHECK_LE(int32_min + static_cast<int32_t>(adjustment_), diff); + diff -= static_cast<int32_t>(adjustment_); + } + // The default PC adjustment for Thumb2 is 4 bytes. + DCHECK_GE(diff, int32_min + 4); + diff -= 4; + // Add additional adjustment for instructions preceding the PC usage, padding + // before the literal pool and rounding down the PC for literal loads. + switch (GetSize()) { + case kBranch16Bit: + case kBranch32Bit: + break; + case kCbxz16Bit: + break; + case kCbxz32Bit: + case kCbxz48Bit: + DCHECK_GE(diff, int32_min + 2); + diff -= 2; // Extra CMP Rn, #0, 16-bit. + break; -void Thumb2Assembler::Branch::Emit(AssemblerBuffer* buffer) const { - bool link = type_ == kUnconditionalLinkX || type_ == kUnconditionalLink; - bool x = type_ == kUnconditionalX || type_ == kUnconditionalLinkX; - int32_t offset = target_ - location_; + case kLiteral1KiB: + case kLiteral4KiB: + case kLongOrFPLiteral1KiB: + DCHECK(diff >= 0 || (GetSize() == kLiteral1KiB && diff == -2)); + diff += LiteralPoolPaddingSize(current_code_size); + // Load literal instructions round down the PC+4 to a multiple of 4, so if the PC + // isn't a multiple of 2, we need to adjust. Since we already adjusted for the target + // being aligned, current PC alignment can be inferred from diff. + DCHECK_EQ(diff & 1, 0); + diff = diff + (diff & 2); + DCHECK_GE(diff, 0); + break; + case kLiteral1MiB: + case kLiteral64KiB: + case kLongOrFPLiteral256KiB: + DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC. + diff -= 4; // One extra 32-bit MOV. + diff += LiteralPoolPaddingSize(current_code_size); + break; + case kLiteralFar: + case kLongOrFPLiteralFar: + DCHECK_GE(diff, 8); // The target must be at least 4 bytes after the ADD rX, PC. + diff -= 8; // Extra MOVW+MOVT; both 32-bit. + diff += LiteralPoolPaddingSize(current_code_size); + break; + } + return diff; +} - if (size_ == k32Bit) { - int32_t encoding = B31 | B30 | B29 | B28 | B15; - if (link) { - // BL or BLX immediate. - encoding |= B14; - if (!x) { - encoding |= B12; - } else { - // Bottom bit of offset must be 0. - CHECK_EQ((offset & 1), 0); +inline size_t Thumb2Assembler::Fixup::IncreaseSize(Size new_size) { + DCHECK_NE(target_, kUnresolved); + Size old_size = size_; + size_ = new_size; + DCHECK_GT(SizeInBytes(new_size), SizeInBytes(old_size)); + size_t adjustment = SizeInBytes(new_size) - SizeInBytes(old_size); + if (target_ > location_) { + adjustment_ += adjustment; + } + return adjustment; +} + +uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) { + uint32_t old_code_size = current_code_size; + switch (GetSize()) { + case kBranch16Bit: + if (IsInt(cond_ != AL ? 
9 : 12, GetOffset(current_code_size))) { + break; } - } else { - if (x) { - LOG(FATAL) << "Invalid use of BX"; - UNREACHABLE(); - } else { - if (cond_ == AL) { - // Can use the T4 encoding allowing a 24 bit offset. - if (!x) { - encoding |= B12; - } - } else { - // Must be T3 encoding with a 20 bit offset. - encoding |= cond_ << 22; - } + current_code_size += IncreaseSize(kBranch32Bit); + FALLTHROUGH_INTENDED; + case kBranch32Bit: + // We don't support conditional branches beyond +-1MiB + // or unconditional branches beyond +-16MiB. + break; + + case kCbxz16Bit: + if (IsUint<7>(GetOffset(current_code_size))) { + break; } - } - encoding = Thumb2Assembler::EncodeBranchOffset(offset, encoding); - buffer->Store<int16_t>(location_, static_cast<int16_t>(encoding >> 16)); - buffer->Store<int16_t>(location_+2, static_cast<int16_t>(encoding & 0xffff)); - } else { - if (IsCompareAndBranch()) { - offset -= 4; - uint16_t i = (offset >> 6) & 1; - uint16_t imm5 = (offset >> 1) & 31U /* 0b11111 */; - int16_t encoding = B15 | B13 | B12 | - (type_ == kCompareAndBranchNonZero ? B11 : 0) | - static_cast<uint32_t>(rn_) | - B8 | - i << 9 | - imm5 << 3; + current_code_size += IncreaseSize(kCbxz32Bit); + FALLTHROUGH_INTENDED; + case kCbxz32Bit: + if (IsInt<9>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kCbxz48Bit); + FALLTHROUGH_INTENDED; + case kCbxz48Bit: + // We don't support conditional branches beyond +-1MiB. + break; + + case kLiteral1KiB: + DCHECK(!IsHighRegister(rn_)); + if (IsUint<10>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteral4KiB); + FALLTHROUGH_INTENDED; + case kLiteral4KiB: + if (IsUint<12>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteral64KiB); + FALLTHROUGH_INTENDED; + case kLiteral64KiB: + // Can't handle high register which we can encounter by fall-through from kLiteral4KiB. + if (!IsHighRegister(rn_) && IsUint<16>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteral1MiB); + FALLTHROUGH_INTENDED; + case kLiteral1MiB: + if (IsUint<20>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteralFar); + FALLTHROUGH_INTENDED; + case kLiteralFar: + // This encoding can reach any target. + break; + + case kLongOrFPLiteral1KiB: + if (IsUint<10>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLongOrFPLiteral256KiB); + FALLTHROUGH_INTENDED; + case kLongOrFPLiteral256KiB: + if (IsUint<18>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLongOrFPLiteralFar); + FALLTHROUGH_INTENDED; + case kLongOrFPLiteralFar: + // This encoding can reach any target. + break; + } + return current_code_size - old_code_size; +} + +void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) const { + switch (GetSize()) { + case kBranch16Bit: { + DCHECK(type_ == kUnconditional || type_ == kConditional); + DCHECK_EQ(type_ == kConditional, cond_ != AL); + int16_t encoding = BEncoding16(GetOffset(code_size), cond_); buffer->Store<int16_t>(location_, encoding); - } else { - offset -= 4; // Account for PC offset. - int16_t encoding; - // 16 bit. 
- if (cond_ == AL) { - encoding = B15 | B14 | B13 | - ((offset >> 1) & 0x7ff); - } else { - encoding = B15 | B14 | B12 | - cond_ << 8 | ((offset >> 1) & 0xff); + break; + } + case kBranch32Bit: { + DCHECK(type_ == kConditional || type_ == kUnconditional || + type_ == kUnconditionalLink || type_ == kUnconditionalLinkX); + DCHECK_EQ(type_ == kConditional, cond_ != AL); + int32_t encoding = BEncoding32(GetOffset(code_size), cond_); + if (type_ == kUnconditionalLink) { + DCHECK_NE(encoding & B12, 0); + encoding |= B14; + } else if (type_ == kUnconditionalLinkX) { + DCHECK_NE(encoding & B12, 0); + encoding ^= B14 | B12; } + buffer->Store<int16_t>(location_, encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + break; + } + + case kCbxz16Bit: { + DCHECK(type_ == kCompareAndBranchXZero); + int16_t encoding = CbxzEncoding16(rn_, GetOffset(code_size), cond_); + buffer->Store<int16_t>(location_, encoding); + break; + } + case kCbxz32Bit: { + DCHECK(type_ == kCompareAndBranchXZero); + DCHECK(cond_ == EQ || cond_ == NE); + int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); + int16_t b_encoding = BEncoding16(GetOffset(code_size), cond_); + buffer->Store<int16_t>(location_, cmp_encoding); + buffer->Store<int16_t>(location_ + 2, b_encoding); + break; + } + case kCbxz48Bit: { + DCHECK(type_ == kCompareAndBranchXZero); + DCHECK(cond_ == EQ || cond_ == NE); + int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); + int32_t b_encoding = BEncoding32(GetOffset(code_size), cond_); + buffer->Store<int16_t>(location_, cmp_encoding); + buffer->Store<int16_t>(location_ + 2u, b_encoding >> 16); + buffer->Store<int16_t>(location_ + 4u, static_cast<int16_t>(b_encoding & 0xffff)); + break; + } + + case kLiteral1KiB: { + DCHECK(type_ == kLoadLiteralNarrow); + int16_t encoding = LdrLitEncoding16(rn_, GetOffset(code_size)); buffer->Store<int16_t>(location_, encoding); + break; + } + case kLiteral4KiB: { + DCHECK(type_ == kLoadLiteralNarrow); + // GetOffset() uses PC+4 but load literal uses AlignDown(PC+4, 4). Adjust offset accordingly. 
+ int32_t encoding = LdrLitEncoding32(rn_, GetOffset(code_size)); + buffer->Store<int16_t>(location_, encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + break; + } + case kLiteral64KiB: { + DCHECK(type_ == kLoadLiteralNarrow); + int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size)); + int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); + int16_t ldr_encoding = LdrRtRnImm5Encoding16(rn_, rn_, 0); + buffer->Store<int16_t>(location_, mov_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 6u, ldr_encoding); + break; + } + case kLiteral1MiB: { + DCHECK(type_ == kLoadLiteralNarrow); + int32_t offset = GetOffset(code_size); + int32_t mov_encoding = MovModImmEncoding32(rn_, offset & ~0xfff); + int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); + int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, offset & 0xfff); + buffer->Store<int16_t>(location_, mov_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + break; + } + case kLiteralFar: { + DCHECK(type_ == kLoadLiteralNarrow); + int32_t offset = GetOffset(code_size); + int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff); + int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff); + int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); + int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, 0); + buffer->Store<int16_t>(location_, movw_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + break; + } + + case kLongOrFPLiteral1KiB: { + int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size)); // DCHECKs type_. + buffer->Store<int16_t>(location_, encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + break; + } + case kLongOrFPLiteral256KiB: { + int32_t offset = GetOffset(code_size); + int32_t mov_encoding = MovModImmEncoding32(IP, offset & ~0x3ff); + int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); + int32_t ldr_encoding = LoadWideOrFpEncoding(IP, offset & 0x3ff); // DCHECKs type_. + buffer->Store<int16_t>(location_, mov_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + break; + } + case kLongOrFPLiteralFar: { + int32_t offset = GetOffset(code_size); + int32_t movw_encoding = MovwEncoding32(IP, offset & 0xffff); + int32_t movt_encoding = MovtEncoding32(IP, offset & ~0xffff); + int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); + int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0); // DCHECKs type_. 
+ buffer->Store<int16_t>(location_, movw_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + break; } } } - uint16_t Thumb2Assembler::EmitCompareAndBranch(Register rn, uint16_t prev, bool n) { CHECK(IsLowRegister(rn)); uint32_t location = buffer_.Size(); // This is always unresolved as it must be a forward branch. Emit16(prev); // Previous link. - return AddBranch(n ? Branch::kCompareAndBranchNonZero : Branch::kCompareAndBranchZero, - location, rn); + return AddFixup(Fixup::CompareAndBranch(location, rn, n ? NE : EQ)); } @@ -1619,47 +2195,53 @@ void Thumb2Assembler::EmitMultiMemOp(Condition cond, } } - -void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x, bool is_near) { +void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x) { + bool use32bit = IsForced32Bit() || !CanRelocateBranches(); uint32_t pc = buffer_.Size(); - Branch::Type branch_type; + Fixup::Type branch_type; if (cond == AL) { if (link) { + use32bit = true; if (x) { - branch_type = Branch::kUnconditionalLinkX; // BLX. + branch_type = Fixup::kUnconditionalLinkX; // BLX. } else { - branch_type = Branch::kUnconditionalLink; // BX. + branch_type = Fixup::kUnconditionalLink; // BX. } } else { - branch_type = Branch::kUnconditional; // B. + branch_type = Fixup::kUnconditional; // B. } } else { - branch_type = Branch::kConditional; // B<cond>. + branch_type = Fixup::kConditional; // B<cond>. } + Fixup::Size size = use32bit ? Fixup::kBranch32Bit : Fixup::kBranch16Bit; + FixupId branch_id = AddFixup(Fixup::Branch(pc, branch_type, size, cond)); + if (label->IsBound()) { - Branch::Size size = AddBranch(branch_type, pc, label->Position(), cond); // Resolved branch. - - // The branch is to a bound label which means that it's a backwards branch. We know the - // current size of it so we can emit the appropriate space. Note that if it's a 16 bit - // branch the size may change if it so happens that other branches change size that change - // the distance to the target and that distance puts this branch over the limit for 16 bits. - if (size == Branch::k16Bit) { - Emit16(0); // Space for a 16 bit branch. - } else { - Emit32(0); // Space for a 32 bit branch. + // The branch is to a bound label which means that it's a backwards branch. + // Record this branch as a dependency of all Fixups between the label and the branch. + GetFixup(branch_id)->Resolve(label->Position()); + for (FixupId fixup_id = branch_id; fixup_id != 0u; ) { + --fixup_id; + Fixup* fixup = GetFixup(fixup_id); + DCHECK_GE(label->Position(), 0); + if (fixup->GetLocation() < static_cast<uint32_t>(label->Position())) { + break; + } + fixup->AddDependent(branch_id); } + Emit16(0); } else { - // Branch is to an unbound label. Emit space for it. - uint16_t branch_id = AddBranch(branch_type, pc, cond, is_near); // Unresolved branch. - if (force_32bit_ || (!CanRelocateBranches() && !is_near)) { - Emit16(static_cast<uint16_t>(label->position_)); // Emit current label link. - Emit16(0); // another 16 bits. - } else { - Emit16(static_cast<uint16_t>(label->position_)); // Emit current label link. 
- } - label->LinkTo(branch_id); // Link to the branch ID. + // Branch target is an unbound label. Add it to a singly-linked list maintained within + // the code with the label serving as the head. + Emit16(static_cast<uint16_t>(label->position_)); + label->LinkTo(branch_id); } + + if (use32bit) { + Emit16(0); + } + DCHECK_EQ(buffer_.Size() - pc, GetFixup(branch_id)->GetSizeInBytes()); } @@ -2274,82 +2856,8 @@ void Thumb2Assembler::Mov(Register rd, Register rm, Condition cond) { } -// A branch has changed size. Make a hole for it. -void Thumb2Assembler::MakeHoleForBranch(uint32_t location, uint32_t delta) { - // Move the contents of the buffer using: Move(newposition, oldposition) - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - buffer_.Move(location + delta, location); -} - - void Thumb2Assembler::Bind(Label* label) { - CHECK(!label->IsBound()); - uint32_t bound_pc = buffer_.Size(); - std::vector<Branch*> changed_branches; - - while (label->IsLinked()) { - uint16_t position = label->Position(); // Branch id for linked branch. - Branch* branch = GetBranch(position); // Get the branch at this id. - bool changed = branch->Resolve(bound_pc); // Branch can be resolved now. - uint32_t branch_location = branch->GetLocation(); - uint16_t next = buffer_.Load<uint16_t>(branch_location); // Get next in chain. - if (changed) { - DCHECK(CanRelocateBranches()); - MakeHoleForBranch(branch->GetLocation(), 2); - if (branch->IsCompareAndBranch()) { - // A cbz/cbnz instruction has changed size. There is no valid encoding for - // a 32 bit cbz/cbnz so we need to change this to an instruction pair: - // cmp rn, #0 - // b<eq|ne> target - bool n = branch->GetType() == Branch::kCompareAndBranchNonZero; - Condition cond = n ? NE : EQ; - branch->Move(2); // Move the branch forward by 2 bytes. - branch->ResetTypeAndCondition(Branch::kConditional, cond); - branch->ResetSize(Branch::k16Bit); - - // Now add a compare instruction in the place the branch was. - buffer_.Store<int16_t>(branch_location, - B13 | B11 | static_cast<int16_t>(branch->GetRegister()) << 8); - - // Since have moved made a hole in the code we need to reload the - // current pc. - bound_pc = buffer_.Size(); - - // Now resolve the newly added branch. - changed = branch->Resolve(bound_pc); - if (changed) { - MakeHoleForBranch(branch->GetLocation(), 2); - changed_branches.push_back(branch); - } - } else { - changed_branches.push_back(branch); - } - } - label->position_ = next; // Move to next. - } - label->BindTo(bound_pc); - - // Now relocate any changed branches. Do this until there are no more changes. - std::vector<Branch*> branches_to_process = changed_branches; - while (branches_to_process.size() != 0) { - changed_branches.clear(); - for (auto& changed_branch : branches_to_process) { - for (auto& branch : branches_) { - bool changed = branch->Relocate(changed_branch->GetLocation(), 2); - if (changed) { - changed_branches.push_back(branch); - } - } - branches_to_process = changed_branches; - } - } -} - - -void Thumb2Assembler::EmitBranches() { - for (auto& branch : branches_) { - branch->Emit(&buffer_); - } + BindLabel(label, buffer_.Size()); } @@ -2487,6 +2995,85 @@ int Thumb2Assembler::DecodeBranchOffset(int32_t instr) { return imm32; } +uint32_t Thumb2Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the fixups from the beginning + // up to the old_position. 
Since we expect AdjustedPosition() to be called in a loop + // with increasing old_position, we can use the data from last AdjustedPosition() to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of fixups. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0u; + last_old_position_ = 0u; + last_fixup_id_ = 0u; + } + while (last_fixup_id_ != fixups_.size()) { + Fixup* fixup = GetFixup(last_fixup_id_); + if (fixup->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + if (fixup->GetSize() != fixup->GetOriginalSize()) { + last_position_adjustment_ += fixup->GetSizeInBytes() - fixup->GetOriginalSizeInBytes(); + } + ++last_fixup_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +Literal* Thumb2Assembler::NewLiteral(size_t size, const uint8_t* data) { + DCHECK(size == 4u || size == 8u) << size; + literals_.emplace_back(size, data); + return &literals_.back(); +} + +void Thumb2Assembler::LoadLiteral(Register rt, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + DCHECK(!literal->GetLabel()->IsBound()); + bool use32bit = IsForced32Bit() || IsHighRegister(rt); + uint32_t location = buffer_.Size(); + Fixup::Size size = use32bit ? Fixup::kLiteral4KiB : Fixup::kLiteral1KiB; + FixupId fixup_id = AddFixup(Fixup::LoadNarrowLiteral(location, rt, size)); + Emit16(static_cast<uint16_t>(literal->GetLabel()->position_)); + literal->GetLabel()->LinkTo(fixup_id); + if (use32bit) { + Emit16(0); + } + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); +} + +void Thumb2Assembler::LoadLiteral(Register rt, Register rt2, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + DCHECK(!literal->GetLabel()->IsBound()); + uint32_t location = buffer_.Size(); + FixupId fixup_id = + AddFixup(Fixup::LoadWideLiteral(location, rt, rt2, Fixup::kLongOrFPLiteral1KiB)); + Emit16(static_cast<uint16_t>(literal->GetLabel()->position_)); + literal->GetLabel()->LinkTo(fixup_id); + Emit16(0); + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); +} + +void Thumb2Assembler::LoadLiteral(SRegister sd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + DCHECK(!literal->GetLabel()->IsBound()); + uint32_t location = buffer_.Size(); + FixupId fixup_id = AddFixup(Fixup::LoadSingleLiteral(location, sd, Fixup::kLongOrFPLiteral1KiB)); + Emit16(static_cast<uint16_t>(literal->GetLabel()->position_)); + literal->GetLabel()->LinkTo(fixup_id); + Emit16(0); + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); +} + +void Thumb2Assembler::LoadLiteral(DRegister dd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + DCHECK(!literal->GetLabel()->IsBound()); + uint32_t location = buffer_.Size(); + FixupId fixup_id = AddFixup(Fixup::LoadDoubleLiteral(location, dd, Fixup::kLongOrFPLiteral1KiB)); + Emit16(static_cast<uint16_t>(literal->GetLabel()->position_)); + literal->GetLabel()->LinkTo(fixup_id); + Emit16(0); + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); +} void Thumb2Assembler::AddConstant(Register rd, int32_t value, Condition cond) { AddConstant(rd, rd, value, cond); @@ -2763,16 +3350,6 @@ void Thumb2Assembler::CompareAndBranchIfZero(Register r, Label* label) { } -void Thumb2Assembler::CompareAndBranchIfZero(Register r, NearLabel* label) { - if (IsLowRegister(r)) { - cbz(r, label); - } else { - cmp(r, ShifterOperand(0)); - b(label, EQ); - } -} - 
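The literal and fixup machinery above is driven entirely from FinalizeCode(): code is first emitted with placeholder fixups, then BindLiterals(), AdjustFixups(), EmitFixups() and EmitLiterals() produce the final layout, and any position recorded before finalization can be translated with GetAdjustedPosition(). A rough usage sketch (hypothetical driver code, following the conventions of the assembler tests added later in this change, so namespace qualifiers are abbreviated):

    arm::Thumb2Assembler assembler(/* can_relocate_branches */ true);
    arm::Literal* literal = assembler.NewLiteral<int32_t>(0x12345678);  // 4-byte literal.
    assembler.LoadLiteral(arm::R0, literal);  // Records a kLoadLiteralNarrow fixup, kLiteral1KiB at first.
    Label label;
    assembler.Bind(&label);                   // Non-final position, recorded before finalization.
    // ... emit the body of the method ...
    assembler.FinalizeCode();                 // Expands out-of-range fixups and emits the literal pool.
    uint32_t final_position = assembler.GetAdjustedPosition(label.Position());
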
- void Thumb2Assembler::CompareAndBranchIfNonZero(Register r, Label* label) { if (CanRelocateBranches() && IsLowRegister(r)) { cbnz(r, label); diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 2382b74c30..5e6969b4c2 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_ #define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_ +#include <deque> #include <vector> #include "base/logging.h" @@ -34,13 +35,15 @@ class Thumb2Assembler FINAL : public ArmAssembler { : can_relocate_branches_(can_relocate_branches), force_32bit_(false), it_cond_index_(kNoItCondition), - next_condition_(AL) { + next_condition_(AL), + fixups_(), + literals_(), + last_position_adjustment_(0u), + last_old_position_(0u), + last_fixup_id_(0u) { } virtual ~Thumb2Assembler() { - for (auto& branch : branches_) { - delete branch; - } } bool IsThumb() const OVERRIDE { @@ -55,10 +58,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { return can_relocate_branches_; } - void FinalizeInstructions(const MemoryRegion& region) OVERRIDE { - EmitBranches(); - Assembler::FinalizeInstructions(region); - } + void FinalizeCode() OVERRIDE; // Data-processing instructions. void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE; @@ -238,7 +238,6 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Branch instructions. void b(Label* label, Condition cond = AL); - void b(NearLabel* label, Condition cond = AL); void bl(Label* label, Condition cond = AL); void blx(Label* label); void blx(Register rm, Condition cond = AL) OVERRIDE; @@ -273,13 +272,23 @@ class Thumb2Assembler FINAL : public ArmAssembler { void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE; void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE; - void CompareAndBranchIfZero(Register r, NearLabel* label) OVERRIDE; void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE; // Memory barriers. void dmb(DmbOptions flavor) OVERRIDE; - // Macros. + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE; + + using ArmAssembler::NewLiteral; // Make the helper template visible. + + Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE; + void LoadLiteral(Register rt, Literal* literal) OVERRIDE; + void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE; + void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE; + void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE; + // Add signed constant value to rd. May clobber IP. void AddConstant(Register rd, int32_t value, Condition cond = AL) OVERRIDE; void AddConstant(Register rd, Register rn, int32_t value, @@ -340,6 +349,244 @@ class Thumb2Assembler FINAL : public ArmAssembler { } private: + typedef uint16_t FixupId; + + // Fixup: branches and literal pool references. + // + // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This + // depends on both the type of branch and the offset to which it is branching. The 16-bit + // cbz and cbnz instructions may also need to be replaced with a separate 16-bit compare + // instruction and a 16- or 32-bit branch instruction. 
A load from the literal pool can also be
+  // a 16-bit or 32-bit instruction and, if the method is large, we may need to use a sequence
+  // of instructions to make up for the limited range of load literal instructions (up to
+  // 4KiB for the 32-bit variant). When generating code for these insns we don't know the
+  // size beforehand, so we assume it is the smallest available size and determine the final
+  // code offsets and sizes and emit code in FinalizeCode().
+  //
+  // To handle this, we keep a record of every branch and literal pool load in the program.
+  // The actual instruction encoding for these is delayed until we know the final size of
+  // every instruction. When we bind a label to a branch we don't know the final location yet
+  // as some preceding instructions may need to be expanded, so we record a non-final offset.
+  // In FinalizeCode(), we expand the sizes of branches and literal loads that are out of
+  // range. With each expansion, we need to update dependent Fixups, i.e. instructions with
+  // a target on the other side of the expanded insn, as their offsets change and this may
+  // trigger further expansion.
+  //
+  // All Fixups have a 'fixup id' which is a 16-bit unsigned number used to identify the
+  // Fixup. For each unresolved label we keep a singly-linked list of all Fixups pointing
+  // to it, using the fixup ids as links. The first link is stored in the label's position
+  // (the label is linked but not bound), the following links are stored in the code buffer,
+  // in the placeholder where we will eventually emit the actual code.
+
+  class Fixup {
+   public:
+    // Branch type.
+    enum Type : uint8_t {
+      kConditional,               // B<cond>.
+      kUnconditional,             // B.
+      kUnconditionalLink,         // BL.
+      kUnconditionalLinkX,        // BLX.
+      kCompareAndBranchXZero,     // cbz/cbnz.
+      kLoadLiteralNarrow,         // Load narrow integer literal.
+      kLoadLiteralWide,           // Load wide integer literal.
+      kLoadFPLiteralSingle,       // Load FP literal single.
+      kLoadFPLiteralDouble,       // Load FP literal double.
+    };
+
+    // Calculated size of branch instruction based on type and offset.
+    enum Size : uint8_t {
+      // Branch variants.
+      kBranch16Bit,
+      kBranch32Bit,
+      // NOTE: We don't support branches which would require multiple instructions, i.e.
+      // conditional branches beyond +-1MiB and unconditional branches beyond +-16MiB.
+
+      // CBZ/CBNZ variants.
+      kCbxz16Bit,   // CBZ/CBNZ rX, label; X < 8; 7-bit positive offset.
+      kCbxz32Bit,   // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset.
+      kCbxz48Bit,   // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset.
+
+      // Load integer literal variants.
+      // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes.
+      kLiteral1KiB,
+      // LDR rX, label; 32-bit variant up to 4KiB offset; 4 bytes.
+      kLiteral4KiB,
+      // MOV rX, imm16 + ADD rX, pc + LDR rX, [rX]; X < 8; up to 64KiB offset; 8 bytes.
+      kLiteral64KiB,
+      // MOV rX, modimm + ADD rX, pc + LDR rX, [rX, #imm12]; up to 1MiB offset; 10 bytes.
+      kLiteral1MiB,
+      // NOTE: We don't provide the 12-byte version of kLiteralFar below where the LDR is 16-bit.
+      // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes.
+      kLiteralFar,
+
+      // Load long or FP literal variants.
+      // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes.
+      kLongOrFPLiteral1KiB,
+      // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes.
+      kLongOrFPLiteral256KiB,
+      // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
+      kLongOrFPLiteralFar,
+    };
+
+    // Unresolved branch possibly with a condition.
+    static Fixup Branch(uint32_t location, Type type, Size size = kBranch16Bit,
+                        Condition cond = AL) {
+      DCHECK(type == kConditional || type == kUnconditional ||
+             type == kUnconditionalLink || type == kUnconditionalLinkX);
+      DCHECK(size == kBranch16Bit || size == kBranch32Bit);
+      DCHECK(size == kBranch32Bit || (type == kConditional || type == kUnconditional));
+      return Fixup(kNoRegister, kNoRegister, kNoSRegister, kNoDRegister,
+                   cond, type, size, location);
+    }
+
+    // Unresolved compare-and-branch instruction with a register and condition (EQ or NE).
+    static Fixup CompareAndBranch(uint32_t location, Register rn, Condition cond) {
+      DCHECK(cond == EQ || cond == NE);
+      return Fixup(rn, kNoRegister, kNoSRegister, kNoDRegister,
+                   cond, kCompareAndBranchXZero, kCbxz16Bit, location);
+    }
+
+    // Load narrow literal.
+    static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size = kLiteral1KiB) {
+      DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB ||
+             size == kLiteral1MiB || size == kLiteralFar);
+      DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
+      return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister,
+                   AL, kLoadLiteralNarrow, size, location);
+    }
+
+    // Load wide literal.
+    static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2,
+                                 Size size = kLongOrFPLiteral1KiB) {
+      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+             size == kLongOrFPLiteralFar);
+      DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB));
+      return Fixup(rt, rt2, kNoSRegister, kNoDRegister,
+                   AL, kLoadLiteralWide, size, location);
+    }
+
+    // Load FP single literal.
+    static Fixup LoadSingleLiteral(uint32_t location, SRegister sd,
+                                   Size size = kLongOrFPLiteral1KiB) {
+      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+             size == kLongOrFPLiteralFar);
+      return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister,
+                   AL, kLoadFPLiteralSingle, size, location);
+    }
+
+    // Load FP double literal.
+    static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd,
+                                   Size size = kLongOrFPLiteral1KiB) {
+      DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB ||
+             size == kLongOrFPLiteralFar);
+      return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd,
+                   AL, kLoadFPLiteralDouble, size, location);
+    }
+
+    Type GetType() const {
+      return type_;
+    }
+
+    Size GetOriginalSize() const {
+      return original_size_;
+    }
+
+    Size GetSize() const {
+      return size_;
+    }
+
+    uint32_t GetOriginalSizeInBytes() const;
+
+    uint32_t GetSizeInBytes() const;
+
+    uint32_t GetLocation() const {
+      return location_;
+    }
+
+    uint32_t GetAdjustment() const {
+      return adjustment_;
+    }
+
+    const std::vector<FixupId>& Dependents() const {
+      return dependents_;
+    }
+
+    void AddDependent(FixupId dependent_id) {
+      dependents_.push_back(dependent_id);
+    }
+
+    // Resolve a branch when the target is known.
+    void Resolve(uint32_t target) {
+      DCHECK_EQ(target_, kUnresolved);
+      DCHECK_NE(target, kUnresolved);
+      target_ = target;
+    }
+
+    // Check if the current size is OK for current location_, target_ and adjustment_.
+    // If not, increase the size. Return the size increase, 0 if unchanged.
+    // If the target is after this Fixup, also add the difference to adjustment_,
+    // so that we don't need to consider forward Fixups as their own dependencies.
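+    // For example (hypothetical numbers, for illustration only): a conditional 16-bit
+    // branch at location_ 0x10 targeting 0x200 has an offset of 0x1ec, which does not fit
+    // the signed 9-bit offset of a 16-bit B<cond>, so the Fixup grows to kBranch32Bit;
+    // AdjustSizeIfNeeded() then returns the 2 extra bytes and, because the target lies
+    // ahead of the branch, the same 2 bytes are also added to this Fixup's adjustment_.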
+ uint32_t AdjustSizeIfNeeded(uint32_t current_code_size); + + // Increase adjustments. This is called for dependents of a Fixup when its size changes. + void IncreaseAdjustment(uint32_t increase) { + adjustment_ += increase; + } + + // Finalize the branch with an adjustment to the location. Both location and target are updated. + void Finalize(uint32_t location_adjustment) { + DCHECK_NE(target_, kUnresolved); + location_ += location_adjustment; + target_ += location_adjustment; + } + + // Emit the branch instruction into the assembler buffer. This does the + // encoding into the thumb instruction. + void Emit(AssemblerBuffer* buffer, uint32_t code_size) const; + + private: + Fixup(Register rn, Register rt2, SRegister sd, DRegister dd, + Condition cond, Type type, Size size, uint32_t location) + : rn_(rn), + rt2_(rt2), + sd_(sd), + dd_(dd), + cond_(cond), + type_(type), + original_size_(size), size_(size), + location_(location), + target_(kUnresolved), + adjustment_(0u), + dependents_() { + } + static size_t SizeInBytes(Size size); + + // The size of padding added before the literal pool. + static size_t LiteralPoolPaddingSize(uint32_t current_code_size); + + // Returns the offset from the PC-using insn to the target. + int32_t GetOffset(uint32_t current_code_size) const; + + size_t IncreaseSize(Size new_size); + + int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const; + + static constexpr uint32_t kUnresolved = 0xffffffff; // Value for target_ for unresolved. + + const Register rn_; // Rn for cbnz/cbz, Rt for literal loads. + Register rt2_; // For kLoadLiteralWide. + SRegister sd_; // For kLoadFPLiteralSingle. + DRegister dd_; // For kLoadFPLiteralDouble. + const Condition cond_; + const Type type_; + Size original_size_; + Size size_; + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + uint32_t adjustment_; // The number of extra bytes inserted between location_ and target_. + std::vector<FixupId> dependents_; // Fixups that require adjustment when current size changes. + }; + // Emit a single 32 or 16 bit data processing instruction. void EmitDataProcessing(Condition cond, Opcode opcode, @@ -432,7 +679,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond); - void EmitBranch(Condition cond, Label* label, bool link, bool x, bool is_near = false); + void EmitBranch(Condition cond, Label* label, bool link, bool x); static int32_t EncodeBranchOffset(int32_t offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); int32_t EncodeTstOffset(int offset, int32_t inst); @@ -475,275 +722,53 @@ class Thumb2Assembler FINAL : public ArmAssembler { CheckCondition(cond); } - // Branches. - // - // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This - // depends on both the type of branch and the offset to which it is branching. When - // generating code for branches we don't know the size before hand (if the branch is - // going forward, because we haven't seen the target address yet), so we need to assume - // that it is going to be one of 16 or 32 bits. When we know the target (the label is 'bound') - // we can determine the actual size of the branch. However, if we had guessed wrong before - // we knew the target there will be no room in the instruction sequence for the new - // instruction (assume that we never decrease the size of a branch). 
- // - // To handle this, we keep a record of every branch in the program. The actual instruction - // encoding for these is delayed until we know the final size of every branch. When we - // bind a label to a branch (we then know the target address) we determine if the branch - // has changed size. If it has we need to move all the instructions in the buffer after - // the branch point forward by the change in size of the branch. This will create a gap - // in the code big enough for the new branch encoding. However, since we have moved - // a chunk of code we need to relocate the branches in that code to their new address. - // - // Creating a hole in the code for the new branch encoding might cause another branch that was - // 16 bits to become 32 bits, so we need to find this in another pass. - // - // We also need to deal with a cbz/cbnz instruction that becomes too big for its offset - // range. We do this by converting it to two instructions: - // cmp Rn, #0 - // b<cond> target - // But we also need to handle the case where the conditional branch is out of range and - // becomes a 32 bit conditional branch. - // - // All branches have a 'branch id' which is a 16 bit unsigned number used to identify - // the branch. Unresolved labels use the branch id to link to the next unresolved branch. - - class Branch { - public: - // Branch type. - enum Type { - kUnconditional, // B. - kConditional, // B<cond>. - kCompareAndBranchZero, // cbz. - kCompareAndBranchNonZero, // cbnz. - kUnconditionalLink, // BL. - kUnconditionalLinkX, // BLX. - kUnconditionalX // BX. - }; - - // Calculated size of branch instruction based on type and offset. - enum Size { - k16Bit, - k32Bit - }; - - // Unresolved branch possibly with a condition. - Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, Condition cond = AL) : - assembler_(assembler), type_(type), location_(location), - target_(kUnresolved), - cond_(cond), rn_(R0) { - CHECK(!IsCompareAndBranch()); - size_ = CalculateSize(); - } - - // Unresolved compare-and-branch instruction with a register. - Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, Register rn) : - assembler_(assembler), type_(type), location_(location), - target_(kUnresolved), cond_(AL), rn_(rn) { - CHECK(IsCompareAndBranch()); - size_ = CalculateSize(); - } - - // Resolved branch (can't be compare-and-branch) with a target and possibly a condition. - Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, uint32_t target, - Condition cond = AL) : - assembler_(assembler), type_(type), location_(location), - target_(target), cond_(cond), rn_(R0) { - CHECK(!IsCompareAndBranch()); - // Resolved branch. - size_ = CalculateSize(); - } - - bool IsCompareAndBranch() const { - return type_ == kCompareAndBranchNonZero || type_ == kCompareAndBranchZero; - } - - // Resolve a branch when the target is known. If this causes the - // size of the branch to change return true. Otherwise return false. - bool Resolve(uint32_t target) { - uint32_t old_target = target_; - target_ = target; - if (assembler_->CanRelocateBranches()) { - Size new_size = CalculateSize(); - if (size_ != new_size) { - size_ = new_size; - return true; - } - return false; - } else { - if (kIsDebugBuild) { - if (old_target == kUnresolved) { - // Check that the size has not increased. - DCHECK(!(CalculateSize() == k32Bit && size_ == k16Bit)); - } else { - DCHECK(CalculateSize() == size_); - } - } - return false; - } - } - - // Move a cbz/cbnz branch. This is always forward. 
- void Move(int32_t delta) { - CHECK(IsCompareAndBranch()); - CHECK_GT(delta, 0); - location_ += delta; - target_ += delta; - } - - // Relocate a branch by a given delta. This changed the location and - // target if they need to be changed. It also recalculates the - // size of the branch instruction. It returns true if the branch - // has changed size. - bool Relocate(uint32_t oldlocation, int32_t delta) { - DCHECK(assembler_->CanRelocateBranches()); - if (location_ > oldlocation) { - location_ += delta; - } - if (target_ != kUnresolved) { - if (target_ > oldlocation) { - target_ += delta; - } - } else { - return false; // Don't know the size yet. - } - - // Calculate the new size. - Size new_size = CalculateSize(); - if (size_ != new_size) { - size_ = new_size; - return true; - } - return false; - } - - Size GetSize() const { - return size_; - } - - Type GetType() const { - return type_; - } - - uint32_t GetLocation() const { - return location_; - } - - // Emit the branch instruction into the assembler buffer. This does the - // encoding into the thumb instruction. - void Emit(AssemblerBuffer* buffer) const; - - // Reset the type and condition to those given. This used for - // cbz/cbnz instructions when they are converted to cmp/b<cond> - void ResetTypeAndCondition(Type type, Condition cond) { - CHECK(IsCompareAndBranch()); - CHECK(cond == EQ || cond == NE); - type_ = type; - cond_ = cond; - } - - Register GetRegister() const { - return rn_; - } - - void ResetSize(Size size) { - size_ = size; - } - - private: - // Calculate the size of the branch instruction based on its type and offset. - Size CalculateSize() const { - if (target_ == kUnresolved) { - if (assembler_->IsForced32Bit() && (type_ == kUnconditional || type_ == kConditional)) { - return k32Bit; - } - if (IsCompareAndBranch()) { - // Compare and branch instructions can only be encoded on 16 bits. - return k16Bit; - } - return assembler_->CanRelocateBranches() ? k16Bit : k32Bit; - } - // When the target is resolved, we know the best encoding for it. - int32_t delta = target_ - location_ - 4; - if (delta < 0) { - delta = -delta; - } - switch (type_) { - case kUnconditional: - if (assembler_->IsForced32Bit() || delta >= (1 << 11)) { - return k32Bit; - } else { - return k16Bit; - } - case kConditional: - if (assembler_->IsForced32Bit() || delta >= (1 << 8)) { - return k32Bit; - } else { - return k16Bit; - } - case kCompareAndBranchZero: - case kCompareAndBranchNonZero: - if (delta >= (1 << 7)) { - return k32Bit; // Will cause this branch to become invalid. - } - return k16Bit; - - case kUnconditionalX: - case kUnconditionalLinkX: - return k16Bit; - case kUnconditionalLink: - return k32Bit; - } - LOG(FATAL) << "Cannot reach"; - return k16Bit; - } - - static constexpr uint32_t kUnresolved = 0xffffffff; // Value for target_ for unresolved. - const Thumb2Assembler* assembler_; - Type type_; - uint32_t location_; // Offset into assembler buffer in bytes. - uint32_t target_; // Offset into assembler buffer in bytes. - Size size_; - Condition cond_; - const Register rn_; - }; - - std::vector<Branch*> branches_; - - // Add a resolved branch and return its size. - Branch::Size AddBranch(Branch::Type type, uint32_t location, uint32_t target, - Condition cond = AL) { - branches_.push_back(new Branch(this, type, location, target, cond)); - return branches_[branches_.size()-1]->GetSize(); - } - - // Add a compare and branch (with a register) and return its id. 
- uint16_t AddBranch(Branch::Type type, uint32_t location, Register rn) { - branches_.push_back(new Branch(this, type, location, rn)); - return branches_.size() - 1; + FixupId AddFixup(Fixup fixup) { + FixupId fixup_id = static_cast<FixupId>(fixups_.size()); + fixups_.push_back(fixup); + // For iterating using FixupId, we need the next id to be representable. + DCHECK_EQ(static_cast<size_t>(static_cast<FixupId>(fixups_.size())), fixups_.size()); + return fixup_id; } - // Add an unresolved branch and return its id. - uint16_t AddBranch(Branch::Type type, - uint32_t location, - Condition cond = AL, - bool is_near = false) { - Branch* branch = new Branch(this, type, location, cond); - if (is_near) { - branch->ResetSize(Branch::k16Bit); - } - branches_.push_back(branch); - return branches_.size() - 1; - } - - Branch* GetBranch(uint16_t branchid) { - if (branchid >= branches_.size()) { - return nullptr; - } - return branches_[branchid]; + Fixup* GetFixup(FixupId fixup_id) { + DCHECK_LT(fixup_id, fixups_.size()); + return &fixups_[fixup_id]; } - void EmitBranches(); - void MakeHoleForBranch(uint32_t location, uint32_t size); + void BindLabel(Label* label, uint32_t bound_pc); + void BindLiterals(); + void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size, + std::deque<FixupId>* fixups_to_recalculate); + uint32_t AdjustFixups(); + void EmitFixups(uint32_t adjusted_code_size); + void EmitLiterals(); + + static int16_t BEncoding16(int32_t offset, Condition cond); + static int32_t BEncoding32(int32_t offset, Condition cond); + static int16_t CbxzEncoding16(Register rn, int32_t offset, Condition cond); + static int16_t CmpRnImm8Encoding16(Register rn, int32_t value); + static int16_t AddRdnRmEncoding16(Register rdn, Register rm); + static int32_t MovwEncoding32(Register rd, int32_t value); + static int32_t MovtEncoding32(Register rd, int32_t value); + static int32_t MovModImmEncoding32(Register rd, int32_t value); + static int16_t LdrLitEncoding16(Register rt, int32_t offset); + static int32_t LdrLitEncoding32(Register rt, int32_t offset); + static int32_t LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset); + static int32_t VldrsEncoding32(SRegister sd, Register rn, int32_t offset); + static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset); + static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset); + static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset); + + std::vector<Fixup> fixups_; + + // Use std::deque<> for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. + std::deque<Literal> literals_; + + // Data for AdjustedPosition(), see the description there. 
+ uint32_t last_position_adjustment_; + uint32_t last_old_position_; + FixupId last_fixup_id_; }; } // namespace arm diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 733441b889..68b7931a0c 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -78,13 +78,20 @@ class AssemblerThumb2Test : public AssemblerTest<arm::Thumb2Assembler, return imm_value; } + std::string RepeatInsn(size_t count, const std::string& insn) { + std::string result; + for (; count != 0u; --count) { + result += insn; + } + return result; + } + private: std::vector<arm::Register*> registers_; static constexpr const char* kThumb2AssemblyHeader = ".syntax unified\n.thumb\n"; }; - TEST_F(AssemblerThumb2Test, Toolchain) { EXPECT_TRUE(CheckTools()); } @@ -370,4 +377,577 @@ TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) { DriverStr(expected, "StoreWordPairToNonThumbOffset"); } +TEST_F(AssemblerThumb2Test, TwoCbzMaxOffset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 63; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 64; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cbz r0, 1f\n" + // cbz r0, label1 + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cbz r0, 2f\n" // cbz r0, label2 + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzMaxOffset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 0u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 0u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 0u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzBeyondMaxOffset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 63; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 65; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cmp r0, #0\n" // cbz r0, label1 + "beq.n 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.n 2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzBeyondMaxOffset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 2u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 4u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 4u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzSecondAtMaxB16Offset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 62; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 128; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ 
ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cbz r0, 1f\n" + // cbz r0, label1 + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.n 2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzSecondAtMaxB16Offset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 0u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 2u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 2u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzSecondBeyondMaxB16Offset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 62; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 129; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cmp r0, #0\n" // cbz r0, label1 + "beq.n 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.w 2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzSecondBeyondMaxB16Offset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 2u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 6u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 6u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzFirstAtMaxB16Offset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 127; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 64; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cmp r0, #0\n" // cbz r0, label1 + "beq.n 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cbz r0, 2f\n" // cbz r0, label2 + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzFirstAtMaxB16Offset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 2u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 2u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 2u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzFirstBeyondMaxB16Offset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 127; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 65; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cmp r0, #0\n" // cbz r0, label1 + "beq.w 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.n 
2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzFirstBeyondMaxB16Offset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 4u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 6u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 6u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralMax1KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R0, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 511; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "1:\n" + "ldr.n r0, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 0u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax1KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R0, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 512; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "1:\n" + "ldr.w r0, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralBeyondMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 2u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralMax4KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 2046; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "1:\n" + "ldr.w r1, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax4KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 2u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax4KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 2047; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "movw r1, #4096\n" // "as" does not consider (2f - 1f - 4) a constant expression for movw. 
+ "1:\n" + "add r1, pc\n" + "ldr r1, [r1, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralBeyondMax4KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralMax64KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 15) - 2u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "movw r1, #0xfffc\n" // "as" does not consider (2f - 1f - 4) a constant expression for movw. + "1:\n" + "add r1, pc\n" + "ldr r1, [r1, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax64KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax64KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 15) - 1u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "mov.w r1, #((2f - 1f - 4) & ~0xfff)\n" + "1:\n" + "add r1, pc\n" + "ldr r1, [r1, #((2f - 1b - 4) & 0xfff)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralBeyondMax64KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 8u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralMax1MiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 19) - 3u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "mov.w r1, #((2f - 1f - 4) & ~0xfff)\n" + "1:\n" + "add r1, pc\n" + "ldr r1, [r1, #((2f - 1b - 4) & 0xfff)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax1MiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 8u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax1MiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 19) - 2u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw r1, #(0x100000 & 0xffff)\n" + // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. 
+ "movt r1, #(0x100000 >> 16)\n" + "1:\n" + "add r1, pc\n" + "ldr.w r1, [r1, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralBeyondMax1MiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 12u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralFar) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 19) - 2u + 0x1234; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw r1, #((0x100000 + 2 * 0x1234) & 0xffff)\n" + // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. + "movt r1, #((0x100000 + 2 * 0x1234) >> 16)\n" + "1:\n" + "add r1, pc\n" + "ldr.w r1, [r1, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralFar"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 12u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralWideMax1KiB) { + arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); + __ LoadLiteral(arm::R1, arm::R3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 510; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "1:\n" + "ldrd r1, r3, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x87654321\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralWideMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 0u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralWideBeyondMax1KiB) { + arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); + __ LoadLiteral(arm::R1, arm::R3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 511; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n" + "1:\n" + "add ip, pc\n" + "ldrd r1, r3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x87654321\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralWideBeyondMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax256KiB) { + // The literal size must match but the type doesn't, so use an int32_t rather than float. 
+ arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::S3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1 << 17) - 3u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n" + "1:\n" + "add ip, pc\n" + "vldr s3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralSingleMax256KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax256KiB) { + // The literal size must match but the type doesn't, so use an int64_t rather than double. + arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); + __ LoadLiteral(arm::D3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1 << 17) - 2u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw ip, #(0x40000 & 0xffff)\n" + // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. + "movt ip, #(0x40000 >> 16)\n" + "1:\n" + "add ip, pc\n" + "vldr d3, [ip, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x87654321\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralDoubleBeyondMax256KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralDoubleFar) { + // The literal size must match but the type doesn't, so use an int64_t rather than double. + arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); + __ LoadLiteral(arm::D3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1 << 17) - 2u + 0x1234; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw ip, #((0x40000 + 2 * 0x1234) & 0xffff)\n" + // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. 
+ "movt ip, #((0x40000 + 2 * 0x1234) >> 16)\n" + "1:\n" + "add ip, pc\n" + "vldr d3, [ip, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x87654321\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralDoubleFar"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u, + __ GetAdjustedPosition(label.Position())); +} + } // namespace art diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index cc78002ab0..eb8de0620b 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -31,7 +31,7 @@ namespace arm64 { #define ___ vixl_masm_-> #endif -void Arm64Assembler::EmitSlowPaths() { +void Arm64Assembler::FinalizeCode() { if (!exception_blocks_.empty()) { for (size_t i = 0; i < exception_blocks_.size(); i++) { EmitExceptionPoll(exception_blocks_.at(i)); diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index fa9faed66b..b53c11bc24 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -73,8 +73,8 @@ class Arm64Assembler FINAL : public Assembler { delete vixl_masm_; } - // Emit slow paths queued during assembly. - void EmitSlowPaths(); + // Finalize the code. + void FinalizeCode() OVERRIDE; // Size of generated code. size_t CodeSize() const; diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index b016e74aba..6d8a98931f 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -80,10 +80,11 @@ void AssemblerBuffer::FinalizeInstructions(const MemoryRegion& instructions) { } -void AssemblerBuffer::ExtendCapacity() { +void AssemblerBuffer::ExtendCapacity(size_t min_capacity) { size_t old_size = Size(); size_t old_capacity = Capacity(); size_t new_capacity = std::min(old_capacity * 2, old_capacity + 1 * MB); + new_capacity = std::max(new_capacity, min_capacity); // Allocate the new data area and copy contents of the old one to it. uint8_t* new_contents = NewContents(new_capacity); diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 672e1503be..0381af3956 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -199,13 +199,18 @@ class AssemblerBuffer { *reinterpret_cast<T*>(contents_ + position) = value; } - void Move(size_t newposition, size_t oldposition) { - CHECK(HasEnsuredCapacity()); - // Move the contents of the buffer from oldposition to - // newposition by nbytes. - size_t nbytes = Size() - oldposition; - memmove(contents_ + newposition, contents_ + oldposition, nbytes); - cursor_ += newposition - oldposition; + void Resize(size_t new_size) { + if (new_size > Capacity()) { + ExtendCapacity(new_size); + } + cursor_ = contents_ + new_size; + } + + void Move(size_t newposition, size_t oldposition, size_t size) { + // Move a chunk of the buffer from oldposition to newposition. + DCHECK_LE(oldposition + size, Size()); + DCHECK_LE(newposition + size, Size()); + memmove(contents_ + newposition, contents_ + oldposition, size); } // Emit a fixup at the current location. 
@@ -350,7 +355,7 @@ class AssemblerBuffer { return data + capacity - kMinimumGap; } - void ExtendCapacity(); + void ExtendCapacity(size_t min_capacity = 0u); friend class AssemblerFixup; }; @@ -376,8 +381,8 @@ class Assembler { public: static Assembler* Create(InstructionSet instruction_set); - // Emit slow paths queued during assembly - virtual void EmitSlowPaths() { buffer_.EmitSlowPaths(this); } + // Finalize the code; emit slow paths, fixup branches, add literal pool, etc. + virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); } // Size of generated code virtual size_t CodeSize() const { return buffer_.Size(); } diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index a339633efe..017402dbd3 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -544,6 +544,7 @@ class AssemblerTest : public testing::Test { } void DriverWrapper(std::string assembly_text, std::string test_name) { + assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 1a2c9a9000..20f61f942b 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -65,20 +65,33 @@ int CompareIgnoringSpace(const char* s1, const char* s2) { return *s1 - *s2; } -void dump(std::vector<uint8_t>& code, const char* testname) { - // This will only work on the host. There is no as, objcopy or objdump on the - // device. +void InitResults() { + if (test_results.empty()) { + setup_results(); + } +} + +std::string GetToolsDir() { #ifndef HAVE_ANDROID_OS - static bool results_ok = false; + // This will only work on the host. There is no as, objcopy or objdump on the device. static std::string toolsdir; - if (!results_ok) { + if (toolsdir.empty()) { setup_results(); toolsdir = CommonRuntimeTest::GetAndroidTargetToolsDir(kThumb2); SetAndroidData(); - results_ok = true; } + return toolsdir; +#else + return std::string(); +#endif +} + +void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* const* results) { +#ifndef HAVE_ANDROID_OS + static std::string toolsdir = GetToolsDir(); + ScratchFile file; const char* filename = file.GetFilename().c_str(); @@ -130,9 +143,6 @@ void dump(std::vector<uint8_t>& code, const char* testname) { FILE *fp = popen(cmd, "r"); ASSERT_TRUE(fp != nullptr); - std::map<std::string, const char**>::iterator results = test_results.find(testname); - ASSERT_NE(results, test_results.end()); - uint32_t lineindex = 0; while (!feof(fp)) { @@ -141,14 +151,14 @@ void dump(std::vector<uint8_t>& code, const char* testname) { if (s == nullptr) { break; } - if (CompareIgnoringSpace(results->second[lineindex], testline) != 0) { + if (CompareIgnoringSpace(results[lineindex], testline) != 0) { LOG(FATAL) << "Output is not as expected at line: " << lineindex - << results->second[lineindex] << "/" << testline; + << results[lineindex] << "/" << testline; } ++lineindex; } // Check that we are at the end. 
- ASSERT_TRUE(results->second[lineindex] == nullptr); + ASSERT_TRUE(results[lineindex] == nullptr); fclose(fp); } @@ -163,8 +173,31 @@ void dump(std::vector<uint8_t>& code, const char* testname) { #define __ assembler-> +void EmitAndCheck(arm::Thumb2Assembler* assembler, const char* testname, + const char* const* results) { + __ FinalizeCode(); + size_t cs = __ CodeSize(); + std::vector<uint8_t> managed_code(cs); + MemoryRegion code(&managed_code[0], managed_code.size()); + __ FinalizeInstructions(code); + + DumpAndCheck(managed_code, testname, results); +} + +void EmitAndCheck(arm::Thumb2Assembler* assembler, const char* testname) { + InitResults(); + std::map<std::string, const char* const*>::iterator results = test_results.find(testname); + ASSERT_NE(results, test_results.end()); + + EmitAndCheck(assembler, testname, results->second); +} + +#undef __ + +#define __ assembler. + TEST(Thumb2AssemblerTest, SimpleMov) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(R1)); __ mov(R8, ShifterOperand(R9)); @@ -172,46 +205,31 @@ TEST(Thumb2AssemblerTest, SimpleMov) { __ mov(R0, ShifterOperand(1)); __ mov(R8, ShifterOperand(9)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleMov"); - delete assembler; + EmitAndCheck(&assembler, "SimpleMov"); } TEST(Thumb2AssemblerTest, SimpleMov32) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); - assembler->Force32Bit(); + arm::Thumb2Assembler assembler; + __ Force32Bit(); __ mov(R0, ShifterOperand(R1)); __ mov(R8, ShifterOperand(R9)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleMov32"); - delete assembler; + EmitAndCheck(&assembler, "SimpleMov32"); } TEST(Thumb2AssemblerTest, SimpleMovAdd) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(R1)); __ add(R0, R1, ShifterOperand(R2)); __ add(R0, R1, ShifterOperand()); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleMovAdd"); - delete assembler; + EmitAndCheck(&assembler, "SimpleMovAdd"); } TEST(Thumb2AssemblerTest, DataProcessingRegister) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(R1)); __ mvn(R0, ShifterOperand(R1)); @@ -249,16 +267,11 @@ TEST(Thumb2AssemblerTest, DataProcessingRegister) { // 32 bit variants. 
__ add(R12, R1, ShifterOperand(R0)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingRegister"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingRegister"); } TEST(Thumb2AssemblerTest, DataProcessingImmediate) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(0x55)); __ mvn(R0, ShifterOperand(0x55)); @@ -283,16 +296,11 @@ TEST(Thumb2AssemblerTest, DataProcessingImmediate) { __ movs(R0, ShifterOperand(0x55)); __ mvns(R0, ShifterOperand(0x55)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingImmediate"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingImmediate"); } TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediate) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(0x550055)); __ mvn(R0, ShifterOperand(0x550055)); @@ -311,17 +319,12 @@ TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediate) { __ cmp(R0, ShifterOperand(0x550055)); __ cmn(R0, ShifterOperand(0x550055)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingModifiedImmediate"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingModifiedImmediate"); } TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediates) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(0x550055)); __ mov(R0, ShifterOperand(0x55005500)); @@ -331,16 +334,11 @@ TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediates) { __ mov(R0, ShifterOperand(0x350)); // rotated to 2nd last position __ mov(R0, ShifterOperand(0x1a8)); // rotated to last position - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingModifiedImmediates"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingModifiedImmediates"); } TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R3, ShifterOperand(R4, LSL, 4)); __ mov(R3, ShifterOperand(R4, LSR, 5)); @@ -355,17 +353,12 @@ TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) { __ mov(R8, ShifterOperand(R4, ROR, 7)); __ mov(R8, ShifterOperand(R4, RRX)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingShiftedRegister"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingShiftedRegister"); } TEST(Thumb2AssemblerTest, BasicLoad) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R3, Address(R4, 24)); __ ldrb(R3, Address(R4, 24)); @@ -382,17 +375,12 @@ TEST(Thumb2AssemblerTest, 
BasicLoad) { __ ldrsb(R8, Address(R4, 24)); __ ldrsh(R8, Address(R4, 24)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "BasicLoad"); - delete assembler; + EmitAndCheck(&assembler, "BasicLoad"); } TEST(Thumb2AssemblerTest, BasicStore) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ str(R3, Address(R4, 24)); __ strb(R3, Address(R4, 24)); @@ -405,16 +393,11 @@ TEST(Thumb2AssemblerTest, BasicStore) { __ strb(R8, Address(R4, 24)); __ strh(R8, Address(R4, 24)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "BasicStore"); - delete assembler; + EmitAndCheck(&assembler, "BasicStore"); } TEST(Thumb2AssemblerTest, ComplexLoad) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R3, Address(R4, 24, Address::Mode::Offset)); __ ldr(R3, Address(R4, 24, Address::Mode::PreIndex)); @@ -451,17 +434,12 @@ TEST(Thumb2AssemblerTest, ComplexLoad) { __ ldrsh(R3, Address(R4, 24, Address::Mode::NegPreIndex)); __ ldrsh(R3, Address(R4, 24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "ComplexLoad"); - delete assembler; + EmitAndCheck(&assembler, "ComplexLoad"); } TEST(Thumb2AssemblerTest, ComplexStore) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ str(R3, Address(R4, 24, Address::Mode::Offset)); __ str(R3, Address(R4, 24, Address::Mode::PreIndex)); @@ -484,16 +462,11 @@ TEST(Thumb2AssemblerTest, ComplexStore) { __ strh(R3, Address(R4, 24, Address::Mode::NegPreIndex)); __ strh(R3, Address(R4, 24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "ComplexStore"); - delete assembler; + EmitAndCheck(&assembler, "ComplexStore"); } TEST(Thumb2AssemblerTest, NegativeLoadStore) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R3, Address(R4, -24, Address::Mode::Offset)); __ ldr(R3, Address(R4, -24, Address::Mode::PreIndex)); @@ -551,30 +524,20 @@ TEST(Thumb2AssemblerTest, NegativeLoadStore) { __ strh(R3, Address(R4, -24, Address::Mode::NegPreIndex)); __ strh(R3, Address(R4, -24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "NegativeLoadStore"); - delete assembler; + EmitAndCheck(&assembler, "NegativeLoadStore"); } TEST(Thumb2AssemblerTest, SimpleLoadStoreDual) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ strd(R2, Address(R0, 24, Address::Mode::Offset)); __ ldrd(R2, Address(R0, 24, Address::Mode::Offset)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion 
code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleLoadStoreDual"); - delete assembler; + EmitAndCheck(&assembler, "SimpleLoadStoreDual"); } TEST(Thumb2AssemblerTest, ComplexLoadStoreDual) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ strd(R2, Address(R0, 24, Address::Mode::Offset)); __ strd(R2, Address(R0, 24, Address::Mode::PreIndex)); @@ -590,16 +553,11 @@ TEST(Thumb2AssemblerTest, ComplexLoadStoreDual) { __ ldrd(R2, Address(R0, 24, Address::Mode::NegPreIndex)); __ ldrd(R2, Address(R0, 24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "ComplexLoadStoreDual"); - delete assembler; + EmitAndCheck(&assembler, "ComplexLoadStoreDual"); } TEST(Thumb2AssemblerTest, NegativeLoadStoreDual) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ strd(R2, Address(R0, -24, Address::Mode::Offset)); __ strd(R2, Address(R0, -24, Address::Mode::PreIndex)); @@ -615,16 +573,11 @@ TEST(Thumb2AssemblerTest, NegativeLoadStoreDual) { __ ldrd(R2, Address(R0, -24, Address::Mode::NegPreIndex)); __ ldrd(R2, Address(R0, -24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "NegativeLoadStoreDual"); - delete assembler; + EmitAndCheck(&assembler, "NegativeLoadStoreDual"); } TEST(Thumb2AssemblerTest, SimpleBranch) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ mov(R0, ShifterOperand(2)); @@ -658,17 +611,12 @@ TEST(Thumb2AssemblerTest, SimpleBranch) { __ Bind(&l5); __ mov(R0, ShifterOperand(6)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleBranch"); - delete assembler; + EmitAndCheck(&assembler, "SimpleBranch"); } TEST(Thumb2AssemblerTest, LongBranch) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); - assembler->Force32Bit(); + arm::Thumb2Assembler assembler; + __ Force32Bit(); // 32 bit branches. Label l1; __ mov(R0, ShifterOperand(2)); @@ -703,16 +651,11 @@ TEST(Thumb2AssemblerTest, LongBranch) { __ Bind(&l5); __ mov(R0, ShifterOperand(6)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LongBranch"); - delete assembler; + EmitAndCheck(&assembler, "LongBranch"); } TEST(Thumb2AssemblerTest, LoadMultiple) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; // 16 bit. 
__ ldm(DB_W, R4, (1 << R0 | 1 << R3)); @@ -724,16 +667,11 @@ TEST(Thumb2AssemblerTest, LoadMultiple) { // Single reg is converted to ldr __ ldm(DB_W, R4, (1 << R5)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LoadMultiple"); - delete assembler; + EmitAndCheck(&assembler, "LoadMultiple"); } TEST(Thumb2AssemblerTest, StoreMultiple) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; // 16 bit. __ stm(IA_W, R4, (1 << R0 | 1 << R3)); @@ -746,16 +684,11 @@ TEST(Thumb2AssemblerTest, StoreMultiple) { __ stm(IA_W, R4, (1 << R5)); __ stm(IA, R4, (1 << R5)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "StoreMultiple"); - delete assembler; + EmitAndCheck(&assembler, "StoreMultiple"); } TEST(Thumb2AssemblerTest, MovWMovT) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ movw(R4, 0); // 16 bit. __ movw(R4, 0x34); // 16 bit. @@ -768,16 +701,11 @@ TEST(Thumb2AssemblerTest, MovWMovT) { __ movt(R0, 0x1234); __ movt(R1, 0xffff); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "MovWMovT"); - delete assembler; + EmitAndCheck(&assembler, "MovWMovT"); } TEST(Thumb2AssemblerTest, SpecialAddSub) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ add(R2, SP, ShifterOperand(0x50)); // 16 bit. __ add(SP, SP, ShifterOperand(0x50)); // 16 bit. @@ -792,16 +720,11 @@ TEST(Thumb2AssemblerTest, SpecialAddSub) { __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SpecialAddSub"); - delete assembler; + EmitAndCheck(&assembler, "SpecialAddSub"); } TEST(Thumb2AssemblerTest, StoreToOffset) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ StoreToOffset(kStoreWord, R2, R4, 12); // Simple __ StoreToOffset(kStoreWord, R2, R4, 0x2000); // Offset too big. 
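The rewritten driver above funnels every test through EmitAndCheck(), and the ordering it establishes is that FinalizeCode() runs before CodeSize(): once slow paths, branch fixups and literal pools can add bytes after the last emitted instruction, querying the size first could under-allocate the output buffer. A short sketch of the required sequence, assuming an already filled Assembler* named `assembler` (the names mirror DriverWrapper/EmitAndCheck above and are illustrative, not new API):

    assembler->FinalizeCode();                  // Emit slow paths, fix up branches, add the literal pool.
    size_t size = assembler->CodeSize();        // The size is only final after FinalizeCode().
    std::vector<uint8_t> code(size);
    MemoryRegion region(&code[0], code.size());
    assembler->FinalizeInstructions(region);    // Copy the finished instructions into the region.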
@@ -809,17 +732,12 @@ TEST(Thumb2AssemblerTest, StoreToOffset) { __ StoreToOffset(kStoreHalfword, R0, R12, 12); __ StoreToOffset(kStoreByte, R2, R12, 12); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "StoreToOffset"); - delete assembler; + EmitAndCheck(&assembler, "StoreToOffset"); } TEST(Thumb2AssemblerTest, IfThen) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ it(EQ); __ mov(R1, ShifterOperand(1), EQ); @@ -848,16 +766,11 @@ TEST(Thumb2AssemblerTest, IfThen) { __ mov(R3, ShifterOperand(3), EQ); __ mov(R4, ShifterOperand(4), NE); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "IfThen"); - delete assembler; + EmitAndCheck(&assembler, "IfThen"); } TEST(Thumb2AssemblerTest, CbzCbnz) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ cbz(R2, &l1); @@ -873,16 +786,11 @@ TEST(Thumb2AssemblerTest, CbzCbnz) { __ Bind(&l2); __ mov(R2, ShifterOperand(4)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CbzCbnz"); - delete assembler; + EmitAndCheck(&assembler, "CbzCbnz"); } TEST(Thumb2AssemblerTest, Multiply) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mul(R0, R1, R0); __ mul(R0, R1, R2); @@ -898,16 +806,11 @@ TEST(Thumb2AssemblerTest, Multiply) { __ umull(R0, R1, R2, R3); __ umull(R8, R9, R10, R11); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Multiply"); - delete assembler; + EmitAndCheck(&assembler, "Multiply"); } TEST(Thumb2AssemblerTest, Divide) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ sdiv(R0, R1, R2); __ sdiv(R8, R9, R10); @@ -915,16 +818,11 @@ TEST(Thumb2AssemblerTest, Divide) { __ udiv(R0, R1, R2); __ udiv(R8, R9, R10); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Divide"); - delete assembler; + EmitAndCheck(&assembler, "Divide"); } TEST(Thumb2AssemblerTest, VMov) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vmovs(S1, 1.0); __ vmovd(D1, 1.0); @@ -932,17 +830,12 @@ TEST(Thumb2AssemblerTest, VMov) { __ vmovs(S1, S2); __ vmovd(D1, D2); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "VMov"); - delete assembler; + EmitAndCheck(&assembler, "VMov"); } TEST(Thumb2AssemblerTest, BasicFloatingPoint) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vadds(S0, S1, S2); __ vsubs(S0, S1, S2); @@ -964,16 +857,11 @@ 
TEST(Thumb2AssemblerTest, BasicFloatingPoint) { __ vnegd(D0, D1); __ vsqrtd(D0, D1); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "BasicFloatingPoint"); - delete assembler; + EmitAndCheck(&assembler, "BasicFloatingPoint"); } TEST(Thumb2AssemblerTest, FloatingPointConversions) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vcvtsd(S2, D2); __ vcvtds(D2, S2); @@ -990,16 +878,11 @@ TEST(Thumb2AssemblerTest, FloatingPointConversions) { __ vcvtud(S1, D2); __ vcvtdu(D1, S2); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "FloatingPointConversions"); - delete assembler; + EmitAndCheck(&assembler, "FloatingPointConversions"); } TEST(Thumb2AssemblerTest, FloatingPointComparisons) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vcmps(S0, S1); __ vcmpd(D0, D1); @@ -1007,57 +890,37 @@ TEST(Thumb2AssemblerTest, FloatingPointComparisons) { __ vcmpsz(S2); __ vcmpdz(D2); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "FloatingPointComparisons"); - delete assembler; + EmitAndCheck(&assembler, "FloatingPointComparisons"); } TEST(Thumb2AssemblerTest, Calls) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ blx(LR); __ bx(LR); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Calls"); - delete assembler; + EmitAndCheck(&assembler, "Calls"); } TEST(Thumb2AssemblerTest, Breakpoint) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ bkpt(0); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Breakpoint"); - delete assembler; + EmitAndCheck(&assembler, "Breakpoint"); } TEST(Thumb2AssemblerTest, StrR1) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ str(R1, Address(SP, 68)); __ str(R1, Address(SP, 1068)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "StrR1"); - delete assembler; + EmitAndCheck(&assembler, "StrR1"); } TEST(Thumb2AssemblerTest, VPushPop) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vpushs(S2, 4); __ vpushd(D2, 4); @@ -1065,16 +928,11 @@ TEST(Thumb2AssemblerTest, VPushPop) { __ vpops(S2, 4); __ vpopd(D2, 4); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "VPushPop"); - delete assembler; + EmitAndCheck(&assembler, "VPushPop"); } 
TEST(Thumb2AssemblerTest, Max16BitBranch) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ b(&l1); @@ -1084,16 +942,11 @@ TEST(Thumb2AssemblerTest, Max16BitBranch) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Max16BitBranch"); - delete assembler; + EmitAndCheck(&assembler, "Max16BitBranch"); } TEST(Thumb2AssemblerTest, Branch32) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ b(&l1); @@ -1103,16 +956,11 @@ TEST(Thumb2AssemblerTest, Branch32) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Branch32"); - delete assembler; + EmitAndCheck(&assembler, "Branch32"); } TEST(Thumb2AssemblerTest, CompareAndBranchMax) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ cbz(R4, &l1); @@ -1122,16 +970,11 @@ TEST(Thumb2AssemblerTest, CompareAndBranchMax) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CompareAndBranchMax"); - delete assembler; + EmitAndCheck(&assembler, "CompareAndBranchMax"); } TEST(Thumb2AssemblerTest, CompareAndBranchRelocation16) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ cbz(R4, &l1); @@ -1141,16 +984,11 @@ TEST(Thumb2AssemblerTest, CompareAndBranchRelocation16) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CompareAndBranchRelocation16"); - delete assembler; + EmitAndCheck(&assembler, "CompareAndBranchRelocation16"); } TEST(Thumb2AssemblerTest, CompareAndBranchRelocation32) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ cbz(R4, &l1); @@ -1160,16 +998,11 @@ TEST(Thumb2AssemblerTest, CompareAndBranchRelocation32) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CompareAndBranchRelocation32"); - delete assembler; + EmitAndCheck(&assembler, "CompareAndBranchRelocation32"); } TEST(Thumb2AssemblerTest, MixedBranch32) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; Label l2; @@ -1184,16 +1017,11 @@ TEST(Thumb2AssemblerTest, MixedBranch32) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "MixedBranch32"); - 
delete assembler; + EmitAndCheck(&assembler, "MixedBranch32"); } TEST(Thumb2AssemblerTest, Shifts) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; // 16 bit __ Lsl(R0, R1, 5); @@ -1240,16 +1068,11 @@ TEST(Thumb2AssemblerTest, Shifts) { __ Lsr(R0, R8, R2, true); __ Asr(R0, R1, R8, true); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Shifts"); - delete assembler; + EmitAndCheck(&assembler, "Shifts"); } TEST(Thumb2AssemblerTest, LoadStoreRegOffset) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; // 16 bit. __ ldr(R0, Address(R1, R2)); @@ -1272,16 +1095,11 @@ TEST(Thumb2AssemblerTest, LoadStoreRegOffset) { __ ldr(R0, Address(R1, R8)); __ str(R0, Address(R1, R8)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LoadStoreRegOffset"); - delete assembler; + EmitAndCheck(&assembler, "LoadStoreRegOffset"); } TEST(Thumb2AssemblerTest, LoadStoreLiteral) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R0, Address(4)); __ str(R0, Address(4)); @@ -1295,16 +1113,11 @@ TEST(Thumb2AssemblerTest, LoadStoreLiteral) { __ str(R0, Address(0x3ff)); // 32 bit (no 16 bit str(literal)). __ str(R0, Address(0x7ff)); // 11 bits (32 bit). - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LoadStoreLiteral"); - delete assembler; + EmitAndCheck(&assembler, "LoadStoreLiteral"); } TEST(Thumb2AssemblerTest, LoadStoreLimits) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R0, Address(R4, 124)); // 16 bit. __ ldr(R0, Address(R4, 128)); // 32 bit. @@ -1330,30 +1143,20 @@ TEST(Thumb2AssemblerTest, LoadStoreLimits) { __ strh(R0, Address(R4, 62)); // 16 bit. __ strh(R0, Address(R4, 64)); // 32 bit. 
- size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LoadStoreLimits"); - delete assembler; + EmitAndCheck(&assembler, "LoadStoreLimits"); } TEST(Thumb2AssemblerTest, CompareAndBranch) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; - arm::NearLabel label; + Label label; __ CompareAndBranchIfZero(arm::R0, &label); __ CompareAndBranchIfZero(arm::R11, &label); __ CompareAndBranchIfNonZero(arm::R0, &label); __ CompareAndBranchIfNonZero(arm::R11, &label); __ Bind(&label); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CompareAndBranch"); - delete assembler; + EmitAndCheck(&assembler, "CompareAndBranch"); } #undef __ diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 841d6a00c0..280ed779b3 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -4832,7 +4832,7 @@ const char* CompareAndBranchResults[] = { nullptr }; -std::map<std::string, const char**> test_results; +std::map<std::string, const char* const*> test_results; void setup_results() { test_results["SimpleMov"] = SimpleMovResults; test_results["SimpleMov32"] = SimpleMov32Results; |