diff options
Diffstat (limited to 'compiler')
42 files changed, 1392 insertions, 640 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index 3f5271d31f..67536f00b9 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -234,6 +234,7 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT else # host LOCAL_CLANG := $(ART_HOST_CLANG) LOCAL_CFLAGS += $(ART_HOST_CFLAGS) + LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS) LOCAL_LDLIBS := $(ART_HOST_LDLIBS) ifeq ($$(art_ndebug_or_debug),debug) LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS) diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h index 83dfc28844..e9402e39e5 100644 --- a/compiler/dex/dataflow_iterator-inl.h +++ b/compiler/dex/dataflow_iterator-inl.h @@ -181,10 +181,16 @@ inline BasicBlock* LoopRepeatingTopologicalSortIterator::Next(bool had_change) { idx_ += 1; BasicBlock* bb = mir_graph_->GetBasicBlock((*block_id_list_)[idx]); DCHECK(bb != nullptr); + if ((*loop_ends_)[idx] != 0u) { + // If bb->visited is false, the loop needs to be processed from scratch. + // Otherwise we mark it as recalculating; for a natural loop we will not + // need to recalculate any block in the loop anyway, and for unnatural + // loops we will recalculate the loop head only if one of its predecessors + // actually changes. + bool recalculating = bb->visited; + loop_head_stack_->push_back(std::make_pair(idx, recalculating)); + } if (!bb->visited) { - if ((*loop_ends_)[idx] != 0u) { - loop_head_stack_->push_back(std::make_pair(idx, false)); // Not recalculating. - } return bb; } } diff --git a/compiler/dex/mir_graph_test.cc b/compiler/dex/mir_graph_test.cc index b3ad0407e2..49b7511b42 100644 --- a/compiler/dex/mir_graph_test.cc +++ b/compiler/dex/mir_graph_test.cc @@ -15,6 +15,7 @@ */ #include "compiler_ir.h" +#include "dataflow_iterator-inl.h" #include "mir_graph.h" #include "gtest/gtest.h" @@ -374,4 +375,72 @@ TEST_F(TopologicalSortOrderTest, LoopWithTwoEntryPoints) { CheckLoopEnds(loop_ends); } +TEST_F(TopologicalSortOrderTest, UnnaturalLoops) { + const BBDef bbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(10)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED1(1)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED2(11, 3)), // Unnatural loop head (top-level). + DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED2(3, 4)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(9, 7), DEF_PRED1(5)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(8), DEF_PRED1(6)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(9), DEF_PRED2(10, 7)), // Unnatural loop head (nested). + DEF_BB(kDalvikByteCode, DEF_SUCC1(10), DEF_PRED2(6, 8)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 11), DEF_PRED1(9)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 2), DEF_PRED1(10)), + }; + const BasicBlockId expected_order[] = { + 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 2 + }; + const uint16_t loop_ends[] = { + 0, 0, 10, 0, 0, 0, 9, 0, 0, 0, 0, + }; + + PrepareBasicBlocks(bbs); + ComputeTopologicalSortOrder(); + CheckOrder(expected_order); + CheckLoopEnds(loop_ends); + + const std::pair<BasicBlockId, bool> expected_and_change[] = { + { 1, false }, + { 3, false }, + { 4, true }, // Initial run of the outer loop. + { 5, true }, + { 6, true }, + { 7, true }, + { 8, true }, // Initial run of the inner loop. + { 9, true }, + { 10, true }, + { 8, true }, // Recalculation of the inner loop - changed. + { 9, true }, + { 10, true }, + { 8, false }, // Recalculation of the inner loop - unchanged. + { 11, true }, + { 4, true }, // Recalculation of the outer loop - changed. + { 5, true }, + { 6, true }, + { 7, false }, // No change: skip inner loop head because inputs are unchanged. + { 9, true }, + { 10, true }, + { 8, true }, // Recalculation of the inner loop - changed. + { 9, true }, + { 10, true }, + { 8, false }, // Recalculation of the inner loop - unchanged. + { 11, true }, + { 4, false }, // Recalculation of the outer loop - unchanged. + { 2, false }, + }; + size_t pos = 0; + LoopRepeatingTopologicalSortIterator iter(cu_.mir_graph.get()); + bool change = false; + for (BasicBlock* bb = iter.Next(change); bb != nullptr; bb = iter.Next(change)) { + ASSERT_NE(arraysize(expected_and_change), pos); + ASSERT_EQ(expected_and_change[pos].first, bb->id) << pos; + change = expected_and_change[pos].second; + ++pos; + } + ASSERT_EQ(arraysize(expected_and_change), pos); +} + } // namespace art diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 7b1ec398d0..645511ed9f 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -1790,7 +1790,8 @@ bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) { pred_mask_union |= pred_mask; } } - DCHECK_EQ(((1u << (IsLoopHead(bb->id) ? bb->nesting_depth - 1u: bb->nesting_depth)) - 1u), + // DCHECK_EQ() may not hold for unnatural loop heads, so use DCHECK_GE(). + DCHECK_GE(((1u << (IsLoopHead(bb->id) ? bb->nesting_depth - 1u: bb->nesting_depth)) - 1u), pred_mask_union); suspend_checks_in_loops &= pred_mask_union; } diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc index ac7a4a7758..6d485986f6 100644 --- a/compiler/dex/verified_method.cc +++ b/compiler/dex/verified_method.cc @@ -98,7 +98,7 @@ bool VerifiedMethod::GenerateGcMap(verifier::MethodVerifier* method_verifier) { } size_t ref_bitmap_bytes = RoundUp(ref_bitmap_bits, kBitsPerByte) / kBitsPerByte; // There are 2 bytes to encode the number of entries. - if (num_entries >= 65536) { + if (num_entries > std::numeric_limits<uint16_t>::max()) { LOG(WARNING) << "Cannot encode GC map for method with " << num_entries << " entries: " << PrettyMethod(method_verifier->GetMethodReference().dex_method_index, *method_verifier->GetMethodReference().dex_file); diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h index 242e3dfe6e..07f9a9bd9f 100644 --- a/compiler/dex/verified_method.h +++ b/compiler/dex/verified_method.h @@ -120,7 +120,7 @@ class VerifiedMethod { DequickenMap dequicken_map_; SafeCastSet safe_cast_set_; - bool has_verification_failures_; + bool has_verification_failures_ = false; // Copy of mapping generated by verifier of dex PCs of string init invocations // to the set of other registers that the receiver has been copied into. diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc index 4971f0ef10..4d423d007f 100644 --- a/compiler/dwarf/dwarf_test.cc +++ b/compiler/dwarf/dwarf_test.cc @@ -26,11 +26,11 @@ namespace art { namespace dwarf { -constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT; - // Run the tests only on host since we need objdump. #ifndef HAVE_ANDROID_OS +constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT; + TEST_F(DwarfTest, DebugFrame) { const bool is64bit = false; diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 32bde8e3b4..73e121f1cd 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -110,10 +110,6 @@ bool ImageWriter::PrepareImageAddressSpace() { CheckNoDexObjects(); } - if (!AllocMemory()) { - return false; - } - if (kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); CheckNonImageClassesRemoved(); @@ -123,6 +119,12 @@ bool ImageWriter::PrepareImageAddressSpace() { CalculateNewObjectOffsets(); Thread::Current()->TransitionFromRunnableToSuspended(kNative); + // This needs to happen after CalculateNewObjectOffsets since it relies on intern_table_bytes_ and + // bin size sums being calculated. + if (!AllocMemory()) { + return false; + } + return true; } @@ -205,7 +207,7 @@ bool ImageWriter::Write(const std::string& image_filename, } // Write out the image bitmap at the page aligned start of the image end. - const auto& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap); + const ImageSection& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap); CHECK_ALIGNED(bitmap_section.Offset(), kPageSize); if (!image_file->Write(reinterpret_cast<char*>(image_bitmap_->Begin()), bitmap_section.Size(), bitmap_section.Offset())) { @@ -222,26 +224,10 @@ bool ImageWriter::Write(const std::string& image_filename, return true; } -void ImageWriter::SetImageOffset(mirror::Object* object, - ImageWriter::BinSlot bin_slot, - size_t offset) { +void ImageWriter::SetImageOffset(mirror::Object* object, size_t offset) { DCHECK(object != nullptr); DCHECK_NE(offset, 0U); - mirror::Object* obj = reinterpret_cast<mirror::Object*>(image_->Begin() + offset); - DCHECK_ALIGNED(obj, kObjectAlignment); - static size_t max_offset = 0; - max_offset = std::max(max_offset, offset); - image_bitmap_->Set(obj); // Mark the obj as mutated, since we will end up changing it. - { - // Remember the object-inside-of-the-image's hash code so we can restore it after the copy. - auto hash_it = saved_hashes_map_.find(bin_slot); - if (hash_it != saved_hashes_map_.end()) { - std::pair<BinSlot, uint32_t> slot_hash = *hash_it; - saved_hashes_.push_back(std::make_pair(obj, slot_hash.second)); - saved_hashes_map_.erase(hash_it); - } - } // The object is already deflated from when we set the bin slot. Just overwrite the lock word. object->SetLockWord(LockWord::FromForwardingAddress(offset), false); DCHECK_EQ(object->GetLockWord(false).ReadBarrierState(), 0u); @@ -262,7 +248,7 @@ void ImageWriter::AssignImageOffset(mirror::Object* object, ImageWriter::BinSlot size_t new_offset = image_objects_offset_begin_ + previous_bin_sizes + bin_slot.GetIndex(); DCHECK_ALIGNED(new_offset, kObjectAlignment); - SetImageOffset(object, bin_slot, new_offset); + SetImageOffset(object, new_offset); DCHECK_LT(new_offset, image_end_); } @@ -302,14 +288,14 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) { // No hash, don't need to save it. break; case LockWord::kHashCode: - saved_hashes_map_[bin_slot] = lw.GetHashCode(); + DCHECK(saved_hashcode_map_.find(object) == saved_hashcode_map_.end()); + saved_hashcode_map_.emplace(object, lw.GetHashCode()); break; default: LOG(FATAL) << "Unreachable."; UNREACHABLE(); } - object->SetLockWord(LockWord::FromForwardingAddress(static_cast<uint32_t>(bin_slot)), - false); + object->SetLockWord(LockWord::FromForwardingAddress(bin_slot.Uint32Value()), false); DCHECK_EQ(object->GetLockWord(false).ReadBarrierState(), 0u); DCHECK(IsImageBinSlotAssigned(object)); } @@ -487,11 +473,8 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { ++bin_slot_count_[bin]; - DCHECK_LT(GetBinSizeSum(), image_->Size()); - // Grow the image closer to the end by the object we just assigned. image_end_ += offset_delta; - DCHECK_LT(image_end_, image_->Size()); } bool ImageWriter::WillMethodBeDirty(ArtMethod* m) const { @@ -535,10 +518,8 @@ ImageWriter::BinSlot ImageWriter::GetImageBinSlot(mirror::Object* object) const } bool ImageWriter::AllocMemory() { - auto* runtime = Runtime::Current(); - const size_t heap_size = runtime->GetHeap()->GetTotalMemory(); - // Add linear alloc usage since we need to have room for the ArtFields. - const size_t length = RoundUp(heap_size + runtime->GetLinearAlloc()->GetUsedMemory(), kPageSize); + const size_t length = RoundUp(image_objects_offset_begin_ + GetBinSizeSum() + intern_table_bytes_, + kPageSize); std::string error_msg; image_.reset(MemMap::MapAnonymous("image writer image", nullptr, length, PROT_READ | PROT_WRITE, false, false, &error_msg)); @@ -547,9 +528,10 @@ bool ImageWriter::AllocMemory() { return false; } - // Create the image bitmap. - image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create("image bitmap", image_->Begin(), - RoundUp(length, kPageSize))); + // Create the image bitmap, only needs to cover mirror object section which is up to image_end_. + CHECK_LE(image_end_, length); + image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create( + "image bitmap", image_->Begin(), RoundUp(image_end_, kPageSize))); if (image_bitmap_.get() == nullptr) { LOG(ERROR) << "Failed to allocate memory for image bitmap"; return false; @@ -569,42 +551,6 @@ bool ImageWriter::ComputeLazyFieldsForClassesVisitor(Class* c, void* /*arg*/) { return true; } -// Collect all the java.lang.String in the heap and put them in the output strings_ array. -class StringCollector { - public: - StringCollector(Handle<mirror::ObjectArray<mirror::String>> strings, size_t index) - : strings_(strings), index_(index) { - } - static void Callback(Object* obj, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - auto* collector = reinterpret_cast<StringCollector*>(arg); - if (obj->GetClass()->IsStringClass()) { - collector->strings_->SetWithoutChecks<false>(collector->index_++, obj->AsString()); - } - } - size_t GetIndex() const { - return index_; - } - - private: - Handle<mirror::ObjectArray<mirror::String>> strings_; - size_t index_; -}; - -// Compare strings based on length, used for sorting strings by length / reverse length. -class LexicographicalStringComparator { - public: - bool operator()(const mirror::HeapReference<mirror::String>& lhs, - const mirror::HeapReference<mirror::String>& rhs) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - mirror::String* lhs_s = lhs.AsMirrorPtr(); - mirror::String* rhs_s = rhs.AsMirrorPtr(); - uint16_t* lhs_begin = lhs_s->GetValue(); - uint16_t* rhs_begin = rhs_s->GetValue(); - return std::lexicographical_compare(lhs_begin, lhs_begin + lhs_s->GetLength(), - rhs_begin, rhs_begin + rhs_s->GetLength()); - } -}; - void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) { if (!obj->GetClass()->IsStringClass()) { return; @@ -769,7 +715,8 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) { DCHECK_EQ(obj, obj->AsString()->Intern()); return; } - mirror::String* const interned = obj->AsString()->Intern(); + mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrong( + obj->AsString()->Intern()); if (obj != interned) { if (!IsImageBinSlotAssigned(interned)) { // interned obj is after us, allocate its location early @@ -965,7 +912,6 @@ void ImageWriter::CalculateNewObjectOffsets() { // know where image_roots is going to end up image_end_ += RoundUp(sizeof(ImageHeader), kObjectAlignment); // 64-bit-alignment - DCHECK_LT(image_end_, image_->Size()); image_objects_offset_begin_ = image_end_; // Prepare bin slots for dex cache arrays. PrepareDexCacheArraySlots(); @@ -997,7 +943,6 @@ void ImageWriter::CalculateNewObjectOffsets() { // Transform each object's bin slot into an offset which will be used to do the final copy. heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this); - DCHECK(saved_hashes_map_.empty()); // All binslot hashes should've been put into vector by now. DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_); @@ -1010,6 +955,11 @@ void ImageWriter::CalculateNewObjectOffsets() { bin_slot_previous_sizes_[native_reloc.bin_type]; } + // Calculate how big the intern table will be after being serialized. + auto* const intern_table = Runtime::Current()->GetInternTable(); + CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings"; + intern_table_bytes_ = intern_table->WriteToMemory(nullptr); + // Note that image_end_ is left at end of used mirror object section. } @@ -1039,6 +989,10 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { CHECK_EQ(image_objects_offset_begin_ + bin_slot_previous_sizes_[kBinArtMethodClean], methods_section->Offset()); cur_pos = methods_section->End(); + // Calculate the size of the interned strings. + auto* interned_strings_section = §ions[ImageHeader::kSectionInternedStrings]; + *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_); + cur_pos = interned_strings_section->End(); // Finally bitmap section. const size_t bitmap_bytes = image_bitmap_->Size(); auto* bitmap_section = §ions[ImageHeader::kSectionImageBitmap]; @@ -1046,16 +1000,19 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { cur_pos = bitmap_section->End(); if (kIsDebugBuild) { size_t idx = 0; - for (auto& section : sections) { + for (const ImageSection& section : sections) { LOG(INFO) << static_cast<ImageHeader::ImageSections>(idx) << " " << section; ++idx; } LOG(INFO) << "Methods: clean=" << clean_methods_ << " dirty=" << dirty_methods_; } + const size_t image_end = static_cast<uint32_t>(interned_strings_section->End()); + CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) << + "Oat file should be right after the image."; // Create the header. new (image_->Begin()) ImageHeader( - PointerToLowMemUInt32(image_begin_), static_cast<uint32_t>(methods_section->End()), sections, - image_roots_address_, oat_file_->GetOatHeader().GetChecksum(), + PointerToLowMemUInt32(image_begin_), image_end, + sections, image_roots_address_, oat_file_->GetOatHeader().GetChecksum(), PointerToLowMemUInt32(oat_file_begin), PointerToLowMemUInt32(oat_data_begin_), PointerToLowMemUInt32(oat_data_end), PointerToLowMemUInt32(oat_file_end), target_ptr_size_, compile_pic_); @@ -1068,6 +1025,37 @@ ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) { return reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); } +class FixupRootVisitor : public RootVisitor { + public: + explicit FixupRootVisitor(ImageWriter* image_writer) : image_writer_(image_writer) { + } + + void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + *roots[i] = ImageAddress(*roots[i]); + } + } + + void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, + const RootInfo& info ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + roots[i]->Assign(ImageAddress(roots[i]->AsMirrorPtr())); + } + } + + private: + ImageWriter* const image_writer_; + + mirror::Object* ImageAddress(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + const size_t offset = image_writer_->GetImageOffset(obj); + auto* const dest = reinterpret_cast<Object*>(image_writer_->image_begin_ + offset); + VLOG(compiler) << "Update root from " << obj << " to " << dest; + return dest; + } +}; + void ImageWriter::CopyAndFixupNativeData() { // Copy ArtFields and methods to their locations and update the array for convenience. for (auto& pair : native_object_reloc_) { @@ -1088,7 +1076,7 @@ void ImageWriter::CopyAndFixupNativeData() { } // Fixup the image method roots. auto* image_header = reinterpret_cast<ImageHeader*>(image_->Begin()); - const auto& methods_section = image_header->GetMethodsSection(); + const ImageSection& methods_section = image_header->GetMethodsSection(); for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) { auto* m = image_methods_[i]; CHECK(m != nullptr); @@ -1101,18 +1089,35 @@ void ImageWriter::CopyAndFixupNativeData() { auto* dest = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), dest); } + // Write the intern table into the image. + const ImageSection& intern_table_section = image_header->GetImageSection( + ImageHeader::kSectionInternedStrings); + InternTable* const intern_table = Runtime::Current()->GetInternTable(); + uint8_t* const memory_ptr = image_->Begin() + intern_table_section.Offset(); + const size_t intern_table_bytes = intern_table->WriteToMemory(memory_ptr); + // Fixup the pointers in the newly written intern table to contain image addresses. + InternTable temp_table; + // Note that we require that ReadFromMemory does not make an internal copy of the elements so that + // the VisitRoots() will update the memory directly rather than the copies. + // This also relies on visit roots not doing any verification which could fail after we update + // the roots to be the image addresses. + temp_table.ReadFromMemory(memory_ptr); + CHECK_EQ(temp_table.Size(), intern_table->Size()); + FixupRootVisitor visitor(this); + temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots); + CHECK_EQ(intern_table_bytes, intern_table_bytes_); } void ImageWriter::CopyAndFixupObjects() { gc::Heap* heap = Runtime::Current()->GetHeap(); heap->VisitObjects(CopyAndFixupObjectsCallback, this); // Fix up the object previously had hash codes. - for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) { + for (const auto& hash_pair : saved_hashcode_map_) { Object* obj = hash_pair.first; DCHECK_EQ(obj->GetLockWord<kVerifyNone>(false).ReadBarrierState(), 0U); obj->SetLockWord<kVerifyNone>(LockWord::FromHashCode(hash_pair.second, 0U), false); } - saved_hashes_.clear(); + saved_hashcode_map_.clear(); } void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) { @@ -1155,18 +1160,22 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a } void ImageWriter::CopyAndFixupObject(Object* obj) { - // see GetLocalAddress for similar computation size_t offset = GetImageOffset(obj); auto* dst = reinterpret_cast<Object*>(image_->Begin() + offset); - const uint8_t* src = reinterpret_cast<const uint8_t*>(obj); + DCHECK_LT(offset, image_end_); + const auto* src = reinterpret_cast<const uint8_t*>(obj); + + image_bitmap_->Set(dst); // Mark the obj as live. - size_t n = obj->SizeOf(); + const size_t n = obj->SizeOf(); DCHECK_LE(offset + n, image_->Size()); memcpy(dst, src, n); // Write in a hash code of objects which have inflated monitors or a hash code in their monitor // word. - dst->SetLockWord(LockWord::Default(), false); + const auto it = saved_hashcode_map_.find(obj); + dst->SetLockWord(it != saved_hashcode_map_.end() ? + LockWord::FromHashCode(it->second, 0u) : LockWord::Default(), false); FixupObject(obj, dst); } @@ -1176,7 +1185,7 @@ class FixupVisitor { FixupVisitor(ImageWriter* image_writer, Object* copy) : image_writer_(image_writer), copy_(copy) { } - void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const + void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) { Object* ref = obj->GetFieldObject<Object, kVerifyNone>(offset); // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the @@ -1186,7 +1195,7 @@ class FixupVisitor { } // java.lang.ref.Reference visitor. - void operator()(mirror::Class* /*klass*/, mirror::Reference* ref) const + void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) { copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>( @@ -1490,4 +1499,11 @@ uint32_t ImageWriter::BinSlot::GetIndex() const { return lockword_ & ~kBinMask; } +uint8_t* ImageWriter::GetOatFileBegin() const { + DCHECK_GT(intern_table_bytes_, 0u); + return image_begin_ + RoundUp( + image_end_ + bin_slot_sizes_[kBinArtField] + bin_slot_sizes_[kBinArtMethodDirty] + + bin_slot_sizes_[kBinArtMethodClean] + intern_table_bytes_, kPageSize); +} + } // namespace art diff --git a/compiler/image_writer.h b/compiler/image_writer.h index a35d6ad9c9..9d45ce2bd4 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -54,7 +54,7 @@ class ImageWriter FINAL { quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic), target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), bin_slot_sizes_(), bin_slot_previous_sizes_(), bin_slot_count_(), - dirty_methods_(0u), clean_methods_(0u) { + intern_table_bytes_(0u), dirty_methods_(0u), clean_methods_(0u) { CHECK_NE(image_begin, 0U); std::fill(image_methods_, image_methods_ + arraysize(image_methods_), nullptr); } @@ -84,11 +84,7 @@ class ImageWriter FINAL { image_begin_ + RoundUp(sizeof(ImageHeader), kObjectAlignment) + it->second + offset); } - uint8_t* GetOatFileBegin() const { - return image_begin_ + RoundUp( - image_end_ + bin_slot_sizes_[kBinArtField] + bin_slot_sizes_[kBinArtMethodDirty] + - bin_slot_sizes_[kBinArtMethodClean], kPageSize); - } + uint8_t* GetOatFileBegin() const; bool Write(const std::string& image_filename, const std::string& oat_filename, const std::string& oat_location) @@ -158,7 +154,7 @@ class ImageWriter FINAL { // The offset in bytes from the beginning of the bin. Aligned to object size. uint32_t GetIndex() const; // Pack into a single uint32_t, for storing into a lock word. - explicit operator uint32_t() const { return lockword_; } + uint32_t Uint32Value() const { return lockword_; } // Comparison operator for map support bool operator<(const BinSlot& other) const { return lockword_ < other.lockword_; } @@ -170,7 +166,7 @@ class ImageWriter FINAL { // We use the lock word to store the offset of the object in the image. void AssignImageOffset(mirror::Object* object, BinSlot bin_slot) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void SetImageOffset(mirror::Object* object, BinSlot bin_slot, size_t offset) + void SetImageOffset(mirror::Object* object, size_t offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool IsImageOffsetAssigned(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -330,11 +326,9 @@ class ImageWriter FINAL { // The start offsets of the dex cache arrays. SafeMap<const DexFile*, size_t> dex_cache_array_starts_; - // Saved hashes (objects are inside of the image so that they don't move). - std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_; - - // Saved hashes (objects are bin slots to inside of the image, not yet allocated an address). - std::map<BinSlot, uint32_t> saved_hashes_map_; + // Saved hash codes. We use these to restore lockwords which were temporarily used to have + // forwarding addresses as well as copying over hash codes. + std::unordered_map<mirror::Object*, uint32_t> saved_hashcode_map_; // Beginning target oat address for the pointers from the output image to its oat file. const uint8_t* oat_data_begin_; @@ -360,6 +354,9 @@ class ImageWriter FINAL { size_t bin_slot_previous_sizes_[kBinSize]; // Number of bytes in previous bins. size_t bin_slot_count_[kBinSize]; // Number of objects in a bin + // Cached size of the intern table for when we allocate memory. + size_t intern_table_bytes_; + // ArtField, ArtMethod relocating map. These are allocated as array of structs but we want to // have one entry per art field for convenience. ArtFields are placed right after the end of the // image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields. @@ -376,8 +373,9 @@ class ImageWriter FINAL { uint64_t dirty_methods_; uint64_t clean_methods_; - friend class FixupVisitor; friend class FixupClassVisitor; + friend class FixupRootVisitor; + friend class FixupVisitor; DISALLOW_COPY_AND_ASSIGN(ImageWriter); }; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index b2b54965b5..9d5e3fdca1 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -126,11 +126,14 @@ class ValueBound : public ValueObject { return instruction_ == bound.instruction_ && constant_ == bound.constant_; } - static HInstruction* FromArrayLengthToNewArrayIfPossible(HInstruction* instruction) { - // Null check on the NewArray should have been eliminated by instruction - // simplifier already. - if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { - return instruction->InputAt(0)->AsNewArray(); + static HInstruction* FromArrayLengthToArray(HInstruction* instruction) { + DCHECK(instruction->IsArrayLength() || instruction->IsNewArray()); + if (instruction->IsArrayLength()) { + HInstruction* input = instruction->InputAt(0); + if (input->IsNullCheck()) { + input = input->AsNullCheck()->InputAt(0); + } + return input; } return instruction; } @@ -146,8 +149,9 @@ class ValueBound : public ValueObject { // Some bounds are created with HNewArray* as the instruction instead // of HArrayLength*. They are treated the same. - instruction1 = FromArrayLengthToNewArrayIfPossible(instruction1); - instruction2 = FromArrayLengthToNewArrayIfPossible(instruction2); + // HArrayLength with the same array input are considered equal also. + instruction1 = FromArrayLengthToArray(instruction1); + instruction2 = FromArrayLengthToArray(instruction2); return instruction1 == instruction2; } @@ -271,7 +275,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // Loop header of loop_info. Exiting loop is normal. return false; } - const GrowableArray<HBasicBlock*> successors = block->GetSuccessors(); + const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors(); for (size_t i = 0; i < successors.Size(); i++) { if (!loop_info->Contains(*successors.Get(i))) { // One of the successors exits the loop. @@ -293,8 +297,14 @@ class ArrayAccessInsideLoopFinder : public ValueObject { void Run() { HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation(); - for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) { - HBasicBlock* block = it_loop.Current(); + HBlocksInLoopReversePostOrderIterator it_loop(*loop_info); + HBasicBlock* block = it_loop.Current(); + DCHECK(block == induction_variable_->GetBlock()); + // Skip loop header. Since narrowed value range of a MonotonicValueRange only + // applies to the loop body (after the test at the end of the loop header). + it_loop.Advance(); + for (; !it_loop.Done(); it_loop.Advance()) { + block = it_loop.Current(); DCHECK(block->IsInLoop()); if (!DominatesAllBackEdges(block, loop_info)) { // In order not to trigger deoptimization unnecessarily, make sure @@ -308,30 +318,35 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // that the loop will loop through the full monotonic value range from // initial_ to end_. So adding deoptimization might be too aggressive and can // trigger deoptimization unnecessarily even if the loop won't actually throw - // AIOOBE. Otherwise, the loop induction variable is going to cover the full - // monotonic value range from initial_ to end_, and deoptimizations are added - // iff the loop will throw AIOOBE. + // AIOOBE. found_array_length_ = nullptr; return; } for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr; instruction = instruction->GetNext()) { - if (!instruction->IsArrayGet() && !instruction->IsArraySet()) { + if (!instruction->IsBoundsCheck()) { continue; } - HInstruction* index = instruction->InputAt(1); - if (!index->IsBoundsCheck()) { + + HInstruction* length_value = instruction->InputAt(1); + if (length_value->IsIntConstant()) { + // TODO: may optimize for constant case. continue; } - HArrayLength* array_length = index->InputAt(1)->AsArrayLength(); - if (array_length == nullptr) { - DCHECK(index->InputAt(1)->IsIntConstant()); - // TODO: may optimize for constant case. + DCHECK(!length_value->IsPhi()); + if (length_value->IsPhi()) { + // Outer loop shouldn't collect bounds checks inside inner + // loop because the inner loop body doen't dominate + // outer loop's back edges. However just to be on the safe side, + // if there are any such cases, we just skip over them. continue; } + DCHECK(length_value->IsArrayLength()); + HArrayLength* array_length = length_value->AsArrayLength(); + HInstruction* array = array_length->InputAt(0); if (array->IsNullCheck()) { array = array->AsNullCheck()->InputAt(0); @@ -347,7 +362,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { continue; } - index = index->AsBoundsCheck()->InputAt(0); + HInstruction* index = instruction->AsBoundsCheck()->InputAt(0); HInstruction* left = index; int32_t right = 0; if (left == induction_variable_ || @@ -375,7 +390,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // The instruction that corresponds to a MonotonicValueRange. HInstruction* induction_variable_; - // The array length of the array that's accessed inside the loop. + // The array length of the array that's accessed inside the loop body. HArrayLength* found_array_length_; // The lowest and highest constant offsets relative to induction variable @@ -411,6 +426,8 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { ValueBound GetLower() const { return lower_; } ValueBound GetUpper() const { return upper_; } + bool IsConstantValueRange() { return lower_.IsConstant() && upper_.IsConstant(); } + // If it's certain that this value range fits in other_range. virtual bool FitsIn(ValueRange* other_range) const { if (other_range == nullptr) { @@ -495,13 +512,30 @@ class MonotonicValueRange : public ValueRange { ValueBound GetBound() const { return bound_; } void SetEnd(HInstruction* end) { end_ = end; } void SetInclusive(bool inclusive) { inclusive_ = inclusive; } - HBasicBlock* GetLoopHead() const { + HBasicBlock* GetLoopHeader() const { DCHECK(induction_variable_->GetBlock()->IsLoopHeader()); return induction_variable_->GetBlock(); } MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; } + HBasicBlock* GetLoopHeaderSuccesorInLoop() { + HBasicBlock* header = GetLoopHeader(); + HInstruction* instruction = header->GetLastInstruction(); + DCHECK(instruction->IsIf()); + HIf* h_if = instruction->AsIf(); + HLoopInformation* loop_info = header->GetLoopInformation(); + bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor()); + bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor()); + + // Just in case it's some strange loop structure. + if (true_successor_in_loop && false_successor_in_loop) { + return nullptr; + } + DCHECK(true_successor_in_loop || false_successor_in_loop); + return false_successor_in_loop ? h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor(); + } + // If it's certain that this value range fits in other_range. bool FitsIn(ValueRange* other_range) const OVERRIDE { if (other_range == nullptr) { @@ -593,12 +627,114 @@ class MonotonicValueRange : public ValueRange { } } + // Try to add HDeoptimize's in the loop pre-header first to narrow this range. + // For example, this loop: + // + // for (int i = start; i < end; i++) { + // array[i - 1] = array[i] + array[i + 1]; + // } + // + // will be transformed to: + // + // int array_length_in_loop_body_if_needed; + // if (start >= end) { + // array_length_in_loop_body_if_needed = 0; + // } else { + // if (start < 1) deoptimize(); + // if (array == null) deoptimize(); + // array_length = array.length; + // if (end > array_length - 1) deoptimize; + // array_length_in_loop_body_if_needed = array_length; + // } + // for (int i = start; i < end; i++) { + // // No more null check and bounds check. + // // array.length value is replaced with array_length_in_loop_body_if_needed + // // in the loop body. + // array[i - 1] = array[i] + array[i + 1]; + // } + // + // We basically first go through the loop body and find those array accesses whose + // index is at a constant offset from the induction variable ('i' in the above example), + // and update offset_low and offset_high along the way. We then add the following + // deoptimizations in the loop pre-header (suppose end is not inclusive). + // if (start < -offset_low) deoptimize(); + // if (end >= array.length - offset_high) deoptimize(); + // It might be necessary to first hoist array.length (and the null check on it) out of + // the loop with another deoptimization. + // + // In order not to trigger deoptimization unnecessarily, we want to make a strong + // guarantee that no deoptimization is triggered if the loop body itself doesn't + // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop + // body must throw AIOOBE). + // This is achieved by the following: + // 1) We only process loops that iterate through the full monotonic range from + // initial_ to end_. We do the following checks to make sure that's the case: + // a) The loop doesn't have early exit (via break, return, etc.) + // b) The increment_ is 1/-1. An increment of 2, for example, may skip end_. + // 2) We only collect array accesses of blocks in the loop body that dominate + // all loop back edges, these array accesses are guaranteed to happen + // at each loop iteration. + // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses + // when the induction variable is at initial_ and end_ must be in a legal range. + // Since the added deoptimizations are basically checking the induction variable + // at initial_ and end_ values, no deoptimization will be triggered either. + // + // A special case is the loop body isn't entered at all. In that case, we may still + // add deoptimization due to the analysis described above. In order not to trigger + // deoptimization, we do a test between initial_ and end_ first and skip over + // the added deoptimization. + ValueRange* NarrowWithDeoptimization() { + if (increment_ != 1 && increment_ != -1) { + // In order not to trigger deoptimization unnecessarily, we want to + // make sure the loop iterates through the full range from initial_ to + // end_ so that boundaries are covered by the loop. An increment of 2, + // for example, may skip end_. + return this; + } + + if (end_ == nullptr) { + // No full info to add deoptimization. + return this; + } + + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + if (!initial_->GetBlock()->Dominates(pre_header) || + !end_->GetBlock()->Dominates(pre_header)) { + // Can't add a check in loop pre-header if the value isn't available there. + return this; + } + + ArrayAccessInsideLoopFinder finder(induction_variable_); + + if (!finder.HasFoundArrayLength()) { + // No array access was found inside the loop that can benefit + // from deoptimization. + return this; + } + + if (!AddDeoptimization(finder)) { + return this; + } + + // After added deoptimizations, induction variable fits in + // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. + ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); + ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); + // We've narrowed the range after added deoptimizations. + return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); + } + // Returns true if adding a (constant >= value) check for deoptimization // is allowed and will benefit compiled code. - bool CanAddDeoptimizationConstant(HInstruction* value, - int32_t constant, - bool* is_proven) { + bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) { *is_proven = false; + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + DCHECK(value->GetBlock()->Dominates(pre_header)); + // See if we can prove the relationship first. if (value->IsIntConstant()) { if (value->AsIntConstant()->GetValue() >= constant) { @@ -615,22 +751,118 @@ class MonotonicValueRange : public ValueRange { return true; } + // Try to filter out cases that the loop entry test will never be true. + bool LoopEntryTestUseful() { + if (initial_->IsIntConstant() && end_->IsIntConstant()) { + int32_t initial_val = initial_->AsIntConstant()->GetValue(); + int32_t end_val = end_->AsIntConstant()->GetValue(); + if (increment_ == 1) { + if (inclusive_) { + return initial_val > end_val; + } else { + return initial_val >= end_val; + } + } else { + DCHECK_EQ(increment_, -1); + if (inclusive_) { + return initial_val < end_val; + } else { + return initial_val <= end_val; + } + } + } + return true; + } + + // Returns the block for adding deoptimization. + HBasicBlock* TransformLoopForDeoptimizationIfNeeded() { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + // Deoptimization is only added when both initial_ and end_ are defined + // before the loop. + DCHECK(initial_->GetBlock()->Dominates(pre_header)); + DCHECK(end_->GetBlock()->Dominates(pre_header)); + + // If it can be proven the loop body is definitely entered (unless exception + // is thrown in the loop header for which triggering deoptimization is fine), + // there is no need for tranforming the loop. In that case, deoptimization + // will just be added in the loop pre-header. + if (!LoopEntryTestUseful()) { + return pre_header; + } + + HGraph* graph = header->GetGraph(); + graph->TransformLoopHeaderForBCE(header); + HBasicBlock* new_pre_header = header->GetDominator(); + DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader()); + HBasicBlock* if_block = new_pre_header->GetDominator(); + HBasicBlock* dummy_block = if_block->GetSuccessors().Get(0); // True successor. + HBasicBlock* deopt_block = if_block->GetSuccessors().Get(1); // False successor. + + dummy_block->AddInstruction(new (graph->GetArena()) HGoto()); + deopt_block->AddInstruction(new (graph->GetArena()) HGoto()); + new_pre_header->AddInstruction(new (graph->GetArena()) HGoto()); + return deopt_block; + } + + // Adds a test between initial_ and end_ to see if the loop body is entered. + // If the loop body isn't entered at all, it jumps to the loop pre-header (after + // transformation) to avoid any deoptimization. + void AddLoopBodyEntryTest() { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + HBasicBlock* if_block = pre_header->GetDominator(); + HGraph* graph = header->GetGraph(); + + HCondition* cond; + if (increment_ == 1) { + if (inclusive_) { + cond = new (graph->GetArena()) HGreaterThan(initial_, end_); + } else { + cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_); + } + } else { + DCHECK_EQ(increment_, -1); + if (inclusive_) { + cond = new (graph->GetArena()) HLessThan(initial_, end_); + } else { + cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_); + } + } + HIf* h_if = new (graph->GetArena()) HIf(cond); + if_block->AddInstruction(cond); + if_block->AddInstruction(h_if); + } + // Adds a check that (value >= constant), and HDeoptimize otherwise. void AddDeoptimizationConstant(HInstruction* value, - int32_t constant) { - HBasicBlock* block = induction_variable_->GetBlock(); - DCHECK(block->IsLoopHeader()); - HGraph* graph = block->GetGraph(); - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + int32_t constant, + HBasicBlock* deopt_block, + bool loop_entry_test_block_added) { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + if (loop_entry_test_block_added) { + DCHECK(deopt_block->GetSuccessors().Get(0) == pre_header); + } else { + DCHECK(deopt_block == pre_header); + } + HGraph* graph = header->GetGraph(); + HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); + if (loop_entry_test_block_added) { + DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors().Get(1)); + } + HIntConstant* const_instr = graph->GetIntConstant(constant); HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr); HDeoptimize* deoptimize = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); + deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction()); deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + suspend_check->GetEnvironment(), header); } // Returns true if adding a (value <= array_length + offset) check for deoptimization @@ -640,6 +872,26 @@ class MonotonicValueRange : public ValueRange { int32_t offset, bool* is_proven) { *is_proven = false; + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + DCHECK(value->GetBlock()->Dominates(pre_header)); + + if (array_length->GetBlock() == header) { + // array_length_in_loop_body_if_needed only has correct value when the loop + // body is entered. We bail out in this case. Usually array_length defined + // in the loop header is already hoisted by licm. + return false; + } else { + // array_length is defined either before the loop header already, or in + // the loop body since it's used in the loop body. If it's defined in the loop body, + // a phi array_length_in_loop_body_if_needed is used to replace it. In that case, + // all the uses of array_length must be dominated by its definition in the loop + // body. array_length_in_loop_body_if_needed is guaranteed to be the same as + // array_length once the loop body is entered so all the uses of the phi will + // use the correct value. + } + if (offset > 0) { // There might be overflow issue. // TODO: handle this, possibly with some distance relationship between @@ -667,56 +919,99 @@ class MonotonicValueRange : public ValueRange { // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise. void AddDeoptimizationArrayLength(HInstruction* value, HArrayLength* array_length, - int32_t offset) { - HBasicBlock* block = induction_variable_->GetBlock(); - DCHECK(block->IsLoopHeader()); - HGraph* graph = block->GetGraph(); - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + int32_t offset, + HBasicBlock* deopt_block, + bool loop_entry_test_block_added) { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + if (loop_entry_test_block_added) { + DCHECK(deopt_block->GetSuccessors().Get(0) == pre_header); + } else { + DCHECK(deopt_block == pre_header); + } + HGraph* graph = header->GetGraph(); + HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); // We may need to hoist null-check and array_length out of loop first. - if (!array_length->GetBlock()->Dominates(pre_header)) { + if (!array_length->GetBlock()->Dominates(deopt_block)) { + // array_length must be defined in the loop body. + DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock())); + DCHECK(array_length->GetBlock() != header); + HInstruction* array = array_length->InputAt(0); HNullCheck* null_check = array->AsNullCheck(); if (null_check != nullptr) { array = null_check->InputAt(0); } - // We've already made sure array is defined before the loop when collecting + // We've already made sure the array is defined before the loop when collecting // array accesses for the loop. - DCHECK(array->GetBlock()->Dominates(pre_header)); - if (null_check != nullptr && !null_check->GetBlock()->Dominates(pre_header)) { + DCHECK(array->GetBlock()->Dominates(deopt_block)); + if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) { // Hoist null check out of loop with a deoptimization. HNullConstant* null_constant = graph->GetNullConstant(); HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant); // TODO: for one dex_pc, share the same deoptimization slow path. HDeoptimize* null_check_deoptimize = new (graph->GetArena()) HDeoptimize(null_check_cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(null_check_cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore( - null_check_deoptimize, pre_header->GetLastInstruction()); + deopt_block->InsertInstructionBefore( + null_check_cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore( + null_check_deoptimize, deopt_block->GetLastInstruction()); // Eliminate null check in the loop. null_check->ReplaceWith(array); null_check->GetBlock()->RemoveInstruction(null_check); null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + suspend_check->GetEnvironment(), header); } - // Hoist array_length out of loop. - array_length->MoveBefore(pre_header->GetLastInstruction()); + + HArrayLength* new_array_length = new (graph->GetArena()) HArrayLength(array); + deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction()); + + if (loop_entry_test_block_added) { + // Replace array_length defined inside the loop body with a phi + // array_length_in_loop_body_if_needed. This is a synthetic phi so there is + // no vreg number for it. + HPhi* phi = new (graph->GetArena()) HPhi( + graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt); + // Set to 0 if the loop body isn't entered. + phi->SetRawInputAt(0, graph->GetIntConstant(0)); + // Set to array.length if the loop body is entered. + phi->SetRawInputAt(1, new_array_length); + pre_header->AddPhi(phi); + array_length->ReplaceWith(phi); + // Make sure phi is only used after the loop body is entered. + if (kIsDebugBuild) { + for (HUseIterator<HInstruction*> it(phi->GetUses()); + !it.Done(); + it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock())); + } + } + } else { + array_length->ReplaceWith(new_array_length); + } + + array_length->GetBlock()->RemoveInstruction(array_length); + // Use new_array_length for deopt. + array_length = new_array_length; } - HIntConstant* offset_instr = graph->GetIntConstant(offset); - HAdd* add = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); - HCondition* cond = new (graph->GetArena()) HGreaterThan(value, add); - HDeoptimize* deoptimize = new (graph->GetArena()) - HDeoptimize(cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(add, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); - deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + HInstruction* added = array_length; + if (offset != 0) { + HIntConstant* offset_instr = graph->GetIntConstant(offset); + added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); + deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction()); + } + HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added); + HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc()); + deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction()); + deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header); } - // Add deoptimizations in loop pre-header with the collected array access + // Adds deoptimizations in loop pre-header with the collected array access // data so that value ranges can be established in loop body. // Returns true if deoptimizations are successfully added, or if it's proven // it's not necessary. @@ -733,70 +1028,60 @@ class MonotonicValueRange : public ValueRange { return false; } + HBasicBlock* deopt_block; + bool loop_entry_test_block_added = false; bool is_constant_proven, is_length_proven; + + HInstruction* const_comparing_instruction; + int32_t const_compared_to; + HInstruction* array_length_comparing_instruction; + int32_t array_length_offset; if (increment_ == 1) { // Increasing from initial_ to end_. - int32_t offset = inclusive_ ? -offset_high - 1 : -offset_high; - if (CanAddDeoptimizationConstant(initial_, -offset_low, &is_constant_proven) && - CanAddDeoptimizationArrayLength(end_, array_length, offset, &is_length_proven)) { - if (!is_constant_proven) { - AddDeoptimizationConstant(initial_, -offset_low); - } - if (!is_length_proven) { - AddDeoptimizationArrayLength(end_, array_length, offset); + const_comparing_instruction = initial_; + const_compared_to = -offset_low; + array_length_comparing_instruction = end_; + array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high; + } else { + const_comparing_instruction = end_; + const_compared_to = inclusive_ ? -offset_low : -offset_low - 1; + array_length_comparing_instruction = initial_; + array_length_offset = -offset_high - 1; + } + + if (CanAddDeoptimizationConstant(const_comparing_instruction, + const_compared_to, + &is_constant_proven) && + CanAddDeoptimizationArrayLength(array_length_comparing_instruction, + array_length, + array_length_offset, + &is_length_proven)) { + if (!is_constant_proven || !is_length_proven) { + deopt_block = TransformLoopForDeoptimizationIfNeeded(); + loop_entry_test_block_added = (deopt_block != pre_header); + if (loop_entry_test_block_added) { + // Loop body may be entered. + AddLoopBodyEntryTest(); } - return true; } - } else if (increment_ == -1) { - // Decreasing from initial_ to end_. - int32_t constant = inclusive_ ? -offset_low : -offset_low - 1; - if (CanAddDeoptimizationConstant(end_, constant, &is_constant_proven) && - CanAddDeoptimizationArrayLength( - initial_, array_length, -offset_high - 1, &is_length_proven)) { - if (!is_constant_proven) { - AddDeoptimizationConstant(end_, constant); - } - if (!is_length_proven) { - AddDeoptimizationArrayLength(initial_, array_length, -offset_high - 1); - } - return true; + if (!is_constant_proven) { + AddDeoptimizationConstant(const_comparing_instruction, + const_compared_to, + deopt_block, + loop_entry_test_block_added); + } + if (!is_length_proven) { + AddDeoptimizationArrayLength(array_length_comparing_instruction, + array_length, + array_length_offset, + deopt_block, + loop_entry_test_block_added); } + return true; } return false; } - // Try to add HDeoptimize's in the loop pre-header first to narrow this range. - ValueRange* NarrowWithDeoptimization() { - if (increment_ != 1 && increment_ != -1) { - // TODO: possibly handle overflow/underflow issues with deoptimization. - return this; - } - - if (end_ == nullptr) { - // No full info to add deoptimization. - return this; - } - - ArrayAccessInsideLoopFinder finder(induction_variable_); - - if (!finder.HasFoundArrayLength()) { - // No array access was found inside the loop that can benefit - // from deoptimization. - return this; - } - - if (!AddDeoptimization(finder)) { - return this; - } - - // After added deoptimizations, induction variable fits in - // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. - ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); - ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); - // We've narrowed the range after added deoptimizations. - return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); - } - private: HPhi* const induction_variable_; // Induction variable for this monotonic value range. HInstruction* const initial_; // Initial value. @@ -819,12 +1104,17 @@ class BCEVisitor : public HGraphVisitor { // it's likely some AIOOBE will be thrown. static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024; + // Added blocks for loop body entry test. + bool IsAddedBlock(HBasicBlock* block) const { + return block->GetBlockId() >= initial_block_size_; + } + explicit BCEVisitor(HGraph* graph) - : HGraphVisitor(graph), - maps_(graph->GetBlocks().Size()), - need_to_revisit_block_(false) {} + : HGraphVisitor(graph), maps_(graph->GetBlocks().Size()), + need_to_revisit_block_(false), initial_block_size_(graph->GetBlocks().Size()) {} void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + DCHECK(!IsAddedBlock(block)); first_constant_index_bounds_check_map_.clear(); HGraphVisitor::VisitBasicBlock(block); if (need_to_revisit_block_) { @@ -839,6 +1129,10 @@ class BCEVisitor : public HGraphVisitor { private: // Return the map of proven value ranges at the beginning of a basic block. ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) { + if (IsAddedBlock(basic_block)) { + // Added blocks don't keep value ranges. + return nullptr; + } int block_id = basic_block->GetBlockId(); if (maps_.at(block_id) == nullptr) { std::unique_ptr<ArenaSafeMap<int, ValueRange*>> map( @@ -853,8 +1147,12 @@ class BCEVisitor : public HGraphVisitor { ValueRange* LookupValueRange(HInstruction* instruction, HBasicBlock* basic_block) { while (basic_block != nullptr) { ArenaSafeMap<int, ValueRange*>* map = GetValueRangeMap(basic_block); - if (map->find(instruction->GetId()) != map->end()) { - return map->Get(instruction->GetId()); + if (map != nullptr) { + if (map->find(instruction->GetId()) != map->end()) { + return map->Get(instruction->GetId()); + } + } else { + DCHECK(IsAddedBlock(basic_block)); } basic_block = basic_block->GetDominator(); } @@ -971,7 +1269,7 @@ class BCEVisitor : public HGraphVisitor { if (left_range != nullptr) { left_monotonic_range = left_range->AsMonotonicValueRange(); if (left_monotonic_range != nullptr) { - HBasicBlock* loop_head = left_monotonic_range->GetLoopHead(); + HBasicBlock* loop_head = left_monotonic_range->GetLoopHeader(); if (instruction->GetBlock() != loop_head) { // For monotonic value range, don't handle `instruction` // if it's not defined in the loop header. @@ -1013,7 +1311,7 @@ class BCEVisitor : public HGraphVisitor { // Update the info for monotonic value range. if (left_monotonic_range->GetInductionVariable() == left && left_monotonic_range->GetIncrement() < 0 && - block == left_monotonic_range->GetLoopHead() && + block == left_monotonic_range->GetLoopHeader() && instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { left_monotonic_range->SetEnd(right); left_monotonic_range->SetInclusive(cond == kCondLT); @@ -1047,7 +1345,7 @@ class BCEVisitor : public HGraphVisitor { // Update the info for monotonic value range. if (left_monotonic_range->GetInductionVariable() == left && left_monotonic_range->GetIncrement() > 0 && - block == left_monotonic_range->GetLoopHead() && + block == left_monotonic_range->GetLoopHeader() && instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { left_monotonic_range->SetEnd(right); left_monotonic_range->SetInclusive(cond == kCondGT); @@ -1083,7 +1381,16 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); - DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength()); + DCHECK(array_length->IsIntConstant() || + array_length->IsArrayLength() || + array_length->IsPhi()); + + if (array_length->IsPhi()) { + // Input 1 of the phi contains the real array.length once the loop body is + // entered. That value will be used for bound analysis. The graph is still + // strickly in SSA form. + array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength(); + } if (!index->IsIntConstant()) { ValueRange* index_range = LookupValueRange(index, block); @@ -1238,25 +1545,26 @@ class BCEVisitor : public HGraphVisitor { } if (left_range->IsMonotonicValueRange() && - block == left_range->AsMonotonicValueRange()->GetLoopHead()) { + block == left_range->AsMonotonicValueRange()->GetLoopHeader()) { // The comparison is for an induction variable in the loop header. DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable()); - HBasicBlock* loop_body_successor; - if (LIKELY(block->GetLoopInformation()-> - Contains(*instruction->IfFalseSuccessor()))) { - loop_body_successor = instruction->IfFalseSuccessor(); - } else { - loop_body_successor = instruction->IfTrueSuccessor(); + HBasicBlock* loop_body_successor = + left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop(); + if (loop_body_successor == nullptr) { + // In case it's some strange loop structure. + return; } ValueRange* new_left_range = LookupValueRange(left, loop_body_successor); - if (new_left_range == left_range) { + if ((new_left_range == left_range) || + // Range narrowed with deoptimization is usually more useful than + // a constant range. + new_left_range->IsConstantValueRange()) { // We are not successful in narrowing the monotonic value range to // a regular value range. Try using deoptimization. new_left_range = left_range->AsMonotonicValueRange()-> NarrowWithDeoptimization(); if (new_left_range != left_range) { - GetValueRangeMap(instruction->IfFalseSuccessor())-> - Overwrite(left->GetId(), new_left_range); + GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range); } } } @@ -1511,6 +1819,9 @@ class BCEVisitor : public HGraphVisitor { // eliminate those bounds checks. bool need_to_revisit_block_; + // Initial number of blocks. + int32_t initial_block_size_; + DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; @@ -1527,7 +1838,22 @@ void BoundsCheckElimination::Run() { // value can be narrowed further down in the dominator tree. // // TODO: only visit blocks that dominate some array accesses. - visitor.VisitReversePostOrder(); + HBasicBlock* last_visited_block = nullptr; + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* current = it.Current(); + if (current == last_visited_block) { + // We may insert blocks into the reverse post order list when processing + // a loop header. Don't process it again. + DCHECK(current->IsLoopHeader()); + continue; + } + if (visitor.IsAddedBlock(current)) { + // Skip added blocks. Their effects are already taken care of. + continue; + } + visitor.VisitBasicBlock(current); + last_visited_block = current; + } } } // namespace art diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 48090a3de4..4701bddd48 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -440,22 +440,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -464,6 +458,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); @@ -472,6 +467,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -481,6 +477,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // array[i] = 10; // Can't eliminate due to overflow concern. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2(graph); bounds_check_elimination_with_increment_2.Run(); @@ -489,6 +486,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph); bounds_check_elimination_with_increment_2_from_1.Run(); @@ -579,22 +577,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph2(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph2(&allocator, &bounds_check, 0); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -603,6 +595,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, -1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); @@ -611,6 +604,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_less_than(graph); bounds_check_elimination_with_less_than.Run(); @@ -619,6 +613,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph); bounds_check_elimination_increment_minus_2.Run(); @@ -646,8 +641,13 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, HBasicBlock* block = new (allocator) HBasicBlock(graph); graph->AddBlock(block); entry->AddSuccessor(block); - HInstruction* new_array = new (allocator) - HNewArray(constant_10, 0, Primitive::kPrimInt, graph->GetDexFile(), kQuickAllocArray); + HInstruction* new_array = new (allocator) HNewArray( + constant_10, + graph->GetCurrentMethod(), + 0, + Primitive::kPrimInt, + graph->GetDexFile(), + kQuickAllocArray); block->AddInstruction(new_array); block->AddInstruction(new (allocator) HGoto()); @@ -705,15 +705,17 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // int[] array = new int[10]; // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -723,6 +725,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -732,6 +735,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_8(graph); bounds_check_elimination_increment_8.Run(); @@ -823,22 +827,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph4(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph4(&allocator, &bounds_check, 0); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -847,6 +845,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -1022,6 +1021,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_body_add->AddSuccessor(outer_header); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); // gvn should remove the same bounds check. ASSERT_FALSE(IsRemoved(bounds_check1)); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index f98029da03..1f9287cbfc 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -723,10 +723,16 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, } } - invoke = new (arena_) HInvokeStaticOrDirect( - arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index, - is_recursive, string_init_offset, invoke_type, optimized_invoke_type, - clinit_check_requirement); + invoke = new (arena_) HInvokeStaticOrDirect(arena_, + number_of_arguments, + return_type, + dex_pc, + target_method.dex_method_index, + is_recursive, + string_init_offset, + invoke_type, + optimized_invoke_type, + clinit_check_requirement); } size_t start_index = 0; @@ -748,13 +754,11 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) { Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]); bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble); - if (!is_range && is_wide && args[i] + 1 != args[i + 1]) { - LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol() - << " at " << dex_pc; - // We do not implement non sequential register pair. - MaybeRecordStat(MethodCompilationStat::kNotCompiledNonSequentialRegPair); - return false; - } + // Longs and doubles should be in pairs, that is, sequential registers. The verifier should + // reject any class where this is violated. + DCHECK(is_range || !is_wide || (args[i] + 1 == args[i + 1])) + << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol() + << " at " << dex_pc; HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type); invoke->SetArgumentAt(argument_index, arg); if (is_wide) { @@ -763,6 +767,11 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, } DCHECK_EQ(argument_index, number_of_arguments); + if (invoke->IsInvokeStaticOrDirect()) { + invoke->SetArgumentAt(argument_index, graph_->GetCurrentMethod()); + argument_index++; + } + if (clinit_check_requirement == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit) { // Add the class initialization check as last input of `invoke`. DCHECK(clinit_check != nullptr); @@ -1045,6 +1054,7 @@ void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc, ? kQuickAllocArrayWithAccessCheck : kQuickAllocArray; HInstruction* object = new (arena_) HNewArray(length, + graph_->GetCurrentMethod(), dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), @@ -2003,7 +2013,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 : kQuickAllocObject; current_block_->AddInstruction(new (arena_) HNewInstance( - dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), entrypoint)); + graph_->GetCurrentMethod(), + dex_pc, + type_index, + *dex_compilation_unit_->GetDexFile(), + entrypoint)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } break; @@ -2015,8 +2029,12 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) ? kQuickAllocArrayWithAccessCheck : kQuickAllocArray; - current_block_->AddInstruction(new (arena_) HNewArray( - length, dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), entrypoint)); + current_block_->AddInstruction(new (arena_) HNewArray(length, + graph_->GetCurrentMethod(), + dex_pc, + type_index, + *dex_compilation_unit_->GetDexFile(), + entrypoint)); UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction()); break; } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 792ad9b544..130f0e970f 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -292,7 +292,6 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor) { ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena(); LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kCall); - locations->AddTemp(visitor->GetMethodLocation()); for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { HInstruction* input = invoke->InputAt(i); @@ -300,6 +299,20 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( } locations->SetOut(visitor->GetReturnLocation(invoke->GetType())); + + if (invoke->IsInvokeStaticOrDirect()) { + HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect(); + if (call->IsStringInit()) { + locations->AddTemp(visitor->GetMethodLocation()); + } else if (call->IsRecursive()) { + locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation()); + } else { + locations->AddTemp(visitor->GetMethodLocation()); + locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister()); + } + } else { + locations->AddTemp(visitor->GetMethodLocation()); + } } void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const { diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index ec0d56d291..d14594562e 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1274,11 +1274,6 @@ void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok HandleInvoke(invoke); } -void CodeGeneratorARM::LoadCurrentMethod(Register reg) { - DCHECK(RequiresCurrentMethod()); - __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); -} - static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) { if (invoke->GetLocations()->Intrinsified()) { IntrinsicCodeGeneratorARM intrinsic(codegen); @@ -1297,9 +1292,9 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec return; } - Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); - - codegen_->GenerateStaticOrDirectCall(invoke, temp); + LocationSummary* locations = invoke->GetLocations(); + codegen_->GenerateStaticOrDirectCall( + invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -1330,12 +1325,8 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); // temp = object->GetClass(); - if (receiver.IsStackSlot()) { - __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex()); - __ LoadFromOffset(kLoadWord, temp, temp, class_offset); - } else { - __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); - } + DCHECK(receiver.IsRegister()); + __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( @@ -2724,13 +2715,12 @@ void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(R0)); } void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime(GetThreadOffset<kArmWordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, @@ -2743,14 +2733,13 @@ void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetOut(Location::RegisterLocation(R0)); locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); } void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(2)); __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime(GetThreadOffset<kArmWordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, @@ -4216,9 +4205,7 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } -void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp) { - DCHECK_EQ(temp, kArtMethodRegister); - +void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { // TODO: Implement all kinds of calls: // 1) boot -> boot // 2) app -> boot @@ -4227,32 +4214,40 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, // Currently we implement the app -> app logic, which looks up in the resolve cache. if (invoke->IsStringInit()) { + Register reg = temp.AsRegister<Register>(); // temp = thread->string_init_entrypoint - __ LoadFromOffset(kLoadWord, temp, TR, invoke->GetStringInitOffset()); + __ LoadFromOffset(kLoadWord, reg, TR, invoke->GetStringInitOffset()); // LR = temp[offset_of_quick_compiled_code] - __ LoadFromOffset(kLoadWord, LR, temp, + __ LoadFromOffset(kLoadWord, LR, reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kArmWordSize).Int32Value()); // LR() __ blx(LR); + } else if (invoke->IsRecursive()) { + __ bl(GetFrameEntryLabel()); } else { - // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset( - kLoadWord, temp, temp, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); - // temp = temp[index_in_cache] - __ LoadFromOffset( - kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); - // LR = temp[offset_of_quick_compiled_code] - __ LoadFromOffset(kLoadWord, LR, temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmWordSize).Int32Value()); - // LR() - __ blx(LR); + Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Register method_reg; + Register reg = temp.AsRegister<Register>(); + if (current_method.IsRegister()) { + method_reg = current_method.AsRegister<Register>(); } else { - __ bl(GetFrameEntryLabel()); + DCHECK(invoke->GetLocations()->Intrinsified()); + DCHECK(!current_method.IsValid()); + method_reg = reg; + __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); } + // reg = current_method->dex_cache_resolved_methods_; + __ LoadFromOffset( + kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); + // reg = reg[index_in_cache] + __ LoadFromOffset( + kLoadWord, reg, reg, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); + // LR = reg[offset_of_quick_compiled_code] + __ LoadFromOffset(kLoadWord, LR, reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArmWordSize).Int32Value()); + // LR() + __ blx(LR); } DCHECK(!IsLeafMethod()); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index c5a28bacce..1599a23568 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -139,10 +139,16 @@ class LocationsBuilderARM : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr); - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); @@ -163,10 +169,16 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr); - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + ArmAssembler* GetAssembler() const { return assembler_; } private: @@ -271,9 +283,6 @@ class CodeGeneratorARM : public CodeGenerator { // Helper method to move a 64bits value between two locations. void Move64(Location destination, Location source); - // Load current method into `reg`. - void LoadCurrentMethod(Register reg); - // Generate code to invoke a runtime entry point. void InvokeRuntime( int32_t offset, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path); @@ -303,7 +312,7 @@ class CodeGeneratorARM : public CodeGenerator { Label* GetFrameEntryLabel() { return &frame_entry_label_; } - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp); + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); private: // Labels for each block that will be compiled. diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 2f607f70a3..3c8f117011 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -484,7 +484,7 @@ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type } Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const { - return LocationFrom(x0); + return LocationFrom(kArtMethodRegister); } CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, @@ -1071,12 +1071,6 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type, } } -void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) { - DCHECK(RequiresCurrentMethod()); - CHECK(current_method.IsX()); - __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset)); -} - void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset, HInstruction* instruction, uint32_t dex_pc, @@ -2242,9 +2236,8 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege return false; } -void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp) { +void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. - DCHECK(temp.Is(kArtMethodRegister)); size_t index_in_cache = GetCachePointerOffset(invoke->GetDexMethodIndex()); // TODO: Implement all kinds of calls: @@ -2255,30 +2248,39 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok // Currently we implement the app -> app logic, which looks up in the resolve cache. if (invoke->IsStringInit()) { + Register reg = XRegisterFrom(temp); // temp = thread->string_init_entrypoint - __ Ldr(temp.X(), MemOperand(tr, invoke->GetStringInitOffset())); + __ Ldr(reg.X(), MemOperand(tr, invoke->GetStringInitOffset())); // LR = temp->entry_point_from_quick_compiled_code_; __ Ldr(lr, MemOperand( - temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value())); + reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value())); // lr() __ Blr(lr); + } else if (invoke->IsRecursive()) { + __ Bl(&frame_entry_label_); } else { - // temp = method; - LoadCurrentMethod(temp.X()); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ Ldr(temp.W(), MemOperand(temp.X(), - ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); - // temp = temp[index_in_cache]; - __ Ldr(temp.X(), MemOperand(temp, index_in_cache)); - // lr = temp->entry_point_from_quick_compiled_code_; - __ Ldr(lr, MemOperand(temp.X(), ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArm64WordSize).Int32Value())); - // lr(); - __ Blr(lr); + Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Register reg = XRegisterFrom(temp); + Register method_reg; + if (current_method.IsRegister()) { + method_reg = XRegisterFrom(current_method); } else { - __ Bl(&frame_entry_label_); + DCHECK(invoke->GetLocations()->Intrinsified()); + DCHECK(!current_method.IsValid()); + method_reg = reg; + __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset)); } + + // temp = current_method->dex_cache_resolved_methods_; + __ Ldr(reg.W(), MemOperand(method_reg.X(), + ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); + // temp = temp[index_in_cache]; + __ Ldr(reg.X(), MemOperand(reg, index_in_cache)); + // lr = temp->entry_point_from_quick_compiled_code_; + __ Ldr(lr, MemOperand(reg.X(), ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64WordSize).Int32Value())); + // lr(); + __ Blr(lr); } DCHECK(!IsLeafMethod()); @@ -2294,8 +2296,9 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir } BlockPoolsScope block_pools(GetVIXLAssembler()); - Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0)); - codegen_->GenerateStaticOrDirectCall(invoke, temp); + LocationSummary* locations = invoke->GetLocations(); + codegen_->GenerateStaticOrDirectCall( + invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -2314,14 +2317,8 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { BlockPoolsScope block_pools(GetVIXLAssembler()); - // temp = object->GetClass(); - if (receiver.IsStackSlot()) { - __ Ldr(temp.W(), MemOperand(sp, receiver.GetStackIndex())); - __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); - } else { - DCHECK(receiver.IsRegister()); - __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); - } + DCHECK(receiver.IsRegister()); + __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); @@ -2523,9 +2520,9 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); - locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); locations->SetOut(LocationFrom(x0)); locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2))); CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); } @@ -2535,9 +2532,6 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt); DCHECK(type_index.Is(w0)); - Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimLong); - DCHECK(current_method.Is(x2)); - codegen_->LoadCurrentMethod(current_method.X()); __ Mov(type_index, instruction->GetTypeIndex()); codegen_->InvokeRuntime( GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(), @@ -2552,7 +2546,7 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); - locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } @@ -2561,9 +2555,6 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) LocationSummary* locations = instruction->GetLocations(); Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt); DCHECK(type_index.Is(w0)); - Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimNot); - DCHECK(current_method.Is(w1)); - codegen_->LoadCurrentMethod(current_method.X()); __ Mov(type_index, instruction->GetTypeIndex()); codegen_->InvokeRuntime( GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(), @@ -2674,7 +2665,7 @@ void InstructionCodeGeneratorARM64::VisitParameterValue( void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetOut(LocationFrom(x0)); + locations->SetOut(LocationFrom(kArtMethodRegister)); } void InstructionCodeGeneratorARM64::VisitCurrentMethod( diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index c62ba951cd..f96810ff80 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -147,10 +147,16 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + #undef DECLARE_VISIT_INSTRUCTION - void LoadCurrentMethod(XRegister reg); + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } Arm64Assembler* GetAssembler() const { return assembler_; } vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; } @@ -190,9 +196,17 @@ class LocationsBuilderARM64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleBinaryOp(HBinaryOperation* instr); void HandleFieldSet(HInstruction* instruction); @@ -328,7 +342,6 @@ class CodeGeneratorARM64 : public CodeGenerator { Primitive::Type type = Primitive::kPrimVoid); void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); - void LoadCurrentMethod(vixl::Register current_method); void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src); void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); @@ -344,7 +357,7 @@ class CodeGeneratorARM64 : public CodeGenerator { return false; } - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, vixl::Register temp); + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); private: // Labels for each block that will be compiled. diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 8a7b52e549..e39a1c2bd5 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -527,11 +527,6 @@ void CodeGeneratorX86::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } -void CodeGeneratorX86::LoadCurrentMethod(Register reg) { - DCHECK(RequiresCurrentMethod()); - __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); -} - Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const { switch (load->GetType()) { case Primitive::kPrimLong: @@ -1235,6 +1230,17 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok } HandleInvoke(invoke); + + if (codegen_->IsBaseline()) { + // Baseline does not have enough registers if the current method also + // needs a register. We therefore do not require a register for it, and let + // the code generation of the invoke handle it. + LocationSummary* locations = invoke->GetLocations(); + Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); + if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { + locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + } + } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) { @@ -1255,8 +1261,9 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec return; } + LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( - invoke, invoke->GetLocations()->GetTemp(0).AsRegister<Register>()); + invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -1276,13 +1283,8 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // temp = object->GetClass(); - if (receiver.IsStackSlot()) { - __ movl(temp, Address(ESP, receiver.GetStackIndex())); - __ movl(temp, Address(temp, class_offset)); - } else { - __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); - } + DCHECK(receiver.IsRegister()); + __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ movl(temp, Address(temp, method_offset)); @@ -2961,14 +2963,12 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { locations->SetOut(Location::RegisterLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); - __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(instruction->GetEntrypoint()))); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2981,13 +2981,12 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { locations->SetOut(Location::RegisterLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); } void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(2)); __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(instruction->GetEntrypoint()))); @@ -3201,7 +3200,7 @@ void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Register temp) { + Location temp) { // TODO: Implement all kinds of calls: // 1) boot -> boot // 2) app -> boot @@ -3211,25 +3210,34 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, if (invoke->IsStringInit()) { // temp = thread->string_init_entrypoint - __ fs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset())); + Register reg = temp.AsRegister<Register>(); + __ fs()->movl(reg, Address::Absolute(invoke->GetStringInitOffset())); // (temp + offset_of_quick_compiled_code)() __ call(Address( - temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); + reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); + } else if (invoke->IsRecursive()) { + __ call(GetFrameEntryLabel()); } else { - // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); - // temp = temp[index_in_cache] - __ movl(temp, Address(temp, - CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex()))); - // (temp + offset_of_quick_compiled_code)() - __ call(Address(temp, - ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); + Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + + Register method_reg; + Register reg = temp.AsRegister<Register>(); + if (current_method.IsRegister()) { + method_reg = current_method.AsRegister<Register>(); } else { - __ call(GetFrameEntryLabel()); + DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified()); + DCHECK(!current_method.IsValid()); + method_reg = reg; + __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); } + // temp = temp->dex_cache_resolved_methods_; + __ movl(reg, Address(method_reg, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); + // temp = temp[index_in_cache] + __ movl(reg, Address(reg, + CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex()))); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(reg, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); } DCHECK(!IsLeafMethod()); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 61827a45ab..696d8d549e 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -124,10 +124,16 @@ class LocationsBuilderX86 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); @@ -148,10 +154,16 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + X86Assembler* GetAssembler() const { return assembler_; } private: @@ -263,7 +275,7 @@ class CodeGeneratorX86 : public CodeGenerator { void Move64(Location destination, Location source); // Generate a call to a static or direct method. - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp); + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); // Emit a write barrier. void MarkGCCard(Register temp, @@ -272,8 +284,6 @@ class CodeGeneratorX86 : public CodeGenerator { Register value, bool value_can_be_null); - void LoadCurrentMethod(Register reg); - Label* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index a2a3cf523c..bfc827de1c 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -360,7 +360,7 @@ inline Condition X86_64Condition(IfCondition cond) { } void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - CpuRegister temp) { + Location temp) { // All registers are assumed to be correctly set up. // TODO: Implement all kinds of calls: @@ -371,26 +371,35 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo // Currently we implement the app -> app logic, which looks up in the resolve cache. if (invoke->IsStringInit()) { + CpuRegister reg = temp.AsRegister<CpuRegister>(); // temp = thread->string_init_entrypoint - __ gs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset())); + __ gs()->movl(reg, Address::Absolute(invoke->GetStringInitOffset())); // (temp + offset_of_quick_compiled_code)() - __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( + __ call(Address(reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86_64WordSize).SizeValue())); + } else if (invoke->IsRecursive()) { + __ call(&frame_entry_label_); } else { - // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); - // temp = temp[index_in_cache] - __ movq(temp, Address( - temp, CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex()))); - // (temp + offset_of_quick_compiled_code)() - __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86_64WordSize).SizeValue())); + CpuRegister reg = temp.AsRegister<CpuRegister>(); + Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Register method_reg; + if (current_method.IsRegister()) { + method_reg = current_method.AsRegister<Register>(); } else { - __ call(&frame_entry_label_); - } + DCHECK(invoke->GetLocations()->Intrinsified()); + DCHECK(!current_method.IsValid()); + method_reg = reg.AsRegister(); + __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); + } + // temp = temp->dex_cache_resolved_methods_; + __ movl(reg, Address(CpuRegister(method_reg), + ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); + // temp = temp[index_in_cache] + __ movq(reg, Address( + reg, CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex()))); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kX86_64WordSize).SizeValue())); } DCHECK(!IsLeafMethod()); @@ -585,11 +594,6 @@ void CodeGeneratorX86_64::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } -void CodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) { - DCHECK(RequiresCurrentMethod()); - __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); -} - Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const { switch (load->GetType()) { case Primitive::kPrimLong: @@ -1358,9 +1362,9 @@ void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi return; } + LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( - invoke, - invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>()); + invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -1390,12 +1394,8 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) Location receiver = locations->InAt(0); size_t class_offset = mirror::Object::ClassOffset().SizeValue(); // temp = object->GetClass(); - if (receiver.IsStackSlot()) { - __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex())); - __ movl(temp, Address(temp, class_offset)); - } else { - __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); - } + DCHECK(receiver.IsRegister()); + __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ movq(temp, Address(temp, method_offset)); @@ -3020,13 +3020,12 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(RAX)); } void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), instruction->GetTypeIndex()); __ gs()->call( @@ -3041,14 +3040,13 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetOut(Location::RegisterLocation(RAX)); locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); } void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2))); codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), instruction->GetTypeIndex()); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index c19e686c10..215754cd46 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -134,10 +134,16 @@ class LocationsBuilderX86_64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); @@ -158,10 +164,16 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + X86_64Assembler* GetAssembler() const { return assembler_; } private: @@ -263,8 +275,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Helper method to move a value between two locations. void Move(Location destination, Location source); - void LoadCurrentMethod(CpuRegister reg); - Label* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } @@ -277,7 +287,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { return false; } - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp); + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 07d0dd6b49..92ebf060eb 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -63,7 +63,7 @@ void HInliner::Run() { if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { // We use the original invoke type to ensure the resolution of the called method // works properly. - if (!TryInline(call, call->GetDexMethodIndex(), call->GetOriginalInvokeType())) { + if (!TryInline(call, call->GetDexMethodIndex())) { if (kIsDebugBuild) { std::string callee_name = PrettyMethod(call->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile()); @@ -160,27 +160,29 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resol } } -bool HInliner::TryInline(HInvoke* invoke_instruction, - uint32_t method_index, - InvokeType invoke_type) const { +static uint32_t FindMethodIndexIn(ArtMethod* method, + const DexFile& dex_file, + uint32_t referrer_index) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) { + return method->GetDexMethodIndex(); + } else { + return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index); + } +} + +bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) const { ScopedObjectAccess soa(Thread::Current()); const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); VLOG(compiler) << "Try inlining " << PrettyMethod(method_index, caller_dex_file); - ArtMethod* resolved_method = nullptr; - { - // Don't keep this handle scope on stack, otherwise we cannot do a reference type - // propagation while inlining. - StackHandleScope<2> hs(soa.Self()); - Handle<mirror::DexCache> dex_cache( - hs.NewHandle(caller_compilation_unit_.GetClassLinker()->FindDexCache(caller_dex_file))); - Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(caller_compilation_unit_.GetClassLoader()))); - resolved_method = compiler_driver_->ResolveMethod( - soa, dex_cache, class_loader, &caller_compilation_unit_, method_index, invoke_type); - } + ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); + // We can query the dex cache directly. The verifier has populated it already. + ArtMethod* resolved_method = class_linker->FindDexCache(caller_dex_file)->GetResolvedMethod( + method_index, class_linker->GetImagePointerSize()); if (resolved_method == nullptr) { + // Method cannot be resolved if it is in another dex file we do not have access to. VLOG(compiler) << "Method cannot be resolved " << PrettyMethod(method_index, caller_dex_file); return false; } @@ -190,7 +192,16 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, if (resolved_method == nullptr) { VLOG(compiler) << "Interface or virtual call to " << PrettyMethod(method_index, caller_dex_file) - << "could not be statically determined"; + << " could not be statically determined"; + return false; + } + // We have found a method, but we need to find where that method is for the caller's + // dex file. + method_index = FindMethodIndexIn(resolved_method, caller_dex_file, method_index); + if (method_index == DexFile::kDexNoIndex) { + VLOG(compiler) << "Interface or virtual call to " + << PrettyMethod(resolved_method) + << " cannot be inlined because unaccessible to caller"; return false; } } @@ -245,7 +256,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, return false; } - if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, same_dex_file)) { + if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) { return false; } @@ -256,11 +267,11 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, - uint32_t method_index, bool same_dex_file) const { ScopedObjectAccess soa(Thread::Current()); const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); - const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + const DexFile& callee_dex_file = *resolved_method->GetDexFile(); + uint32_t method_index = resolved_method->GetDexMethodIndex(); DexCompilationUnit dex_compilation_unit( nullptr, @@ -292,13 +303,19 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, } } + InvokeType invoke_type = invoke_instruction->GetOriginalInvokeType(); + if (invoke_type == kInterface) { + // We have statically resolved the dispatch. To please the class linker + // at runtime, we change this call as if it was a virtual call. + invoke_type = kVirtual; + } HGraph* callee_graph = new (graph_->GetArena()) HGraph( graph_->GetArena(), - caller_dex_file, + callee_dex_file, method_index, requires_ctor_barrier, compiler_driver_->GetInstructionSet(), - invoke_instruction->GetOriginalInvokeType(), + invoke_type, graph_->IsDebuggable(), graph_->GetCurrentInstructionId()); @@ -311,7 +328,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, &inline_stats); if (!builder.BuildGraph(*code_item)) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be built, so cannot be inlined"; // There could be multiple reasons why the graph could not be built, including // unaccessible methods/fields due to using a different dex cache. We do not mark @@ -321,14 +338,14 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, compiler_driver_->GetInstructionSet())) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " cannot be inlined because of the register allocator"; resolved_method->SetShouldNotInline(); return false; } if (!callee_graph->TryBuildingSsa()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be transformed to SSA"; resolved_method->SetShouldNotInline(); return false; @@ -368,7 +385,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, // a throw predecessor. HBasicBlock* exit_block = callee_graph->GetExitBlock(); if (exit_block == nullptr) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it has an infinite loop"; resolved_method->SetShouldNotInline(); return false; @@ -382,7 +399,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, } } if (has_throw_predecessor) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because one branch always throws"; resolved_method->SetShouldNotInline(); return false; @@ -393,7 +410,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, for (; !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (block->IsLoopHeader()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it contains a loop"; resolved_method->SetShouldNotInline(); return false; @@ -407,21 +424,21 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, if (current->IsInvokeInterface()) { // Disable inlining of interface calls. The cost in case of entering the // resolution conflict is currently too high. - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it has an interface call."; resolved_method->SetShouldNotInline(); return false; } if (!same_dex_file && current->NeedsEnvironment()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because " << current->DebugName() << " needs an environment and is in a different dex file"; return false; } if (!same_dex_file && current->NeedsDexCache()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because " << current->DebugName() << " it is in a different dex file and requires access to the dex cache"; // Do not flag the method as not-inlineable. A caller within the same diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index ca713329f5..24044b73a1 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -49,10 +49,9 @@ class HInliner : public HOptimization { static constexpr const char* kInlinerPassName = "inliner"; private: - bool TryInline(HInvoke* invoke_instruction, uint32_t method_index, InvokeType invoke_type) const; + bool TryInline(HInvoke* invoke_instruction, uint32_t method_index) const; bool TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, - uint32_t method_index, bool same_dex_file) const; const DexCompilationUnit& outer_compilation_unit_; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index fcb3471821..98a5841f80 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -186,33 +186,92 @@ bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* ins return false; } -void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - if (!check_cast->InputAt(0)->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { - check_cast->ClearMustDoNullCheck(); - } - - if (!load_class->IsResolved()) { +// Returns whether doing a type test between the class of `object` against `klass` has +// a statically known outcome. The result of the test is stored in `outcome`. +static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { + if (!klass->IsResolved()) { // If the class couldn't be resolve it's not safe to compare against it. It's // default type would be Top which might be wider that the actual class type // and thus producing wrong results. - return; + return false; } - ReferenceTypeInfo obj_rti = check_cast->InputAt(0)->GetReferenceTypeInfo(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + + ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); ScopedObjectAccess soa(Thread::Current()); if (class_rti.IsSupertypeOf(obj_rti)) { + *outcome = true; + return true; + } else if (obj_rti.IsExact()) { + // The test failed at compile time so will also fail at runtime. + *outcome = false; + return true; + } else if (!class_rti.IsInterface() && !obj_rti.IsSupertypeOf(class_rti)) { + // Different type hierarchy. The test will fail. + *outcome = false; + return true; + } + return false; +} + +void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { + HInstruction* object = check_cast->InputAt(0); + if (!object->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { + check_cast->ClearMustDoNullCheck(); + } + + if (object->IsNullConstant()) { check_cast->GetBlock()->RemoveInstruction(check_cast); if (stats_ != nullptr) { stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); } + return; + } + + bool outcome; + if (TypeCheckHasKnownOutcome(check_cast->InputAt(1)->AsLoadClass(), object, &outcome)) { + if (outcome) { + check_cast->GetBlock()->RemoveInstruction(check_cast); + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); + } + } else { + // Don't do anything for exceptional cases for now. Ideally we should remove + // all instructions and blocks this instruction dominates. + } } } void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { - if (!instruction->InputAt(0)->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + HInstruction* object = instruction->InputAt(0); + bool can_be_null = true; + if (!object->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + can_be_null = false; instruction->ClearMustDoNullCheck(); } + + HGraph* graph = GetGraph(); + if (object->IsNullConstant()) { + instruction->ReplaceWith(graph->GetIntConstant(0)); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + + bool outcome; + if (TypeCheckHasKnownOutcome(instruction->InputAt(1)->AsLoadClass(), object, &outcome)) { + if (outcome && can_be_null) { + // Type test will succeed, we just need a null test. + HNotEqual* test = new (graph->GetArena()) HNotEqual(graph->GetNullConstant(), object); + instruction->GetBlock()->InsertInstructionBefore(test, instruction); + instruction->ReplaceWith(test); + } else { + // We've statically determined the result of the instanceof. + instruction->ReplaceWith(graph->GetIntConstant(outcome)); + } + RecordSimplification(); + instruction->GetBlock()->RemoveInstruction(instruction); + } } void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index 024462081f..668956a614 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -36,6 +36,9 @@ class InstructionSimplifier : public HOptimization { static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; void Run() OVERRIDE; + + private: + DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier); }; } // namespace art diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 5436ec2dd9..749bedf99e 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -101,7 +101,8 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), + Location::RegisterLocation(kArtMethodRegister)); RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index d1dc5b3843..c108ad5daa 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -110,7 +110,8 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), + LocationFrom(kArtMethodRegister)); RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 5bbbc72020..424ac7c855 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -138,7 +138,8 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX); + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), + Location::RegisterLocation(EAX)); RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; @@ -732,7 +733,8 @@ static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) MoveArguments(invoke, codegen); DCHECK(invoke->IsInvokeStaticOrDirect()); - codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX); + codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), + Location::RegisterLocation(EAX)); codegen->RecordPcInfo(invoke, invoke->GetDexPc()); // Copy the result back to the expected output. diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index d6c90ff510..891531435e 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -129,7 +129,8 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI)); + codegen->GenerateStaticOrDirectCall( + invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI)); RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; @@ -609,7 +610,8 @@ static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invo MoveArguments(invoke, codegen); DCHECK(invoke->IsInvokeStaticOrDirect()); - codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI)); + codegen->GenerateStaticOrDirectCall( + invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI)); codegen->RecordPcInfo(invoke, invoke->GetDexPc()); // Copy the result back to the expected output. diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 09bbb33042..f41a782fe6 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -481,7 +481,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { bool intrinsified = false); void SetInAt(uint32_t at, Location location) { - DCHECK(inputs_.Get(at).IsUnallocated() || inputs_.Get(at).IsInvalid()); inputs_.Put(at, location); } @@ -525,6 +524,8 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return temps_.Size(); } + bool HasTemps() const { return !temps_.IsEmpty(); } + Location Out() const { return output_; } bool CanCall() const { return call_kind_ != kNoCall; } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cd91d2c87b..4baa05c80c 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1510,6 +1510,81 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { invoke->GetBlock()->RemoveInstruction(invoke); } +/* + * Loop will be transformed to: + * old_pre_header + * | + * if_block + * / \ + * dummy_block deopt_block + * \ / + * new_pre_header + * | + * header + */ +void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + + // Need this to avoid critical edge. + HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + // Need this to avoid critical edge. + HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); + AddBlock(if_block); + AddBlock(dummy_block); + AddBlock(deopt_block); + AddBlock(new_pre_header); + + header->ReplacePredecessor(pre_header, new_pre_header); + pre_header->successors_.Reset(); + pre_header->dominated_blocks_.Reset(); + + pre_header->AddSuccessor(if_block); + if_block->AddSuccessor(dummy_block); // True successor + if_block->AddSuccessor(deopt_block); // False successor + dummy_block->AddSuccessor(new_pre_header); + deopt_block->AddSuccessor(new_pre_header); + + pre_header->dominated_blocks_.Add(if_block); + if_block->SetDominator(pre_header); + if_block->dominated_blocks_.Add(dummy_block); + dummy_block->SetDominator(if_block); + if_block->dominated_blocks_.Add(deopt_block); + deopt_block->SetDominator(if_block); + if_block->dominated_blocks_.Add(new_pre_header); + new_pre_header->SetDominator(if_block); + new_pre_header->dominated_blocks_.Add(header); + header->SetDominator(new_pre_header); + + size_t index_of_header = 0; + while (reverse_post_order_.Get(index_of_header) != header) { + index_of_header++; + } + MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1); + reverse_post_order_.Put(index_of_header++, if_block); + reverse_post_order_.Put(index_of_header++, dummy_block); + reverse_post_order_.Put(index_of_header++, deopt_block); + reverse_post_order_.Put(index_of_header++, new_pre_header); + + HLoopInformation* info = pre_header->GetLoopInformation(); + if (info != nullptr) { + if_block->SetLoopInformation(info); + dummy_block->SetLoopInformation(info); + deopt_block->SetLoopInformation(info); + new_pre_header->SetLoopInformation(info); + for (HLoopInformationOutwardIterator loop_it(*pre_header); + !loop_it.Done(); + loop_it.Advance()) { + loop_it.Current()->Add(if_block); + loop_it.Current()->Add(dummy_block); + loop_it.Current()->Add(deopt_block); + loop_it.Current()->Add(new_pre_header); + } + } +} + std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { ScopedObjectAccess soa(Thread::Current()); os << "[" diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 47927340f4..7ef69559de 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -195,6 +195,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // Inline this graph in `outer_graph`, replacing the given `invoke` instruction. void InlineInto(HGraph* outer_graph, HInvoke* invoke); + // Need to add a couple of blocks to test if the loop body is entered and + // put deoptimization instructions, etc. + void TransformLoopHeaderForBCE(HBasicBlock* header); + // Removes `block` from the graph. void DeleteDeadBlock(HBasicBlock* block); @@ -824,7 +828,7 @@ class HLoopInformationOutwardIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HLoopInformationOutwardIterator); }; -#define FOR_EACH_CONCRETE_INSTRUCTION(M) \ +#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ M(Add, BinaryOperation) \ M(And, BinaryOperation) \ M(ArrayGet, Instruction) \ @@ -894,6 +898,21 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) + #define FOR_EACH_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION(M) \ M(Constant, Instruction) \ @@ -1281,6 +1300,9 @@ class ReferenceTypeInfo : ValueObject { bool IsExact() const { return is_exact_; } bool IsTop() const { return is_top_; } + bool IsInterface() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return !IsTop() && GetTypeHandle()->IsInterface(); + } Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } @@ -2461,7 +2483,7 @@ class HInvoke : public HInstruction { intrinsic_ = intrinsic; } - bool IsInlined() const { + bool IsFromInlinedInvoke() const { return GetEnvironment()->GetParent() != nullptr; } @@ -2528,7 +2550,9 @@ class HInvokeStaticOrDirect : public HInvoke { ClinitCheckRequirement clinit_check_requirement) : HInvoke(arena, number_of_arguments, - clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u, + // There is one extra argument for the HCurrentMethod node, and + // potentially one other if the clinit check is explicit. + clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 2u : 1u, return_type, dex_pc, dex_method_index, @@ -2550,6 +2574,7 @@ class HInvokeStaticOrDirect : public HInvoke { bool NeedsDexCache() const OVERRIDE { return !IsRecursive(); } bool IsStringInit() const { return string_init_offset_ != 0; } int32_t GetStringInitOffset() const { return string_init_offset_; } + uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); } // Is this instruction a call to a static method? bool IsStatic() const { @@ -2665,9 +2690,10 @@ class HInvokeInterface : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeInterface); }; -class HNewInstance : public HExpression<0> { +class HNewInstance : public HExpression<1> { public: - HNewInstance(uint32_t dex_pc, + HNewInstance(HCurrentMethod* current_method, + uint32_t dex_pc, uint16_t type_index, const DexFile& dex_file, QuickEntrypointEnum entrypoint) @@ -2675,7 +2701,9 @@ class HNewInstance : public HExpression<0> { dex_pc_(dex_pc), type_index_(type_index), dex_file_(dex_file), - entrypoint_(entrypoint) {} + entrypoint_(entrypoint) { + SetRawInputAt(0, current_method); + } uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } @@ -2718,9 +2746,10 @@ class HNeg : public HUnaryOperation { DISALLOW_COPY_AND_ASSIGN(HNeg); }; -class HNewArray : public HExpression<1> { +class HNewArray : public HExpression<2> { public: HNewArray(HInstruction* length, + HCurrentMethod* current_method, uint32_t dex_pc, uint16_t type_index, const DexFile& dex_file, @@ -2731,6 +2760,7 @@ class HNewArray : public HExpression<1> { dex_file_(dex_file), entrypoint_(entrypoint) { SetRawInputAt(0, length); + SetRawInputAt(1, current_method); } uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } @@ -3573,7 +3603,7 @@ class HLoadClass : public HExpression<1> { bool CanThrow() const OVERRIDE { // May call runtime and and therefore can throw. // TODO: finer grain decision. - return !is_referrers_class_; + return CanCallRuntime(); } ReferenceTypeInfo GetLoadedClassRTI() { @@ -4238,6 +4268,39 @@ class HBlocksInLoopIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator); }; +// Iterator over the blocks that art part of the loop. Includes blocks part +// of an inner loop. The order in which the blocks are iterated is reverse +// post order. +class HBlocksInLoopReversePostOrderIterator : public ValueObject { + public: + explicit HBlocksInLoopReversePostOrderIterator(const HLoopInformation& info) + : blocks_in_loop_(info.GetBlocks()), + blocks_(info.GetHeader()->GetGraph()->GetReversePostOrder()), + index_(0) { + if (!blocks_in_loop_.IsBitSet(blocks_.Get(index_)->GetBlockId())) { + Advance(); + } + } + + bool Done() const { return index_ == blocks_.Size(); } + HBasicBlock* Current() const { return blocks_.Get(index_); } + void Advance() { + ++index_; + for (size_t e = blocks_.Size(); index_ < e; ++index_) { + if (blocks_in_loop_.IsBitSet(blocks_.Get(index_)->GetBlockId())) { + break; + } + } + } + + private: + const BitVector& blocks_in_loop_; + const GrowableArray<HBasicBlock*>& blocks_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator); +}; + inline int64_t Int64FromConstant(HConstant* constant) { DCHECK(constant->IsIntConstant() || constant->IsLongConstant()); return constant->IsIntConstant() ? constant->AsIntConstant()->GetValue() diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index ccf8de9f6a..2d1c0ba9f9 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ +#include "base/arena_object.h" #include "nodes.h" #include "optimizing_compiler_stats.h" @@ -25,7 +26,7 @@ namespace art { /** * Abstraction to implement an optimization pass. */ -class HOptimization : public ValueObject { +class HOptimization : public ArenaObject<kArenaAllocMisc> { public: HOptimization(HGraph* graph, bool is_in_ssa_form, diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index bf0b9fac0f..303a7cb1fd 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -318,49 +318,55 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - HDeadCodeElimination dce1(graph, stats, - HDeadCodeElimination::kInitialDeadCodeEliminationPassName); - HDeadCodeElimination dce2(graph, stats, - HDeadCodeElimination::kFinalDeadCodeEliminationPassName); - HConstantFolding fold1(graph); - InstructionSimplifier simplify1(graph, stats); - HBooleanSimplifier boolean_simplify(graph); - - HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); - - HConstantFolding fold2(graph, "constant_folding_after_inlining"); - SideEffectsAnalysis side_effects(graph); - GVNOptimization gvn(graph, side_effects); - LICM licm(graph, side_effects); - BoundsCheckElimination bce(graph); - ReferenceTypePropagation type_propagation(graph, handles); - InstructionSimplifier simplify2(graph, stats, "instruction_simplifier_after_types"); - InstructionSimplifier simplify3(graph, stats, "last_instruction_simplifier"); - ReferenceTypePropagation type_propagation2(graph, handles); - - IntrinsicsRecognizer intrinsics(graph, driver); + ArenaAllocator* arena = graph->GetArena(); + HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( + graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName); + HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination( + graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName); + HConstantFolding* fold1 = new (arena) HConstantFolding(graph); + InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); + HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph); + + HInliner* inliner = new (arena) HInliner( + graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); + + HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining"); + SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); + GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects); + LICM* licm = new (arena) LICM(graph, *side_effects); + BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph); + ReferenceTypePropagation* type_propagation = + new (arena) ReferenceTypePropagation(graph, handles); + InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_after_types"); + InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( + graph, stats, "last_instruction_simplifier"); + ReferenceTypePropagation* type_propagation2 = + new (arena) ReferenceTypePropagation(graph, handles); + + IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver); HOptimization* optimizations[] = { - &intrinsics, - &dce1, - &fold1, - &simplify1, - &type_propagation, - &simplify2, - &inliner, + intrinsics, + dce1, + fold1, + simplify1, + type_propagation, + simplify2, + inliner, // Run another type propagation phase: inlining will open up more opprotunities // to remove checkast/instanceof and null checks. - &type_propagation2, + type_propagation2, // BooleanSimplifier depends on the InstructionSimplifier removing redundant // suspend checks to recognize empty blocks. - &boolean_simplify, - &fold2, - &side_effects, - &gvn, - &licm, - &bce, - &simplify3, - &dce2, + boolean_simplify, + fold2, + side_effects, + gvn, + licm, + bce, + simplify3, + dce2, }; RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index b6b1bb1cad..b988813f75 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -19,6 +19,7 @@ #include <sstream> #include <string> +#include <type_traits> #include "atomic.h" @@ -38,7 +39,6 @@ enum MethodCompilationStat { kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, kNotCompiledNoCodegen, - kNotCompiledNonSequentialRegPair, kNotCompiledPathological, kNotCompiledSpaceFilter, kNotCompiledUnhandledInstruction, @@ -84,14 +84,15 @@ class OptimizingCompilerStats { for (int i = 0; i < kLastStat; i++) { if (compile_stats_[i] != 0) { - LOG(INFO) << PrintMethodCompilationStat(i) << ": " << compile_stats_[i]; + LOG(INFO) << PrintMethodCompilationStat(static_cast<MethodCompilationStat>(i)) << ": " + << compile_stats_[i]; } } } } private: - std::string PrintMethodCompilationStat(int stat) const { + std::string PrintMethodCompilationStat(MethodCompilationStat stat) const { switch (stat) { case kAttemptCompilation : return "kAttemptCompilation"; case kCompiledBaseline : return "kCompiledBaseline"; @@ -106,7 +107,6 @@ class OptimizingCompilerStats { case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches"; case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen"; - case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair"; case kNotCompiledPathological : return "kNotCompiledPathological"; case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter"; case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; @@ -120,9 +120,12 @@ class OptimizingCompilerStats { case kRemovedCheckedCast: return "kRemovedCheckedCast"; case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; case kRemovedNullCheck: return "kRemovedNullCheck"; - default: LOG(FATAL) << "invalid stat"; + + case kLastStat: break; // Invalid to print out. } - return ""; + LOG(FATAL) << "invalid stat " + << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat); + UNREACHABLE(); } AtomicInteger compile_stats_[kLastStat]; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index a249aa9711..ca928ae0f2 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -86,16 +86,6 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire DCHECK(last_input != nullptr) << "Last input is not HLoadClass. It is " << last_input->DebugName(); - // The static call will initialize the class so there's no need for a clinit check if - // it's the first user. - // There is one special case where we still need the clinit check, when inlining. Because - // currently the callee is responsible for reporting parameters to the GC, the code - // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. - // Therefore we cannot allocate any object in that code, including loading a new class. - if (last_input == invoke->GetPrevious() && !invoke->IsInlined()) { - last_input->SetMustGenerateClinitCheck(false); - } - // Remove a load class instruction as last input of a static // invoke, which has been added (along with a clinit check, // removed by PrepareForRegisterAllocation::VisitClinitCheck @@ -104,10 +94,20 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire // stage (i.e., after inlining has been performed). invoke->RemoveLoadClassAsLastInput(); - // If the load class instruction is no longer used, remove it from - // the graph. - if (!last_input->HasUses() && !(last_input->MustGenerateClinitCheck() && invoke->IsInlined())) { - last_input->GetBlock()->RemoveInstruction(last_input); + // The static call will initialize the class so there's no need for a clinit check if + // it's the first user. + // There is one special case where we still need the clinit check, when inlining. Because + // currently the callee is responsible for reporting parameters to the GC, the code + // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. + // Therefore we cannot allocate any object in that code, including loading a new class. + if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) { + last_input->SetMustGenerateClinitCheck(false); + + // If the load class instruction is no longer used, remove it from + // the graph. + if (!last_input->HasUses()) { + last_input->GetBlock()->RemoveInstruction(last_input); + } } } } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 4edadef1a4..a048c856c5 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -23,6 +23,30 @@ namespace art { +class RTPVisitor : public HGraphDelegateVisitor { + public: + RTPVisitor(HGraph* graph, StackHandleScopeCollection* handles) + : HGraphDelegateVisitor(graph), + handles_(handles) {} + + void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; + void VisitLoadClass(HLoadClass* load_class) OVERRIDE; + void VisitNewArray(HNewArray* instr) OVERRIDE; + void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); + void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact); + void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE; + void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE; + void VisitInvoke(HInvoke* instr) OVERRIDE; + void VisitArrayGet(HArrayGet* instr) OVERRIDE; + void UpdateReferenceTypeInfo(HInstruction* instr, + uint16_t type_idx, + const DexFile& dex_file, + bool is_exact); + + private: + StackHandleScopeCollection* handles_; +}; + void ReferenceTypePropagation::Run() { // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. @@ -35,23 +59,13 @@ void ReferenceTypePropagation::Run() { void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { // TODO: handle other instructions that give type info - // (Call/array accesses) + // (array accesses) + RTPVisitor visitor(graph_, handles_); // Initialize exact types first for faster convergence. for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instr = it.Current(); - // TODO: Make ReferenceTypePropagation a visitor or create a new one. - if (instr->IsNewInstance()) { - VisitNewInstance(instr->AsNewInstance()); - } else if (instr->IsLoadClass()) { - VisitLoadClass(instr->AsLoadClass()); - } else if (instr->IsNewArray()) { - VisitNewArray(instr->AsNewArray()); - } else if (instr->IsInstanceFieldGet()) { - VisitInstanceFieldGet(instr->AsInstanceFieldGet()); - } else if (instr->IsStaticFieldGet()) { - VisitStaticFieldGet(instr->AsStaticFieldGet()); - } + instr->Accept(&visitor); } // Handle Phis. @@ -166,20 +180,21 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { } } -void ReferenceTypePropagation::SetClassAsTypeInfo(HInstruction* instr, - mirror::Class* klass, - bool is_exact) { +void RTPVisitor::SetClassAsTypeInfo(HInstruction* instr, + mirror::Class* klass, + bool is_exact) { if (klass != nullptr) { ScopedObjectAccess soa(Thread::Current()); MutableHandle<mirror::Class> handle = handles_->NewHandle(klass); + is_exact = is_exact || klass->IsFinal(); instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact)); } } -void ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr, - uint16_t type_idx, - const DexFile& dex_file, - bool is_exact) { +void RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr, + uint16_t type_idx, + const DexFile& dex_file, + bool is_exact) { DCHECK_EQ(instr->GetType(), Primitive::kPrimNot); ScopedObjectAccess soa(Thread::Current()); @@ -188,16 +203,16 @@ void ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr, SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact); } -void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) { +void RTPVisitor::VisitNewInstance(HNewInstance* instr) { UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } -void ReferenceTypePropagation::VisitNewArray(HNewArray* instr) { +void RTPVisitor::VisitNewArray(HNewArray* instr) { UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } -void ReferenceTypePropagation::UpdateFieldAccessTypeInfo(HInstruction* instr, - const FieldInfo& info) { +void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr, + const FieldInfo& info) { // The field index is unknown only during tests. if (instr->GetType() != Primitive::kPrimNot || info.GetFieldIndex() == kUnknownFieldIndex) { return; @@ -212,15 +227,15 @@ void ReferenceTypePropagation::UpdateFieldAccessTypeInfo(HInstruction* instr, SetClassAsTypeInfo(instr, klass, /* is_exact */ false); } -void ReferenceTypePropagation::VisitInstanceFieldGet(HInstanceFieldGet* instr) { +void RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) { UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); } -void ReferenceTypePropagation::VisitStaticFieldGet(HStaticFieldGet* instr) { +void RTPVisitor::VisitStaticFieldGet(HStaticFieldGet* instr) { UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); } -void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) { +void RTPVisitor::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile()); @@ -298,6 +313,34 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { return !previous_rti.IsEqual(instr->GetReferenceTypeInfo()); } +void RTPVisitor::VisitInvoke(HInvoke* instr) { + if (instr->GetType() != Primitive::kPrimNot) { + return; + } + + ScopedObjectAccess soa(Thread::Current()); + ClassLinker* cl = Runtime::Current()->GetClassLinker(); + mirror::DexCache* dex_cache = cl->FindDexCache(instr->GetDexFile()); + ArtMethod* method = dex_cache->GetResolvedMethod( + instr->GetDexMethodIndex(), cl->GetImagePointerSize()); + DCHECK(method != nullptr); + mirror::Class* klass = method->GetReturnType(false); + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); +} + +void RTPVisitor::VisitArrayGet(HArrayGet* instr) { + if (instr->GetType() != Primitive::kPrimNot) { + return; + } + + HInstruction* parent = instr->InputAt(0); + ScopedObjectAccess soa(Thread::Current()); + Handle<mirror::Class> handle = parent->GetReferenceTypeInfo().GetTypeHandle(); + if (handle.GetReference() != nullptr && handle->IsObjectArrayClass()) { + SetClassAsTypeInfo(instr, handle->GetComponentType(), /* is_exact */ false); + } +} + void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); // Be sure that we don't go over the bounded type. diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 0a1d4c496e..0d687d25cb 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -40,26 +40,12 @@ class ReferenceTypePropagation : public HOptimization { static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation"; private: - void VisitNewInstance(HNewInstance* new_instance); - void VisitLoadClass(HLoadClass* load_class); - void VisitNewArray(HNewArray* instr); void VisitPhi(HPhi* phi); void VisitBasicBlock(HBasicBlock* block); - void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); - void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact); - void UpdateBoundType(HBoundType* bound_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void UpdatePhi(HPhi* phi) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void BoundTypeForIfNotNull(HBasicBlock* block); void BoundTypeForIfInstanceOf(HBasicBlock* block); - void UpdateReferenceTypeInfo(HInstruction* instr, - uint16_t type_idx, - const DexFile& dex_file, - bool is_exact); - void VisitInstanceFieldGet(HInstanceFieldGet* instr); - void VisitStaticFieldGet(HStaticFieldGet* instr); - void ProcessWorklist(); void AddToWorklist(HInstruction* instr); void AddDependentInstructionsToWorklist(HInstruction* instr); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index a381315bac..e38e49cd19 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -714,13 +714,15 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { if (defined_by != nullptr && !current->IsSplit()) { LocationSummary* locations = defined_by->GetLocations(); if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) { - for (HInputIterator it(defined_by); !it.Done(); it.Advance()) { + for (size_t i = 0, e = defined_by->InputCount(); i < e; ++i) { // Take the last interval of the input. It is the location of that interval // that will be used at `defined_by`. - LiveInterval* interval = it.Current()->GetLiveInterval()->GetLastSibling(); + LiveInterval* interval = defined_by->InputAt(i)->GetLiveInterval()->GetLastSibling(); // Note that interval may have not been processed yet. // TODO: Handle non-split intervals last in the work list. - if (interval->HasRegister() && interval->SameRegisterKind(*current)) { + if (locations->InAt(i).IsValid() + && interval->HasRegister() + && interval->SameRegisterKind(*current)) { // The input must be live until the end of `defined_by`, to comply to // the linear scan algorithm. So we use `defined_by`'s end lifetime // position to check whether the input is dead or is inactive after diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index c4612af393..2a86e60e14 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -184,22 +184,24 @@ void SsaBuilder::FixNullConstantType() { } HInstruction* left = equality_instr->InputAt(0); HInstruction* right = equality_instr->InputAt(1); - HInstruction* null_instr = nullptr; + HInstruction* int_operand = nullptr; - if ((left->GetType() == Primitive::kPrimNot) && right->IsIntConstant()) { - null_instr = right; - } else if ((right->GetType() == Primitive::kPrimNot) && left->IsIntConstant()) { - null_instr = left; + if ((left->GetType() == Primitive::kPrimNot) && (right->GetType() == Primitive::kPrimInt)) { + int_operand = right; + } else if ((right->GetType() == Primitive::kPrimNot) + && (left->GetType() == Primitive::kPrimInt)) { + int_operand = left; } else { continue; } // If we got here, we are comparing against a reference and the int constant // should be replaced with a null constant. - if (null_instr->IsIntConstant()) { - DCHECK_EQ(0, null_instr->AsIntConstant()->GetValue()); - equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), null_instr == right ? 1 : 0); - } + // Both type propagation and redundant phi elimination ensure `int_operand` + // can only be the 0 constant. + DCHECK(int_operand->IsIntConstant()); + DCHECK_EQ(0, int_operand->AsIntConstant()->GetValue()); + equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), int_operand == right ? 1 : 0); } } } @@ -255,21 +257,18 @@ void SsaBuilder::BuildSsa() { PrimitiveTypePropagation type_propagation(GetGraph()); type_propagation.Run(); - // 5) Fix the type for null constants which are part of an equality comparison. - FixNullConstantType(); - - // 6) When creating equivalent phis we copy the inputs of the original phi which - // may be improperly typed. This will be fixed during the type propagation but + // 5) When creating equivalent phis we copy the inputs of the original phi which + // may be improperly typed. This was fixed during the type propagation in 4) but // as a result we may end up with two equivalent phis with the same type for // the same dex register. This pass cleans them up. EquivalentPhisCleanup(); - // 7) Mark dead phis again. Step 4) may have introduced new phis. - // Step 6) might enable the death of new phis. + // 6) Mark dead phis again. Step 4) may have introduced new phis. + // Step 5) might enable the death of new phis. SsaDeadPhiElimination dead_phis(GetGraph()); dead_phis.MarkDeadPhis(); - // 8) Now that the graph is correctly typed, we can get rid of redundant phis. + // 7) Now that the graph is correctly typed, we can get rid of redundant phis. // Note that we cannot do this phase before type propagation, otherwise // we could get rid of phi equivalents, whose presence is a requirement for the // type propagation phase. Note that this is to satisfy statement (a) of the @@ -277,6 +276,13 @@ void SsaBuilder::BuildSsa() { SsaRedundantPhiElimination redundant_phi(GetGraph()); redundant_phi.Run(); + // 8) Fix the type for null constants which are part of an equality comparison. + // We need to do this after redundant phi elimination, to ensure the only cases + // that we can see are reference comparison against 0. The redundant phi + // elimination ensures we do not see a phi taking two 0 constants in a HEqual + // or HNotEqual. + FixNullConstantType(); + // 9) Make sure environments use the right phi "equivalent": a phi marked dead // can have a phi equivalent that is not dead. We must therefore update // all environment uses of the dead phi to use its equivalent. Note that there diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index d5f977feec..701dbb019b 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -242,7 +242,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { HInstruction* input = current->InputAt(i); // Some instructions 'inline' their inputs, that is they do not need // to be materialized. - if (input->HasSsaIndex()) { + if (input->HasSsaIndex() && current->GetLocations()->InAt(i).IsValid()) { live_in->SetBit(input->GetSsaIndex()); input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i); } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 4667825a62..220ee6a8d0 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -394,7 +394,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { first_range_->start_ = from; } else { // Instruction without uses. - DCHECK(!defined_by_->HasNonEnvironmentUses()); + DCHECK(first_use_ == nullptr); DCHECK(from == defined_by_->GetLifetimePosition()); first_range_ = last_range_ = range_search_start_ = new (allocator_) LiveRange(from, from + 2, nullptr); |