166 files changed, 4851 insertions(+), 1729 deletions(-)
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk index b84154b307..ace6a73654 100644 --- a/build/Android.common_build.mk +++ b/build/Android.common_build.mk @@ -68,6 +68,9 @@ art_default_gc_type_cflags := -DART_DEFAULT_GC_TYPE_IS_$(ART_DEFAULT_GC_TYPE) ART_HOST_CFLAGS := ART_TARGET_CFLAGS := +ART_HOST_ASFLAGS := +ART_TARGET_ASFLAGS := + # Clang build support. # Host. @@ -199,6 +202,9 @@ art_cflags := \ -fvisibility=protected \ $(art_default_gc_type_cflags) +# Base set of asflags used by all things ART. +art_asflags := + # Missing declarations: too many at the moment, as we use "extern" quite a bit. # -Wmissing-declarations \ @@ -217,10 +223,12 @@ endif ifeq ($(ART_HEAP_POISONING),true) art_cflags += -DART_HEAP_POISONING=1 + art_asflags += -DART_HEAP_POISONING=1 endif ifeq ($(ART_USE_READ_BARRIER),true) art_cflags += -DART_USE_READ_BARRIER=1 + art_asflags += -DART_USE_READ_BARRIER=1 endif ifeq ($(ART_USE_TLAB),true) @@ -258,11 +266,13 @@ ifndef LIBART_IMG_HOST_BASE_ADDRESS endif ART_HOST_CFLAGS += $(art_cflags) -DART_BASE_ADDRESS=$(LIBART_IMG_HOST_BASE_ADDRESS) ART_HOST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=default +ART_HOST_ASFLAGS += $(art_asflags) ifndef LIBART_IMG_TARGET_BASE_ADDRESS $(error LIBART_IMG_TARGET_BASE_ADDRESS unset) endif ART_TARGET_CFLAGS += $(art_cflags) -DART_TARGET -DART_BASE_ADDRESS=$(LIBART_IMG_TARGET_BASE_ADDRESS) +ART_TARGET_ASFLAGS += $(art_asflags) ART_HOST_NON_DEBUG_CFLAGS := $(art_host_non_debug_cflags) ART_TARGET_NON_DEBUG_CFLAGS := $(art_target_non_debug_cflags) @@ -292,6 +302,7 @@ ART_TARGET_CFLAGS += -DART_BASE_ADDRESS_MAX_DELTA=$(LIBART_IMG_TARGET_MAX_BASE_A # Clear locals now they've served their purpose. art_cflags := +art_asflags := art_debug_cflags := art_non_debug_cflags := art_host_non_debug_cflags := @@ -311,6 +322,7 @@ ART_TARGET_LDFLAGS := define set-target-local-cflags-vars LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) LOCAL_CFLAGS_x86 += $(ART_TARGET_CFLAGS_x86) + LOCAL_ASFLAGS += $(ART_TARGET_ASFLAGS) LOCAL_LDFLAGS += $(ART_TARGET_LDFLAGS) art_target_cflags_ndebug_or_debug := $(1) ifeq ($$(art_target_cflags_ndebug_or_debug),debug) diff --git a/build/Android.executable.mk b/build/Android.executable.mk index dfea6e191e..7b036825e7 100644 --- a/build/Android.executable.mk +++ b/build/Android.executable.mk @@ -70,13 +70,14 @@ define build-art-executable endif ifeq ($$(art_target_or_host),target) - $(call set-target-local-clang-vars) - $(call set-target-local-cflags-vars,$(6)) + $(call set-target-local-clang-vars) + $(call set-target-local-cflags-vars,$(6)) LOCAL_SHARED_LIBRARIES += libdl else # host LOCAL_CLANG := $(ART_HOST_CLANG) LOCAL_LDLIBS := $(ART_HOST_LDLIBS) LOCAL_CFLAGS += $(ART_HOST_CFLAGS) + LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS) ifeq ($$(art_ndebug_or_debug),debug) LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS) else diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index 5052187794..4fc184ecd9 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -291,6 +291,7 @@ LOCAL_MODULE := libart-gtest LOCAL_MODULE_TAGS := optional LOCAL_CPP_EXTENSION := cc LOCAL_CFLAGS := $(ART_HOST_CFLAGS) +LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS) LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/compiler LOCAL_SHARED_LIBRARIES := libartd libartd-compiler @@ -489,6 +490,7 @@ test-art-target-gtest-$$(art_gtest_name): $$(ART_TEST_TARGET_GTEST_$$(art_gtest_ else # host LOCAL_CLANG := $$(ART_HOST_CLANG) LOCAL_CFLAGS += $$(ART_HOST_CFLAGS) 
$$(ART_HOST_DEBUG_CFLAGS) + LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS) LOCAL_SHARED_LIBRARIES += libicuuc-host libicui18n-host libnativehelper libziparchive-host libz-host libvixld LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -lpthread -ldl LOCAL_IS_HOST_MODULE := true diff --git a/build/Android.oat.mk b/build/Android.oat.mk index 710b130282..728469c2c4 100644 --- a/build/Android.oat.mk +++ b/build/Android.oat.mk @@ -113,7 +113,7 @@ $$(core_image_name): $$(HOST_CORE_DEX_LOCATIONS) $$(core_dex2oat_dependency) --oat-location=$$(PRIVATE_CORE_OAT_NAME) --image=$$(PRIVATE_CORE_IMG_NAME) \ --base=$$(LIBART_IMG_HOST_BASE_ADDRESS) --instruction-set=$$($(3)ART_HOST_ARCH) \ --instruction-set-features=$$($(3)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES) \ - --host --android-root=$$(HOST_OUT) --include-patch-information \ + --host --android-root=$$(HOST_OUT) --include-patch-information --generate-debug-info \ $$(PRIVATE_CORE_COMPILE_OPTIONS) $$(core_oat_name): $$(core_image_name) @@ -232,7 +232,7 @@ $$(core_image_name): $$(TARGET_CORE_DEX_FILES) $$(core_dex2oat_dependency) --base=$$(LIBART_IMG_TARGET_BASE_ADDRESS) --instruction-set=$$($(3)TARGET_ARCH) \ --instruction-set-variant=$$($(3)DEX2OAT_TARGET_CPU_VARIANT) \ --instruction-set-features=$$($(3)DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) \ - --android-root=$$(PRODUCT_OUT)/system --include-patch-information \ + --android-root=$$(PRODUCT_OUT)/system --include-patch-information --generate-debug-info \ $$(PRIVATE_CORE_COMPILE_OPTIONS) || (rm $$(PRIVATE_CORE_OAT_NAME); exit 1) $$(core_oat_name): $$(core_image_name) diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc index 1386439f72..6192be75a4 100644 --- a/cmdline/cmdline_parser_test.cc +++ b/cmdline/cmdline_parser_test.cc @@ -262,6 +262,13 @@ TEST_F(CmdlineParserTest, TestLogVerbosity) { EXPECT_SINGLE_PARSE_FAIL("-verbose:blablabla", CmdlineResult::kUsage); // invalid verbose opt { + const char* log_args = "-verbose:deopt"; + LogVerbosity log_verbosity = LogVerbosity(); + log_verbosity.deopt = true; + EXPECT_SINGLE_PARSE_VALUE(log_verbosity, log_args, M::Verbose); + } + + { const char* log_args = "-verbose:oat"; LogVerbosity log_verbosity = LogVerbosity(); log_verbosity.oat = true; diff --git a/compiler/Android.mk b/compiler/Android.mk index 3f5271d31f..67536f00b9 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -234,6 +234,7 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT else # host LOCAL_CLANG := $(ART_HOST_CLANG) LOCAL_CFLAGS += $(ART_HOST_CFLAGS) + LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS) LOCAL_LDLIBS := $(ART_HOST_LDLIBS) ifeq ($$(art_ndebug_or_debug),debug) LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS) diff --git a/compiler/dex/dataflow_iterator-inl.h b/compiler/dex/dataflow_iterator-inl.h index 83dfc28844..e9402e39e5 100644 --- a/compiler/dex/dataflow_iterator-inl.h +++ b/compiler/dex/dataflow_iterator-inl.h @@ -181,10 +181,16 @@ inline BasicBlock* LoopRepeatingTopologicalSortIterator::Next(bool had_change) { idx_ += 1; BasicBlock* bb = mir_graph_->GetBasicBlock((*block_id_list_)[idx]); DCHECK(bb != nullptr); + if ((*loop_ends_)[idx] != 0u) { + // If bb->visited is false, the loop needs to be processed from scratch. + // Otherwise we mark it as recalculating; for a natural loop we will not + // need to recalculate any block in the loop anyway, and for unnatural + // loops we will recalculate the loop head only if one of its predecessors + // actually changes. 
+ bool recalculating = bb->visited; + loop_head_stack_->push_back(std::make_pair(idx, recalculating)); + } if (!bb->visited) { - if ((*loop_ends_)[idx] != 0u) { - loop_head_stack_->push_back(std::make_pair(idx, false)); // Not recalculating. - } return bb; } } diff --git a/compiler/dex/mir_graph_test.cc b/compiler/dex/mir_graph_test.cc index b3ad0407e2..49b7511b42 100644 --- a/compiler/dex/mir_graph_test.cc +++ b/compiler/dex/mir_graph_test.cc @@ -15,6 +15,7 @@ */ #include "compiler_ir.h" +#include "dataflow_iterator-inl.h" #include "mir_graph.h" #include "gtest/gtest.h" @@ -374,4 +375,72 @@ TEST_F(TopologicalSortOrderTest, LoopWithTwoEntryPoints) { CheckLoopEnds(loop_ends); } +TEST_F(TopologicalSortOrderTest, UnnaturalLoops) { + const BBDef bbs[] = { + DEF_BB(kNullBlock, DEF_SUCC0(), DEF_PRED0()), + DEF_BB(kEntryBlock, DEF_SUCC1(3), DEF_PRED0()), + DEF_BB(kExitBlock, DEF_SUCC0(), DEF_PRED1(10)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(5, 4), DEF_PRED1(1)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(5), DEF_PRED2(11, 3)), // Unnatural loop head (top-level). + DEF_BB(kDalvikByteCode, DEF_SUCC1(6), DEF_PRED2(3, 4)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(9, 7), DEF_PRED1(5)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(8), DEF_PRED1(6)), + DEF_BB(kDalvikByteCode, DEF_SUCC1(9), DEF_PRED2(10, 7)), // Unnatural loop head (nested). + DEF_BB(kDalvikByteCode, DEF_SUCC1(10), DEF_PRED2(6, 8)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(8, 11), DEF_PRED1(9)), + DEF_BB(kDalvikByteCode, DEF_SUCC2(4, 2), DEF_PRED1(10)), + }; + const BasicBlockId expected_order[] = { + 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 2 + }; + const uint16_t loop_ends[] = { + 0, 0, 10, 0, 0, 0, 9, 0, 0, 0, 0, + }; + + PrepareBasicBlocks(bbs); + ComputeTopologicalSortOrder(); + CheckOrder(expected_order); + CheckLoopEnds(loop_ends); + + const std::pair<BasicBlockId, bool> expected_and_change[] = { + { 1, false }, + { 3, false }, + { 4, true }, // Initial run of the outer loop. + { 5, true }, + { 6, true }, + { 7, true }, + { 8, true }, // Initial run of the inner loop. + { 9, true }, + { 10, true }, + { 8, true }, // Recalculation of the inner loop - changed. + { 9, true }, + { 10, true }, + { 8, false }, // Recalculation of the inner loop - unchanged. + { 11, true }, + { 4, true }, // Recalculation of the outer loop - changed. + { 5, true }, + { 6, true }, + { 7, false }, // No change: skip inner loop head because inputs are unchanged. + { 9, true }, + { 10, true }, + { 8, true }, // Recalculation of the inner loop - changed. + { 9, true }, + { 10, true }, + { 8, false }, // Recalculation of the inner loop - unchanged. + { 11, true }, + { 4, false }, // Recalculation of the outer loop - unchanged. + { 2, false }, + }; + size_t pos = 0; + LoopRepeatingTopologicalSortIterator iter(cu_.mir_graph.get()); + bool change = false; + for (BasicBlock* bb = iter.Next(change); bb != nullptr; bb = iter.Next(change)) { + ASSERT_NE(arraysize(expected_and_change), pos); + ASSERT_EQ(expected_and_change[pos].first, bb->id) << pos; + change = expected_and_change[pos].second; + ++pos; + } + ASSERT_EQ(arraysize(expected_and_change), pos); +} + } // namespace art diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 7b1ec398d0..645511ed9f 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -1790,7 +1790,8 @@ bool MIRGraph::EliminateSuspendChecks(BasicBlock* bb) { pred_mask_union |= pred_mask; } } - DCHECK_EQ(((1u << (IsLoopHead(bb->id) ? 
bb->nesting_depth - 1u: bb->nesting_depth)) - 1u), + // DCHECK_EQ() may not hold for unnatural loop heads, so use DCHECK_GE(). + DCHECK_GE(((1u << (IsLoopHead(bb->id) ? bb->nesting_depth - 1u: bb->nesting_depth)) - 1u), pred_mask_union); suspend_checks_in_loops &= pred_mask_union; } diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc index ac7a4a7758..6d485986f6 100644 --- a/compiler/dex/verified_method.cc +++ b/compiler/dex/verified_method.cc @@ -98,7 +98,7 @@ bool VerifiedMethod::GenerateGcMap(verifier::MethodVerifier* method_verifier) { } size_t ref_bitmap_bytes = RoundUp(ref_bitmap_bits, kBitsPerByte) / kBitsPerByte; // There are 2 bytes to encode the number of entries. - if (num_entries >= 65536) { + if (num_entries > std::numeric_limits<uint16_t>::max()) { LOG(WARNING) << "Cannot encode GC map for method with " << num_entries << " entries: " << PrettyMethod(method_verifier->GetMethodReference().dex_method_index, *method_verifier->GetMethodReference().dex_file); diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h index 242e3dfe6e..07f9a9bd9f 100644 --- a/compiler/dex/verified_method.h +++ b/compiler/dex/verified_method.h @@ -120,7 +120,7 @@ class VerifiedMethod { DequickenMap dequicken_map_; SafeCastSet safe_cast_set_; - bool has_verification_failures_; + bool has_verification_failures_ = false; // Copy of mapping generated by verifier of dex PCs of string init invocations // to the set of other registers that the receiver has been copied into. diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc index 4971f0ef10..4d423d007f 100644 --- a/compiler/dwarf/dwarf_test.cc +++ b/compiler/dwarf/dwarf_test.cc @@ -26,11 +26,11 @@ namespace art { namespace dwarf { -constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT; - // Run the tests only on host since we need objdump. #ifndef HAVE_ANDROID_OS +constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT; + TEST_F(DwarfTest, DebugFrame) { const bool is64bit = false; diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 32bde8e3b4..73e121f1cd 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -110,10 +110,6 @@ bool ImageWriter::PrepareImageAddressSpace() { CheckNoDexObjects(); } - if (!AllocMemory()) { - return false; - } - if (kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); CheckNonImageClassesRemoved(); @@ -123,6 +119,12 @@ bool ImageWriter::PrepareImageAddressSpace() { CalculateNewObjectOffsets(); Thread::Current()->TransitionFromRunnableToSuspended(kNative); + // This needs to happen after CalculateNewObjectOffsets since it relies on intern_table_bytes_ and + // bin size sums being calculated. + if (!AllocMemory()) { + return false; + } + return true; } @@ -205,7 +207,7 @@ bool ImageWriter::Write(const std::string& image_filename, } // Write out the image bitmap at the page aligned start of the image end. 
- const auto& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap); + const ImageSection& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap); CHECK_ALIGNED(bitmap_section.Offset(), kPageSize); if (!image_file->Write(reinterpret_cast<char*>(image_bitmap_->Begin()), bitmap_section.Size(), bitmap_section.Offset())) { @@ -222,26 +224,10 @@ bool ImageWriter::Write(const std::string& image_filename, return true; } -void ImageWriter::SetImageOffset(mirror::Object* object, - ImageWriter::BinSlot bin_slot, - size_t offset) { +void ImageWriter::SetImageOffset(mirror::Object* object, size_t offset) { DCHECK(object != nullptr); DCHECK_NE(offset, 0U); - mirror::Object* obj = reinterpret_cast<mirror::Object*>(image_->Begin() + offset); - DCHECK_ALIGNED(obj, kObjectAlignment); - static size_t max_offset = 0; - max_offset = std::max(max_offset, offset); - image_bitmap_->Set(obj); // Mark the obj as mutated, since we will end up changing it. - { - // Remember the object-inside-of-the-image's hash code so we can restore it after the copy. - auto hash_it = saved_hashes_map_.find(bin_slot); - if (hash_it != saved_hashes_map_.end()) { - std::pair<BinSlot, uint32_t> slot_hash = *hash_it; - saved_hashes_.push_back(std::make_pair(obj, slot_hash.second)); - saved_hashes_map_.erase(hash_it); - } - } // The object is already deflated from when we set the bin slot. Just overwrite the lock word. object->SetLockWord(LockWord::FromForwardingAddress(offset), false); DCHECK_EQ(object->GetLockWord(false).ReadBarrierState(), 0u); @@ -262,7 +248,7 @@ void ImageWriter::AssignImageOffset(mirror::Object* object, ImageWriter::BinSlot size_t new_offset = image_objects_offset_begin_ + previous_bin_sizes + bin_slot.GetIndex(); DCHECK_ALIGNED(new_offset, kObjectAlignment); - SetImageOffset(object, bin_slot, new_offset); + SetImageOffset(object, new_offset); DCHECK_LT(new_offset, image_end_); } @@ -302,14 +288,14 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) { // No hash, don't need to save it. break; case LockWord::kHashCode: - saved_hashes_map_[bin_slot] = lw.GetHashCode(); + DCHECK(saved_hashcode_map_.find(object) == saved_hashcode_map_.end()); + saved_hashcode_map_.emplace(object, lw.GetHashCode()); break; default: LOG(FATAL) << "Unreachable."; UNREACHABLE(); } - object->SetLockWord(LockWord::FromForwardingAddress(static_cast<uint32_t>(bin_slot)), - false); + object->SetLockWord(LockWord::FromForwardingAddress(bin_slot.Uint32Value()), false); DCHECK_EQ(object->GetLockWord(false).ReadBarrierState(), 0u); DCHECK(IsImageBinSlotAssigned(object)); } @@ -487,11 +473,8 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { ++bin_slot_count_[bin]; - DCHECK_LT(GetBinSizeSum(), image_->Size()); - // Grow the image closer to the end by the object we just assigned. image_end_ += offset_delta; - DCHECK_LT(image_end_, image_->Size()); } bool ImageWriter::WillMethodBeDirty(ArtMethod* m) const { @@ -535,10 +518,8 @@ ImageWriter::BinSlot ImageWriter::GetImageBinSlot(mirror::Object* object) const } bool ImageWriter::AllocMemory() { - auto* runtime = Runtime::Current(); - const size_t heap_size = runtime->GetHeap()->GetTotalMemory(); - // Add linear alloc usage since we need to have room for the ArtFields. 
- const size_t length = RoundUp(heap_size + runtime->GetLinearAlloc()->GetUsedMemory(), kPageSize); + const size_t length = RoundUp(image_objects_offset_begin_ + GetBinSizeSum() + intern_table_bytes_, + kPageSize); std::string error_msg; image_.reset(MemMap::MapAnonymous("image writer image", nullptr, length, PROT_READ | PROT_WRITE, false, false, &error_msg)); @@ -547,9 +528,10 @@ bool ImageWriter::AllocMemory() { return false; } - // Create the image bitmap. - image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create("image bitmap", image_->Begin(), - RoundUp(length, kPageSize))); + // Create the image bitmap, only needs to cover mirror object section which is up to image_end_. + CHECK_LE(image_end_, length); + image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create( + "image bitmap", image_->Begin(), RoundUp(image_end_, kPageSize))); if (image_bitmap_.get() == nullptr) { LOG(ERROR) << "Failed to allocate memory for image bitmap"; return false; @@ -569,42 +551,6 @@ bool ImageWriter::ComputeLazyFieldsForClassesVisitor(Class* c, void* /*arg*/) { return true; } -// Collect all the java.lang.String in the heap and put them in the output strings_ array. -class StringCollector { - public: - StringCollector(Handle<mirror::ObjectArray<mirror::String>> strings, size_t index) - : strings_(strings), index_(index) { - } - static void Callback(Object* obj, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - auto* collector = reinterpret_cast<StringCollector*>(arg); - if (obj->GetClass()->IsStringClass()) { - collector->strings_->SetWithoutChecks<false>(collector->index_++, obj->AsString()); - } - } - size_t GetIndex() const { - return index_; - } - - private: - Handle<mirror::ObjectArray<mirror::String>> strings_; - size_t index_; -}; - -// Compare strings based on length, used for sorting strings by length / reverse length. -class LexicographicalStringComparator { - public: - bool operator()(const mirror::HeapReference<mirror::String>& lhs, - const mirror::HeapReference<mirror::String>& rhs) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - mirror::String* lhs_s = lhs.AsMirrorPtr(); - mirror::String* rhs_s = rhs.AsMirrorPtr(); - uint16_t* lhs_begin = lhs_s->GetValue(); - uint16_t* rhs_begin = rhs_s->GetValue(); - return std::lexicographical_compare(lhs_begin, lhs_begin + lhs_s->GetLength(), - rhs_begin, rhs_begin + rhs_s->GetLength()); - } -}; - void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) { if (!obj->GetClass()->IsStringClass()) { return; @@ -769,7 +715,8 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) { DCHECK_EQ(obj, obj->AsString()->Intern()); return; } - mirror::String* const interned = obj->AsString()->Intern(); + mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrong( + obj->AsString()->Intern()); if (obj != interned) { if (!IsImageBinSlotAssigned(interned)) { // interned obj is after us, allocate its location early @@ -965,7 +912,6 @@ void ImageWriter::CalculateNewObjectOffsets() { // know where image_roots is going to end up image_end_ += RoundUp(sizeof(ImageHeader), kObjectAlignment); // 64-bit-alignment - DCHECK_LT(image_end_, image_->Size()); image_objects_offset_begin_ = image_end_; // Prepare bin slots for dex cache arrays. PrepareDexCacheArraySlots(); @@ -997,7 +943,6 @@ void ImageWriter::CalculateNewObjectOffsets() { // Transform each object's bin slot into an offset which will be used to do the final copy. 
heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this); - DCHECK(saved_hashes_map_.empty()); // All binslot hashes should've been put into vector by now. DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_); @@ -1010,6 +955,11 @@ void ImageWriter::CalculateNewObjectOffsets() { bin_slot_previous_sizes_[native_reloc.bin_type]; } + // Calculate how big the intern table will be after being serialized. + auto* const intern_table = Runtime::Current()->GetInternTable(); + CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings"; + intern_table_bytes_ = intern_table->WriteToMemory(nullptr); + // Note that image_end_ is left at end of used mirror object section. } @@ -1039,6 +989,10 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { CHECK_EQ(image_objects_offset_begin_ + bin_slot_previous_sizes_[kBinArtMethodClean], methods_section->Offset()); cur_pos = methods_section->End(); + // Calculate the size of the interned strings. + auto* interned_strings_section = §ions[ImageHeader::kSectionInternedStrings]; + *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_); + cur_pos = interned_strings_section->End(); // Finally bitmap section. const size_t bitmap_bytes = image_bitmap_->Size(); auto* bitmap_section = §ions[ImageHeader::kSectionImageBitmap]; @@ -1046,16 +1000,19 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { cur_pos = bitmap_section->End(); if (kIsDebugBuild) { size_t idx = 0; - for (auto& section : sections) { + for (const ImageSection& section : sections) { LOG(INFO) << static_cast<ImageHeader::ImageSections>(idx) << " " << section; ++idx; } LOG(INFO) << "Methods: clean=" << clean_methods_ << " dirty=" << dirty_methods_; } + const size_t image_end = static_cast<uint32_t>(interned_strings_section->End()); + CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) << + "Oat file should be right after the image."; // Create the header. 
new (image_->Begin()) ImageHeader( - PointerToLowMemUInt32(image_begin_), static_cast<uint32_t>(methods_section->End()), sections, - image_roots_address_, oat_file_->GetOatHeader().GetChecksum(), + PointerToLowMemUInt32(image_begin_), image_end, + sections, image_roots_address_, oat_file_->GetOatHeader().GetChecksum(), PointerToLowMemUInt32(oat_file_begin), PointerToLowMemUInt32(oat_data_begin_), PointerToLowMemUInt32(oat_data_end), PointerToLowMemUInt32(oat_file_end), target_ptr_size_, compile_pic_); @@ -1068,6 +1025,37 @@ ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) { return reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); } +class FixupRootVisitor : public RootVisitor { + public: + explicit FixupRootVisitor(ImageWriter* image_writer) : image_writer_(image_writer) { + } + + void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + *roots[i] = ImageAddress(*roots[i]); + } + } + + void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, + const RootInfo& info ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + roots[i]->Assign(ImageAddress(roots[i]->AsMirrorPtr())); + } + } + + private: + ImageWriter* const image_writer_; + + mirror::Object* ImageAddress(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + const size_t offset = image_writer_->GetImageOffset(obj); + auto* const dest = reinterpret_cast<Object*>(image_writer_->image_begin_ + offset); + VLOG(compiler) << "Update root from " << obj << " to " << dest; + return dest; + } +}; + void ImageWriter::CopyAndFixupNativeData() { // Copy ArtFields and methods to their locations and update the array for convenience. for (auto& pair : native_object_reloc_) { @@ -1088,7 +1076,7 @@ void ImageWriter::CopyAndFixupNativeData() { } // Fixup the image method roots. auto* image_header = reinterpret_cast<ImageHeader*>(image_->Begin()); - const auto& methods_section = image_header->GetMethodsSection(); + const ImageSection& methods_section = image_header->GetMethodsSection(); for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) { auto* m = image_methods_[i]; CHECK(m != nullptr); @@ -1101,18 +1089,35 @@ void ImageWriter::CopyAndFixupNativeData() { auto* dest = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), dest); } + // Write the intern table into the image. + const ImageSection& intern_table_section = image_header->GetImageSection( + ImageHeader::kSectionInternedStrings); + InternTable* const intern_table = Runtime::Current()->GetInternTable(); + uint8_t* const memory_ptr = image_->Begin() + intern_table_section.Offset(); + const size_t intern_table_bytes = intern_table->WriteToMemory(memory_ptr); + // Fixup the pointers in the newly written intern table to contain image addresses. + InternTable temp_table; + // Note that we require that ReadFromMemory does not make an internal copy of the elements so that + // the VisitRoots() will update the memory directly rather than the copies. + // This also relies on visit roots not doing any verification which could fail after we update + // the roots to be the image addresses. 
+ temp_table.ReadFromMemory(memory_ptr); + CHECK_EQ(temp_table.Size(), intern_table->Size()); + FixupRootVisitor visitor(this); + temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots); + CHECK_EQ(intern_table_bytes, intern_table_bytes_); } void ImageWriter::CopyAndFixupObjects() { gc::Heap* heap = Runtime::Current()->GetHeap(); heap->VisitObjects(CopyAndFixupObjectsCallback, this); // Fix up the object previously had hash codes. - for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) { + for (const auto& hash_pair : saved_hashcode_map_) { Object* obj = hash_pair.first; DCHECK_EQ(obj->GetLockWord<kVerifyNone>(false).ReadBarrierState(), 0U); obj->SetLockWord<kVerifyNone>(LockWord::FromHashCode(hash_pair.second, 0U), false); } - saved_hashes_.clear(); + saved_hashcode_map_.clear(); } void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) { @@ -1155,18 +1160,22 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a } void ImageWriter::CopyAndFixupObject(Object* obj) { - // see GetLocalAddress for similar computation size_t offset = GetImageOffset(obj); auto* dst = reinterpret_cast<Object*>(image_->Begin() + offset); - const uint8_t* src = reinterpret_cast<const uint8_t*>(obj); + DCHECK_LT(offset, image_end_); + const auto* src = reinterpret_cast<const uint8_t*>(obj); + + image_bitmap_->Set(dst); // Mark the obj as live. - size_t n = obj->SizeOf(); + const size_t n = obj->SizeOf(); DCHECK_LE(offset + n, image_->Size()); memcpy(dst, src, n); // Write in a hash code of objects which have inflated monitors or a hash code in their monitor // word. - dst->SetLockWord(LockWord::Default(), false); + const auto it = saved_hashcode_map_.find(obj); + dst->SetLockWord(it != saved_hashcode_map_.end() ? + LockWord::FromHashCode(it->second, 0u) : LockWord::Default(), false); FixupObject(obj, dst); } @@ -1176,7 +1185,7 @@ class FixupVisitor { FixupVisitor(ImageWriter* image_writer, Object* copy) : image_writer_(image_writer), copy_(copy) { } - void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const + void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) { Object* ref = obj->GetFieldObject<Object, kVerifyNone>(offset); // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the @@ -1186,7 +1195,7 @@ class FixupVisitor { } // java.lang.ref.Reference visitor. 
- void operator()(mirror::Class* /*klass*/, mirror::Reference* ref) const + void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) { copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>( @@ -1490,4 +1499,11 @@ uint32_t ImageWriter::BinSlot::GetIndex() const { return lockword_ & ~kBinMask; } +uint8_t* ImageWriter::GetOatFileBegin() const { + DCHECK_GT(intern_table_bytes_, 0u); + return image_begin_ + RoundUp( + image_end_ + bin_slot_sizes_[kBinArtField] + bin_slot_sizes_[kBinArtMethodDirty] + + bin_slot_sizes_[kBinArtMethodClean] + intern_table_bytes_, kPageSize); +} + } // namespace art diff --git a/compiler/image_writer.h b/compiler/image_writer.h index a35d6ad9c9..9d45ce2bd4 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -54,7 +54,7 @@ class ImageWriter FINAL { quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic), target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), bin_slot_sizes_(), bin_slot_previous_sizes_(), bin_slot_count_(), - dirty_methods_(0u), clean_methods_(0u) { + intern_table_bytes_(0u), dirty_methods_(0u), clean_methods_(0u) { CHECK_NE(image_begin, 0U); std::fill(image_methods_, image_methods_ + arraysize(image_methods_), nullptr); } @@ -84,11 +84,7 @@ class ImageWriter FINAL { image_begin_ + RoundUp(sizeof(ImageHeader), kObjectAlignment) + it->second + offset); } - uint8_t* GetOatFileBegin() const { - return image_begin_ + RoundUp( - image_end_ + bin_slot_sizes_[kBinArtField] + bin_slot_sizes_[kBinArtMethodDirty] + - bin_slot_sizes_[kBinArtMethodClean], kPageSize); - } + uint8_t* GetOatFileBegin() const; bool Write(const std::string& image_filename, const std::string& oat_filename, const std::string& oat_location) @@ -158,7 +154,7 @@ class ImageWriter FINAL { // The offset in bytes from the beginning of the bin. Aligned to object size. uint32_t GetIndex() const; // Pack into a single uint32_t, for storing into a lock word. - explicit operator uint32_t() const { return lockword_; } + uint32_t Uint32Value() const { return lockword_; } // Comparison operator for map support bool operator<(const BinSlot& other) const { return lockword_ < other.lockword_; } @@ -170,7 +166,7 @@ class ImageWriter FINAL { // We use the lock word to store the offset of the object in the image. void AssignImageOffset(mirror::Object* object, BinSlot bin_slot) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void SetImageOffset(mirror::Object* object, BinSlot bin_slot, size_t offset) + void SetImageOffset(mirror::Object* object, size_t offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool IsImageOffsetAssigned(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -330,11 +326,9 @@ class ImageWriter FINAL { // The start offsets of the dex cache arrays. SafeMap<const DexFile*, size_t> dex_cache_array_starts_; - // Saved hashes (objects are inside of the image so that they don't move). - std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_; - - // Saved hashes (objects are bin slots to inside of the image, not yet allocated an address). - std::map<BinSlot, uint32_t> saved_hashes_map_; + // Saved hash codes. We use these to restore lockwords which were temporarily used to have + // forwarding addresses as well as copying over hash codes. 
+ std::unordered_map<mirror::Object*, uint32_t> saved_hashcode_map_; // Beginning target oat address for the pointers from the output image to its oat file. const uint8_t* oat_data_begin_; @@ -360,6 +354,9 @@ class ImageWriter FINAL { size_t bin_slot_previous_sizes_[kBinSize]; // Number of bytes in previous bins. size_t bin_slot_count_[kBinSize]; // Number of objects in a bin + // Cached size of the intern table for when we allocate memory. + size_t intern_table_bytes_; + // ArtField, ArtMethod relocating map. These are allocated as array of structs but we want to // have one entry per art field for convenience. ArtFields are placed right after the end of the // image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields. @@ -376,8 +373,9 @@ class ImageWriter FINAL { uint64_t dirty_methods_; uint64_t clean_methods_; - friend class FixupVisitor; friend class FixupClassVisitor; + friend class FixupRootVisitor; + friend class FixupVisitor; DISALLOW_COPY_AND_ASSIGN(ImageWriter); }; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index b2b54965b5..9d5e3fdca1 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -126,11 +126,14 @@ class ValueBound : public ValueObject { return instruction_ == bound.instruction_ && constant_ == bound.constant_; } - static HInstruction* FromArrayLengthToNewArrayIfPossible(HInstruction* instruction) { - // Null check on the NewArray should have been eliminated by instruction - // simplifier already. - if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { - return instruction->InputAt(0)->AsNewArray(); + static HInstruction* FromArrayLengthToArray(HInstruction* instruction) { + DCHECK(instruction->IsArrayLength() || instruction->IsNewArray()); + if (instruction->IsArrayLength()) { + HInstruction* input = instruction->InputAt(0); + if (input->IsNullCheck()) { + input = input->AsNullCheck()->InputAt(0); + } + return input; } return instruction; } @@ -146,8 +149,9 @@ class ValueBound : public ValueObject { // Some bounds are created with HNewArray* as the instruction instead // of HArrayLength*. They are treated the same. - instruction1 = FromArrayLengthToNewArrayIfPossible(instruction1); - instruction2 = FromArrayLengthToNewArrayIfPossible(instruction2); + // HArrayLength with the same array input are considered equal also. + instruction1 = FromArrayLengthToArray(instruction1); + instruction2 = FromArrayLengthToArray(instruction2); return instruction1 == instruction2; } @@ -271,7 +275,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // Loop header of loop_info. Exiting loop is normal. return false; } - const GrowableArray<HBasicBlock*> successors = block->GetSuccessors(); + const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors(); for (size_t i = 0; i < successors.Size(); i++) { if (!loop_info->Contains(*successors.Get(i))) { // One of the successors exits the loop. @@ -293,8 +297,14 @@ class ArrayAccessInsideLoopFinder : public ValueObject { void Run() { HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation(); - for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) { - HBasicBlock* block = it_loop.Current(); + HBlocksInLoopReversePostOrderIterator it_loop(*loop_info); + HBasicBlock* block = it_loop.Current(); + DCHECK(block == induction_variable_->GetBlock()); + // Skip loop header. 
Since narrowed value range of a MonotonicValueRange only + // applies to the loop body (after the test at the end of the loop header). + it_loop.Advance(); + for (; !it_loop.Done(); it_loop.Advance()) { + block = it_loop.Current(); DCHECK(block->IsInLoop()); if (!DominatesAllBackEdges(block, loop_info)) { // In order not to trigger deoptimization unnecessarily, make sure @@ -308,30 +318,35 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // that the loop will loop through the full monotonic value range from // initial_ to end_. So adding deoptimization might be too aggressive and can // trigger deoptimization unnecessarily even if the loop won't actually throw - // AIOOBE. Otherwise, the loop induction variable is going to cover the full - // monotonic value range from initial_ to end_, and deoptimizations are added - // iff the loop will throw AIOOBE. + // AIOOBE. found_array_length_ = nullptr; return; } for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr; instruction = instruction->GetNext()) { - if (!instruction->IsArrayGet() && !instruction->IsArraySet()) { + if (!instruction->IsBoundsCheck()) { continue; } - HInstruction* index = instruction->InputAt(1); - if (!index->IsBoundsCheck()) { + + HInstruction* length_value = instruction->InputAt(1); + if (length_value->IsIntConstant()) { + // TODO: may optimize for constant case. continue; } - HArrayLength* array_length = index->InputAt(1)->AsArrayLength(); - if (array_length == nullptr) { - DCHECK(index->InputAt(1)->IsIntConstant()); - // TODO: may optimize for constant case. + DCHECK(!length_value->IsPhi()); + if (length_value->IsPhi()) { + // Outer loop shouldn't collect bounds checks inside inner + // loop because the inner loop body doen't dominate + // outer loop's back edges. However just to be on the safe side, + // if there are any such cases, we just skip over them. continue; } + DCHECK(length_value->IsArrayLength()); + HArrayLength* array_length = length_value->AsArrayLength(); + HInstruction* array = array_length->InputAt(0); if (array->IsNullCheck()) { array = array->AsNullCheck()->InputAt(0); @@ -347,7 +362,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { continue; } - index = index->AsBoundsCheck()->InputAt(0); + HInstruction* index = instruction->AsBoundsCheck()->InputAt(0); HInstruction* left = index; int32_t right = 0; if (left == induction_variable_ || @@ -375,7 +390,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // The instruction that corresponds to a MonotonicValueRange. HInstruction* induction_variable_; - // The array length of the array that's accessed inside the loop. + // The array length of the array that's accessed inside the loop body. HArrayLength* found_array_length_; // The lowest and highest constant offsets relative to induction variable @@ -411,6 +426,8 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { ValueBound GetLower() const { return lower_; } ValueBound GetUpper() const { return upper_; } + bool IsConstantValueRange() { return lower_.IsConstant() && upper_.IsConstant(); } + // If it's certain that this value range fits in other_range. 
virtual bool FitsIn(ValueRange* other_range) const { if (other_range == nullptr) { @@ -495,13 +512,30 @@ class MonotonicValueRange : public ValueRange { ValueBound GetBound() const { return bound_; } void SetEnd(HInstruction* end) { end_ = end; } void SetInclusive(bool inclusive) { inclusive_ = inclusive; } - HBasicBlock* GetLoopHead() const { + HBasicBlock* GetLoopHeader() const { DCHECK(induction_variable_->GetBlock()->IsLoopHeader()); return induction_variable_->GetBlock(); } MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; } + HBasicBlock* GetLoopHeaderSuccesorInLoop() { + HBasicBlock* header = GetLoopHeader(); + HInstruction* instruction = header->GetLastInstruction(); + DCHECK(instruction->IsIf()); + HIf* h_if = instruction->AsIf(); + HLoopInformation* loop_info = header->GetLoopInformation(); + bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor()); + bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor()); + + // Just in case it's some strange loop structure. + if (true_successor_in_loop && false_successor_in_loop) { + return nullptr; + } + DCHECK(true_successor_in_loop || false_successor_in_loop); + return false_successor_in_loop ? h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor(); + } + // If it's certain that this value range fits in other_range. bool FitsIn(ValueRange* other_range) const OVERRIDE { if (other_range == nullptr) { @@ -593,12 +627,114 @@ class MonotonicValueRange : public ValueRange { } } + // Try to add HDeoptimize's in the loop pre-header first to narrow this range. + // For example, this loop: + // + // for (int i = start; i < end; i++) { + // array[i - 1] = array[i] + array[i + 1]; + // } + // + // will be transformed to: + // + // int array_length_in_loop_body_if_needed; + // if (start >= end) { + // array_length_in_loop_body_if_needed = 0; + // } else { + // if (start < 1) deoptimize(); + // if (array == null) deoptimize(); + // array_length = array.length; + // if (end > array_length - 1) deoptimize; + // array_length_in_loop_body_if_needed = array_length; + // } + // for (int i = start; i < end; i++) { + // // No more null check and bounds check. + // // array.length value is replaced with array_length_in_loop_body_if_needed + // // in the loop body. + // array[i - 1] = array[i] + array[i + 1]; + // } + // + // We basically first go through the loop body and find those array accesses whose + // index is at a constant offset from the induction variable ('i' in the above example), + // and update offset_low and offset_high along the way. We then add the following + // deoptimizations in the loop pre-header (suppose end is not inclusive). + // if (start < -offset_low) deoptimize(); + // if (end >= array.length - offset_high) deoptimize(); + // It might be necessary to first hoist array.length (and the null check on it) out of + // the loop with another deoptimization. + // + // In order not to trigger deoptimization unnecessarily, we want to make a strong + // guarantee that no deoptimization is triggered if the loop body itself doesn't + // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop + // body must throw AIOOBE). + // This is achieved by the following: + // 1) We only process loops that iterate through the full monotonic range from + // initial_ to end_. We do the following checks to make sure that's the case: + // a) The loop doesn't have early exit (via break, return, etc.) + // b) The increment_ is 1/-1. An increment of 2, for example, may skip end_. 
+ // 2) We only collect array accesses of blocks in the loop body that dominate + // all loop back edges, these array accesses are guaranteed to happen + // at each loop iteration. + // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses + // when the induction variable is at initial_ and end_ must be in a legal range. + // Since the added deoptimizations are basically checking the induction variable + // at initial_ and end_ values, no deoptimization will be triggered either. + // + // A special case is the loop body isn't entered at all. In that case, we may still + // add deoptimization due to the analysis described above. In order not to trigger + // deoptimization, we do a test between initial_ and end_ first and skip over + // the added deoptimization. + ValueRange* NarrowWithDeoptimization() { + if (increment_ != 1 && increment_ != -1) { + // In order not to trigger deoptimization unnecessarily, we want to + // make sure the loop iterates through the full range from initial_ to + // end_ so that boundaries are covered by the loop. An increment of 2, + // for example, may skip end_. + return this; + } + + if (end_ == nullptr) { + // No full info to add deoptimization. + return this; + } + + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + if (!initial_->GetBlock()->Dominates(pre_header) || + !end_->GetBlock()->Dominates(pre_header)) { + // Can't add a check in loop pre-header if the value isn't available there. + return this; + } + + ArrayAccessInsideLoopFinder finder(induction_variable_); + + if (!finder.HasFoundArrayLength()) { + // No array access was found inside the loop that can benefit + // from deoptimization. + return this; + } + + if (!AddDeoptimization(finder)) { + return this; + } + + // After added deoptimizations, induction variable fits in + // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. + ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); + ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); + // We've narrowed the range after added deoptimizations. + return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); + } + // Returns true if adding a (constant >= value) check for deoptimization // is allowed and will benefit compiled code. - bool CanAddDeoptimizationConstant(HInstruction* value, - int32_t constant, - bool* is_proven) { + bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) { *is_proven = false; + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + DCHECK(value->GetBlock()->Dominates(pre_header)); + // See if we can prove the relationship first. if (value->IsIntConstant()) { if (value->AsIntConstant()->GetValue() >= constant) { @@ -615,22 +751,118 @@ class MonotonicValueRange : public ValueRange { return true; } + // Try to filter out cases that the loop entry test will never be true. 
+ bool LoopEntryTestUseful() { + if (initial_->IsIntConstant() && end_->IsIntConstant()) { + int32_t initial_val = initial_->AsIntConstant()->GetValue(); + int32_t end_val = end_->AsIntConstant()->GetValue(); + if (increment_ == 1) { + if (inclusive_) { + return initial_val > end_val; + } else { + return initial_val >= end_val; + } + } else { + DCHECK_EQ(increment_, -1); + if (inclusive_) { + return initial_val < end_val; + } else { + return initial_val <= end_val; + } + } + } + return true; + } + + // Returns the block for adding deoptimization. + HBasicBlock* TransformLoopForDeoptimizationIfNeeded() { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + // Deoptimization is only added when both initial_ and end_ are defined + // before the loop. + DCHECK(initial_->GetBlock()->Dominates(pre_header)); + DCHECK(end_->GetBlock()->Dominates(pre_header)); + + // If it can be proven the loop body is definitely entered (unless exception + // is thrown in the loop header for which triggering deoptimization is fine), + // there is no need for tranforming the loop. In that case, deoptimization + // will just be added in the loop pre-header. + if (!LoopEntryTestUseful()) { + return pre_header; + } + + HGraph* graph = header->GetGraph(); + graph->TransformLoopHeaderForBCE(header); + HBasicBlock* new_pre_header = header->GetDominator(); + DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader()); + HBasicBlock* if_block = new_pre_header->GetDominator(); + HBasicBlock* dummy_block = if_block->GetSuccessors().Get(0); // True successor. + HBasicBlock* deopt_block = if_block->GetSuccessors().Get(1); // False successor. + + dummy_block->AddInstruction(new (graph->GetArena()) HGoto()); + deopt_block->AddInstruction(new (graph->GetArena()) HGoto()); + new_pre_header->AddInstruction(new (graph->GetArena()) HGoto()); + return deopt_block; + } + + // Adds a test between initial_ and end_ to see if the loop body is entered. + // If the loop body isn't entered at all, it jumps to the loop pre-header (after + // transformation) to avoid any deoptimization. + void AddLoopBodyEntryTest() { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + HBasicBlock* if_block = pre_header->GetDominator(); + HGraph* graph = header->GetGraph(); + + HCondition* cond; + if (increment_ == 1) { + if (inclusive_) { + cond = new (graph->GetArena()) HGreaterThan(initial_, end_); + } else { + cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_); + } + } else { + DCHECK_EQ(increment_, -1); + if (inclusive_) { + cond = new (graph->GetArena()) HLessThan(initial_, end_); + } else { + cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_); + } + } + HIf* h_if = new (graph->GetArena()) HIf(cond); + if_block->AddInstruction(cond); + if_block->AddInstruction(h_if); + } + // Adds a check that (value >= constant), and HDeoptimize otherwise. 
void AddDeoptimizationConstant(HInstruction* value, - int32_t constant) { - HBasicBlock* block = induction_variable_->GetBlock(); - DCHECK(block->IsLoopHeader()); - HGraph* graph = block->GetGraph(); - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + int32_t constant, + HBasicBlock* deopt_block, + bool loop_entry_test_block_added) { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + if (loop_entry_test_block_added) { + DCHECK(deopt_block->GetSuccessors().Get(0) == pre_header); + } else { + DCHECK(deopt_block == pre_header); + } + HGraph* graph = header->GetGraph(); + HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); + if (loop_entry_test_block_added) { + DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors().Get(1)); + } + HIntConstant* const_instr = graph->GetIntConstant(constant); HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr); HDeoptimize* deoptimize = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); + deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction()); deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + suspend_check->GetEnvironment(), header); } // Returns true if adding a (value <= array_length + offset) check for deoptimization @@ -640,6 +872,26 @@ class MonotonicValueRange : public ValueRange { int32_t offset, bool* is_proven) { *is_proven = false; + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + DCHECK(value->GetBlock()->Dominates(pre_header)); + + if (array_length->GetBlock() == header) { + // array_length_in_loop_body_if_needed only has correct value when the loop + // body is entered. We bail out in this case. Usually array_length defined + // in the loop header is already hoisted by licm. + return false; + } else { + // array_length is defined either before the loop header already, or in + // the loop body since it's used in the loop body. If it's defined in the loop body, + // a phi array_length_in_loop_body_if_needed is used to replace it. In that case, + // all the uses of array_length must be dominated by its definition in the loop + // body. array_length_in_loop_body_if_needed is guaranteed to be the same as + // array_length once the loop body is entered so all the uses of the phi will + // use the correct value. + } + if (offset > 0) { // There might be overflow issue. // TODO: handle this, possibly with some distance relationship between @@ -667,56 +919,99 @@ class MonotonicValueRange : public ValueRange { // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise. 
void AddDeoptimizationArrayLength(HInstruction* value, HArrayLength* array_length, - int32_t offset) { - HBasicBlock* block = induction_variable_->GetBlock(); - DCHECK(block->IsLoopHeader()); - HGraph* graph = block->GetGraph(); - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + int32_t offset, + HBasicBlock* deopt_block, + bool loop_entry_test_block_added) { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + if (loop_entry_test_block_added) { + DCHECK(deopt_block->GetSuccessors().Get(0) == pre_header); + } else { + DCHECK(deopt_block == pre_header); + } + HGraph* graph = header->GetGraph(); + HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); // We may need to hoist null-check and array_length out of loop first. - if (!array_length->GetBlock()->Dominates(pre_header)) { + if (!array_length->GetBlock()->Dominates(deopt_block)) { + // array_length must be defined in the loop body. + DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock())); + DCHECK(array_length->GetBlock() != header); + HInstruction* array = array_length->InputAt(0); HNullCheck* null_check = array->AsNullCheck(); if (null_check != nullptr) { array = null_check->InputAt(0); } - // We've already made sure array is defined before the loop when collecting + // We've already made sure the array is defined before the loop when collecting // array accesses for the loop. - DCHECK(array->GetBlock()->Dominates(pre_header)); - if (null_check != nullptr && !null_check->GetBlock()->Dominates(pre_header)) { + DCHECK(array->GetBlock()->Dominates(deopt_block)); + if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) { // Hoist null check out of loop with a deoptimization. HNullConstant* null_constant = graph->GetNullConstant(); HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant); // TODO: for one dex_pc, share the same deoptimization slow path. HDeoptimize* null_check_deoptimize = new (graph->GetArena()) HDeoptimize(null_check_cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(null_check_cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore( - null_check_deoptimize, pre_header->GetLastInstruction()); + deopt_block->InsertInstructionBefore( + null_check_cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore( + null_check_deoptimize, deopt_block->GetLastInstruction()); // Eliminate null check in the loop. null_check->ReplaceWith(array); null_check->GetBlock()->RemoveInstruction(null_check); null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + suspend_check->GetEnvironment(), header); } - // Hoist array_length out of loop. - array_length->MoveBefore(pre_header->GetLastInstruction()); + + HArrayLength* new_array_length = new (graph->GetArena()) HArrayLength(array); + deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction()); + + if (loop_entry_test_block_added) { + // Replace array_length defined inside the loop body with a phi + // array_length_in_loop_body_if_needed. This is a synthetic phi so there is + // no vreg number for it. + HPhi* phi = new (graph->GetArena()) HPhi( + graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt); + // Set to 0 if the loop body isn't entered. 
+ phi->SetRawInputAt(0, graph->GetIntConstant(0)); + // Set to array.length if the loop body is entered. + phi->SetRawInputAt(1, new_array_length); + pre_header->AddPhi(phi); + array_length->ReplaceWith(phi); + // Make sure phi is only used after the loop body is entered. + if (kIsDebugBuild) { + for (HUseIterator<HInstruction*> it(phi->GetUses()); + !it.Done(); + it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock())); + } + } + } else { + array_length->ReplaceWith(new_array_length); + } + + array_length->GetBlock()->RemoveInstruction(array_length); + // Use new_array_length for deopt. + array_length = new_array_length; } - HIntConstant* offset_instr = graph->GetIntConstant(offset); - HAdd* add = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); - HCondition* cond = new (graph->GetArena()) HGreaterThan(value, add); - HDeoptimize* deoptimize = new (graph->GetArena()) - HDeoptimize(cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(add, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); - deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + HInstruction* added = array_length; + if (offset != 0) { + HIntConstant* offset_instr = graph->GetIntConstant(offset); + added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); + deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction()); + } + HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added); + HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc()); + deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction()); + deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header); } - // Add deoptimizations in loop pre-header with the collected array access + // Adds deoptimizations in loop pre-header with the collected array access // data so that value ranges can be established in loop body. // Returns true if deoptimizations are successfully added, or if it's proven // it's not necessary. @@ -733,70 +1028,60 @@ class MonotonicValueRange : public ValueRange { return false; } + HBasicBlock* deopt_block; + bool loop_entry_test_block_added = false; bool is_constant_proven, is_length_proven; + + HInstruction* const_comparing_instruction; + int32_t const_compared_to; + HInstruction* array_length_comparing_instruction; + int32_t array_length_offset; if (increment_ == 1) { // Increasing from initial_ to end_. - int32_t offset = inclusive_ ? -offset_high - 1 : -offset_high; - if (CanAddDeoptimizationConstant(initial_, -offset_low, &is_constant_proven) && - CanAddDeoptimizationArrayLength(end_, array_length, offset, &is_length_proven)) { - if (!is_constant_proven) { - AddDeoptimizationConstant(initial_, -offset_low); - } - if (!is_length_proven) { - AddDeoptimizationArrayLength(end_, array_length, offset); + const_comparing_instruction = initial_; + const_compared_to = -offset_low; + array_length_comparing_instruction = end_; + array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high; + } else { + const_comparing_instruction = end_; + const_compared_to = inclusive_ ? 
-offset_low : -offset_low - 1; + array_length_comparing_instruction = initial_; + array_length_offset = -offset_high - 1; + } + + if (CanAddDeoptimizationConstant(const_comparing_instruction, + const_compared_to, + &is_constant_proven) && + CanAddDeoptimizationArrayLength(array_length_comparing_instruction, + array_length, + array_length_offset, + &is_length_proven)) { + if (!is_constant_proven || !is_length_proven) { + deopt_block = TransformLoopForDeoptimizationIfNeeded(); + loop_entry_test_block_added = (deopt_block != pre_header); + if (loop_entry_test_block_added) { + // Loop body may be entered. + AddLoopBodyEntryTest(); } - return true; } - } else if (increment_ == -1) { - // Decreasing from initial_ to end_. - int32_t constant = inclusive_ ? -offset_low : -offset_low - 1; - if (CanAddDeoptimizationConstant(end_, constant, &is_constant_proven) && - CanAddDeoptimizationArrayLength( - initial_, array_length, -offset_high - 1, &is_length_proven)) { - if (!is_constant_proven) { - AddDeoptimizationConstant(end_, constant); - } - if (!is_length_proven) { - AddDeoptimizationArrayLength(initial_, array_length, -offset_high - 1); - } - return true; + if (!is_constant_proven) { + AddDeoptimizationConstant(const_comparing_instruction, + const_compared_to, + deopt_block, + loop_entry_test_block_added); + } + if (!is_length_proven) { + AddDeoptimizationArrayLength(array_length_comparing_instruction, + array_length, + array_length_offset, + deopt_block, + loop_entry_test_block_added); } + return true; } return false; } - // Try to add HDeoptimize's in the loop pre-header first to narrow this range. - ValueRange* NarrowWithDeoptimization() { - if (increment_ != 1 && increment_ != -1) { - // TODO: possibly handle overflow/underflow issues with deoptimization. - return this; - } - - if (end_ == nullptr) { - // No full info to add deoptimization. - return this; - } - - ArrayAccessInsideLoopFinder finder(induction_variable_); - - if (!finder.HasFoundArrayLength()) { - // No array access was found inside the loop that can benefit - // from deoptimization. - return this; - } - - if (!AddDeoptimization(finder)) { - return this; - } - - // After added deoptimizations, induction variable fits in - // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. - ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); - ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); - // We've narrowed the range after added deoptimizations. - return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); - } - private: HPhi* const induction_variable_; // Induction variable for this monotonic value range. HInstruction* const initial_; // Initial value. @@ -819,12 +1104,17 @@ class BCEVisitor : public HGraphVisitor { // it's likely some AIOOBE will be thrown. static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024; + // Added blocks for loop body entry test. 
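
As a side note (a sketch, not part of the patch): the refactoring above funnels both loop directions into a single deoptimization path; when a bound cannot be proven, a guard is emitted in the deopt block and the per-iteration bounds check is removed from the body. A minimal C++ analogue of the resulting loop shape, assuming a hypothetical Deoptimize() stand-in for HDeoptimize:

// Sketch only; the real pass works on the HIR graph, not on raw arrays.
void Deoptimize() { /* assumption: stand-in for the HDeoptimize slow path */ }

void StoreAll(int* array, int array_length, int n) {
  // Hoisted guard emitted before the loop header: give up if the induction
  // variable could reach array_length.
  if (n > array_length) {
    Deoptimize();  // fall back to the fully checked version of the loop
    return;
  }
  for (int i = 0; i < n; ++i) {
    array[i] = 10;  // per-iteration bounds check eliminated
  }
}
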
+ bool IsAddedBlock(HBasicBlock* block) const { + return block->GetBlockId() >= initial_block_size_; + } + explicit BCEVisitor(HGraph* graph) - : HGraphVisitor(graph), - maps_(graph->GetBlocks().Size()), - need_to_revisit_block_(false) {} + : HGraphVisitor(graph), maps_(graph->GetBlocks().Size()), + need_to_revisit_block_(false), initial_block_size_(graph->GetBlocks().Size()) {} void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + DCHECK(!IsAddedBlock(block)); first_constant_index_bounds_check_map_.clear(); HGraphVisitor::VisitBasicBlock(block); if (need_to_revisit_block_) { @@ -839,6 +1129,10 @@ class BCEVisitor : public HGraphVisitor { private: // Return the map of proven value ranges at the beginning of a basic block. ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) { + if (IsAddedBlock(basic_block)) { + // Added blocks don't keep value ranges. + return nullptr; + } int block_id = basic_block->GetBlockId(); if (maps_.at(block_id) == nullptr) { std::unique_ptr<ArenaSafeMap<int, ValueRange*>> map( @@ -853,8 +1147,12 @@ class BCEVisitor : public HGraphVisitor { ValueRange* LookupValueRange(HInstruction* instruction, HBasicBlock* basic_block) { while (basic_block != nullptr) { ArenaSafeMap<int, ValueRange*>* map = GetValueRangeMap(basic_block); - if (map->find(instruction->GetId()) != map->end()) { - return map->Get(instruction->GetId()); + if (map != nullptr) { + if (map->find(instruction->GetId()) != map->end()) { + return map->Get(instruction->GetId()); + } + } else { + DCHECK(IsAddedBlock(basic_block)); } basic_block = basic_block->GetDominator(); } @@ -971,7 +1269,7 @@ class BCEVisitor : public HGraphVisitor { if (left_range != nullptr) { left_monotonic_range = left_range->AsMonotonicValueRange(); if (left_monotonic_range != nullptr) { - HBasicBlock* loop_head = left_monotonic_range->GetLoopHead(); + HBasicBlock* loop_head = left_monotonic_range->GetLoopHeader(); if (instruction->GetBlock() != loop_head) { // For monotonic value range, don't handle `instruction` // if it's not defined in the loop header. @@ -1013,7 +1311,7 @@ class BCEVisitor : public HGraphVisitor { // Update the info for monotonic value range. if (left_monotonic_range->GetInductionVariable() == left && left_monotonic_range->GetIncrement() < 0 && - block == left_monotonic_range->GetLoopHead() && + block == left_monotonic_range->GetLoopHeader() && instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { left_monotonic_range->SetEnd(right); left_monotonic_range->SetInclusive(cond == kCondLT); @@ -1047,7 +1345,7 @@ class BCEVisitor : public HGraphVisitor { // Update the info for monotonic value range. 
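
For context (a simplified sketch, not part of the patch): the LookupValueRange change above now tolerates blocks that carry no map at all, namely the blocks BCE itself adds, by simply skipping them while walking up the dominator tree. The stand-in types below are illustrative only:

#include <map>

struct Block {
  Block* dominator = nullptr;
  std::map<int, int>* ranges = nullptr;  // nullptr models a BCE-added block
};

// Returns the first range recorded for `instruction_id` along the dominator chain.
int* LookupRange(int instruction_id, Block* block) {
  for (; block != nullptr; block = block->dominator) {
    if (block->ranges == nullptr) {
      continue;  // added block: keeps no value ranges, keep walking up
    }
    auto it = block->ranges->find(instruction_id);
    if (it != block->ranges->end()) {
      return &it->second;
    }
  }
  return nullptr;  // nothing proven anywhere up the chain
}
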
if (left_monotonic_range->GetInductionVariable() == left && left_monotonic_range->GetIncrement() > 0 && - block == left_monotonic_range->GetLoopHead() && + block == left_monotonic_range->GetLoopHeader() && instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { left_monotonic_range->SetEnd(right); left_monotonic_range->SetInclusive(cond == kCondGT); @@ -1083,7 +1381,16 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); - DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength()); + DCHECK(array_length->IsIntConstant() || + array_length->IsArrayLength() || + array_length->IsPhi()); + + if (array_length->IsPhi()) { + // Input 1 of the phi contains the real array.length once the loop body is + // entered. That value will be used for bound analysis. The graph is still + // strickly in SSA form. + array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength(); + } if (!index->IsIntConstant()) { ValueRange* index_range = LookupValueRange(index, block); @@ -1238,25 +1545,26 @@ class BCEVisitor : public HGraphVisitor { } if (left_range->IsMonotonicValueRange() && - block == left_range->AsMonotonicValueRange()->GetLoopHead()) { + block == left_range->AsMonotonicValueRange()->GetLoopHeader()) { // The comparison is for an induction variable in the loop header. DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable()); - HBasicBlock* loop_body_successor; - if (LIKELY(block->GetLoopInformation()-> - Contains(*instruction->IfFalseSuccessor()))) { - loop_body_successor = instruction->IfFalseSuccessor(); - } else { - loop_body_successor = instruction->IfTrueSuccessor(); + HBasicBlock* loop_body_successor = + left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop(); + if (loop_body_successor == nullptr) { + // In case it's some strange loop structure. + return; } ValueRange* new_left_range = LookupValueRange(left, loop_body_successor); - if (new_left_range == left_range) { + if ((new_left_range == left_range) || + // Range narrowed with deoptimization is usually more useful than + // a constant range. + new_left_range->IsConstantValueRange()) { // We are not successful in narrowing the monotonic value range to // a regular value range. Try using deoptimization. new_left_range = left_range->AsMonotonicValueRange()-> NarrowWithDeoptimization(); if (new_left_range != left_range) { - GetValueRangeMap(instruction->IfFalseSuccessor())-> - Overwrite(left->GetId(), new_left_range); + GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range); } } } @@ -1511,6 +1819,9 @@ class BCEVisitor : public HGraphVisitor { // eliminate those bounds checks. bool need_to_revisit_block_; + // Initial number of blocks. + int32_t initial_block_size_; + DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; @@ -1527,7 +1838,22 @@ void BoundsCheckElimination::Run() { // value can be narrowed further down in the dominator tree. // // TODO: only visit blocks that dominate some array accesses. - visitor.VisitReversePostOrder(); + HBasicBlock* last_visited_block = nullptr; + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* current = it.Current(); + if (current == last_visited_block) { + // We may insert blocks into the reverse post order list when processing + // a loop header. Don't process it again. 
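
In plain terms (an illustrative sketch, not part of the patch): VisitBoundsCheck above reads input 1 of the synthetic phi because that is the value the loop body actually sees; input 0 is the constant 0 used when the body is skipped entirely.

// Sketch of what the synthetic phi encodes (names are illustrative only).
int LengthSeenByBoundsAnalysis(bool loop_body_entered, int real_array_length) {
  // phi(0, real_array_length): only the second input matters once the body runs.
  return loop_body_entered ? real_array_length : 0;
}
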
+ DCHECK(current->IsLoopHeader()); + continue; + } + if (visitor.IsAddedBlock(current)) { + // Skip added blocks. Their effects are already taken care of. + continue; + } + visitor.VisitBasicBlock(current); + last_visited_block = current; + } } } // namespace art diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 48090a3de4..4701bddd48 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -440,22 +440,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -464,6 +458,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); @@ -472,6 +467,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -481,6 +477,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // array[i] = 10; // Can't eliminate due to overflow concern. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2(graph); bounds_check_elimination_with_increment_2.Run(); @@ -489,6 +486,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. 
} graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph); bounds_check_elimination_with_increment_2_from_1.Run(); @@ -579,22 +577,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph2(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph2(&allocator, &bounds_check, 0); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -603,6 +595,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, -1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); @@ -611,6 +604,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_less_than(graph); bounds_check_elimination_with_less_than.Run(); @@ -619,6 +613,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. 
} graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph); bounds_check_elimination_increment_minus_2.Run(); @@ -646,8 +641,13 @@ static HGraph* BuildSSAGraph3(ArenaAllocator* allocator, HBasicBlock* block = new (allocator) HBasicBlock(graph); graph->AddBlock(block); entry->AddSuccessor(block); - HInstruction* new_array = new (allocator) - HNewArray(constant_10, 0, Primitive::kPrimInt, graph->GetDexFile(), kQuickAllocArray); + HInstruction* new_array = new (allocator) HNewArray( + constant_10, + graph->GetCurrentMethod(), + 0, + Primitive::kPrimInt, + graph->GetDexFile(), + kQuickAllocArray); block->AddInstruction(new_array); block->AddInstruction(new (allocator) HGoto()); @@ -705,15 +705,17 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // int[] array = new int[10]; // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -723,6 +725,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -732,6 +735,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_8(graph); bounds_check_elimination_increment_8.Run(); @@ -823,22 +827,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph4(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph4(&allocator, &bounds_check, 0); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. 
} graph = BuildSSAGraph4(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -847,6 +845,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -1022,6 +1021,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_body_add->AddSuccessor(outer_header); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); // gvn should remove the same bounds check. ASSERT_FALSE(IsRemoved(bounds_check1)); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index f98029da03..1f9287cbfc 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -723,10 +723,16 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, } } - invoke = new (arena_) HInvokeStaticOrDirect( - arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index, - is_recursive, string_init_offset, invoke_type, optimized_invoke_type, - clinit_check_requirement); + invoke = new (arena_) HInvokeStaticOrDirect(arena_, + number_of_arguments, + return_type, + dex_pc, + target_method.dex_method_index, + is_recursive, + string_init_offset, + invoke_type, + optimized_invoke_type, + clinit_check_requirement); } size_t start_index = 0; @@ -748,13 +754,11 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) { Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]); bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble); - if (!is_range && is_wide && args[i] + 1 != args[i + 1]) { - LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol() - << " at " << dex_pc; - // We do not implement non sequential register pair. - MaybeRecordStat(MethodCompilationStat::kNotCompiledNonSequentialRegPair); - return false; - } + // Longs and doubles should be in pairs, that is, sequential registers. The verifier should + // reject any class where this is violated. + DCHECK(is_range || !is_wide || (args[i] + 1 == args[i + 1])) + << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol() + << " at " << dex_pc; HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type); invoke->SetArgumentAt(argument_index, arg); if (is_wide) { @@ -763,6 +767,11 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, } DCHECK_EQ(argument_index, number_of_arguments); + if (invoke->IsInvokeStaticOrDirect()) { + invoke->SetArgumentAt(argument_index, graph_->GetCurrentMethod()); + argument_index++; + } + if (clinit_check_requirement == HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit) { // Add the class initialization check as last input of `invoke`. DCHECK(clinit_check != nullptr); @@ -1045,6 +1054,7 @@ void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc, ? 
kQuickAllocArrayWithAccessCheck : kQuickAllocArray; HInstruction* object = new (arena_) HNewArray(length, + graph_->GetCurrentMethod(), dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), @@ -2003,7 +2013,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 : kQuickAllocObject; current_block_->AddInstruction(new (arena_) HNewInstance( - dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), entrypoint)); + graph_->GetCurrentMethod(), + dex_pc, + type_index, + *dex_compilation_unit_->GetDexFile(), + entrypoint)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } break; @@ -2015,8 +2029,12 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index) ? kQuickAllocArrayWithAccessCheck : kQuickAllocArray; - current_block_->AddInstruction(new (arena_) HNewArray( - length, dex_pc, type_index, *dex_compilation_unit_->GetDexFile(), entrypoint)); + current_block_->AddInstruction(new (arena_) HNewArray(length, + graph_->GetCurrentMethod(), + dex_pc, + type_index, + *dex_compilation_unit_->GetDexFile(), + entrypoint)); UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction()); break; } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 792ad9b544..130f0e970f 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -292,7 +292,6 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor) { ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena(); LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kCall); - locations->AddTemp(visitor->GetMethodLocation()); for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { HInstruction* input = invoke->InputAt(i); @@ -300,6 +299,20 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( } locations->SetOut(visitor->GetReturnLocation(invoke->GetType())); + + if (invoke->IsInvokeStaticOrDirect()) { + HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect(); + if (call->IsStringInit()) { + locations->AddTemp(visitor->GetMethodLocation()); + } else if (call->IsRecursive()) { + locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation()); + } else { + locations->AddTemp(visitor->GetMethodLocation()); + locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister()); + } + } else { + locations->AddTemp(visitor->GetMethodLocation()); + } } void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const { diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index ec0d56d291..d14594562e 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1274,11 +1274,6 @@ void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok HandleInvoke(invoke); } -void CodeGeneratorARM::LoadCurrentMethod(Register reg) { - DCHECK(RequiresCurrentMethod()); - __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); -} - static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) { if (invoke->GetLocations()->Intrinsified()) { IntrinsicCodeGeneratorARM intrinsic(codegen); @@ -1297,9 +1292,9 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec return; } - Register temp = 
invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); - - codegen_->GenerateStaticOrDirectCall(invoke, temp); + LocationSummary* locations = invoke->GetLocations(); + codegen_->GenerateStaticOrDirectCall( + invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -1330,12 +1325,8 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); // temp = object->GetClass(); - if (receiver.IsStackSlot()) { - __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex()); - __ LoadFromOffset(kLoadWord, temp, temp, class_offset); - } else { - __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); - } + DCHECK(receiver.IsRegister()); + __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset( @@ -2724,13 +2715,12 @@ void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(R0)); } void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime(GetThreadOffset<kArmWordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, @@ -2743,14 +2733,13 @@ void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetOut(Location::RegisterLocation(R0)); locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); } void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(2)); __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex()); codegen_->InvokeRuntime(GetThreadOffset<kArmWordSize>(instruction->GetEntrypoint()).Int32Value(), instruction, @@ -4216,9 +4205,7 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } -void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp) { - DCHECK_EQ(temp, kArtMethodRegister); - +void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { // TODO: Implement all kinds of calls: // 1) boot -> boot // 2) app -> boot @@ -4227,32 +4214,40 @@ void 
CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, // Currently we implement the app -> app logic, which looks up in the resolve cache. if (invoke->IsStringInit()) { + Register reg = temp.AsRegister<Register>(); // temp = thread->string_init_entrypoint - __ LoadFromOffset(kLoadWord, temp, TR, invoke->GetStringInitOffset()); + __ LoadFromOffset(kLoadWord, reg, TR, invoke->GetStringInitOffset()); // LR = temp[offset_of_quick_compiled_code] - __ LoadFromOffset(kLoadWord, LR, temp, + __ LoadFromOffset(kLoadWord, LR, reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kArmWordSize).Int32Value()); // LR() __ blx(LR); + } else if (invoke->IsRecursive()) { + __ bl(GetFrameEntryLabel()); } else { - // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset( - kLoadWord, temp, temp, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); - // temp = temp[index_in_cache] - __ LoadFromOffset( - kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); - // LR = temp[offset_of_quick_compiled_code] - __ LoadFromOffset(kLoadWord, LR, temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArmWordSize).Int32Value()); - // LR() - __ blx(LR); + Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Register method_reg; + Register reg = temp.AsRegister<Register>(); + if (current_method.IsRegister()) { + method_reg = current_method.AsRegister<Register>(); } else { - __ bl(GetFrameEntryLabel()); + DCHECK(invoke->GetLocations()->Intrinsified()); + DCHECK(!current_method.IsValid()); + method_reg = reg; + __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); } + // reg = current_method->dex_cache_resolved_methods_; + __ LoadFromOffset( + kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()); + // reg = reg[index_in_cache] + __ LoadFromOffset( + kLoadWord, reg, reg, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())); + // LR = reg[offset_of_quick_compiled_code] + __ LoadFromOffset(kLoadWord, LR, reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArmWordSize).Int32Value()); + // LR() + __ blx(LR); } DCHECK(!IsLeafMethod()); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index c5a28bacce..1599a23568 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -139,10 +139,16 @@ class LocationsBuilderARM : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr); - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); @@ -163,10 +169,16 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr); - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION 
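
The header changes here and in the other backends split the instruction list into a common part and a per-architecture part, and give each visitor a fatal catch-all for instructions the backend should never see. A compilable miniature of the macro pattern (sketch only, with made-up instruction names):

#define FOR_EACH_COMMON_INSTRUCTION(M) M(Add) M(Mul)
#define FOR_EACH_ARCH_ONLY_INSTRUCTION(M) M(ArchSpecialOp)

#define DECLARE_VISIT(name) void Visit##name();
struct MiniVisitor {
  FOR_EACH_COMMON_INSTRUCTION(DECLARE_VISIT)
  FOR_EACH_ARCH_ONLY_INSTRUCTION(DECLARE_VISIT)
  void VisitInstruction() {
    // The real visitors LOG(FATAL) here: reaching this means an instruction
    // outside the lists above leaked into this backend.
  }
};
#undef DECLARE_VISIT
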
+ void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + ArmAssembler* GetAssembler() const { return assembler_; } private: @@ -271,9 +283,6 @@ class CodeGeneratorARM : public CodeGenerator { // Helper method to move a 64bits value between two locations. void Move64(Location destination, Location source); - // Load current method into `reg`. - void LoadCurrentMethod(Register reg); - // Generate code to invoke a runtime entry point. void InvokeRuntime( int32_t offset, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path); @@ -303,7 +312,7 @@ class CodeGeneratorARM : public CodeGenerator { Label* GetFrameEntryLabel() { return &frame_entry_label_; } - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp); + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); private: // Labels for each block that will be compiled. diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 2f607f70a3..3c8f117011 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -484,7 +484,7 @@ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type } Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const { - return LocationFrom(x0); + return LocationFrom(kArtMethodRegister); } CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, @@ -1071,12 +1071,6 @@ void CodeGeneratorARM64::StoreRelease(Primitive::Type type, } } -void CodeGeneratorARM64::LoadCurrentMethod(vixl::Register current_method) { - DCHECK(RequiresCurrentMethod()); - CHECK(current_method.IsX()); - __ Ldr(current_method, MemOperand(sp, kCurrentMethodStackOffset)); -} - void CodeGeneratorARM64::InvokeRuntime(int32_t entry_point_offset, HInstruction* instruction, uint32_t dex_pc, @@ -2242,9 +2236,8 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege return false; } -void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp) { +void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. - DCHECK(temp.Is(kArtMethodRegister)); size_t index_in_cache = GetCachePointerOffset(invoke->GetDexMethodIndex()); // TODO: Implement all kinds of calls: @@ -2255,30 +2248,39 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok // Currently we implement the app -> app logic, which looks up in the resolve cache. 
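
With LoadCurrentMethod removed, every backend's GenerateStaticOrDirectCall now distinguishes the same three shapes, and the current method arrives as an explicit HIR input (or, in a few cases such as intrinsified calls, is reloaded from the stack). A rough C++ summary, not part of the patch, with illustrative names:

#include <cstdio>

enum class DirectCallKind { kStringInit, kRecursive, kViaDexCache };

void DescribeDispatch(DirectCallKind kind) {
  switch (kind) {
    case DirectCallKind::kStringInit:
      // Entrypoint is read from a thread-local slot; only a temp is needed.
      std::puts("temp = thread->string_init_entrypoint; call temp->code");
      break;
    case DirectCallKind::kRecursive:
      // Calling the method being compiled: branch to its own frame entry label.
      std::puts("branch to frame_entry_label_");
      break;
    case DirectCallKind::kViaDexCache:
      // Current method (HIR input) -> dex cache -> resolved method -> code.
      std::puts("method->dex_cache_resolved_methods_[index]; call code");
      break;
  }
}
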
if (invoke->IsStringInit()) { + Register reg = XRegisterFrom(temp); // temp = thread->string_init_entrypoint - __ Ldr(temp.X(), MemOperand(tr, invoke->GetStringInitOffset())); + __ Ldr(reg.X(), MemOperand(tr, invoke->GetStringInitOffset())); // LR = temp->entry_point_from_quick_compiled_code_; __ Ldr(lr, MemOperand( - temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value())); + reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value())); // lr() __ Blr(lr); + } else if (invoke->IsRecursive()) { + __ Bl(&frame_entry_label_); } else { - // temp = method; - LoadCurrentMethod(temp.X()); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ Ldr(temp.W(), MemOperand(temp.X(), - ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); - // temp = temp[index_in_cache]; - __ Ldr(temp.X(), MemOperand(temp, index_in_cache)); - // lr = temp->entry_point_from_quick_compiled_code_; - __ Ldr(lr, MemOperand(temp.X(), ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArm64WordSize).Int32Value())); - // lr(); - __ Blr(lr); + Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Register reg = XRegisterFrom(temp); + Register method_reg; + if (current_method.IsRegister()) { + method_reg = XRegisterFrom(current_method); } else { - __ Bl(&frame_entry_label_); + DCHECK(invoke->GetLocations()->Intrinsified()); + DCHECK(!current_method.IsValid()); + method_reg = reg; + __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset)); } + + // temp = current_method->dex_cache_resolved_methods_; + __ Ldr(reg.W(), MemOperand(method_reg.X(), + ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); + // temp = temp[index_in_cache]; + __ Ldr(reg.X(), MemOperand(reg, index_in_cache)); + // lr = temp->entry_point_from_quick_compiled_code_; + __ Ldr(lr, MemOperand(reg.X(), ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64WordSize).Int32Value())); + // lr(); + __ Blr(lr); } DCHECK(!IsLeafMethod()); @@ -2294,8 +2296,9 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir } BlockPoolsScope block_pools(GetVIXLAssembler()); - Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0)); - codegen_->GenerateStaticOrDirectCall(invoke, temp); + LocationSummary* locations = invoke->GetLocations(); + codegen_->GenerateStaticOrDirectCall( + invoke, locations->HasTemps() ? 
locations->GetTemp(0) : Location::NoLocation()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -2314,14 +2317,8 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { BlockPoolsScope block_pools(GetVIXLAssembler()); - // temp = object->GetClass(); - if (receiver.IsStackSlot()) { - __ Ldr(temp.W(), MemOperand(sp, receiver.GetStackIndex())); - __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); - } else { - DCHECK(receiver.IsRegister()); - __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); - } + DCHECK(receiver.IsRegister()); + __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); @@ -2523,9 +2520,9 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); - locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); locations->SetOut(LocationFrom(x0)); locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2))); CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>(); } @@ -2535,9 +2532,6 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt); DCHECK(type_index.Is(w0)); - Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimLong); - DCHECK(current_method.Is(x2)); - codegen_->LoadCurrentMethod(current_method.X()); __ Mov(type_index, instruction->GetTypeIndex()); codegen_->InvokeRuntime( GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(), @@ -2552,7 +2546,7 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0))); - locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>(); } @@ -2561,9 +2555,6 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) LocationSummary* locations = instruction->GetLocations(); Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt); DCHECK(type_index.Is(w0)); - Register current_method = RegisterFrom(locations->GetTemp(1), Primitive::kPrimNot); - DCHECK(current_method.Is(w1)); - codegen_->LoadCurrentMethod(current_method.X()); __ Mov(type_index, instruction->GetTypeIndex()); codegen_->InvokeRuntime( GetThreadOffset<kArm64WordSize>(instruction->GetEntrypoint()).Int32Value(), @@ -2674,7 +2665,7 @@ void InstructionCodeGeneratorARM64::VisitParameterValue( void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - locations->SetOut(LocationFrom(x0)); + 
locations->SetOut(LocationFrom(kArtMethodRegister)); } void InstructionCodeGeneratorARM64::VisitCurrentMethod( diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index c62ba951cd..f96810ff80 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -147,10 +147,16 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + #undef DECLARE_VISIT_INSTRUCTION - void LoadCurrentMethod(XRegister reg); + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } Arm64Assembler* GetAssembler() const { return assembler_; } vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; } @@ -190,9 +196,17 @@ class LocationsBuilderARM64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleBinaryOp(HBinaryOperation* instr); void HandleFieldSet(HInstruction* instruction); @@ -328,7 +342,6 @@ class CodeGeneratorARM64 : public CodeGenerator { Primitive::Type type = Primitive::kPrimVoid); void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src); void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); - void LoadCurrentMethod(vixl::Register current_method); void LoadAcquire(HInstruction* instruction, vixl::CPURegister dst, const vixl::MemOperand& src); void StoreRelease(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst); @@ -344,7 +357,7 @@ class CodeGeneratorARM64 : public CodeGenerator { return false; } - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, vixl::Register temp); + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); private: // Labels for each block that will be compiled. diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 8a7b52e549..e39a1c2bd5 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -527,11 +527,6 @@ void CodeGeneratorX86::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } -void CodeGeneratorX86::LoadCurrentMethod(Register reg) { - DCHECK(RequiresCurrentMethod()); - __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); -} - Location CodeGeneratorX86::GetStackLocation(HLoadLocal* load) const { switch (load->GetType()) { case Primitive::kPrimLong: @@ -1235,6 +1230,17 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok } HandleInvoke(invoke); + + if (codegen_->IsBaseline()) { + // Baseline does not have enough registers if the current method also + // needs a register. 
We therefore do not require a register for it, and let + // the code generation of the invoke handle it. + LocationSummary* locations = invoke->GetLocations(); + Location location = locations->InAt(invoke->GetCurrentMethodInputIndex()); + if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { + locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation()); + } + } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86* codegen) { @@ -1255,8 +1261,9 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec return; } + LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( - invoke, invoke->GetLocations()->GetTemp(0).AsRegister<Register>()); + invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -1276,13 +1283,8 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // temp = object->GetClass(); - if (receiver.IsStackSlot()) { - __ movl(temp, Address(ESP, receiver.GetStackIndex())); - __ movl(temp, Address(temp, class_offset)); - } else { - __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); - } + DCHECK(receiver.IsRegister()); + __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset)); codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ movl(temp, Address(temp, method_offset)); @@ -2961,14 +2963,12 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { locations->SetOut(Location::RegisterLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(1)); __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); - __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(instruction->GetEntrypoint()))); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); @@ -2981,13 +2981,12 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { locations->SetOut(Location::RegisterLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); } void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(calling_convention.GetRegisterAt(2)); __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex())); __ fs()->call(Address::Absolute(GetThreadOffset<kX86WordSize>(instruction->GetEntrypoint()))); @@ -3201,7 +3200,7 @@ void 
InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Register temp) { + Location temp) { // TODO: Implement all kinds of calls: // 1) boot -> boot // 2) app -> boot @@ -3211,25 +3210,34 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, if (invoke->IsStringInit()) { // temp = thread->string_init_entrypoint - __ fs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset())); + Register reg = temp.AsRegister<Register>(); + __ fs()->movl(reg, Address::Absolute(invoke->GetStringInitOffset())); // (temp + offset_of_quick_compiled_code)() __ call(Address( - temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); + reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); + } else if (invoke->IsRecursive()) { + __ call(GetFrameEntryLabel()); } else { - // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); - // temp = temp[index_in_cache] - __ movl(temp, Address(temp, - CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex()))); - // (temp + offset_of_quick_compiled_code)() - __ call(Address(temp, - ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); + Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + + Register method_reg; + Register reg = temp.AsRegister<Register>(); + if (current_method.IsRegister()) { + method_reg = current_method.AsRegister<Register>(); } else { - __ call(GetFrameEntryLabel()); + DCHECK(IsBaseline() || invoke->GetLocations()->Intrinsified()); + DCHECK(!current_method.IsValid()); + method_reg = reg; + __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); } + // temp = temp->dex_cache_resolved_methods_; + __ movl(reg, Address(method_reg, ArtMethod::DexCacheResolvedMethodsOffset().Int32Value())); + // temp = temp[index_in_cache] + __ movl(reg, Address(reg, + CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex()))); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(reg, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value())); } DCHECK(!IsLeafMethod()); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 61827a45ab..696d8d549e 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -124,10 +124,16 @@ class LocationsBuilderX86 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); @@ -148,10 +154,16 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + 
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + X86Assembler* GetAssembler() const { return assembler_; } private: @@ -263,7 +275,7 @@ class CodeGeneratorX86 : public CodeGenerator { void Move64(Location destination, Location source); // Generate a call to a static or direct method. - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Register temp); + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); // Emit a write barrier. void MarkGCCard(Register temp, @@ -272,8 +284,6 @@ class CodeGeneratorX86 : public CodeGenerator { Register value, bool value_can_be_null); - void LoadCurrentMethod(Register reg); - Label* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index a2a3cf523c..bfc827de1c 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -360,7 +360,7 @@ inline Condition X86_64Condition(IfCondition cond) { } void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - CpuRegister temp) { + Location temp) { // All registers are assumed to be correctly set up. // TODO: Implement all kinds of calls: @@ -371,26 +371,35 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo // Currently we implement the app -> app logic, which looks up in the resolve cache. 
if (invoke->IsStringInit()) { + CpuRegister reg = temp.AsRegister<CpuRegister>(); // temp = thread->string_init_entrypoint - __ gs()->movl(temp, Address::Absolute(invoke->GetStringInitOffset())); + __ gs()->movl(reg, Address::Absolute(invoke->GetStringInitOffset())); // (temp + offset_of_quick_compiled_code)() - __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( + __ call(Address(reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86_64WordSize).SizeValue())); + } else if (invoke->IsRecursive()) { + __ call(&frame_entry_label_); } else { - // temp = method; - LoadCurrentMethod(temp); - if (!invoke->IsRecursive()) { - // temp = temp->dex_cache_resolved_methods_; - __ movl(temp, Address(temp, ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); - // temp = temp[index_in_cache] - __ movq(temp, Address( - temp, CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex()))); - // (temp + offset_of_quick_compiled_code)() - __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kX86_64WordSize).SizeValue())); + CpuRegister reg = temp.AsRegister<CpuRegister>(); + Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex()); + Register method_reg; + if (current_method.IsRegister()) { + method_reg = current_method.AsRegister<Register>(); } else { - __ call(&frame_entry_label_); - } + DCHECK(invoke->GetLocations()->Intrinsified()); + DCHECK(!current_method.IsValid()); + method_reg = reg.AsRegister(); + __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); + } + // temp = temp->dex_cache_resolved_methods_; + __ movl(reg, Address(CpuRegister(method_reg), + ArtMethod::DexCacheResolvedMethodsOffset().SizeValue())); + // temp = temp[index_in_cache] + __ movq(reg, Address( + reg, CodeGenerator::GetCachePointerOffset(invoke->GetDexMethodIndex()))); + // (temp + offset_of_quick_compiled_code)() + __ call(Address(reg, ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kX86_64WordSize).SizeValue())); } DCHECK(!IsLeafMethod()); @@ -585,11 +594,6 @@ void CodeGeneratorX86_64::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } -void CodeGeneratorX86_64::LoadCurrentMethod(CpuRegister reg) { - DCHECK(RequiresCurrentMethod()); - __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); -} - Location CodeGeneratorX86_64::GetStackLocation(HLoadLocal* load) const { switch (load->GetType()) { case Primitive::kPrimLong: @@ -1358,9 +1362,9 @@ void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi return; } + LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( - invoke, - invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>()); + invoke, locations->HasTemps() ? 
locations->GetTemp(0) : Location::NoLocation()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -1390,12 +1394,8 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) Location receiver = locations->InAt(0); size_t class_offset = mirror::Object::ClassOffset().SizeValue(); // temp = object->GetClass(); - if (receiver.IsStackSlot()) { - __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex())); - __ movl(temp, Address(temp, class_offset)); - } else { - __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); - } + DCHECK(receiver.IsRegister()); + __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset)); codegen_->MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ movq(temp, Address(temp, method_offset)); @@ -3020,13 +3020,12 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetOut(Location::RegisterLocation(RAX)); } void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(1))); codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), instruction->GetTypeIndex()); __ gs()->call( @@ -3041,14 +3040,13 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall); InvokeRuntimeCallingConvention calling_convention; locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetOut(Location::RegisterLocation(RAX)); locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); } void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { InvokeRuntimeCallingConvention calling_convention; - codegen_->LoadCurrentMethod(CpuRegister(calling_convention.GetRegisterAt(2))); codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)), instruction->GetTypeIndex()); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index c19e686c10..215754cd46 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -134,10 +134,16 @@ class LocationsBuilderX86_64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleInvoke(HInvoke* invoke); void 
HandleBitwiseOperation(HBinaryOperation* operation); @@ -158,10 +164,16 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + X86_64Assembler* GetAssembler() const { return assembler_; } private: @@ -263,8 +275,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Helper method to move a value between two locations. void Move(Location destination, Location source); - void LoadCurrentMethod(CpuRegister reg); - Label* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); } @@ -277,7 +287,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { return false; } - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp); + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 07d0dd6b49..92ebf060eb 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -63,7 +63,7 @@ void HInliner::Run() { if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { // We use the original invoke type to ensure the resolution of the called method // works properly. - if (!TryInline(call, call->GetDexMethodIndex(), call->GetOriginalInvokeType())) { + if (!TryInline(call, call->GetDexMethodIndex())) { if (kIsDebugBuild) { std::string callee_name = PrettyMethod(call->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile()); @@ -160,27 +160,29 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resol } } -bool HInliner::TryInline(HInvoke* invoke_instruction, - uint32_t method_index, - InvokeType invoke_type) const { +static uint32_t FindMethodIndexIn(ArtMethod* method, + const DexFile& dex_file, + uint32_t referrer_index) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) { + return method->GetDexMethodIndex(); + } else { + return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index); + } +} + +bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) const { ScopedObjectAccess soa(Thread::Current()); const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); VLOG(compiler) << "Try inlining " << PrettyMethod(method_index, caller_dex_file); - ArtMethod* resolved_method = nullptr; - { - // Don't keep this handle scope on stack, otherwise we cannot do a reference type - // propagation while inlining. 
- StackHandleScope<2> hs(soa.Self()); - Handle<mirror::DexCache> dex_cache( - hs.NewHandle(caller_compilation_unit_.GetClassLinker()->FindDexCache(caller_dex_file))); - Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader*>(caller_compilation_unit_.GetClassLoader()))); - resolved_method = compiler_driver_->ResolveMethod( - soa, dex_cache, class_loader, &caller_compilation_unit_, method_index, invoke_type); - } + ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); + // We can query the dex cache directly. The verifier has populated it already. + ArtMethod* resolved_method = class_linker->FindDexCache(caller_dex_file)->GetResolvedMethod( + method_index, class_linker->GetImagePointerSize()); if (resolved_method == nullptr) { + // Method cannot be resolved if it is in another dex file we do not have access to. VLOG(compiler) << "Method cannot be resolved " << PrettyMethod(method_index, caller_dex_file); return false; } @@ -190,7 +192,16 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, if (resolved_method == nullptr) { VLOG(compiler) << "Interface or virtual call to " << PrettyMethod(method_index, caller_dex_file) - << "could not be statically determined"; + << " could not be statically determined"; + return false; + } + // We have found a method, but we need to find where that method is for the caller's + // dex file. + method_index = FindMethodIndexIn(resolved_method, caller_dex_file, method_index); + if (method_index == DexFile::kDexNoIndex) { + VLOG(compiler) << "Interface or virtual call to " + << PrettyMethod(resolved_method) + << " cannot be inlined because unaccessible to caller"; return false; } } @@ -245,7 +256,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, return false; } - if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, same_dex_file)) { + if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) { return false; } @@ -256,11 +267,11 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, - uint32_t method_index, bool same_dex_file) const { ScopedObjectAccess soa(Thread::Current()); const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); - const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + const DexFile& callee_dex_file = *resolved_method->GetDexFile(); + uint32_t method_index = resolved_method->GetDexMethodIndex(); DexCompilationUnit dex_compilation_unit( nullptr, @@ -292,13 +303,19 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, } } + InvokeType invoke_type = invoke_instruction->GetOriginalInvokeType(); + if (invoke_type == kInterface) { + // We have statically resolved the dispatch. To please the class linker + // at runtime, we change this call as if it was a virtual call. 
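For context on the line that follows: once the inliner has statically resolved the interface target, the callee graph records the dispatch as a virtual call so that the class linker's checks accept it at runtime. A minimal sketch of that adjustment, using an assumed stand-in for ART's InvokeType enum rather than the real one:

    enum class InvokeType { kStatic, kDirect, kVirtual, kSuper, kInterface };

    // A statically resolved interface dispatch is recorded as a virtual dispatch
    // on the inlined callee graph; every other invoke type is kept as-is.
    InvokeType InvokeTypeForInlinedBody(InvokeType original_type) {
      return original_type == InvokeType::kInterface ? InvokeType::kVirtual : original_type;
    }
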
+ invoke_type = kVirtual; + } HGraph* callee_graph = new (graph_->GetArena()) HGraph( graph_->GetArena(), - caller_dex_file, + callee_dex_file, method_index, requires_ctor_barrier, compiler_driver_->GetInstructionSet(), - invoke_instruction->GetOriginalInvokeType(), + invoke_type, graph_->IsDebuggable(), graph_->GetCurrentInstructionId()); @@ -311,7 +328,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, &inline_stats); if (!builder.BuildGraph(*code_item)) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be built, so cannot be inlined"; // There could be multiple reasons why the graph could not be built, including // unaccessible methods/fields due to using a different dex cache. We do not mark @@ -321,14 +338,14 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, compiler_driver_->GetInstructionSet())) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " cannot be inlined because of the register allocator"; resolved_method->SetShouldNotInline(); return false; } if (!callee_graph->TryBuildingSsa()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be transformed to SSA"; resolved_method->SetShouldNotInline(); return false; @@ -368,7 +385,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, // a throw predecessor. HBasicBlock* exit_block = callee_graph->GetExitBlock(); if (exit_block == nullptr) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it has an infinite loop"; resolved_method->SetShouldNotInline(); return false; @@ -382,7 +399,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, } } if (has_throw_predecessor) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because one branch always throws"; resolved_method->SetShouldNotInline(); return false; @@ -393,7 +410,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, for (; !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (block->IsLoopHeader()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it contains a loop"; resolved_method->SetShouldNotInline(); return false; @@ -407,21 +424,21 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, if (current->IsInvokeInterface()) { // Disable inlining of interface calls. The cost in case of entering the // resolution conflict is currently too high. 
- VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it has an interface call."; resolved_method->SetShouldNotInline(); return false; } if (!same_dex_file && current->NeedsEnvironment()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because " << current->DebugName() << " needs an environment and is in a different dex file"; return false; } if (!same_dex_file && current->NeedsDexCache()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because " << current->DebugName() << " it is in a different dex file and requires access to the dex cache"; // Do not flag the method as not-inlineable. A caller within the same diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index ca713329f5..24044b73a1 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -49,10 +49,9 @@ class HInliner : public HOptimization { static constexpr const char* kInlinerPassName = "inliner"; private: - bool TryInline(HInvoke* invoke_instruction, uint32_t method_index, InvokeType invoke_type) const; + bool TryInline(HInvoke* invoke_instruction, uint32_t method_index) const; bool TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, - uint32_t method_index, bool same_dex_file) const; const DexCompilationUnit& outer_compilation_unit_; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index fcb3471821..98a5841f80 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -186,33 +186,92 @@ bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* ins return false; } -void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - if (!check_cast->InputAt(0)->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { - check_cast->ClearMustDoNullCheck(); - } - - if (!load_class->IsResolved()) { +// Returns whether doing a type test between the class of `object` against `klass` has +// a statically known outcome. The result of the test is stored in `outcome`. +static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { + if (!klass->IsResolved()) { // If the class couldn't be resolve it's not safe to compare against it. It's // default type would be Top which might be wider that the actual class type // and thus producing wrong results. - return; + return false; } - ReferenceTypeInfo obj_rti = check_cast->InputAt(0)->GetReferenceTypeInfo(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + + ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); ScopedObjectAccess soa(Thread::Current()); if (class_rti.IsSupertypeOf(obj_rti)) { + *outcome = true; + return true; + } else if (obj_rti.IsExact()) { + // The test failed at compile time so will also fail at runtime. + *outcome = false; + return true; + } else if (!class_rti.IsInterface() && !obj_rti.IsSupertypeOf(class_rti)) { + // Different type hierarchy. The test will fail. 
+ *outcome = false; + return true; + } + return false; +} + +void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { + HInstruction* object = check_cast->InputAt(0); + if (!object->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { + check_cast->ClearMustDoNullCheck(); + } + + if (object->IsNullConstant()) { check_cast->GetBlock()->RemoveInstruction(check_cast); if (stats_ != nullptr) { stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); } + return; + } + + bool outcome; + if (TypeCheckHasKnownOutcome(check_cast->InputAt(1)->AsLoadClass(), object, &outcome)) { + if (outcome) { + check_cast->GetBlock()->RemoveInstruction(check_cast); + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); + } + } else { + // Don't do anything for exceptional cases for now. Ideally we should remove + // all instructions and blocks this instruction dominates. + } } } void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { - if (!instruction->InputAt(0)->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + HInstruction* object = instruction->InputAt(0); + bool can_be_null = true; + if (!object->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + can_be_null = false; instruction->ClearMustDoNullCheck(); } + + HGraph* graph = GetGraph(); + if (object->IsNullConstant()) { + instruction->ReplaceWith(graph->GetIntConstant(0)); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + + bool outcome; + if (TypeCheckHasKnownOutcome(instruction->InputAt(1)->AsLoadClass(), object, &outcome)) { + if (outcome && can_be_null) { + // Type test will succeed, we just need a null test. + HNotEqual* test = new (graph->GetArena()) HNotEqual(graph->GetNullConstant(), object); + instruction->GetBlock()->InsertInstructionBefore(test, instruction); + instruction->ReplaceWith(test); + } else { + // We've statically determined the result of the instanceof. 
+ instruction->ReplaceWith(graph->GetIntConstant(outcome)); + } + RecordSimplification(); + instruction->GetBlock()->RemoveInstruction(instruction); + } } void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index 024462081f..668956a614 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -36,6 +36,9 @@ class InstructionSimplifier : public HOptimization { static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; void Run() OVERRIDE; + + private: + DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier); }; } // namespace art diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 5436ec2dd9..749bedf99e 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -101,7 +101,8 @@ class IntrinsicSlowPathARM : public SlowPathCodeARM { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), + Location::RegisterLocation(kArtMethodRegister)); RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index d1dc5b3843..c108ad5daa 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -110,7 +110,8 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), kArtMethodRegister); + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), + LocationFrom(kArtMethodRegister)); RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 5bbbc72020..424ac7c855 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -138,7 +138,8 @@ class IntrinsicSlowPathX86 : public SlowPathCodeX86 { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX); + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), + Location::RegisterLocation(EAX)); RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; @@ -732,7 +733,8 @@ static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) MoveArguments(invoke, codegen); DCHECK(invoke->IsInvokeStaticOrDirect()); - codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX); + codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), + Location::RegisterLocation(EAX)); codegen->RecordPcInfo(invoke, invoke->GetDexPc()); // Copy the result back to the expected output. 
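The simplifier changes above hinge on TypeCheckHasKnownOutcome. The following standalone sketch models its three cases with a toy type representation (not ART code), so the instanceof/check-cast folding above can be read against it:

    #include <unordered_set>

    // Toy stand-in for a reference type: the id of the class plus the ids of all
    // of its supertypes, an "exact" bit, and an "interface" bit.
    struct TypeInfo {
      std::unordered_set<int> self_and_supertypes;
      int id;
      bool is_exact;
      bool is_interface;

      bool IsSupertypeOf(const TypeInfo& other) const {
        return other.self_and_supertypes.count(id) != 0;
      }
    };

    // Mirrors the three cases above: returns true when the outcome of
    // `obj instanceof klass` is statically known and stores it in *outcome.
    bool TypeCheckHasKnownOutcomeModel(const TypeInfo& klass, const TypeInfo& obj, bool* outcome) {
      if (klass.IsSupertypeOf(obj)) {
        *outcome = true;    // every possible runtime type of obj passes the test
        return true;
      }
      if (obj.is_exact) {
        *outcome = false;   // an exact type that failed the first test also fails at runtime
        return true;
      }
      if (!klass.is_interface && !obj.IsSupertypeOf(klass)) {
        *outcome = false;   // disjoint class hierarchies (interfaces excluded)
        return true;
      }
      return false;         // depends on the runtime type of obj
    }

With this model in mind, the hunks above fold HInstanceOf to the constant 0 or 1 (or to a plain null test when the type test always passes but the value may still be null), and remove HCheckCast entirely when the outcome is known to be true.
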
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index d6c90ff510..891531435e 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -129,7 +129,8 @@ class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI)); + codegen->GenerateStaticOrDirectCall( + invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI)); RecordPcInfo(codegen, invoke_, invoke_->GetDexPc()); } else { UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; @@ -609,7 +610,8 @@ static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invo MoveArguments(invoke, codegen); DCHECK(invoke->IsInvokeStaticOrDirect()); - codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI)); + codegen->GenerateStaticOrDirectCall( + invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI)); codegen->RecordPcInfo(invoke, invoke->GetDexPc()); // Copy the result back to the expected output. diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 09bbb33042..f41a782fe6 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -481,7 +481,6 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { bool intrinsified = false); void SetInAt(uint32_t at, Location location) { - DCHECK(inputs_.Get(at).IsUnallocated() || inputs_.Get(at).IsInvalid()); inputs_.Put(at, location); } @@ -525,6 +524,8 @@ class LocationSummary : public ArenaObject<kArenaAllocMisc> { return temps_.Size(); } + bool HasTemps() const { return !temps_.IsEmpty(); } + Location Out() const { return output_; } bool CanCall() const { return call_kind_ != kNoCall; } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cd91d2c87b..4baa05c80c 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -1510,6 +1510,81 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { invoke->GetBlock()->RemoveInstruction(invoke); } +/* + * Loop will be transformed to: + * old_pre_header + * | + * if_block + * / \ + * dummy_block deopt_block + * \ / + * new_pre_header + * | + * header + */ +void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + + // Need this to avoid critical edge. + HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + // Need this to avoid critical edge. 
+ HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); + AddBlock(if_block); + AddBlock(dummy_block); + AddBlock(deopt_block); + AddBlock(new_pre_header); + + header->ReplacePredecessor(pre_header, new_pre_header); + pre_header->successors_.Reset(); + pre_header->dominated_blocks_.Reset(); + + pre_header->AddSuccessor(if_block); + if_block->AddSuccessor(dummy_block); // True successor + if_block->AddSuccessor(deopt_block); // False successor + dummy_block->AddSuccessor(new_pre_header); + deopt_block->AddSuccessor(new_pre_header); + + pre_header->dominated_blocks_.Add(if_block); + if_block->SetDominator(pre_header); + if_block->dominated_blocks_.Add(dummy_block); + dummy_block->SetDominator(if_block); + if_block->dominated_blocks_.Add(deopt_block); + deopt_block->SetDominator(if_block); + if_block->dominated_blocks_.Add(new_pre_header); + new_pre_header->SetDominator(if_block); + new_pre_header->dominated_blocks_.Add(header); + header->SetDominator(new_pre_header); + + size_t index_of_header = 0; + while (reverse_post_order_.Get(index_of_header) != header) { + index_of_header++; + } + MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1); + reverse_post_order_.Put(index_of_header++, if_block); + reverse_post_order_.Put(index_of_header++, dummy_block); + reverse_post_order_.Put(index_of_header++, deopt_block); + reverse_post_order_.Put(index_of_header++, new_pre_header); + + HLoopInformation* info = pre_header->GetLoopInformation(); + if (info != nullptr) { + if_block->SetLoopInformation(info); + dummy_block->SetLoopInformation(info); + deopt_block->SetLoopInformation(info); + new_pre_header->SetLoopInformation(info); + for (HLoopInformationOutwardIterator loop_it(*pre_header); + !loop_it.Done(); + loop_it.Advance()) { + loop_it.Current()->Add(if_block); + loop_it.Current()->Add(dummy_block); + loop_it.Current()->Add(deopt_block); + loop_it.Current()->Add(new_pre_header); + } + } +} + std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { ScopedObjectAccess soa(Thread::Current()); os << "[" diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 47927340f4..7ef69559de 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -195,6 +195,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // Inline this graph in `outer_graph`, replacing the given `invoke` instruction. void InlineInto(HGraph* outer_graph, HInvoke* invoke); + // Need to add a couple of blocks to test if the loop body is entered and + // put deoptimization instructions, etc. + void TransformLoopHeaderForBCE(HBasicBlock* header); + // Removes `block` from the graph. 
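The TransformLoopHeaderForBCE declaration above (defined in nodes.cc earlier in this diff) splits the loop pre-header into a small diamond so the bounds-check-elimination pass has a place to put deoptimization instructions, with dummy_block present only to avoid a critical edge. A toy sketch of the resulting edges, assuming nothing beyond what the hunks state (the predicate that selects between the two paths is created by the BCE pass, outside this diff):

    #include <vector>

    // Toy CFG block, only to show the shape of the transformation; not ART's HBasicBlock.
    struct Block {
      std::vector<Block*> successors, predecessors;
      void AddSuccessor(Block* b) {
        successors.push_back(b);
        b->predecessors.push_back(this);
      }
    };

    // pre_header -> if_block -> {dummy_block, deopt_block} -> new_pre_header -> header.
    // dummy_block exists only so that neither edge leaving if_block is a critical edge
    // (an edge from a block with several successors to a block with several predecessors).
    void BuildBceDiamond(Block* pre_header, Block* header, Block* if_block,
                         Block* dummy_block, Block* deopt_block, Block* new_pre_header) {
      pre_header->AddSuccessor(if_block);
      if_block->AddSuccessor(dummy_block);     // true successor
      if_block->AddSuccessor(deopt_block);     // false successor; deoptimization checks go here
      dummy_block->AddSuccessor(new_pre_header);
      deopt_block->AddSuccessor(new_pre_header);
      new_pre_header->AddSuccessor(header);
    }
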
void DeleteDeadBlock(HBasicBlock* block); @@ -824,7 +828,7 @@ class HLoopInformationOutwardIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HLoopInformationOutwardIterator); }; -#define FOR_EACH_CONCRETE_INSTRUCTION(M) \ +#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ M(Add, BinaryOperation) \ M(And, BinaryOperation) \ M(ArrayGet, Instruction) \ @@ -894,6 +898,21 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) + #define FOR_EACH_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION(M) \ M(Constant, Instruction) \ @@ -1281,6 +1300,9 @@ class ReferenceTypeInfo : ValueObject { bool IsExact() const { return is_exact_; } bool IsTop() const { return is_top_; } + bool IsInterface() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return !IsTop() && GetTypeHandle()->IsInterface(); + } Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } @@ -2461,7 +2483,7 @@ class HInvoke : public HInstruction { intrinsic_ = intrinsic; } - bool IsInlined() const { + bool IsFromInlinedInvoke() const { return GetEnvironment()->GetParent() != nullptr; } @@ -2528,7 +2550,9 @@ class HInvokeStaticOrDirect : public HInvoke { ClinitCheckRequirement clinit_check_requirement) : HInvoke(arena, number_of_arguments, - clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u, + // There is one extra argument for the HCurrentMethod node, and + // potentially one other if the clinit check is explicit. + clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 2u : 1u, return_type, dex_pc, dex_method_index, @@ -2550,6 +2574,7 @@ class HInvokeStaticOrDirect : public HInvoke { bool NeedsDexCache() const OVERRIDE { return !IsRecursive(); } bool IsStringInit() const { return string_init_offset_ != 0; } int32_t GetStringInitOffset() const { return string_init_offset_; } + uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); } // Is this instruction a call to a static method? 
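The constructor comment just above fixes the input layout of HInvokeStaticOrDirect: the real arguments first, then the HCurrentMethod node, then the explicit clinit check when one is required. A small sketch of the index arithmetic that layout implies, derived only from the code above:

    #include <cstddef>

    // Inputs: [arg0, ..., arg(n-1), current_method, clinit_check (only if explicit)].
    std::size_t CurrentMethodInputIndex(std::size_t number_of_arguments) {
      return number_of_arguments;                   // mirrors GetCurrentMethodInputIndex()
    }

    std::size_t NumberOfExtraInputs(bool has_explicit_clinit_check) {
      return has_explicit_clinit_check ? 2u : 1u;   // mirrors the constructor argument above
    }
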
bool IsStatic() const { @@ -2665,9 +2690,10 @@ class HInvokeInterface : public HInvoke { DISALLOW_COPY_AND_ASSIGN(HInvokeInterface); }; -class HNewInstance : public HExpression<0> { +class HNewInstance : public HExpression<1> { public: - HNewInstance(uint32_t dex_pc, + HNewInstance(HCurrentMethod* current_method, + uint32_t dex_pc, uint16_t type_index, const DexFile& dex_file, QuickEntrypointEnum entrypoint) @@ -2675,7 +2701,9 @@ class HNewInstance : public HExpression<0> { dex_pc_(dex_pc), type_index_(type_index), dex_file_(dex_file), - entrypoint_(entrypoint) {} + entrypoint_(entrypoint) { + SetRawInputAt(0, current_method); + } uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } uint16_t GetTypeIndex() const { return type_index_; } @@ -2718,9 +2746,10 @@ class HNeg : public HUnaryOperation { DISALLOW_COPY_AND_ASSIGN(HNeg); }; -class HNewArray : public HExpression<1> { +class HNewArray : public HExpression<2> { public: HNewArray(HInstruction* length, + HCurrentMethod* current_method, uint32_t dex_pc, uint16_t type_index, const DexFile& dex_file, @@ -2731,6 +2760,7 @@ class HNewArray : public HExpression<1> { dex_file_(dex_file), entrypoint_(entrypoint) { SetRawInputAt(0, length); + SetRawInputAt(1, current_method); } uint32_t GetDexPc() const OVERRIDE { return dex_pc_; } @@ -3573,7 +3603,7 @@ class HLoadClass : public HExpression<1> { bool CanThrow() const OVERRIDE { // May call runtime and and therefore can throw. // TODO: finer grain decision. - return !is_referrers_class_; + return CanCallRuntime(); } ReferenceTypeInfo GetLoadedClassRTI() { @@ -4238,6 +4268,39 @@ class HBlocksInLoopIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator); }; +// Iterator over the blocks that art part of the loop. Includes blocks part +// of an inner loop. The order in which the blocks are iterated is reverse +// post order. +class HBlocksInLoopReversePostOrderIterator : public ValueObject { + public: + explicit HBlocksInLoopReversePostOrderIterator(const HLoopInformation& info) + : blocks_in_loop_(info.GetBlocks()), + blocks_(info.GetHeader()->GetGraph()->GetReversePostOrder()), + index_(0) { + if (!blocks_in_loop_.IsBitSet(blocks_.Get(index_)->GetBlockId())) { + Advance(); + } + } + + bool Done() const { return index_ == blocks_.Size(); } + HBasicBlock* Current() const { return blocks_.Get(index_); } + void Advance() { + ++index_; + for (size_t e = blocks_.Size(); index_ < e; ++index_) { + if (blocks_in_loop_.IsBitSet(blocks_.Get(index_)->GetBlockId())) { + break; + } + } + } + + private: + const BitVector& blocks_in_loop_; + const GrowableArray<HBasicBlock*>& blocks_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator); +}; + inline int64_t Int64FromConstant(HConstant* constant) { DCHECK(constant->IsIntConstant() || constant->IsLongConstant()); return constant->IsIntConstant() ? constant->AsIntConstant()->GetValue() diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index ccf8de9f6a..2d1c0ba9f9 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ +#include "base/arena_object.h" #include "nodes.h" #include "optimizing_compiler_stats.h" @@ -25,7 +26,7 @@ namespace art { /** * Abstraction to implement an optimization pass. 
*/ -class HOptimization : public ValueObject { +class HOptimization : public ArenaObject<kArenaAllocMisc> { public: HOptimization(HGraph* graph, bool is_in_ssa_form, diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index bf0b9fac0f..303a7cb1fd 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -318,49 +318,55 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - HDeadCodeElimination dce1(graph, stats, - HDeadCodeElimination::kInitialDeadCodeEliminationPassName); - HDeadCodeElimination dce2(graph, stats, - HDeadCodeElimination::kFinalDeadCodeEliminationPassName); - HConstantFolding fold1(graph); - InstructionSimplifier simplify1(graph, stats); - HBooleanSimplifier boolean_simplify(graph); - - HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); - - HConstantFolding fold2(graph, "constant_folding_after_inlining"); - SideEffectsAnalysis side_effects(graph); - GVNOptimization gvn(graph, side_effects); - LICM licm(graph, side_effects); - BoundsCheckElimination bce(graph); - ReferenceTypePropagation type_propagation(graph, handles); - InstructionSimplifier simplify2(graph, stats, "instruction_simplifier_after_types"); - InstructionSimplifier simplify3(graph, stats, "last_instruction_simplifier"); - ReferenceTypePropagation type_propagation2(graph, handles); - - IntrinsicsRecognizer intrinsics(graph, driver); + ArenaAllocator* arena = graph->GetArena(); + HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( + graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName); + HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination( + graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName); + HConstantFolding* fold1 = new (arena) HConstantFolding(graph); + InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); + HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph); + + HInliner* inliner = new (arena) HInliner( + graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); + + HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining"); + SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); + GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects); + LICM* licm = new (arena) LICM(graph, *side_effects); + BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph); + ReferenceTypePropagation* type_propagation = + new (arena) ReferenceTypePropagation(graph, handles); + InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_after_types"); + InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( + graph, stats, "last_instruction_simplifier"); + ReferenceTypePropagation* type_propagation2 = + new (arena) ReferenceTypePropagation(graph, handles); + + IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver); HOptimization* optimizations[] = { - &intrinsics, - &dce1, - &fold1, - &simplify1, - &type_propagation, - &simplify2, - &inliner, + intrinsics, + dce1, + fold1, + simplify1, + type_propagation, + simplify2, + inliner, // Run another type propagation phase: inlining will open up more opprotunities // to remove checkast/instanceof and null checks. 
- &type_propagation2, + type_propagation2, // BooleanSimplifier depends on the InstructionSimplifier removing redundant // suspend checks to recognize empty blocks. - &boolean_simplify, - &fold2, - &side_effects, - &gvn, - &licm, - &bce, - &simplify3, - &dce2, + boolean_simplify, + fold2, + side_effects, + gvn, + licm, + bce, + simplify3, + dce2, }; RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index b6b1bb1cad..b988813f75 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -19,6 +19,7 @@ #include <sstream> #include <string> +#include <type_traits> #include "atomic.h" @@ -38,7 +39,6 @@ enum MethodCompilationStat { kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, kNotCompiledNoCodegen, - kNotCompiledNonSequentialRegPair, kNotCompiledPathological, kNotCompiledSpaceFilter, kNotCompiledUnhandledInstruction, @@ -84,14 +84,15 @@ class OptimizingCompilerStats { for (int i = 0; i < kLastStat; i++) { if (compile_stats_[i] != 0) { - LOG(INFO) << PrintMethodCompilationStat(i) << ": " << compile_stats_[i]; + LOG(INFO) << PrintMethodCompilationStat(static_cast<MethodCompilationStat>(i)) << ": " + << compile_stats_[i]; } } } } private: - std::string PrintMethodCompilationStat(int stat) const { + std::string PrintMethodCompilationStat(MethodCompilationStat stat) const { switch (stat) { case kAttemptCompilation : return "kAttemptCompilation"; case kCompiledBaseline : return "kCompiledBaseline"; @@ -106,7 +107,6 @@ class OptimizingCompilerStats { case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches"; case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen"; - case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair"; case kNotCompiledPathological : return "kNotCompiledPathological"; case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter"; case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction"; @@ -120,9 +120,12 @@ class OptimizingCompilerStats { case kRemovedCheckedCast: return "kRemovedCheckedCast"; case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; case kRemovedNullCheck: return "kRemovedNullCheck"; - default: LOG(FATAL) << "invalid stat"; + + case kLastStat: break; // Invalid to print out. } - return ""; + LOG(FATAL) << "invalid stat " + << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat); + UNREACHABLE(); } AtomicInteger compile_stats_[kLastStat]; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index a249aa9711..ca928ae0f2 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -86,16 +86,6 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire DCHECK(last_input != nullptr) << "Last input is not HLoadClass. It is " << last_input->DebugName(); - // The static call will initialize the class so there's no need for a clinit check if - // it's the first user. - // There is one special case where we still need the clinit check, when inlining. Because - // currently the callee is responsible for reporting parameters to the GC, the code - // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. 
- // Therefore we cannot allocate any object in that code, including loading a new class. - if (last_input == invoke->GetPrevious() && !invoke->IsInlined()) { - last_input->SetMustGenerateClinitCheck(false); - } - // Remove a load class instruction as last input of a static // invoke, which has been added (along with a clinit check, // removed by PrepareForRegisterAllocation::VisitClinitCheck @@ -104,10 +94,20 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire // stage (i.e., after inlining has been performed). invoke->RemoveLoadClassAsLastInput(); - // If the load class instruction is no longer used, remove it from - // the graph. - if (!last_input->HasUses() && !(last_input->MustGenerateClinitCheck() && invoke->IsInlined())) { - last_input->GetBlock()->RemoveInstruction(last_input); + // The static call will initialize the class so there's no need for a clinit check if + // it's the first user. + // There is one special case where we still need the clinit check, when inlining. Because + // currently the callee is responsible for reporting parameters to the GC, the code + // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. + // Therefore we cannot allocate any object in that code, including loading a new class. + if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) { + last_input->SetMustGenerateClinitCheck(false); + + // If the load class instruction is no longer used, remove it from + // the graph. + if (!last_input->HasUses()) { + last_input->GetBlock()->RemoveInstruction(last_input); + } } } } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 4edadef1a4..a048c856c5 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -23,6 +23,30 @@ namespace art { +class RTPVisitor : public HGraphDelegateVisitor { + public: + RTPVisitor(HGraph* graph, StackHandleScopeCollection* handles) + : HGraphDelegateVisitor(graph), + handles_(handles) {} + + void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; + void VisitLoadClass(HLoadClass* load_class) OVERRIDE; + void VisitNewArray(HNewArray* instr) OVERRIDE; + void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); + void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact); + void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE; + void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE; + void VisitInvoke(HInvoke* instr) OVERRIDE; + void VisitArrayGet(HArrayGet* instr) OVERRIDE; + void UpdateReferenceTypeInfo(HInstruction* instr, + uint16_t type_idx, + const DexFile& dex_file, + bool is_exact); + + private: + StackHandleScopeCollection* handles_; +}; + void ReferenceTypePropagation::Run() { // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. @@ -35,23 +59,13 @@ void ReferenceTypePropagation::Run() { void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { // TODO: handle other instructions that give type info - // (Call/array accesses) + // (array accesses) + RTPVisitor visitor(graph_, handles_); // Initialize exact types first for faster convergence. for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instr = it.Current(); - // TODO: Make ReferenceTypePropagation a visitor or create a new one. 
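The TODO just above is what this change resolves: the removed dispatch chain below is folded into RTPVisitor, an HGraphDelegateVisitor, so each instruction reaches its handler through Accept instead of a cascade of IsNewInstance()/IsLoadClass() tests. A generic, self-contained sketch of that double dispatch with toy classes (not ART's HInstruction hierarchy):

    #include <cstdio>

    struct NewInstance;
    struct LoadClass;

    struct Visitor {
      virtual ~Visitor() = default;
      virtual void VisitNewInstance(NewInstance*) {}
      virtual void VisitLoadClass(LoadClass*) {}
    };

    struct Instruction {
      virtual ~Instruction() = default;
      virtual void Accept(Visitor* visitor) = 0;
    };

    struct NewInstance : Instruction {
      void Accept(Visitor* visitor) override { visitor->VisitNewInstance(this); }
    };

    struct LoadClass : Instruction {
      void Accept(Visitor* visitor) override { visitor->VisitLoadClass(this); }
    };

    // A concrete visitor overrides only the instructions it cares about, much as
    // RTPVisitor overrides VisitNewInstance, VisitLoadClass, VisitInvoke, and so on.
    struct TypePropagationVisitor : Visitor {
      void VisitNewInstance(NewInstance*) override { std::puts("exact type from new-instance"); }
      void VisitLoadClass(LoadClass*) override { std::puts("loaded class type from load-class"); }
    };

    // Usage: for each instruction in a block, instr->Accept(&visitor).
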
- if (instr->IsNewInstance()) { - VisitNewInstance(instr->AsNewInstance()); - } else if (instr->IsLoadClass()) { - VisitLoadClass(instr->AsLoadClass()); - } else if (instr->IsNewArray()) { - VisitNewArray(instr->AsNewArray()); - } else if (instr->IsInstanceFieldGet()) { - VisitInstanceFieldGet(instr->AsInstanceFieldGet()); - } else if (instr->IsStaticFieldGet()) { - VisitStaticFieldGet(instr->AsStaticFieldGet()); - } + instr->Accept(&visitor); } // Handle Phis. @@ -166,20 +180,21 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { } } -void ReferenceTypePropagation::SetClassAsTypeInfo(HInstruction* instr, - mirror::Class* klass, - bool is_exact) { +void RTPVisitor::SetClassAsTypeInfo(HInstruction* instr, + mirror::Class* klass, + bool is_exact) { if (klass != nullptr) { ScopedObjectAccess soa(Thread::Current()); MutableHandle<mirror::Class> handle = handles_->NewHandle(klass); + is_exact = is_exact || klass->IsFinal(); instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact)); } } -void ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr, - uint16_t type_idx, - const DexFile& dex_file, - bool is_exact) { +void RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr, + uint16_t type_idx, + const DexFile& dex_file, + bool is_exact) { DCHECK_EQ(instr->GetType(), Primitive::kPrimNot); ScopedObjectAccess soa(Thread::Current()); @@ -188,16 +203,16 @@ void ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr, SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact); } -void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) { +void RTPVisitor::VisitNewInstance(HNewInstance* instr) { UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } -void ReferenceTypePropagation::VisitNewArray(HNewArray* instr) { +void RTPVisitor::VisitNewArray(HNewArray* instr) { UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } -void ReferenceTypePropagation::UpdateFieldAccessTypeInfo(HInstruction* instr, - const FieldInfo& info) { +void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr, + const FieldInfo& info) { // The field index is unknown only during tests. 
if (instr->GetType() != Primitive::kPrimNot || info.GetFieldIndex() == kUnknownFieldIndex) { return; @@ -212,15 +227,15 @@ void ReferenceTypePropagation::UpdateFieldAccessTypeInfo(HInstruction* instr, SetClassAsTypeInfo(instr, klass, /* is_exact */ false); } -void ReferenceTypePropagation::VisitInstanceFieldGet(HInstanceFieldGet* instr) { +void RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) { UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); } -void ReferenceTypePropagation::VisitStaticFieldGet(HStaticFieldGet* instr) { +void RTPVisitor::VisitStaticFieldGet(HStaticFieldGet* instr) { UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); } -void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) { +void RTPVisitor::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile()); @@ -298,6 +313,34 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { return !previous_rti.IsEqual(instr->GetReferenceTypeInfo()); } +void RTPVisitor::VisitInvoke(HInvoke* instr) { + if (instr->GetType() != Primitive::kPrimNot) { + return; + } + + ScopedObjectAccess soa(Thread::Current()); + ClassLinker* cl = Runtime::Current()->GetClassLinker(); + mirror::DexCache* dex_cache = cl->FindDexCache(instr->GetDexFile()); + ArtMethod* method = dex_cache->GetResolvedMethod( + instr->GetDexMethodIndex(), cl->GetImagePointerSize()); + DCHECK(method != nullptr); + mirror::Class* klass = method->GetReturnType(false); + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); +} + +void RTPVisitor::VisitArrayGet(HArrayGet* instr) { + if (instr->GetType() != Primitive::kPrimNot) { + return; + } + + HInstruction* parent = instr->InputAt(0); + ScopedObjectAccess soa(Thread::Current()); + Handle<mirror::Class> handle = parent->GetReferenceTypeInfo().GetTypeHandle(); + if (handle.GetReference() != nullptr && handle->IsObjectArrayClass()) { + SetClassAsTypeInfo(instr, handle->GetComponentType(), /* is_exact */ false); + } +} + void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); // Be sure that we don't go over the bounded type. 
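The new RTPVisitor rules above can be summarized as: a final class always yields an exact type, a reference-returning invoke is typed by the callee's declared return type, and an array-get on an object array is typed by the array's component type. A toy model of the first two rules (an assumed simplification, not ART code):

    #include <string>

    struct Rti {               // toy reference-type info: class name plus exact bit
      std::string klass;
      bool is_exact;
    };

    // SetClassAsTypeInfo analogue: a final class is always exact.
    Rti SetClassAsTypeInfo(const std::string& klass, bool is_exact, bool klass_is_final) {
      return Rti{klass, is_exact || klass_is_final};
    }

    // VisitInvoke analogue: a reference-returning call is typed by the declared
    // return type, non-exactly unless that type is final.
    Rti TypeOfInvoke(const std::string& declared_return_type, bool return_type_is_final) {
      return SetClassAsTypeInfo(declared_return_type, /*is_exact=*/false, return_type_is_final);
    }
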
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 0a1d4c496e..0d687d25cb 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -40,26 +40,12 @@ class ReferenceTypePropagation : public HOptimization { static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation"; private: - void VisitNewInstance(HNewInstance* new_instance); - void VisitLoadClass(HLoadClass* load_class); - void VisitNewArray(HNewArray* instr); void VisitPhi(HPhi* phi); void VisitBasicBlock(HBasicBlock* block); - void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); - void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact); - void UpdateBoundType(HBoundType* bound_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void UpdatePhi(HPhi* phi) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void BoundTypeForIfNotNull(HBasicBlock* block); void BoundTypeForIfInstanceOf(HBasicBlock* block); - void UpdateReferenceTypeInfo(HInstruction* instr, - uint16_t type_idx, - const DexFile& dex_file, - bool is_exact); - void VisitInstanceFieldGet(HInstanceFieldGet* instr); - void VisitStaticFieldGet(HStaticFieldGet* instr); - void ProcessWorklist(); void AddToWorklist(HInstruction* instr); void AddDependentInstructionsToWorklist(HInstruction* instr); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index a381315bac..e38e49cd19 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -714,13 +714,15 @@ bool RegisterAllocator::TryAllocateFreeReg(LiveInterval* current) { if (defined_by != nullptr && !current->IsSplit()) { LocationSummary* locations = defined_by->GetLocations(); if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) { - for (HInputIterator it(defined_by); !it.Done(); it.Advance()) { + for (size_t i = 0, e = defined_by->InputCount(); i < e; ++i) { // Take the last interval of the input. It is the location of that interval // that will be used at `defined_by`. - LiveInterval* interval = it.Current()->GetLiveInterval()->GetLastSibling(); + LiveInterval* interval = defined_by->InputAt(i)->GetLiveInterval()->GetLastSibling(); // Note that interval may have not been processed yet. // TODO: Handle non-split intervals last in the work list. - if (interval->HasRegister() && interval->SameRegisterKind(*current)) { + if (locations->InAt(i).IsValid() + && interval->HasRegister() + && interval->SameRegisterKind(*current)) { // The input must be live until the end of `defined_by`, to comply to // the linear scan algorithm. 
So we use `defined_by`'s end lifetime // position to check whether the input is dead or is inactive after diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index c4612af393..2a86e60e14 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -184,22 +184,24 @@ void SsaBuilder::FixNullConstantType() { } HInstruction* left = equality_instr->InputAt(0); HInstruction* right = equality_instr->InputAt(1); - HInstruction* null_instr = nullptr; + HInstruction* int_operand = nullptr; - if ((left->GetType() == Primitive::kPrimNot) && right->IsIntConstant()) { - null_instr = right; - } else if ((right->GetType() == Primitive::kPrimNot) && left->IsIntConstant()) { - null_instr = left; + if ((left->GetType() == Primitive::kPrimNot) && (right->GetType() == Primitive::kPrimInt)) { + int_operand = right; + } else if ((right->GetType() == Primitive::kPrimNot) + && (left->GetType() == Primitive::kPrimInt)) { + int_operand = left; } else { continue; } // If we got here, we are comparing against a reference and the int constant // should be replaced with a null constant. - if (null_instr->IsIntConstant()) { - DCHECK_EQ(0, null_instr->AsIntConstant()->GetValue()); - equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), null_instr == right ? 1 : 0); - } + // Both type propagation and redundant phi elimination ensure `int_operand` + // can only be the 0 constant. + DCHECK(int_operand->IsIntConstant()); + DCHECK_EQ(0, int_operand->AsIntConstant()->GetValue()); + equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), int_operand == right ? 1 : 0); } } } @@ -255,21 +257,18 @@ void SsaBuilder::BuildSsa() { PrimitiveTypePropagation type_propagation(GetGraph()); type_propagation.Run(); - // 5) Fix the type for null constants which are part of an equality comparison. - FixNullConstantType(); - - // 6) When creating equivalent phis we copy the inputs of the original phi which - // may be improperly typed. This will be fixed during the type propagation but + // 5) When creating equivalent phis we copy the inputs of the original phi which + // may be improperly typed. This was fixed during the type propagation in 4) but // as a result we may end up with two equivalent phis with the same type for // the same dex register. This pass cleans them up. EquivalentPhisCleanup(); - // 7) Mark dead phis again. Step 4) may have introduced new phis. - // Step 6) might enable the death of new phis. + // 6) Mark dead phis again. Step 4) may have introduced new phis. + // Step 5) might enable the death of new phis. SsaDeadPhiElimination dead_phis(GetGraph()); dead_phis.MarkDeadPhis(); - // 8) Now that the graph is correctly typed, we can get rid of redundant phis. + // 7) Now that the graph is correctly typed, we can get rid of redundant phis. // Note that we cannot do this phase before type propagation, otherwise // we could get rid of phi equivalents, whose presence is a requirement for the // type propagation phase. Note that this is to satisfy statement (a) of the @@ -277,6 +276,13 @@ void SsaBuilder::BuildSsa() { SsaRedundantPhiElimination redundant_phi(GetGraph()); redundant_phi.Run(); + // 8) Fix the type for null constants which are part of an equality comparison. + // We need to do this after redundant phi elimination, to ensure the only cases + // that we can see are reference comparison against 0. The redundant phi + // elimination ensures we do not see a phi taking two 0 constants in a HEqual + // or HNotEqual. 
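Concretely, the comparisons FixNullConstantType (called just below) normalizes come from dex code such as a `const v0, 0` feeding an `if-eq` against a register that holds a reference: after SSA building this is an HEqual between a reference and the integer constant 0, and that 0 must be retyped to the null constant. A minimal sketch of the check, using toy types rather than the pass itself:

    enum class PrimType { kInt, kRef };

    struct Operand { PrimType type; };

    // True when `candidate` is the int operand of an equality against a reference and
    // must therefore be retyped to the null constant; as the comment above notes, after
    // redundant phi elimination that operand can only be the constant 0.
    bool MustBecomeNullConstant(const Operand& other, const Operand& candidate) {
      return other.type == PrimType::kRef && candidate.type == PrimType::kInt;
    }
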
+ FixNullConstantType(); + // 9) Make sure environments use the right phi "equivalent": a phi marked dead // can have a phi equivalent that is not dead. We must therefore update // all environment uses of the dead phi to use its equivalent. Note that there diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index d5f977feec..701dbb019b 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -242,7 +242,7 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { HInstruction* input = current->InputAt(i); // Some instructions 'inline' their inputs, that is they do not need // to be materialized. - if (input->HasSsaIndex()) { + if (input->HasSsaIndex() && current->GetLocations()->InAt(i).IsValid()) { live_in->SetBit(input->GetSsaIndex()); input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i); } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 4667825a62..220ee6a8d0 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -394,7 +394,7 @@ class LiveInterval : public ArenaObject<kArenaAllocMisc> { first_range_->start_ = from; } else { // Instruction without uses. - DCHECK(!defined_by_->HasNonEnvironmentUses()); + DCHECK(first_use_ == nullptr); DCHECK(from == defined_by_->GetLifetimePosition()); first_range_ = last_range_ = range_search_start_ = new (allocator_) LiveRange(from, from + 2, nullptr); diff --git a/disassembler/Android.mk b/disassembler/Android.mk index 691c43f7a6..039986ce2b 100644 --- a/disassembler/Android.mk +++ b/disassembler/Android.mk @@ -59,12 +59,13 @@ define build-libart-disassembler LOCAL_SRC_FILES := $$(LIBART_DISASSEMBLER_SRC_FILES) ifeq ($$(art_target_or_host),target) - $(call set-target-local-clang-vars) - $(call set-target-local-cflags-vars,$(2)) + $(call set-target-local-clang-vars) + $(call set-target-local-cflags-vars,$(2)) else # host LOCAL_CLANG := $(ART_HOST_CLANG) LOCAL_LDLIBS := $(ART_HOST_LDLIBS) LOCAL_CFLAGS += $(ART_HOST_CFLAGS) + LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS) ifeq ($$(art_ndebug_or_debug),debug) LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS) else diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 96d5654d65..9e9dea64c6 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -1610,6 +1610,8 @@ class ImageDumper { const auto& bitmap_section = image_header_.GetImageSection(ImageHeader::kSectionImageBitmap); const auto& field_section = image_header_.GetImageSection(ImageHeader::kSectionArtFields); const auto& method_section = image_header_.GetMethodsSection(); + const auto& intern_section = image_header_.GetImageSection( + ImageHeader::kSectionInternedStrings); stats_.header_bytes = header_bytes; size_t alignment_bytes = RoundUp(header_bytes, kObjectAlignment) - header_bytes; stats_.alignment_bytes += alignment_bytes; @@ -1617,6 +1619,7 @@ class ImageDumper { stats_.bitmap_bytes += bitmap_section.Size(); stats_.art_field_bytes += field_section.Size(); stats_.art_method_bytes += method_section.Size(); + stats_.interned_strings_bytes += intern_section.Size(); stats_.Dump(os); os << "\n"; @@ -1945,6 +1948,7 @@ class ImageDumper { size_t object_bytes; size_t art_field_bytes; size_t art_method_bytes; + size_t interned_strings_bytes; size_t bitmap_bytes; size_t alignment_bytes; @@ -1974,6 +1978,7 @@ class ImageDumper { object_bytes(0), art_field_bytes(0), art_method_bytes(0), + interned_strings_bytes(0), bitmap_bytes(0), 
alignment_bytes(0), managed_code_bytes(0), @@ -2131,21 +2136,24 @@ class ImageDumper { << "art_file_bytes = header_bytes + object_bytes + alignment_bytes\n"; Indenter indent_filter(os.rdbuf(), kIndentChar, kIndentBy1Count); std::ostream indent_os(&indent_filter); - indent_os << StringPrintf("header_bytes = %8zd (%2.0f%% of art file bytes)\n" - "object_bytes = %8zd (%2.0f%% of art file bytes)\n" - "art_field_bytes = %8zd (%2.0f%% of art file bytes)\n" - "art_method_bytes = %8zd (%2.0f%% of art file bytes)\n" - "bitmap_bytes = %8zd (%2.0f%% of art file bytes)\n" - "alignment_bytes = %8zd (%2.0f%% of art file bytes)\n\n", + indent_os << StringPrintf("header_bytes = %8zd (%2.0f%% of art file bytes)\n" + "object_bytes = %8zd (%2.0f%% of art file bytes)\n" + "art_field_bytes = %8zd (%2.0f%% of art file bytes)\n" + "art_method_bytes = %8zd (%2.0f%% of art file bytes)\n" + "interned_string_bytes = %8zd (%2.0f%% of art file bytes)\n" + "bitmap_bytes = %8zd (%2.0f%% of art file bytes)\n" + "alignment_bytes = %8zd (%2.0f%% of art file bytes)\n\n", header_bytes, PercentOfFileBytes(header_bytes), object_bytes, PercentOfFileBytes(object_bytes), art_field_bytes, PercentOfFileBytes(art_field_bytes), art_method_bytes, PercentOfFileBytes(art_method_bytes), + interned_strings_bytes, + PercentOfFileBytes(interned_strings_bytes), bitmap_bytes, PercentOfFileBytes(bitmap_bytes), alignment_bytes, PercentOfFileBytes(alignment_bytes)) << std::flush; CHECK_EQ(file_bytes, header_bytes + object_bytes + art_field_bytes + art_method_bytes + - bitmap_bytes + alignment_bytes); + interned_strings_bytes + bitmap_bytes + alignment_bytes); } os << "object_bytes breakdown:\n"; diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc index 007125cfbe..04017273a8 100644 --- a/patchoat/patchoat.cc +++ b/patchoat/patchoat.cc @@ -437,6 +437,41 @@ void PatchOat::PatchArtMethods(const ImageHeader* image_header) { } } +class FixupRootVisitor : public RootVisitor { + public: + explicit FixupRootVisitor(const PatchOat* patch_oat) : patch_oat_(patch_oat) { + } + + void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + *roots[i] = patch_oat_->RelocatedAddressOfPointer(*roots[i]); + } + } + + void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, + const RootInfo& info ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + roots[i]->Assign(patch_oat_->RelocatedAddressOfPointer(roots[i]->AsMirrorPtr())); + } + } + + private: + const PatchOat* const patch_oat_; +}; + +void PatchOat::PatchInternedStrings(const ImageHeader* image_header) { + const auto& section = image_header->GetImageSection(ImageHeader::kSectionInternedStrings); + InternTable temp_table; + // Note that we require that ReadFromMemory does not make an internal copy of the elements. + // This also relies on visit roots not doing any verification which could fail after we update + // the roots to be the image addresses. 
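For reference, the fixup pattern used by FixupRootVisitor above is plain pointer relocation: every root slot read out of the interned-string table is rewritten to its address in the relocated image by applying the image delta. A self-contained sketch of that pattern with toy types (ART's visitor operates on mirror::Object roots and CompressedReferences):

    #include <cstddef>
    #include <cstdint>

    struct Relocator {
      std::intptr_t delta;  // difference between the new and old image base

      template <typename T>
      T* RelocatedAddressOfPointer(T* obj) const {
        if (obj == nullptr) return nullptr;
        return reinterpret_cast<T*>(reinterpret_cast<std::intptr_t>(obj) + delta);
      }
    };

    // VisitRoots analogue: every root slot is rewritten in place.
    inline void FixupRoots(void** roots, std::size_t count, const Relocator& r) {
      for (std::size_t i = 0; i < count; ++i) {
        roots[i] = r.RelocatedAddressOfPointer(roots[i]);
      }
    }
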
+ temp_table.ReadFromMemory(image_->Begin() + section.Offset()); + FixupRootVisitor visitor(this); + temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots); +} + void PatchOat::PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots) { auto* dex_caches = down_cast<mirror::ObjectArray<mirror::DexCache>*>( img_roots->Get(ImageHeader::kDexCaches)); @@ -483,12 +518,9 @@ bool PatchOat::PatchImage() { auto* img_roots = image_header->GetImageRoots(); image_header->RelocateImage(delta_); - // Patch and update ArtFields. PatchArtFields(image_header); - - // Patch and update ArtMethods. PatchArtMethods(image_header); - + PatchInternedStrings(image_header); // Patch dex file int/long arrays which point to ArtFields. PatchDexFileArrays(img_roots); diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h index 7b9c8bd508..23abca8c7e 100644 --- a/patchoat/patchoat.h +++ b/patchoat/patchoat.h @@ -116,6 +116,8 @@ class PatchOat { bool PatchImage() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void PatchArtFields(const ImageHeader* image_header) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void PatchArtMethods(const ImageHeader* image_header) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void PatchInternedStrings(const ImageHeader* image_header) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -123,7 +125,7 @@ class PatchOat { bool WriteImage(File* out); template <typename T> - T* RelocatedCopyOf(T* obj) { + T* RelocatedCopyOf(T* obj) const { if (obj == nullptr) { return nullptr; } @@ -136,7 +138,7 @@ class PatchOat { } template <typename T> - T* RelocatedAddressOfPointer(T* obj) { + T* RelocatedAddressOfPointer(T* obj) const { if (obj == nullptr) { return obj; } @@ -149,7 +151,7 @@ class PatchOat { } template <typename T> - T RelocatedAddressOfIntPointer(T obj) { + T RelocatedAddressOfIntPointer(T obj) const { if (obj == 0) { return obj; } @@ -199,6 +201,7 @@ class PatchOat { TimingLogger* timings_; + friend class FixupRootVisitor; DISALLOW_IMPLICIT_CONSTRUCTORS(PatchOat); }; diff --git a/runtime/Android.mk b/runtime/Android.mk index b38f9bc9a5..5ed6955185 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -45,6 +45,7 @@ LIBART_COMMON_SRC_FILES := \ dex_file_verifier.cc \ dex_instruction.cc \ elf_file.cc \ + gc/allocation_record.cc \ gc/allocator/dlmalloc.cc \ gc/allocator/rosalloc.cc \ gc/accounting/bitmap.cc \ @@ -451,6 +452,7 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT endif LOCAL_CFLAGS += $$(ART_HOST_CFLAGS) LOCAL_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES="$(LIBART_HOST_DEFAULT_INSTRUCTION_SET_FEATURES)" + LOCAL_ASFLAGS += $$(ART_HOST_ASFLAGS) ifeq ($$(art_ndebug_or_debug),debug) LOCAL_CFLAGS += $$(ART_HOST_DEBUG_CFLAGS) diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index 3a0ea646e1..cc1de43723 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -380,7 +380,7 @@ END art_quick_do_long_jump /* * Called by managed code, saves most registers (forms basis of long jump context) and passes * the bottom of the stack. artDeliverExceptionFromCode will place the callee save Method* at - * the bottom of the thread. On entry r0 holds Throwable* + * the bottom of the thread. 
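PatchInternedStrings above reads the intern table in place from the image (the comment notes ReadFromMemory makes no internal copy) and then runs FixupRootVisitor over its roots, which simply shifts every pointer by the image relocation delta, the same arithmetic RelocatedAddressOfPointer performs. A minimal sketch of delta relocation, with illustrative names rather than the PatchOat members:

#include <cstdint>

template <typename T>
T* RelocatedAddressOfPointer(T* obj, std::intptr_t delta) {
  if (obj == nullptr) {
    return nullptr;  // null stays null; only real image pointers move
  }
  return reinterpret_cast<T*>(reinterpret_cast<std::uintptr_t>(obj) + delta);
}

Making the Relocated* helpers const, as the patchoat.h hunk does, is what lets FixupRootVisitor hold a const PatchOat* while still computing relocated addresses.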
On entry a0 holds Throwable* */ ENTRY art_quick_deliver_exception SETUP_SAVE_ALL_CALLEE_SAVE_FRAME diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index b2cd7f26c7..37c6c5b3f9 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -87,11 +87,11 @@ s.d $f24, 8($sp) # load appropriate callee-save-method - ld $v0, %got(_ZN3art7Runtime9instance_E)($gp) - ld $v0, 0($v0) + ld $t1, %got(_ZN3art7Runtime9instance_E)($gp) + ld $t1, 0($t1) THIS_LOAD_REQUIRES_READ_BARRIER - ld $v0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($v0) - sd $v0, 0($sp) # Place ArtMethod* at bottom of stack. + ld $t1, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t1) + sd $t1, 0($sp) # Place ArtMethod* at bottom of stack. sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. .endm @@ -130,11 +130,11 @@ sd $s2, 8($sp) .cfi_rel_offset 18, 8 # load appropriate callee-save-method - ld $v0, %got(_ZN3art7Runtime9instance_E)($gp) - ld $v0, 0($v0) + ld $t1, %got(_ZN3art7Runtime9instance_E)($gp) + ld $t1, 0($t1) THIS_LOAD_REQUIRES_READ_BARRIER - ld $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0) - sd $v0, 0($sp) # Place Method* at bottom of stack. + ld $t1, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t1) + sd $t1, 0($sp) # Place Method* at bottom of stack. sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. .endm @@ -253,11 +253,11 @@ .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL # load appropriate callee-save-method - ld $v0, %got(_ZN3art7Runtime9instance_E)($gp) - ld $v0, 0($v0) + ld $t1, %got(_ZN3art7Runtime9instance_E)($gp) + ld $t1, 0($t1) THIS_LOAD_REQUIRES_READ_BARRIER - ld $v0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($v0) - sd $v0, 0($sp) # Place Method* at bottom of stack. + ld $t1, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t1) + sd $t1, 0($sp) # Place Method* at bottom of stack. sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. .endm @@ -442,7 +442,7 @@ END art_quick_do_long_jump * Called by managed code, saves most registers (forms basis of long jump * context) and passes the bottom of the stack. * artDeliverExceptionFromCode will place the callee save Method* at - * the bottom of the thread. On entry v0 holds Throwable* + * the bottom of the thread. On entry a0 holds Throwable* */ ENTRY art_quick_deliver_exception SETUP_SAVE_ALL_CALLEE_SAVE_FRAME diff --git a/runtime/art_method.cc b/runtime/art_method.cc index fe26438eac..c78a851b0e 100644 --- a/runtime/art_method.cc +++ b/runtime/art_method.cc @@ -428,7 +428,9 @@ void ArtMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* // exception was thrown to force the activations to be removed from the // stack. Continue execution in the interpreter. 
self->ClearException(); - ShadowFrame* shadow_frame = self->GetAndClearDeoptimizationShadowFrame(result); + ShadowFrame* shadow_frame = + self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame); + result->SetJ(self->PopDeoptimizationReturnValue().GetJ()); self->SetTopOfStack(nullptr); self->SetTopOfShadowStack(shadow_frame); interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, result); diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h index ab63dddaff..8daf6d4c9e 100644 --- a/runtime/base/hash_set.h +++ b/runtime/base/hash_set.h @@ -22,6 +22,7 @@ #include <stdint.h> #include <utility> +#include "bit_utils.h" #include "logging.h" namespace art { @@ -121,6 +122,7 @@ class HashSet { typedef BaseIterator<T, HashSet> Iterator; typedef BaseIterator<const T, const HashSet> ConstIterator; + // If we don't own the data, this will create a new array which owns the data. void Clear() { DeallocateStorage(); AllocateStorage(1); @@ -128,19 +130,70 @@ class HashSet { elements_until_expand_ = 0; } - HashSet() : num_elements_(0), num_buckets_(0), data_(nullptr), + HashSet() : num_elements_(0), num_buckets_(0), owns_data_(false), data_(nullptr), min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) { Clear(); } - HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), data_(nullptr) { + HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), owns_data_(false), + data_(nullptr) { *this = other; } - HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), data_(nullptr) { + HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), owns_data_(false), + data_(nullptr) { *this = std::move(other); } + // Construct from existing data. + // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the + // passed in ptr_. + HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) { + uint64_t temp; + size_t offset = 0; + offset = ReadFromBytes(ptr, offset, &temp); + num_elements_ = static_cast<uint64_t>(temp); + offset = ReadFromBytes(ptr, offset, &temp); + num_buckets_ = static_cast<uint64_t>(temp); + CHECK_LE(num_elements_, num_buckets_); + offset = ReadFromBytes(ptr, offset, &temp); + elements_until_expand_ = static_cast<uint64_t>(temp); + offset = ReadFromBytes(ptr, offset, &min_load_factor_); + offset = ReadFromBytes(ptr, offset, &max_load_factor_); + if (!make_copy_of_data) { + owns_data_ = false; + data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset)); + offset += sizeof(*data_) * num_buckets_; + } else { + AllocateStorage(num_buckets_); + // Write elements, not that this may not be safe for cross compilation if the elements are + // pointer sized. + for (size_t i = 0; i < num_buckets_; ++i) { + offset = ReadFromBytes(ptr, offset, &data_[i]); + } + } + // Caller responsible for aligning. + *read_count = offset; + } + + // Returns how large the table is after being written. If target is null, then no writing happens + // but the size is still returned. Target must be 8 byte aligned. 
+ size_t WriteToMemory(uint8_t* ptr) { + size_t offset = 0; + offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_)); + offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_)); + offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_)); + offset = WriteToBytes(ptr, offset, min_load_factor_); + offset = WriteToBytes(ptr, offset, max_load_factor_); + // Write elements, not that this may not be safe for cross compilation if the elements are + // pointer sized. + for (size_t i = 0; i < num_buckets_; ++i) { + offset = WriteToBytes(ptr, offset, data_[i]); + } + // Caller responsible for aligning. + return offset; + } + ~HashSet() { DeallocateStorage(); } @@ -152,6 +205,7 @@ class HashSet { std::swap(elements_until_expand_, other.elements_until_expand_); std::swap(min_load_factor_, other.min_load_factor_); std::swap(max_load_factor_, other.max_load_factor_); + std::swap(owns_data_, other.owns_data_); return *this; } @@ -386,6 +440,7 @@ class HashSet { void AllocateStorage(size_t num_buckets) { num_buckets_ = num_buckets; data_ = allocfn_.allocate(num_buckets_); + owns_data_ = true; for (size_t i = 0; i < num_buckets_; ++i) { allocfn_.construct(allocfn_.address(data_[i])); emptyfn_.MakeEmpty(data_[i]); @@ -394,10 +449,13 @@ class HashSet { void DeallocateStorage() { if (num_buckets_ != 0) { - for (size_t i = 0; i < NumBuckets(); ++i) { - allocfn_.destroy(allocfn_.address(data_[i])); + if (owns_data_) { + for (size_t i = 0; i < NumBuckets(); ++i) { + allocfn_.destroy(allocfn_.address(data_[i])); + } + allocfn_.deallocate(data_, NumBuckets()); + owns_data_ = false; } - allocfn_.deallocate(data_, NumBuckets()); data_ = nullptr; num_buckets_ = 0; } @@ -418,18 +476,23 @@ class HashSet { // Expand / shrink the table to the new specified size. void Resize(size_t new_size) { DCHECK_GE(new_size, Size()); - T* old_data = data_; + T* const old_data = data_; size_t old_num_buckets = num_buckets_; // Reinsert all of the old elements. + const bool owned_data = owns_data_; AllocateStorage(new_size); for (size_t i = 0; i < old_num_buckets; ++i) { T& element = old_data[i]; if (!emptyfn_.IsEmpty(element)) { data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element); } - allocfn_.destroy(allocfn_.address(element)); + if (owned_data) { + allocfn_.destroy(allocfn_.address(element)); + } + } + if (owned_data) { + allocfn_.deallocate(old_data, old_num_buckets); } - allocfn_.deallocate(old_data, old_num_buckets); } ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const { @@ -439,6 +502,24 @@ class HashSet { return index; } + // Return new offset. + template <typename Elem> + static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) { + DCHECK_ALIGNED(ptr + offset, sizeof(n)); + if (ptr != nullptr) { + *reinterpret_cast<Elem*>(ptr + offset) = n; + } + return offset + sizeof(n); + } + + template <typename Elem> + static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) { + DCHECK(ptr != nullptr); + DCHECK_ALIGNED(ptr + offset, sizeof(*out)); + *out = *reinterpret_cast<const Elem*>(ptr + offset); + return offset + sizeof(*out); + } + Alloc allocfn_; // Allocator function. HashFn hashfn_; // Hashing function. EmptyFn emptyfn_; // IsEmpty/SetEmpty function. @@ -446,6 +527,7 @@ class HashSet { size_t num_elements_; // Number of inserted elements. size_t num_buckets_; // Number of hash table buckets. size_t elements_until_expand_; // Maxmimum number of elements until we expand the table. 
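The HashSet changes above make the container serializable: WriteToMemory lays out num_elements_, num_buckets_, elements_until_expand_, the two load factors and then the raw bucket array, and the new constructor reads the same layout back, either copying the buckets or pointing data_ straight into the source buffer with owns_data_ = false. The offset-advancing helpers at the heart of this are easy to sketch in isolation; memcpy stands in here for the aligned reinterpret_cast stores the real helpers use:

#include <cstddef>
#include <cstdint>
#include <cstring>

template <typename Elem>
size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) {
  if (ptr != nullptr) {  // a null buffer means "size-only" pass, nothing is stored
    std::memcpy(ptr + offset, &n, sizeof(n));
  }
  return offset + sizeof(n);
}

template <typename Elem>
size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) {
  std::memcpy(out, ptr + offset, sizeof(*out));
  return offset + sizeof(*out);
}

Usage note: running the writer with a null buffer yields the serialized size without storing anything, which is how the "if the target is null, no writing happens but the size is still returned" contract above is meant to be used.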
+ bool owns_data_; // If we own data_ and are responsible for freeing it. T* data_; // Backing storage. double min_load_factor_; double max_load_factor_; diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index f2be85e277..0ab148e37e 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -94,7 +94,6 @@ enum LockLevel { kMonitorListLock, kJniLoadLibraryLock, kThreadListLock, - kBreakpointInvokeLock, kAllocTrackerLock, kDeoptimizationLock, kProfilerLock, diff --git a/runtime/base/time_utils.h b/runtime/base/time_utils.h index f58c22a7cc..55d2764576 100644 --- a/runtime/base/time_utils.h +++ b/runtime/base/time_utils.h @@ -68,8 +68,8 @@ static constexpr inline uint64_t NsToMs(uint64_t ns) { } // Converts the given number of milliseconds to nanoseconds -static constexpr inline uint64_t MsToNs(uint64_t ns) { - return ns * 1000 * 1000; +static constexpr inline uint64_t MsToNs(uint64_t ms) { + return ms * 1000 * 1000; } #if defined(__APPLE__) diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 31140a83fc..c4d978f972 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -85,6 +85,9 @@ namespace art { static constexpr bool kSanityCheckObjects = kIsDebugBuild; +// For b/21333911. +static constexpr bool kDuplicateClassesCheck = false; + static void ThrowNoClassDefFoundError(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2))) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -805,18 +808,11 @@ static void FreeDexFilesInHeap(std::priority_queue<DexFileAndClassPair>* heap) { } const OatFile* ClassLinker::GetBootOatFile() { - // To grab the boot oat, look at the dex files in the boot classpath. Any of those is fine, as - // they were all compiled into the same oat file. So grab the first one, which is guaranteed to - // exist if the boot class-path isn't empty. - if (boot_class_path_.empty()) { + gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace(); + if (image_space == nullptr) { return nullptr; } - const DexFile* boot_dex_file = boot_class_path_[0]; - // Is it from an oat file? - if (boot_dex_file->GetOatDexFile() != nullptr) { - return boot_dex_file->GetOatDexFile()->GetOatFile(); - } - return nullptr; + return image_space->GetOatFile(); } const OatFile* ClassLinker::GetPrimaryOatFile() { @@ -840,6 +836,10 @@ const OatFile* ClassLinker::GetPrimaryOatFile() { // the two elements agree on whether their dex file was from an already-loaded oat-file or the // new oat file. Any disagreement indicates a collision. bool ClassLinker::HasCollisions(const OatFile* oat_file, std::string* error_msg) { + if (!kDuplicateClassesCheck) { + return false; + } + // Dex files are registered late - once a class is actually being loaded. We have to compare // against the open oat files. Take the dex_lock_ that protects oat_files_ accesses. 
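The time_utils.h hunk above is purely a naming fix: MsToNs converts milliseconds, so its parameter is now called ms instead of ns, and the arithmetic is unchanged. A quick constexpr sanity check of the pair of conversions (a sketch; NsToMs is reproduced here from its obvious definition, not quoted from the file):

#include <cstdint>

static constexpr uint64_t MsToNs(uint64_t ms) { return ms * 1000 * 1000; }
static constexpr uint64_t NsToMs(uint64_t ns) { return ns / 1000 / 1000; }

static_assert(MsToNs(1) == 1000000, "1 ms is 1,000,000 ns");
static_assert(NsToMs(MsToNs(25)) == 25, "round trip preserves whole milliseconds");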
ReaderMutexLock mu(Thread::Current(), dex_lock_); @@ -1048,7 +1048,7 @@ static void SanityCheckArtMethodPointerArray( static void SanityCheckObjectsCallback(mirror::Object* obj, void* arg ATTRIBUTE_UNUSED) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { DCHECK(obj != nullptr); - CHECK(obj->GetClass() != nullptr) << "Null class " << obj; + CHECK(obj->GetClass() != nullptr) << "Null class in object " << obj; CHECK(obj->GetClass()->GetClass() != nullptr) << "Null class class " << obj; if (obj->IsClass()) { auto klass = obj->AsClass(); diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 24615e2a66..5918c10515 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -29,9 +29,11 @@ #include "dex_file-inl.h" #include "dex_instruction.h" #include "gc/accounting/card_table-inl.h" +#include "gc/allocation_record.h" #include "gc/space/large_object_space.h" #include "gc/space/space-inl.h" #include "handle_scope.h" +#include "jdwp/jdwp_priv.h" #include "jdwp/object_registry.h" #include "mirror/class.h" #include "mirror/class-inl.h" @@ -61,127 +63,30 @@ namespace art { // The key identifying the debugger to update instrumentation. static constexpr const char* kDbgInstrumentationKey = "Debugger"; -static const size_t kMaxAllocRecordStackDepth = 16; // Max 255. -static const size_t kDefaultNumAllocRecords = 64*1024; // Must be a power of 2. 2BE can hold 64k-1. - -// Limit alloc_record_count to the 2BE value that is the limit of the current protocol. +// Limit alloc_record_count to the 2BE value (64k-1) that is the limit of the current protocol. static uint16_t CappedAllocRecordCount(size_t alloc_record_count) { - if (alloc_record_count > 0xffff) { - return 0xffff; - } - return alloc_record_count; -} - -class AllocRecordStackTraceElement { - public: - AllocRecordStackTraceElement() : method_(nullptr), dex_pc_(0) { - } - - int32_t LineNumber() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - ArtMethod* method = Method(); - DCHECK(method != nullptr); - return method->GetLineNumFromDexPC(DexPc()); - } - - ArtMethod* Method() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - ScopedObjectAccessUnchecked soa(Thread::Current()); - return soa.DecodeMethod(method_); - } - - void SetMethod(ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - ScopedObjectAccessUnchecked soa(Thread::Current()); - method_ = soa.EncodeMethod(m); - } - - uint32_t DexPc() const { - return dex_pc_; - } - - void SetDexPc(uint32_t pc) { - dex_pc_ = pc; - } - - private: - jmethodID method_; - uint32_t dex_pc_; -}; - -jobject Dbg::TypeCache::Add(mirror::Class* t) { - ScopedObjectAccessUnchecked soa(Thread::Current()); - JNIEnv* const env = soa.Env(); - ScopedLocalRef<jobject> local_ref(soa.Env(), soa.AddLocalReference<jobject>(t)); - const int32_t hash_code = soa.Decode<mirror::Class*>(local_ref.get())->IdentityHashCode(); - auto range = objects_.equal_range(hash_code); - for (auto it = range.first; it != range.second; ++it) { - if (soa.Decode<mirror::Class*>(it->second) == soa.Decode<mirror::Class*>(local_ref.get())) { - // Found a matching weak global, return it. - return it->second; + size_t cap = 0xffff; +#ifdef HAVE_ANDROID_OS + // Check whether there's a system property overriding the number of recent records. 
+ const char* propertyName = "dalvik.vm.recentAllocMax"; + char recentAllocMaxString[PROPERTY_VALUE_MAX]; + if (property_get(propertyName, recentAllocMaxString, "") > 0) { + char* end; + size_t value = strtoul(recentAllocMaxString, &end, 10); + if (*end != '\0') { + LOG(ERROR) << "Ignoring " << propertyName << " '" << recentAllocMaxString + << "' --- invalid"; + } else { + cap = value; } } - const jobject weak_global = env->NewWeakGlobalRef(local_ref.get()); - objects_.insert(std::make_pair(hash_code, weak_global)); - return weak_global; -} - -void Dbg::TypeCache::Clear() { - JavaVMExt* vm = Runtime::Current()->GetJavaVM(); - Thread* self = Thread::Current(); - for (const auto& p : objects_) { - vm->DeleteWeakGlobalRef(self, p.second); +#endif + if (alloc_record_count > cap) { + return cap; } - objects_.clear(); + return alloc_record_count; } -class AllocRecord { - public: - AllocRecord() : type_(nullptr), byte_count_(0), thin_lock_id_(0) {} - - mirror::Class* Type() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return down_cast<mirror::Class*>(Thread::Current()->DecodeJObject(type_)); - } - - void SetType(mirror::Class* t) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, - Locks::alloc_tracker_lock_) { - type_ = Dbg::type_cache_.Add(t); - } - - size_t GetDepth() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - size_t depth = 0; - while (depth < kMaxAllocRecordStackDepth && stack_[depth].Method() != nullptr) { - ++depth; - } - return depth; - } - - size_t ByteCount() const { - return byte_count_; - } - - void SetByteCount(size_t count) { - byte_count_ = count; - } - - uint16_t ThinLockId() const { - return thin_lock_id_; - } - - void SetThinLockId(uint16_t id) { - thin_lock_id_ = id; - } - - AllocRecordStackTraceElement* StackElement(size_t index) { - DCHECK_LT(index, kMaxAllocRecordStackDepth); - return &stack_[index]; - } - - private: - jobject type_; // This is a weak global. - size_t byte_count_; - uint16_t thin_lock_id_; - // Unused entries have null method. - AllocRecordStackTraceElement stack_[kMaxAllocRecordStackDepth]; -}; - class Breakpoint { public: Breakpoint(ArtMethod* method, uint32_t dex_pc, @@ -382,13 +287,6 @@ bool Dbg::gDebuggerActive = false; bool Dbg::gDisposed = false; ObjectRegistry* Dbg::gRegistry = nullptr; -// Recent allocation tracking. -AllocRecord* Dbg::recent_allocation_records_ = nullptr; // TODO: CircularBuffer<AllocRecord> -size_t Dbg::alloc_record_max_ = 0; -size_t Dbg::alloc_record_head_ = 0; -size_t Dbg::alloc_record_count_ = 0; -Dbg::TypeCache Dbg::type_cache_; - // Deoptimization support. std::vector<DeoptimizationRequest> Dbg::deoptimization_requests_; size_t Dbg::full_deoptimization_event_count_ = 0; @@ -1761,6 +1659,51 @@ JDWP::JdwpTag Dbg::GetStaticFieldBasicTag(JDWP::FieldId field_id) { return BasicTagFromDescriptor(FromFieldId(field_id)->GetTypeDescriptor()); } +static JValue GetArtFieldValue(ArtField* f, mirror::Object* o) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + Primitive::Type fieldType = f->GetTypeAsPrimitiveType(); + JValue field_value; + switch (fieldType) { + case Primitive::kPrimBoolean: + field_value.SetZ(f->GetBoolean(o)); + return field_value; + + case Primitive::kPrimByte: + field_value.SetB(f->GetByte(o)); + return field_value; + + case Primitive::kPrimChar: + field_value.SetC(f->GetChar(o)); + return field_value; + + case Primitive::kPrimShort: + field_value.SetS(f->GetShort(o)); + return field_value; + + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + // Int and Float must be treated as 32-bit values in JDWP. 
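CappedAllocRecordCount above now lets a system property override the 64k-1 protocol cap on recent allocation records, accepting the value only if strtoul consumes the whole string. The same validate-then-accept pattern can be sketched portably; getenv stands in for Android's property_get here, and the property name is the one quoted in the change:

#include <cstdio>
#include <cstdlib>

size_t CapFromEnvironment(size_t default_cap) {
  // Stand-in for property_get("dalvik.vm.recentAllocMax", ...).
  const char* value = std::getenv("RECENT_ALLOC_MAX");
  if (value == nullptr || *value == '\0') {
    return default_cap;  // e.g. 0xffff, the 2-byte JDWP limit
  }
  char* end = nullptr;
  unsigned long parsed = std::strtoul(value, &end, 10);
  if (*end != '\0') {
    std::fprintf(stderr, "Ignoring override '%s' --- invalid\n", value);
    return default_cap;
  }
  return static_cast<size_t>(parsed);
}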
+ field_value.SetI(f->GetInt(o)); + return field_value; + + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + // Long and Double must be treated as 64-bit values in JDWP. + field_value.SetJ(f->GetLong(o)); + return field_value; + + case Primitive::kPrimNot: + field_value.SetL(f->GetObject(o)); + return field_value; + + case Primitive::kPrimVoid: + LOG(FATAL) << "Attempt to read from field of type 'void'"; + UNREACHABLE(); + } + LOG(FATAL) << "Attempt to read from field of unknown type"; + UNREACHABLE(); +} + static JDWP::JdwpError GetFieldValueImpl(JDWP::RefTypeId ref_type_id, JDWP::ObjectId object_id, JDWP::FieldId field_id, JDWP::ExpandBuf* pReply, bool is_static) @@ -1795,27 +1738,17 @@ static JDWP::JdwpError GetFieldValueImpl(JDWP::RefTypeId ref_type_id, JDWP::Obje } } else { if (f->IsStatic()) { - LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.SetValues on static field " - << PrettyField(f); + LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.GetValues" + << " on static field " << PrettyField(f); } } if (f->IsStatic()) { o = f->GetDeclaringClass(); } + JValue field_value(GetArtFieldValue(f, o)); JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor()); - JValue field_value; - if (tag == JDWP::JT_VOID) { - LOG(FATAL) << "Unknown tag: " << tag; - } else if (!IsPrimitiveTag(tag)) { - field_value.SetL(f->GetObject(o)); - } else if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) { - field_value.SetJ(f->Get64(o)); - } else { - field_value.SetI(f->Get32(o)); - } Dbg::OutputJValue(tag, &field_value, pReply); - return JDWP::ERR_NONE; } @@ -1829,6 +1762,76 @@ JDWP::JdwpError Dbg::GetStaticFieldValue(JDWP::RefTypeId ref_type_id, JDWP::Fiel return GetFieldValueImpl(ref_type_id, 0, field_id, pReply, true); } +static JDWP::JdwpError SetArtFieldValue(ArtField* f, mirror::Object* o, uint64_t value, int width) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + Primitive::Type fieldType = f->GetTypeAsPrimitiveType(); + // Debugging only happens at runtime so we know we are not running in a transaction. + static constexpr bool kNoTransactionMode = false; + switch (fieldType) { + case Primitive::kPrimBoolean: + CHECK_EQ(width, 1); + f->SetBoolean<kNoTransactionMode>(o, static_cast<uint8_t>(value)); + return JDWP::ERR_NONE; + + case Primitive::kPrimByte: + CHECK_EQ(width, 1); + f->SetByte<kNoTransactionMode>(o, static_cast<uint8_t>(value)); + return JDWP::ERR_NONE; + + case Primitive::kPrimChar: + CHECK_EQ(width, 2); + f->SetChar<kNoTransactionMode>(o, static_cast<uint16_t>(value)); + return JDWP::ERR_NONE; + + case Primitive::kPrimShort: + CHECK_EQ(width, 2); + f->SetShort<kNoTransactionMode>(o, static_cast<int16_t>(value)); + return JDWP::ERR_NONE; + + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + CHECK_EQ(width, 4); + // Int and Float must be treated as 32-bit values in JDWP. + f->SetInt<kNoTransactionMode>(o, static_cast<int32_t>(value)); + return JDWP::ERR_NONE; + + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + CHECK_EQ(width, 8); + // Long and Double must be treated as 64-bit values in JDWP. 
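The new GetArtFieldValue and SetArtFieldValue helpers dispatch on the field's declared primitive type and enforce the JDWP width rule spelled out in the comments above: int and float travel as 32-bit values, long and double as 64-bit values, and the narrower primitives keep their natural width (hence the CHECK_EQ(width, ...) calls). A compact sketch of that mapping, with an enum standing in for art::Primitive::Type:

enum class PrimType { kBool, kByte, kChar, kShort, kInt, kFloat, kLong, kDouble };

int JdwpValueWidth(PrimType t) {
  switch (t) {
    case PrimType::kBool:
    case PrimType::kByte:   return 1;
    case PrimType::kChar:
    case PrimType::kShort:  return 2;
    case PrimType::kInt:
    case PrimType::kFloat:  return 4;  // treated as 32-bit values in JDWP
    case PrimType::kLong:
    case PrimType::kDouble: return 8;  // treated as 64-bit values in JDWP
  }
  return 0;  // unreachable for a valid PrimType
}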
+ f->SetLong<kNoTransactionMode>(o, value); + return JDWP::ERR_NONE; + + case Primitive::kPrimNot: { + JDWP::JdwpError error; + mirror::Object* v = Dbg::GetObjectRegistry()->Get<mirror::Object*>(value, &error); + if (error != JDWP::ERR_NONE) { + return JDWP::ERR_INVALID_OBJECT; + } + if (v != nullptr) { + mirror::Class* field_type; + { + StackHandleScope<2> hs(Thread::Current()); + HandleWrapper<mirror::Object> h_v(hs.NewHandleWrapper(&v)); + HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o)); + field_type = f->GetType<true>(); + } + if (!field_type->IsAssignableFrom(v->GetClass())) { + return JDWP::ERR_INVALID_OBJECT; + } + } + f->SetObject<kNoTransactionMode>(o, v); + return JDWP::ERR_NONE; + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Attempt to write to field of type 'void'"; + UNREACHABLE(); + } + LOG(FATAL) << "Attempt to write to field of unknown type"; + UNREACHABLE(); +} + static JDWP::JdwpError SetFieldValueImpl(JDWP::ObjectId object_id, JDWP::FieldId field_id, uint64_t value, int width, bool is_static) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -1847,47 +1850,14 @@ static JDWP::JdwpError SetFieldValueImpl(JDWP::ObjectId object_id, JDWP::FieldId } } else { if (f->IsStatic()) { - LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.SetValues on static field " << PrettyField(f); + LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.SetValues" + << " on static field " << PrettyField(f); } } if (f->IsStatic()) { o = f->GetDeclaringClass(); } - - JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor()); - - if (IsPrimitiveTag(tag)) { - if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) { - CHECK_EQ(width, 8); - // Debugging can't use transactional mode (runtime only). - f->Set64<false>(o, value); - } else { - CHECK_LE(width, 4); - // Debugging can't use transactional mode (runtime only). - f->Set32<false>(o, value); - } - } else { - mirror::Object* v = Dbg::GetObjectRegistry()->Get<mirror::Object*>(value, &error); - if (error != JDWP::ERR_NONE) { - return JDWP::ERR_INVALID_OBJECT; - } - if (v != nullptr) { - mirror::Class* field_type; - { - StackHandleScope<2> hs(Thread::Current()); - HandleWrapper<mirror::Object> h_v(hs.NewHandleWrapper(&v)); - HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o)); - field_type = f->GetType<true>(); - } - if (!field_type->IsAssignableFrom(v->GetClass())) { - return JDWP::ERR_INVALID_OBJECT; - } - } - // Debugging can't use transactional mode (runtime only). 
- f->SetObject<false>(o, v); - } - - return JDWP::ERR_NONE; + return SetArtFieldValue(f, o, value, width); } JDWP::JdwpError Dbg::SetFieldValue(JDWP::ObjectId object_id, JDWP::FieldId field_id, uint64_t value, @@ -3763,17 +3733,16 @@ static char JdwpTagToShortyChar(JDWP::JdwpTag tag) { } } -JDWP::JdwpError Dbg::InvokeMethod(JDWP::ObjectId thread_id, JDWP::ObjectId object_id, - JDWP::RefTypeId class_id, JDWP::MethodId method_id, - uint32_t arg_count, uint64_t* arg_values, - JDWP::JdwpTag* arg_types, uint32_t options, - JDWP::JdwpTag* pResultTag, uint64_t* pResultValue, - JDWP::ObjectId* pExceptionId) { - ThreadList* thread_list = Runtime::Current()->GetThreadList(); +JDWP::JdwpError Dbg::PrepareInvokeMethod(uint32_t request_id, JDWP::ObjectId thread_id, + JDWP::ObjectId object_id, JDWP::RefTypeId class_id, + JDWP::MethodId method_id, uint32_t arg_count, + uint64_t arg_values[], JDWP::JdwpTag* arg_types, + uint32_t options) { + Thread* const self = Thread::Current(); + CHECK_EQ(self, GetDebugThread()) << "This must be called by the JDWP thread"; + ThreadList* thread_list = Runtime::Current()->GetThreadList(); Thread* targetThread = nullptr; - std::unique_ptr<DebugInvokeReq> req; - Thread* self = Thread::Current(); { ScopedObjectAccessUnchecked soa(self); JDWP::JdwpError error; @@ -3883,99 +3852,82 @@ JDWP::JdwpError Dbg::InvokeMethod(JDWP::ObjectId thread_id, JDWP::ObjectId objec } // Allocates a DebugInvokeReq. - req.reset(new (std::nothrow) DebugInvokeReq(receiver, c, m, options, arg_values, arg_count)); - if (req.get() == nullptr) { + DebugInvokeReq* req = new (std::nothrow) DebugInvokeReq(request_id, thread_id, receiver, c, m, + options, arg_values, arg_count); + if (req == nullptr) { LOG(ERROR) << "Failed to allocate DebugInvokeReq"; return JDWP::ERR_OUT_OF_MEMORY; } - // Attach the DebugInvokeReq to the target thread so it executes the method when - // it is resumed. Once the invocation completes, it will detach it and signal us - // before suspending itself. - targetThread->SetDebugInvokeReq(req.get()); + // Attaches the DebugInvokeReq to the target thread so it executes the method when + // it is resumed. Once the invocation completes, the target thread will delete it before + // suspending itself (see ThreadList::SuspendSelfForDebugger). + targetThread->SetDebugInvokeReq(req); } // The fact that we've released the thread list lock is a bit risky --- if the thread goes - // away we're sitting high and dry -- but we must release this before the ResumeAllThreads - // call, and it's unwise to hold it during WaitForSuspend. - - { - /* - * We change our (JDWP thread) status, which should be THREAD_RUNNING, - * so we can suspend for a GC if the invoke request causes us to - * run out of memory. It's also a good idea to change it before locking - * the invokeReq mutex, although that should never be held for long. - */ - self->TransitionFromRunnableToSuspended(kWaitingForDebuggerSend); - - VLOG(jdwp) << " Transferring control to event thread"; - { - MutexLock mu(self, req->lock); - - if ((options & JDWP::INVOKE_SINGLE_THREADED) == 0) { - VLOG(jdwp) << " Resuming all threads"; - thread_list->UndoDebuggerSuspensions(); - } else { - VLOG(jdwp) << " Resuming event thread only"; - thread_list->Resume(targetThread, true); - } - - // The target thread is resumed but needs the JDWP token we're holding. - // We release it now and will acquire it again when the invocation is - // complete and the target thread suspends itself. 
- gJdwpState->ReleaseJdwpTokenForCommand(); - - // Wait for the request to finish executing. - while (targetThread->GetInvokeReq() != nullptr) { - req->cond.Wait(self); - } - } - VLOG(jdwp) << " Control has returned from event thread"; - - /* wait for thread to re-suspend itself */ - SuspendThread(thread_id, false /* request_suspension */); - - // Now the thread is suspended again, we can re-acquire the JDWP token. - gJdwpState->AcquireJdwpTokenForCommand(); - - self->TransitionFromSuspendedToRunnable(); - } + // away we're sitting high and dry -- but we must release this before the UndoDebuggerSuspensions + // call. - /* - * Suspend the threads. We waited for the target thread to suspend - * itself, so all we need to do is suspend the others. - * - * The SuspendAllForDebugger() call will double-suspend the event thread, - * so we want to resume the target thread once to keep the books straight. - */ if ((options & JDWP::INVOKE_SINGLE_THREADED) == 0) { - self->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension); - VLOG(jdwp) << " Suspending all threads"; - thread_list->SuspendAllForDebugger(); - self->TransitionFromSuspendedToRunnable(); - VLOG(jdwp) << " Resuming event thread to balance the count"; + VLOG(jdwp) << " Resuming all threads"; + thread_list->UndoDebuggerSuspensions(); + } else { + VLOG(jdwp) << " Resuming event thread only"; thread_list->Resume(targetThread, true); } - // Copy the result. - *pResultTag = req->result_tag; - *pResultValue = req->result_value; - *pExceptionId = req->exception; - return req->error; + return JDWP::ERR_NONE; } void Dbg::ExecuteMethod(DebugInvokeReq* pReq) { - ScopedObjectAccess soa(Thread::Current()); + Thread* const self = Thread::Current(); + CHECK_NE(self, GetDebugThread()) << "This must be called by the event thread"; + + ScopedObjectAccess soa(self); // We can be called while an exception is pending. We need // to preserve that across the method invocation. - StackHandleScope<3> hs(soa.Self()); - auto old_exception = hs.NewHandle<mirror::Throwable>(soa.Self()->GetException()); + StackHandleScope<1> hs(soa.Self()); + Handle<mirror::Throwable> old_exception = hs.NewHandle(soa.Self()->GetException()); soa.Self()->ClearException(); + // Execute the method then sends reply to the debugger. + ExecuteMethodWithoutPendingException(soa, pReq); + + // If an exception was pending before the invoke, restore it now. + if (old_exception.Get() != nullptr) { + soa.Self()->SetException(old_exception.Get()); + } +} + +// Helper function: write a variable-width value into the output input buffer. +static void WriteValue(JDWP::ExpandBuf* pReply, int width, uint64_t value) { + switch (width) { + case 1: + expandBufAdd1(pReply, value); + break; + case 2: + expandBufAdd2BE(pReply, value); + break; + case 4: + expandBufAdd4BE(pReply, value); + break; + case 8: + expandBufAdd8BE(pReply, value); + break; + default: + LOG(FATAL) << width; + UNREACHABLE(); + } +} + +void Dbg::ExecuteMethodWithoutPendingException(ScopedObjectAccess& soa, DebugInvokeReq* pReq) { + soa.Self()->AssertNoPendingException(); + // Translate the method through the vtable, unless the debugger wants to suppress it. 
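The WriteValue helper introduced above picks the expandBufAdd1/2BE/4BE/8BE call matching the value's width, since JDWP values are big-endian on the wire. The same variable-width append can be sketched with a plain byte vector standing in for JDWP::ExpandBuf:

#include <cstdint>
#include <vector>

void AppendValueBE(std::vector<uint8_t>* out, int width, uint64_t value) {
  // Emit the most significant byte first; width is 1, 2, 4 or 8.
  for (int shift = (width - 1) * 8; shift >= 0; shift -= 8) {
    out->push_back(static_cast<uint8_t>((value >> shift) & 0xff));
  }
}

// AppendValueBE(&buf, 2, 0x1234) appends 0x12 then 0x34, matching expandBufAdd2BE.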
- auto* m = pReq->method; - auto image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); + ArtMethod* m = pReq->method; + size_t image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); if ((pReq->options & JDWP::INVOKE_NONVIRTUAL) == 0 && pReq->receiver.Read() != nullptr) { ArtMethod* actual_method = pReq->klass.Read()->FindVirtualMethodForVirtualOrInterface(m, image_pointer_size); @@ -3992,39 +3944,133 @@ void Dbg::ExecuteMethod(DebugInvokeReq* pReq) { CHECK_EQ(sizeof(jvalue), sizeof(uint64_t)); + // Invoke the method. ScopedLocalRef<jobject> ref(soa.Env(), soa.AddLocalReference<jobject>(pReq->receiver.Read())); JValue result = InvokeWithJValues(soa, ref.get(), soa.EncodeMethod(m), - reinterpret_cast<jvalue*>(pReq->arg_values)); + reinterpret_cast<jvalue*>(pReq->arg_values.get())); - pReq->result_tag = BasicTagFromDescriptor(m->GetShorty()); - const bool is_object_result = (pReq->result_tag == JDWP::JT_OBJECT); + // Prepare JDWP ids for the reply. + JDWP::JdwpTag result_tag = BasicTagFromDescriptor(m->GetShorty()); + const bool is_object_result = (result_tag == JDWP::JT_OBJECT); + StackHandleScope<2> hs(soa.Self()); Handle<mirror::Object> object_result = hs.NewHandle(is_object_result ? result.GetL() : nullptr); Handle<mirror::Throwable> exception = hs.NewHandle(soa.Self()->GetException()); soa.Self()->ClearException(); - pReq->exception = gRegistry->Add(exception); - if (pReq->exception != 0) { + + if (!IsDebuggerActive()) { + // The debugger detached: we must not re-suspend threads. We also don't need to fill the reply + // because it won't be sent either. + return; + } + + JDWP::ObjectId exceptionObjectId = gRegistry->Add(exception); + uint64_t result_value = 0; + if (exceptionObjectId != 0) { VLOG(jdwp) << " JDWP invocation returning with exception=" << exception.Get() << " " << exception->Dump(); - pReq->result_value = 0; + result_value = 0; } else if (is_object_result) { - /* if no exception thrown, examine object result more closely */ + /* if no exception was thrown, examine object result more closely */ JDWP::JdwpTag new_tag = TagFromObject(soa, object_result.Get()); - if (new_tag != pReq->result_tag) { - VLOG(jdwp) << " JDWP promoted result from " << pReq->result_tag << " to " << new_tag; - pReq->result_tag = new_tag; + if (new_tag != result_tag) { + VLOG(jdwp) << " JDWP promoted result from " << result_tag << " to " << new_tag; + result_tag = new_tag; } // Register the object in the registry and reference its ObjectId. This ensures // GC safety and prevents from accessing stale reference if the object is moved. - pReq->result_value = gRegistry->Add(object_result.Get()); + result_value = gRegistry->Add(object_result.Get()); } else { // Primitive result. - DCHECK(IsPrimitiveTag(pReq->result_tag)); - pReq->result_value = result.GetJ(); + DCHECK(IsPrimitiveTag(result_tag)); + result_value = result.GetJ(); + } + const bool is_constructor = m->IsConstructor() && !m->IsStatic(); + if (is_constructor) { + // If we invoked a constructor (which actually returns void), return the receiver, + // unless we threw, in which case we return null. + result_tag = JDWP::JT_OBJECT; + if (exceptionObjectId == 0) { + // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the + // object registry. 
+ result_value = GetObjectRegistry()->Add(pReq->receiver.Read()); + } else { + result_value = 0; + } } - if (old_exception.Get() != nullptr) { - soa.Self()->SetException(old_exception.Get()); + // Suspend other threads if the invoke is not single-threaded. + if ((pReq->options & JDWP::INVOKE_SINGLE_THREADED) == 0) { + soa.Self()->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension); + VLOG(jdwp) << " Suspending all threads"; + Runtime::Current()->GetThreadList()->SuspendAllForDebugger(); + soa.Self()->TransitionFromSuspendedToRunnable(); + } + + VLOG(jdwp) << " --> returned " << result_tag + << StringPrintf(" %#" PRIx64 " (except=%#" PRIx64 ")", result_value, + exceptionObjectId); + + // Show detailed debug output. + if (result_tag == JDWP::JT_STRING && exceptionObjectId == 0) { + if (result_value != 0) { + if (VLOG_IS_ON(jdwp)) { + std::string result_string; + JDWP::JdwpError error = Dbg::StringToUtf8(result_value, &result_string); + CHECK_EQ(error, JDWP::ERR_NONE); + VLOG(jdwp) << " string '" << result_string << "'"; + } + } else { + VLOG(jdwp) << " string (null)"; + } + } + + // Attach the reply to DebugInvokeReq so it can be sent to the debugger when the event thread + // is ready to suspend. + BuildInvokeReply(pReq->reply, pReq->request_id, result_tag, result_value, exceptionObjectId); +} + +void Dbg::BuildInvokeReply(JDWP::ExpandBuf* pReply, uint32_t request_id, JDWP::JdwpTag result_tag, + uint64_t result_value, JDWP::ObjectId exception) { + // Make room for the JDWP header since we do not know the size of the reply yet. + JDWP::expandBufAddSpace(pReply, kJDWPHeaderLen); + + size_t width = GetTagWidth(result_tag); + JDWP::expandBufAdd1(pReply, result_tag); + if (width != 0) { + WriteValue(pReply, width, result_value); + } + JDWP::expandBufAdd1(pReply, JDWP::JT_OBJECT); + JDWP::expandBufAddObjectId(pReply, exception); + + // Now we know the size, we can complete the JDWP header. + uint8_t* buf = expandBufGetBuffer(pReply); + JDWP::Set4BE(buf + kJDWPHeaderSizeOffset, expandBufGetLength(pReply)); + JDWP::Set4BE(buf + kJDWPHeaderIdOffset, request_id); + JDWP::Set1(buf + kJDWPHeaderFlagsOffset, kJDWPFlagReply); // flags + JDWP::Set2BE(buf + kJDWPHeaderErrorCodeOffset, JDWP::ERR_NONE); +} + +void Dbg::FinishInvokeMethod(DebugInvokeReq* pReq) { + CHECK_NE(Thread::Current(), GetDebugThread()) << "This must be called by the event thread"; + + JDWP::ExpandBuf* const pReply = pReq->reply; + CHECK(pReply != nullptr) << "No reply attached to DebugInvokeReq"; + + // We need to prevent other threads (including JDWP thread) from interacting with the debugger + // while we send the reply but are not yet suspended. The JDWP token will be released just before + // we suspend ourself again (see ThreadList::SuspendSelfForDebugger). + gJdwpState->AcquireJdwpTokenForEvent(pReq->thread_id); + + // Send the reply unless the debugger detached before the completion of the method. + if (IsDebuggerActive()) { + const size_t replyDataLength = expandBufGetLength(pReply) - kJDWPHeaderLen; + VLOG(jdwp) << StringPrintf("REPLY INVOKE id=0x%06x (length=%zu)", + pReq->request_id, replyDataLength); + + gJdwpState->SendRequest(pReply); + } else { + VLOG(jdwp) << "Not sending invoke reply because debugger detached"; } } @@ -4665,177 +4711,41 @@ void Dbg::DdmSendHeapSegments(bool native) { Dbg::DdmSendChunk(native ? 
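BuildInvokeReply above reserves kJDWPHeaderLen bytes up front, appends the result tag, value and exception id, and only then patches the header fields, because the total length is not known until the body is complete. A sketch of that reserve-then-patch pattern; the 11-byte header layout (4-byte length, 4-byte id, 1-byte flags, 2-byte error code) follows the JDWP wire format and is assumed here rather than taken from jdwp_priv.h:

#include <cstddef>
#include <cstdint>
#include <vector>

constexpr size_t kHeaderLen = 11;
constexpr uint8_t kFlagReply = 0x80;

void Set4BE(uint8_t* p, uint32_t v) { p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v; }
void Set2BE(uint8_t* p, uint16_t v) { p[0] = v >> 8; p[1] = v; }

void FinishReplyHeader(std::vector<uint8_t>* packet, uint32_t request_id, uint16_t error) {
  uint8_t* buf = packet->data();
  Set4BE(buf + 0, static_cast<uint32_t>(packet->size()));  // total length, header included
  Set4BE(buf + 4, request_id);                             // id of the command being answered
  buf[8] = kFlagReply;                                     // marks the packet as a reply
  Set2BE(buf + 9, error);                                  // ERR_NONE on success
}

// Usage: packet.resize(kHeaderLen); /* append body */ FinishReplyHeader(&packet, id, 0);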
CHUNK_TYPE("NHEN") : CHUNK_TYPE("HPEN"), sizeof(heap_id), heap_id); } -static size_t GetAllocTrackerMax() { -#ifdef HAVE_ANDROID_OS - // Check whether there's a system property overriding the number of records. - const char* propertyName = "dalvik.vm.allocTrackerMax"; - char allocRecordMaxString[PROPERTY_VALUE_MAX]; - if (property_get(propertyName, allocRecordMaxString, "") > 0) { - char* end; - size_t value = strtoul(allocRecordMaxString, &end, 10); - if (*end != '\0') { - LOG(ERROR) << "Ignoring " << propertyName << " '" << allocRecordMaxString - << "' --- invalid"; - return kDefaultNumAllocRecords; - } - if (!IsPowerOfTwo(value)) { - LOG(ERROR) << "Ignoring " << propertyName << " '" << allocRecordMaxString - << "' --- not power of two"; - return kDefaultNumAllocRecords; - } - return value; - } -#endif - return kDefaultNumAllocRecords; -} - void Dbg::SetAllocTrackingEnabled(bool enable) { - Thread* self = Thread::Current(); - if (enable) { - { - MutexLock mu(self, *Locks::alloc_tracker_lock_); - if (recent_allocation_records_ != nullptr) { - return; // Already enabled, bail. - } - alloc_record_max_ = GetAllocTrackerMax(); - LOG(INFO) << "Enabling alloc tracker (" << alloc_record_max_ << " entries of " - << kMaxAllocRecordStackDepth << " frames, taking " - << PrettySize(sizeof(AllocRecord) * alloc_record_max_) << ")"; - DCHECK_EQ(alloc_record_head_, 0U); - DCHECK_EQ(alloc_record_count_, 0U); - recent_allocation_records_ = new AllocRecord[alloc_record_max_]; - CHECK(recent_allocation_records_ != nullptr); - } - Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(); - } else { - { - ScopedObjectAccess soa(self); // For type_cache_.Clear(); - MutexLock mu(self, *Locks::alloc_tracker_lock_); - if (recent_allocation_records_ == nullptr) { - return; // Already disabled, bail. - } - LOG(INFO) << "Disabling alloc tracker"; - delete[] recent_allocation_records_; - recent_allocation_records_ = nullptr; - alloc_record_head_ = 0; - alloc_record_count_ = 0; - type_cache_.Clear(); - } - // If an allocation comes in before we uninstrument, we will safely drop it on the floor. - Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints(); - } -} - -struct AllocRecordStackVisitor : public StackVisitor { - AllocRecordStackVisitor(Thread* thread, AllocRecord* record_in) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), - record(record_in), - depth(0) {} - - // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses - // annotalysis. - bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS { - if (depth >= kMaxAllocRecordStackDepth) { - return false; - } - ArtMethod* m = GetMethod(); - if (!m->IsRuntimeMethod()) { - record->StackElement(depth)->SetMethod(m); - record->StackElement(depth)->SetDexPc(GetDexPc()); - ++depth; - } - return true; - } - - ~AllocRecordStackVisitor() { - // Clear out any unused stack trace elements. - for (; depth < kMaxAllocRecordStackDepth; ++depth) { - record->StackElement(depth)->SetMethod(nullptr); - record->StackElement(depth)->SetDexPc(0); - } - } - - AllocRecord* record; - size_t depth; -}; - -void Dbg::RecordAllocation(Thread* self, mirror::Class* type, size_t byte_count) { - MutexLock mu(self, *Locks::alloc_tracker_lock_); - if (recent_allocation_records_ == nullptr) { - // In the process of shutting down recording, bail. - return; - } - - // Advance and clip. 
- if (++alloc_record_head_ == alloc_record_max_) { - alloc_record_head_ = 0; - } - - // Fill in the basics. - AllocRecord* record = &recent_allocation_records_[alloc_record_head_]; - record->SetType(type); - record->SetByteCount(byte_count); - record->SetThinLockId(self->GetThreadId()); - - // Fill in the stack trace. - AllocRecordStackVisitor visitor(self, record); - visitor.WalkStack(); - - if (alloc_record_count_ < alloc_record_max_) { - ++alloc_record_count_; - } -} - -// Returns the index of the head element. -// -// We point at the most-recently-written record, so if alloc_record_count_ is 1 -// we want to use the current element. Take "head+1" and subtract count -// from it. -// -// We need to handle underflow in our circular buffer, so we add -// alloc_record_max_ and then mask it back down. -size_t Dbg::HeadIndex() { - return (Dbg::alloc_record_head_ + 1 + Dbg::alloc_record_max_ - Dbg::alloc_record_count_) & - (Dbg::alloc_record_max_ - 1); + gc::AllocRecordObjectMap::SetAllocTrackingEnabled(enable); } void Dbg::DumpRecentAllocations() { ScopedObjectAccess soa(Thread::Current()); MutexLock mu(soa.Self(), *Locks::alloc_tracker_lock_); - if (recent_allocation_records_ == nullptr) { + if (!Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()) { LOG(INFO) << "Not recording tracked allocations"; return; } + gc::AllocRecordObjectMap* records = Runtime::Current()->GetHeap()->GetAllocationRecords(); + CHECK(records != nullptr); - // "i" is the head of the list. We want to start at the end of the - // list and move forward to the tail. - size_t i = HeadIndex(); - const uint16_t capped_count = CappedAllocRecordCount(Dbg::alloc_record_count_); + const uint16_t capped_count = CappedAllocRecordCount(records->Size()); uint16_t count = capped_count; - LOG(INFO) << "Tracked allocations, (head=" << alloc_record_head_ << " count=" << count << ")"; - while (count--) { - AllocRecord* record = &recent_allocation_records_[i]; + LOG(INFO) << "Tracked allocations, (count=" << count << ")"; + for (auto it = records->RBegin(), end = records->REnd(); + count > 0 && it != end; count--, it++) { + const gc::AllocRecord* record = it->second; - LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->ThinLockId(), record->ByteCount()) - << PrettyClass(record->Type()); + LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->GetTid(), record->ByteCount()) + << PrettyClass(it->first.Read()->GetClass()); - for (size_t stack_frame = 0; stack_frame < kMaxAllocRecordStackDepth; ++stack_frame) { - AllocRecordStackTraceElement* stack_element = record->StackElement(stack_frame); - ArtMethod* m = stack_element->Method(); - if (m == nullptr) { - break; - } - LOG(INFO) << " " << PrettyMethod(m) << " line " << stack_element->LineNumber(); + for (size_t stack_frame = 0, depth = record->GetDepth(); stack_frame < depth; ++stack_frame) { + const gc::AllocRecordStackTraceElement& stack_element = record->StackElement(stack_frame); + ArtMethod* m = stack_element.GetMethod(); + LOG(INFO) << " " << PrettyMethod(m) << " line " << stack_element.ComputeLineNumber(); } // pause periodically to help logcat catch up if ((count % 5) == 0) { usleep(40000); } - - i = (i + 1) & (alloc_record_max_ - 1); } } @@ -4937,6 +4847,15 @@ jbyteArray Dbg::GetRecentAllocations() { std::vector<uint8_t> bytes; { MutexLock mu(self, *Locks::alloc_tracker_lock_); + gc::AllocRecordObjectMap* records = Runtime::Current()->GetHeap()->GetAllocationRecords(); + // In case this method is called when allocation tracker is disabled, + // we should still 
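With the move to gc::AllocRecordObjectMap, the hand-rolled circular buffer and its HeadIndex masking disappear; DumpRecentAllocations now walks the records in reverse insertion order, newest first, stopping at the capped count. The access pattern is easy to sketch with a std::deque standing in for the record map:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <deque>
#include <string>

struct Record { unsigned tid; size_t bytes; std::string klass; };

void DumpMostRecent(const std::deque<Record>& records, uint16_t capped_count) {
  uint16_t count = capped_count;
  // Reverse iteration yields the most recent allocations first.
  for (auto it = records.rbegin(); count > 0 && it != records.rend(); ++it, --count) {
    std::printf("Thread %-2u %6zu bytes %s\n", it->tid, it->bytes, it->klass.c_str());
  }
}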
send some data back. + gc::AllocRecordObjectMap dummy; + if (records == nullptr) { + CHECK(!Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()); + records = &dummy; + } + // // Part 1: generate string tables. // @@ -4944,26 +4863,23 @@ jbyteArray Dbg::GetRecentAllocations() { StringTable method_names; StringTable filenames; - const uint16_t capped_count = CappedAllocRecordCount(Dbg::alloc_record_count_); + const uint16_t capped_count = CappedAllocRecordCount(records->Size()); uint16_t count = capped_count; - size_t idx = HeadIndex(); - while (count--) { - AllocRecord* record = &recent_allocation_records_[idx]; + for (auto it = records->RBegin(), end = records->REnd(); + count > 0 && it != end; count--, it++) { + const gc::AllocRecord* record = it->second; std::string temp; - class_names.Add(record->Type()->GetDescriptor(&temp)); - for (size_t i = 0; i < kMaxAllocRecordStackDepth; i++) { - ArtMethod* m = record->StackElement(i)->Method(); - if (m != nullptr) { - class_names.Add(m->GetDeclaringClassDescriptor()); - method_names.Add(m->GetName()); - filenames.Add(GetMethodSourceFile(m)); - } + class_names.Add(it->first.Read()->GetClass()->GetDescriptor(&temp)); + for (size_t i = 0, depth = record->GetDepth(); i < depth; i++) { + ArtMethod* m = record->StackElement(i).GetMethod(); + class_names.Add(m->GetDeclaringClassDescriptor()); + method_names.Add(m->GetName()); + filenames.Add(GetMethodSourceFile(m)); } - - idx = (idx + 1) & (alloc_record_max_ - 1); } - LOG(INFO) << "allocation records: " << capped_count; + LOG(INFO) << "recent allocation records: " << capped_count; + LOG(INFO) << "allocation records all objects: " << records->Size(); // // Part 2: Generate the output and store it in the buffer. @@ -4991,20 +4907,23 @@ jbyteArray Dbg::GetRecentAllocations() { JDWP::Append2BE(bytes, method_names.Size()); JDWP::Append2BE(bytes, filenames.Size()); - idx = HeadIndex(); std::string temp; - for (count = capped_count; count != 0; --count) { + count = capped_count; + // The last "count" number of allocation records in "records" are the most recent "count" number + // of allocations. Reverse iterate to get them. The most recent allocation is sent first. 
+ for (auto it = records->RBegin(), end = records->REnd(); + count > 0 && it != end; count--, it++) { // For each entry: // (4b) total allocation size // (2b) thread id // (2b) allocated object's class name index // (1b) stack depth - AllocRecord* record = &recent_allocation_records_[idx]; + const gc::AllocRecord* record = it->second; size_t stack_depth = record->GetDepth(); size_t allocated_object_class_name_index = - class_names.IndexOf(record->Type()->GetDescriptor(&temp)); + class_names.IndexOf(it->first.Read()->GetClass()->GetDescriptor(&temp)); JDWP::Append4BE(bytes, record->ByteCount()); - JDWP::Append2BE(bytes, record->ThinLockId()); + JDWP::Append2BE(bytes, static_cast<uint16_t>(record->GetTid())); JDWP::Append2BE(bytes, allocated_object_class_name_index); JDWP::Append1BE(bytes, stack_depth); @@ -5014,16 +4933,15 @@ jbyteArray Dbg::GetRecentAllocations() { // (2b) method name // (2b) method source file // (2b) line number, clipped to 32767; -2 if native; -1 if no source - ArtMethod* m = record->StackElement(stack_frame)->Method(); + ArtMethod* m = record->StackElement(stack_frame).GetMethod(); size_t class_name_index = class_names.IndexOf(m->GetDeclaringClassDescriptor()); size_t method_name_index = method_names.IndexOf(m->GetName()); size_t file_name_index = filenames.IndexOf(GetMethodSourceFile(m)); JDWP::Append2BE(bytes, class_name_index); JDWP::Append2BE(bytes, method_name_index); JDWP::Append2BE(bytes, file_name_index); - JDWP::Append2BE(bytes, record->StackElement(stack_frame)->LineNumber()); + JDWP::Append2BE(bytes, record->StackElement(stack_frame).ComputeLineNumber()); } - idx = (idx + 1) & (alloc_record_max_ - 1); } // (xb) class name strings diff --git a/runtime/debugger.h b/runtime/debugger.h index 7c586a4ff9..fd7d46c37e 100644 --- a/runtime/debugger.h +++ b/runtime/debugger.h @@ -23,7 +23,6 @@ #include <pthread.h> -#include <map> #include <set> #include <string> #include <vector> @@ -32,7 +31,6 @@ #include "jdwp/jdwp.h" #include "jni.h" #include "jvalue.h" -#include "object_callbacks.h" #include "thread_state.h" namespace art { @@ -41,10 +39,10 @@ class Class; class Object; class Throwable; } // namespace mirror -class AllocRecord; class ArtField; class ArtMethod; class ObjectRegistry; +class ScopedObjectAccess; class ScopedObjectAccessUnchecked; class StackVisitor; class Thread; @@ -53,33 +51,32 @@ class Thread; * Invoke-during-breakpoint support. 
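The GetRecentAllocations hunk above emits one fixed layout per record: 4 bytes of allocation size, 2 bytes of thread id, 2 bytes of class-name index and 1 byte of stack depth, followed by four 2-byte indices (class, method, file, line) per frame, all big-endian via the JDWP::Append*BE helpers. A self-contained sketch of that layout using a byte vector in place of the output buffer:

#include <cstdint>
#include <vector>

void Append(std::vector<uint8_t>* out, uint64_t v, int width) {
  for (int shift = (width - 1) * 8; shift >= 0; shift -= 8) {
    out->push_back(static_cast<uint8_t>((v >> shift) & 0xff));
  }
}

struct Frame { uint16_t class_idx, method_idx, file_idx, line; };

void AppendRecord(std::vector<uint8_t>* out, uint32_t byte_count, uint16_t tid,
                  uint16_t class_idx, const std::vector<Frame>& frames) {
  Append(out, byte_count, 4);     // (4b) total allocation size
  Append(out, tid, 2);            // (2b) thread id
  Append(out, class_idx, 2);      // (2b) allocated object's class name index
  Append(out, frames.size(), 1);  // (1b) stack depth
  for (const Frame& f : frames) {
    Append(out, f.class_idx, 2);
    Append(out, f.method_idx, 2);
    Append(out, f.file_idx, 2);
    Append(out, f.line, 2);       // clipped to 32767; -2 if native, -1 if no source
  }
}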
*/ struct DebugInvokeReq { - DebugInvokeReq(mirror::Object* invoke_receiver, mirror::Class* invoke_class, + DebugInvokeReq(uint32_t invoke_request_id, JDWP::ObjectId invoke_thread_id, + mirror::Object* invoke_receiver, mirror::Class* invoke_class, ArtMethod* invoke_method, uint32_t invoke_options, - uint64_t* args, uint32_t args_count) - : receiver(invoke_receiver), klass(invoke_class), method(invoke_method), - arg_count(args_count), arg_values(args), options(invoke_options), - error(JDWP::ERR_NONE), result_tag(JDWP::JT_VOID), result_value(0), exception(0), - lock("a DebugInvokeReq lock", kBreakpointInvokeLock), - cond("a DebugInvokeReq condition variable", lock) { + uint64_t args[], uint32_t args_count) + : request_id(invoke_request_id), thread_id(invoke_thread_id), receiver(invoke_receiver), + klass(invoke_class), method(invoke_method), arg_count(args_count), arg_values(args), + options(invoke_options), reply(JDWP::expandBufAlloc()) { } - /* request */ - GcRoot<mirror::Object> receiver; // not used for ClassType.InvokeMethod + ~DebugInvokeReq() { + JDWP::expandBufFree(reply); + } + + // Request + const uint32_t request_id; + const JDWP::ObjectId thread_id; + GcRoot<mirror::Object> receiver; // not used for ClassType.InvokeMethod. GcRoot<mirror::Class> klass; - ArtMethod* method; + ArtMethod* const method; const uint32_t arg_count; - uint64_t* const arg_values; // will be null if arg_count_ == 0 + std::unique_ptr<uint64_t[]> arg_values; // will be null if arg_count_ == 0. We take ownership + // of this array so we must delete it upon destruction. const uint32_t options; - /* result */ - JDWP::JdwpError error; - JDWP::JdwpTag result_tag; - uint64_t result_value; // either a primitive value or an ObjectId - JDWP::ObjectId exception; - - /* condition variable to wait on while the method executes */ - Mutex lock DEFAULT_MUTEX_ACQUIRED_AFTER; - ConditionVariable cond GUARDED_BY(lock); + // Reply + JDWP::ExpandBuf* const reply; void VisitRoots(RootVisitor* visitor, const RootInfo& root_info) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -202,19 +199,6 @@ std::ostream& operator<<(std::ostream& os, const DeoptimizationRequest::Kind& rh class Dbg { public: - class TypeCache { - public: - // Returns a weak global for the input type. Deduplicates. - jobject Add(mirror::Class* t) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, - Locks::alloc_tracker_lock_); - // Clears the type cache and deletes all the weak global refs. - void Clear() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, - Locks::alloc_tracker_lock_); - - private: - std::multimap<int32_t, jobject> objects_; - }; - static void SetJdwpAllowed(bool allowed); static void StartJdwp(); @@ -621,19 +605,39 @@ class Dbg { LOCKS_EXCLUDED(Locks::thread_list_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - // Invoke support for commands ClassType.InvokeMethod, ClassType.NewInstance and - // ObjectReference.InvokeMethod. - static JDWP::JdwpError InvokeMethod(JDWP::ObjectId thread_id, JDWP::ObjectId object_id, - JDWP::RefTypeId class_id, JDWP::MethodId method_id, - uint32_t arg_count, uint64_t* arg_values, - JDWP::JdwpTag* arg_types, uint32_t options, - JDWP::JdwpTag* pResultTag, uint64_t* pResultValue, - JDWP::ObjectId* pExceptObj) + /* + * Invoke support + */ + + // Called by the JDWP thread to prepare invocation in the event thread (suspended on an event). + // If the information sent by the debugger is incorrect, it will send a reply with the + // appropriate error code. 
Otherwise, it will attach a DebugInvokeReq object to the event thread + // and resume it (and possibly other threads depending on the invoke options). + // Unlike other commands, the JDWP thread will not send the reply to the debugger (see + // JdwpState::ProcessRequest). The reply will be sent by the event thread itself after method + // invocation completes (see FinishInvokeMethod). This is required to allow the JDWP thread to + // process incoming commands from the debugger while the invocation is still in progress in the + // event thread, especially if it gets suspended by a debug event occurring in another thread. + static JDWP::JdwpError PrepareInvokeMethod(uint32_t request_id, JDWP::ObjectId thread_id, + JDWP::ObjectId object_id, JDWP::RefTypeId class_id, + JDWP::MethodId method_id, uint32_t arg_count, + uint64_t arg_values[], JDWP::JdwpTag* arg_types, + uint32_t options) LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::thread_suspend_count_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + // Called by the event thread to execute a method prepared by the JDWP thread in the given + // DebugInvokeReq object. Once the invocation completes, the event thread attaches a reply + // to that DebugInvokeReq object so it can be sent to the debugger only when the event thread + // is ready to suspend (see FinishInvokeMethod). static void ExecuteMethod(DebugInvokeReq* pReq); + // Called by the event thread to send the reply of the invoke (created in ExecuteMethod) + // before suspending itself. This is to ensure the thread is ready to suspend before the + // debugger receives the reply. + static void FinishInvokeMethod(DebugInvokeReq* pReq); + /* * DDM support. */ @@ -655,19 +659,12 @@ class Dbg { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); /* - * Recent allocation tracking support. + * Allocation tracking support. 
*/ - static void RecordAllocation(Thread* self, mirror::Class* type, size_t byte_count) - LOCKS_EXCLUDED(Locks::alloc_tracker_lock_) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static void SetAllocTrackingEnabled(bool enabled) LOCKS_EXCLUDED(Locks::alloc_tracker_lock_); - static bool IsAllocTrackingEnabled() { - return recent_allocation_records_ != nullptr; - } static jbyteArray GetRecentAllocations() LOCKS_EXCLUDED(Locks::alloc_tracker_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - static size_t HeadIndex() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); static void DumpRecentAllocations() LOCKS_EXCLUDED(Locks::alloc_tracker_lock_); enum HpifWhen { @@ -717,6 +714,14 @@ class Dbg { } private: + static void ExecuteMethodWithoutPendingException(ScopedObjectAccess& soa, DebugInvokeReq* pReq) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + static void BuildInvokeReply(JDWP::ExpandBuf* pReply, uint32_t request_id, + JDWP::JdwpTag result_tag, uint64_t result_value, + JDWP::ObjectId exception) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + static JDWP::JdwpError GetLocalValue(const StackVisitor& visitor, ScopedObjectAccessUnchecked& soa, int slot, JDWP::JdwpTag tag, uint8_t* buf, size_t width) @@ -755,11 +760,6 @@ class Dbg { static bool IsForcedInterpreterNeededForUpcallImpl(Thread* thread, ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - static AllocRecord* recent_allocation_records_ PT_GUARDED_BY(Locks::alloc_tracker_lock_); - static size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_); - static size_t alloc_record_head_ GUARDED_BY(Locks::alloc_tracker_lock_); - static size_t alloc_record_count_ GUARDED_BY(Locks::alloc_tracker_lock_); - // Indicates whether the debugger is making requests. static bool gDebuggerActive; @@ -784,9 +784,6 @@ class Dbg { static size_t* GetReferenceCounterForEvent(uint32_t instrumentation_event); - // Weak global type cache, TODO improve this. - static TypeCache type_cache_ GUARDED_BY(Locks::alloc_tracker_lock_); - // Instrumentation event reference counters. // TODO we could use an array instead of having all these dedicated counters. Instrumentation // events are bits of a mask so we could convert them to array index. @@ -798,7 +795,6 @@ class Dbg { static size_t exception_catch_event_ref_count_ GUARDED_BY(Locks::deoptimization_lock_); static uint32_t instrumentation_events_ GUARDED_BY(Locks::mutator_lock_); - friend class AllocRecord; // For type_cache_ with proper annotalysis. DISALLOW_COPY_AND_ASSIGN(Dbg); }; diff --git a/runtime/dex_file.h b/runtime/dex_file.h index d017601565..7ac264a0c5 100644 --- a/runtime/dex_file.h +++ b/runtime/dex_file.h @@ -264,13 +264,18 @@ class DexFile { // Raw code_item. struct CodeItem { - uint16_t registers_size_; - uint16_t ins_size_; - uint16_t outs_size_; - uint16_t tries_size_; - uint32_t debug_info_off_; // file offset to debug info stream + uint16_t registers_size_; // the number of registers used by this code + // (locals + parameters) + uint16_t ins_size_; // the number of words of incoming arguments to the method + // that this code is for + uint16_t outs_size_; // the number of words of outgoing argument space required + // by this code for method invocation + uint16_t tries_size_; // the number of try_items for this instance. If non-zero, + // then these appear as the tries array just after the + // insns in this instance. 
+ uint32_t debug_info_off_; // file offset to debug info stream uint32_t insns_size_in_code_units_; // size of the insns array, in 2 byte code units - uint16_t insns_[1]; + uint16_t insns_[1]; // actual array of bytecode. private: DISALLOW_COPY_AND_ASSIGN(CodeItem); diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc index a66c38e0fe..5fa58f754f 100644 --- a/runtime/dex_file_verifier.cc +++ b/runtime/dex_file_verifier.cc @@ -465,7 +465,9 @@ bool DexFileVerifier::CheckClassDataItemField(uint32_t idx, uint32_t access_flag } bool DexFileVerifier::CheckClassDataItemMethod(uint32_t idx, uint32_t access_flags, - uint32_t code_offset, bool expect_direct) { + uint32_t code_offset, + std::unordered_set<uint32_t>& direct_method_indexes, + bool expect_direct) { if (!CheckIndex(idx, header_->method_ids_size_, "class_data_item method_idx")) { return false; } @@ -480,6 +482,13 @@ bool DexFileVerifier::CheckClassDataItemMethod(uint32_t idx, uint32_t access_fla return false; } + if (expect_direct) { + direct_method_indexes.insert(idx); + } else if (direct_method_indexes.find(idx) != direct_method_indexes.end()) { + ErrorStringPrintf("Found virtual method with same index as direct method: %d", idx); + return false; + } + constexpr uint32_t access_method_mask = kAccJavaFlagsMask | kAccConstructor | kAccDeclaredSynchronized; if (UNLIKELY(((access_flags & ~access_method_mask) != 0) || @@ -682,6 +691,7 @@ bool DexFileVerifier::CheckEncodedAnnotation() { bool DexFileVerifier::CheckIntraClassDataItem() { ClassDataItemIterator it(*dex_file_, ptr_); + std::unordered_set<uint32_t> direct_method_indexes; // These calls use the raw access flags to check whether the whole dex field is valid. @@ -697,13 +707,13 @@ bool DexFileVerifier::CheckIntraClassDataItem() { } for (; it.HasNextDirectMethod(); it.Next()) { if (!CheckClassDataItemMethod(it.GetMemberIndex(), it.GetRawMemberAccessFlags(), - it.GetMethodCodeItemOffset(), true)) { + it.GetMethodCodeItemOffset(), direct_method_indexes, true)) { return false; } } for (; it.HasNextVirtualMethod(); it.Next()) { if (!CheckClassDataItemMethod(it.GetMemberIndex(), it.GetRawMemberAccessFlags(), - it.GetMethodCodeItemOffset(), false)) { + it.GetMethodCodeItemOffset(), direct_method_indexes, false)) { return false; } } diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h index 877dfc2efd..ccc40d4442 100644 --- a/runtime/dex_file_verifier.h +++ b/runtime/dex_file_verifier.h @@ -59,6 +59,7 @@ class DexFileVerifier { uint32_t* handler_offsets, uint32_t handlers_size); bool CheckClassDataItemField(uint32_t idx, uint32_t access_flags, bool expect_static); bool CheckClassDataItemMethod(uint32_t idx, uint32_t access_flags, uint32_t code_offset, + std::unordered_set<uint32_t>& direct_method_indexes, bool expect_direct); bool CheckPadding(size_t offset, uint32_t aligned_offset); bool CheckEncodedValue(); diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index b0cbd02880..de925b7e8c 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -39,9 +39,12 @@ namespace art { inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method, - uint32_t method_index, - InvokeType invoke_type) + const InlineInfo& inline_info, + uint8_t inlining_depth) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + uint32_t method_index = inline_info.GetMethodIndexAtDepth(inlining_depth); + InvokeType invoke_type = static_cast<InvokeType>( + 
inline_info.GetInvokeTypeAtDepth(inlining_depth)); ArtMethod* caller = outer_method->GetDexCacheResolvedMethod(method_index, sizeof(void*)); if (!caller->IsRuntimeMethod()) { return caller; @@ -51,10 +54,19 @@ inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method, // the stub that will then update the dex cache. Therefore, we need to do the // resolution ourselves. + // We first find the class loader of our caller. If it is the outer method, we can directly + // use its class loader. Otherwise, we also need to resolve our caller. StackHandleScope<2> hs(Thread::Current()); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - Handle<mirror::ClassLoader> class_loader(hs.NewHandle(outer_method->GetClassLoader())); + MutableHandle<mirror::ClassLoader> class_loader(hs.NewHandle<mirror::Class>(nullptr)); Handle<mirror::DexCache> dex_cache(hs.NewHandle(outer_method->GetDexCache())); + if (inlining_depth == 0) { + class_loader.Assign(outer_method->GetClassLoader()); + } else { + caller = GetResolvedMethod(outer_method, inline_info, inlining_depth - 1); + class_loader.Assign(caller->GetClassLoader()); + } + return class_linker->ResolveMethod( *outer_method->GetDexFile(), method_index, dex_cache, class_loader, nullptr, invoke_type); } @@ -82,10 +94,7 @@ inline ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp, DCHECK(stack_map.IsValid()); if (stack_map.HasInlineInfo(encoding)) { InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); - uint32_t method_index = inline_info.GetMethodIndexAtDepth(inline_info.GetDepth() - 1); - InvokeType invoke_type = static_cast<InvokeType>( - inline_info.GetInvokeTypeAtDepth(inline_info.GetDepth() - 1)); - caller = GetResolvedMethod(outer_method, method_index, invoke_type); + caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1); } } diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc index 3d42ea09d9..f1b54459df 100644 --- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc @@ -36,6 +36,7 @@ extern "C" NO_RETURN void artDeoptimize(Thread* self) SHARED_LOCKS_REQUIRED(Lock self->Dump(LOG(INFO)); } + self->PushAndClearDeoptimizationReturnValue(); self->SetException(Thread::GetDeoptimizationException()); self->QuickDeliverException(); } diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index cc83db1e59..4f76ebdd40 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -676,7 +676,7 @@ extern "C" uint64_t artQuickToInterpreterBridge(ArtMethod* method, Thread* self, ArtMethod* caller = QuickArgumentVisitor::GetCallingMethod(sp); if (UNLIKELY(Dbg::IsForcedInterpreterNeededForUpcall(self, caller))) { self->SetException(Thread::GetDeoptimizationException()); - self->SetDeoptimizationReturnValue(result); + self->SetDeoptimizationReturnValue(result, shorty[0] == 'L'); } // No need to restore the args since the method has already been run by the interpreter. 
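
The GetResolvedMethod() change above resolves an inlined caller recursively: the class loader used to resolve the method at inlining depth N is taken from the caller at depth N-1, and depth 0 falls back to the outer (compiled) method's loader. The following is a minimal sketch of that recursion only; FakeMethod, InlinedFrame and ResolveAtDepth are made-up stand-ins, not ART's InlineInfo/ClassLinker API.

// Sketch only: FakeMethod and InlinedFrame are hypothetical stand-ins for
// ArtMethod and InlineInfo entries; the point is the depth-first recursion
// that propagates the correct class loader, not the real resolution code.
#include <cstddef>
#include <vector>

struct FakeMethod { const void* class_loader; };
struct InlinedFrame { unsigned method_index; };

FakeMethod ResolveAtDepth(const FakeMethod& outer,
                          const std::vector<InlinedFrame>& inline_stack,
                          size_t depth) {
  // Depth 0 uses the outer method's loader; deeper frames first resolve the
  // caller one level up and reuse its loader.
  const void* loader = (depth == 0)
      ? outer.class_loader
      : ResolveAtDepth(outer, inline_stack, depth - 1).class_loader;
  // The real code would now call ClassLinker::ResolveMethod() with
  // inline_stack[depth].method_index and this loader.
  return FakeMethod{loader};
}
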
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index 963dd0265f..0a5ebfa81b 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -72,6 +72,8 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFP(Thread, tls32_, throwing_OutOfMemoryError, no_thread_suspension, 4); EXPECT_OFFSET_DIFFP(Thread, tls32_, no_thread_suspension, thread_exit_check_count, 4); EXPECT_OFFSET_DIFFP(Thread, tls32_, thread_exit_check_count, handling_signal_, 4); + EXPECT_OFFSET_DIFFP(Thread, tls32_, handling_signal_, + deoptimization_return_value_is_reference, 4); // TODO: Better connection. Take alignment into account. EXPECT_OFFSET_DIFF_GT3(Thread, tls32_.thread_exit_check_count, tls64_.trace_clock_base, 4, @@ -103,11 +105,11 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, long_jump_context, instrumentation_stack, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, instrumentation_stack, debug_invoke_req, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, debug_invoke_req, single_step_control, sizeof(void*)); - EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, single_step_control, deoptimization_shadow_frame, + EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, single_step_control, stacked_shadow_frame_record, sizeof(void*)); - EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, deoptimization_shadow_frame, - shadow_frame_under_construction, sizeof(void*)); - EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, shadow_frame_under_construction, name, sizeof(void*)); + EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, stacked_shadow_frame_record, + deoptimization_return_value_stack, sizeof(void*)); + EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, deoptimization_return_value_stack, name, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, name, pthread_self, sizeof(void*)); EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, pthread_self, last_no_thread_suspension_cause, sizeof(void*)); diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h index c16f5d35e0..006d2c7d30 100644 --- a/runtime/gc/accounting/space_bitmap-inl.h +++ b/runtime/gc/accounting/space_bitmap-inl.h @@ -159,6 +159,7 @@ template<size_t kAlignment> template<bool kSetBit> inline bool SpaceBitmap<kAlignment>::Modify(const mirror::Object* obj) { uintptr_t addr = reinterpret_cast<uintptr_t>(obj); DCHECK_GE(addr, heap_begin_); + DCHECK(HasAddress(obj)) << obj; const uintptr_t offset = addr - heap_begin_; const size_t index = OffsetToIndex(offset); const uintptr_t mask = OffsetToMask(offset); diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc index fe2b284fcb..6546eb4245 100644 --- a/runtime/gc/accounting/space_bitmap.cc +++ b/runtime/gc/accounting/space_bitmap.cc @@ -35,6 +35,11 @@ size_t SpaceBitmap<kAlignment>::ComputeBitmapSize(uint64_t capacity) { } template<size_t kAlignment> +size_t SpaceBitmap<kAlignment>::ComputeHeapSize(uint64_t bitmap_bytes) { + return bitmap_bytes * kBitsPerByte * kAlignment; +} + +template<size_t kAlignment> SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::CreateFromMemMap( const std::string& name, MemMap* mem_map, uint8_t* heap_begin, size_t heap_capacity) { CHECK(mem_map != nullptr); diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h index d6b3ed4f26..35faff3774 100644 --- a/runtime/gc/accounting/space_bitmap.h +++ b/runtime/gc/accounting/space_bitmap.h @@ -188,15 +188,16 @@ class SpaceBitmap { std::string Dump() const; + // Helper function for computing 
bitmap size based on a 64 bit capacity. + static size_t ComputeBitmapSize(uint64_t capacity); + static size_t ComputeHeapSize(uint64_t bitmap_bytes); + private: // TODO: heap_end_ is initialized so that the heap bitmap is empty, this doesn't require the -1, // however, we document that this is expected on heap_end_ SpaceBitmap(const std::string& name, MemMap* mem_map, uintptr_t* bitmap_begin, size_t bitmap_size, const void* heap_begin); - // Helper function for computing bitmap size based on a 64 bit capacity. - static size_t ComputeBitmapSize(uint64_t capacity); - template<bool kSetBit> bool Modify(const mirror::Object* obj); diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc new file mode 100644 index 0000000000..a385363428 --- /dev/null +++ b/runtime/gc/allocation_record.cc @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "allocation_record.h" + +#include "art_method-inl.h" +#include "base/stl_util.h" +#include "stack.h" + +#ifdef HAVE_ANDROID_OS +#include "cutils/properties.h" +#endif + +namespace art { +namespace gc { + +int32_t AllocRecordStackTraceElement::ComputeLineNumber() const { + DCHECK(method_ != nullptr); + return method_->GetLineNumFromDexPC(dex_pc_); +} + +void AllocRecordObjectMap::SetProperties() { +#ifdef HAVE_ANDROID_OS + // Check whether there's a system property overriding the max number of records. + const char* propertyName = "dalvik.vm.allocTrackerMax"; + char allocMaxString[PROPERTY_VALUE_MAX]; + if (property_get(propertyName, allocMaxString, "") > 0) { + char* end; + size_t value = strtoul(allocMaxString, &end, 10); + if (*end != '\0') { + LOG(ERROR) << "Ignoring " << propertyName << " '" << allocMaxString + << "' --- invalid"; + } else { + alloc_record_max_ = value; + } + } + // Check whether there's a system property overriding the max depth of stack trace. + propertyName = "dalvik.vm.allocStackDepth"; + char stackDepthString[PROPERTY_VALUE_MAX]; + if (property_get(propertyName, stackDepthString, "") > 0) { + char* end; + size_t value = strtoul(stackDepthString, &end, 10); + if (*end != '\0') { + LOG(ERROR) << "Ignoring " << propertyName << " '" << stackDepthString + << "' --- invalid"; + } else { + max_stack_depth_ = value; + } + } +#endif +} + +AllocRecordObjectMap::~AllocRecordObjectMap() { + STLDeleteValues(&entries_); +} + +void AllocRecordObjectMap::SweepAllocationRecords(IsMarkedCallback* callback, void* arg) { + VLOG(heap) << "Start SweepAllocationRecords()"; + size_t count_deleted = 0, count_moved = 0; + for (auto it = entries_.begin(), end = entries_.end(); it != end;) { + // This does not need a read barrier because this is called by GC. 
+ mirror::Object* old_object = it->first.Read<kWithoutReadBarrier>(); + AllocRecord* record = it->second; + mirror::Object* new_object = callback(old_object, arg); + if (new_object == nullptr) { + delete record; + it = entries_.erase(it); + ++count_deleted; + } else { + if (old_object != new_object) { + it->first = GcRoot<mirror::Object>(new_object); + ++count_moved; + } + ++it; + } + } + VLOG(heap) << "Deleted " << count_deleted << " allocation records"; + VLOG(heap) << "Updated " << count_moved << " allocation records"; +} + +struct AllocRecordStackVisitor : public StackVisitor { + AllocRecordStackVisitor(Thread* thread, AllocRecordStackTrace* trace_in, size_t max) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + trace(trace_in), + depth(0), + max_depth(max) {} + + // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses + // annotalysis. + bool VisitFrame() OVERRIDE NO_THREAD_SAFETY_ANALYSIS { + if (depth >= max_depth) { + return false; + } + ArtMethod* m = GetMethod(); + if (!m->IsRuntimeMethod()) { + trace->SetStackElementAt(depth, m, GetDexPc()); + ++depth; + } + return true; + } + + ~AllocRecordStackVisitor() { + trace->SetDepth(depth); + } + + AllocRecordStackTrace* trace; + size_t depth; + const size_t max_depth; +}; + +void AllocRecordObjectMap::SetAllocTrackingEnabled(bool enable) { + Thread* self = Thread::Current(); + Heap* heap = Runtime::Current()->GetHeap(); + if (enable) { + { + MutexLock mu(self, *Locks::alloc_tracker_lock_); + if (heap->IsAllocTrackingEnabled()) { + return; // Already enabled, bail. + } + AllocRecordObjectMap* records = new AllocRecordObjectMap(); + CHECK(records != nullptr); + records->SetProperties(); + std::string self_name; + self->GetThreadName(self_name); + if (self_name == "JDWP") { + records->alloc_ddm_thread_id_ = self->GetTid(); + } + size_t sz = sizeof(AllocRecordStackTraceElement) * records->max_stack_depth_ + + sizeof(AllocRecord) + sizeof(AllocRecordStackTrace); + LOG(INFO) << "Enabling alloc tracker (" << records->alloc_record_max_ << " entries of " + << records->max_stack_depth_ << " frames, taking up to " + << PrettySize(sz * records->alloc_record_max_) << ")"; + heap->SetAllocationRecords(records); + heap->SetAllocTrackingEnabled(true); + } + Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(); + } else { + { + MutexLock mu(self, *Locks::alloc_tracker_lock_); + if (!heap->IsAllocTrackingEnabled()) { + return; // Already disabled, bail. + } + heap->SetAllocTrackingEnabled(false); + LOG(INFO) << "Disabling alloc tracker"; + heap->SetAllocationRecords(nullptr); + } + // If an allocation comes in before we uninstrument, we will safely drop it on the floor. + Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints(); + } +} + +void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, size_t byte_count) { + MutexLock mu(self, *Locks::alloc_tracker_lock_); + Heap* heap = Runtime::Current()->GetHeap(); + if (!heap->IsAllocTrackingEnabled()) { + // In the process of shutting down recording, bail. + return; + } + + AllocRecordObjectMap* records = heap->GetAllocationRecords(); + DCHECK(records != nullptr); + + // Do not record for DDM thread + if (records->alloc_ddm_thread_id_ == self->GetTid()) { + return; + } + + DCHECK_LE(records->Size(), records->alloc_record_max_); + + // Remove oldest record. 
+ if (records->Size() == records->alloc_record_max_) { + records->RemoveOldest(); + } + + // Get stack trace. + const size_t max_depth = records->max_stack_depth_; + AllocRecordStackTrace* trace = new AllocRecordStackTrace(self->GetTid(), max_depth); + // add scope to make "visitor" destroyed promptly, in order to set the trace->depth_ + { + AllocRecordStackVisitor visitor(self, trace, max_depth); + visitor.WalkStack(); + } + + // Fill in the basics. + AllocRecord* record = new AllocRecord(byte_count, trace); + + records->Put(obj, record); + DCHECK_LE(records->Size(), records->alloc_record_max_); +} + +} // namespace gc +} // namespace art diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h new file mode 100644 index 0000000000..45b3406cea --- /dev/null +++ b/runtime/gc/allocation_record.h @@ -0,0 +1,271 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_GC_ALLOCATION_RECORD_H_ +#define ART_RUNTIME_GC_ALLOCATION_RECORD_H_ + +#include <list> + +#include "base/mutex.h" +#include "object_callbacks.h" +#include "gc_root.h" + +namespace art { + +class ArtMethod; +class Thread; + +namespace mirror { + class Class; + class Object; +} + +namespace gc { + +class AllocRecordStackTraceElement { + public: + AllocRecordStackTraceElement() : method_(nullptr), dex_pc_(0) {} + + int32_t ComputeLineNumber() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + ArtMethod* GetMethod() const { + return method_; + } + + void SetMethod(ArtMethod* m) { + method_ = m; + } + + uint32_t GetDexPc() const { + return dex_pc_; + } + + void SetDexPc(uint32_t pc) { + dex_pc_ = pc; + } + + bool operator==(const AllocRecordStackTraceElement& other) const { + if (this == &other) return true; + return method_ == other.method_ && dex_pc_ == other.dex_pc_; + } + + private: + ArtMethod* method_; + uint32_t dex_pc_; +}; + +class AllocRecordStackTrace { + public: + static constexpr size_t kHashMultiplier = 17; + + AllocRecordStackTrace(pid_t tid, size_t max_depth) + : tid_(tid), depth_(0), stack_(new AllocRecordStackTraceElement[max_depth]) {} + + ~AllocRecordStackTrace() { + delete[] stack_; + } + + pid_t GetTid() const { + return tid_; + } + + size_t GetDepth() const { + return depth_; + } + + void SetDepth(size_t depth) { + depth_ = depth; + } + + const AllocRecordStackTraceElement& GetStackElement(size_t index) const { + DCHECK_LT(index, depth_); + return stack_[index]; + } + + void SetStackElementAt(size_t index, ArtMethod* m, uint32_t dex_pc) { + stack_[index].SetMethod(m); + stack_[index].SetDexPc(dex_pc); + } + + bool operator==(const AllocRecordStackTrace& other) const { + if (this == &other) return true; + if (depth_ != other.depth_) return false; + for (size_t i = 0; i < depth_; ++i) { + if (!(stack_[i] == other.stack_[i])) return false; + } + return true; + } + + private: + const pid_t tid_; + size_t depth_; + AllocRecordStackTraceElement* const stack_; +}; + +struct HashAllocRecordTypes { + size_t 
operator()(const AllocRecordStackTraceElement& r) const { + return std::hash<void*>()(reinterpret_cast<void*>(r.GetMethod())) * + AllocRecordStackTrace::kHashMultiplier + std::hash<uint32_t>()(r.GetDexPc()); + } + + size_t operator()(const AllocRecordStackTrace& r) const { + size_t depth = r.GetDepth(); + size_t result = r.GetTid() * AllocRecordStackTrace::kHashMultiplier + depth; + for (size_t i = 0; i < depth; ++i) { + result = result * AllocRecordStackTrace::kHashMultiplier + (*this)(r.GetStackElement(i)); + } + return result; + } +}; + +template <typename T> struct HashAllocRecordTypesPtr { + size_t operator()(const T* r) const { + if (r == nullptr) return 0; + return HashAllocRecordTypes()(*r); + } +}; + +template <typename T> struct EqAllocRecordTypesPtr { + bool operator()(const T* r1, const T* r2) const { + if (r1 == r2) return true; + if (r1 == nullptr || r2 == nullptr) return false; + return *r1 == *r2; + } +}; + +class AllocRecord { + public: + // All instances of AllocRecord should be managed by an instance of AllocRecordObjectMap. + AllocRecord(size_t count, AllocRecordStackTrace* trace) + : byte_count_(count), trace_(trace) {} + + ~AllocRecord() { + delete trace_; + } + + size_t GetDepth() const { + return trace_->GetDepth(); + } + + const AllocRecordStackTrace* GetStackTrace() const { + return trace_; + } + + size_t ByteCount() const { + return byte_count_; + } + + pid_t GetTid() const { + return trace_->GetTid(); + } + + const AllocRecordStackTraceElement& StackElement(size_t index) const { + return trace_->GetStackElement(index); + } + + private: + const size_t byte_count_; + // TODO: Currently trace_ is like a std::unique_ptr, + // but in future with deduplication it could be a std::shared_ptr. + const AllocRecordStackTrace* const trace_; +}; + +class AllocRecordObjectMap { + public: + // Since the entries contain weak roots, they need a read barrier. Do not directly access + // the mirror::Object pointers in it. Use functions that contain read barriers. + // No need for "const AllocRecord*" in the list, because all fields of AllocRecord are const. + typedef std::list<std::pair<GcRoot<mirror::Object>, AllocRecord*>> EntryList; + + // "static" because it is part of double-checked locking. It needs to check a bool first, + // in order to make sure the AllocRecordObjectMap object is not null. 
+ static void RecordAllocation(Thread* self, mirror::Object* obj, size_t byte_count) + LOCKS_EXCLUDED(Locks::alloc_tracker_lock_) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + static void SetAllocTrackingEnabled(bool enabled) LOCKS_EXCLUDED(Locks::alloc_tracker_lock_); + + AllocRecordObjectMap() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) + : alloc_record_max_(kDefaultNumAllocRecords), + max_stack_depth_(kDefaultAllocStackDepth), + alloc_ddm_thread_id_(0) {} + + ~AllocRecordObjectMap(); + + void Put(mirror::Object* obj, AllocRecord* record) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + entries_.emplace_back(GcRoot<mirror::Object>(obj), record); + } + + size_t Size() const SHARED_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return entries_.size(); + } + + void SweepAllocationRecords(IsMarkedCallback* callback, void* arg) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); + + void RemoveOldest() + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + DCHECK(!entries_.empty()); + delete entries_.front().second; + entries_.pop_front(); + } + + // TODO: Is there a better way to hide the entries_'s type? + EntryList::iterator Begin() + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return entries_.begin(); + } + + EntryList::iterator End() + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return entries_.end(); + } + + EntryList::reverse_iterator RBegin() + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return entries_.rbegin(); + } + + EntryList::reverse_iterator REnd() + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return entries_.rend(); + } + + private: + static constexpr size_t kDefaultNumAllocRecords = 512 * 1024; + static constexpr size_t kDefaultAllocStackDepth = 4; + size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_); + // The implementation always allocates max_stack_depth_ number of frames for each stack trace. + // As long as the max depth is not very large, this is not a waste of memory since most stack + // traces will fill up the max depth number of the frames. 
+ size_t max_stack_depth_ GUARDED_BY(Locks::alloc_tracker_lock_); + pid_t alloc_ddm_thread_id_ GUARDED_BY(Locks::alloc_tracker_lock_); + EntryList entries_ GUARDED_BY(Locks::alloc_tracker_lock_); + + void SetProperties() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); +}; + +} // namespace gc +} // namespace art +#endif // ART_RUNTIME_GC_ALLOCATION_RECORD_H_ diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index 2d5433032d..ee4568ecea 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -22,6 +22,7 @@ #include "base/time_utils.h" #include "debugger.h" #include "gc/accounting/card_table-inl.h" +#include "gc/allocation_record.h" #include "gc/collector/semi_space.h" #include "gc/space/bump_pointer_space-inl.h" #include "gc/space/dlmalloc_space-inl.h" @@ -168,11 +169,11 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas PushOnAllocationStack(self, &obj); } if (kInstrumented) { - if (Dbg::IsAllocTrackingEnabled()) { - Dbg::RecordAllocation(self, klass, bytes_allocated); + if (IsAllocTrackingEnabled()) { + AllocRecordObjectMap::RecordAllocation(self, obj, bytes_allocated); } } else { - DCHECK(!Dbg::IsAllocTrackingEnabled()); + DCHECK(!IsAllocTrackingEnabled()); } // IsConcurrentGc() isn't known at compile time so we can optimize by not checking it for // the BumpPointer or TLAB allocators. This is nice since it allows the entire if statement to be diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 20e791d9f2..22207ee21c 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -110,6 +110,9 @@ static constexpr size_t kVerifyObjectAllocationStackSize = 16 * KB / sizeof(mirror::HeapReference<mirror::Object>); static constexpr size_t kDefaultAllocationStackSize = 8 * MB / sizeof(mirror::HeapReference<mirror::Object>); +// System.runFinalization can deadlock with native allocations, to deal with this, we have a +// timeout on how long we wait for finalizers to run. 
b/21544853 +static constexpr uint64_t kNativeAllocationFinalizeTimeout = MsToNs(250u); Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free, double target_utilization, double foreground_heap_growth_multiplier, @@ -206,7 +209,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max blocking_gc_count_last_window_(0U), gc_count_rate_histogram_("gc count rate histogram", 1U, kGcCountRateMaxBucketCount), blocking_gc_count_rate_histogram_("blocking gc count rate histogram", 1U, - kGcCountRateMaxBucketCount) { + kGcCountRateMaxBucketCount), + alloc_tracking_enabled_(false) { if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { LOG(INFO) << "Heap() entering"; } @@ -232,10 +236,11 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max requested_alloc_space_begin = reinterpret_cast<uint8_t*>(300 * MB) - non_moving_space_capacity; } if (!image_file_name.empty()) { + ATRACE_BEGIN("ImageSpace::Create"); std::string error_msg; - space::ImageSpace* image_space = space::ImageSpace::Create(image_file_name.c_str(), - image_instruction_set, - &error_msg); + auto* image_space = space::ImageSpace::Create(image_file_name.c_str(), image_instruction_set, + &error_msg); + ATRACE_END(); if (image_space != nullptr) { AddSpace(image_space); // Oat files referenced by image files immediately follow them in memory, ensure alloc space @@ -287,6 +292,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max } std::string error_str; std::unique_ptr<MemMap> non_moving_space_mem_map; + ATRACE_BEGIN("Create heap maps"); if (separate_non_moving_space) { // If we are the zygote, the non moving space becomes the zygote space when we run // PreZygoteFork the first time. In this case, call the map "zygote space" since we can't @@ -323,6 +329,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max capacity_, &error_str)); CHECK(main_mem_map_2.get() != nullptr) << error_str; } + ATRACE_END(); + ATRACE_BEGIN("Create spaces"); // Create the non moving space first so that bitmaps don't take up the address range. if (separate_non_moving_space) { // Non moving space is always dlmalloc since we currently don't have support for multiple @@ -340,7 +348,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max if (foreground_collector_type_ == kCollectorTypeCC) { region_space_ = space::RegionSpace::Create("Region space", capacity_ * 2, request_begin); AddSpace(region_space_); - } else if (IsMovingGc(foreground_collector_type_) && foreground_collector_type_ != kCollectorTypeGSS) { + } else if (IsMovingGc(foreground_collector_type_) && + foreground_collector_type_ != kCollectorTypeGSS) { // Create bump pointer spaces. // We only to create the bump pointer if the foreground collector is a compacting GC. // TODO: Place bump-pointer spaces somewhere to minimize size of card table. @@ -411,10 +420,12 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max if (main_space_backup_.get() != nullptr) { RemoveSpace(main_space_backup_.get()); } + ATRACE_END(); // Allocate the card table. 
+ ATRACE_BEGIN("Create card table"); card_table_.reset(accounting::CardTable::Create(heap_begin, heap_capacity)); CHECK(card_table_.get() != nullptr) << "Failed to create card table"; - + ATRACE_END(); if (foreground_collector_type_ == kCollectorTypeCC && kUseTableLookupReadBarrier) { rb_table_.reset(new accounting::ReadBarrierTable()); DCHECK(rb_table_->IsAllCleared()); @@ -990,6 +1001,27 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) { BaseMutex::DumpAll(os); } +void Heap::ResetGcPerformanceInfo() { + for (auto& collector : garbage_collectors_) { + collector->ResetMeasurements(); + } + total_allocation_time_.StoreRelaxed(0); + total_bytes_freed_ever_ = 0; + total_objects_freed_ever_ = 0; + total_wait_time_ = 0; + blocking_gc_count_ = 0; + blocking_gc_time_ = 0; + gc_count_last_window_ = 0; + blocking_gc_count_last_window_ = 0; + last_update_time_gc_count_rate_histograms_ = // Round down by the window duration. + (NanoTime() / kGcCountRateHistogramWindowDuration) * kGcCountRateHistogramWindowDuration; + { + MutexLock mu(Thread::Current(), *gc_complete_lock_); + gc_count_rate_histogram_.Reset(); + blocking_gc_count_rate_histogram_.Reset(); + } +} + uint64_t Heap::GetGcCount() const { uint64_t gc_count = 0U; for (auto& collector : garbage_collectors_) { @@ -1033,6 +1065,7 @@ Heap::~Heap() { STLDeleteElements(&garbage_collectors_); // If we don't reset then the mark stack complains in its destructor. allocation_stack_->Reset(); + allocation_records_.reset(); live_stack_->Reset(); STLDeleteValues(&mod_union_tables_); STLDeleteValues(&remembered_sets_); @@ -3531,22 +3564,16 @@ bool Heap::IsGCRequestPending() const { return concurrent_gc_pending_.LoadRelaxed(); } -void Heap::RunFinalization(JNIEnv* env) { - // Can't do this in WellKnownClasses::Init since System is not properly set up at that point. - if (WellKnownClasses::java_lang_System_runFinalization == nullptr) { - CHECK(WellKnownClasses::java_lang_System != nullptr); - WellKnownClasses::java_lang_System_runFinalization = - CacheMethod(env, WellKnownClasses::java_lang_System, true, "runFinalization", "()V"); - CHECK(WellKnownClasses::java_lang_System_runFinalization != nullptr); - } - env->CallStaticVoidMethod(WellKnownClasses::java_lang_System, - WellKnownClasses::java_lang_System_runFinalization); +void Heap::RunFinalization(JNIEnv* env, uint64_t timeout) { + env->CallStaticVoidMethod(WellKnownClasses::dalvik_system_VMRuntime, + WellKnownClasses::dalvik_system_VMRuntime_runFinalization, + static_cast<jlong>(timeout)); } void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) { Thread* self = ThreadForEnv(env); if (native_need_to_run_finalization_) { - RunFinalization(env); + RunFinalization(env, kNativeAllocationFinalizeTimeout); UpdateMaxNativeFootprint(); native_need_to_run_finalization_ = false; } @@ -3562,7 +3589,7 @@ void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) { if (new_native_bytes_allocated > growth_limit_) { if (WaitForGcToComplete(kGcCauseForNativeAlloc, self) != collector::kGcTypeNone) { // Just finished a GC, attempt to run finalizers. - RunFinalization(env); + RunFinalization(env, kNativeAllocationFinalizeTimeout); CHECK(!env->ExceptionCheck()); // Native bytes allocated may be updated by finalization, refresh it. new_native_bytes_allocated = native_bytes_allocated_.LoadRelaxed(); @@ -3570,7 +3597,7 @@ void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) { // If we still are over the watermark, attempt a GC for alloc and run finalizers. 
if (new_native_bytes_allocated > growth_limit_) { CollectGarbageInternal(gc_type, kGcCauseForNativeAlloc, false); - RunFinalization(env); + RunFinalization(env, kNativeAllocationFinalizeTimeout); native_need_to_run_finalization_ = false; CHECK(!env->ExceptionCheck()); } @@ -3649,5 +3676,18 @@ void Heap::ClearMarkedObjects() { } } +void Heap::SetAllocationRecords(AllocRecordObjectMap* records) { + allocation_records_.reset(records); +} + +void Heap::SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const { + if (IsAllocTrackingEnabled()) { + MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_); + if (IsAllocTrackingEnabled()) { + GetAllocationRecords()->SweepAllocationRecords(visitor, arg); + } + } +} + } // namespace gc } // namespace art diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index c72414a1ab..18244c856b 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -58,6 +58,7 @@ namespace mirror { namespace gc { +class AllocRecordObjectMap; class ReferenceProcessor; class TaskProcessor; @@ -597,6 +598,7 @@ class Heap { // GC performance measuring void DumpGcPerformanceInfo(std::ostream& os); + void ResetGcPerformanceInfo(); // Returns true if we currently care about pause times. bool CareAboutPauseTimes() const { @@ -683,6 +685,27 @@ class Heap { void DumpGcCountRateHistogram(std::ostream& os) const; void DumpBlockingGcCountRateHistogram(std::ostream& os) const; + // Allocation tracking support + // Callers to this function use double-checked locking to ensure safety on allocation_records_ + bool IsAllocTrackingEnabled() const { + return alloc_tracking_enabled_.LoadRelaxed(); + } + + void SetAllocTrackingEnabled(bool enabled) EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + alloc_tracking_enabled_.StoreRelaxed(enabled); + } + + AllocRecordObjectMap* GetAllocationRecords() const + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return allocation_records_.get(); + } + + void SetAllocationRecords(AllocRecordObjectMap* records) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); + + void SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + private: class ConcurrentGCTask; class CollectorTransitionTask; @@ -776,8 +799,8 @@ class Heap { bool IsValidContinuousSpaceObjectAddress(const mirror::Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - // Run the finalizers. - void RunFinalization(JNIEnv* env); + // Run the finalizers. If timeout is non zero, then we use the VMRuntime version. + void RunFinalization(JNIEnv* env, uint64_t timeout); // Blocks the caller until the garbage collector becomes idle and returns the type of GC we // waited for. @@ -1191,6 +1214,11 @@ class Heap { // The histogram of the number of blocking GC invocations per window duration. 
Histogram<uint64_t> blocking_gc_count_rate_histogram_ GUARDED_BY(gc_complete_lock_); + // Allocation tracking support + Atomic<bool> alloc_tracking_enabled_; + std::unique_ptr<AllocRecordObjectMap> allocation_records_ + GUARDED_BY(Locks::alloc_tracker_lock_); + friend class CollectorTransitionTask; friend class collector::GarbageCollector; friend class collector::MarkCompact; diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index 437fd8c5c9..1923d24805 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -694,7 +694,7 @@ ImageSpace* ImageSpace::Init(const char* image_filename, const char* image_locat const auto section_idx = static_cast<ImageHeader::ImageSections>(i); auto& section = image_header.GetImageSection(section_idx); LOG(INFO) << section_idx << " start=" - << reinterpret_cast<void*>(image_header.GetImageBegin() + section.Offset()) + << reinterpret_cast<void*>(image_header.GetImageBegin() + section.Offset()) << " " << section; } } @@ -730,9 +730,9 @@ ImageSpace* ImageSpace::Init(const char* image_filename, const char* image_locat std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_filename, bitmap_index)); std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap( - accounting::ContinuousSpaceBitmap::CreateFromMemMap(bitmap_name, image_map.release(), - reinterpret_cast<uint8_t*>(map->Begin()), - map->Size())); + accounting::ContinuousSpaceBitmap::CreateFromMemMap( + bitmap_name, image_map.release(), reinterpret_cast<uint8_t*>(map->Begin()), + accounting::ContinuousSpaceBitmap::ComputeHeapSize(bitmap_section.Size()))); if (bitmap.get() == nullptr) { *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str()); return nullptr; @@ -755,6 +755,7 @@ ImageSpace* ImageSpace::Init(const char* image_filename, const char* image_locat DCHECK(!error_msg->empty()); return nullptr; } + space->oat_file_non_owned_ = space->oat_file_.get(); if (validate_oat_file && !space->ValidateOatFile(error_msg)) { DCHECK(!error_msg->empty()); @@ -838,10 +839,12 @@ bool ImageSpace::ValidateOatFile(std::string* error_msg) const { return true; } + const OatFile* ImageSpace::GetOatFile() const { - return oat_file_.get(); + return oat_file_non_owned_; } + OatFile* ImageSpace::ReleaseOatFile() { CHECK(oat_file_.get() != nullptr); return oat_file_.release(); diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h index 54dc7a61dd..93ff8aaff7 100644 --- a/runtime/gc/space/image_space.h +++ b/runtime/gc/space/image_space.h @@ -152,6 +152,10 @@ class ImageSpace : public MemMapSpace { // the ClassLinker during it's initialization. std::unique_ptr<OatFile> oat_file_; + // There are times when we need to find the boot image oat file. As + // we release ownership during startup, keep a non-owned reference. 
+ const OatFile* oat_file_non_owned_; + const std::string image_location_; DISALLOW_COPY_AND_ASSIGN(ImageSpace); diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc index 6e0e56e82a..f32d5a1b81 100644 --- a/runtime/hprof/hprof.cc +++ b/runtime/hprof/hprof.cc @@ -48,6 +48,7 @@ #include "dex_file-inl.h" #include "gc_root.h" #include "gc/accounting/heap_bitmap.h" +#include "gc/allocation_record.h" #include "gc/heap.h" #include "gc/space/space.h" #include "globals.h" @@ -68,14 +69,13 @@ namespace hprof { static constexpr bool kDirectStream = true; static constexpr uint32_t kHprofTime = 0; -static constexpr uint32_t kHprofNullStackTrace = 0; static constexpr uint32_t kHprofNullThread = 0; static constexpr size_t kMaxObjectsPerSegment = 128; static constexpr size_t kMaxBytesPerSegment = 4096; // The static field-name for the synthetic object generated to account for class static overhead. -static constexpr const char* kStaticOverheadName = "$staticOverhead"; +static constexpr const char* kClassOverheadName = "$classOverhead"; enum HprofTag { HPROF_TAG_STRING = 0x01, @@ -144,6 +144,10 @@ enum HprofBasicType { typedef uint32_t HprofStringId; typedef uint32_t HprofClassObjectId; +typedef uint32_t HprofClassSerialNumber; +typedef uint32_t HprofStackTraceSerialNumber; +typedef uint32_t HprofStackFrameId; +static constexpr HprofStackTraceSerialNumber kHprofNullStackTrace = 0; class EndianOutput { public: @@ -194,6 +198,10 @@ class EndianOutput { AddU4(PointerToLowMemUInt32(value)); } + void AddStackTraceSerialNumber(HprofStackTraceSerialNumber value) { + AddU4(value); + } + // The ID for the synthetic object generated to account for class static overhead. void AddClassStaticsId(const mirror::Class* value) { AddU4(1 | PointerToLowMemUInt32(value)); @@ -415,13 +423,21 @@ class Hprof : public SingleRootVisitor { start_ns_(NanoTime()), current_heap_(HPROF_HEAP_DEFAULT), objects_in_segment_(0), - next_string_id_(0x400000) { + next_string_id_(0x400000), + next_class_serial_number_(1) { LOG(INFO) << "hprof: heap dump \"" << filename_ << "\" starting..."; } void Dump() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) - LOCKS_EXCLUDED(Locks::heap_bitmap_lock_) { + LOCKS_EXCLUDED(Locks::heap_bitmap_lock_, Locks::alloc_tracker_lock_) { + { + MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_); + if (Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()) { + PopulateAllocationTrackingTraces(); + } + } + // First pass to measure the size of the dump. size_t overall_size; size_t max_length; @@ -480,11 +496,11 @@ class Hprof : public SingleRootVisitor { objects_in_segment_ = 0; if (header_first) { - ProcessHeader(); + ProcessHeader(true); ProcessBody(); } else { ProcessBody(); - ProcessHeader(); + ProcessHeader(false); } } @@ -501,21 +517,29 @@ class Hprof : public SingleRootVisitor { output_->EndRecord(); } - void ProcessHeader() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { + void ProcessHeader(bool string_first) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { // Write the header. WriteFixedHeader(); // Write the string and class tables, and any stack traces, to the header. // (jhat requires that these appear before any of the data in the body that refers to them.) - WriteStringTable(); + // jhat also requires the string table appear before class table and stack traces. + // However, WriteStackTraces() can modify the string table, so it's necessary to call + // WriteStringTable() last in the first pass, to compute the correct length of the output. 
+ if (string_first) { + WriteStringTable(); + } WriteClassTable(); WriteStackTraces(); + if (!string_first) { + WriteStringTable(); + } output_->EndRecord(); } void WriteClassTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - uint32_t nextSerialNumber = 1; - - for (mirror::Class* c : classes_) { + for (const auto& p : classes_) { + mirror::Class* c = p.first; + HprofClassSerialNumber sn = p.second; CHECK(c != nullptr); output_->StartNewRecord(HPROF_TAG_LOAD_CLASS, kHprofTime); // LOAD CLASS format: @@ -523,9 +547,9 @@ class Hprof : public SingleRootVisitor { // ID: class object ID. We use the address of the class object structure as its ID. // U4: stack trace serial number // ID: class name string ID - __ AddU4(nextSerialNumber++); + __ AddU4(sn); __ AddObjectId(c); - __ AddU4(kHprofNullStackTrace); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(c)); __ AddStringId(LookupClassNameId(c)); } } @@ -567,15 +591,31 @@ class Hprof : public SingleRootVisitor { HprofClassObjectId LookupClassId(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { if (c != nullptr) { - auto result = classes_.insert(c); - const mirror::Class* present = *result.first; - CHECK_EQ(present, c); - // Make sure that we've assigned a string ID for this class' name - LookupClassNameId(c); + auto it = classes_.find(c); + if (it == classes_.end()) { + // first time to see this class + HprofClassSerialNumber sn = next_class_serial_number_++; + classes_.Put(c, sn); + // Make sure that we've assigned a string ID for this class' name + LookupClassNameId(c); + } } return PointerToLowMemUInt32(c); } + HprofStackTraceSerialNumber LookupStackTraceSerialNumber(const mirror::Object* obj) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + auto r = allocation_records_.find(obj); + if (r == allocation_records_.end()) { + return kHprofNullStackTrace; + } else { + const gc::AllocRecordStackTrace* trace = r->second; + auto result = traces_.find(trace); + CHECK(result != traces_.end()); + return result->second; + } + } + HprofStringId LookupStringId(mirror::String* string) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return LookupStringId(string->ToModifiedUtf8()); } @@ -622,12 +662,66 @@ class Hprof : public SingleRootVisitor { __ AddU4(static_cast<uint32_t>(nowMs & 0xFFFFFFFF)); } - void WriteStackTraces() { + void WriteStackTraces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { // Write a dummy stack trace record so the analysis tools don't freak out. output_->StartNewRecord(HPROF_TAG_STACK_TRACE, kHprofTime); - __ AddU4(kHprofNullStackTrace); + __ AddStackTraceSerialNumber(kHprofNullStackTrace); __ AddU4(kHprofNullThread); __ AddU4(0); // no frames + + // TODO: jhat complains "WARNING: Stack trace not found for serial # -1", but no trace should + // have -1 as its serial number (as long as HprofStackTraceSerialNumber doesn't overflow). + for (const auto& it : traces_) { + const gc::AllocRecordStackTrace* trace = it.first; + HprofStackTraceSerialNumber trace_sn = it.second; + size_t depth = trace->GetDepth(); + + // First write stack frames of the trace + for (size_t i = 0; i < depth; ++i) { + const gc::AllocRecordStackTraceElement* frame = &trace->GetStackElement(i); + ArtMethod* method = frame->GetMethod(); + CHECK(method != nullptr); + output_->StartNewRecord(HPROF_TAG_STACK_FRAME, kHprofTime); + // STACK FRAME format: + // ID: stack frame ID. We use the address of the AllocRecordStackTraceElement object as its ID. 
+ // ID: method name string ID + // ID: method signature string ID + // ID: source file name string ID + // U4: class serial number + // U4: >0, line number; 0, no line information available; -1, unknown location + auto frame_result = frames_.find(frame); + CHECK(frame_result != frames_.end()); + __ AddU4(frame_result->second); + __ AddStringId(LookupStringId(method->GetName())); + __ AddStringId(LookupStringId(method->GetSignature().ToString())); + const char* source_file = method->GetDeclaringClassSourceFile(); + if (source_file == nullptr) { + source_file = ""; + } + __ AddStringId(LookupStringId(source_file)); + auto class_result = classes_.find(method->GetDeclaringClass()); + CHECK(class_result != classes_.end()); + __ AddU4(class_result->second); + __ AddU4(frame->ComputeLineNumber()); + } + + // Then write the trace itself + output_->StartNewRecord(HPROF_TAG_STACK_TRACE, kHprofTime); + // STACK TRACE format: + // U4: stack trace serial number. We use the address of the AllocRecordStackTrace object as its serial number. + // U4: thread serial number. We use Thread::GetTid(). + // U4: number of frames + // [ID]*: series of stack frame ID's + __ AddStackTraceSerialNumber(trace_sn); + __ AddU4(trace->GetTid()); + __ AddU4(depth); + for (size_t i = 0; i < depth; ++i) { + const gc::AllocRecordStackTraceElement* frame = &trace->GetStackElement(i); + auto frame_result = frames_.find(frame); + CHECK(frame_result != frames_.end()); + __ AddU4(frame_result->second); + } + } } bool DumpToDdmsBuffered(size_t overall_size ATTRIBUTE_UNUSED, size_t max_length ATTRIBUTE_UNUSED) @@ -723,6 +817,40 @@ class Hprof : public SingleRootVisitor { return true; } + void PopulateAllocationTrackingTraces() + EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::alloc_tracker_lock_) { + gc::AllocRecordObjectMap* records = Runtime::Current()->GetHeap()->GetAllocationRecords(); + CHECK(records != nullptr); + HprofStackTraceSerialNumber next_trace_sn = kHprofNullStackTrace + 1; + HprofStackFrameId next_frame_id = 0; + + for (auto it = records->Begin(), end = records->End(); it != end; ++it) { + const mirror::Object* obj = it->first.Read(); + const gc::AllocRecordStackTrace* trace = it->second->GetStackTrace(); + + // Copy the pair into a real hash map to speed up look up. + auto records_result = allocation_records_.emplace(obj, trace); + // The insertion should always succeed, i.e. no duplicate object pointers in "records" + CHECK(records_result.second); + + // Generate serial numbers for traces, and IDs for frames. + auto traces_result = traces_.find(trace); + if (traces_result == traces_.end()) { + traces_.emplace(trace, next_trace_sn++); + // only check frames if the trace is newly discovered + for (size_t i = 0, depth = trace->GetDepth(); i < depth; ++i) { + const gc::AllocRecordStackTraceElement* frame = &trace->GetStackElement(i); + auto frames_result = frames_.find(frame); + if (frames_result == frames_.end()) { + frames_.emplace(frame, next_frame_id++); + } + } + } + } + CHECK_EQ(traces_.size(), next_trace_sn - kHprofNullStackTrace - 1); + CHECK_EQ(frames_.size(), next_frame_id); + } + // If direct_to_ddms_ is set, "filename_" and "fd" will be ignored. // Otherwise, "filename_" must be valid, though if "fd" >= 0 it will // only be used for debug messages. @@ -737,9 +865,18 @@ class Hprof : public SingleRootVisitor { HprofHeapId current_heap_; // Which heap we're currently dumping. 
size_t objects_in_segment_; - std::set<mirror::Class*> classes_; HprofStringId next_string_id_; SafeMap<std::string, HprofStringId> strings_; + HprofClassSerialNumber next_class_serial_number_; + SafeMap<mirror::Class*, HprofClassSerialNumber> classes_; + + std::unordered_map<const gc::AllocRecordStackTrace*, HprofStackTraceSerialNumber, + gc::HashAllocRecordTypesPtr<gc::AllocRecordStackTrace>, + gc::EqAllocRecordTypesPtr<gc::AllocRecordStackTrace>> traces_; + std::unordered_map<const gc::AllocRecordStackTraceElement*, HprofStackFrameId, + gc::HashAllocRecordTypesPtr<gc::AllocRecordStackTraceElement>, + gc::EqAllocRecordTypesPtr<gc::AllocRecordStackTraceElement>> frames_; + std::unordered_map<const mirror::Object*, const gc::AllocRecordStackTrace*> allocation_records_; DISALLOW_COPY_AND_ASSIGN(Hprof); }; @@ -881,10 +1018,6 @@ void Hprof::MarkRootObject(const mirror::Object* obj, jobject jni_obj, HprofHeap ++objects_in_segment_; } -static int StackTraceSerialNumber(const mirror::Object* /*obj*/) { - return kHprofNullStackTrace; -} - void Hprof::DumpHeapObject(mirror::Object* obj) { // Ignore classes that are retired. if (obj->IsClass() && obj->AsClass()->IsRetired()) { @@ -959,24 +1092,30 @@ void Hprof::DumpHeapClass(mirror::Class* klass) { // Class is allocated but not yet loaded: we cannot access its fields or super class. return; } - size_t sFieldCount = klass->NumStaticFields(); - if (sFieldCount != 0) { - int byteLength = sFieldCount * sizeof(JValue); // TODO bogus; fields are packed + const size_t num_static_fields = klass->NumStaticFields(); + // Total class size including embedded IMT, embedded vtable, and static fields. + const size_t class_size = klass->GetClassSize(); + // Class size excluding static fields (relies on reference fields being the first static fields). + const size_t class_size_without_overhead = sizeof(mirror::Class); + CHECK_LE(class_size_without_overhead, class_size); + const size_t overhead_size = class_size - class_size_without_overhead; + + if (overhead_size != 0) { // Create a byte array to reflect the allocation of the // StaticField array at the end of this class. __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP); __ AddClassStaticsId(klass); - __ AddU4(StackTraceSerialNumber(klass)); - __ AddU4(byteLength); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(klass)); + __ AddU4(overhead_size); __ AddU1(hprof_basic_byte); - for (int i = 0; i < byteLength; ++i) { + for (size_t i = 0; i < overhead_size; ++i) { __ AddU1(0); } } __ AddU1(HPROF_CLASS_DUMP); __ AddClassId(LookupClassId(klass)); - __ AddU4(StackTraceSerialNumber(klass)); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(klass)); __ AddClassId(LookupClassId(klass->GetSuperClass())); __ AddObjectId(klass->GetClassLoader()); __ AddObjectId(nullptr); // no signer @@ -986,7 +1125,7 @@ void Hprof::DumpHeapClass(mirror::Class* klass) { if (klass->IsClassClass()) { // ClassObjects have their static fields appended, so aren't all the same size. // But they're at least this size. - __ AddU4(sizeof(mirror::Class)); // instance size + __ AddU4(class_size_without_overhead); // instance size } else if (klass->IsStringClass()) { // Strings are variable length with character data at the end like arrays. // This outputs the size of an empty string. 
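
The DumpHeapClass() change above sizes the synthetic $classOverhead byte array from the real class object size: everything beyond the base mirror::Class portion (embedded vtable/IMT plus packed static fields) is reported as overhead, replacing the old sFieldCount * sizeof(JValue) estimate. Below is a small sketch of that accounting under assumed example sizes; in ART the inputs come from klass->GetClassSize() and sizeof(mirror::Class).

// Sketch only: the sizes in main() are invented example values.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<uint8_t> BuildClassOverheadBytes(size_t class_size, size_t base_class_size) {
  assert(base_class_size <= class_size);  // mirrors the CHECK_LE in the patch
  // Zero-filled placeholder bytes emitted as the $classOverhead primitive array.
  return std::vector<uint8_t>(class_size - base_class_size, 0);
}

int main() {
  // e.g. a class object of 640 bytes with a 480-byte base Class portion
  // yields a 160-byte $classOverhead array in the hprof output.
  assert(BuildClassOverheadBytes(640, 480).size() == 160);
  return 0;
}
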
@@ -1000,15 +1139,15 @@ void Hprof::DumpHeapClass(mirror::Class* klass) { __ AddU2(0); // empty const pool // Static fields - if (sFieldCount == 0) { - __ AddU2((uint16_t)0); + if (overhead_size == 0) { + __ AddU2(static_cast<uint16_t>(0)); } else { - __ AddU2((uint16_t)(sFieldCount+1)); - __ AddStringId(LookupStringId(kStaticOverheadName)); + __ AddU2(static_cast<uint16_t>(num_static_fields + 1)); + __ AddStringId(LookupStringId(kClassOverheadName)); __ AddU1(hprof_basic_object); __ AddClassStaticsId(klass); - for (size_t i = 0; i < sFieldCount; ++i) { + for (size_t i = 0; i < num_static_fields; ++i) { ArtField* f = klass->GetStaticField(i); size_t size; @@ -1072,7 +1211,7 @@ void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass) { __ AddU1(HPROF_OBJECT_ARRAY_DUMP); __ AddObjectId(obj); - __ AddU4(StackTraceSerialNumber(obj)); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); __ AddU4(length); __ AddClassId(LookupClassId(klass)); @@ -1087,7 +1226,7 @@ void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass) { __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP); __ AddObjectId(obj); - __ AddU4(StackTraceSerialNumber(obj)); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); __ AddU4(length); __ AddU1(t); @@ -1108,7 +1247,7 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { // obj is an instance object. __ AddU1(HPROF_INSTANCE_DUMP); __ AddObjectId(obj); - __ AddU4(StackTraceSerialNumber(obj)); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); __ AddClassId(LookupClassId(klass)); // Reserve some space for the length of the instance data, which we won't @@ -1170,7 +1309,7 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP); __ AddObjectId(value); - __ AddU4(StackTraceSerialNumber(obj)); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); __ AddU4(s->GetLength()); __ AddU1(hprof_basic_char); __ AddU2List(s->GetValue(), s->GetLength()); diff --git a/runtime/image.cc b/runtime/image.cc index 947c914de6..44193da4ee 100644 --- a/runtime/image.cc +++ b/runtime/image.cc @@ -24,7 +24,7 @@ namespace art { const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' }; -const uint8_t ImageHeader::kImageVersion[] = { '0', '1', '6', '\0' }; +const uint8_t ImageHeader::kImageVersion[] = { '0', '1', '7', '\0' }; ImageHeader::ImageHeader(uint32_t image_begin, uint32_t image_size, diff --git a/runtime/image.h b/runtime/image.h index c6be7ef3f7..d856f218af 100644 --- a/runtime/image.h +++ b/runtime/image.h @@ -142,6 +142,7 @@ class PACKED(4) ImageHeader { kSectionObjects, kSectionArtFields, kSectionArtMethods, + kSectionInternedStrings, kSectionImageBitmap, kSectionCount, // Number of elements in enum. 
}; diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc index 4ced23d488..d37ddcb88b 100644 --- a/runtime/instrumentation.cc +++ b/runtime/instrumentation.cc @@ -1019,7 +1019,7 @@ TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, uintpt PrettyMethod(method).c_str(), return_value.GetJ()) << *self; } - self->SetDeoptimizationReturnValue(return_value); + self->SetDeoptimizationReturnValue(return_value, return_shorty == 'L'); return GetTwoWordSuccessValue(*return_pc, reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint())); } else { diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc index 9abbca8460..2a962784ca 100644 --- a/runtime/intern_table.cc +++ b/runtime/intern_table.cc @@ -152,20 +152,28 @@ void InternTable::AddImageStringsToTable(gc::space::ImageSpace* image_space) { CHECK(image_space != nullptr); MutexLock mu(Thread::Current(), *Locks::intern_table_lock_); if (!image_added_to_intern_table_) { - mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches); - mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>(); - for (int32_t i = 0; i < dex_caches->GetLength(); ++i) { - mirror::DexCache* dex_cache = dex_caches->Get(i); - const DexFile* dex_file = dex_cache->GetDexFile(); - const size_t num_strings = dex_file->NumStringIds(); - for (size_t j = 0; j < num_strings; ++j) { - mirror::String* image_string = dex_cache->GetResolvedString(j); - if (image_string != nullptr) { - mirror::String* found = LookupStrong(image_string); - if (found == nullptr) { - InsertStrong(image_string); - } else { - DCHECK_EQ(found, image_string); + const ImageHeader* const header = &image_space->GetImageHeader(); + // Check if we have the interned strings section. + const ImageSection& section = header->GetImageSection(ImageHeader::kSectionInternedStrings); + if (section.Size() > 0) { + ReadFromMemoryLocked(image_space->Begin() + section.Offset()); + } else { + // TODO: Delete this logic? + mirror::Object* root = header->GetImageRoot(ImageHeader::kDexCaches); + mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>(); + for (int32_t i = 0; i < dex_caches->GetLength(); ++i) { + mirror::DexCache* dex_cache = dex_caches->Get(i); + const DexFile* dex_file = dex_cache->GetDexFile(); + const size_t num_strings = dex_file->NumStringIds(); + for (size_t j = 0; j < num_strings; ++j) { + mirror::String* image_string = dex_cache->GetResolvedString(j); + if (image_string != nullptr) { + mirror::String* found = LookupStrong(image_string); + if (found == nullptr) { + InsertStrong(image_string); + } else { + DCHECK_EQ(found, image_string); + } } } } @@ -285,6 +293,29 @@ void InternTable::SweepInternTableWeaks(IsMarkedCallback* callback, void* arg) { weak_interns_.SweepWeaks(callback, arg); } +void InternTable::AddImageInternTable(gc::space::ImageSpace* image_space) { + const ImageSection& intern_section = image_space->GetImageHeader().GetImageSection( + ImageHeader::kSectionInternedStrings); + // Read the string tables from the image. 
+ const uint8_t* ptr = image_space->Begin() + intern_section.Offset(); + const size_t offset = ReadFromMemory(ptr); + CHECK_LE(offset, intern_section.Size()); +} + +size_t InternTable::ReadFromMemory(const uint8_t* ptr) { + MutexLock mu(Thread::Current(), *Locks::intern_table_lock_); + return ReadFromMemoryLocked(ptr); +} + +size_t InternTable::ReadFromMemoryLocked(const uint8_t* ptr) { + return strong_interns_.ReadIntoPreZygoteTable(ptr); +} + +size_t InternTable::WriteToMemory(uint8_t* ptr) { + MutexLock mu(Thread::Current(), *Locks::intern_table_lock_); + return strong_interns_.WriteFromPostZygoteTable(ptr); +} + std::size_t InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& root) const { if (kIsDebugBuild) { Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); @@ -300,6 +331,17 @@ bool InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& a, return a.Read()->Equals(b.Read()); } +size_t InternTable::Table::ReadIntoPreZygoteTable(const uint8_t* ptr) { + CHECK_EQ(pre_zygote_table_.Size(), 0u); + size_t read_count = 0; + pre_zygote_table_ = UnorderedSet(ptr, false /* make copy */, &read_count); + return read_count; +} + +size_t InternTable::Table::WriteFromPostZygoteTable(uint8_t* ptr) { + return post_zygote_table_.WriteToMemory(ptr); +} + void InternTable::Table::Remove(mirror::String* s) { auto it = post_zygote_table_.Find(GcRoot<mirror::String>(s)); if (it != post_zygote_table_.end()) { @@ -325,9 +367,13 @@ mirror::String* InternTable::Table::Find(mirror::String* s) { } void InternTable::Table::SwapPostZygoteWithPreZygote() { - CHECK(pre_zygote_table_.Empty()); - std::swap(pre_zygote_table_, post_zygote_table_); - VLOG(heap) << "Swapping " << pre_zygote_table_.Size() << " interns to the pre zygote table"; + if (pre_zygote_table_.Empty()) { + std::swap(pre_zygote_table_, post_zygote_table_); + VLOG(heap) << "Swapping " << pre_zygote_table_.Size() << " interns to the pre zygote table"; + } else { + // This case happens if read the intern table from the image. + VLOG(heap) << "Not swapping due to non-empty pre_zygote_table_"; + } } void InternTable::Table::Insert(mirror::String* s) { diff --git a/runtime/intern_table.h b/runtime/intern_table.h index 1e5d3c22c9..97ce73c52e 100644 --- a/runtime/intern_table.h +++ b/runtime/intern_table.h @@ -97,6 +97,20 @@ class InternTable { void SwapPostZygoteWithPreZygote() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::intern_table_lock_); + // Add an intern table which was serialized to the image. + void AddImageInternTable(gc::space::ImageSpace* image_space) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::intern_table_lock_); + + // Read the intern table from memory. The elements aren't copied, the intern hash set data will + // point to somewhere within ptr. Only reads the strong interns. + size_t ReadFromMemory(const uint8_t* ptr) LOCKS_EXCLUDED(Locks::intern_table_lock_) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + // Write the post zygote intern table to a pointer. Only writes the strong interns since it is + // expected that there is no weak interns since this is called from the image writer. 
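
The WriteToMemory/ReadFromMemory pair above lets the image writer flush the post-zygote strong interns into the new kSectionInternedStrings section, and lets the runtime rebuild the pre-zygote table directly over the mapped image: the UnorderedSet is constructed with make copy = false, so its backing storage stays inside the image. A simplified, self-contained sketch of that length-prefixed, zero-copy layout, using a plain array of 32-bit ids instead of ART's HashSet; SerializeIds, DeserializeIds and IdView are illustrative names only:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Writes a length-prefixed array of 32-bit ids into buf; returns bytes written.
size_t SerializeIds(const std::vector<uint32_t>& ids, uint8_t* buf) {
  const uint64_t count = ids.size();
  std::memcpy(buf, &count, sizeof(count));
  std::memcpy(buf + sizeof(count), ids.data(), count * sizeof(uint32_t));
  return sizeof(count) + count * sizeof(uint32_t);
}

// Non-owning view over the serialized data: like the pre-zygote table read from the
// image, it points into the caller's buffer instead of copying.
struct IdView {
  const uint32_t* data = nullptr;
  size_t size = 0;
};

// Returns how many bytes were consumed, in the spirit of ReadIntoPreZygoteTable.
// Assumes buf is suitably aligned, as an image section is.
size_t DeserializeIds(const uint8_t* buf, IdView* out) {
  uint64_t count = 0;
  std::memcpy(&count, buf, sizeof(count));
  out->data = reinterpret_cast<const uint32_t*>(buf + sizeof(count));
  out->size = static_cast<size_t>(count);
  return sizeof(count) + out->size * sizeof(uint32_t);
}

The CHECK_LE(offset, intern_section.Size()) in AddImageInternTable plays the same role as checking the returned byte count here: the reader must stay within the section it was handed.
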
+ size_t WriteToMemory(uint8_t* ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + LOCKS_EXCLUDED(Locks::intern_table_lock_); + private: class StringHashEquals { public: @@ -133,6 +147,16 @@ class InternTable { EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_); void SwapPostZygoteWithPreZygote() EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_); size_t Size() const EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_); + // Read pre zygote table is called from ReadFromMemory which happens during runtime creation + // when we load the image intern table. Returns how many bytes were read. + size_t ReadIntoPreZygoteTable(const uint8_t* ptr) + EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // The image writer calls WritePostZygoteTable through WriteToMemory, it writes the interns in + // the post zygote table. Returns how many bytes were written. + size_t WriteFromPostZygoteTable(uint8_t* ptr) + EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); private: typedef HashSet<GcRoot<mirror::String>, GcRootEmptyFn, StringHashEquals, StringHashEquals, @@ -192,6 +216,10 @@ class InternTable { EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_); friend class Transaction; + size_t ReadFromMemoryLocked(const uint8_t* ptr) + EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool image_added_to_intern_table_ GUARDED_BY(Locks::intern_table_lock_); bool log_new_roots_ GUARDED_BY(Locks::intern_table_lock_); bool allow_new_interns_ GUARDED_BY(Locks::intern_table_lock_); diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc index 1ed1a649b8..0f6f788013 100644 --- a/runtime/interpreter/interpreter_common.cc +++ b/runtime/interpreter/interpreter_common.cc @@ -450,10 +450,13 @@ void UnexpectedOpcode(const Instruction* inst, const ShadowFrame& shadow_frame) static inline void AssignRegister(ShadowFrame* new_shadow_frame, const ShadowFrame& shadow_frame, size_t dest_reg, size_t src_reg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - // If both register locations contains the same value, the register probably holds a reference. // Uint required, so that sign extension does not make this wrong on 64b systems uint32_t src_value = shadow_frame.GetVReg(src_reg); mirror::Object* o = shadow_frame.GetVRegReference<kVerifyNone>(src_reg); + + // If both register locations contains the same value, the register probably holds a reference. + // Note: As an optimization, non-moving collectors leave a stale reference value + // in the references array even after the original vreg was overwritten to a non-reference. if (src_value == reinterpret_cast<uintptr_t>(o)) { new_shadow_frame->SetVRegReference(dest_reg, o); } else { @@ -517,7 +520,8 @@ bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, // Slow path. // We might need to do class loading, which incurs a thread state change to kNative. So // register the shadow frame as under construction and allow suspension again. - self->SetShadowFrameUnderConstruction(new_shadow_frame); + ScopedStackedShadowFramePusher pusher( + self, new_shadow_frame, StackedShadowFrameType::kShadowFrameUnderConstruction); self->EndAssertNoThreadSuspension(old_cause); // We need to do runtime check on reference assignment. 
We need to load the shorty @@ -590,8 +594,6 @@ bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame, break; } } - // We're done with the construction. - self->ClearShadowFrameUnderConstruction(); } else { // Fast path: no extra checks. if (is_range) { diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index dd7aa40368..fcf083cbe1 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -56,7 +56,7 @@ namespace interpreter { template<bool do_access_check, bool transaction_active> JValue ExecuteSwitchImpl(Thread* self, const DexFile::CodeItem* code_item, ShadowFrame& shadow_frame, JValue result_register) { - bool do_assignability_check = do_access_check; + constexpr bool do_assignability_check = do_access_check; if (UNLIKELY(!shadow_frame.HasReferenceArray())) { LOG(FATAL) << "Invalid shadow frame for interpreter use"; return JValue(); diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc index 2d3d19ce3e..eb9c32d7ad 100644 --- a/runtime/java_vm_ext.cc +++ b/runtime/java_vm_ext.cc @@ -16,6 +16,8 @@ #include "jni_internal.h" +#define ATRACE_TAG ATRACE_TAG_DALVIK +#include <cutils/trace.h> #include <dlfcn.h> #include "art_method.h" @@ -788,9 +790,11 @@ void JavaVMExt::VisitRoots(RootVisitor* visitor) { // JNI Invocation interface. extern "C" jint JNI_CreateJavaVM(JavaVM** p_vm, JNIEnv** p_env, void* vm_args) { + ATRACE_BEGIN(__FUNCTION__); const JavaVMInitArgs* args = static_cast<JavaVMInitArgs*>(vm_args); if (IsBadJniVersion(args->version)) { LOG(ERROR) << "Bad JNI version passed to CreateJavaVM: " << args->version; + ATRACE_END(); return JNI_EVERSION; } RuntimeOptions options; @@ -800,6 +804,7 @@ extern "C" jint JNI_CreateJavaVM(JavaVM** p_vm, JNIEnv** p_env, void* vm_args) { } bool ignore_unrecognized = args->ignoreUnrecognized; if (!Runtime::Create(options, ignore_unrecognized)) { + ATRACE_END(); return JNI_ERR; } Runtime* runtime = Runtime::Current(); @@ -808,10 +813,12 @@ extern "C" jint JNI_CreateJavaVM(JavaVM** p_vm, JNIEnv** p_env, void* vm_args) { delete Thread::Current()->GetJniEnv(); delete runtime->GetJavaVM(); LOG(WARNING) << "CreateJavaVM failed"; + ATRACE_END(); return JNI_ERR; } *p_env = Thread::Current()->GetJniEnv(); *p_vm = runtime->GetJavaVM(); + ATRACE_END(); return JNI_OK; } diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h index e18d10fa0a..7c48985dfe 100644 --- a/runtime/jdwp/jdwp.h +++ b/runtime/jdwp/jdwp.h @@ -297,7 +297,7 @@ struct JdwpState { private: explicit JdwpState(const JdwpOptions* options); - size_t ProcessRequest(Request* request, ExpandBuf* pReply); + size_t ProcessRequest(Request* request, ExpandBuf* pReply, bool* skip_reply); bool InvokeInProgress(); bool IsConnected(); void SuspendByPolicy(JdwpSuspendPolicy suspend_policy, JDWP::ObjectId thread_self_id) diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc index 612af8bc99..14f097f72a 100644 --- a/runtime/jdwp/jdwp_event.cc +++ b/runtime/jdwp/jdwp_event.cc @@ -99,10 +99,6 @@ put ourselves to sleep. That way we don't interfere with anyone else and don't allow anyone else to interfere with us. 
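
The JNI_CreateJavaVM changes above bracket VM creation with ATRACE_BEGIN(__FUNCTION__)/ATRACE_END() and therefore have to repeat ATRACE_END() before every early return. An RAII wrapper, shown here purely as an illustration (it is not part of this change and builds only against libcutils, as the traced files above do), makes that pairing automatic:

#define ATRACE_TAG ATRACE_TAG_DALVIK
#include <cutils/trace.h>

// Illustrative helper: the matching ATRACE_END() runs on every path out of the scope,
// including early returns.
class ScopedAtrace {
 public:
  explicit ScopedAtrace(const char* name) { ATRACE_BEGIN(name); }
  ~ScopedAtrace() { ATRACE_END(); }
  ScopedAtrace(const ScopedAtrace&) = delete;
  ScopedAtrace& operator=(const ScopedAtrace&) = delete;
};

// Usage, mirroring the shape of JNI_CreateJavaVM above:
//   ScopedAtrace trace(__FUNCTION__);
//   if (bad_version) return JNI_EVERSION;    // ATRACE_END() still runs
//   if (!created_runtime) return JNI_ERR;    // and here
//   return JNI_OK;
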
*/ - -#define kJdwpEventCommandSet 64 -#define kJdwpCompositeCommand 100 - namespace art { namespace JDWP { @@ -612,13 +608,10 @@ void JdwpState::SuspendByPolicy(JdwpSuspendPolicy suspend_policy, JDWP::ObjectId */ DebugInvokeReq* const pReq = Dbg::GetInvokeReq(); if (pReq == nullptr) { - /*LOGD("SuspendByPolicy: no invoke needed");*/ break; } - /* grab this before posting/suspending again */ - AcquireJdwpTokenForEvent(thread_self_id); - + // Execute method. Dbg::ExecuteMethod(pReq); } } @@ -749,11 +742,11 @@ static ExpandBuf* eventPrep() { void JdwpState::EventFinish(ExpandBuf* pReq) { uint8_t* buf = expandBufGetBuffer(pReq); - Set4BE(buf, expandBufGetLength(pReq)); - Set4BE(buf + 4, NextRequestSerial()); - Set1(buf + 8, 0); /* flags */ - Set1(buf + 9, kJdwpEventCommandSet); - Set1(buf + 10, kJdwpCompositeCommand); + Set4BE(buf + kJDWPHeaderSizeOffset, expandBufGetLength(pReq)); + Set4BE(buf + kJDWPHeaderIdOffset, NextRequestSerial()); + Set1(buf + kJDWPHeaderFlagsOffset, 0); /* flags */ + Set1(buf + kJDWPHeaderCmdSetOffset, kJDWPEventCmdSet); + Set1(buf + kJDWPHeaderCmdOffset, kJDWPEventCompositeCmd); SendRequest(pReq); diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc index f7f70f6ed7..d4e2656b7e 100644 --- a/runtime/jdwp/jdwp_handler.cc +++ b/runtime/jdwp/jdwp_handler.cc @@ -52,17 +52,6 @@ std::string DescribeRefTypeId(const RefTypeId& ref_type_id) { return StringPrintf("%#" PRIx64 " (%s)", ref_type_id, signature.c_str()); } -// Helper function: write a variable-width value into the output input buffer. -static void WriteValue(ExpandBuf* pReply, int width, uint64_t value) { - switch (width) { - case 1: expandBufAdd1(pReply, value); break; - case 2: expandBufAdd2BE(pReply, value); break; - case 4: expandBufAdd4BE(pReply, value); break; - case 8: expandBufAdd8BE(pReply, value); break; - default: LOG(FATAL) << width; break; - } -} - static JdwpError WriteTaggedObject(ExpandBuf* reply, ObjectId object_id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { uint8_t tag; @@ -92,7 +81,7 @@ static JdwpError WriteTaggedObjectList(ExpandBuf* reply, const std::vector<Objec * If "is_constructor" is set, this returns "object_id" rather than the * expected-to-be-void return value of the called function. */ -static JdwpError RequestInvoke(JdwpState*, Request* request, ExpandBuf* pReply, +static JdwpError RequestInvoke(JdwpState*, Request* request, ObjectId thread_id, ObjectId object_id, RefTypeId class_id, MethodId method_id, bool is_constructor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -122,49 +111,15 @@ static JdwpError RequestInvoke(JdwpState*, Request* request, ExpandBuf* pReply, (options & INVOKE_SINGLE_THREADED) ? " (SINGLE_THREADED)" : "", (options & INVOKE_NONVIRTUAL) ? " (NONVIRTUAL)" : ""); - JdwpTag resultTag; - uint64_t resultValue; - ObjectId exceptObjId; - JdwpError err = Dbg::InvokeMethod(thread_id, object_id, class_id, method_id, arg_count, - argValues.get(), argTypes.get(), options, &resultTag, - &resultValue, &exceptObjId); - if (err != ERR_NONE) { - return err; - } - - if (is_constructor) { - // If we invoked a constructor (which actually returns void), return the receiver, - // unless we threw, in which case we return null. - resultTag = JT_OBJECT; - resultValue = (exceptObjId == 0) ? 
object_id : 0; - } - - size_t width = Dbg::GetTagWidth(resultTag); - expandBufAdd1(pReply, resultTag); - if (width != 0) { - WriteValue(pReply, width, resultValue); - } - expandBufAdd1(pReply, JT_OBJECT); - expandBufAddObjectId(pReply, exceptObjId); - - VLOG(jdwp) << " --> returned " << resultTag - << StringPrintf(" %#" PRIx64 " (except=%#" PRIx64 ")", resultValue, exceptObjId); - - /* show detailed debug output */ - if (resultTag == JT_STRING && exceptObjId == 0) { - if (resultValue != 0) { - if (VLOG_IS_ON(jdwp)) { - std::string result_string; - JDWP::JdwpError error = Dbg::StringToUtf8(resultValue, &result_string); - CHECK_EQ(error, JDWP::ERR_NONE); - VLOG(jdwp) << " string '" << result_string << "'"; - } - } else { - VLOG(jdwp) << " string (null)"; - } + JDWP::JdwpError error = Dbg::PrepareInvokeMethod(request->GetId(), thread_id, object_id, + class_id, method_id, arg_count, + argValues.get(), argTypes.get(), options); + if (error == JDWP::ERR_NONE) { + // We successfully requested the invoke. The event thread now owns the arguments array in its + // DebugInvokeReq mailbox. + argValues.release(); } - - return err; + return error; } static JdwpError VM_Version(JdwpState*, Request*, ExpandBuf* pReply) @@ -684,13 +639,14 @@ static JdwpError CT_SetValues(JdwpState* , Request* request, ExpandBuf*) * Example: Eclipse sometimes uses java/lang/Class.forName(String s) on * values in the "variables" display. */ -static JdwpError CT_InvokeMethod(JdwpState* state, Request* request, ExpandBuf* pReply) +static JdwpError CT_InvokeMethod(JdwpState* state, Request* request, + ExpandBuf* pReply ATTRIBUTE_UNUSED) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { RefTypeId class_id = request->ReadRefTypeId(); ObjectId thread_id = request->ReadThreadId(); MethodId method_id = request->ReadMethodId(); - return RequestInvoke(state, request, pReply, thread_id, 0, class_id, method_id, false); + return RequestInvoke(state, request, thread_id, 0, class_id, method_id, false); } /* @@ -700,7 +656,8 @@ static JdwpError CT_InvokeMethod(JdwpState* state, Request* request, ExpandBuf* * Example: in IntelliJ, create a watch on "new String(myByteArray)" to * see the contents of a byte[] as a string. */ -static JdwpError CT_NewInstance(JdwpState* state, Request* request, ExpandBuf* pReply) +static JdwpError CT_NewInstance(JdwpState* state, Request* request, + ExpandBuf* pReply ATTRIBUTE_UNUSED) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { RefTypeId class_id = request->ReadRefTypeId(); ObjectId thread_id = request->ReadThreadId(); @@ -711,7 +668,7 @@ static JdwpError CT_NewInstance(JdwpState* state, Request* request, ExpandBuf* p if (status != ERR_NONE) { return status; } - return RequestInvoke(state, request, pReply, thread_id, object_id, class_id, method_id, true); + return RequestInvoke(state, request, thread_id, object_id, class_id, method_id, true); } /* @@ -863,14 +820,15 @@ static JdwpError OR_MonitorInfo(JdwpState*, Request* request, ExpandBuf* reply) * object), it will try to invoke the object's toString() function. This * feature becomes crucial when examining ArrayLists with Eclipse. 
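
With the invoke reply now sent by the event thread once the invoke completes, the JDWP thread has to recognize the three invoke commands (ClassType.InvokeMethod, ClassType.NewInstance, ObjectReference.InvokeMethod) and skip writing its own reply, which is what IsInvokeCommand and the new skip_reply out-parameter do below. A compact sketch of that dispatch decision, with the command-set/command values hard-coded the way jdwp_priv.h defines them further down; Packet and HandleOnePacket are placeholder names, not the ART functions:

#include <cstdint>

// JDWP invoke commands: ClassType (3) commands 3 and 4, ObjectReference (9) command 6.
constexpr uint8_t kClassTypeCmdSet = 3;
constexpr uint8_t kClassTypeInvokeMethodCmd = 3;
constexpr uint8_t kClassTypeNewInstanceCmd = 4;
constexpr uint8_t kObjectRefCmdSet = 9;
constexpr uint8_t kObjectRefInvokeMethodCmd = 6;

bool IsInvoke(uint8_t cmd_set, uint8_t cmd) {
  if (cmd_set == kClassTypeCmdSet) {
    return cmd == kClassTypeInvokeMethodCmd || cmd == kClassTypeNewInstanceCmd;
  }
  return cmd_set == kObjectRefCmdSet && cmd == kObjectRefInvokeMethodCmd;
}

struct Packet { uint8_t cmd_set; uint8_t cmd; };

// Returns true when the reply must be deferred to the event thread; the caller
// must then not write a reply packet of its own.
bool HandleOnePacket(const Packet& p, bool request_ok) {
  const bool skip_reply = request_ok && IsInvoke(p.cmd_set, p.cmd);
  if (!skip_reply) {
    // ... build and send the reply header here, as the non-invoke path above does ...
  }
  return skip_reply;
}
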
*/ -static JdwpError OR_InvokeMethod(JdwpState* state, Request* request, ExpandBuf* pReply) +static JdwpError OR_InvokeMethod(JdwpState* state, Request* request, + ExpandBuf* pReply ATTRIBUTE_UNUSED) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { ObjectId object_id = request->ReadObjectId(); ObjectId thread_id = request->ReadThreadId(); RefTypeId class_id = request->ReadRefTypeId(); MethodId method_id = request->ReadMethodId(); - return RequestInvoke(state, request, pReply, thread_id, object_id, class_id, method_id, false); + return RequestInvoke(state, request, thread_id, object_id, class_id, method_id, false); } static JdwpError OR_DisableCollection(JdwpState*, Request* request, ExpandBuf*) @@ -1602,13 +1560,27 @@ static std::string DescribeCommand(Request* request) { return result; } +// Returns true if the given command_set and command identify an "invoke" command. +static bool IsInvokeCommand(uint8_t command_set, uint8_t command) { + if (command_set == kJDWPClassTypeCmdSet) { + return command == kJDWPClassTypeInvokeMethodCmd || command == kJDWPClassTypeNewInstanceCmd; + } else if (command_set == kJDWPObjectReferenceCmdSet) { + return command == kJDWPObjectReferenceInvokeCmd; + } else { + return false; + } +} + /* - * Process a request from the debugger. + * Process a request from the debugger. The skip_reply flag is set to true to indicate to the + * caller the reply must not be sent to the debugger. This is used for invoke commands where the + * reply is sent by the event thread after completing the invoke. * * On entry, the JDWP thread is in VMWAIT. */ -size_t JdwpState::ProcessRequest(Request* request, ExpandBuf* pReply) { +size_t JdwpState::ProcessRequest(Request* request, ExpandBuf* pReply, bool* skip_reply) { JdwpError result = ERR_NONE; + *skip_reply = false; if (request->GetCommandSet() != kJDWPDdmCmdSet) { /* @@ -1661,24 +1633,31 @@ size_t JdwpState::ProcessRequest(Request* request, ExpandBuf* pReply) { result = ERR_NOT_IMPLEMENTED; } - /* - * Set up the reply header. - * - * If we encountered an error, only send the header back. - */ - uint8_t* replyBuf = expandBufGetBuffer(pReply); - size_t replyLength = (result == ERR_NONE) ? expandBufGetLength(pReply) : kJDWPHeaderLen; - Set4BE(replyBuf + 0, replyLength); - Set4BE(replyBuf + 4, request->GetId()); - Set1(replyBuf + 8, kJDWPFlagReply); - Set2BE(replyBuf + 9, result); - - CHECK_GT(expandBufGetLength(pReply), 0U) << GetCommandName(request) << " " << request->GetId(); - - size_t respLen = expandBufGetLength(pReply) - kJDWPHeaderLen; - VLOG(jdwp) << "REPLY: " << GetCommandName(request) << " " << result << " (length=" << respLen << ")"; - if (false) { - VLOG(jdwp) << HexDump(expandBufGetBuffer(pReply) + kJDWPHeaderLen, respLen, false, ""); + size_t replyLength = 0U; + if (result == ERR_NONE && IsInvokeCommand(request->GetCommandSet(), request->GetCommand())) { + // We successfully request an invoke in the event thread. It will send the reply once the + // invoke completes so we must not send it now. + *skip_reply = true; + } else { + /* + * Set up the reply header. + * + * If we encountered an error, only send the header back. + */ + uint8_t* replyBuf = expandBufGetBuffer(pReply); + replyLength = (result == ERR_NONE) ? 
expandBufGetLength(pReply) : kJDWPHeaderLen; + Set4BE(replyBuf + kJDWPHeaderSizeOffset, replyLength); + Set4BE(replyBuf + kJDWPHeaderIdOffset, request->GetId()); + Set1(replyBuf + kJDWPHeaderFlagsOffset, kJDWPFlagReply); + Set2BE(replyBuf + kJDWPHeaderErrorCodeOffset, result); + + CHECK_GT(expandBufGetLength(pReply), 0U) << GetCommandName(request) << " " << request->GetId(); + + size_t respLen = expandBufGetLength(pReply) - kJDWPHeaderLen; + VLOG(jdwp) << "REPLY: " << GetCommandName(request) << " " << result << " (length=" << respLen << ")"; + if (false) { + VLOG(jdwp) << HexDump(expandBufGetBuffer(pReply) + kJDWPHeaderLen, respLen, false, ""); + } } VLOG(jdwp) << "----------"; diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc index e6b97a2083..6bc5e27f85 100644 --- a/runtime/jdwp/jdwp_main.cc +++ b/runtime/jdwp/jdwp_main.cc @@ -395,8 +395,15 @@ bool JdwpState::HandlePacket() { JDWP::Request request(netStateBase->input_buffer_, netStateBase->input_count_); ExpandBuf* pReply = expandBufAlloc(); - size_t replyLength = ProcessRequest(&request, pReply); - ssize_t cc = netStateBase->WritePacket(pReply, replyLength); + bool skip_reply = false; + size_t replyLength = ProcessRequest(&request, pReply, &skip_reply); + ssize_t cc = 0; + if (!skip_reply) { + cc = netStateBase->WritePacket(pReply, replyLength); + } else { + DCHECK_EQ(replyLength, 0U); + } + expandBufFree(pReply); /* * We processed this request and sent its reply so we can release the JDWP token. @@ -405,10 +412,8 @@ bool JdwpState::HandlePacket() { if (cc != static_cast<ssize_t>(replyLength)) { PLOG(ERROR) << "Failed sending reply to debugger"; - expandBufFree(pReply); return false; } - expandBufFree(pReply); netStateBase->ConsumeBytes(request.GetLength()); { MutexLock mu(self, shutdown_lock_); diff --git a/runtime/jdwp/jdwp_priv.h b/runtime/jdwp/jdwp_priv.h index f290be0f52..d58467d108 100644 --- a/runtime/jdwp/jdwp_priv.h +++ b/runtime/jdwp/jdwp_priv.h @@ -29,15 +29,32 @@ /* * JDWP constants. 
*/ -#define kJDWPHeaderLen 11 -#define kJDWPFlagReply 0x80 - -#define kMagicHandshake "JDWP-Handshake" -#define kMagicHandshakeLen (sizeof(kMagicHandshake)-1) +static constexpr size_t kJDWPHeaderSizeOffset = 0U; +static constexpr size_t kJDWPHeaderIdOffset = 4U; +static constexpr size_t kJDWPHeaderFlagsOffset = 8U; +static constexpr size_t kJDWPHeaderErrorCodeOffset = 9U; +static constexpr size_t kJDWPHeaderCmdSetOffset = 9U; +static constexpr size_t kJDWPHeaderCmdOffset = 10U; +static constexpr size_t kJDWPHeaderLen = 11U; +static constexpr uint8_t kJDWPFlagReply = 0x80; + +static constexpr const char kMagicHandshake[] = "JDWP-Handshake"; +static constexpr size_t kMagicHandshakeLen = sizeof(kMagicHandshake) - 1; + +/* Invoke commands */ +static constexpr uint8_t kJDWPClassTypeCmdSet = 3U; +static constexpr uint8_t kJDWPClassTypeInvokeMethodCmd = 3U; +static constexpr uint8_t kJDWPClassTypeNewInstanceCmd = 4U; +static constexpr uint8_t kJDWPObjectReferenceCmdSet = 9U; +static constexpr uint8_t kJDWPObjectReferenceInvokeCmd = 6U; + +/* Event command */ +static constexpr uint8_t kJDWPEventCmdSet = 64U; +static constexpr uint8_t kJDWPEventCompositeCmd = 100U; /* DDM support */ -#define kJDWPDdmCmdSet 199 /* 0xc7, or 'G'+128 */ -#define kJDWPDdmCmd 1 +static constexpr uint8_t kJDWPDdmCmdSet = 199U; // 0xc7, or 'G'+128 +static constexpr uint8_t kJDWPDdmCmd = 1U; namespace art { diff --git a/runtime/jvalue.h b/runtime/jvalue.h index b39567b297..6a6d1986dc 100644 --- a/runtime/jvalue.h +++ b/runtime/jvalue.h @@ -61,6 +61,8 @@ union PACKED(4) JValue { uint8_t GetZ() const { return z; } void SetZ(uint8_t new_z) { z = new_z; } + mirror::Object** GetGCRoot() { return &l; } + private: uint8_t z; int8_t b; diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc index d8c1ec1508..2e335dcae5 100644 --- a/runtime/mem_map.cc +++ b/runtime/mem_map.cc @@ -240,6 +240,22 @@ static bool CheckMapRequest(uint8_t* expected_ptr, void* actual_ptr, size_t byte return false; } +#if USE_ART_LOW_4G_ALLOCATOR +static inline void* TryMemMapLow4GB(void* ptr, size_t page_aligned_byte_count, int prot, int flags, + int fd) { + void* actual = mmap(ptr, page_aligned_byte_count, prot, flags, fd, 0); + if (actual != MAP_FAILED) { + // Since we didn't use MAP_FIXED the kernel may have mapped it somewhere not in the low + // 4GB. If this is the case, unmap and retry. + if (reinterpret_cast<uintptr_t>(actual) + page_aligned_byte_count >= 4 * GB) { + munmap(actual, page_aligned_byte_count); + actual = MAP_FAILED; + } + } + return actual; +} +#endif + MemMap* MemMap::MapAnonymous(const char* name, uint8_t* expected_ptr, size_t byte_count, int prot, bool low_4gb, bool reuse, std::string* error_msg) { #ifndef __LP64__ @@ -314,7 +330,39 @@ MemMap* MemMap::MapAnonymous(const char* name, uint8_t* expected_ptr, size_t byt if (low_4gb && expected_ptr == nullptr) { bool first_run = true; + MutexLock mu(Thread::Current(), *Locks::mem_maps_lock_); for (uintptr_t ptr = next_mem_pos_; ptr < 4 * GB; ptr += kPageSize) { + // Use maps_ as an optimization to skip over large maps. + // Find the first map which is address > ptr. + auto it = maps_->upper_bound(reinterpret_cast<void*>(ptr)); + if (it != maps_->begin()) { + auto before_it = it; + --before_it; + // Start at the end of the map before the upper bound. + ptr = std::max(ptr, reinterpret_cast<uintptr_t>(before_it->second->BaseEnd())); + CHECK_ALIGNED(ptr, kPageSize); + } + while (it != maps_->end()) { + // How much space do we have until the next map? 
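
The low-4GB path above now consults the maps_ registry to jump over existing mappings rather than probing one page at a time, and the mmap-then-verify dance is factored into TryMemMapLow4GB: without MAP_FIXED the address is only a hint, so the result has to be re-checked and given back if it landed at or above 4GB. A self-contained POSIX sketch of that retry idiom, assuming a 64-bit build; the naive page-by-page probe loop stands in for the real allocator's map-gap skipping:

#include <sys/mman.h>
#include <cstddef>
#include <cstdint>

constexpr uint64_t kLimit4GB = UINT64_C(4) * 1024 * 1024 * 1024;

// Try to place an anonymous mapping below 4GB at the hinted address.
// Returns MAP_FAILED if the kernel put it elsewhere.
void* TryMapLow4GB(void* hint, size_t length) {
  void* actual = mmap(hint, length, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (actual != MAP_FAILED &&
      reinterpret_cast<uintptr_t>(actual) + length >= kLimit4GB) {
    munmap(actual, length);  // landed too high: release and let the caller retry
    actual = MAP_FAILED;
  }
  return actual;
}

// Caller side: walk page-aligned candidate addresses and stop at the first attempt
// that sticks below 4GB. The real code advances ptr past known mappings instead.
void* AllocLow4GB(size_t length, size_t page_size) {
  for (uintptr_t ptr = page_size; ptr + length <= kLimit4GB; ptr += page_size) {
    void* result = TryMapLow4GB(reinterpret_cast<void*>(ptr), length);
    if (result != MAP_FAILED) {
      return result;
    }
  }
  return nullptr;
}
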
+ size_t delta = reinterpret_cast<uintptr_t>(it->first) - ptr; + // If the space may be sufficient, break out of the loop. + if (delta >= page_aligned_byte_count) { + break; + } + // Otherwise, skip to the end of the map. + ptr = reinterpret_cast<uintptr_t>(it->second->BaseEnd()); + CHECK_ALIGNED(ptr, kPageSize); + ++it; + } + + // Try to see if we get lucky with this address since none of the ART maps overlap. + actual = TryMemMapLow4GB(reinterpret_cast<void*>(ptr), page_aligned_byte_count, prot, flags, + fd.get()); + if (actual != MAP_FAILED) { + next_mem_pos_ = reinterpret_cast<uintptr_t>(actual) + page_aligned_byte_count; + break; + } + if (4U * GB - ptr < page_aligned_byte_count) { // Not enough memory until 4GB. if (first_run) { @@ -344,17 +392,10 @@ MemMap* MemMap::MapAnonymous(const char* name, uint8_t* expected_ptr, size_t byt next_mem_pos_ = tail_ptr; // update early, as we break out when we found and mapped a region if (safe == true) { - actual = mmap(reinterpret_cast<void*>(ptr), page_aligned_byte_count, prot, flags, fd.get(), - 0); + actual = TryMemMapLow4GB(reinterpret_cast<void*>(ptr), page_aligned_byte_count, prot, flags, + fd.get()); if (actual != MAP_FAILED) { - // Since we didn't use MAP_FIXED the kernel may have mapped it somewhere not in the low - // 4GB. If this is the case, unmap and retry. - if (reinterpret_cast<uintptr_t>(actual) + page_aligned_byte_count < 4 * GB) { break; - } else { - munmap(actual, page_aligned_byte_count); - actual = MAP_FAILED; - } } } else { // Skip over last page. @@ -395,7 +436,7 @@ MemMap* MemMap::MapAnonymous(const char* name, uint8_t* expected_ptr, size_t byt return nullptr; } return new MemMap(name, reinterpret_cast<uint8_t*>(actual), byte_count, actual, - page_aligned_byte_count, prot, false); + page_aligned_byte_count, prot, reuse); } MemMap* MemMap::MapFileAtAddress(uint8_t* expected_ptr, size_t byte_count, int prot, int flags, diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h index 835b94ade4..8c9222f6a4 100644 --- a/runtime/mirror/class-inl.h +++ b/runtime/mirror/class-inl.h @@ -666,7 +666,7 @@ template <bool kVisitClass, typename Visitor> inline void Class::VisitReferences(mirror::Class* klass, const Visitor& visitor) { VisitInstanceFieldsReferences<kVisitClass>(klass, visitor); // Right after a class is allocated, but not yet loaded - // (kStatusNotReady, see ClassLinkder::LoadClass()), GC may find it + // (kStatusNotReady, see ClassLinker::LoadClass()), GC may find it // and scan it. IsTemp() may call Class::GetAccessFlags() but may // fail in the DCHECK in Class::GetAccessFlags() because the class // status is kStatusNotReady. To avoid it, rely on IsResolved() diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h index 9f6cd11c3e..8d9c08d9d5 100644 --- a/runtime/mirror/string-inl.h +++ b/runtime/mirror/string-inl.h @@ -176,11 +176,13 @@ inline String* String::AllocFromByteArray(Thread* self, int32_t byte_length, } template <bool kIsInstrumented> -inline String* String::AllocFromCharArray(Thread* self, int32_t array_length, +inline String* String::AllocFromCharArray(Thread* self, int32_t count, Handle<CharArray> array, int32_t offset, gc::AllocatorType allocator_type) { - SetStringCountAndValueVisitorFromCharArray visitor(array_length, array, offset); - String* new_string = Alloc<kIsInstrumented>(self, array_length, allocator_type, visitor); + // It is a caller error to have a count less than the actual array's size. 
+ DCHECK_GE(array->GetLength(), count); + SetStringCountAndValueVisitorFromCharArray visitor(count, array, offset); + String* new_string = Alloc<kIsInstrumented>(self, count, allocator_type, visitor); return new_string; } diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h index a8f16d78ff..af06385401 100644 --- a/runtime/mirror/string.h +++ b/runtime/mirror/string.h @@ -95,7 +95,7 @@ class MANAGED String FINAL : public Object { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); template <bool kIsInstrumented> - ALWAYS_INLINE static String* AllocFromCharArray(Thread* self, int32_t array_length, + ALWAYS_INLINE static String* AllocFromCharArray(Thread* self, int32_t count, Handle<CharArray> array, int32_t offset, gc::AllocatorType allocator_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc index 94024ef4b2..67dcc9c6af 100644 --- a/runtime/native/java_lang_Class.cc +++ b/runtime/native/java_lang_Class.cc @@ -380,8 +380,8 @@ static jobject Class_getDeclaredMethodInternal(JNIEnv* env, jobject javaThis, static jobjectArray Class_getDeclaredMethodsUnchecked(JNIEnv* env, jobject javaThis, jboolean publicOnly) { ScopedFastNativeObjectAccess soa(env); - StackHandleScope<3> hs(soa.Self()); - auto* klass = DecodeClass(soa, javaThis); + StackHandleScope<2> hs(soa.Self()); + auto klass = hs.NewHandle(DecodeClass(soa, javaThis)); size_t num_methods = 0; for (auto& m : klass->GetVirtualMethods(sizeof(void*))) { auto modifiers = m.GetAccessFlags(); diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc index bd043a84c6..abac8153b3 100644 --- a/runtime/native/java_lang_Runtime.cc +++ b/runtime/native/java_lang_Runtime.cc @@ -52,52 +52,29 @@ NO_RETURN static void Runtime_nativeExit(JNIEnv*, jclass, jint status) { exit(status); } -static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPathJstr, jstring javaDexPathJstr) { +static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPathJstr) { #ifdef HAVE_ANDROID_OS - std::stringstream ss; if (javaLdLibraryPathJstr != nullptr) { - ScopedUtfChars javaLdLibraryPath(env, javaLdLibraryPathJstr); - if (javaLdLibraryPath.c_str() != nullptr) { - ss << javaLdLibraryPath.c_str(); + ScopedUtfChars ldLibraryPath(env, javaLdLibraryPathJstr); + if (ldLibraryPath.c_str() != nullptr) { + android_update_LD_LIBRARY_PATH(ldLibraryPath.c_str()); } } - if (javaDexPathJstr != nullptr) { - ScopedUtfChars javaDexPath(env, javaDexPathJstr); - if (javaDexPath.c_str() != nullptr) { - std::vector<std::string> dexPathVector; - Split(javaDexPath.c_str(), ':', &dexPathVector); - - for (auto abi : art::Runtime::Current()->GetCpuAbilist()) { - for (auto zip_path : dexPathVector) { - // Native libraries live under lib/<abi>/ inside .apk file. - ss << ":" << zip_path << "!" 
<< "lib/" << abi; - } - } - } - } - - std::string ldLibraryPathStr = ss.str(); - const char* ldLibraryPath = ldLibraryPathStr.c_str(); - if (*ldLibraryPath == ':') { - ++ldLibraryPath; - } - - android_update_LD_LIBRARY_PATH(ldLibraryPath); #else LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!"; - UNUSED(javaLdLibraryPathJstr, javaDexPathJstr, env); + UNUSED(javaLdLibraryPathJstr, env); #endif } static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader, - jstring javaLdLibraryPathJstr, jstring javaDexPathJstr) { + jstring javaLdLibraryPathJstr) { ScopedUtfChars filename(env, javaFilename); if (filename.c_str() == nullptr) { return nullptr; } - SetLdLibraryPath(env, javaLdLibraryPathJstr, javaDexPathJstr); + SetLdLibraryPath(env, javaLdLibraryPathJstr); std::string error_msg; { @@ -130,7 +107,7 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(Runtime, gc, "()V"), NATIVE_METHOD(Runtime, maxMemory, "!()J"), NATIVE_METHOD(Runtime, nativeExit, "(I)V"), - NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"), + NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;"), NATIVE_METHOD(Runtime, totalMemory, "!()J"), }; diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc index b96ddc8102..9ce4a02f1b 100644 --- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc +++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc @@ -38,7 +38,7 @@ static jbyteArray DdmVmInternal_getRecentAllocations(JNIEnv* env, jclass) { } static jboolean DdmVmInternal_getRecentAllocationStatus(JNIEnv*, jclass) { - return Dbg::IsAllocTrackingEnabled(); + return Runtime::Current()->GetHeap()->IsAllocTrackingEnabled(); } /* diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index 0bc834f67b..4b563b563b 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -368,23 +368,28 @@ bool ParsedOptions::ProcessSpecialOptions(const RuntimeOptions& options, return true; } -bool ParsedOptions::Parse(const RuntimeOptions& options, bool ignore_unrecognized, - RuntimeArgumentMap* runtime_options) { +// Intended for local changes only. +static void MaybeOverrideVerbosity() { // gLogVerbosity.class_linker = true; // TODO: don't check this in! // gLogVerbosity.compiler = true; // TODO: don't check this in! + // gLogVerbosity.deopt = true; // TODO: don't check this in! // gLogVerbosity.gc = true; // TODO: don't check this in! // gLogVerbosity.heap = true; // TODO: don't check this in! // gLogVerbosity.jdwp = true; // TODO: don't check this in! // gLogVerbosity.jit = true; // TODO: don't check this in! // gLogVerbosity.jni = true; // TODO: don't check this in! // gLogVerbosity.monitor = true; // TODO: don't check this in! + // gLogVerbosity.oat = true; // TODO: don't check this in! // gLogVerbosity.profiler = true; // TODO: don't check this in! // gLogVerbosity.signals = true; // TODO: don't check this in! // gLogVerbosity.startup = true; // TODO: don't check this in! // gLogVerbosity.third_party_jni = true; // TODO: don't check this in! // gLogVerbosity.threads = true; // TODO: don't check this in! // gLogVerbosity.verifier = true; // TODO: don't check this in! 
+} +bool ParsedOptions::Parse(const RuntimeOptions& options, bool ignore_unrecognized, + RuntimeArgumentMap* runtime_options) { for (size_t i = 0; i < options.size(); ++i) { if (true && options[0].first == "-Xzygote") { LOG(INFO) << "option[" << i << "]=" << options[i].first; @@ -453,6 +458,8 @@ bool ParsedOptions::Parse(const RuntimeOptions& options, bool ignore_unrecognize } } + MaybeOverrideVerbosity(); + // -Xprofile: Trace::SetDefaultClockSource(args.GetOrDefault(M::ProfileClock)); diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index 8c9782aefe..02baad758f 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -163,8 +163,8 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { : StackVisitor(self, context, StackVisitor::StackWalkKind::kIncludeInlinedFrames), self_(self), exception_handler_(exception_handler), - prev_shadow_frame_(nullptr) { - CHECK(!self_->HasDeoptimizationShadowFrame()); + prev_shadow_frame_(nullptr), + stacked_shadow_frame_pushed_(false) { } bool VisitFrame() OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -174,6 +174,13 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { // This is the upcall, we remember the frame and last pc so that we may long jump to them. exception_handler_->SetHandlerQuickFramePc(GetCurrentQuickFramePc()); exception_handler_->SetHandlerQuickFrame(GetCurrentQuickFrame()); + if (!stacked_shadow_frame_pushed_) { + // In case there is no deoptimized shadow frame for this upcall, we still + // need to push a nullptr to the stack since there is always a matching pop after + // the long jump. + self_->PushStackedShadowFrame(nullptr, StackedShadowFrameType::kDeoptimizationShadowFrame); + stacked_shadow_frame_pushed_ = true; + } return false; // End stack walk. } else if (method->IsRuntimeMethod()) { // Ignore callee save method. @@ -204,111 +211,116 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { bool verifier_success = verifier.Verify(); CHECK(verifier_success) << PrettyMethod(m); ShadowFrame* new_frame = ShadowFrame::CreateDeoptimizedFrame(num_regs, nullptr, m, dex_pc); - self_->SetShadowFrameUnderConstruction(new_frame); - const std::vector<int32_t> kinds(verifier.DescribeVRegs(dex_pc)); - - // Markers for dead values, used when the verifier knows a Dex register is undefined, - // or when the compiler knows the register has not been initialized, or is not used - // anymore in the method. - static constexpr uint32_t kDeadValue = 0xEBADDE09; - static constexpr uint64_t kLongDeadValue = 0xEBADDE09EBADDE09; - for (uint16_t reg = 0; reg < num_regs; ++reg) { - VRegKind kind = GetVRegKind(reg, kinds); - switch (kind) { - case kUndefined: - new_frame->SetVReg(reg, kDeadValue); - break; - case kConstant: - new_frame->SetVReg(reg, kinds.at((reg * 2) + 1)); - break; - case kReferenceVReg: { - uint32_t value = 0; - // Check IsReferenceVReg in case the compiled GC map doesn't agree with the verifier. - // We don't want to copy a stale reference into the shadow frame as a reference. 
- // b/20736048 - if (GetVReg(m, reg, kind, &value) && IsReferenceVReg(m, reg)) { - new_frame->SetVRegReference(reg, reinterpret_cast<mirror::Object*>(value)); - } else { + { + ScopedStackedShadowFramePusher pusher(self_, new_frame, + StackedShadowFrameType::kShadowFrameUnderConstruction); + const std::vector<int32_t> kinds(verifier.DescribeVRegs(dex_pc)); + + // Markers for dead values, used when the verifier knows a Dex register is undefined, + // or when the compiler knows the register has not been initialized, or is not used + // anymore in the method. + static constexpr uint32_t kDeadValue = 0xEBADDE09; + static constexpr uint64_t kLongDeadValue = 0xEBADDE09EBADDE09; + for (uint16_t reg = 0; reg < num_regs; ++reg) { + VRegKind kind = GetVRegKind(reg, kinds); + switch (kind) { + case kUndefined: new_frame->SetVReg(reg, kDeadValue); - } - break; - } - case kLongLoVReg: - if (GetVRegKind(reg + 1, kinds) == kLongHiVReg) { - // Treat it as a "long" register pair. - uint64_t value = 0; - if (GetVRegPair(m, reg, kLongLoVReg, kLongHiVReg, &value)) { - new_frame->SetVRegLong(reg, value); - } else { - new_frame->SetVRegLong(reg, kLongDeadValue); - } - } else { + break; + case kConstant: + new_frame->SetVReg(reg, kinds.at((reg * 2) + 1)); + break; + case kReferenceVReg: { uint32_t value = 0; - if (GetVReg(m, reg, kind, &value)) { - new_frame->SetVReg(reg, value); + // Check IsReferenceVReg in case the compiled GC map doesn't agree with the verifier. + // We don't want to copy a stale reference into the shadow frame as a reference. + // b/20736048 + if (GetVReg(m, reg, kind, &value) && IsReferenceVReg(m, reg)) { + new_frame->SetVRegReference(reg, reinterpret_cast<mirror::Object*>(value)); } else { new_frame->SetVReg(reg, kDeadValue); } + break; } - break; - case kLongHiVReg: - if (GetVRegKind(reg - 1, kinds) == kLongLoVReg) { - // Nothing to do: we treated it as a "long" register pair. - } else { - uint32_t value = 0; - if (GetVReg(m, reg, kind, &value)) { - new_frame->SetVReg(reg, value); + case kLongLoVReg: + if (GetVRegKind(reg + 1, kinds) == kLongHiVReg) { + // Treat it as a "long" register pair. + uint64_t value = 0; + if (GetVRegPair(m, reg, kLongLoVReg, kLongHiVReg, &value)) { + new_frame->SetVRegLong(reg, value); + } else { + new_frame->SetVRegLong(reg, kLongDeadValue); + } } else { - new_frame->SetVReg(reg, kDeadValue); + uint32_t value = 0; + if (GetVReg(m, reg, kind, &value)) { + new_frame->SetVReg(reg, value); + } else { + new_frame->SetVReg(reg, kDeadValue); + } } - } - break; - case kDoubleLoVReg: - if (GetVRegKind(reg + 1, kinds) == kDoubleHiVReg) { - uint64_t value = 0; - if (GetVRegPair(m, reg, kDoubleLoVReg, kDoubleHiVReg, &value)) { - // Treat it as a "double" register pair. - new_frame->SetVRegLong(reg, value); + break; + case kLongHiVReg: + if (GetVRegKind(reg - 1, kinds) == kLongLoVReg) { + // Nothing to do: we treated it as a "long" register pair. } else { - new_frame->SetVRegLong(reg, kLongDeadValue); + uint32_t value = 0; + if (GetVReg(m, reg, kind, &value)) { + new_frame->SetVReg(reg, value); + } else { + new_frame->SetVReg(reg, kDeadValue); + } } - } else { - uint32_t value = 0; - if (GetVReg(m, reg, kind, &value)) { - new_frame->SetVReg(reg, value); + break; + case kDoubleLoVReg: + if (GetVRegKind(reg + 1, kinds) == kDoubleHiVReg) { + uint64_t value = 0; + if (GetVRegPair(m, reg, kDoubleLoVReg, kDoubleHiVReg, &value)) { + // Treat it as a "double" register pair. 
+ new_frame->SetVRegLong(reg, value); + } else { + new_frame->SetVRegLong(reg, kLongDeadValue); + } } else { - new_frame->SetVReg(reg, kDeadValue); + uint32_t value = 0; + if (GetVReg(m, reg, kind, &value)) { + new_frame->SetVReg(reg, value); + } else { + new_frame->SetVReg(reg, kDeadValue); + } } - } - break; - case kDoubleHiVReg: - if (GetVRegKind(reg - 1, kinds) == kDoubleLoVReg) { - // Nothing to do: we treated it as a "double" register pair. - } else { + break; + case kDoubleHiVReg: + if (GetVRegKind(reg - 1, kinds) == kDoubleLoVReg) { + // Nothing to do: we treated it as a "double" register pair. + } else { + uint32_t value = 0; + if (GetVReg(m, reg, kind, &value)) { + new_frame->SetVReg(reg, value); + } else { + new_frame->SetVReg(reg, kDeadValue); + } + } + break; + default: uint32_t value = 0; if (GetVReg(m, reg, kind, &value)) { new_frame->SetVReg(reg, value); } else { new_frame->SetVReg(reg, kDeadValue); } - } - break; - default: - uint32_t value = 0; - if (GetVReg(m, reg, kind, &value)) { - new_frame->SetVReg(reg, value); - } else { - new_frame->SetVReg(reg, kDeadValue); - } - break; + break; + } } } if (prev_shadow_frame_ != nullptr) { prev_shadow_frame_->SetLink(new_frame); } else { - self_->SetDeoptimizationShadowFrame(new_frame); + // Will be popped after the long jump after DeoptimizeStack(), + // right before interpreter::EnterInterpreterFromDeoptimize(). + stacked_shadow_frame_pushed_ = true; + self_->PushStackedShadowFrame(new_frame, StackedShadowFrameType::kDeoptimizationShadowFrame); } - self_->ClearShadowFrameUnderConstruction(); prev_shadow_frame_ = new_frame; return true; } @@ -316,6 +328,7 @@ class DeoptimizeStackVisitor FINAL : public StackVisitor { Thread* const self_; QuickExceptionHandler* const exception_handler_; ShadowFrame* prev_shadow_frame_; + bool stacked_shadow_frame_pushed_; DISALLOW_COPY_AND_ASSIGN(DeoptimizeStackVisitor); }; diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h index d341ee1017..8d84c35bd9 100644 --- a/runtime/read_barrier-inl.h +++ b/runtime/read_barrier-inl.h @@ -31,7 +31,7 @@ namespace art { template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup> inline MirrorType* ReadBarrier::Barrier( mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) { - const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier; + constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier; if (with_read_barrier && kUseBakerReadBarrier) { // The higher bits of the rb ptr, rb_ptr_high_bits (must be zero) // is used to create artificial data dependency from the is_gray diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 65ea77ad29..66ec7ccf7a 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -22,6 +22,8 @@ #include <linux/fs.h> #endif +#define ATRACE_TAG ATRACE_TAG_DALVIK +#include <cutils/trace.h> #include <signal.h> #include <sys/syscall.h> #include <valgrind.h> @@ -400,6 +402,7 @@ void Runtime::SweepSystemWeaks(IsMarkedCallback* visitor, void* arg) { GetInternTable()->SweepInternTableWeaks(visitor, arg); GetMonitorList()->SweepMonitorList(visitor, arg); GetJavaVM()->SweepJniWeakGlobals(visitor, arg); + GetHeap()->SweepAllocationRecords(visitor, arg); } bool Runtime::Create(const RuntimeOptions& options, bool ignore_unrecognized) { @@ -492,8 +495,12 @@ bool Runtime::Start() { ScopedObjectAccess soa(self); gc::space::ImageSpace* image_space = heap_->GetImageSpace(); if (image_space != nullptr) { + 
ATRACE_BEGIN("AddImageStringsToTable"); GetInternTable()->AddImageStringsToTable(image_space); + ATRACE_END(); + ATRACE_BEGIN("MoveImageClassesToClassTable"); GetClassLinker()->MoveImageClassesToClassTable(); + ATRACE_END(); } } @@ -512,7 +519,9 @@ bool Runtime::Start() { // InitNativeMethods needs to be after started_ so that the classes // it touches will have methods linked to the oat file if necessary. + ATRACE_BEGIN("InitNativeMethods"); InitNativeMethods(); + ATRACE_END(); // Initialize well known thread group values that may be accessed threads while attaching. InitThreadGroups(self); @@ -533,7 +542,9 @@ bool Runtime::Start() { GetInstructionSetString(kRuntimeISA)); } + ATRACE_BEGIN("StartDaemonThreads"); StartDaemonThreads(); + ATRACE_END(); { ScopedObjectAccess soa(self); @@ -635,6 +646,10 @@ void Runtime::DidForkFromZygote(JNIEnv* env, NativeBridgeAction action, const ch // Create the thread pools. heap_->CreateThreadPool(); + // Reset the gc performance data at zygote fork so that the GCs + // before fork aren't attributed to an app. + heap_->ResetGcPerformanceInfo(); + if (jit_.get() == nullptr && jit_options_->UseJIT()) { // Create the JIT if the flag is set and we haven't already create it (happens for run-tests). CreateJit(); @@ -763,6 +778,7 @@ static size_t OpenDexFiles(const std::vector<std::string>& dex_filenames, } bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) { + ATRACE_BEGIN("Runtime::Init"); CHECK_EQ(sysconf(_SC_PAGE_SIZE), kPageSize); MemMap::Init(); @@ -773,6 +789,7 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) ParsedOptions::Create(raw_options, ignore_unrecognized, &runtime_options)); if (parsed_options.get() == nullptr) { LOG(ERROR) << "Failed to parse options"; + ATRACE_END(); return false; } VLOG(startup) << "Runtime::Init -verbose:startup enabled"; @@ -826,6 +843,7 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) zygote_max_failed_boots_ = runtime_options.GetOrDefault(Opt::ZygoteMaxFailedBoots); XGcOption xgc_option = runtime_options.GetOrDefault(Opt::GcOption); + ATRACE_BEGIN("CreateHeap"); heap_ = new gc::Heap(runtime_options.GetOrDefault(Opt::MemoryInitialSize), runtime_options.GetOrDefault(Opt::HeapGrowthLimit), runtime_options.GetOrDefault(Opt::HeapMinFree), @@ -855,9 +873,11 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) xgc_option.verify_post_gc_rosalloc_, runtime_options.GetOrDefault(Opt::EnableHSpaceCompactForOOM), runtime_options.GetOrDefault(Opt::HSpaceCompactForOOMMinIntervalsMs)); + ATRACE_END(); if (heap_->GetImageSpace() == nullptr && !allow_dex_file_fallback_) { LOG(ERROR) << "Dex file fallback disabled, cannot continue without image."; + ATRACE_END(); return false; } @@ -957,7 +977,9 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U); class_linker_ = new ClassLinker(intern_table_); if (GetHeap()->HasImageSpace()) { + ATRACE_BEGIN("InitFromImage"); class_linker_->InitFromImage(); + ATRACE_END(); if (kIsDebugBuild) { GetHeap()->GetImageSpace()->VerifyImageAllocations(); } @@ -1090,6 +1112,8 @@ bool Runtime::Init(const RuntimeOptions& raw_options, bool ignore_unrecognized) VLOG(startup) << "Runtime::Init exiting"; + ATRACE_END(); + return true; } @@ -1452,6 +1476,11 @@ void Runtime::DisallowNewSystemWeaks() { monitor_list_->DisallowNewMonitors(); intern_table_->DisallowNewInterns(); java_vm_->DisallowNewWeakGlobals(); + // 
TODO: add a similar call for heap.allocation_records_, otherwise some of the newly allocated + // objects that are not marked might be swept from the records, making the records incomplete. + // It is safe for now since the only effect is that those objects do not have allocation records. + // The number of such objects should be small, and current allocation tracker cannot collect + // allocation records for all objects anyway. } void Runtime::AllowNewSystemWeaks() { diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def index 922334ee99..4a307d5ed6 100644 --- a/runtime/runtime_options.def +++ b/runtime/runtime_options.def @@ -30,6 +30,8 @@ // If a default value is omitted here, T{} is used as the default value, which is // almost-always the value of the type as if it was memset to all 0. // +// Please keep the columns aligned if possible when adding new rows. +// // Parse-able keys from the command line. RUNTIME_OPTIONS_KEY (Unit, Zygote) @@ -64,9 +66,9 @@ RUNTIME_OPTIONS_KEY (Unit, IgnoreMaxFootprint) RUNTIME_OPTIONS_KEY (Unit, LowMemoryMode) RUNTIME_OPTIONS_KEY (bool, UseTLAB, (kUseTlab || kUseReadBarrier)) RUNTIME_OPTIONS_KEY (bool, EnableHSpaceCompactForOOM, true) -RUNTIME_OPTIONS_KEY (bool, UseJIT, false) -RUNTIME_OPTIONS_KEY (unsigned int, JITCompileThreshold, jit::Jit::kDefaultCompileThreshold) -RUNTIME_OPTIONS_KEY (MemoryKiB, JITCodeCacheCapacity, jit::JitCodeCache::kDefaultCapacity) +RUNTIME_OPTIONS_KEY (bool, UseJIT, false) +RUNTIME_OPTIONS_KEY (unsigned int, JITCompileThreshold, jit::Jit::kDefaultCompileThreshold) +RUNTIME_OPTIONS_KEY (MemoryKiB, JITCodeCacheCapacity, jit::JitCodeCache::kDefaultCapacity) RUNTIME_OPTIONS_KEY (MillisecondsToNanoseconds, \ HSpaceCompactForOOMMinIntervalsMs,\ MsToNs(100 * 1000)) // 100s @@ -105,9 +107,12 @@ RUNTIME_OPTIONS_KEY (std::vector<std::string>, \ ImageCompilerOptions) // -Ximage-compiler-option ... RUNTIME_OPTIONS_KEY (bool, Verify, true) RUNTIME_OPTIONS_KEY (std::string, NativeBridge) +RUNTIME_OPTIONS_KEY (unsigned int, ZygoteMaxFailedBoots, 10) +RUNTIME_OPTIONS_KEY (Unit, NoDexFileFallback) RUNTIME_OPTIONS_KEY (std::string, CpuAbiList) // Not parse-able from command line, but can be provided explicitly. +// (Do not add anything here that is defined in ParsedOptions::MakeParser) RUNTIME_OPTIONS_KEY (const std::vector<const DexFile*>*, \ BootClassPathDexList) // TODO: make unique_ptr RUNTIME_OPTIONS_KEY (InstructionSet, ImageInstructionSet, kRuntimeISA) @@ -120,7 +125,5 @@ RUNTIME_OPTIONS_KEY (void (*)(int32_t status), \ // We don't call abort(3) by default; see // Runtime::Abort. 
RUNTIME_OPTIONS_KEY (void (*)(), HookAbort, nullptr) -RUNTIME_OPTIONS_KEY (unsigned int, ZygoteMaxFailedBoots, 10) -RUNTIME_OPTIONS_KEY (Unit, NoDexFileFallback) #undef RUNTIME_OPTIONS_KEY diff --git a/runtime/stack.cc b/runtime/stack.cc index 5aeca98a88..11c94dbbb8 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -126,10 +126,7 @@ ArtMethod* StackVisitor::GetMethod() const { if (IsInInlinedFrame()) { size_t depth_in_stack_map = current_inlining_depth_ - 1; InlineInfo inline_info = GetCurrentInlineInfo(); - uint32_t method_index = inline_info.GetMethodIndexAtDepth(depth_in_stack_map); - InvokeType invoke_type = - static_cast<InvokeType>(inline_info.GetInvokeTypeAtDepth(depth_in_stack_map)); - return GetResolvedMethod(*GetCurrentQuickFrame(), method_index, invoke_type); + return GetResolvedMethod(*GetCurrentQuickFrame(), inline_info, depth_in_stack_map); } else { return *cur_quick_frame_; } diff --git a/runtime/stack.h b/runtime/stack.h index 79d2f40d73..d60714f7a3 100644 --- a/runtime/stack.h +++ b/runtime/stack.h @@ -95,6 +95,8 @@ class ShadowFrame { } ~ShadowFrame() {} + // TODO(iam): Clean references array up since they're always there, + // we don't need to do conditionals. bool HasReferenceArray() const { return true; } @@ -149,6 +151,9 @@ class ShadowFrame { return *reinterpret_cast<unaligned_double*>(vreg); } + // Look up the reference given its virtual register number. + // If this returns non-null then this does not mean the vreg is currently a reference + // on non-moving collectors. Check that the raw reg with GetVReg is equal to this if not certain. template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> mirror::Object* GetVRegReference(size_t i) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { DCHECK_LT(i, NumberOfVRegs()); @@ -283,6 +288,8 @@ class ShadowFrame { ShadowFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method, uint32_t dex_pc, bool has_reference_array) : number_of_vregs_(num_vregs), link_(link), method_(method), dex_pc_(dex_pc) { + // TODO(iam): Remove this parameter, it's an an artifact of portable removal + DCHECK(has_reference_array); if (has_reference_array) { memset(vregs_, 0, num_vregs * (sizeof(uint32_t) + sizeof(StackReference<mirror::Object>))); } else { @@ -306,6 +313,15 @@ class ShadowFrame { ShadowFrame* link_; ArtMethod* method_; uint32_t dex_pc_; + + // This is a two-part array: + // - [0..number_of_vregs) holds the raw virtual registers, and each element here is always 4 + // bytes. + // - [number_of_vregs..number_of_vregs*2) holds only reference registers. Each element here is + // ptr-sized. + // In other words when a primitive is stored in vX, the second (reference) part of the array will + // be null. When a reference is stored in vX, the second (reference) part of the array will be a + // copy of vX. 
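
The layout comment above describes the storage the interpreter changes rely on: a flat array whose first number_of_vregs_ slots hold the raw 32-bit vreg values, followed by a parallel run of slots holding only the reference view of each vreg. On non-moving collectors the reference slot can go stale once the raw slot is overwritten with a primitive, which is why AssignRegister and GetVRegReference callers compare the raw value against the reference pointer. A simplified standalone sketch of that layout and check; SimpleFrame is illustrative, and ART packs both halves into one trailing array with compressed 32-bit references:

#include <cstddef>
#include <cstdint>
#include <vector>

class SimpleFrame {
 public:
  explicit SimpleFrame(size_t num_vregs)
      : primitives_(num_vregs, 0u), refs_(num_vregs, nullptr) {}

  void SetVReg(size_t i, uint32_t value) {
    primitives_[i] = value;
    // refs_[i] is deliberately left alone, mirroring the non-moving-collector
    // optimization noted in AssignRegister above: the reference slot may go stale.
  }

  void SetVRegReference(size_t i, void* ref) {
    // Truncation stands in for ART's compressed 32-bit heap references.
    primitives_[i] = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(ref));
    refs_[i] = ref;
  }

  // Returns the reference only if the raw slot still agrees with it; otherwise the
  // vreg currently holds a primitive and the reference entry is stale.
  void* GetVRegReferenceChecked(size_t i) const {
    void* ref = refs_[i];
    const uint32_t raw = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(ref));
    return primitives_[i] == raw ? ref : nullptr;
  }

 private:
  std::vector<uint32_t> primitives_;
  std::vector<void*> refs_;
};
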
uint32_t vregs_[0]; DISALLOW_IMPLICIT_CONSTRUCTORS(ShadowFrame); diff --git a/runtime/thread.cc b/runtime/thread.cc index 4203b96f24..fe8b0d8c60 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -147,29 +147,82 @@ void Thread::ResetQuickAllocEntryPointsForThread() { ResetQuickAllocEntryPoints(&tlsPtr_.quick_entrypoints); } -void Thread::SetDeoptimizationShadowFrame(ShadowFrame* sf) { - tlsPtr_.deoptimization_shadow_frame = sf; -} +class DeoptimizationReturnValueRecord { + public: + DeoptimizationReturnValueRecord(const JValue& ret_val, + bool is_reference, + DeoptimizationReturnValueRecord* link) + : ret_val_(ret_val), is_reference_(is_reference), link_(link) {} + + JValue GetReturnValue() const { return ret_val_; } + bool IsReference() const { return is_reference_; } + DeoptimizationReturnValueRecord* GetLink() const { return link_; } + mirror::Object** GetGCRoot() { + DCHECK(is_reference_); + return ret_val_.GetGCRoot(); + } + + private: + JValue ret_val_; + const bool is_reference_; + DeoptimizationReturnValueRecord* const link_; + + DISALLOW_COPY_AND_ASSIGN(DeoptimizationReturnValueRecord); +}; + +class StackedShadowFrameRecord { + public: + StackedShadowFrameRecord(ShadowFrame* shadow_frame, + StackedShadowFrameType type, + StackedShadowFrameRecord* link) + : shadow_frame_(shadow_frame), + type_(type), + link_(link) {} -void Thread::SetDeoptimizationReturnValue(const JValue& ret_val) { - tls64_.deoptimization_return_value.SetJ(ret_val.GetJ()); + ShadowFrame* GetShadowFrame() const { return shadow_frame_; } + StackedShadowFrameType GetType() const { return type_; } + StackedShadowFrameRecord* GetLink() const { return link_; } + + private: + ShadowFrame* const shadow_frame_; + const StackedShadowFrameType type_; + StackedShadowFrameRecord* const link_; + + DISALLOW_COPY_AND_ASSIGN(StackedShadowFrameRecord); +}; + +void Thread::PushAndClearDeoptimizationReturnValue() { + DeoptimizationReturnValueRecord* record = new DeoptimizationReturnValueRecord( + tls64_.deoptimization_return_value, + tls32_.deoptimization_return_value_is_reference, + tlsPtr_.deoptimization_return_value_stack); + tlsPtr_.deoptimization_return_value_stack = record; + ClearDeoptimizationReturnValue(); } -ShadowFrame* Thread::GetAndClearDeoptimizationShadowFrame(JValue* ret_val) { - ShadowFrame* sf = tlsPtr_.deoptimization_shadow_frame; - tlsPtr_.deoptimization_shadow_frame = nullptr; - ret_val->SetJ(tls64_.deoptimization_return_value.GetJ()); - return sf; +JValue Thread::PopDeoptimizationReturnValue() { + DeoptimizationReturnValueRecord* record = tlsPtr_.deoptimization_return_value_stack; + DCHECK(record != nullptr); + tlsPtr_.deoptimization_return_value_stack = record->GetLink(); + JValue ret_val(record->GetReturnValue()); + delete record; + return ret_val; } -void Thread::SetShadowFrameUnderConstruction(ShadowFrame* sf) { - sf->SetLink(tlsPtr_.shadow_frame_under_construction); - tlsPtr_.shadow_frame_under_construction = sf; +void Thread::PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type) { + StackedShadowFrameRecord* record = new StackedShadowFrameRecord( + sf, type, tlsPtr_.stacked_shadow_frame_record); + tlsPtr_.stacked_shadow_frame_record = record; } -void Thread::ClearShadowFrameUnderConstruction() { - CHECK_NE(static_cast<ShadowFrame*>(nullptr), tlsPtr_.shadow_frame_under_construction); - tlsPtr_.shadow_frame_under_construction = tlsPtr_.shadow_frame_under_construction->GetLink(); +ShadowFrame* Thread::PopStackedShadowFrame(StackedShadowFrameType type) { + StackedShadowFrameRecord* 
record = tlsPtr_.stacked_shadow_frame_record; + DCHECK(record != nullptr); + DCHECK_EQ(record->GetType(), type); + tlsPtr_.stacked_shadow_frame_record = record->GetLink(); + ShadowFrame* shadow_frame = record->GetShadowFrame(); + delete record; + return shadow_frame; } void Thread::InitTid() { @@ -2387,21 +2440,27 @@ void Thread::VisitRoots(RootVisitor* visitor) { if (tlsPtr_.debug_invoke_req != nullptr) { tlsPtr_.debug_invoke_req->VisitRoots(visitor, RootInfo(kRootDebugger, thread_id)); } - if (tlsPtr_.deoptimization_shadow_frame != nullptr) { + if (tlsPtr_.stacked_shadow_frame_record != nullptr) { RootCallbackVisitor visitor_to_callback(visitor, thread_id); ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitor_to_callback); - for (ShadowFrame* shadow_frame = tlsPtr_.deoptimization_shadow_frame; shadow_frame != nullptr; - shadow_frame = shadow_frame->GetLink()) { - mapper.VisitShadowFrame(shadow_frame); + for (StackedShadowFrameRecord* record = tlsPtr_.stacked_shadow_frame_record; + record != nullptr; + record = record->GetLink()) { + for (ShadowFrame* shadow_frame = record->GetShadowFrame(); + shadow_frame != nullptr; + shadow_frame = shadow_frame->GetLink()) { + mapper.VisitShadowFrame(shadow_frame); + } } } - if (tlsPtr_.shadow_frame_under_construction != nullptr) { - RootCallbackVisitor visitor_to_callback(visitor, thread_id); - ReferenceMapVisitor<RootCallbackVisitor> mapper(this, nullptr, visitor_to_callback); - for (ShadowFrame* shadow_frame = tlsPtr_.shadow_frame_under_construction; - shadow_frame != nullptr; - shadow_frame = shadow_frame->GetLink()) { - mapper.VisitShadowFrame(shadow_frame); + if (tlsPtr_.deoptimization_return_value_stack != nullptr) { + for (DeoptimizationReturnValueRecord* record = tlsPtr_.deoptimization_return_value_stack; + record != nullptr; + record = record->GetLink()) { + if (record->IsReference()) { + visitor->VisitRootIfNonNull(record->GetGCRoot(), + RootInfo(kRootThreadObject, thread_id)); + } } } for (auto* verifier = tlsPtr_.method_verifier; verifier != nullptr; verifier = verifier->link_) { @@ -2519,12 +2578,11 @@ void Thread::SetDebugInvokeReq(DebugInvokeReq* req) { } void Thread::ClearDebugInvokeReq() { - CHECK(Dbg::IsDebuggerActive()); CHECK(GetInvokeReq() != nullptr) << "Debug invoke req not active in thread " << *this; CHECK(Thread::Current() == this) << "Debug invoke must be finished by the thread itself"; - // We do not own the DebugInvokeReq* so we must not delete it, it is the responsibility of - // the owner (the JDWP thread). + DebugInvokeReq* req = tlsPtr_.debug_invoke_req; tlsPtr_.debug_invoke_req = nullptr; + delete req; } void Thread::PushVerifier(verifier::MethodVerifier* verifier) { diff --git a/runtime/thread.h b/runtime/thread.h index 3f0d0a59a9..0e71c08b07 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -74,6 +74,7 @@ class ClassLinker; class Closure; class Context; struct DebugInvokeReq; +class DeoptimizationReturnValueRecord; class DexFile; class JavaVMExt; struct JNIEnvExt; @@ -82,6 +83,7 @@ class Runtime; class ScopedObjectAccessAlreadyRunnable; class ShadowFrame; class SingleStepControl; +class StackedShadowFrameRecord; class Thread; class ThreadList; @@ -99,6 +101,11 @@ enum ThreadFlag { kCheckpointRequest = 2 // Request that the thread do some checkpoint work and then continue. 
}; +enum class StackedShadowFrameType { + kShadowFrameUnderConstruction, + kDeoptimizationShadowFrame +}; + static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34; // Thread's stack layout for implicit stack overflow checks: @@ -774,15 +781,14 @@ class Thread { void DeactivateSingleStepControl(); // Sets debug invoke request for debugging. When the thread is resumed, - // it executes the method described by this request then suspends itself. - // The thread does not take ownership of the given DebugInvokeReq*, it is - // owned by the JDWP thread which is waiting for the execution of the - // method. + // it executes the method described by this request then sends the reply + // before suspending itself. The thread takes the ownership of the given + // DebugInvokeReq*. It is deleted by a call to ClearDebugInvokeReq. void SetDebugInvokeReq(DebugInvokeReq* req); // Clears debug invoke request for debugging. When the thread completes - // method invocation, it clears its debug invoke request, signals the - // JDWP thread and suspends itself. + // method invocation, it deletes its debug invoke request and suspends + // itself. void ClearDebugInvokeReq(); // Returns the fake exception used to activate deoptimization. @@ -790,21 +796,25 @@ class Thread { return reinterpret_cast<mirror::Throwable*>(-1); } - void SetDeoptimizationShadowFrame(ShadowFrame* sf); - void SetDeoptimizationReturnValue(const JValue& ret_val); - - ShadowFrame* GetAndClearDeoptimizationShadowFrame(JValue* ret_val); - - bool HasDeoptimizationShadowFrame() const { - return tlsPtr_.deoptimization_shadow_frame != nullptr; + // Currently deoptimization invokes verifier which can trigger class loading + // and execute Java code, so there might be nested deoptimizations happening. + // We need to save the ongoing deoptimization shadow frames and return + // values on stacks. + void SetDeoptimizationReturnValue(const JValue& ret_val, bool is_reference) { + tls64_.deoptimization_return_value.SetJ(ret_val.GetJ()); + tls32_.deoptimization_return_value_is_reference = is_reference; } - - void SetShadowFrameUnderConstruction(ShadowFrame* sf); - void ClearShadowFrameUnderConstruction(); - - bool HasShadowFrameUnderConstruction() const { - return tlsPtr_.shadow_frame_under_construction != nullptr; + bool IsDeoptimizationReturnValueReference() { + return tls32_.deoptimization_return_value_is_reference; + } + void ClearDeoptimizationReturnValue() { + tls64_.deoptimization_return_value.SetJ(0); + tls32_.deoptimization_return_value_is_reference = false; } + void PushAndClearDeoptimizationReturnValue(); + JValue PopDeoptimizationReturnValue(); + void PushStackedShadowFrame(ShadowFrame* sf, StackedShadowFrameType type); + ShadowFrame* PopStackedShadowFrame(StackedShadowFrameType type); std::deque<instrumentation::InstrumentationStackFrame>* GetInstrumentationStack() { return tlsPtr_.instrumentation_stack; @@ -1048,7 +1058,8 @@ class Thread { explicit tls_32bit_sized_values(bool is_daemon) : suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0), daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0), - thread_exit_check_count(0), handling_signal_(false), suspended_at_suspend_check(false), + thread_exit_check_count(0), handling_signal_(false), + deoptimization_return_value_is_reference(false), suspended_at_suspend_check(false), ready_for_debug_invoke(false), debug_method_entry_(false) { } @@ -1089,6 +1100,10 @@ class Thread { // True if signal is being handled by this thread. 
bool32_t handling_signal_; + // True if the return value for interpreter after deoptimization is a reference. + // For gc purpose. + bool32_t deoptimization_return_value_is_reference; + // True if the thread is suspended in FullSuspendCheck(). This is // used to distinguish runnable threads that are suspended due to // a normal suspend check from other threads. @@ -1124,8 +1139,9 @@ class Thread { stack_trace_sample(nullptr), wait_next(nullptr), monitor_enter_object(nullptr), top_handle_scope(nullptr), class_loader_override(nullptr), long_jump_context(nullptr), instrumentation_stack(nullptr), debug_invoke_req(nullptr), single_step_control(nullptr), - deoptimization_shadow_frame(nullptr), shadow_frame_under_construction(nullptr), name(nullptr), - pthread_self(0), last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr), + stacked_shadow_frame_record(nullptr), deoptimization_return_value_stack(nullptr), + name(nullptr), pthread_self(0), + last_no_thread_suspension_cause(nullptr), thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_objects(0), thread_local_alloc_stack_top(nullptr), thread_local_alloc_stack_end(nullptr), nested_signal_state(nullptr), flip_function(nullptr), method_verifier(nullptr) { @@ -1201,11 +1217,13 @@ class Thread { // JDWP single-stepping support. SingleStepControl* single_step_control; - // Shadow frame stack that is used temporarily during the deoptimization of a method. - ShadowFrame* deoptimization_shadow_frame; + // For gc purpose, a shadow frame record stack that keeps track of: + // 1) shadow frames under construction. + // 2) deoptimization shadow frames. + StackedShadowFrameRecord* stacked_shadow_frame_record; - // Shadow frame stack that is currently under construction but not yet on the stack - ShadowFrame* shadow_frame_under_construction; + // Deoptimization return value record stack. + DeoptimizationReturnValueRecord* deoptimization_return_value_stack; // A cached copy of the java.lang.Thread's name. std::string* name; @@ -1293,7 +1311,25 @@ class ScopedAssertNoThreadSuspension { const char* const old_cause_; }; +class ScopedStackedShadowFramePusher { + public: + ScopedStackedShadowFramePusher(Thread* self, ShadowFrame* sf, StackedShadowFrameType type) + : self_(self), type_(type) { + self_->PushStackedShadowFrame(sf, type); + } + ~ScopedStackedShadowFramePusher() { + self_->PopStackedShadowFrame(type_); + } + + private: + Thread* const self_; + const StackedShadowFrameType type_; + + DISALLOW_COPY_AND_ASSIGN(ScopedStackedShadowFramePusher); +}; + std::ostream& operator<<(std::ostream& os, const Thread& thread); +std::ostream& operator<<(std::ostream& os, const StackedShadowFrameType& thread); } // namespace art diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index af9ba6848b..b697b43a77 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -875,31 +875,36 @@ void ThreadList::SuspendSelfForDebugger() { // The debugger thread must not suspend itself due to debugger activity! Thread* debug_thread = Dbg::GetDebugThread(); - CHECK(debug_thread != nullptr); CHECK(self != debug_thread); CHECK_NE(self->GetState(), kRunnable); Locks::mutator_lock_->AssertNotHeld(self); - { + // The debugger may have detached while we were executing an invoke request. In that case, we + // must not suspend ourself. 
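As an aside on the thread.h hunk above: ScopedStackedShadowFramePusher is a plain RAII wrapper over PushStackedShadowFrame/PopStackedShadowFrame, so a pushed frame stays GC-visible for exactly one scope. A hedged sketch of a call site follows; the enclosing function is invented for illustration and this only compiles inside the ART tree.

```cpp
// Hypothetical call site (invented name): keep a frame that is still being
// filled in visible to the GC, and let RAII pop the record on every exit path.
void BuildFrame(Thread* self, ShadowFrame* new_frame) {
  ScopedStackedShadowFramePusher pusher(
      self, new_frame, StackedShadowFrameType::kShadowFrameUnderConstruction);
  // ... populate vregs; Thread::VisitRoots can reach new_frame through
  // tlsPtr_.stacked_shadow_frame_record for as long as this scope is live ...
}  // ~ScopedStackedShadowFramePusher pops the record with the matching type.
```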
+ DebugInvokeReq* pReq = self->GetInvokeReq(); + const bool skip_thread_suspension = (pReq != nullptr && !Dbg::IsDebuggerActive()); + if (!skip_thread_suspension) { // Collisions with other suspends aren't really interesting. We want // to ensure that we're the only one fiddling with the suspend count // though. MutexLock mu(self, *Locks::thread_suspend_count_lock_); self->ModifySuspendCount(self, +1, true); CHECK_GT(self->GetSuspendCount(), 0); - } - VLOG(threads) << *self << " self-suspending (debugger)"; + VLOG(threads) << *self << " self-suspending (debugger)"; + } else { + // We must no longer be subject to debugger suspension. + MutexLock mu(self, *Locks::thread_suspend_count_lock_); + CHECK_EQ(self->GetDebugSuspendCount(), 0) << "Debugger detached without resuming us"; - // Tell JDWP we've completed invocation and are ready to suspend. - DebugInvokeReq* const pReq = self->GetInvokeReq(); + VLOG(threads) << *self << " not self-suspending because debugger detached during invoke"; + } + + // If the debugger requested an invoke, we need to send the reply and clear the request. if (pReq != nullptr) { - // Clear debug invoke request before signaling. + Dbg::FinishInvokeMethod(pReq); self->ClearDebugInvokeReq(); - - VLOG(jdwp) << "invoke complete, signaling"; - MutexLock mu(self, pReq->lock); - pReq->cond.Signal(self); + pReq = nullptr; // object has been deleted, clear it for safety. } // Tell JDWP that we've completed suspension. The JDWP thread can't diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index 9faaa4a57e..b86a7ee966 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -3318,7 +3318,10 @@ ArtMethod* MethodVerifier::VerifyInvocationArgsFromIterator( } if (method_type != METHOD_INTERFACE && !actual_arg_type.IsZero()) { const RegType* res_method_class; - if (res_method != nullptr) { + // Miranda methods have the declaring interface as their declaring class, not the abstract + // class. It would be wrong to use this for the type check (interface type checks are + // postponed to runtime). + if (res_method != nullptr && !res_method->IsMiranda()) { mirror::Class* klass = res_method->GetDeclaringClass(); std::string temp; res_method_class = ®_types_.FromClass(klass->GetDescriptor(&temp), klass, @@ -3369,11 +3372,27 @@ ArtMethod* MethodVerifier::VerifyInvocationArgsFromIterator( << " but expected " << reg_type; return nullptr; } - } else if (!work_line_->VerifyRegisterType(this, get_reg, reg_type)) { - // Continue on soft failures. We need to find possible hard failures to avoid problems in the - // compiler. - if (have_pending_hard_failure_) { - return nullptr; + } else { + if (!work_line_->VerifyRegisterType(this, get_reg, reg_type)) { + // Continue on soft failures. We need to find possible hard failures to avoid problems in + // the compiler. + if (have_pending_hard_failure_) { + return nullptr; + } + } else if (reg_type.IsLongOrDoubleTypes()) { + // Check that registers are consecutive (for non-range invokes). Invokes are the only + // instructions not specifying register pairs by the first component, but require them + // nonetheless. Only check when there's an actual register in the parameters. If there's + // none, this will fail below. 
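The comment above states the rule the next hunk enforces: in a non-range invoke, a long or double argument spans two registers, and those registers must be consecutive. A standalone restatement of that rule, with invented names, purely for illustration:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch of the constraint only: for each declared wide (long/double) parameter,
// the two argument registers naming it must be vN and vN+1.
bool WidePairsAreConsecutive(const std::vector<uint32_t>& arg_regs,
                             const std::vector<bool>& param_is_wide) {
  size_t reg_index = 0;
  for (bool wide : param_is_wide) {
    if (wide) {
      if (reg_index + 1 >= arg_regs.size() ||
          arg_regs[reg_index + 1] != arg_regs[reg_index] + 1) {
        return false;  // e.g. invoke-virtual {v0, v2, v5} with a long in (v2, v5).
      }
      reg_index += 2;  // A wide value consumes two argument registers.
    } else {
      reg_index += 1;
    }
  }
  return true;
}
```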
+ if (!is_range && sig_registers + 1 < expected_args) { + uint32_t second_reg = arg[sig_registers + 1]; + if (second_reg != get_reg + 1) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, long or double parameter " + "at index " << sig_registers << " is not a pair: " << get_reg << " + " + << second_reg << "."; + return nullptr; + } + } } } sig_registers += reg_type.IsLongOrDoubleTypes() ? 2 : 1; diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc index 3dbfe1b516..0c7cce908c 100644 --- a/runtime/well_known_classes.cc +++ b/runtime/well_known_classes.cc @@ -34,6 +34,7 @@ jclass WellKnownClasses::dalvik_system_DexFile; jclass WellKnownClasses::dalvik_system_DexPathList; jclass WellKnownClasses::dalvik_system_DexPathList__Element; jclass WellKnownClasses::dalvik_system_PathClassLoader; +jclass WellKnownClasses::dalvik_system_VMRuntime; jclass WellKnownClasses::java_lang_BootClassLoader; jclass WellKnownClasses::java_lang_ClassLoader; jclass WellKnownClasses::java_lang_ClassNotFoundException; @@ -63,6 +64,7 @@ jclass WellKnownClasses::org_apache_harmony_dalvik_ddmc_Chunk; jclass WellKnownClasses::org_apache_harmony_dalvik_ddmc_DdmServer; jmethodID WellKnownClasses::com_android_dex_Dex_create; +jmethodID WellKnownClasses::dalvik_system_VMRuntime_runFinalization; jmethodID WellKnownClasses::java_lang_Boolean_valueOf; jmethodID WellKnownClasses::java_lang_Byte_valueOf; jmethodID WellKnownClasses::java_lang_Character_valueOf; @@ -209,6 +211,8 @@ void WellKnownClasses::Init(JNIEnv* env) { dalvik_system_DexPathList = CacheClass(env, "dalvik/system/DexPathList"); dalvik_system_DexPathList__Element = CacheClass(env, "dalvik/system/DexPathList$Element"); dalvik_system_PathClassLoader = CacheClass(env, "dalvik/system/PathClassLoader"); + dalvik_system_VMRuntime = CacheClass(env, "dalvik/system/VMRuntime"); + java_lang_BootClassLoader = CacheClass(env, "java/lang/BootClassLoader"); java_lang_ClassLoader = CacheClass(env, "java/lang/ClassLoader"); java_lang_ClassNotFoundException = CacheClass(env, "java/lang/ClassNotFoundException"); @@ -238,6 +242,7 @@ void WellKnownClasses::Init(JNIEnv* env) { org_apache_harmony_dalvik_ddmc_Chunk = CacheClass(env, "org/apache/harmony/dalvik/ddmc/Chunk"); org_apache_harmony_dalvik_ddmc_DdmServer = CacheClass(env, "org/apache/harmony/dalvik/ddmc/DdmServer"); + dalvik_system_VMRuntime_runFinalization = CacheMethod(env, dalvik_system_VMRuntime, true, "runFinalization", "(J)V"); com_android_dex_Dex_create = CacheMethod(env, com_android_dex_Dex, true, "create", "(Ljava/nio/ByteBuffer;)Lcom/android/dex/Dex;"); java_lang_ClassNotFoundException_init = CacheMethod(env, java_lang_ClassNotFoundException, false, "<init>", "(Ljava/lang/String;Ljava/lang/Throwable;)V"); java_lang_ClassLoader_loadClass = CacheMethod(env, java_lang_ClassLoader, false, "loadClass", "(Ljava/lang/String;)Ljava/lang/Class;"); @@ -364,7 +369,7 @@ void WellKnownClasses::Init(JNIEnv* env) { void WellKnownClasses::LateInit(JNIEnv* env) { ScopedLocalRef<jclass> java_lang_Runtime(env, env->FindClass("java/lang/Runtime")); - java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"); + java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;"); } mirror::Class* WellKnownClasses::ToClass(jclass global_jclass) { diff --git 
a/runtime/well_known_classes.h b/runtime/well_known_classes.h index d25d1c3f7d..66b9abece7 100644 --- a/runtime/well_known_classes.h +++ b/runtime/well_known_classes.h @@ -45,6 +45,7 @@ struct WellKnownClasses { static jclass dalvik_system_DexPathList; static jclass dalvik_system_DexPathList__Element; static jclass dalvik_system_PathClassLoader; + static jclass dalvik_system_VMRuntime; static jclass java_lang_BootClassLoader; static jclass java_lang_ClassLoader; static jclass java_lang_ClassNotFoundException; @@ -74,6 +75,7 @@ struct WellKnownClasses { static jclass org_apache_harmony_dalvik_ddmc_DdmServer; static jmethodID com_android_dex_Dex_create; + static jmethodID dalvik_system_VMRuntime_runFinalization; static jmethodID java_lang_Boolean_valueOf; static jmethodID java_lang_Byte_valueOf; static jmethodID java_lang_Character_valueOf; diff --git a/sigchainlib/Android.mk b/sigchainlib/Android.mk index e1aae11f06..11f44fec83 100644 --- a/sigchainlib/Android.mk +++ b/sigchainlib/Android.mk @@ -22,6 +22,7 @@ include $(CLEAR_VARS) LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION) LOCAL_MODULE_TAGS := optional LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) +LOCAL_ASFLAGS += $(ART_TARGET_ASFLAGS) LOCAL_SRC_FILES := sigchain_dummy.cc LOCAL_CLANG = $(ART_TARGET_CLANG) LOCAL_MODULE:= libsigchain @@ -36,6 +37,7 @@ include $(CLEAR_VARS) LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION) LOCAL_MODULE_TAGS := optional LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) +LOCAL_ASFLAGS += $(ART_TARGET_ASFLAGS) LOCAL_SRC_FILES := sigchain.cc LOCAL_CLANG = $(ART_TARGET_CLANG) LOCAL_MODULE:= libsigchain @@ -51,6 +53,7 @@ LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION) LOCAL_MODULE_TAGS := optional LOCAL_IS_HOST_MODULE := true LOCAL_CFLAGS += $(ART_HOST_CFLAGS) +LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS) LOCAL_CLANG = $(ART_HOST_CLANG) LOCAL_SRC_FILES := sigchain_dummy.cc LOCAL_MODULE:= libsigchain @@ -65,6 +68,7 @@ LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION) LOCAL_MODULE_TAGS := optional LOCAL_IS_HOST_MODULE := true LOCAL_CFLAGS += $(ART_HOST_CFLAGS) +LOCAL_ASFLAGS += $(ART_HOST_ASFLAGS) LOCAL_CLANG = $(ART_HOST_CLANG) LOCAL_SRC_FILES := sigchain.cc LOCAL_MODULE:= libsigchain diff --git a/sigchainlib/version-script.txt b/sigchainlib/version-script.txt index ce1505490b..08c312e7e2 100644 --- a/sigchainlib/version-script.txt +++ b/sigchainlib/version-script.txt @@ -5,6 +5,7 @@ global: InvokeUserSignalHandler; InitializeSignalChain; EnsureFrontOfChain; + SetSpecialSignalHandlerFn; sigaction; signal; sigprocmask; diff --git a/test/004-NativeAllocations/src/Main.java b/test/004-NativeAllocations/src/Main.java index a99fe92081..92f4e21f40 100644 --- a/test/004-NativeAllocations/src/Main.java +++ b/test/004-NativeAllocations/src/Main.java @@ -19,6 +19,8 @@ import java.lang.Runtime; public class Main { static Object nativeLock = new Object(); + static Object deadlockLock = new Object(); + static boolean aboutToDeadlockLock = false; static int nativeBytes = 0; static Object runtime; static Method register_native_allocation; @@ -28,13 +30,15 @@ public class Main { static class NativeAllocation { private int bytes; - NativeAllocation(int bytes) throws Exception { + NativeAllocation(int bytes, boolean testingDeadlock) throws Exception { this.bytes = bytes; register_native_allocation.invoke(runtime, bytes); synchronized (nativeLock) { - nativeBytes += bytes; - if (nativeBytes > maxMem) { - throw new OutOfMemoryError(); + if (!testingDeadlock) { + nativeBytes += bytes; + if (nativeBytes > maxMem) { + throw new OutOfMemoryError(); + } } } } @@ -44,6 +48,9 @@ public 
class Main { nativeBytes -= bytes; } register_native_free.invoke(runtime, bytes); + aboutToDeadlockLock = true; + synchronized (deadlockLock) { + } } } @@ -59,7 +66,20 @@ public class Main { int allocation_count = 256; NativeAllocation[] allocations = new NativeAllocation[count]; for (int i = 0; i < allocation_count; ++i) { - allocations[i % count] = new NativeAllocation(size); + allocations[i % count] = new NativeAllocation(size, false); + } + // Test that we don't get a deadlock if we are holding nativeLock. If there is no timeout, + // then we will get a finalizer timeout exception. + aboutToDeadlockLock = false; + synchronized (deadlockLock) { + for (int i = 0; aboutToDeadlockLock != true; ++i) { + allocations[i % count] = new NativeAllocation(size, true); + } + // Do more allocations now that the finalizer thread is deadlocked so that we force + // finalization and timeout. + for (int i = 0; i < 10; ++i) { + allocations[i % count] = new NativeAllocation(size, true); + } } System.out.println("Test complete"); } diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java index f41ff2a94a..4914ba2289 100644 --- a/test/098-ddmc/src/Main.java +++ b/test/098-ddmc/src/Main.java @@ -43,14 +43,24 @@ public class Main { System.out.println("Confirm when we overflow, we don't roll over to zero. b/17392248"); final int overflowAllocations = 64 * 1024; // Won't fit in unsigned 16-bit value. + // TODO: Temporary fix. Keep the new objects live so they are not garbage collected. + // This will cause OOM exception for GC stress tests. The root cause is changed behaviour of + // getRecentAllocations(). Working on restoring its old behaviour. b/20037135 + Object[] objects = new Object[overflowAllocations]; for (int i = 0; i < overflowAllocations; i++) { - new Object(); + objects[i] = new Object(); } Allocations after = new Allocations(DdmVmInternal.getRecentAllocations()); System.out.println("before < overflowAllocations=" + (before.numberOfEntries < overflowAllocations)); System.out.println("after > before=" + (after.numberOfEntries > before.numberOfEntries)); System.out.println("after.numberOfEntries=" + after.numberOfEntries); + // TODO: Temporary fix as above. 
b/20037135 + objects = null; + Runtime.getRuntime().gc(); + final int fillerStrings = 16 * 1024; + String[] strings = new String[fillerStrings]; + System.out.println("Disable and confirm back to empty"); DdmVmInternal.enableRecentAllocations(false); System.out.println("status=" + DdmVmInternal.getRecentAllocationStatus()); @@ -66,8 +76,8 @@ public class Main { System.out.println("Confirm we can reenable twice in a row without losing allocations"); DdmVmInternal.enableRecentAllocations(true); System.out.println("status=" + DdmVmInternal.getRecentAllocationStatus()); - for (int i = 0; i < 16 * 1024; i++) { - new String("fnord"); + for (int i = 0; i < fillerStrings; i++) { + strings[i] = new String("fnord"); } Allocations first = new Allocations(DdmVmInternal.getRecentAllocations()); DdmVmInternal.enableRecentAllocations(true); diff --git a/test/135-MirandaDispatch/expected.txt b/test/135-MirandaDispatch/expected.txt index 134d8d0b47..5b098e5fac 100644 --- a/test/135-MirandaDispatch/expected.txt +++ b/test/135-MirandaDispatch/expected.txt @@ -1 +1,2 @@ +b/21646347 Finishing diff --git a/test/135-MirandaDispatch/smali/b_21646347.smali b/test/135-MirandaDispatch/smali/b_21646347.smali new file mode 100644 index 0000000000..b4979a5357 --- /dev/null +++ b/test/135-MirandaDispatch/smali/b_21646347.smali @@ -0,0 +1,15 @@ +.class public LB21646347; + +# If an invoke-virtual dispatches to a miranda method, ensure that we test for the receiver +# being a subclass of the abstract class, not postpone the check because the miranda method's +# declaring class is an interface. + +.super Ljava/lang/Object; + +.method public static run(LB21646347;)V + .registers 1 + # Invoke the miranda method on an object of this class. This should fail type-checking, + # instead of letting this pass as the declaring class is an interface. + invoke-virtual {v0}, LMain$AbstractClass;->m()V + return-void +.end method diff --git a/test/135-MirandaDispatch/src/Main.java b/test/135-MirandaDispatch/src/Main.java index bb005b0103..ada8cefead 100644 --- a/test/135-MirandaDispatch/src/Main.java +++ b/test/135-MirandaDispatch/src/Main.java @@ -46,6 +46,15 @@ public class Main { if (counter != loopIterations * loopIterations) { System.out.println("Expected " + loopIterations * loopIterations + " got " + counter); } + + try { + Class<?> b21646347 = Class.forName("B21646347"); + throw new RuntimeException("Expected a VerifyError"); + } catch (VerifyError expected) { + System.out.println("b/21646347"); + } catch (Throwable t) { + t.printStackTrace(); + } System.out.println("Finishing"); } } diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc index b2d7e55214..601fbaaea8 100644 --- a/test/137-cfi/cfi.cc +++ b/test/137-cfi/cfi.cc @@ -29,6 +29,9 @@ #include "base/logging.h" #include "base/macros.h" +#include "gc/heap.h" +#include "gc/space/image_space.h" +#include "oat_file.h" #include "utils.h" namespace art { @@ -73,18 +76,45 @@ static bool CheckStack(Backtrace* bt, const std::vector<std::string>& seq) { } } + printf("Can not find %s in backtrace:\n", seq[cur_search_index].c_str()); + for (Backtrace::const_iterator it = bt->begin(); it != bt->end(); ++it) { + if (BacktraceMap::IsValid(it->map)) { + printf(" %s\n", it->func_name.c_str()); + } + } + return false; } #endif +// Currently we have to fall back to our own loader for the boot image when it's compiled PIC +// because its base is zero. Thus in-process unwinding through it won't work. This is a helper +// detecting this. 
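An aside before the PIC-detection helper below: the native side of this test (cfi.cc, shown above) drives libbacktrace through one small pattern — create, unwind, then walk symbolized frames. A hedged standalone sketch of that pattern follows; the header path and the function name are assumptions, while the individual calls mirror those used in the test.

```cpp
#include <backtrace/Backtrace.h>  // Assumed header for the Backtrace/BacktraceMap API used in cfi.cc.

#include <sys/types.h>
#include <cstdio>
#include <memory>

// Unwind the current process at the given thread and print frames that fall in
// valid maps, in the same style as the test's diagnostic output.
bool DumpCurrentStack(pid_t tid) {
  std::unique_ptr<Backtrace> bt(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid));
  if (bt == nullptr || !bt->Unwind(0, nullptr) || bt->NumFrames() == 0) {
    printf("Can not unwind in process.\n");
    return false;
  }
  for (Backtrace::const_iterator it = bt->begin(); it != bt->end(); ++it) {
    if (BacktraceMap::IsValid(it->map)) {
      printf("  %s\n", it->func_name.c_str());
    }
  }
  return true;
}
```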
+#if __linux__ +static bool IsPicImage() { + gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace(); + CHECK(image_space != nullptr); // We should be running with an image. + const OatFile* oat_file = image_space->GetOatFile(); + CHECK(oat_file != nullptr); // We should have an oat file to go with the image. + return oat_file->IsPic(); +} +#endif + extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindInProcess(JNIEnv*, jobject, jint, jboolean) { #if __linux__ + if (IsPicImage()) { + LOG(INFO) << "Image is pic, in-process unwinding check bypassed."; + return JNI_TRUE; + } + // TODO: What to do on Valgrind? std::unique_ptr<Backtrace> bt(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, GetTid())); if (!bt->Unwind(0, nullptr)) { + printf("Can not unwind in process.\n"); return JNI_FALSE; } else if (bt->NumFrames() == 0) { + printf("No frames for unwind in process.\n"); return JNI_FALSE; } @@ -94,6 +124,7 @@ extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindInProcess(JNIEnv*, jobject std::vector<std::string> seq = { "Java_Main_unwindInProcess", // This function. "boolean Main.unwindInProcess(int, boolean)", // The corresponding Java native method frame. + "int java.util.Arrays.binarySearch(java.lang.Object[], int, int, java.lang.Object, java.util.Comparator)", // Framework method. "void Main.main(java.lang.String[])" // The Java entry method. }; @@ -155,6 +186,7 @@ extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindOtherProcess(JNIEnv*, jobj if (ptrace(PTRACE_ATTACH, pid, 0, 0)) { // Were not able to attach, bad. + printf("Failed to attach to other process.\n"); PLOG(ERROR) << "Failed to attach."; kill(pid, SIGCONT); return JNI_FALSE; @@ -172,8 +204,10 @@ extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindOtherProcess(JNIEnv*, jobj std::unique_ptr<Backtrace> bt(Backtrace::Create(pid, BACKTRACE_CURRENT_THREAD)); bool result = true; if (!bt->Unwind(0, nullptr)) { + printf("Can not unwind other process.\n"); result = false; } else if (bt->NumFrames() == 0) { + printf("No frames for unwind of other process.\n"); result = false; } @@ -185,6 +219,7 @@ extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindOtherProcess(JNIEnv*, jobj // Note: For some reason, the name isn't // resolved, so don't look for it right now. "boolean Main.sleep(int, boolean, double)", // The corresponding Java native method frame. + "int java.util.Arrays.binarySearch(java.lang.Object[], int, int, java.lang.Object, java.util.Comparator)", // Framework method. "void Main.main(java.lang.String[])" // The Java entry method. }; diff --git a/test/137-cfi/src/Main.java b/test/137-cfi/src/Main.java index e184e66e6f..658ba53099 100644 --- a/test/137-cfi/src/Main.java +++ b/test/137-cfi/src/Main.java @@ -20,8 +20,10 @@ import java.io.InputStream; import java.io.OutputStream; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.Comparator; -public class Main { +public class Main implements Comparator<Main> { // Whether to test local unwinding. Libunwind uses linker info to find executables. As we do // not dlopen at the moment, this doesn't work, so keep it off for now. 
public final static boolean TEST_LOCAL_UNWINDING = false; @@ -32,6 +34,8 @@ public class Main { private boolean secondary; + private boolean passed; + public Main(boolean secondary) { this.secondary = secondary; } @@ -60,13 +64,13 @@ public class Main { } private void runSecondary() { - foo(true); + foo(); throw new RuntimeException("Didn't expect to get back..."); } private void runPrimary() { // First do the in-process unwinding. - if (TEST_LOCAL_UNWINDING && !foo(false)) { + if (TEST_LOCAL_UNWINDING && !foo()) { System.out.println("Unwinding self failed."); } @@ -134,8 +138,19 @@ public class Main { } } - public boolean foo(boolean b) { - return bar(b); + public boolean foo() { + // Call bar via Arrays.binarySearch. + // This tests that we can unwind from framework code. + Main[] array = { this, this, this }; + Arrays.binarySearch(array, 0, 3, this /* value */, this /* comparator */); + return passed; + } + + public int compare(Main lhs, Main rhs) { + passed = bar(secondary); + // Returning "equal" ensures that we terminate search + // after first item and thus call bar() only once. + return 0; } public boolean bar(boolean b) { diff --git a/test/441-checker-inliner/src/Main.java b/test/441-checker-inliner/src/Main.java index df969a488e..3899d7fb26 100644 --- a/test/441-checker-inliner/src/Main.java +++ b/test/441-checker-inliner/src/Main.java @@ -19,7 +19,7 @@ public class Main { /// CHECK-START: void Main.InlineVoid() inliner (before) /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 /// CHECK-DAG: InvokeStaticOrDirect - /// CHECK-DAG: InvokeStaticOrDirect [<<Const42>>] + /// CHECK-DAG: InvokeStaticOrDirect [<<Const42>>,{{[ij]\d+}}] /// CHECK-START: void Main.InlineVoid() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect @@ -31,7 +31,7 @@ public class Main { /// CHECK-START: int Main.InlineParameter(int) inliner (before) /// CHECK-DAG: <<Param:i\d+>> ParameterValue - /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect [<<Param>>] + /// CHECK-DAG: <<Result:i\d+>> InvokeStaticOrDirect [<<Param>>,{{[ij]\d+}}] /// CHECK-DAG: Return [<<Result>>] /// CHECK-START: int Main.InlineParameter(int) inliner (after) @@ -44,7 +44,7 @@ public class Main { /// CHECK-START: long Main.InlineWideParameter(long) inliner (before) /// CHECK-DAG: <<Param:j\d+>> ParameterValue - /// CHECK-DAG: <<Result:j\d+>> InvokeStaticOrDirect [<<Param>>] + /// CHECK-DAG: <<Result:j\d+>> InvokeStaticOrDirect [<<Param>>,{{[ij]\d+}}] /// CHECK-DAG: Return [<<Result>>] /// CHECK-START: long Main.InlineWideParameter(long) inliner (after) @@ -57,7 +57,7 @@ public class Main { /// CHECK-START: java.lang.Object Main.InlineReferenceParameter(java.lang.Object) inliner (before) /// CHECK-DAG: <<Param:l\d+>> ParameterValue - /// CHECK-DAG: <<Result:l\d+>> InvokeStaticOrDirect [<<Param>>] + /// CHECK-DAG: <<Result:l\d+>> InvokeStaticOrDirect [<<Param>>,{{[ij]\d+}}] /// CHECK-DAG: Return [<<Result>>] /// CHECK-START: java.lang.Object Main.InlineReferenceParameter(java.lang.Object) inliner (after) @@ -130,8 +130,8 @@ public class Main { /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 /// CHECK-DAG: <<Const3:i\d+>> IntConstant 3 /// CHECK-DAG: <<Const5:i\d+>> IntConstant 5 - /// CHECK-DAG: <<Add:i\d+>> InvokeStaticOrDirect [<<Const1>>,<<Const3>>] - /// CHECK-DAG: <<Sub:i\d+>> InvokeStaticOrDirect [<<Const5>>,<<Const3>>] + /// CHECK-DAG: <<Add:i\d+>> InvokeStaticOrDirect [<<Const1>>,<<Const3>>,{{[ij]\d+}}] + /// CHECK-DAG: <<Sub:i\d+>> InvokeStaticOrDirect [<<Const5>>,<<Const3>>,{{[ij]\d+}}] /// CHECK-DAG: <<Phi:i\d+>> Phi [<<Add>>,<<Sub>>] 
/// CHECK-DAG: Return [<<Phi>>] diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java index 8960df896b..ed6fc1ee2b 100644 --- a/test/449-checker-bce/src/Main.java +++ b/test/449-checker-bce/src/Main.java @@ -617,15 +617,21 @@ public class Main { /// CHECK: ArrayGet /// CHECK-START: void Main.foo1(int[], int, int) BCE (after) - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto void foo1(int[] array, int start, int end) { // Three HDeoptimize will be added. One for @@ -646,15 +652,21 @@ public class Main { /// CHECK: ArrayGet /// CHECK-START: void Main.foo2(int[], int, int) BCE (after) - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto void foo2(int[] array, int start, int end) { // Three HDeoptimize will be added. One for @@ -675,14 +687,20 @@ public class Main { /// CHECK: ArrayGet /// CHECK-START: void Main.foo3(int[], int) BCE (after) - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto void foo3(int[] array, int end) { // Two HDeoptimize will be added. One for end < array.length, @@ -694,6 +712,7 @@ public class Main { } } + /// CHECK-START: void Main.foo4(int[], int) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet @@ -701,14 +720,20 @@ public class Main { /// CHECK: ArrayGet /// CHECK-START: void Main.foo4(int[], int) BCE (after) - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto void foo4(int[] array, int end) { // Two HDeoptimize will be added. One for end <= array.length, @@ -734,8 +759,6 @@ public class Main { /// CHECK-START: void Main.foo5(int[], int) BCE (after) /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet @@ -743,6 +766,15 @@ public class Main { /// CHECK: ArrayGet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + // array.length is defined before the loop header so no phi is needed. 
+ /// CHECK-NOT: Phi + /// CHECK: Goto void foo5(int[] array, int end) { // Bounds check in this loop can be eliminated without deoptimization. @@ -774,10 +806,6 @@ public class Main { /// CHECK: ArraySet /// CHECK-START: void Main.foo6(int[], int, int) BCE (after) - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet @@ -791,6 +819,17 @@ public class Main { /// CHECK: ArrayGet /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto + /// CHECK-NOT: Deoptimize void foo6(int[] array, int start, int end) { // Three HDeoptimize will be added. One for @@ -810,15 +849,21 @@ public class Main { /// CHECK: ArrayGet /// CHECK-START: void Main.foo7(int[], int, int, boolean) BCE (after) - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK: BoundsCheck /// CHECK: ArrayGet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto void foo7(int[] array, int start, int end, boolean lowEnd) { // Three HDeoptimize will be added. One for @@ -837,6 +882,73 @@ public class Main { } + /// CHECK-START: void Main.foo8(int[][], int, int) BCE (before) + /// CHECK: BoundsCheck + /// CHECK: ArrayGet + /// CHECK: BoundsCheck + /// CHECK: ArraySet + + /// CHECK-START: void Main.foo8(int[][], int, int) BCE (after) + /// CHECK: Phi + /// CHECK-NOT: BoundsCheck + /// CHECK: ArrayGet + /// CHECK: Phi + /// CHECK-NOT: BoundsCheck + /// CHECK: ArraySet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto + + void foo8(int[][] matrix, int start, int end) { + // Three HDeoptimize will be added for the outer loop. + // start >= 0, end <= matrix.length, and null check on matrix. + // Three HDeoptimize will be added for the inner loop + // start >= 0 (TODO: this may be optimized away), + // end <= row.length, and null check on row. + for (int i = start; i < end; i++) { + int[] row = matrix[i]; + for (int j = start; j < end; j++) { + row[j] = 1; + } + } + } + + + /// CHECK-START: void Main.foo9(int[]) BCE (before) + /// CHECK: NullCheck + /// CHECK: BoundsCheck + /// CHECK: ArrayGet + + /// CHECK-START: void Main.foo9(int[]) BCE (after) + // The loop is guaranteed to be entered. No need to transform the + // loop for loop body entry test. + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Phi + /// CHECK-NOT: NullCheck + /// CHECK-NOT: BoundsCheck + /// CHECK: ArrayGet + + void foo9(int[] array) { + // Two HDeoptimize will be added. One for + // 10 <= array.length, and one for null check on array. 
+ for (int i = 0 ; i < 10; i++) { + sum += array[i]; + } + } + + /// CHECK-START: void Main.partialLooping(int[], int, int) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet @@ -951,6 +1063,13 @@ public class Main { main.foo6(new int[10], 2, 7); main = new Main(); + int[] array9 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + main.foo9(array9); + if (main.sum != 45) { + System.out.println("foo9 failed!"); + } + + main = new Main(); int[] array = new int[4]; main.partialLooping(new int[3], 0, 4); if ((array[0] != 1) && (array[1] != 1) && diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java index 4056275d3d..9070627f1c 100644 --- a/test/450-checker-types/src/Main.java +++ b/test/450-checker-types/src/Main.java @@ -364,6 +364,37 @@ public class Main { ((SubclassA)b).$noinline$g(); } + public SubclassA $noinline$getSubclass() { throw new RuntimeException(); } + + /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier_after_types (before) + /// CHECK: CheckCast + + /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier_after_types (after) + /// CHECK-NOT: CheckCast + public void testArraySimpleRemove() { + Super[] b = new SubclassA[10]; + SubclassA[] c = (SubclassA[])b; + } + + /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier_after_types (before) + /// CHECK: CheckCast + + /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier_after_types (after) + /// CHECK-NOT: CheckCast + public void testInvokeSimpleRemove() { + Super b = $noinline$getSubclass(); + ((SubclassA)b).$noinline$g(); + } + /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier_after_types (before) + /// CHECK: CheckCast + + /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier_after_types (after) + /// CHECK-NOT: CheckCast + public void testArrayGetSimpleRemove() { + Super[] a = new SubclassA[10]; + ((SubclassA)a[0]).$noinline$g(); + } + public static void main(String[] args) { } } diff --git a/test/478-checker-clinit-check-pruning/src/Main.java b/test/478-checker-clinit-check-pruning/src/Main.java index a2c98c9363..e6aab630f9 100644 --- a/test/478-checker-clinit-check-pruning/src/Main.java +++ b/test/478-checker-clinit-check-pruning/src/Main.java @@ -26,7 +26,7 @@ public class Main { /// CHECK-START: void Main.invokeStaticInlined() builder (after) /// CHECK-DAG: <<LoadClass:l\d+>> LoadClass gen_clinit_check:false /// CHECK-DAG: <<ClinitCheck:l\d+>> ClinitCheck [<<LoadClass>>] - /// CHECK-DAG: InvokeStaticOrDirect [<<ClinitCheck>>] + /// CHECK-DAG: InvokeStaticOrDirect [{{[ij]\d+}},<<ClinitCheck>>] /// CHECK-START: void Main.invokeStaticInlined() inliner (after) /// CHECK-DAG: <<LoadClass:l\d+>> LoadClass gen_clinit_check:false @@ -69,12 +69,12 @@ public class Main { /// CHECK-START: void Main.invokeStaticNotInlined() builder (after) /// CHECK-DAG: <<LoadClass:l\d+>> LoadClass gen_clinit_check:false /// CHECK-DAG: <<ClinitCheck:l\d+>> ClinitCheck [<<LoadClass>>] - /// CHECK-DAG: InvokeStaticOrDirect [<<ClinitCheck>>] + /// CHECK-DAG: InvokeStaticOrDirect [{{[ij]\d+}},<<ClinitCheck>>] /// CHECK-START: void Main.invokeStaticNotInlined() inliner (after) /// CHECK-DAG: <<LoadClass:l\d+>> LoadClass gen_clinit_check:false /// CHECK-DAG: <<ClinitCheck:l\d+>> ClinitCheck [<<LoadClass>>] - /// CHECK-DAG: InvokeStaticOrDirect [<<ClinitCheck>>] + /// CHECK-DAG: InvokeStaticOrDirect [{{[ij]\d+}},<<ClinitCheck>>] // The following checks ensure the clinit check and load class // 
instructions added by the builder are pruned by the diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java index 5754723d8a..a4280de749 100644 --- a/test/482-checker-loop-back-edge-use/src/Main.java +++ b/test/482-checker-loop-back-edge-use/src/Main.java @@ -36,8 +36,8 @@ public class Main { } /// CHECK-START: void Main.loop3(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,62)} uses:[58,62] - /// CHECK: Goto liveness:60 + /// CHECK: ParameterValue liveness:4 ranges:{[4,64)} uses:[60,64] + /// CHECK: Goto liveness:62 /// CHECK-START: void Main.loop3(boolean) liveness (after) /// CHECK-NOT: Goto liveness:56 @@ -63,9 +63,9 @@ public class Main { } /// CHECK-START: void Main.loop5(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,52)} uses:[35,44,48,52] - /// CHECK: Goto liveness:46 - /// CHECK: Goto liveness:50 + /// CHECK: ParameterValue liveness:4 ranges:{[4,54)} uses:[37,46,50,54] + /// CHECK: Goto liveness:48 + /// CHECK: Goto liveness:52 public static void loop5(boolean incoming) { // 'incoming' must have a use at both back edges. while (Runtime.getRuntime() != null) { @@ -76,8 +76,8 @@ public class Main { } /// CHECK-START: void Main.loop6(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,48)} uses:[26,48] - /// CHECK: Goto liveness:46 + /// CHECK: ParameterValue liveness:4 ranges:{[4,50)} uses:[26,50] + /// CHECK: Goto liveness:48 /// CHECK-START: void Main.loop6(boolean) liveness (after) /// CHECK-NOT: Goto liveness:24 @@ -90,9 +90,9 @@ public class Main { } /// CHECK-START: void Main.loop7(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,52)} uses:[34,43,48,52] - /// CHECK: Goto liveness:46 - /// CHECK: Goto liveness:50 + /// CHECK: ParameterValue liveness:4 ranges:{[4,54)} uses:[36,45,50,54] + /// CHECK: Goto liveness:48 + /// CHECK: Goto liveness:52 public static void loop7(boolean incoming) { // 'incoming' must have a use at both back edges. while (Runtime.getRuntime() != null) { @@ -102,9 +102,9 @@ public class Main { } /// CHECK-START: void Main.loop8() liveness (after) - /// CHECK: StaticFieldGet liveness:14 ranges:{[14,46)} uses:[37,42,46] - /// CHECK: Goto liveness:40 - /// CHECK: Goto liveness:44 + /// CHECK: StaticFieldGet liveness:14 ranges:{[14,48)} uses:[39,44,48] + /// CHECK: Goto liveness:42 + /// CHECK: Goto liveness:46 public static void loop8() { // 'incoming' must have a use at both back edges. boolean incoming = field; @@ -114,8 +114,8 @@ public class Main { } /// CHECK-START: void Main.loop9() liveness (after) - /// CHECK: StaticFieldGet liveness:24 ranges:{[24,38)} uses:[33,38] - /// CHECK: Goto liveness:40 + /// CHECK: StaticFieldGet liveness:26 ranges:{[26,40)} uses:[35,40] + /// CHECK: Goto liveness:42 public static void loop9() { while (Runtime.getRuntime() != null) { // 'incoming' must only have a use in the inner loop. diff --git a/test/491-current-method/expected.txt b/test/491-current-method/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/491-current-method/expected.txt diff --git a/test/491-current-method/info.txt b/test/491-current-method/info.txt new file mode 100644 index 0000000000..e9678da769 --- /dev/null +++ b/test/491-current-method/info.txt @@ -0,0 +1,2 @@ +Regression test for optimizing that used to +crash in the presence of slow paths with intrinsics. 
diff --git a/test/491-current-method/src/Main.java b/test/491-current-method/src/Main.java new file mode 100644 index 0000000000..87ef05218d --- /dev/null +++ b/test/491-current-method/src/Main.java @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +class Main { + + // The code below is written in a way that will crash + // the generated code at the time of submission of this test. + // Therefore, changes to the register allocator may + // affect the reproducibility of the crash. + public static void $noinline$foo(int a, int b, int c) { + // The division on x86 will take EAX and EDX, leaving ECX + // to put the ART current method. + c = c / 42; + // We use the empty string for forcing the slow path. + // The slow path for charAt when it is intrinsified, will + // move the parameter to ECX, and therefore overwrite the ART + // current method. + "".charAt(c); + + // Do more things in the method to prevent inlining. + c = c / 42; + "".charAt(c); + c = c / 42; + "".charAt(c); + c = c / 42; + "".charAt(c); + c = c / 42; + "".charAt(c); + c = c / 42; + "".charAt(c); + c = c / 42; + "".charAt(c); + c = c / 42; + "".charAt(c); + c = c / 42; + "".charAt(c); + c = c / 42; + "".charAt(c); + c = c / 42; + "".charAt(c); + c = c / 42; + "".charAt(c); + } + + public static void main(String[] args) { + boolean didThrow = false; + try { + $noinline$foo(1, 2, 3); + } catch (Throwable e) { + didThrow = true; + } + + if (!didThrow) { + throw new Error("Expected an exception from charAt"); + } + } +} diff --git a/test/492-checker-inline-invoke-interface/expected.txt b/test/492-checker-inline-invoke-interface/expected.txt new file mode 100644 index 0000000000..b0014d7529 --- /dev/null +++ b/test/492-checker-inline-invoke-interface/expected.txt @@ -0,0 +1,5 @@ +Hello from clinit +java.lang.Exception + at ForceStatic.<clinit>(Main.java:24) + at Main.$inline$foo(Main.java:31) + at Main.main(Main.java:48) diff --git a/test/492-checker-inline-invoke-interface/info.txt b/test/492-checker-inline-invoke-interface/info.txt new file mode 100644 index 0000000000..4a0a5ff1d3 --- /dev/null +++ b/test/492-checker-inline-invoke-interface/info.txt @@ -0,0 +1 @@ +Checker test to ensure we can inline interface calls. diff --git a/test/492-checker-inline-invoke-interface/src/Main.java b/test/492-checker-inline-invoke-interface/src/Main.java new file mode 100644 index 0000000000..9a4548542b --- /dev/null +++ b/test/492-checker-inline-invoke-interface/src/Main.java @@ -0,0 +1,50 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +interface Itf { + public void $inline$foo(); +} + +class ForceStatic { + static { + System.out.println("Hello from clinit"); + new Exception().printStackTrace(); + } + static int field; +} + +public class Main implements Itf { + public void $inline$foo() { + int a = ForceStatic.field; + } + + /// CHECK-START: void Main.main(java.lang.String[]) inliner (before) + /// CHECK: InvokeStaticOrDirect + /// CHECK: InvokeStaticOrDirect + + /// CHECK-START: void Main.main(java.lang.String[]) inliner (before) + /// CHECK-NOT: ClinitCheck + + /// CHECK-START: void Main.main(java.lang.String[]) inliner (after) + /// CHECK-NOT: InvokeStaticOrDirect + + /// CHECK-START: void Main.main(java.lang.String[]) inliner (after) + /// CHECK: ClinitCheck + public static void main(String[] args) { + Itf itf = new Main(); + itf.$inline$foo(); + } +} diff --git a/test/493-checker-inline-invoke-interface/expected.txt b/test/493-checker-inline-invoke-interface/expected.txt new file mode 100644 index 0000000000..93620a6fb5 --- /dev/null +++ b/test/493-checker-inline-invoke-interface/expected.txt @@ -0,0 +1,5 @@ +Hello from clinit +java.lang.Exception + at ForceStatic.<clinit>(Main.java:24) + at Main.foo(Main.java:31) + at Main.main(Main.java:42) diff --git a/test/493-checker-inline-invoke-interface/info.txt b/test/493-checker-inline-invoke-interface/info.txt new file mode 100644 index 0000000000..bac9c82c9d --- /dev/null +++ b/test/493-checker-inline-invoke-interface/info.txt @@ -0,0 +1,2 @@ +Check that we can optimize interface calls without +requiring the verifier to sharpen them. diff --git a/test/493-checker-inline-invoke-interface/src/Main.java b/test/493-checker-inline-invoke-interface/src/Main.java new file mode 100644 index 0000000000..44b727fe55 --- /dev/null +++ b/test/493-checker-inline-invoke-interface/src/Main.java @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +interface Itf { + public void foo(); +} + +class ForceStatic { + static { + System.out.println("Hello from clinit"); + new Exception().printStackTrace(); + } + static int field; +} + +public class Main implements Itf { + public void foo() { + int a = ForceStatic.field; + } + + /// CHECK-START: void Main.main(java.lang.String[]) inliner (before) + /// CHECK: InvokeStaticOrDirect + /// CHECK: InvokeInterface + + /// CHECK-START: void Main.main(java.lang.String[]) inliner (after) + /// CHECK-NOT: Invoke{{.*}} + public static void main(String[] args) { + Itf itf = bar(); + itf.foo(); + } + + public static Itf bar() { + return new Main(); + } +} diff --git a/test/494-checker-instanceof-tests/expected.txt b/test/494-checker-instanceof-tests/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/494-checker-instanceof-tests/expected.txt diff --git a/test/494-checker-instanceof-tests/info.txt b/test/494-checker-instanceof-tests/info.txt new file mode 100644 index 0000000000..59e20bd6a9 --- /dev/null +++ b/test/494-checker-instanceof-tests/info.txt @@ -0,0 +1 @@ +Checker test for optimizations on instanceof. diff --git a/test/494-checker-instanceof-tests/src/Main.java b/test/494-checker-instanceof-tests/src/Main.java new file mode 100644 index 0000000000..bff9c72ded --- /dev/null +++ b/test/494-checker-instanceof-tests/src/Main.java @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class Main { + public static boolean $inline$classTypeTest(Object o) { + return o instanceof SubMain; + } + + public static boolean $inline$interfaceTypeTest(Object o) { + return o instanceof Itf; + } + + public static SubMain subMain; + public static Main mainField; + public static Unrelated unrelatedField; + public static FinalUnrelated finalUnrelatedField; + + /// CHECK-START: boolean Main.classTypeTestNull() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean classTypeTestNull() { + return $inline$classTypeTest(null); + } + + /// CHECK-START: boolean Main.classTypeTestExactMain() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean classTypeTestExactMain() { + return $inline$classTypeTest(new Main()); + } + + /// CHECK-START: boolean Main.classTypeTestExactSubMain() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 1 + /// CHECK-DAG: Return [<<Const>>] + public static boolean classTypeTestExactSubMain() { + return $inline$classTypeTest(new SubMain()); + } + + /// CHECK-START: boolean Main.classTypeTestSubMainOrNull() register (after) + /// CHECK-DAG: <<Value:z\d+>> NotEqual + /// CHECK-DAG: Return [<<Value>>] + public static boolean classTypeTestSubMainOrNull() { + return $inline$classTypeTest(subMain); + } + + /// CHECK-START: boolean Main.classTypeTestMainOrNull() register (after) + /// CHECK-DAG: <<Value:z\d+>> InstanceOf + /// CHECK-DAG: Return [<<Value>>] + public static boolean classTypeTestMainOrNull() { + return $inline$classTypeTest(mainField); + } + + /// CHECK-START: boolean Main.classTypeTestUnrelated() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean classTypeTestUnrelated() { + return $inline$classTypeTest(unrelatedField); + } + + /// CHECK-START: boolean Main.classTypeTestFinalUnrelated() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean classTypeTestFinalUnrelated() { + return $inline$classTypeTest(finalUnrelatedField); + } + + /// CHECK-START: boolean Main.interfaceTypeTestNull() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean interfaceTypeTestNull() { + return $inline$interfaceTypeTest(null); + } + + /// CHECK-START: boolean Main.interfaceTypeTestExactMain() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean interfaceTypeTestExactMain() { + return $inline$interfaceTypeTest(new Main()); + } + + /// CHECK-START: boolean Main.interfaceTypeTestExactSubMain() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 1 + /// CHECK-DAG: Return [<<Const>>] + public static boolean interfaceTypeTestExactSubMain() { + return $inline$interfaceTypeTest(new SubMain()); + } + + /// CHECK-START: boolean Main.interfaceTypeTestSubMainOrNull() register (after) + /// CHECK-DAG: <<Value:z\d+>> NotEqual + /// CHECK-DAG: Return [<<Value>>] + public static boolean interfaceTypeTestSubMainOrNull() { + return $inline$interfaceTypeTest(subMain); + } + + /// CHECK-START: boolean Main.interfaceTypeTestMainOrNull() register (after) + /// CHECK-DAG: <<Value:z\d+>> InstanceOf + /// CHECK-DAG: Return [<<Value>>] + public static boolean interfaceTypeTestMainOrNull() { + return $inline$interfaceTypeTest(mainField); + } + + /// CHECK-START: boolean 
Main.interfaceTypeTestUnrelated() register (after) + /// CHECK-DAG: <<Value:z\d+>> InstanceOf + /// CHECK-DAG: Return [<<Value>>] + public static boolean interfaceTypeTestUnrelated() { + // This method is the main difference between doing an instanceof on an interface + // or a class. We have to keep the instanceof in case a subclass of Unrelated + // implements the interface. + return $inline$interfaceTypeTest(unrelatedField); + } + + /// CHECK-START: boolean Main.interfaceTypeTestFinalUnrelated() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean interfaceTypeTestFinalUnrelated() { + return $inline$interfaceTypeTest(finalUnrelatedField); + } + + public static void expect(boolean expected, boolean actual) { + if (expected != actual) { + throw new Error("Unexpected result"); + } + } + + public static void main(String[] args) { + expect(false, classTypeTestNull()); + expect(false, classTypeTestExactMain()); + expect(true, classTypeTestExactSubMain()); + + subMain = null; + expect(false, classTypeTestSubMainOrNull()); + subMain = new SubMain(); + expect(true, classTypeTestSubMainOrNull()); + + mainField = null; + expect(false, classTypeTestMainOrNull()); + mainField = new Main(); + expect(false, classTypeTestMainOrNull()); + mainField = new SubMain(); + expect(true, classTypeTestMainOrNull()); + + unrelatedField = null; + expect(false, classTypeTestUnrelated()); + unrelatedField = new Unrelated(); + expect(false, classTypeTestUnrelated()); + + finalUnrelatedField = null; + expect(false, classTypeTestFinalUnrelated()); + finalUnrelatedField = new FinalUnrelated(); + expect(false, classTypeTestFinalUnrelated()); + + expect(false, interfaceTypeTestNull()); + expect(false, interfaceTypeTestExactMain()); + expect(true, interfaceTypeTestExactSubMain()); + + subMain = null; + expect(false, interfaceTypeTestSubMainOrNull()); + subMain = new SubMain(); + expect(true, interfaceTypeTestSubMainOrNull()); + + mainField = null; + expect(false, interfaceTypeTestMainOrNull()); + mainField = new Main(); + expect(false, interfaceTypeTestMainOrNull()); + mainField = new SubMain(); + expect(true, interfaceTypeTestMainOrNull()); + + unrelatedField = null; + expect(false, interfaceTypeTestUnrelated()); + unrelatedField = new Unrelated(); + expect(false, interfaceTypeTestUnrelated()); + + finalUnrelatedField = null; + expect(false, interfaceTypeTestFinalUnrelated()); + finalUnrelatedField = new FinalUnrelated(); + expect(false, interfaceTypeTestFinalUnrelated()); + } +} + +interface Itf { +} + +class SubMain extends Main implements Itf { +} + +class Unrelated { +} + +final class FinalUnrelated { +} diff --git a/test/495-checker-checkcast-tests/expected.txt b/test/495-checker-checkcast-tests/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/495-checker-checkcast-tests/expected.txt diff --git a/test/495-checker-checkcast-tests/info.txt b/test/495-checker-checkcast-tests/info.txt new file mode 100644 index 0000000000..4517b22c69 --- /dev/null +++ b/test/495-checker-checkcast-tests/info.txt @@ -0,0 +1 @@ +Checker tests for optimizations on checkcast. 
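
For readers following the CHECK lines of the 450-checker-instanceof-simplifier test above, the following is a minimal, hypothetical sketch (it is not part of the patch; the class and variable names are invented) restating at the Java level what those assertions expect the optimizing compiler to prove about instanceof once the helper is inlined:

    public class InstanceOfDemo {
        interface Itf { }
        static class Unrelated { }
        static final class FinalUnrelated { }
        static class Base { }
        static class Sub extends Base implements Itf { }

        public static void main(String[] args) {
            Object exactBase = new Base();
            Object exactSub = new Sub();
            Sub maybeNull = (args.length > 0) ? new Sub() : null;
            Object unrelated = new Unrelated();
            Object finalUnrelated = new FinalUnrelated();

            // Exact, non-matching type: always false (the test expects IntConstant 0).
            System.out.println(exactBase instanceof Sub);        // false
            // Exact, matching type: always true (the test expects IntConstant 1).
            System.out.println(exactSub instanceof Sub);         // true
            // Right declared type but possibly null: only a null check is needed
            // (the NotEqual the test expects).
            System.out.println(maybeNull instanceof Sub);        // same result as (maybeNull != null)
            // Final class tested against an interface it does not implement:
            // provably false, so the check can fold away.
            System.out.println(finalUnrelated instanceof Itf);   // false
            // Non-final class tested against an interface: the InstanceOf must be
            // kept, since some subclass of Unrelated could still implement Itf.
            System.out.println(unrelated instanceof Itf);        // false here, but not provable statically
        }
    }
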
diff --git a/test/495-checker-checkcast-tests/src/Main.java b/test/495-checker-checkcast-tests/src/Main.java new file mode 100644 index 0000000000..aa6d5a75f7 --- /dev/null +++ b/test/495-checker-checkcast-tests/src/Main.java @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + public static boolean $inline$classTypeTest(Object o) { + return ((SubMain)o) == o; + } + + public static boolean $inline$interfaceTypeTest(Object o) { + return ((Itf)o) == o; + } + + public static SubMain subMain; + public static Main mainField; + public static Unrelated unrelatedField; + public static FinalUnrelated finalUnrelatedField; + + /// CHECK-START: boolean Main.classTypeTestNull() register (after) + /// CHECK-NOT: CheckCast + public static boolean classTypeTestNull() { + return $inline$classTypeTest(null); + } + + /// CHECK-START: boolean Main.classTypeTestExactMain() register (after) + /// CHECK: CheckCast + public static boolean classTypeTestExactMain() { + return $inline$classTypeTest(new Main()); + } + + /// CHECK-START: boolean Main.classTypeTestExactSubMain() register (after) + /// CHECK-NOT: CheckCast + public static boolean classTypeTestExactSubMain() { + return $inline$classTypeTest(new SubMain()); + } + + /// CHECK-START: boolean Main.classTypeTestSubMainOrNull() register (after) + /// CHECK-NOT: CheckCast + public static boolean classTypeTestSubMainOrNull() { + return $inline$classTypeTest(subMain); + } + + /// CHECK-START: boolean Main.classTypeTestMainOrNull() register (after) + /// CHECK: CheckCast + public static boolean classTypeTestMainOrNull() { + return $inline$classTypeTest(mainField); + } + + /// CHECK-START: boolean Main.classTypeTestUnrelated() register (after) + /// CHECK: CheckCast + public static boolean classTypeTestUnrelated() { + return $inline$classTypeTest(unrelatedField); + } + + /// CHECK-START: boolean Main.classTypeTestFinalUnrelated() register (after) + /// CHECK: CheckCast + public static boolean classTypeTestFinalUnrelated() { + return $inline$classTypeTest(finalUnrelatedField); + } + + /// CHECK-START: boolean Main.interfaceTypeTestNull() register (after) + /// CHECK-NOT: CheckCast + public static boolean interfaceTypeTestNull() { + return $inline$interfaceTypeTest(null); + } + + /// CHECK-START: boolean Main.interfaceTypeTestExactMain() register (after) + /// CHECK: CheckCast + public static boolean interfaceTypeTestExactMain() { + return $inline$interfaceTypeTest(new Main()); + } + + /// CHECK-START: boolean Main.interfaceTypeTestExactSubMain() register (after) + /// CHECK-NOT: CheckCast + public static boolean interfaceTypeTestExactSubMain() { + return $inline$interfaceTypeTest(new SubMain()); + } + + /// CHECK-START: boolean Main.interfaceTypeTestSubMainOrNull() register (after) + /// CHECK-NOT: CheckCast + public static boolean interfaceTypeTestSubMainOrNull() { + return $inline$interfaceTypeTest(subMain); + } + + /// CHECK-START: boolean 
Main.interfaceTypeTestMainOrNull() register (after) + /// CHECK: CheckCast + public static boolean interfaceTypeTestMainOrNull() { + return $inline$interfaceTypeTest(mainField); + } + + /// CHECK-START: boolean Main.interfaceTypeTestUnrelated() register (after) + /// CHECK: CheckCast + public static boolean interfaceTypeTestUnrelated() { + return $inline$interfaceTypeTest(unrelatedField); + } + + /// CHECK-START: boolean Main.interfaceTypeTestFinalUnrelated() register (after) + /// CHECK: CheckCast + public static boolean interfaceTypeTestFinalUnrelated() { + return $inline$interfaceTypeTest(finalUnrelatedField); + } + + public static void main(String[] args) { + classTypeTestNull(); + try { + classTypeTestExactMain(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + classTypeTestExactSubMain(); + + subMain = null; + classTypeTestSubMainOrNull(); + subMain = new SubMain(); + classTypeTestSubMainOrNull(); + + mainField = null; + classTypeTestMainOrNull(); + mainField = new Main(); + try { + classTypeTestMainOrNull(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + mainField = new SubMain(); + classTypeTestMainOrNull(); + + unrelatedField = null; + classTypeTestUnrelated(); + unrelatedField = new Unrelated(); + try { + classTypeTestUnrelated(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + + finalUnrelatedField = null; + classTypeTestFinalUnrelated(); + finalUnrelatedField = new FinalUnrelated(); + try { + classTypeTestFinalUnrelated(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + + interfaceTypeTestNull(); + try { + interfaceTypeTestExactMain(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + interfaceTypeTestExactSubMain(); + + subMain = null; + interfaceTypeTestSubMainOrNull(); + subMain = new SubMain(); + interfaceTypeTestSubMainOrNull(); + + mainField = null; + interfaceTypeTestMainOrNull(); + mainField = new Main(); + try { + interfaceTypeTestMainOrNull(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + mainField = new SubMain(); + interfaceTypeTestMainOrNull(); + + unrelatedField = null; + interfaceTypeTestUnrelated(); + unrelatedField = new Unrelated(); + try { + interfaceTypeTestUnrelated(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + + finalUnrelatedField = null; + interfaceTypeTestFinalUnrelated(); + finalUnrelatedField = new FinalUnrelated(); + try { + interfaceTypeTestFinalUnrelated(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + } +} + +interface Itf { +} + +class SubMain extends Main implements Itf { +} + +class Unrelated { +} + +final class FinalUnrelated { +} diff --git a/test/496-checker-inlining-and-class-loader/expected.txt b/test/496-checker-inlining-and-class-loader/expected.txt new file mode 100644 index 0000000000..c6fcb51ecf --- /dev/null +++ b/test/496-checker-inlining-and-class-loader/expected.txt @@ -0,0 +1,4 @@ +Request for LoadedByMyClassLoader +Request for Main +In between the two calls. +In $noinline$bar diff --git a/test/496-checker-inlining-and-class-loader/info.txt b/test/496-checker-inlining-and-class-loader/info.txt new file mode 100644 index 0000000000..aa4b256207 --- /dev/null +++ b/test/496-checker-inlining-and-class-loader/info.txt @@ -0,0 +1,2 @@ +Regression test to ensure compilers preserve JLS +semantics of class loading. 
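
The 495-checker-checkcast-tests sources that follow rely on the plain Java rules for casts: casting a null reference always succeeds, and casting to a type the object is not an instance of throws ClassCastException. A small, self-contained sketch of those rules, which any CheckCast elimination must preserve (the names here are invented and not part of the patch):

    public class CheckCastDemo {
        static class Base { }
        static class Sub extends Base { }

        // Mirrors the shape of $inline$classTypeTest in the test below: the cast
        // exists only for its side effect, a potential ClassCastException.
        static boolean castsToSub(Object o) {
            return ((Sub) o) == o;
        }

        public static void main(String[] args) {
            System.out.println(castsToSub(null));        // true: null passes any cast
            System.out.println(castsToSub(new Sub()));   // true: exact match, cast is removable
            try {
                castsToSub(new Base());                  // a Base is not a Sub: the cast must still throw
                throw new AssertionError("ClassCastException expected");
            } catch (ClassCastException expected) {
                System.out.println("caught ClassCastException");
            }
        }
    }
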
diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java new file mode 100644 index 0000000000..f6d0b41a58 --- /dev/null +++ b/test/496-checker-inlining-and-class-loader/src/Main.java @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.List; + +class MyClassLoader extends ClassLoader { + MyClassLoader() throws Exception { + super(MyClassLoader.class.getClassLoader()); + + // Some magic to get access to the pathList field of BaseDexClassLoader. + ClassLoader loader = getClass().getClassLoader(); + Class<?> baseDexClassLoader = loader.getClass().getSuperclass(); + Field f = baseDexClassLoader.getDeclaredField("pathList"); + f.setAccessible(true); + Object pathList = f.get(loader); + + // Some magic to get access to the dexField field of pathList. + f = pathList.getClass().getDeclaredField("dexElements"); + f.setAccessible(true); + dexElements = (Object[]) f.get(pathList); + dexFileField = dexElements[0].getClass().getDeclaredField("dexFile"); + dexFileField.setAccessible(true); + } + + Object[] dexElements; + Field dexFileField; + + protected Class<?> loadClass(String className, boolean resolve) throws ClassNotFoundException { + System.out.println("Request for " + className); + + // We're only going to handle LoadedByMyClassLoader. + if (className != "LoadedByMyClassLoader") { + return getParent().loadClass(className); + } + + // Mimic what DexPathList.findClass is doing. 
+ try { + for (Object element : dexElements) { + Object dex = dexFileField.get(element); + Method method = dex.getClass().getDeclaredMethod( + "loadClassBinaryName", String.class, ClassLoader.class, List.class); + + if (dex != null) { + Class clazz = (Class)method.invoke(dex, className, this, null); + if (clazz != null) { + return clazz; + } + } + } + } catch (Exception e) { /* Ignore */ } + return null; + } +} + +class LoadedByMyClassLoader { + /// CHECK-START: void LoadedByMyClassLoader.bar() inliner (before) + /// CHECK: LoadClass + /// CHECK-NEXT: ClinitCheck + /// CHECK-NEXT: InvokeStaticOrDirect + /// CHECK-NEXT: LoadClass + /// CHECK-NEXT: ClinitCheck + /// CHECK-NEXT: StaticFieldGet + /// CHECK-NEXT: LoadString + /// CHECK-NEXT: NullCheck + /// CHECK-NEXT: InvokeVirtual + + /// CHECK-START: void LoadedByMyClassLoader.bar() inliner (after) + /// CHECK: LoadClass + /// CHECK-NEXT: ClinitCheck + /* We inlined Main.$inline$bar */ + /// CHECK-NEXT: LoadClass + /// CHECK-NEXT: ClinitCheck + /// CHECK-NEXT: StaticFieldGet + /// CHECK-NEXT: LoadString + /// CHECK-NEXT: NullCheck + /// CHECK-NEXT: InvokeVirtual + + /// CHECK-START: void LoadedByMyClassLoader.bar() register (before) + /* Load and initialize Main */ + /// CHECK: LoadClass gen_clinit_check:true + /* Load and initialize System */ + /// CHECK-NEXT: LoadClass gen_clinit_check:true + /// CHECK-NEXT: StaticFieldGet + /// CHECK-NEXT: LoadString + /// CHECK-NEXT: NullCheck + /// CHECK-NEXT: InvokeVirtual + public static void bar() { + Main.$inline$bar(); + System.out.println("In between the two calls."); + Main.$noinline$bar(); + } +} + +class Main { + public static void main(String[] args) throws Exception { + MyClassLoader o = new MyClassLoader(); + Class foo = o.loadClass("LoadedByMyClassLoader"); + Method m = foo.getDeclaredMethod("bar"); + m.invoke(null); + } + + public static void $inline$bar() { + } + + public static void $noinline$bar() { + try { + System.out.println("In $noinline$bar"); + } catch (Throwable t) { /* Ignore */ } + } +} diff --git a/test/497-inlining-and-class-loader/clear_dex_cache.cc b/test/497-inlining-and-class-loader/clear_dex_cache.cc new file mode 100644 index 0000000000..f9b33a2874 --- /dev/null +++ b/test/497-inlining-and-class-loader/clear_dex_cache.cc @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "art_method-inl.h" +#include "jni.h" +#include "scoped_thread_state_change.h" +#include "stack.h" +#include "thread.h" + +namespace art { + +namespace { + +extern "C" JNIEXPORT jobject JNICALL Java_Main_cloneResolvedMethods(JNIEnv*, jclass, jclass cls) { + ScopedObjectAccess soa(Thread::Current()); + return soa.Vm()->AddGlobalRef( + soa.Self(), + soa.Decode<mirror::Class*>(cls)->GetDexCache()->GetResolvedMethods()->Clone(soa.Self())); +} + +extern "C" JNIEXPORT void JNICALL Java_Main_restoreResolvedMethods( + JNIEnv*, jclass, jclass cls, jobject old_cache) { + ScopedObjectAccess soa(Thread::Current()); + mirror::PointerArray* now = soa.Decode<mirror::Class*>(cls)->GetDexCache()->GetResolvedMethods(); + mirror::PointerArray* old = soa.Decode<mirror::PointerArray*>(old_cache); + for (size_t i = 0, e = old->GetLength(); i < e; ++i) { + now->SetElementPtrSize(i, old->GetElementPtrSize<void*>(i, sizeof(void*)), sizeof(void*)); + } +} + +} // namespace + +} // namespace art diff --git a/test/497-inlining-and-class-loader/expected.txt b/test/497-inlining-and-class-loader/expected.txt new file mode 100644 index 0000000000..3e1d85e309 --- /dev/null +++ b/test/497-inlining-and-class-loader/expected.txt @@ -0,0 +1,7 @@ +java.lang.Exception + at Main.$noinline$bar(Main.java:127) + at Level2.$inline$bar(Level1.java:25) + at Level1.$inline$bar(Level1.java:19) + at LoadedByMyClassLoader.bar(Main.java:82) + at java.lang.reflect.Method.invoke(Native Method) + at Main.main(Main.java:101) diff --git a/test/497-inlining-and-class-loader/info.txt b/test/497-inlining-and-class-loader/info.txt new file mode 100644 index 0000000000..e7f02aaf34 --- /dev/null +++ b/test/497-inlining-and-class-loader/info.txt @@ -0,0 +1,2 @@ +Regression test for optimizing to ensure it is using +the correct class loader when walking inlined frames. diff --git a/test/497-inlining-and-class-loader/src/Level1.java b/test/497-inlining-and-class-loader/src/Level1.java new file mode 100644 index 0000000000..977af8321e --- /dev/null +++ b/test/497-inlining-and-class-loader/src/Level1.java @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Level1 { + public static void $inline$bar() { + Level2.$inline$bar(); + } +} + +class Level2 { + public static void $inline$bar() { + Main.$noinline$bar(); + } +} diff --git a/test/497-inlining-and-class-loader/src/Main.java b/test/497-inlining-and-class-loader/src/Main.java new file mode 100644 index 0000000000..0f7eb599cb --- /dev/null +++ b/test/497-inlining-and-class-loader/src/Main.java @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.List; + +class MyClassLoader extends ClassLoader { + MyClassLoader() throws Exception { + super(MyClassLoader.class.getClassLoader()); + + // Some magic to get access to the pathList field of BaseDexClassLoader. + ClassLoader loader = getClass().getClassLoader(); + Class<?> baseDexClassLoader = loader.getClass().getSuperclass(); + Field f = baseDexClassLoader.getDeclaredField("pathList"); + f.setAccessible(true); + Object pathList = f.get(loader); + + // Some magic to get access to the dexField field of pathList. + f = pathList.getClass().getDeclaredField("dexElements"); + f.setAccessible(true); + dexElements = (Object[]) f.get(pathList); + dexFileField = dexElements[0].getClass().getDeclaredField("dexFile"); + dexFileField.setAccessible(true); + } + + Object[] dexElements; + Field dexFileField; + + static ClassLoader level1ClassLoader; + + protected Class<?> loadClass(String className, boolean resolve) throws ClassNotFoundException { + if (this != level1ClassLoader) { + if (className.equals("Level1")) { + return level1ClassLoader.loadClass(className); + } else if (className.equals("Level2")) { + throw new ClassNotFoundException("None of my methods require Level2!"); + } else if (!className.equals("LoadedByMyClassLoader")) { + // We're only going to handle LoadedByMyClassLoader. + return getParent().loadClass(className); + } + } else { + if (className != "Level1" && className != "Level2") { + return getParent().loadClass(className); + } + } + + // Mimic what DexPathList.findClass is doing. + try { + for (Object element : dexElements) { + Object dex = dexFileField.get(element); + Method method = dex.getClass().getDeclaredMethod( + "loadClassBinaryName", String.class, ClassLoader.class, List.class); + + if (dex != null) { + Class clazz = (Class)method.invoke(dex, className, this, null); + if (clazz != null) { + return clazz; + } + } + } + } catch (Exception e) { /* Ignore */ } + return null; + } +} + +class LoadedByMyClassLoader { + public static void bar() { + Level1.$inline$bar(); + } +} + +class Main { + static { + System.loadLibrary("arttest"); + } + + public static void main(String[] args) throws Exception { + // Clone resolved methods, to restore the original version just + // before we walk the stack in $noinline$bar. + savedResolvedMethods = cloneResolvedMethods(Main.class); + + MyClassLoader o = new MyClassLoader(); + MyClassLoader.level1ClassLoader = new MyClassLoader(); + Class foo = o.loadClass("LoadedByMyClassLoader"); + Method m = foo.getDeclaredMethod("bar"); + try { + m.invoke(null); + } catch (Error e) { /* Ignore */ } + } + + public static void $inline$bar() { + } + + public static void $noinline$bar() { + try { + // Be evil and clear all dex cache entries. 
+ Field f = Class.class.getDeclaredField("dexCache"); + f.setAccessible(true); + Object dexCache = f.get(Main.class); + f = dexCache.getClass().getDeclaredField("resolvedTypes"); + f.setAccessible(true); + Object[] array = (Object[]) f.get(dexCache); + for (int i = 0; i < array.length; i++) { + array[i] = null; + } + restoreResolvedMethods(Main.class, savedResolvedMethods); + } catch (Throwable t) { /* Ignore */ } + + // This will walk the stack, trying to resolve methods in it. + // Because we cleared dex cache entries, we will have to find + // classes again, which require to use the correct class loader + // in the presence of inlining. + new Exception().printStackTrace(); + } + static Object savedResolvedMethods; + + static native Object cloneResolvedMethods(Class<?> cls); + static native void restoreResolvedMethods(Class<?> cls, Object saved); +} diff --git a/test/498-type-propagation/expected.txt b/test/498-type-propagation/expected.txt new file mode 100644 index 0000000000..ccaf6f8f0f --- /dev/null +++ b/test/498-type-propagation/expected.txt @@ -0,0 +1 @@ +Enter diff --git a/test/498-type-propagation/info.txt b/test/498-type-propagation/info.txt new file mode 100644 index 0000000000..b895e91f9d --- /dev/null +++ b/test/498-type-propagation/info.txt @@ -0,0 +1,2 @@ +Regression test for the SSA building of the optimizing +compiler. See comment in smali file. diff --git a/test/498-type-propagation/smali/TypePropagation.smali b/test/498-type-propagation/smali/TypePropagation.smali new file mode 100644 index 0000000000..088ca89985 --- /dev/null +++ b/test/498-type-propagation/smali/TypePropagation.smali @@ -0,0 +1,30 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LTypePropagation; + +.super Ljava/lang/Object; + +.method public static method([I)V + .registers 2 + const/4 v0, 0 + # When building the SSA graph, we will create a phi for v0, which will be of type + # integer. Only when we get rid of that phi in the redundant phi elimination will + # we realize it's just null. + :start + if-eq v1, v0, :end + if-eq v1, v0, :start + :end + return-void +.end method diff --git a/test/498-type-propagation/src/Main.java b/test/498-type-propagation/src/Main.java new file mode 100644 index 0000000000..7a14172bf5 --- /dev/null +++ b/test/498-type-propagation/src/Main.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.lang.reflect.Method; + +public class Main { + public static void main(String[] args) throws Exception { + // Workaround for b/18051191. + System.out.println("Enter"); + Class<?> c = Class.forName("TypePropagation"); + Method m = c.getMethod("method", int[].class); + int[] array = new int[7]; + Object[] arguments = { array }; + m.invoke(null, arguments); + } +} diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt index a6b216bf3a..85656374c5 100644 --- a/test/800-smali/expected.txt +++ b/test/800-smali/expected.txt @@ -16,4 +16,5 @@ MoveExc MoveExceptionOnEntry EmptySparseSwitch b/20224106 +b/17410612 Done! diff --git a/test/800-smali/smali/b_17410612.smali b/test/800-smali/smali/b_17410612.smali new file mode 100644 index 0000000000..17718cbf60 --- /dev/null +++ b/test/800-smali/smali/b_17410612.smali @@ -0,0 +1,14 @@ +.class public LB17410612; + +# Test that an invoke with a long parameter has the long parameter in +# a pair. This should fail in the verifier and not an abort in the compiler. + +.super Ljava/lang/Object; + +.method public static run()V + .registers 4 + const-wide v0, 0 # Make (v0, v1) a long + const-wide v2, 0 # Make (v2, v3) a long + invoke-static {v0, v3}, Ljava/lang/Long;->valueOf(J)Ljava/lang/Long; + return-void +.end method diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java index 3e88364089..33df06d87a 100644 --- a/test/800-smali/src/Main.java +++ b/test/800-smali/src/Main.java @@ -81,6 +81,8 @@ public class Main { null)); testCases.add(new TestCase("b/20224106", "B20224106", "run", null, new VerifyError(), 0)); + testCases.add(new TestCase("b/17410612", "B17410612", "run", null, new VerifyError(), + 0)); } public void runTests() { diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk index 57d06c49cd..fcb9f8a779 100644 --- a/test/Android.libarttest.mk +++ b/test/Android.libarttest.mk @@ -34,7 +34,8 @@ LIBARTTEST_COMMON_SRC_FILES := \ 455-set-vreg/set_vreg_jni.cc \ 457-regs/regs_jni.cc \ 461-get-reference-vreg/get_reference_vreg_jni.cc \ - 466-get-live-vreg/get_live_vreg_jni.cc + 466-get-live-vreg/get_live_vreg_jni.cc \ + 497-inlining-and-class-loader/clear_dex_cache.cc ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so ifdef TARGET_2ND_ARCH @@ -74,6 +75,7 @@ define build-libarttest else # host LOCAL_CLANG := $(ART_HOST_CLANG) LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS) + LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS) LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread LOCAL_IS_HOST_MODULE := true LOCAL_MULTILIB := both diff --git a/test/Android.libnativebridgetest.mk b/test/Android.libnativebridgetest.mk index 5a5f72584f..e8cc7e45ce 100644 --- a/test/Android.libnativebridgetest.mk +++ b/test/Android.libnativebridgetest.mk @@ -60,6 +60,7 @@ define build-libnativebridgetest else # host LOCAL_CLANG := $(ART_HOST_CLANG) LOCAL_CFLAGS := $(ART_HOST_CFLAGS) $(ART_HOST_DEBUG_CFLAGS) + LOCAL_ASFLAGS := $(ART_HOST_ASFLAGS) LOCAL_SHARED_LIBRARIES := libcutils LOCAL_LDLIBS := $(ART_HOST_LDLIBS) -ldl -lpthread ifeq ($(HOST_OS),linux) diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index fa13fe5c64..469df1f2b6 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -229,16 +229,10 @@ endif TEST_ART_BROKEN_NO_RELOCATE_TESTS := -# Tests that are broken with GC stress. 
-TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := - -ifneq (,$(filter gcstress,$(GC_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),gcstress,$(JNI_TYPES), \ - $(IMAGE_TYPES), $(PICTEST_TYPES), $(DBEUGGABLE_TYPES), $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := +# 098-ddmc is broken until we restore the old behavior of getRecentAllocation() of DDMS. b/20037135 +ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ + $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ + $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), 098-ddmc, $(ALL_ADDRESS_SIZES)) # 115-native-bridge setup is complicated. Need to implement it correctly for the target. ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \ @@ -259,6 +253,11 @@ ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \ $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),131-structural-change,$(ALL_ADDRESS_SIZES)) +# 138-duplicate-classes-check. Turned off temporarily, b/21333911. +ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ + $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \ + $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),138-duplicate-classes-check,$(ALL_ADDRESS_SIZES)) + # All these tests check that we have sane behavior if we don't have a patchoat or dex2oat. # Therefore we shouldn't run them in situations where we actually don't have these since they # explicitly test for them. These all also assume we have an image. @@ -339,6 +338,7 @@ TEST_ART_BROKEN_NDEBUG_TESTS := \ 457-regs \ 461-get-reference-vreg \ 466-get-live-vreg \ + 497-inlining-and-class-loader \ ifneq (,$(filter ndebug,$(RUN_TYPES))) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),ndebug,$(PREBUILD_TYPES), \ @@ -377,7 +377,8 @@ TEST_ART_BROKEN_JIT_RUN_TESTS := # Known broken tests for the default compiler (Quick). TEST_ART_BROKEN_DEFAULT_RUN_TESTS := \ - 457-regs + 457-regs \ + 496-checker-inlining-and-class-loader ifneq (,$(filter default,$(COMPILER_TYPES))) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ @@ -468,11 +469,6 @@ endif TEST_ART_BROKEN_HEAP_POISONING_RUN_TESTS := -# Test 137-cfi works in 32-bit only until we enable 64-bit ELF files. -ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),137-cfi,64) - # Clear variables ahead of appending to them when defining tests. $(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=)) $(foreach target, $(TARGET_TYPES), \ diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar index 240ed41ff2..09841bfcec 100755 --- a/test/etc/run-test-jar +++ b/test/etc/run-test-jar @@ -296,6 +296,10 @@ if [ "$RELOCATE" = "y" ]; then else FLAGS="$FLAGS -Xnorelocate" COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xnorelocate" + if [ "$HOST" = "y" ]; then + # Increase ulimit to 64MB in case we are running hprof test. 
+ ulimit -S 64000 || exit 1 + fi fi if [ "$HOST" = "n" ]; then diff --git a/test/run-test b/test/run-test index ed3309923b..ed033217b8 100755 --- a/test/run-test +++ b/test/run-test @@ -96,6 +96,7 @@ basic_verify="false" gc_verify="false" gc_stress="false" always_clean="no" +never_clean="no" have_dex2oat="yes" have_patchoat="yes" have_image="yes" @@ -270,6 +271,9 @@ while true; do elif [ "x$1" = "x--always-clean" ]; then always_clean="yes" shift + elif [ "x$1" = "x--never-clean" ]; then + never_clean="yes" + shift elif [ "x$1" = "x--dex2oat-swap" ]; then run_args="${run_args} --dex2oat-swap" shift @@ -472,6 +476,7 @@ if [ "$usage" = "yes" ]; then echo " --gcstress Run with gc stress testing" echo " --gcverify Run with gc verification" echo " --always-clean Delete the test files even if the test fails." + echo " --never-clean Keep the test files even if the test succeeds." echo " --android-root [path] The path on target for the android root. (/system by default)." echo " --dex2oat-swap Use a dex2oat swap file." ) 1>&2 @@ -668,7 +673,7 @@ fi ) 1>&2 # Clean up test files. -if [ "$always_clean" = "yes" -o "$good" = "yes" ]; then +if [ "$always_clean" = "yes" -o "$good" = "yes" ] && [ "$never_clean" = "no" ]; then cd "$oldwd" rm -rf "$tmp_dir" if [ "$target_mode" = "yes" -a "$build_exit" = "0" ]; then diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh index 77e6b1ad14..62fd67bfd7 100755 --- a/tools/buildbot-build.sh +++ b/tools/buildbot-build.sh @@ -60,7 +60,7 @@ while true; do done if [[ $mode == "host" ]]; then - make_command="make $j_arg build-art-host-tests $common_targets" + make_command="make $j_arg build-art-host-tests $common_targets out/host/linux-x86/lib/libjavacoretests.so out/host/linux-x86/lib64/libjavacoretests.so" echo "Executing $make_command" $make_command elif [[ $mode == "target" ]]; then @@ -70,7 +70,7 @@ elif [[ $mode == "target" ]]; then # Use '-e' to force the override of TARGET_GLOBAL_LDFLAGS. # Also, we build extra tools that will be used by tests, so that # they are compiled with our own linker. - make_command="make -e $j_arg build-art-target-tests $common_targets libjavacrypto linker toybox toolbox sh" + make_command="make -e $j_arg build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh" echo "Executing env $env $make_command" env $env $make_command fi diff --git a/tools/checker/match/line.py b/tools/checker/match/line.py index 711d814b10..ce11e2a528 100644 --- a/tools/checker/match/line.py +++ b/tools/checker/match/line.py @@ -41,7 +41,7 @@ def matchWords(checkerWord, stringWord, variables, pos): if expression.name in variables: pattern = re.escape(variables[expression.name]) else: - Logger.testFailed("Multiple definitions of variable \"{}\"".format(expression.name), + Logger.testFailed("Missing definition of variable \"{}\"".format(expression.name), pos.fileName, pos.lineNo) else: pattern = expression.pattern diff --git a/tools/generate-operator-out.py b/tools/generate-operator-out.py index 2b57222049..c74508d9cd 100755 --- a/tools/generate-operator-out.py +++ b/tools/generate-operator-out.py @@ -154,10 +154,12 @@ def ProcessFile(filename): sys.stderr.write('%s\n' % (rest)) Confused(filename, line_number, raw_line) - if len(enclosing_classes) > 0: - if is_enum_class: - enum_value = enum_name + '::' + enum_value - else: + # If the enum is scoped, we must prefix enum value with enum name (which is already prefixed + # by enclosing classes). 
+ if is_enum_class: + enum_value = enum_name + '::' + enum_value + else: + if len(enclosing_classes) > 0: enum_value = '::'.join(enclosing_classes) + '::' + enum_value _ENUMS[enum_name].append((enum_value, enum_text)) diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh index 344d2dedb3..4e76eb4354 100755 --- a/tools/run-libcore-tests.sh +++ b/tools/run-libcore-tests.sh @@ -33,7 +33,8 @@ if [ ! -f $test_jar ]; then fi # Packages that currently work correctly with the expectation files. -working_packages=("libcore.icu" +working_packages=("dalvik.system" + "libcore.icu" "libcore.io" "libcore.java.lang" "libcore.java.math" |