190 files changed, 7213 insertions, 2367 deletions
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk index ace6a73654..29b3573598 100644 --- a/build/Android.common_build.mk +++ b/build/Android.common_build.mk @@ -256,7 +256,9 @@ ifeq ($(HOST_OS),linux) ifndef SANITIZE_HOST art_host_non_debug_cflags += -Wframe-larger-than=2700 endif - art_target_non_debug_cflags += -Wframe-larger-than=1728 + ifndef SANITIZE_TARGET + art_target_non_debug_cflags += -Wframe-larger-than=1728 + endif endif endif endif diff --git a/build/Android.oat.mk b/build/Android.oat.mk index 710b130282..728469c2c4 100644 --- a/build/Android.oat.mk +++ b/build/Android.oat.mk @@ -113,7 +113,7 @@ $$(core_image_name): $$(HOST_CORE_DEX_LOCATIONS) $$(core_dex2oat_dependency) --oat-location=$$(PRIVATE_CORE_OAT_NAME) --image=$$(PRIVATE_CORE_IMG_NAME) \ --base=$$(LIBART_IMG_HOST_BASE_ADDRESS) --instruction-set=$$($(3)ART_HOST_ARCH) \ --instruction-set-features=$$($(3)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES) \ - --host --android-root=$$(HOST_OUT) --include-patch-information \ + --host --android-root=$$(HOST_OUT) --include-patch-information --generate-debug-info \ $$(PRIVATE_CORE_COMPILE_OPTIONS) $$(core_oat_name): $$(core_image_name) @@ -232,7 +232,7 @@ $$(core_image_name): $$(TARGET_CORE_DEX_FILES) $$(core_dex2oat_dependency) --base=$$(LIBART_IMG_TARGET_BASE_ADDRESS) --instruction-set=$$($(3)TARGET_ARCH) \ --instruction-set-variant=$$($(3)DEX2OAT_TARGET_CPU_VARIANT) \ --instruction-set-features=$$($(3)DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) \ - --android-root=$$(PRODUCT_OUT)/system --include-patch-information \ + --android-root=$$(PRODUCT_OUT)/system --include-patch-information --generate-debug-info \ $$(PRIVATE_CORE_COMPILE_OPTIONS) || (rm $$(PRIVATE_CORE_OAT_NAME); exit 1) $$(core_oat_name): $$(core_image_name) diff --git a/compiler/dex/gvn_dead_code_elimination.cc b/compiler/dex/gvn_dead_code_elimination.cc index 6d8a7dab2b..b1f5d870d4 100644 --- a/compiler/dex/gvn_dead_code_elimination.cc +++ b/compiler/dex/gvn_dead_code_elimination.cc @@ -1003,7 +1003,6 @@ bool GvnDeadCodeElimination::BackwardPassTryToKillLastMIR() { vreg_chains_.GetMIRData(kill_heads_[v_reg])->PrevChange(v_reg)); } } - unused_vregs_->Union(vregs_to_kill_); for (auto it = changes_to_kill_.rbegin(), end = changes_to_kill_.rend(); it != end; ++it) { MIRData* data = vreg_chains_.GetMIRData(*it); DCHECK(!data->must_keep); @@ -1012,6 +1011,10 @@ bool GvnDeadCodeElimination::BackwardPassTryToKillLastMIR() { KillMIR(data); } + // Each dependent register not in vregs_to_kill_ is either already marked unused or + // it's one word of a wide register where the other word has been overwritten. 
+ unused_vregs_->UnionIfNotIn(dependent_vregs_, vregs_to_kill_); + vreg_chains_.RemoveTrailingNops(); return true; } diff --git a/compiler/dex/gvn_dead_code_elimination_test.cc b/compiler/dex/gvn_dead_code_elimination_test.cc index de591d0edb..461c844a60 100644 --- a/compiler/dex/gvn_dead_code_elimination_test.cc +++ b/compiler/dex/gvn_dead_code_elimination_test.cc @@ -137,6 +137,8 @@ class GvnDeadCodeEliminationTest : public testing::Test { { bb, opcode, 0u, 0u, 1, { src1 }, 1, { result } } #define DEF_BINOP(bb, opcode, result, src1, src2) \ { bb, opcode, 0u, 0u, 2, { src1, src2 }, 1, { result } } +#define DEF_BINOP_WIDE(bb, opcode, result, src1, src2) \ + { bb, opcode, 0u, 0u, 4, { src1, src1 + 1, src2, src2 + 1 }, 2, { result, result + 1 } } void DoPrepareIFields(const IFieldDef* defs, size_t count) { cu_.mir_graph->ifield_lowering_infos_.clear(); @@ -1936,7 +1938,7 @@ TEST_F(GvnDeadCodeEliminationTestSimple, MixedOverlaps1) { DEF_CONST(3, Instruction::CONST, 0u, 1000u), DEF_MOVE(3, Instruction::MOVE, 1u, 0u), DEF_CONST(3, Instruction::CONST, 2u, 2000u), - { 3, Instruction::INT_TO_LONG, 0, 0u, 1, { 2u }, 2, { 3u, 4u} }, + { 3, Instruction::INT_TO_LONG, 0, 0u, 1, { 2u }, 2, { 3u, 4u } }, DEF_MOVE_WIDE(3, Instruction::MOVE_WIDE, 5u, 3u), DEF_CONST(3, Instruction::CONST, 7u, 3000u), DEF_CONST(3, Instruction::CONST, 8u, 4000u), @@ -1983,4 +1985,85 @@ TEST_F(GvnDeadCodeEliminationTestSimple, MixedOverlaps1) { EXPECT_EQ(0u, int_to_long->dalvikInsn.vB); } +TEST_F(GvnDeadCodeEliminationTestSimple, UnusedRegs1) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000u), + DEF_CONST(3, Instruction::CONST, 1u, 2000u), + DEF_BINOP(3, Instruction::ADD_INT, 2u, 1u, 0u), + DEF_CONST(3, Instruction::CONST, 3u, 1000u), // NOT killed (b/21702651). 
+ DEF_BINOP(3, Instruction::ADD_INT, 4u, 1u, 3u), // Killed (RecordPass) + DEF_CONST(3, Instruction::CONST, 5u, 2000u), // Killed with 9u (BackwardPass) + DEF_BINOP(3, Instruction::ADD_INT, 6u, 5u, 0u), // Killed (RecordPass) + DEF_CONST(3, Instruction::CONST, 7u, 4000u), + DEF_MOVE(3, Instruction::MOVE, 8u, 0u), // Killed with 6u (BackwardPass) + }; + + static const int32_t sreg_to_vreg_map[] = { 1, 2, 3, 0, 3, 0, 3, 4, 0 }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 7 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[3]); + EXPECT_EQ(value_names_[2], value_names_[4]); + EXPECT_EQ(value_names_[1], value_names_[5]); + EXPECT_EQ(value_names_[2], value_names_[6]); + EXPECT_EQ(value_names_[0], value_names_[8]); + + static const bool eliminated[] = { + false, false, false, false, true, true, true, false, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + +TEST_F(GvnDeadCodeEliminationTestSimple, UnusedRegs2) { + static const MIRDef mirs[] = { + DEF_CONST(3, Instruction::CONST, 0u, 1000u), + DEF_CONST(3, Instruction::CONST, 1u, 2000u), + DEF_BINOP(3, Instruction::ADD_INT, 2u, 1u, 0u), + DEF_CONST(3, Instruction::CONST, 3u, 1000u), // Killed (BackwardPass; b/21702651) + DEF_BINOP(3, Instruction::ADD_INT, 4u, 1u, 3u), // Killed (RecordPass) + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 5u, 4000u), + { 3, Instruction::LONG_TO_INT, 0, 0u, 2, { 5u, 6u }, 1, { 7u } }, + DEF_BINOP(3, Instruction::ADD_INT, 8u, 7u, 0u), + DEF_CONST_WIDE(3, Instruction::CONST_WIDE, 9u, 4000u), // Killed with 12u (BackwardPass) + DEF_CONST(3, Instruction::CONST, 11u, 6000u), + { 3, Instruction::LONG_TO_INT, 0, 0u, 2, { 9u, 10u }, 1, { 12u } }, // Killed with 9u (BP) + }; + + static const int32_t sreg_to_vreg_map[] = { + 2, 3, 4, 1, 4, 5, 6 /* high word */, 0, 7, 0, 1 /* high word */, 8, 0 + }; + PrepareSRegToVRegMap(sreg_to_vreg_map); + + PrepareMIRs(mirs); + static const int32_t wide_sregs[] = { 5, 9 }; + MarkAsWideSRegs(wide_sregs); + PerformGVN_DCE(); + + ASSERT_EQ(arraysize(mirs), value_names_.size()); + static const size_t diff_indexes[] = { 0, 1, 2, 5, 6, 7, 9 }; + ExpectValueNamesNE(diff_indexes); + EXPECT_EQ(value_names_[0], value_names_[3]); + EXPECT_EQ(value_names_[2], value_names_[4]); + EXPECT_EQ(value_names_[5], value_names_[8]); + EXPECT_EQ(value_names_[6], value_names_[10]); + + static const bool eliminated[] = { + false, false, false, true, true, false, false, false, true, false, true, + }; + static_assert(arraysize(eliminated) == arraysize(mirs), "array size mismatch"); + for (size_t i = 0; i != arraysize(eliminated); ++i) { + bool actually_eliminated = (static_cast<int>(mirs_[i].dalvikInsn.opcode) == kMirOpNop); + EXPECT_EQ(eliminated[i], actually_eliminated) << i; + } +} + } // namespace art diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index cc1ba35b96..38342420ac 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -398,12 +398,13 @@ bool MIRGraph::IsBadMonitorExitCatch(NarrowDexOffset monitor_exit_offset, DCHECK(monitor_exit->Opcode() == Instruction::MONITOR_EXIT); int monitor_reg = monitor_exit->VRegA_11x(); const Instruction* check_insn = 
Instruction::At(current_code_item_->insns_ + catch_offset); - DCHECK(check_insn->Opcode() == Instruction::MOVE_EXCEPTION); - if (check_insn->VRegA_11x() == monitor_reg) { - // Unexpected move-exception to the same register. Probably not the pattern we're looking for. - return false; + if (check_insn->Opcode() == Instruction::MOVE_EXCEPTION) { + if (check_insn->VRegA_11x() == monitor_reg) { + // Unexpected move-exception to the same register. Probably not the pattern we're looking for. + return false; + } + check_insn = check_insn->Next(); } - check_insn = check_insn->Next(); while (true) { int dest = -1; bool wide = false; diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h index b25e967609..e0c56fcc82 100644 --- a/compiler/driver/compiler_driver-inl.h +++ b/compiler/driver/compiler_driver-inl.h @@ -233,11 +233,32 @@ inline bool CompilerDriver::IsStaticFieldInReferrerClass(mirror::Class* referrer return referrer_class == fields_class; } +inline bool CompilerDriver::CanAssumeClassIsInitialized(mirror::Class* klass) { + // Being loaded is a pre-requisite for being initialized but let's do the cheap check first. + // + // NOTE: When AOT compiling an app, we eagerly initialize app classes (and potentially their + // super classes in the boot image) but only those that have a trivial initialization, i.e. + // without <clinit>() or static values in the dex file for that class or any of its super + // classes. So while we could see the klass as initialized during AOT compilation and have + // it only loaded at runtime, the needed initialization would have to be trivial and + // unobservable from Java, so we may as well treat it as initialized. + if (!klass->IsInitialized()) { + return false; + } + return CanAssumeClassIsLoaded(klass); +} + +inline bool CompilerDriver::CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, + mirror::Class* klass) { + return (referrer_class != nullptr && referrer_class->IsSubClass(klass)) || + CanAssumeClassIsInitialized(klass); +} + inline bool CompilerDriver::IsStaticFieldsClassInitialized(mirror::Class* referrer_class, ArtField* resolved_field) { DCHECK(resolved_field->IsStatic()); mirror::Class* fields_class = resolved_field->GetDeclaringClass(); - return fields_class == referrer_class || fields_class->IsInitialized(); + return CanReferrerAssumeClassIsInitialized(referrer_class, fields_class); } inline ArtMethod* CompilerDriver::ResolveMethod( @@ -394,7 +415,7 @@ inline bool CompilerDriver::IsMethodsClassInitialized(mirror::Class* referrer_cl return true; } mirror::Class* methods_class = resolved_method->GetDeclaringClass(); - return methods_class == referrer_class || methods_class->IsInitialized(); + return CanReferrerAssumeClassIsInitialized(referrer_class, methods_class); } } // namespace art diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 22fcf87524..84b6a52bda 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -659,7 +659,8 @@ void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const De bool CompilerDriver::IsImageClass(const char* descriptor) const { if (!IsImage()) { - return true; + // NOTE: Currently unreachable, all callers check IsImage(). 
+ return false; } else { return image_classes_->find(descriptor) != image_classes_->end(); } @@ -992,6 +993,24 @@ void CompilerDriver::UpdateImageClasses(TimingLogger* timings) { } } +bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) { + Runtime* runtime = Runtime::Current(); + if (!runtime->IsAotCompiler()) { + DCHECK(runtime->UseJit()); + // Having the klass reference here implies that the klass is already loaded. + return true; + } + if (!IsImage()) { + // Assume loaded only if klass is in the boot image. App classes cannot be assumed + // loaded because we don't even know what class loader will be used to load them. + bool class_in_image = runtime->GetHeap()->FindSpaceFromObject(klass, false)->IsImageSpace(); + return class_in_image; + } + std::string temp; + const char* descriptor = klass->GetDescriptor(&temp); + return IsImageClass(descriptor); +} + bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) { if (IsImage() && IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) { diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 68c905eb22..f737007308 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -501,6 +501,16 @@ class CompilerDriver { uint32_t field_idx) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // Can we assume that the klass is initialized? + bool CanAssumeClassIsInitialized(mirror::Class* klass) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + // Can we assume that the klass is loaded? + bool CanAssumeClassIsLoaded(mirror::Class* klass) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics. // The only external contract is that unresolved method has flags 0 and resolved non-0. enum { diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc index 4971f0ef10..4d423d007f 100644 --- a/compiler/dwarf/dwarf_test.cc +++ b/compiler/dwarf/dwarf_test.cc @@ -26,11 +26,11 @@ namespace art { namespace dwarf { -constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT; - // Run the tests only on host since we need objdump. #ifndef HAVE_ANDROID_OS +constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT; + TEST_F(DwarfTest, DebugFrame) { const bool is64bit = false; diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 32bde8e3b4..73e121f1cd 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -110,10 +110,6 @@ bool ImageWriter::PrepareImageAddressSpace() { CheckNoDexObjects(); } - if (!AllocMemory()) { - return false; - } - if (kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); CheckNonImageClassesRemoved(); @@ -123,6 +119,12 @@ bool ImageWriter::PrepareImageAddressSpace() { CalculateNewObjectOffsets(); Thread::Current()->TransitionFromRunnableToSuspended(kNative); + // This needs to happen after CalculateNewObjectOffsets since it relies on intern_table_bytes_ and + // bin size sums being calculated. + if (!AllocMemory()) { + return false; + } + return true; } @@ -205,7 +207,7 @@ bool ImageWriter::Write(const std::string& image_filename, } // Write out the image bitmap at the page aligned start of the image end. 
- const auto& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap); + const ImageSection& bitmap_section = image_header->GetImageSection(ImageHeader::kSectionImageBitmap); CHECK_ALIGNED(bitmap_section.Offset(), kPageSize); if (!image_file->Write(reinterpret_cast<char*>(image_bitmap_->Begin()), bitmap_section.Size(), bitmap_section.Offset())) { @@ -222,26 +224,10 @@ bool ImageWriter::Write(const std::string& image_filename, return true; } -void ImageWriter::SetImageOffset(mirror::Object* object, - ImageWriter::BinSlot bin_slot, - size_t offset) { +void ImageWriter::SetImageOffset(mirror::Object* object, size_t offset) { DCHECK(object != nullptr); DCHECK_NE(offset, 0U); - mirror::Object* obj = reinterpret_cast<mirror::Object*>(image_->Begin() + offset); - DCHECK_ALIGNED(obj, kObjectAlignment); - static size_t max_offset = 0; - max_offset = std::max(max_offset, offset); - image_bitmap_->Set(obj); // Mark the obj as mutated, since we will end up changing it. - { - // Remember the object-inside-of-the-image's hash code so we can restore it after the copy. - auto hash_it = saved_hashes_map_.find(bin_slot); - if (hash_it != saved_hashes_map_.end()) { - std::pair<BinSlot, uint32_t> slot_hash = *hash_it; - saved_hashes_.push_back(std::make_pair(obj, slot_hash.second)); - saved_hashes_map_.erase(hash_it); - } - } // The object is already deflated from when we set the bin slot. Just overwrite the lock word. object->SetLockWord(LockWord::FromForwardingAddress(offset), false); DCHECK_EQ(object->GetLockWord(false).ReadBarrierState(), 0u); @@ -262,7 +248,7 @@ void ImageWriter::AssignImageOffset(mirror::Object* object, ImageWriter::BinSlot size_t new_offset = image_objects_offset_begin_ + previous_bin_sizes + bin_slot.GetIndex(); DCHECK_ALIGNED(new_offset, kObjectAlignment); - SetImageOffset(object, bin_slot, new_offset); + SetImageOffset(object, new_offset); DCHECK_LT(new_offset, image_end_); } @@ -302,14 +288,14 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) { // No hash, don't need to save it. break; case LockWord::kHashCode: - saved_hashes_map_[bin_slot] = lw.GetHashCode(); + DCHECK(saved_hashcode_map_.find(object) == saved_hashcode_map_.end()); + saved_hashcode_map_.emplace(object, lw.GetHashCode()); break; default: LOG(FATAL) << "Unreachable."; UNREACHABLE(); } - object->SetLockWord(LockWord::FromForwardingAddress(static_cast<uint32_t>(bin_slot)), - false); + object->SetLockWord(LockWord::FromForwardingAddress(bin_slot.Uint32Value()), false); DCHECK_EQ(object->GetLockWord(false).ReadBarrierState(), 0u); DCHECK(IsImageBinSlotAssigned(object)); } @@ -487,11 +473,8 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { ++bin_slot_count_[bin]; - DCHECK_LT(GetBinSizeSum(), image_->Size()); - // Grow the image closer to the end by the object we just assigned. image_end_ += offset_delta; - DCHECK_LT(image_end_, image_->Size()); } bool ImageWriter::WillMethodBeDirty(ArtMethod* m) const { @@ -535,10 +518,8 @@ ImageWriter::BinSlot ImageWriter::GetImageBinSlot(mirror::Object* object) const } bool ImageWriter::AllocMemory() { - auto* runtime = Runtime::Current(); - const size_t heap_size = runtime->GetHeap()->GetTotalMemory(); - // Add linear alloc usage since we need to have room for the ArtFields. 
- const size_t length = RoundUp(heap_size + runtime->GetLinearAlloc()->GetUsedMemory(), kPageSize); + const size_t length = RoundUp(image_objects_offset_begin_ + GetBinSizeSum() + intern_table_bytes_, + kPageSize); std::string error_msg; image_.reset(MemMap::MapAnonymous("image writer image", nullptr, length, PROT_READ | PROT_WRITE, false, false, &error_msg)); @@ -547,9 +528,10 @@ bool ImageWriter::AllocMemory() { return false; } - // Create the image bitmap. - image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create("image bitmap", image_->Begin(), - RoundUp(length, kPageSize))); + // Create the image bitmap, only needs to cover mirror object section which is up to image_end_. + CHECK_LE(image_end_, length); + image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create( + "image bitmap", image_->Begin(), RoundUp(image_end_, kPageSize))); if (image_bitmap_.get() == nullptr) { LOG(ERROR) << "Failed to allocate memory for image bitmap"; return false; @@ -569,42 +551,6 @@ bool ImageWriter::ComputeLazyFieldsForClassesVisitor(Class* c, void* /*arg*/) { return true; } -// Collect all the java.lang.String in the heap and put them in the output strings_ array. -class StringCollector { - public: - StringCollector(Handle<mirror::ObjectArray<mirror::String>> strings, size_t index) - : strings_(strings), index_(index) { - } - static void Callback(Object* obj, void* arg) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - auto* collector = reinterpret_cast<StringCollector*>(arg); - if (obj->GetClass()->IsStringClass()) { - collector->strings_->SetWithoutChecks<false>(collector->index_++, obj->AsString()); - } - } - size_t GetIndex() const { - return index_; - } - - private: - Handle<mirror::ObjectArray<mirror::String>> strings_; - size_t index_; -}; - -// Compare strings based on length, used for sorting strings by length / reverse length. -class LexicographicalStringComparator { - public: - bool operator()(const mirror::HeapReference<mirror::String>& lhs, - const mirror::HeapReference<mirror::String>& rhs) const - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - mirror::String* lhs_s = lhs.AsMirrorPtr(); - mirror::String* rhs_s = rhs.AsMirrorPtr(); - uint16_t* lhs_begin = lhs_s->GetValue(); - uint16_t* rhs_begin = rhs_s->GetValue(); - return std::lexicographical_compare(lhs_begin, lhs_begin + lhs_s->GetLength(), - rhs_begin, rhs_begin + rhs_s->GetLength()); - } -}; - void ImageWriter::ComputeEagerResolvedStringsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) { if (!obj->GetClass()->IsStringClass()) { return; @@ -769,7 +715,8 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) { DCHECK_EQ(obj, obj->AsString()->Intern()); return; } - mirror::String* const interned = obj->AsString()->Intern(); + mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrong( + obj->AsString()->Intern()); if (obj != interned) { if (!IsImageBinSlotAssigned(interned)) { // interned obj is after us, allocate its location early @@ -965,7 +912,6 @@ void ImageWriter::CalculateNewObjectOffsets() { // know where image_roots is going to end up image_end_ += RoundUp(sizeof(ImageHeader), kObjectAlignment); // 64-bit-alignment - DCHECK_LT(image_end_, image_->Size()); image_objects_offset_begin_ = image_end_; // Prepare bin slots for dex cache arrays. PrepareDexCacheArraySlots(); @@ -997,7 +943,6 @@ void ImageWriter::CalculateNewObjectOffsets() { // Transform each object's bin slot into an offset which will be used to do the final copy. 
heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this); - DCHECK(saved_hashes_map_.empty()); // All binslot hashes should've been put into vector by now. DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_); @@ -1010,6 +955,11 @@ void ImageWriter::CalculateNewObjectOffsets() { bin_slot_previous_sizes_[native_reloc.bin_type]; } + // Calculate how big the intern table will be after being serialized. + auto* const intern_table = Runtime::Current()->GetInternTable(); + CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings"; + intern_table_bytes_ = intern_table->WriteToMemory(nullptr); + // Note that image_end_ is left at end of used mirror object section. } @@ -1039,6 +989,10 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { CHECK_EQ(image_objects_offset_begin_ + bin_slot_previous_sizes_[kBinArtMethodClean], methods_section->Offset()); cur_pos = methods_section->End(); + // Calculate the size of the interned strings. + auto* interned_strings_section = §ions[ImageHeader::kSectionInternedStrings]; + *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_); + cur_pos = interned_strings_section->End(); // Finally bitmap section. const size_t bitmap_bytes = image_bitmap_->Size(); auto* bitmap_section = §ions[ImageHeader::kSectionImageBitmap]; @@ -1046,16 +1000,19 @@ void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { cur_pos = bitmap_section->End(); if (kIsDebugBuild) { size_t idx = 0; - for (auto& section : sections) { + for (const ImageSection& section : sections) { LOG(INFO) << static_cast<ImageHeader::ImageSections>(idx) << " " << section; ++idx; } LOG(INFO) << "Methods: clean=" << clean_methods_ << " dirty=" << dirty_methods_; } + const size_t image_end = static_cast<uint32_t>(interned_strings_section->End()); + CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) << + "Oat file should be right after the image."; // Create the header. 
new (image_->Begin()) ImageHeader( - PointerToLowMemUInt32(image_begin_), static_cast<uint32_t>(methods_section->End()), sections, - image_roots_address_, oat_file_->GetOatHeader().GetChecksum(), + PointerToLowMemUInt32(image_begin_), image_end, + sections, image_roots_address_, oat_file_->GetOatHeader().GetChecksum(), PointerToLowMemUInt32(oat_file_begin), PointerToLowMemUInt32(oat_data_begin_), PointerToLowMemUInt32(oat_data_end), PointerToLowMemUInt32(oat_file_end), target_ptr_size_, compile_pic_); @@ -1068,6 +1025,37 @@ ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) { return reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); } +class FixupRootVisitor : public RootVisitor { + public: + explicit FixupRootVisitor(ImageWriter* image_writer) : image_writer_(image_writer) { + } + + void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + *roots[i] = ImageAddress(*roots[i]); + } + } + + void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, + const RootInfo& info ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + roots[i]->Assign(ImageAddress(roots[i]->AsMirrorPtr())); + } + } + + private: + ImageWriter* const image_writer_; + + mirror::Object* ImageAddress(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + const size_t offset = image_writer_->GetImageOffset(obj); + auto* const dest = reinterpret_cast<Object*>(image_writer_->image_begin_ + offset); + VLOG(compiler) << "Update root from " << obj << " to " << dest; + return dest; + } +}; + void ImageWriter::CopyAndFixupNativeData() { // Copy ArtFields and methods to their locations and update the array for convenience. for (auto& pair : native_object_reloc_) { @@ -1088,7 +1076,7 @@ void ImageWriter::CopyAndFixupNativeData() { } // Fixup the image method roots. auto* image_header = reinterpret_cast<ImageHeader*>(image_->Begin()); - const auto& methods_section = image_header->GetMethodsSection(); + const ImageSection& methods_section = image_header->GetMethodsSection(); for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) { auto* m = image_methods_[i]; CHECK(m != nullptr); @@ -1101,18 +1089,35 @@ void ImageWriter::CopyAndFixupNativeData() { auto* dest = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), dest); } + // Write the intern table into the image. + const ImageSection& intern_table_section = image_header->GetImageSection( + ImageHeader::kSectionInternedStrings); + InternTable* const intern_table = Runtime::Current()->GetInternTable(); + uint8_t* const memory_ptr = image_->Begin() + intern_table_section.Offset(); + const size_t intern_table_bytes = intern_table->WriteToMemory(memory_ptr); + // Fixup the pointers in the newly written intern table to contain image addresses. + InternTable temp_table; + // Note that we require that ReadFromMemory does not make an internal copy of the elements so that + // the VisitRoots() will update the memory directly rather than the copies. + // This also relies on visit roots not doing any verification which could fail after we update + // the roots to be the image addresses. 
+ temp_table.ReadFromMemory(memory_ptr); + CHECK_EQ(temp_table.Size(), intern_table->Size()); + FixupRootVisitor visitor(this); + temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots); + CHECK_EQ(intern_table_bytes, intern_table_bytes_); } void ImageWriter::CopyAndFixupObjects() { gc::Heap* heap = Runtime::Current()->GetHeap(); heap->VisitObjects(CopyAndFixupObjectsCallback, this); // Fix up the object previously had hash codes. - for (const std::pair<mirror::Object*, uint32_t>& hash_pair : saved_hashes_) { + for (const auto& hash_pair : saved_hashcode_map_) { Object* obj = hash_pair.first; DCHECK_EQ(obj->GetLockWord<kVerifyNone>(false).ReadBarrierState(), 0U); obj->SetLockWord<kVerifyNone>(LockWord::FromHashCode(hash_pair.second, 0U), false); } - saved_hashes_.clear(); + saved_hashcode_map_.clear(); } void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) { @@ -1155,18 +1160,22 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a } void ImageWriter::CopyAndFixupObject(Object* obj) { - // see GetLocalAddress for similar computation size_t offset = GetImageOffset(obj); auto* dst = reinterpret_cast<Object*>(image_->Begin() + offset); - const uint8_t* src = reinterpret_cast<const uint8_t*>(obj); + DCHECK_LT(offset, image_end_); + const auto* src = reinterpret_cast<const uint8_t*>(obj); + + image_bitmap_->Set(dst); // Mark the obj as live. - size_t n = obj->SizeOf(); + const size_t n = obj->SizeOf(); DCHECK_LE(offset + n, image_->Size()); memcpy(dst, src, n); // Write in a hash code of objects which have inflated monitors or a hash code in their monitor // word. - dst->SetLockWord(LockWord::Default(), false); + const auto it = saved_hashcode_map_.find(obj); + dst->SetLockWord(it != saved_hashcode_map_.end() ? + LockWord::FromHashCode(it->second, 0u) : LockWord::Default(), false); FixupObject(obj, dst); } @@ -1176,7 +1185,7 @@ class FixupVisitor { FixupVisitor(ImageWriter* image_writer, Object* copy) : image_writer_(image_writer), copy_(copy) { } - void operator()(Object* obj, MemberOffset offset, bool /*is_static*/) const + void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) { Object* ref = obj->GetFieldObject<Object, kVerifyNone>(offset); // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the @@ -1186,7 +1195,7 @@ class FixupVisitor { } // java.lang.ref.Reference visitor. 
- void operator()(mirror::Class* /*klass*/, mirror::Reference* ref) const + void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) { copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>( @@ -1490,4 +1499,11 @@ uint32_t ImageWriter::BinSlot::GetIndex() const { return lockword_ & ~kBinMask; } +uint8_t* ImageWriter::GetOatFileBegin() const { + DCHECK_GT(intern_table_bytes_, 0u); + return image_begin_ + RoundUp( + image_end_ + bin_slot_sizes_[kBinArtField] + bin_slot_sizes_[kBinArtMethodDirty] + + bin_slot_sizes_[kBinArtMethodClean] + intern_table_bytes_, kPageSize); +} + } // namespace art diff --git a/compiler/image_writer.h b/compiler/image_writer.h index a35d6ad9c9..9d45ce2bd4 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -54,7 +54,7 @@ class ImageWriter FINAL { quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic), target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), bin_slot_sizes_(), bin_slot_previous_sizes_(), bin_slot_count_(), - dirty_methods_(0u), clean_methods_(0u) { + intern_table_bytes_(0u), dirty_methods_(0u), clean_methods_(0u) { CHECK_NE(image_begin, 0U); std::fill(image_methods_, image_methods_ + arraysize(image_methods_), nullptr); } @@ -84,11 +84,7 @@ class ImageWriter FINAL { image_begin_ + RoundUp(sizeof(ImageHeader), kObjectAlignment) + it->second + offset); } - uint8_t* GetOatFileBegin() const { - return image_begin_ + RoundUp( - image_end_ + bin_slot_sizes_[kBinArtField] + bin_slot_sizes_[kBinArtMethodDirty] + - bin_slot_sizes_[kBinArtMethodClean], kPageSize); - } + uint8_t* GetOatFileBegin() const; bool Write(const std::string& image_filename, const std::string& oat_filename, const std::string& oat_location) @@ -158,7 +154,7 @@ class ImageWriter FINAL { // The offset in bytes from the beginning of the bin. Aligned to object size. uint32_t GetIndex() const; // Pack into a single uint32_t, for storing into a lock word. - explicit operator uint32_t() const { return lockword_; } + uint32_t Uint32Value() const { return lockword_; } // Comparison operator for map support bool operator<(const BinSlot& other) const { return lockword_ < other.lockword_; } @@ -170,7 +166,7 @@ class ImageWriter FINAL { // We use the lock word to store the offset of the object in the image. void AssignImageOffset(mirror::Object* object, BinSlot bin_slot) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void SetImageOffset(mirror::Object* object, BinSlot bin_slot, size_t offset) + void SetImageOffset(mirror::Object* object, size_t offset) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool IsImageOffsetAssigned(mirror::Object* object) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -330,11 +326,9 @@ class ImageWriter FINAL { // The start offsets of the dex cache arrays. SafeMap<const DexFile*, size_t> dex_cache_array_starts_; - // Saved hashes (objects are inside of the image so that they don't move). - std::vector<std::pair<mirror::Object*, uint32_t>> saved_hashes_; - - // Saved hashes (objects are bin slots to inside of the image, not yet allocated an address). - std::map<BinSlot, uint32_t> saved_hashes_map_; + // Saved hash codes. We use these to restore lockwords which were temporarily used to have + // forwarding addresses as well as copying over hash codes. 
+ std::unordered_map<mirror::Object*, uint32_t> saved_hashcode_map_; // Beginning target oat address for the pointers from the output image to its oat file. const uint8_t* oat_data_begin_; @@ -360,6 +354,9 @@ class ImageWriter FINAL { size_t bin_slot_previous_sizes_[kBinSize]; // Number of bytes in previous bins. size_t bin_slot_count_[kBinSize]; // Number of objects in a bin + // Cached size of the intern table for when we allocate memory. + size_t intern_table_bytes_; + // ArtField, ArtMethod relocating map. These are allocated as array of structs but we want to // have one entry per art field for convenience. ArtFields are placed right after the end of the // image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields. @@ -376,8 +373,9 @@ class ImageWriter FINAL { uint64_t dirty_methods_; uint64_t clean_methods_; - friend class FixupVisitor; friend class FixupClassVisitor; + friend class FixupRootVisitor; + friend class FixupVisitor; DISALLOW_COPY_AND_ASSIGN(ImageWriter); }; diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 3a0d520e47..016f28ef1e 100644 --- a/compiler/jni/jni_cfi_test.cc +++ b/compiler/jni/jni_cfi_test.cc @@ -56,7 +56,7 @@ class JNICFITest : public CFITest { jni_asm->IncreaseFrameSize(32); jni_asm->DecreaseFrameSize(32); jni_asm->RemoveFrame(frame_size, callee_save_regs); - jni_asm->EmitSlowPaths(); + jni_asm->FinalizeCode(); std::vector<uint8_t> actual_asm(jni_asm->CodeSize()); MemoryRegion code(&actual_asm[0], actual_asm.size()); jni_asm->FinalizeInstructions(code); diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 4d7d86cce6..85fd6962fa 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -474,7 +474,7 @@ CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver, DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size)); // 17. Finalize code generation - __ EmitSlowPaths(); + __ FinalizeCode(); size_t cs = __ CodeSize(); std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index d0104300d3..a3e889f0f6 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -82,6 +82,7 @@ std::vector<uint8_t> Thumb2RelativePatcher::CompileThunkCode() { arm::kLoadWord, arm::PC, arm::R0, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); assembler.bkpt(0); + assembler.FinalizeCode(); std::vector<uint8_t> thunk_code(assembler.CodeSize()); MemoryRegion code(thunk_code.data(), thunk_code.size()); assembler.FinalizeInstructions(code); diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index ee48789ad2..29355d6968 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -233,7 +233,7 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunkCode() { kArm64PointerSize).Int32Value()); assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); // Ensure we emit the literal pool. 
- assembler.EmitSlowPaths(); + assembler.FinalizeCode(); std::vector<uint8_t> thunk_code(assembler.CodeSize()); MemoryRegion code(thunk_code.data(), thunk_code.size()); assembler.FinalizeInstructions(code); diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index b2b54965b5..97b3725da1 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -126,11 +126,14 @@ class ValueBound : public ValueObject { return instruction_ == bound.instruction_ && constant_ == bound.constant_; } - static HInstruction* FromArrayLengthToNewArrayIfPossible(HInstruction* instruction) { - // Null check on the NewArray should have been eliminated by instruction - // simplifier already. - if (instruction->IsArrayLength() && instruction->InputAt(0)->IsNewArray()) { - return instruction->InputAt(0)->AsNewArray(); + static HInstruction* FromArrayLengthToArray(HInstruction* instruction) { + DCHECK(instruction->IsArrayLength() || instruction->IsNewArray()); + if (instruction->IsArrayLength()) { + HInstruction* input = instruction->InputAt(0); + if (input->IsNullCheck()) { + input = input->AsNullCheck()->InputAt(0); + } + return input; } return instruction; } @@ -146,8 +149,9 @@ class ValueBound : public ValueObject { // Some bounds are created with HNewArray* as the instruction instead // of HArrayLength*. They are treated the same. - instruction1 = FromArrayLengthToNewArrayIfPossible(instruction1); - instruction2 = FromArrayLengthToNewArrayIfPossible(instruction2); + // HArrayLength with the same array input are considered equal also. + instruction1 = FromArrayLengthToArray(instruction1); + instruction2 = FromArrayLengthToArray(instruction2); return instruction1 == instruction2; } @@ -271,7 +275,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // Loop header of loop_info. Exiting loop is normal. return false; } - const GrowableArray<HBasicBlock*> successors = block->GetSuccessors(); + const GrowableArray<HBasicBlock*>& successors = block->GetSuccessors(); for (size_t i = 0; i < successors.Size(); i++) { if (!loop_info->Contains(*successors.Get(i))) { // One of the successors exits the loop. @@ -293,8 +297,14 @@ class ArrayAccessInsideLoopFinder : public ValueObject { void Run() { HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation(); - for (HBlocksInLoopIterator it_loop(*loop_info); !it_loop.Done(); it_loop.Advance()) { - HBasicBlock* block = it_loop.Current(); + HBlocksInLoopReversePostOrderIterator it_loop(*loop_info); + HBasicBlock* block = it_loop.Current(); + DCHECK(block == induction_variable_->GetBlock()); + // Skip loop header. Since narrowed value range of a MonotonicValueRange only + // applies to the loop body (after the test at the end of the loop header). + it_loop.Advance(); + for (; !it_loop.Done(); it_loop.Advance()) { + block = it_loop.Current(); DCHECK(block->IsInLoop()); if (!DominatesAllBackEdges(block, loop_info)) { // In order not to trigger deoptimization unnecessarily, make sure @@ -308,30 +318,35 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // that the loop will loop through the full monotonic value range from // initial_ to end_. So adding deoptimization might be too aggressive and can // trigger deoptimization unnecessarily even if the loop won't actually throw - // AIOOBE. 
Otherwise, the loop induction variable is going to cover the full - // monotonic value range from initial_ to end_, and deoptimizations are added - // iff the loop will throw AIOOBE. + // AIOOBE. found_array_length_ = nullptr; return; } for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr; instruction = instruction->GetNext()) { - if (!instruction->IsArrayGet() && !instruction->IsArraySet()) { + if (!instruction->IsBoundsCheck()) { continue; } - HInstruction* index = instruction->InputAt(1); - if (!index->IsBoundsCheck()) { + + HInstruction* length_value = instruction->InputAt(1); + if (length_value->IsIntConstant()) { + // TODO: may optimize for constant case. continue; } - HArrayLength* array_length = index->InputAt(1)->AsArrayLength(); - if (array_length == nullptr) { - DCHECK(index->InputAt(1)->IsIntConstant()); - // TODO: may optimize for constant case. + if (length_value->IsPhi()) { + // When adding deoptimizations in outer loops, we might create + // a phi for the array length, and update all uses of the + // length in the loop to that phi. Therefore, inner loops having + // bounds checks on the same array will use that phi. + // TODO: handle these cases. continue; } + DCHECK(length_value->IsArrayLength()); + HArrayLength* array_length = length_value->AsArrayLength(); + HInstruction* array = array_length->InputAt(0); if (array->IsNullCheck()) { array = array->AsNullCheck()->InputAt(0); @@ -347,7 +362,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { continue; } - index = index->AsBoundsCheck()->InputAt(0); + HInstruction* index = instruction->AsBoundsCheck()->InputAt(0); HInstruction* left = index; int32_t right = 0; if (left == induction_variable_ || @@ -375,7 +390,7 @@ class ArrayAccessInsideLoopFinder : public ValueObject { // The instruction that corresponds to a MonotonicValueRange. HInstruction* induction_variable_; - // The array length of the array that's accessed inside the loop. + // The array length of the array that's accessed inside the loop body. HArrayLength* found_array_length_; // The lowest and highest constant offsets relative to induction variable @@ -411,6 +426,8 @@ class ValueRange : public ArenaObject<kArenaAllocMisc> { ValueBound GetLower() const { return lower_; } ValueBound GetUpper() const { return upper_; } + bool IsConstantValueRange() { return lower_.IsConstant() && upper_.IsConstant(); } + // If it's certain that this value range fits in other_range. virtual bool FitsIn(ValueRange* other_range) const { if (other_range == nullptr) { @@ -495,13 +512,30 @@ class MonotonicValueRange : public ValueRange { ValueBound GetBound() const { return bound_; } void SetEnd(HInstruction* end) { end_ = end; } void SetInclusive(bool inclusive) { inclusive_ = inclusive; } - HBasicBlock* GetLoopHead() const { + HBasicBlock* GetLoopHeader() const { DCHECK(induction_variable_->GetBlock()->IsLoopHeader()); return induction_variable_->GetBlock(); } MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; } + HBasicBlock* GetLoopHeaderSuccesorInLoop() { + HBasicBlock* header = GetLoopHeader(); + HInstruction* instruction = header->GetLastInstruction(); + DCHECK(instruction->IsIf()); + HIf* h_if = instruction->AsIf(); + HLoopInformation* loop_info = header->GetLoopInformation(); + bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor()); + bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor()); + + // Just in case it's some strange loop structure. 
+ if (true_successor_in_loop && false_successor_in_loop) { + return nullptr; + } + DCHECK(true_successor_in_loop || false_successor_in_loop); + return false_successor_in_loop ? h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor(); + } + // If it's certain that this value range fits in other_range. bool FitsIn(ValueRange* other_range) const OVERRIDE { if (other_range == nullptr) { @@ -593,12 +627,114 @@ class MonotonicValueRange : public ValueRange { } } + // Try to add HDeoptimize's in the loop pre-header first to narrow this range. + // For example, this loop: + // + // for (int i = start; i < end; i++) { + // array[i - 1] = array[i] + array[i + 1]; + // } + // + // will be transformed to: + // + // int array_length_in_loop_body_if_needed; + // if (start >= end) { + // array_length_in_loop_body_if_needed = 0; + // } else { + // if (start < 1) deoptimize(); + // if (array == null) deoptimize(); + // array_length = array.length; + // if (end > array_length - 1) deoptimize; + // array_length_in_loop_body_if_needed = array_length; + // } + // for (int i = start; i < end; i++) { + // // No more null check and bounds check. + // // array.length value is replaced with array_length_in_loop_body_if_needed + // // in the loop body. + // array[i - 1] = array[i] + array[i + 1]; + // } + // + // We basically first go through the loop body and find those array accesses whose + // index is at a constant offset from the induction variable ('i' in the above example), + // and update offset_low and offset_high along the way. We then add the following + // deoptimizations in the loop pre-header (suppose end is not inclusive). + // if (start < -offset_low) deoptimize(); + // if (end >= array.length - offset_high) deoptimize(); + // It might be necessary to first hoist array.length (and the null check on it) out of + // the loop with another deoptimization. + // + // In order not to trigger deoptimization unnecessarily, we want to make a strong + // guarantee that no deoptimization is triggered if the loop body itself doesn't + // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop + // body must throw AIOOBE). + // This is achieved by the following: + // 1) We only process loops that iterate through the full monotonic range from + // initial_ to end_. We do the following checks to make sure that's the case: + // a) The loop doesn't have early exit (via break, return, etc.) + // b) The increment_ is 1/-1. An increment of 2, for example, may skip end_. + // 2) We only collect array accesses of blocks in the loop body that dominate + // all loop back edges, these array accesses are guaranteed to happen + // at each loop iteration. + // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses + // when the induction variable is at initial_ and end_ must be in a legal range. + // Since the added deoptimizations are basically checking the induction variable + // at initial_ and end_ values, no deoptimization will be triggered either. + // + // A special case is the loop body isn't entered at all. In that case, we may still + // add deoptimization due to the analysis described above. In order not to trigger + // deoptimization, we do a test between initial_ and end_ first and skip over + // the added deoptimization. 
+ ValueRange* NarrowWithDeoptimization() { + if (increment_ != 1 && increment_ != -1) { + // In order not to trigger deoptimization unnecessarily, we want to + // make sure the loop iterates through the full range from initial_ to + // end_ so that boundaries are covered by the loop. An increment of 2, + // for example, may skip end_. + return this; + } + + if (end_ == nullptr) { + // No full info to add deoptimization. + return this; + } + + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + if (!initial_->GetBlock()->Dominates(pre_header) || + !end_->GetBlock()->Dominates(pre_header)) { + // Can't add a check in loop pre-header if the value isn't available there. + return this; + } + + ArrayAccessInsideLoopFinder finder(induction_variable_); + + if (!finder.HasFoundArrayLength()) { + // No array access was found inside the loop that can benefit + // from deoptimization. + return this; + } + + if (!AddDeoptimization(finder)) { + return this; + } + + // After added deoptimizations, induction variable fits in + // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. + ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); + ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); + // We've narrowed the range after added deoptimizations. + return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); + } + // Returns true if adding a (constant >= value) check for deoptimization // is allowed and will benefit compiled code. - bool CanAddDeoptimizationConstant(HInstruction* value, - int32_t constant, - bool* is_proven) { + bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) { *is_proven = false; + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + DCHECK(value->GetBlock()->Dominates(pre_header)); + // See if we can prove the relationship first. if (value->IsIntConstant()) { if (value->AsIntConstant()->GetValue() >= constant) { @@ -615,22 +751,118 @@ class MonotonicValueRange : public ValueRange { return true; } + // Try to filter out cases that the loop entry test will never be true. + bool LoopEntryTestUseful() { + if (initial_->IsIntConstant() && end_->IsIntConstant()) { + int32_t initial_val = initial_->AsIntConstant()->GetValue(); + int32_t end_val = end_->AsIntConstant()->GetValue(); + if (increment_ == 1) { + if (inclusive_) { + return initial_val > end_val; + } else { + return initial_val >= end_val; + } + } else { + DCHECK_EQ(increment_, -1); + if (inclusive_) { + return initial_val < end_val; + } else { + return initial_val <= end_val; + } + } + } + return true; + } + + // Returns the block for adding deoptimization. + HBasicBlock* TransformLoopForDeoptimizationIfNeeded() { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + // Deoptimization is only added when both initial_ and end_ are defined + // before the loop. + DCHECK(initial_->GetBlock()->Dominates(pre_header)); + DCHECK(end_->GetBlock()->Dominates(pre_header)); + + // If it can be proven the loop body is definitely entered (unless exception + // is thrown in the loop header for which triggering deoptimization is fine), + // there is no need for tranforming the loop. 
In that case, deoptimization + // will just be added in the loop pre-header. + if (!LoopEntryTestUseful()) { + return pre_header; + } + + HGraph* graph = header->GetGraph(); + graph->TransformLoopHeaderForBCE(header); + HBasicBlock* new_pre_header = header->GetDominator(); + DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader()); + HBasicBlock* if_block = new_pre_header->GetDominator(); + HBasicBlock* dummy_block = if_block->GetSuccessors().Get(0); // True successor. + HBasicBlock* deopt_block = if_block->GetSuccessors().Get(1); // False successor. + + dummy_block->AddInstruction(new (graph->GetArena()) HGoto()); + deopt_block->AddInstruction(new (graph->GetArena()) HGoto()); + new_pre_header->AddInstruction(new (graph->GetArena()) HGoto()); + return deopt_block; + } + + // Adds a test between initial_ and end_ to see if the loop body is entered. + // If the loop body isn't entered at all, it jumps to the loop pre-header (after + // transformation) to avoid any deoptimization. + void AddLoopBodyEntryTest() { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + HBasicBlock* if_block = pre_header->GetDominator(); + HGraph* graph = header->GetGraph(); + + HCondition* cond; + if (increment_ == 1) { + if (inclusive_) { + cond = new (graph->GetArena()) HGreaterThan(initial_, end_); + } else { + cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_); + } + } else { + DCHECK_EQ(increment_, -1); + if (inclusive_) { + cond = new (graph->GetArena()) HLessThan(initial_, end_); + } else { + cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_); + } + } + HIf* h_if = new (graph->GetArena()) HIf(cond); + if_block->AddInstruction(cond); + if_block->AddInstruction(h_if); + } + // Adds a check that (value >= constant), and HDeoptimize otherwise. 
void AddDeoptimizationConstant(HInstruction* value, - int32_t constant) { - HBasicBlock* block = induction_variable_->GetBlock(); - DCHECK(block->IsLoopHeader()); - HGraph* graph = block->GetGraph(); - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + int32_t constant, + HBasicBlock* deopt_block, + bool loop_entry_test_block_added) { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + if (loop_entry_test_block_added) { + DCHECK(deopt_block->GetSuccessors().Get(0) == pre_header); + } else { + DCHECK(deopt_block == pre_header); + } + HGraph* graph = header->GetGraph(); + HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); + if (loop_entry_test_block_added) { + DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors().Get(1)); + } + HIntConstant* const_instr = graph->GetIntConstant(constant); HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr); HDeoptimize* deoptimize = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); + deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction()); deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + suspend_check->GetEnvironment(), header); } // Returns true if adding a (value <= array_length + offset) check for deoptimization @@ -640,6 +872,26 @@ class MonotonicValueRange : public ValueRange { int32_t offset, bool* is_proven) { *is_proven = false; + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader(); + DCHECK(value->GetBlock()->Dominates(pre_header)); + + if (array_length->GetBlock() == header) { + // array_length_in_loop_body_if_needed only has correct value when the loop + // body is entered. We bail out in this case. Usually array_length defined + // in the loop header is already hoisted by licm. + return false; + } else { + // array_length is defined either before the loop header already, or in + // the loop body since it's used in the loop body. If it's defined in the loop body, + // a phi array_length_in_loop_body_if_needed is used to replace it. In that case, + // all the uses of array_length must be dominated by its definition in the loop + // body. array_length_in_loop_body_if_needed is guaranteed to be the same as + // array_length once the loop body is entered so all the uses of the phi will + // use the correct value. + } + if (offset > 0) { // There might be overflow issue. // TODO: handle this, possibly with some distance relationship between @@ -667,56 +919,99 @@ class MonotonicValueRange : public ValueRange { // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise. 
void AddDeoptimizationArrayLength(HInstruction* value, HArrayLength* array_length, - int32_t offset) { - HBasicBlock* block = induction_variable_->GetBlock(); - DCHECK(block->IsLoopHeader()); - HGraph* graph = block->GetGraph(); - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); + int32_t offset, + HBasicBlock* deopt_block, + bool loop_entry_test_block_added) { + HBasicBlock* header = induction_variable_->GetBlock(); + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + if (loop_entry_test_block_added) { + DCHECK(deopt_block->GetSuccessors().Get(0) == pre_header); + } else { + DCHECK(deopt_block == pre_header); + } + HGraph* graph = header->GetGraph(); + HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck(); // We may need to hoist null-check and array_length out of loop first. - if (!array_length->GetBlock()->Dominates(pre_header)) { + if (!array_length->GetBlock()->Dominates(deopt_block)) { + // array_length must be defined in the loop body. + DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock())); + DCHECK(array_length->GetBlock() != header); + HInstruction* array = array_length->InputAt(0); HNullCheck* null_check = array->AsNullCheck(); if (null_check != nullptr) { array = null_check->InputAt(0); } - // We've already made sure array is defined before the loop when collecting + // We've already made sure the array is defined before the loop when collecting // array accesses for the loop. - DCHECK(array->GetBlock()->Dominates(pre_header)); - if (null_check != nullptr && !null_check->GetBlock()->Dominates(pre_header)) { + DCHECK(array->GetBlock()->Dominates(deopt_block)); + if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) { // Hoist null check out of loop with a deoptimization. HNullConstant* null_constant = graph->GetNullConstant(); HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant); // TODO: for one dex_pc, share the same deoptimization slow path. HDeoptimize* null_check_deoptimize = new (graph->GetArena()) HDeoptimize(null_check_cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(null_check_cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore( - null_check_deoptimize, pre_header->GetLastInstruction()); + deopt_block->InsertInstructionBefore( + null_check_cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore( + null_check_deoptimize, deopt_block->GetLastInstruction()); // Eliminate null check in the loop. null_check->ReplaceWith(array); null_check->GetBlock()->RemoveInstruction(null_check); null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + suspend_check->GetEnvironment(), header); } - // Hoist array_length out of loop. - array_length->MoveBefore(pre_header->GetLastInstruction()); + + HArrayLength* new_array_length = new (graph->GetArena()) HArrayLength(array); + deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction()); + + if (loop_entry_test_block_added) { + // Replace array_length defined inside the loop body with a phi + // array_length_in_loop_body_if_needed. This is a synthetic phi so there is + // no vreg number for it. + HPhi* phi = new (graph->GetArena()) HPhi( + graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt); + // Set to 0 if the loop body isn't entered. 
+ phi->SetRawInputAt(0, graph->GetIntConstant(0)); + // Set to array.length if the loop body is entered. + phi->SetRawInputAt(1, new_array_length); + pre_header->AddPhi(phi); + array_length->ReplaceWith(phi); + // Make sure phi is only used after the loop body is entered. + if (kIsDebugBuild) { + for (HUseIterator<HInstruction*> it(phi->GetUses()); + !it.Done(); + it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock())); + } + } + } else { + array_length->ReplaceWith(new_array_length); + } + + array_length->GetBlock()->RemoveInstruction(array_length); + // Use new_array_length for deopt. + array_length = new_array_length; } - HIntConstant* offset_instr = graph->GetIntConstant(offset); - HAdd* add = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); - HCondition* cond = new (graph->GetArena()) HGreaterThan(value, add); - HDeoptimize* deoptimize = new (graph->GetArena()) - HDeoptimize(cond, suspend_check->GetDexPc()); - pre_header->InsertInstructionBefore(add, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(cond, pre_header->GetLastInstruction()); - pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); - deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( - suspend_check->GetEnvironment(), block); + HInstruction* added = array_length; + if (offset != 0) { + HIntConstant* offset_instr = graph->GetIntConstant(offset); + added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr); + deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction()); + } + HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added); + HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc()); + deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction()); + deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction()); + deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header); } - // Add deoptimizations in loop pre-header with the collected array access + // Adds deoptimizations in loop pre-header with the collected array access // data so that value ranges can be established in loop body. // Returns true if deoptimizations are successfully added, or if it's proven // it's not necessary. @@ -733,70 +1028,60 @@ class MonotonicValueRange : public ValueRange { return false; } + HBasicBlock* deopt_block; + bool loop_entry_test_block_added = false; bool is_constant_proven, is_length_proven; + + HInstruction* const_comparing_instruction; + int32_t const_compared_to; + HInstruction* array_length_comparing_instruction; + int32_t array_length_offset; if (increment_ == 1) { // Increasing from initial_ to end_. - int32_t offset = inclusive_ ? -offset_high - 1 : -offset_high; - if (CanAddDeoptimizationConstant(initial_, -offset_low, &is_constant_proven) && - CanAddDeoptimizationArrayLength(end_, array_length, offset, &is_length_proven)) { - if (!is_constant_proven) { - AddDeoptimizationConstant(initial_, -offset_low); - } - if (!is_length_proven) { - AddDeoptimizationArrayLength(end_, array_length, offset); + const_comparing_instruction = initial_; + const_compared_to = -offset_low; + array_length_comparing_instruction = end_; + array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high; + } else { + const_comparing_instruction = end_; + const_compared_to = inclusive_ ? 
-offset_low : -offset_low - 1; + array_length_comparing_instruction = initial_; + array_length_offset = -offset_high - 1; + } + + if (CanAddDeoptimizationConstant(const_comparing_instruction, + const_compared_to, + &is_constant_proven) && + CanAddDeoptimizationArrayLength(array_length_comparing_instruction, + array_length, + array_length_offset, + &is_length_proven)) { + if (!is_constant_proven || !is_length_proven) { + deopt_block = TransformLoopForDeoptimizationIfNeeded(); + loop_entry_test_block_added = (deopt_block != pre_header); + if (loop_entry_test_block_added) { + // Loop body may be entered. + AddLoopBodyEntryTest(); } - return true; } - } else if (increment_ == -1) { - // Decreasing from initial_ to end_. - int32_t constant = inclusive_ ? -offset_low : -offset_low - 1; - if (CanAddDeoptimizationConstant(end_, constant, &is_constant_proven) && - CanAddDeoptimizationArrayLength( - initial_, array_length, -offset_high - 1, &is_length_proven)) { - if (!is_constant_proven) { - AddDeoptimizationConstant(end_, constant); - } - if (!is_length_proven) { - AddDeoptimizationArrayLength(initial_, array_length, -offset_high - 1); - } - return true; + if (!is_constant_proven) { + AddDeoptimizationConstant(const_comparing_instruction, + const_compared_to, + deopt_block, + loop_entry_test_block_added); + } + if (!is_length_proven) { + AddDeoptimizationArrayLength(array_length_comparing_instruction, + array_length, + array_length_offset, + deopt_block, + loop_entry_test_block_added); } + return true; } return false; } - // Try to add HDeoptimize's in the loop pre-header first to narrow this range. - ValueRange* NarrowWithDeoptimization() { - if (increment_ != 1 && increment_ != -1) { - // TODO: possibly handle overflow/underflow issues with deoptimization. - return this; - } - - if (end_ == nullptr) { - // No full info to add deoptimization. - return this; - } - - ArrayAccessInsideLoopFinder finder(induction_variable_); - - if (!finder.HasFoundArrayLength()) { - // No array access was found inside the loop that can benefit - // from deoptimization. - return this; - } - - if (!AddDeoptimization(finder)) { - return this; - } - - // After added deoptimizations, induction variable fits in - // [-offset_low, array.length-1-offset_high], adjusted with collected offsets. - ValueBound lower = ValueBound(0, -finder.GetOffsetLow()); - ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh()); - // We've narrowed the range after added deoptimizations. - return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper); - } - private: HPhi* const induction_variable_; // Induction variable for this monotonic value range. HInstruction* const initial_; // Initial value. @@ -819,12 +1104,17 @@ class BCEVisitor : public HGraphVisitor { // it's likely some AIOOBE will be thrown. static constexpr int32_t kMaxConstantForAddingDeoptimize = INT_MAX - 1024 * 1024; + // Added blocks for loop body entry test. 
+ bool IsAddedBlock(HBasicBlock* block) const { + return block->GetBlockId() >= initial_block_size_; + } + explicit BCEVisitor(HGraph* graph) - : HGraphVisitor(graph), - maps_(graph->GetBlocks().Size()), - need_to_revisit_block_(false) {} + : HGraphVisitor(graph), maps_(graph->GetBlocks().Size()), + need_to_revisit_block_(false), initial_block_size_(graph->GetBlocks().Size()) {} void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + DCHECK(!IsAddedBlock(block)); first_constant_index_bounds_check_map_.clear(); HGraphVisitor::VisitBasicBlock(block); if (need_to_revisit_block_) { @@ -839,6 +1129,10 @@ class BCEVisitor : public HGraphVisitor { private: // Return the map of proven value ranges at the beginning of a basic block. ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) { + if (IsAddedBlock(basic_block)) { + // Added blocks don't keep value ranges. + return nullptr; + } int block_id = basic_block->GetBlockId(); if (maps_.at(block_id) == nullptr) { std::unique_ptr<ArenaSafeMap<int, ValueRange*>> map( @@ -853,8 +1147,12 @@ class BCEVisitor : public HGraphVisitor { ValueRange* LookupValueRange(HInstruction* instruction, HBasicBlock* basic_block) { while (basic_block != nullptr) { ArenaSafeMap<int, ValueRange*>* map = GetValueRangeMap(basic_block); - if (map->find(instruction->GetId()) != map->end()) { - return map->Get(instruction->GetId()); + if (map != nullptr) { + if (map->find(instruction->GetId()) != map->end()) { + return map->Get(instruction->GetId()); + } + } else { + DCHECK(IsAddedBlock(basic_block)); } basic_block = basic_block->GetDominator(); } @@ -971,7 +1269,7 @@ class BCEVisitor : public HGraphVisitor { if (left_range != nullptr) { left_monotonic_range = left_range->AsMonotonicValueRange(); if (left_monotonic_range != nullptr) { - HBasicBlock* loop_head = left_monotonic_range->GetLoopHead(); + HBasicBlock* loop_head = left_monotonic_range->GetLoopHeader(); if (instruction->GetBlock() != loop_head) { // For monotonic value range, don't handle `instruction` // if it's not defined in the loop header. @@ -1013,7 +1311,7 @@ class BCEVisitor : public HGraphVisitor { // Update the info for monotonic value range. if (left_monotonic_range->GetInductionVariable() == left && left_monotonic_range->GetIncrement() < 0 && - block == left_monotonic_range->GetLoopHead() && + block == left_monotonic_range->GetLoopHeader() && instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { left_monotonic_range->SetEnd(right); left_monotonic_range->SetInclusive(cond == kCondLT); @@ -1047,7 +1345,7 @@ class BCEVisitor : public HGraphVisitor { // Update the info for monotonic value range. 
if (left_monotonic_range->GetInductionVariable() == left && left_monotonic_range->GetIncrement() > 0 && - block == left_monotonic_range->GetLoopHead() && + block == left_monotonic_range->GetLoopHeader() && instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) { left_monotonic_range->SetEnd(right); left_monotonic_range->SetInclusive(cond == kCondGT); @@ -1083,7 +1381,16 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); - DCHECK(array_length->IsIntConstant() || array_length->IsArrayLength()); + DCHECK(array_length->IsIntConstant() || + array_length->IsArrayLength() || + array_length->IsPhi()); + + if (array_length->IsPhi()) { + // Input 1 of the phi contains the real array.length once the loop body is + // entered. That value will be used for bound analysis. The graph is still + // strictly in SSA form. + array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength(); + } if (!index->IsIntConstant()) { ValueRange* index_range = LookupValueRange(index, block); @@ -1238,25 +1545,26 @@ class BCEVisitor : public HGraphVisitor { } if (left_range->IsMonotonicValueRange() && - block == left_range->AsMonotonicValueRange()->GetLoopHead()) { + block == left_range->AsMonotonicValueRange()->GetLoopHeader()) { // The comparison is for an induction variable in the loop header. DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable()); - HBasicBlock* loop_body_successor; - if (LIKELY(block->GetLoopInformation()-> - Contains(*instruction->IfFalseSuccessor()))) { - loop_body_successor = instruction->IfFalseSuccessor(); - } else { - loop_body_successor = instruction->IfTrueSuccessor(); + HBasicBlock* loop_body_successor = + left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop(); + if (loop_body_successor == nullptr) { + // Bail out in case of an unusual loop structure. + return; } ValueRange* new_left_range = LookupValueRange(left, loop_body_successor); - if (new_left_range == left_range) { + if ((new_left_range == left_range) || + // A range narrowed with deoptimization is usually more useful than + // a constant range. + new_left_range->IsConstantValueRange()) { // We are not successful in narrowing the monotonic value range to // a regular value range. Try using deoptimization. new_left_range = left_range->AsMonotonicValueRange()-> NarrowWithDeoptimization(); if (new_left_range != left_range) { - GetValueRangeMap(instruction->IfFalseSuccessor())-> - Overwrite(left->GetId(), new_left_range); + GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range); } } } @@ -1511,6 +1819,9 @@ class BCEVisitor : public HGraphVisitor { // eliminate those bounds checks. bool need_to_revisit_block_; + // Initial number of blocks. + int32_t initial_block_size_; + DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; @@ -1527,7 +1838,22 @@ void BoundsCheckElimination::Run() { // value can be narrowed further down in the dominator tree. // // TODO: only visit blocks that dominate some array accesses. - visitor.VisitReversePostOrder(); + HBasicBlock* last_visited_block = nullptr; + for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* current = it.Current(); + if (current == last_visited_block) { + // We may insert blocks into the reverse post order list when processing + // a loop header. Don't process it again.
+ DCHECK(current->IsLoopHeader()); + continue; + } + if (visitor.IsAddedBlock(current)) { + // Skip added blocks. Their effects are already taken care of. + continue; + } + visitor.VisitBasicBlock(current); + last_visited_block = current; + } } } // namespace art diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index e383ec664b..4701bddd48 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -440,22 +440,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -464,6 +458,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, -1, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); @@ -472,6 +467,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -481,6 +477,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // array[i] = 10; // Can't eliminate due to overflow concern. } graph = BuildSSAGraph1(&allocator, &bounds_check, 0, 2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2(graph); bounds_check_elimination_with_increment_2.Run(); @@ -489,6 +486,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination1) { // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. 
} graph = BuildSSAGraph1(&allocator, &bounds_check, 1, 2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_increment_2_from_1(graph); bounds_check_elimination_with_increment_2_from_1.Run(); @@ -579,22 +577,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph2(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph2(&allocator, &bounds_check, 0); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -603,6 +595,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, -1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_minus_1(graph); bounds_check_elimination_with_initial_minus_1.Run(); @@ -611,6 +604,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -1, kCondLT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_less_than(graph); bounds_check_elimination_with_less_than.Run(); @@ -619,6 +613,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination2) { // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. } graph = BuildSSAGraph2(&allocator, &bounds_check, 0, -2); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_minus_2(graph); bounds_check_elimination_increment_minus_2.Run(); @@ -710,15 +705,17 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); + BoundsCheckElimination bounds_check_elimination(graph); + bounds_check_elimination.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // int[] array = new int[10]; // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. 
} graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 1, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -728,6 +725,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 0, 1, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -737,6 +735,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination3) { // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. } graph = BuildSSAGraph3(&allocator, &bounds_check, 1, 8, kCondGE); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_increment_8(graph); bounds_check_elimination_increment_8.Run(); @@ -828,22 +827,16 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { HInstruction* bounds_check = nullptr; HGraph* graph = BuildSSAGraph4(&allocator, &bounds_check, 0); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); + RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination(graph); bounds_check_elimination.Run(); - ASSERT_FALSE(IsRemoved(bounds_check)); - - // This time add gvn. Need gvn to eliminate the second - // HArrayLength which uses the null check as its input. - graph = BuildSSAGraph4(&allocator, &bounds_check, 0); - graph->BuildDominatorTree(); - RunSimplifierAndGvn(graph); - BoundsCheckElimination bounds_check_elimination_after_gvn(graph); - bounds_check_elimination_after_gvn.Run(); ASSERT_TRUE(IsRemoved(bounds_check)); // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 1); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_initial_1(graph); bounds_check_elimination_with_initial_1.Run(); @@ -852,6 +845,7 @@ TEST(BoundsCheckEliminationTest, LoopArrayBoundsElimination4) { // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. } graph = BuildSSAGraph4(&allocator, &bounds_check, 0, kCondGT); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); BoundsCheckElimination bounds_check_elimination_with_greater_than(graph); bounds_check_elimination_with_greater_than.Run(); @@ -1027,6 +1021,7 @@ TEST(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_body_add->AddSuccessor(outer_header); graph->BuildDominatorTree(); + graph->AnalyzeNaturalLoops(); RunSimplifierAndGvn(graph); // gvn should remove the same bounds check. ASSERT_FALSE(IsRemoved(bounds_check1)); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index cbd042901d..946c0602cf 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -603,7 +603,12 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, const char* descriptor = dex_file_->StringDataByIdx(proto_id.shorty_idx_); Primitive::Type return_type = Primitive::GetType(descriptor[0]); bool is_instance_call = invoke_type != kStatic; - size_t number_of_arguments = strlen(descriptor) - (is_instance_call ? 
0 : 1); + // Remove the return type from the 'proto'. + size_t number_of_arguments = strlen(descriptor) - 1; + if (is_instance_call) { + // One extra argument for 'this'. + ++number_of_arguments; + } MethodReference target_method(dex_file_, method_idx); uintptr_t direct_code; @@ -614,7 +619,8 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_, dex_pc, true, true, &optimized_invoke_type, &target_method, &table_index, &direct_code, &direct_method)) { - VLOG(compiler) << "Did not compile " << PrettyMethod(method_idx, *dex_file_) + VLOG(compiler) << "Did not compile " + << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) << " because a method call could not be resolved"; MaybeRecordStat(MethodCompilationStat::kNotCompiledUnresolvedMethod); return false; @@ -723,10 +729,16 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, } } - invoke = new (arena_) HInvokeStaticOrDirect( - arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index, - is_recursive, string_init_offset, invoke_type, optimized_invoke_type, - clinit_check_requirement); + invoke = new (arena_) HInvokeStaticOrDirect(arena_, + number_of_arguments, + return_type, + dex_pc, + target_method.dex_method_index, + is_recursive, + string_init_offset, + invoke_type, + optimized_invoke_type, + clinit_check_requirement); } size_t start_index = 0; @@ -740,19 +752,29 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, start_index = 1; } - uint32_t descriptor_index = 1; + uint32_t descriptor_index = 1; // Skip the return type. uint32_t argument_index = start_index; if (is_string_init) { start_index = 1; } - for (size_t i = start_index; i < number_of_vreg_arguments; i++, argument_index++) { + for (size_t i = start_index; + // Make sure we don't go over the expected arguments or over the number of + // dex registers given. If the instruction was seen as dead by the verifier, + // it hasn't been properly checked. + (i < number_of_vreg_arguments) && (argument_index < number_of_arguments); + i++, argument_index++) { Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]); bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble); - if (!is_range && is_wide && args[i] + 1 != args[i + 1]) { - LOG(WARNING) << "Non sequential register pair in " << dex_compilation_unit_->GetSymbol() - << " at " << dex_pc; - // We do not implement non sequential register pair. - MaybeRecordStat(MethodCompilationStat::kNotCompiledNonSequentialRegPair); + if (!is_range + && is_wide + && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) { + // Longs and doubles should be in pairs, that is, sequential registers. The verifier should + // reject any class where this is violated. However, the verifier only does these checks + // on non-trivially dead instructions, so we just bail out of the compilation. + VLOG(compiler) << "Did not compile " + << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) + << " because of non-sequential dex register pair in wide argument"; + MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode); + return false; } HInstruction* arg = LoadLocal(is_range ?
register_index + i : args[i], type); @@ -761,7 +783,14 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction, i++; } } - DCHECK_EQ(argument_index, number_of_arguments); + + if (argument_index != number_of_arguments) { + VLOG(compiler) << "Did not compile " + << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_) + << " because of wrong number of arguments in invoke instruction"; + MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode); + return false; + } if (invoke->IsInvokeStaticOrDirect()) { invoke->SetArgumentAt(argument_index, graph_->GetCurrentMethod()); @@ -1206,14 +1235,20 @@ bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index) const { } void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t dex_pc) { + // Verifier guarantees that the payload for PackedSwitch contains: + // (a) number of entries (may be zero) + // (b) first and lowest switch case value (entry 0, always present) + // (c) list of target pcs (entries 1 <= i <= N) SwitchTable table(instruction, dex_pc, false); // Value to test against. HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); + // Retrieve number of entries. uint16_t num_entries = table.GetNumEntries(); - // There should be at least one entry here. - DCHECK_GT(num_entries, 0U); + if (num_entries == 0) { + return; + } // Chained cmp-and-branch, starting from starting_key. int32_t starting_key = table.GetEntryAt(0); @@ -1225,6 +1260,10 @@ void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t d } void HGraphBuilder::BuildSparseSwitch(const Instruction& instruction, uint32_t dex_pc) { + // Verifier guarantees that the payload for SparseSwitch contains: + // (a) number of entries (may be zero) + // (b) sorted key values (entries 0 <= i < N) + // (c) target pcs corresponding to the switch values (entries N <= i < 2*N) SwitchTable table(instruction, dex_pc, true); // Value to test against. 
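For reference, the switch payloads that SwitchTable decodes have the following shape, paraphrased from the Dex bytecode format (the class below is purely illustrative, not ART code):

    // Payload identifiers and layout, paraphrased from the Dex format.
    final class SwitchPayloads {
      static final int PACKED_SWITCH_IDENT = 0x0100;  // then: ushort size, int first_key, int targets[size]
      static final int SPARSE_SWITCH_IDENT = 0x0200;  // then: ushort size, int keys[size] (sorted), int targets[size]
    }

A size of zero is valid at the format level, which is why BuildPackedSwitch above now returns early instead of asserting when num_entries == 0.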
@@ -1424,21 +1463,16 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32 } case Instruction::RETURN: { - DCHECK_NE(return_type_, Primitive::kPrimNot); - DCHECK_NE(return_type_, Primitive::kPrimLong); - DCHECK_NE(return_type_, Primitive::kPrimDouble); BuildReturn(instruction, return_type_); break; } case Instruction::RETURN_OBJECT: { - DCHECK(return_type_ == Primitive::kPrimNot); BuildReturn(instruction, return_type_); break; } case Instruction::RETURN_WIDE: { - DCHECK(return_type_ == Primitive::kPrimDouble || return_type_ == Primitive::kPrimLong); BuildReturn(instruction, return_type_); break; } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 049b3e3a40..09f7d86605 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -236,7 +236,6 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, const GrowableArray<HBasicBlock*>& block_order) { block_order_ = &block_order; DCHECK(block_order_->Get(0) == GetGraph()->GetEntryBlock()); - DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), block_order_->Get(1))); ComputeSpillMask(); first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize; @@ -508,19 +507,14 @@ void CodeGenerator::BuildNativeGCMap( dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap(); verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]); - uint32_t max_native_offset = 0; - for (size_t i = 0; i < pc_infos_.Size(); i++) { - uint32_t native_offset = pc_infos_.Get(i).native_pc; - if (native_offset > max_native_offset) { - max_native_offset = native_offset; - } - } + uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset(); - GcMapBuilder builder(data, pc_infos_.Size(), max_native_offset, dex_gc_map.RegWidth()); - for (size_t i = 0; i < pc_infos_.Size(); i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - uint32_t native_offset = pc_info.native_pc; - uint32_t dex_pc = pc_info.dex_pc; + size_t num_stack_maps = stack_map_stream_.GetNumberOfStackMaps(); + GcMapBuilder builder(data, num_stack_maps, max_native_offset, dex_gc_map.RegWidth()); + for (size_t i = 0; i != num_stack_maps; ++i) { + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + uint32_t native_offset = stack_map_entry.native_pc_offset; + uint32_t dex_pc = stack_map_entry.dex_pc; const uint8_t* references = dex_gc_map.FindBitMap(dex_pc, false); CHECK(references != nullptr) << "Missing ref for dex pc 0x" << std::hex << dex_pc; builder.AddEntry(native_offset, references); @@ -528,17 +522,17 @@ void CodeGenerator::BuildNativeGCMap( } void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const { - for (size_t i = 0; i < pc_infos_.Size(); i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - uint32_t pc2dex_offset = pc_info.native_pc; - int32_t pc2dex_dalvik_offset = pc_info.dex_pc; + for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + uint32_t pc2dex_offset = stack_map_entry.native_pc_offset; + int32_t pc2dex_dalvik_offset = stack_map_entry.dex_pc; src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset})); } } void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { uint32_t pc2dex_data_size = 0u; - uint32_t pc2dex_entries = pc_infos_.Size(); + uint32_t pc2dex_entries = stack_map_stream_.GetNumberOfStackMaps(); uint32_t pc2dex_offset = 
0u; int32_t pc2dex_dalvik_offset = 0; uint32_t dex2pc_data_size = 0u; @@ -547,11 +541,11 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { int32_t dex2pc_dalvik_offset = 0; for (size_t i = 0; i < pc2dex_entries; i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset); - pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset); - pc2dex_offset = pc_info.native_pc; - pc2dex_dalvik_offset = pc_info.dex_pc; + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + pc2dex_data_size += UnsignedLeb128Size(stack_map_entry.native_pc_offset - pc2dex_offset); + pc2dex_data_size += SignedLeb128Size(stack_map_entry.dex_pc - pc2dex_dalvik_offset); + pc2dex_offset = stack_map_entry.native_pc_offset; + pc2dex_dalvik_offset = stack_map_entry.dex_pc; } // Walk over the blocks and find which ones correspond to catch block entries. @@ -586,12 +580,12 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { dex2pc_dalvik_offset = 0u; for (size_t i = 0; i < pc2dex_entries; i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - DCHECK(pc2dex_offset <= pc_info.native_pc); - write_pos = EncodeUnsignedLeb128(write_pos, pc_info.native_pc - pc2dex_offset); - write_pos = EncodeSignedLeb128(write_pos, pc_info.dex_pc - pc2dex_dalvik_offset); - pc2dex_offset = pc_info.native_pc; - pc2dex_dalvik_offset = pc_info.dex_pc; + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + DCHECK(pc2dex_offset <= stack_map_entry.native_pc_offset); + write_pos = EncodeUnsignedLeb128(write_pos, stack_map_entry.native_pc_offset - pc2dex_offset); + write_pos = EncodeSignedLeb128(write_pos, stack_map_entry.dex_pc - pc2dex_dalvik_offset); + pc2dex_offset = stack_map_entry.native_pc_offset; + pc2dex_dalvik_offset = stack_map_entry.dex_pc; } for (size_t i = 0; i < graph_->GetBlocks().Size(); ++i) { @@ -617,9 +611,9 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const { auto it = table.PcToDexBegin(); auto it2 = table.DexToPcBegin(); for (size_t i = 0; i < pc2dex_entries; i++) { - struct PcInfo pc_info = pc_infos_.Get(i); - CHECK_EQ(pc_info.native_pc, it.NativePcOffset()); - CHECK_EQ(pc_info.dex_pc, it.DexPc()); + const StackMapStream::StackMapEntry& stack_map_entry = stack_map_stream_.GetStackMap(i); + CHECK_EQ(stack_map_entry.native_pc_offset, it.NativePcOffset()); + CHECK_EQ(stack_map_entry.dex_pc, it.DexPc()); ++it; } for (size_t i = 0; i < graph_->GetBlocks().Size(); ++i) { @@ -695,14 +689,11 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } // Collect PC infos for the mapping table. - struct PcInfo pc_info; - pc_info.dex_pc = outer_dex_pc; - pc_info.native_pc = GetAssembler()->CodeSize(); - pc_infos_.Add(pc_info); + uint32_t native_pc = GetAssembler()->CodeSize(); if (instruction == nullptr) { // For stack overflow checks. - stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc, pc_info.native_pc, 0, 0, 0, 0); + stack_map_stream_.BeginStackMapEntry(outer_dex_pc, native_pc, 0, 0, 0, 0); stack_map_stream_.EndStackMapEntry(); return; } @@ -719,8 +710,8 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } // The register mask must be a subset of callee-save registers. 
DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); - stack_map_stream_.BeginStackMapEntry(pc_info.dex_pc, - pc_info.native_pc, + stack_map_stream_.BeginStackMapEntry(outer_dex_pc, + native_pc, register_mask, locations->GetStackMask(), outer_environment_size, diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index c6ebf6dbd8..5b0abd76b3 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -64,11 +64,6 @@ class CodeAllocator { DISALLOW_COPY_AND_ASSIGN(CodeAllocator); }; -struct PcInfo { - uint32_t dex_pc; - uintptr_t native_pc; -}; - class SlowPathCode : public ArenaObject<kArenaAllocSlowPaths> { public: SlowPathCode() { @@ -363,16 +358,15 @@ class CodeGenerator { number_of_register_pairs_(number_of_register_pairs), core_callee_save_mask_(core_callee_save_mask), fpu_callee_save_mask_(fpu_callee_save_mask), + stack_map_stream_(graph->GetArena()), + block_order_(nullptr), is_baseline_(false), graph_(graph), compiler_options_(compiler_options), - pc_infos_(graph->GetArena(), 32), slow_paths_(graph->GetArena(), 8), - block_order_(nullptr), current_block_index_(0), is_leaf_(true), - requires_current_method_(false), - stack_map_stream_(graph->GetArena()) {} + requires_current_method_(false) {} // Register allocation logic. void AllocateRegistersLocally(HInstruction* instruction) const; @@ -442,6 +436,11 @@ class CodeGenerator { const uint32_t core_callee_save_mask_; const uint32_t fpu_callee_save_mask_; + StackMapStream stack_map_stream_; + + // The order to use for code generation. + const GrowableArray<HBasicBlock*>* block_order_; + // Whether we are using baseline. bool is_baseline_; @@ -455,12 +454,8 @@ class CodeGenerator { HGraph* const graph_; const CompilerOptions& compiler_options_; - GrowableArray<PcInfo> pc_infos_; GrowableArray<SlowPathCode*> slow_paths_; - // The order to use for code generation. - const GrowableArray<HBasicBlock*>* block_order_; - // The current block index in `block_order_` of the block // we are generating code for. size_t current_block_index_; @@ -471,8 +466,6 @@ class CodeGenerator { // Whether an instruction in the graph accesses the current method. bool requires_current_method_; - StackMapStream stack_map_stream_; - friend class OptimizingCFITest; DISALLOW_COPY_AND_ASSIGN(CodeGenerator); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 3d3e35d0fc..f6ae45238c 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -392,12 +392,38 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(false /* can_relocate_branches */), + assembler_(), isa_features_(isa_features) { // Save the PC register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(PC)); } +void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { + // Ensure that we fix up branches and literal loads and emit the literal pool. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. + for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset; + uint32_t new_position = __ GetAdjustedPosition(old_position); + stack_map_stream_.SetStackMapNativePcOffset(i, new_position); + } + // Adjust native pc offsets of block labels. 
+ for (size_t block_idx = 0u, end = block_order_->Size(); block_idx != end; ++block_idx) { + HBasicBlock* block = block_order_->Get(block_idx); + // Get the label directly from block_labels_ rather than through GetLabelOf() to avoid + // FirstNonEmptyBlock() which could lead to adjusting a label more than once. + DCHECK_LT(static_cast<size_t>(block->GetBlockId()), block_labels_.Size()); + Label* block_label = &block_labels_.GetRawStorage()[block->GetBlockId()]; + DCHECK_EQ(block_label->IsBound(), !block->IsSingleGoto()); + if (block_label->IsBound()) { + __ AdjustLabelPosition(block_label); + } + } + + CodeGenerator::Finalize(allocator); +} + Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const { switch (type) { case Primitive::kPrimLong: { @@ -2831,7 +2857,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { Location left = locations->InAt(0); Location right = locations->InAt(1); - NearLabel less, greater, done; + Label less, greater, done; Primitive::Type type = compare->InputAt(0)->GetType(); switch (type) { case Primitive::kPrimLong: { @@ -2927,7 +2953,7 @@ void InstructionCodeGeneratorARM::GenerateWideAtomicStore(Register addr, Register temp1, Register temp2, HInstruction* instruction) { - NearLabel fail; + Label fail; if (offset != 0) { __ LoadImmediate(temp1, offset); __ add(IP, addr, ShifterOperand(temp1)); @@ -3607,7 +3633,7 @@ void CodeGeneratorARM::MarkGCCard(Register temp, Register object, Register value, bool can_be_null) { - NearLabel is_null; + Label is_null; if (can_be_null) { __ CompareAndBranchIfZero(value, &is_null); } @@ -4036,7 +4062,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { Register cls = locations->InAt(1).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - NearLabel done, zero; + Label done, zero; SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. @@ -4093,19 +4119,15 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { instruction, locations->InAt(1), locations->GetTemp(0), instruction->GetDexPc()); codegen_->AddSlowPath(slow_path); - NearLabel done; // avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &done); + __ CompareAndBranchIfZero(obj, slow_path->GetExitLabel()); } // Compare the class of `obj` with `cls`. 
__ LoadFromOffset(kLoadWord, temp, obj, class_offset); __ cmp(temp, ShifterOperand(cls)); __ b(slow_path->GetEntryLabel(), NE); __ Bind(slow_path->GetExitLabel()); - if (instruction->MustDoNullCheck()) { - __ Bind(&done); - } } void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) { diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index af2481661a..1599a23568 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -139,10 +139,16 @@ class LocationsBuilderARM : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr); - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); @@ -163,10 +169,16 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr); - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + ArmAssembler* GetAssembler() const { return assembler_; } private: @@ -286,6 +298,8 @@ class CodeGeneratorARM : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + void Finalize(CodeAllocator* allocator) OVERRIDE; + const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 2d2419a284..f96810ff80 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -147,9 +147,17 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + Arm64Assembler* GetAssembler() const { return assembler_; } vixl::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->vixl_masm_; } @@ -188,9 +196,17 @@ class LocationsBuilderARM64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) + #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable 
instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleBinaryOp(HBinaryOperation* instr); void HandleFieldSet(HInstruction* instruction); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index faf3cf9ffa..696d8d549e 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -124,10 +124,16 @@ class LocationsBuilderX86 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); @@ -148,10 +154,16 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + X86Assembler* GetAssembler() const { return assembler_; } private: diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index e46994c79e..215754cd46 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -134,10 +134,16 @@ class LocationsBuilderX86_64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); @@ -158,10 +164,16 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { #define DECLARE_VISIT_INSTRUCTION(name, super) \ void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } + X86_64Assembler* GetAssembler() const { return assembler_; } private: diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 17a006cc3a..fdfe518e95 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ 
b/compiler/optimizing/dead_code_elimination.cc @@ -122,10 +122,6 @@ void HDeadCodeElimination::RemoveDeadInstructions() { if (!inst->HasSideEffects() && !inst->CanThrow() && !inst->IsSuspendCheck() - // The current method needs to stay in the graph in case of inlining. - // It is always passed anyway, and keeping it in the graph does not - // affect the generated code. - && !inst->IsCurrentMethod() // If we added an explicit barrier then we should keep it. && !inst->IsMemoryBarrier() && !inst->HasUses()) { diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index fd2e4e81df..b64791788d 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -21,6 +21,7 @@ #include "licm.h" #include "nodes.h" #include "optimization.h" +#include "reference_type_propagation.h" #include "register_allocator.h" #include "ssa_liveness_analysis.h" @@ -354,6 +355,24 @@ class HGraphVisualizerPrinter : public HGraphVisitor { } else { StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId(); } + } else if (IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName) + && is_after_pass_) { + if (instruction->GetType() == Primitive::kPrimNot) { + if (instruction->IsLoadClass()) { + ScopedObjectAccess soa(Thread::Current()); + StartAttributeStream("klass") + << PrettyClass(instruction->AsLoadClass()->GetLoadedClassRTI().GetTypeHandle().Get()); + } else { + ReferenceTypeInfo info = instruction->GetReferenceTypeInfo(); + if (info.IsTop()) { + StartAttributeStream("klass") << "java.lang.Object"; + } else { + ScopedObjectAccess soa(Thread::Current()); + StartAttributeStream("klass") << PrettyClass(info.GetTypeHandle().Get()); + } + StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; + } + } } } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index c3fc33735a..92ebf060eb 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -27,6 +27,7 @@ #include "mirror/class_loader.h" #include "mirror/dex_cache.h" #include "nodes.h" +#include "reference_type_propagation.h" #include "register_allocator.h" #include "ssa_phi_elimination.h" #include "scoped_thread_state_change.h" @@ -57,7 +58,7 @@ void HInliner::Run() { next_block = (i == blocks.Size() - 1) ? nullptr : blocks.Get(i + 1); for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) { HInstruction* next = instruction->GetNext(); - HInvokeStaticOrDirect* call = instruction->AsInvokeStaticOrDirect(); + HInvoke* call = instruction->AsInvoke(); // As long as the call is not intrinsified, it is worth trying to inline. if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { // We use the original invoke type to ensure the resolution of the called method @@ -83,6 +84,93 @@ void HInliner::Run() { } } +static bool IsMethodOrDeclaringClassFinal(ArtMethod* method) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return method->IsFinal() || method->GetDeclaringClass()->IsFinal(); +} + +/** + * Given the `resolved_method` looked up in the dex cache, try to find + * the actual runtime target of an interface or virtual call. + * Return nullptr if the runtime target cannot be proven. + */ +static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resolved_method) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (IsMethodOrDeclaringClassFinal(resolved_method)) { + // No need to lookup further, the resolved method will be the target. 
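For example (hypothetical Java types, for illustration only), a final method or a method of a final class cannot be overridden, so dynamic dispatch can only ever select the resolved method:

    // No subclass of Point can exist, so an invoke-virtual of getX() on a
    // receiver typed Point must dispatch to Point.getX().
    final class Point {
      private final int x;
      Point(int x) { this.x = x; }
      int getX() { return x; }
    }

A non-final method on a non-final class, by contrast, may be overridden by a subclass the compiler never sees, which is why the remaining cases below also require exact receiver type information.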
+ return resolved_method; + } + + HInstruction* receiver = invoke->InputAt(0); + if (receiver->IsNullCheck()) { + // Due to multiple levels of inlining within the same pass, it might be that + // null check does not have the reference type of the actual receiver. + receiver = receiver->InputAt(0); + } + ReferenceTypeInfo info = receiver->GetReferenceTypeInfo(); + if (info.IsTop()) { + // We have no information on the receiver. + return nullptr; + } else if (!info.IsExact()) { + // We currently only support inlining with known receivers. + // TODO: Remove this check, we should be able to inline final methods + // on unknown receivers. + return nullptr; + } else if (info.GetTypeHandle()->IsInterface()) { + // Statically knowing that the receiver has an interface type cannot + // help us find what is the target method. + return nullptr; + } else if (!resolved_method->GetDeclaringClass()->IsAssignableFrom(info.GetTypeHandle().Get())) { + // The method that we're trying to call is not in the receiver's class or super classes. + return nullptr; + } + + ClassLinker* cl = Runtime::Current()->GetClassLinker(); + size_t pointer_size = cl->GetImagePointerSize(); + if (invoke->IsInvokeInterface()) { + resolved_method = info.GetTypeHandle()->FindVirtualMethodForInterface( + resolved_method, pointer_size); + } else { + DCHECK(invoke->IsInvokeVirtual()); + resolved_method = info.GetTypeHandle()->FindVirtualMethodForVirtual( + resolved_method, pointer_size); + } + + if (resolved_method == nullptr) { + // The information we had on the receiver was not enough to find + // the target method. Since we check above the exact type of the receiver, + // the only reason this can happen is an IncompatibleClassChangeError. + return nullptr; + } else if (resolved_method->IsAbstract()) { + // The information we had on the receiver was not enough to find + // the target method. Since we check above the exact type of the receiver, + // the only reason this can happen is an IncompatibleClassChangeError. + return nullptr; + } else if (IsMethodOrDeclaringClassFinal(resolved_method)) { + // A final method has to be the target method. + return resolved_method; + } else if (info.IsExact()) { + // If we found a method and the receiver's concrete type is statically + // known, we know for sure the target. + return resolved_method; + } else { + // Even if we did find a method, the receiver type was not enough to + // statically find the runtime target. 
+ return nullptr; + } +} + +static uint32_t FindMethodIndexIn(ArtMethod* method, + const DexFile& dex_file, + uint32_t referrer_index) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (method->GetDexFile()->GetLocation().compare(dex_file.GetLocation()) == 0) { + return method->GetDexMethodIndex(); + } else { + return method->FindDexMethodIndexInOtherDexFile(dex_file, referrer_index); + } +} + bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) const { ScopedObjectAccess soa(Thread::Current()); const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); @@ -99,6 +187,25 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) con return false; } + if (!invoke_instruction->IsInvokeStaticOrDirect()) { + resolved_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method); + if (resolved_method == nullptr) { + VLOG(compiler) << "Interface or virtual call to " + << PrettyMethod(method_index, caller_dex_file) + << " could not be statically determined"; + return false; + } + // We have found a method, but we need to find where that method is for the caller's + // dex file. + method_index = FindMethodIndexIn(resolved_method, caller_dex_file, method_index); + if (method_index == DexFile::kDexNoIndex) { + VLOG(compiler) << "Interface or virtual call to " + << PrettyMethod(resolved_method) + << " cannot be inlined because unaccessible to caller"; + return false; + } + } + bool same_dex_file = true; const DexFile& outer_dex_file = *outer_compilation_unit_.GetDexFile(); if (resolved_method->GetDexFile()->GetLocation().compare(outer_dex_file.GetLocation()) != 0) { @@ -149,7 +256,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) con return false; } - if (!TryBuildAndInline(resolved_method, invoke_instruction, method_index, same_dex_file)) { + if (!TryBuildAndInline(resolved_method, invoke_instruction, same_dex_file)) { return false; } @@ -160,11 +267,11 @@ bool HInliner::TryInline(HInvoke* invoke_instruction, uint32_t method_index) con bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, - uint32_t method_index, bool same_dex_file) const { ScopedObjectAccess soa(Thread::Current()); const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); - const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); + const DexFile& callee_dex_file = *resolved_method->GetDexFile(); + uint32_t method_index = resolved_method->GetDexMethodIndex(); DexCompilationUnit dex_compilation_unit( nullptr, @@ -204,7 +311,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, } HGraph* callee_graph = new (graph_->GetArena()) HGraph( graph_->GetArena(), - caller_dex_file, + callee_dex_file, method_index, requires_ctor_barrier, compiler_driver_->GetInstructionSet(), @@ -221,7 +328,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, &inline_stats); if (!builder.BuildGraph(*code_item)) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be built, so cannot be inlined"; // There could be multiple reasons why the graph could not be built, including // unaccessible methods/fields due to using a different dex cache. 
We do not mark @@ -231,14 +338,14 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, compiler_driver_->GetInstructionSet())) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " cannot be inlined because of the register allocator"; resolved_method->SetShouldNotInline(); return false; } if (!callee_graph->TryBuildingSsa()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be transformed to SSA"; resolved_method->SetShouldNotInline(); return false; @@ -247,11 +354,13 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, // Run simple optimizations on the graph. HDeadCodeElimination dce(callee_graph, stats_); HConstantFolding fold(callee_graph); + ReferenceTypePropagation type_propagation(callee_graph, handles_); InstructionSimplifier simplify(callee_graph, stats_); HOptimization* optimizations[] = { &dce, &fold, + &type_propagation, &simplify, }; @@ -265,6 +374,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, outer_compilation_unit_, dex_compilation_unit, compiler_driver_, + handles_, stats_, depth_ + 1); inliner.Run(); @@ -275,7 +385,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, // a throw predecessor. HBasicBlock* exit_block = callee_graph->GetExitBlock(); if (exit_block == nullptr) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it has an infinite loop"; resolved_method->SetShouldNotInline(); return false; @@ -289,7 +399,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, } } if (has_throw_predecessor) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because one branch always throws"; resolved_method->SetShouldNotInline(); return false; @@ -300,7 +410,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, for (; !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (block->IsLoopHeader()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it contains a loop"; resolved_method->SetShouldNotInline(); return false; @@ -314,21 +424,21 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, if (current->IsInvokeInterface()) { // Disable inlining of interface calls. The cost in case of entering the // resolution conflict is currently too high. 
- VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because it has an interface call."; resolved_method->SetShouldNotInline(); return false; } if (!same_dex_file && current->NeedsEnvironment()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because " << current->DebugName() << " needs an environment and is in a different dex file"; return false; } if (!same_dex_file && current->NeedsDexCache()) { - VLOG(compiler) << "Method " << PrettyMethod(method_index, caller_dex_file) + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) << " could not be inlined because " << current->DebugName() << " it is in a different dex file and requires access to the dex cache"; // Do not flag the method as not-inlineable. A caller within the same diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index f7d8cf8715..24044b73a1 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -34,13 +34,15 @@ class HInliner : public HOptimization { const DexCompilationUnit& outer_compilation_unit, const DexCompilationUnit& caller_compilation_unit, CompilerDriver* compiler_driver, + StackHandleScopeCollection* handles, OptimizingCompilerStats* stats, size_t depth = 0) : HOptimization(outer_graph, true, kInlinerPassName, stats), outer_compilation_unit_(outer_compilation_unit), caller_compilation_unit_(caller_compilation_unit), compiler_driver_(compiler_driver), - depth_(depth) {} + depth_(depth), + handles_(handles) {} void Run() OVERRIDE; @@ -50,13 +52,13 @@ class HInliner : public HOptimization { bool TryInline(HInvoke* invoke_instruction, uint32_t method_index) const; bool TryBuildAndInline(ArtMethod* resolved_method, HInvoke* invoke_instruction, - uint32_t method_index, bool same_dex_file) const; const DexCompilationUnit& outer_compilation_unit_; const DexCompilationUnit& caller_compilation_unit_; CompilerDriver* const compiler_driver_; const size_t depth_; + StackHandleScopeCollection* const handles_; DISALLOW_COPY_AND_ASSIGN(HInliner); }; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index fcb3471821..2daeeb3c0c 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -186,33 +186,94 @@ bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* ins return false; } -void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - if (!check_cast->InputAt(0)->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { - check_cast->ClearMustDoNullCheck(); - } - - if (!load_class->IsResolved()) { +// Returns whether doing a type test between the class of `object` against `klass` has +// a statically known outcome. The result of the test is stored in `outcome`. +static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { + if (!klass->IsResolved()) { // If the class couldn't be resolve it's not safe to compare against it. It's // default type would be Top which might be wider that the actual class type // and thus producing wrong results. 
- return; + return false; } - ReferenceTypeInfo obj_rti = check_cast->InputAt(0)->GetReferenceTypeInfo(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + + ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); + ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); ScopedObjectAccess soa(Thread::Current()); if (class_rti.IsSupertypeOf(obj_rti)) { + *outcome = true; + return true; + } else if (obj_rti.IsExact()) { + // The test failed at compile time so will also fail at runtime. + *outcome = false; + return true; + } else if (!class_rti.IsInterface() + && !obj_rti.IsInterface() + && !obj_rti.IsSupertypeOf(class_rti)) { + // Different type hierarchy. The test will fail. + *outcome = false; + return true; + } + return false; +} + +void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { + HInstruction* object = check_cast->InputAt(0); + if (!object->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { + check_cast->ClearMustDoNullCheck(); + } + + if (object->IsNullConstant()) { check_cast->GetBlock()->RemoveInstruction(check_cast); if (stats_ != nullptr) { stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); } + return; + } + + bool outcome; + if (TypeCheckHasKnownOutcome(check_cast->InputAt(1)->AsLoadClass(), object, &outcome)) { + if (outcome) { + check_cast->GetBlock()->RemoveInstruction(check_cast); + if (stats_ != nullptr) { + stats_->RecordStat(MethodCompilationStat::kRemovedCheckedCast); + } + } else { + // Don't do anything for exceptional cases for now. Ideally we should remove + // all instructions and blocks this instruction dominates. + } } } void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { - if (!instruction->InputAt(0)->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + HInstruction* object = instruction->InputAt(0); + bool can_be_null = true; + if (!object->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + can_be_null = false; instruction->ClearMustDoNullCheck(); } + + HGraph* graph = GetGraph(); + if (object->IsNullConstant()) { + instruction->ReplaceWith(graph->GetIntConstant(0)); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + + bool outcome; + if (TypeCheckHasKnownOutcome(instruction->InputAt(1)->AsLoadClass(), object, &outcome)) { + if (outcome && can_be_null) { + // Type test will succeed, we just need a null test. + HNotEqual* test = new (graph->GetArena()) HNotEqual(graph->GetNullConstant(), object); + instruction->GetBlock()->InsertInstructionBefore(test, instruction); + instruction->ReplaceWith(test); + } else { + // We've statically determined the result of the instanceof. 
+ instruction->ReplaceWith(graph->GetIntConstant(outcome)); + } + RecordSimplification(); + instruction->GetBlock()->RemoveInstruction(instruction); + } } void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index 024462081f..668956a614 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -36,6 +36,9 @@ class InstructionSimplifier : public HOptimization { static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; void Run() OVERRIDE; + + private: + DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier); }; } // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index cd91d2c87b..68c197e607 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -288,7 +288,10 @@ void HGraph::InsertConstant(HConstant* constant) { } HNullConstant* HGraph::GetNullConstant() { - if (cached_null_constant_ == nullptr) { + // For simplicity, don't bother reviving the cached null constant if it is + // not null and not in a block. Otherwise, we need to clear the instruction + // id and/or any invariants the graph is assuming when adding new instructions. + if ((cached_null_constant_ == nullptr) || (cached_null_constant_->GetBlock() == nullptr)) { cached_null_constant_ = new (arena_) HNullConstant(); InsertConstant(cached_null_constant_); } @@ -296,7 +299,10 @@ HNullConstant* HGraph::GetNullConstant() { } HCurrentMethod* HGraph::GetCurrentMethod() { - if (cached_current_method_ == nullptr) { + // For simplicity, don't bother reviving the cached current method if it is + // not null and not in a block. Otherwise, we need to clear the instruction + // id and/or any invariants the graph is assuming when adding new instructions. + if ((cached_current_method_ == nullptr) || (cached_current_method_->GetBlock() == nullptr)) { cached_current_method_ = new (arena_) HCurrentMethod( Is64BitInstructionSet(instruction_set_) ? Primitive::kPrimLong : Primitive::kPrimInt); if (entry_block_->GetFirstInstruction() == nullptr) { @@ -1510,6 +1516,81 @@ void HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { invoke->GetBlock()->RemoveInstruction(invoke); } +/* + * Loop will be transformed to: + * old_pre_header + * | + * if_block + * / \ + * dummy_block deopt_block + * \ / + * new_pre_header + * | + * header + */ +void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { + DCHECK(header->IsLoopHeader()); + HBasicBlock* pre_header = header->GetDominator(); + + // Need this to avoid critical edge. + HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + // Need this to avoid critical edge. 
+ HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); + AddBlock(if_block); + AddBlock(dummy_block); + AddBlock(deopt_block); + AddBlock(new_pre_header); + + header->ReplacePredecessor(pre_header, new_pre_header); + pre_header->successors_.Reset(); + pre_header->dominated_blocks_.Reset(); + + pre_header->AddSuccessor(if_block); + if_block->AddSuccessor(dummy_block); // True successor + if_block->AddSuccessor(deopt_block); // False successor + dummy_block->AddSuccessor(new_pre_header); + deopt_block->AddSuccessor(new_pre_header); + + pre_header->dominated_blocks_.Add(if_block); + if_block->SetDominator(pre_header); + if_block->dominated_blocks_.Add(dummy_block); + dummy_block->SetDominator(if_block); + if_block->dominated_blocks_.Add(deopt_block); + deopt_block->SetDominator(if_block); + if_block->dominated_blocks_.Add(new_pre_header); + new_pre_header->SetDominator(if_block); + new_pre_header->dominated_blocks_.Add(header); + header->SetDominator(new_pre_header); + + size_t index_of_header = 0; + while (reverse_post_order_.Get(index_of_header) != header) { + index_of_header++; + } + MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1); + reverse_post_order_.Put(index_of_header++, if_block); + reverse_post_order_.Put(index_of_header++, dummy_block); + reverse_post_order_.Put(index_of_header++, deopt_block); + reverse_post_order_.Put(index_of_header++, new_pre_header); + + HLoopInformation* info = pre_header->GetLoopInformation(); + if (info != nullptr) { + if_block->SetLoopInformation(info); + dummy_block->SetLoopInformation(info); + deopt_block->SetLoopInformation(info); + new_pre_header->SetLoopInformation(info); + for (HLoopInformationOutwardIterator loop_it(*pre_header); + !loop_it.Done(); + loop_it.Advance()) { + loop_it.Current()->Add(if_block); + loop_it.Current()->Add(dummy_block); + loop_it.Current()->Add(deopt_block); + loop_it.Current()->Add(new_pre_header); + } + } +} + std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs) { ScopedObjectAccess soa(Thread::Current()); os << "[" diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index f87775e195..9443653db7 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -195,6 +195,10 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { // Inline this graph in `outer_graph`, replacing the given `invoke` instruction. void InlineInto(HGraph* outer_graph, HInvoke* invoke); + // Need to add a couple of blocks to test if the loop body is entered and + // put deoptimization instructions, etc. + void TransformLoopHeaderForBCE(HBasicBlock* header); + // Removes `block` from the graph. void DeleteDeadBlock(HBasicBlock* block); @@ -331,6 +335,7 @@ class HGraph : public ArenaObject<kArenaAllocMisc> { } // If not found or previously deleted, create and cache a new instruction. + // Don't bother reviving a previously deleted instruction, for simplicity. 
if (constant == nullptr || constant->GetBlock() == nullptr) { constant = new (arena_) InstructionType(value); cache->Overwrite(value, constant); @@ -824,7 +829,7 @@ class HLoopInformationOutwardIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HLoopInformationOutwardIterator); }; -#define FOR_EACH_CONCRETE_INSTRUCTION(M) \ +#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ M(Add, BinaryOperation) \ M(And, BinaryOperation) \ M(ArrayGet, Instruction) \ @@ -894,6 +899,21 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) + +#define FOR_EACH_CONCRETE_INSTRUCTION(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) + #define FOR_EACH_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION(M) \ M(Constant, Instruction) \ @@ -1281,6 +1301,9 @@ class ReferenceTypeInfo : ValueObject { bool IsExact() const { return is_exact_; } bool IsTop() const { return is_top_; } + bool IsInterface() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + return !IsTop() && GetTypeHandle()->IsInterface(); + } Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } @@ -2461,7 +2484,7 @@ class HInvoke : public HInstruction { intrinsic_ = intrinsic; } - bool IsInlined() const { + bool IsFromInlinedInvoke() const { return GetEnvironment()->GetParent() != nullptr; } @@ -3581,7 +3604,7 @@ class HLoadClass : public HExpression<1> { bool CanThrow() const OVERRIDE { // May call runtime and and therefore can throw. // TODO: finer grain decision. - return !is_referrers_class_; + return CanCallRuntime(); } ReferenceTypeInfo GetLoadedClassRTI() { @@ -4246,6 +4269,39 @@ class HBlocksInLoopIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator); }; +// Iterator over the blocks that art part of the loop. Includes blocks part +// of an inner loop. The order in which the blocks are iterated is reverse +// post order. +class HBlocksInLoopReversePostOrderIterator : public ValueObject { + public: + explicit HBlocksInLoopReversePostOrderIterator(const HLoopInformation& info) + : blocks_in_loop_(info.GetBlocks()), + blocks_(info.GetHeader()->GetGraph()->GetReversePostOrder()), + index_(0) { + if (!blocks_in_loop_.IsBitSet(blocks_.Get(index_)->GetBlockId())) { + Advance(); + } + } + + bool Done() const { return index_ == blocks_.Size(); } + HBasicBlock* Current() const { return blocks_.Get(index_); } + void Advance() { + ++index_; + for (size_t e = blocks_.Size(); index_ < e; ++index_) { + if (blocks_in_loop_.IsBitSet(blocks_.Get(index_)->GetBlockId())) { + break; + } + } + } + + private: + const BitVector& blocks_in_loop_; + const GrowableArray<HBasicBlock*>& blocks_; + size_t index_; + + DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator); +}; + inline int64_t Int64FromConstant(HConstant* constant) { DCHECK(constant->IsIntConstant() || constant->IsLongConstant()); return constant->IsIntConstant() ? 
constant->AsIntConstant()->GetValue() diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index ccf8de9f6a..2d1c0ba9f9 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ +#include "base/arena_object.h" #include "nodes.h" #include "optimizing_compiler_stats.h" @@ -25,7 +26,7 @@ namespace art { /** * Abstraction to implement an optimization pass. */ -class HOptimization : public ValueObject { +class HOptimization : public ArenaObject<kArenaAllocMisc> { public: HOptimization(HGraph* graph, bool is_in_ssa_form, diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index b0d1433667..fe3bb1a2b4 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -71,6 +71,8 @@ class OptimizingCFITest : public CFITest { } } } + GrowableArray<HBasicBlock*> blocks(&allocator, 0); + code_gen->block_order_ = &blocks; code_gen->ComputeSpillMask(); code_gen->SetFrameSize(frame_size); code_gen->GenerateFrameEntry(); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index f6ef2f7e82..8d43adaada 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -318,43 +318,61 @@ static void RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassInfoPrinter* pass_info_printer, StackHandleScopeCollection* handles) { - HDeadCodeElimination dce1(graph, stats, - HDeadCodeElimination::kInitialDeadCodeEliminationPassName); - HDeadCodeElimination dce2(graph, stats, - HDeadCodeElimination::kFinalDeadCodeEliminationPassName); - HConstantFolding fold1(graph); - InstructionSimplifier simplify1(graph, stats); - HBooleanSimplifier boolean_simplify(graph); - - HInliner inliner(graph, dex_compilation_unit, dex_compilation_unit, driver, stats); - - HConstantFolding fold2(graph, "constant_folding_after_inlining"); - SideEffectsAnalysis side_effects(graph); - GVNOptimization gvn(graph, side_effects); - LICM licm(graph, side_effects); - BoundsCheckElimination bce(graph); - ReferenceTypePropagation type_propagation(graph, handles); - InstructionSimplifier simplify2(graph, stats, "instruction_simplifier_after_types"); - - IntrinsicsRecognizer intrinsics(graph, driver); + ArenaAllocator* arena = graph->GetArena(); + HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( + graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName); + HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination( + graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName); + HConstantFolding* fold1 = new (arena) HConstantFolding(graph); + InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); + HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph); + + HInliner* inliner = new (arena) HInliner( + graph, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); + + HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining"); + SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); + GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects); + LICM* licm = new (arena) LICM(graph, *side_effects); + BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph); + ReferenceTypePropagation* 
type_propagation = + new (arena) ReferenceTypePropagation(graph, handles); + InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_after_types"); + InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_after_bce"); + ReferenceTypePropagation* type_propagation2 = + new (arena) ReferenceTypePropagation(graph, handles); + InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( + graph, stats, "instruction_simplifier_before_codegen"); + + IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver); HOptimization* optimizations[] = { - &intrinsics, - &dce1, - &fold1, - &simplify1, - &inliner, + intrinsics, + fold1, + simplify1, + type_propagation, + dce1, + simplify2, + inliner, + // Run another type propagation phase: inlining will open up more opprotunities + // to remove checkast/instanceof and null checks. + type_propagation2, // BooleanSimplifier depends on the InstructionSimplifier removing redundant // suspend checks to recognize empty blocks. - &boolean_simplify, - &fold2, - &side_effects, - &gvn, - &licm, - &bce, - &type_propagation, - &simplify2, - &dce2, + boolean_simplify, + fold2, + side_effects, + gvn, + licm, + bce, + simplify3, + dce2, + // The codegen has a few assumptions that only the instruction simplifier can + // satisfy. For example, the code generator does not expect to see a + // HTypeConversion from a type to the same type. + simplify4, }; RunOptimizations(optimizations, arraysize(optimizations), pass_info_printer); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index b6b1bb1cad..53d052b2bc 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -19,6 +19,7 @@ #include <sstream> #include <string> +#include <type_traits> #include "atomic.h" @@ -37,8 +38,8 @@ enum MethodCompilationStat { kNotCompiledClassNotVerified, kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, + kNotCompiledMalformedOpcode, kNotCompiledNoCodegen, - kNotCompiledNonSequentialRegPair, kNotCompiledPathological, kNotCompiledSpaceFilter, kNotCompiledUnhandledInstruction, @@ -84,14 +85,15 @@ class OptimizingCompilerStats { for (int i = 0; i < kLastStat; i++) { if (compile_stats_[i] != 0) { - LOG(INFO) << PrintMethodCompilationStat(i) << ": " << compile_stats_[i]; + LOG(INFO) << PrintMethodCompilationStat(static_cast<MethodCompilationStat>(i)) << ": " + << compile_stats_[i]; } } } } private: - std::string PrintMethodCompilationStat(int stat) const { + std::string PrintMethodCompilationStat(MethodCompilationStat stat) const { switch (stat) { case kAttemptCompilation : return "kAttemptCompilation"; case kCompiledBaseline : return "kCompiledBaseline"; @@ -105,8 +107,8 @@ class OptimizingCompilerStats { case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified"; case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod"; case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches"; + case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode"; case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen"; - case kNotCompiledNonSequentialRegPair : return "kNotCompiledNonSequentialRegPair"; case kNotCompiledPathological : return "kNotCompiledPathological"; case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter"; case kNotCompiledUnhandledInstruction : return 
"kNotCompiledUnhandledInstruction"; @@ -120,9 +122,12 @@ class OptimizingCompilerStats { case kRemovedCheckedCast: return "kRemovedCheckedCast"; case kRemovedDeadInstruction: return "kRemovedDeadInstruction"; case kRemovedNullCheck: return "kRemovedNullCheck"; - default: LOG(FATAL) << "invalid stat"; + + case kLastStat: break; // Invalid to print out. } - return ""; + LOG(FATAL) << "invalid stat " + << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat); + UNREACHABLE(); } AtomicInteger compile_stats_[kLastStat]; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index a249aa9711..ca928ae0f2 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -86,16 +86,6 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire DCHECK(last_input != nullptr) << "Last input is not HLoadClass. It is " << last_input->DebugName(); - // The static call will initialize the class so there's no need for a clinit check if - // it's the first user. - // There is one special case where we still need the clinit check, when inlining. Because - // currently the callee is responsible for reporting parameters to the GC, the code - // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. - // Therefore we cannot allocate any object in that code, including loading a new class. - if (last_input == invoke->GetPrevious() && !invoke->IsInlined()) { - last_input->SetMustGenerateClinitCheck(false); - } - // Remove a load class instruction as last input of a static // invoke, which has been added (along with a clinit check, // removed by PrepareForRegisterAllocation::VisitClinitCheck @@ -104,10 +94,20 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire // stage (i.e., after inlining has been performed). invoke->RemoveLoadClassAsLastInput(); - // If the load class instruction is no longer used, remove it from - // the graph. - if (!last_input->HasUses() && !(last_input->MustGenerateClinitCheck() && invoke->IsInlined())) { - last_input->GetBlock()->RemoveInstruction(last_input); + // The static call will initialize the class so there's no need for a clinit check if + // it's the first user. + // There is one special case where we still need the clinit check, when inlining. Because + // currently the callee is responsible for reporting parameters to the GC, the code + // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC. + // Therefore we cannot allocate any object in that code, including loading a new class. + if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) { + last_input->SetMustGenerateClinitCheck(false); + + // If the load class instruction is no longer used, remove it from + // the graph. 
+ if (!last_input->HasUses()) { + last_input->GetBlock()->RemoveInstruction(last_input); + } } } } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 4f1f45769d..a048c856c5 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -23,6 +23,30 @@ namespace art { +class RTPVisitor : public HGraphDelegateVisitor { + public: + RTPVisitor(HGraph* graph, StackHandleScopeCollection* handles) + : HGraphDelegateVisitor(graph), + handles_(handles) {} + + void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; + void VisitLoadClass(HLoadClass* load_class) OVERRIDE; + void VisitNewArray(HNewArray* instr) OVERRIDE; + void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); + void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass, bool is_exact); + void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE; + void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE; + void VisitInvoke(HInvoke* instr) OVERRIDE; + void VisitArrayGet(HArrayGet* instr) OVERRIDE; + void UpdateReferenceTypeInfo(HInstruction* instr, + uint16_t type_idx, + const DexFile& dex_file, + bool is_exact); + + private: + StackHandleScopeCollection* handles_; +}; + void ReferenceTypePropagation::Run() { // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. @@ -35,23 +59,13 @@ void ReferenceTypePropagation::Run() { void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { // TODO: handle other instructions that give type info - // (Call/array accesses) + // (array accesses) + RTPVisitor visitor(graph_, handles_); // Initialize exact types first for faster convergence. for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instr = it.Current(); - // TODO: Make ReferenceTypePropagation a visitor or create a new one. - if (instr->IsNewInstance()) { - VisitNewInstance(instr->AsNewInstance()); - } else if (instr->IsLoadClass()) { - VisitLoadClass(instr->AsLoadClass()); - } else if (instr->IsNewArray()) { - VisitNewArray(instr->AsNewArray()); - } else if (instr->IsInstanceFieldGet()) { - VisitInstanceFieldGet(instr->AsInstanceFieldGet()); - } else if (instr->IsStaticFieldGet()) { - VisitStaticFieldGet(instr->AsStaticFieldGet()); - } + instr->Accept(&visitor); } // Handle Phis. 
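Illustrative aside, not part of the patch: the VisitBasicBlock change just above replaces the hand-written chain of instr->IsNewInstance()/IsLoadClass()/... checks with double dispatch through an HGraphDelegateVisitor subclass (RTPVisitor) and instr->Accept(&visitor). Below is a minimal standalone C++ sketch of that dispatch pattern; the node and visitor names are made up for illustration and are not ART's classes.

// Standalone sketch: replacing an if/else type-check chain with virtual
// double dispatch, as the RTPVisitor change above does. All names here
// (Node, NewInstanceNode, LoadClassNode, TypeInfoVisitor) are illustrative.
#include <iostream>
#include <memory>
#include <vector>

class NewInstanceNode;
class LoadClassNode;

class NodeVisitor {
 public:
  virtual ~NodeVisitor() {}
  virtual void VisitNewInstance(NewInstanceNode* node) = 0;
  virtual void VisitLoadClass(LoadClassNode* node) = 0;
};

class Node {
 public:
  virtual ~Node() {}
  // Each concrete node calls back into the visitor method for its own type,
  // so the pass needs no IsNewInstance()/IsLoadClass() chain.
  virtual void Accept(NodeVisitor* visitor) = 0;
};

class NewInstanceNode : public Node {
 public:
  void Accept(NodeVisitor* visitor) override { visitor->VisitNewInstance(this); }
};

class LoadClassNode : public Node {
 public:
  void Accept(NodeVisitor* visitor) override { visitor->VisitLoadClass(this); }
};

class TypeInfoVisitor : public NodeVisitor {
 public:
  void VisitNewInstance(NewInstanceNode*) override { std::cout << "exact type from new-instance\n"; }
  void VisitLoadClass(LoadClassNode*) override { std::cout << "type from load-class\n"; }
};

int main() {
  std::vector<std::unique_ptr<Node>> block;
  block.push_back(std::make_unique<NewInstanceNode>());
  block.push_back(std::make_unique<LoadClassNode>());
  TypeInfoVisitor visitor;
  for (auto& instr : block) {
    instr->Accept(&visitor);  // Mirrors instr->Accept(&visitor) in VisitBasicBlock.
  }
  return 0;
}

The payoff mirrored here is that handling a new opcode in the pass only means overriding one more Visit method instead of growing the if/else chain.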
@@ -166,35 +180,39 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { } } -void ReferenceTypePropagation::SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass) { +void RTPVisitor::SetClassAsTypeInfo(HInstruction* instr, + mirror::Class* klass, + bool is_exact) { if (klass != nullptr) { ScopedObjectAccess soa(Thread::Current()); MutableHandle<mirror::Class> handle = handles_->NewHandle(klass); - instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, true)); + is_exact = is_exact || klass->IsFinal(); + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact)); } } -void ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr, - uint16_t type_idx, - const DexFile& dex_file) { +void RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr, + uint16_t type_idx, + const DexFile& dex_file, + bool is_exact) { DCHECK_EQ(instr->GetType(), Primitive::kPrimNot); ScopedObjectAccess soa(Thread::Current()); mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(dex_file); // Get type from dex cache assuming it was populated by the verifier. - SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx)); + SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact); } -void ReferenceTypePropagation::VisitNewInstance(HNewInstance* instr) { - UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile()); +void RTPVisitor::VisitNewInstance(HNewInstance* instr) { + UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } -void ReferenceTypePropagation::VisitNewArray(HNewArray* instr) { - UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile()); +void RTPVisitor::VisitNewArray(HNewArray* instr) { + UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } -void ReferenceTypePropagation::UpdateFieldAccessTypeInfo(HInstruction* instr, - const FieldInfo& info) { +void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr, + const FieldInfo& info) { // The field index is unknown only during tests. 
if (instr->GetType() != Primitive::kPrimNot || info.GetFieldIndex() == kUnknownFieldIndex) { return; @@ -206,18 +224,18 @@ void ReferenceTypePropagation::UpdateFieldAccessTypeInfo(HInstruction* instr, ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), dex_cache); DCHECK(field != nullptr); mirror::Class* klass = field->GetType<false>(); - SetClassAsTypeInfo(instr, klass); + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); } -void ReferenceTypePropagation::VisitInstanceFieldGet(HInstanceFieldGet* instr) { +void RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) { UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); } -void ReferenceTypePropagation::VisitStaticFieldGet(HStaticFieldGet* instr) { +void RTPVisitor::VisitStaticFieldGet(HStaticFieldGet* instr) { UpdateFieldAccessTypeInfo(instr, instr->GetFieldInfo()); } -void ReferenceTypePropagation::VisitLoadClass(HLoadClass* instr) { +void RTPVisitor::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); mirror::DexCache* dex_cache = Runtime::Current()->GetClassLinker()->FindDexCache(instr->GetDexFile()); @@ -295,6 +313,34 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { return !previous_rti.IsEqual(instr->GetReferenceTypeInfo()); } +void RTPVisitor::VisitInvoke(HInvoke* instr) { + if (instr->GetType() != Primitive::kPrimNot) { + return; + } + + ScopedObjectAccess soa(Thread::Current()); + ClassLinker* cl = Runtime::Current()->GetClassLinker(); + mirror::DexCache* dex_cache = cl->FindDexCache(instr->GetDexFile()); + ArtMethod* method = dex_cache->GetResolvedMethod( + instr->GetDexMethodIndex(), cl->GetImagePointerSize()); + DCHECK(method != nullptr); + mirror::Class* klass = method->GetReturnType(false); + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); +} + +void RTPVisitor::VisitArrayGet(HArrayGet* instr) { + if (instr->GetType() != Primitive::kPrimNot) { + return; + } + + HInstruction* parent = instr->InputAt(0); + ScopedObjectAccess soa(Thread::Current()); + Handle<mirror::Class> handle = parent->GetReferenceTypeInfo().GetTypeHandle(); + if (handle.GetReference() != nullptr && handle->IsObjectArrayClass()) { + SetClassAsTypeInfo(instr, handle->GetComponentType(), /* is_exact */ false); + } +} + void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo(); // Be sure that we don't go over the bounded type. 
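Illustrative aside, not part of the patch: in the reference_type_propagation.cc hunks above, SetClassAsTypeInfo now takes an is_exact flag and upgrades it with klass->IsFinal(), so allocation sites (new-instance, new-array) stay exact while invoke return types, field gets, and array gets become exact only when the class is final. A small self-contained sketch of that rule follows; the toy types stand in for Handle<mirror::Class> and ReferenceTypeInfo and are not ART's API.

// Standalone sketch of the "exactness" rule used by SetClassAsTypeInfo above:
// a known-exact producer stays exact, and a final class is promoted to exact
// even when the producer only gives an upper bound on the type.
#include <cassert>
#include <string>

struct ToyClass {
  std::string name;
  bool is_final;
};

struct TypeInfo {
  const ToyClass* klass = nullptr;
  bool is_exact = false;
};

TypeInfo MakeTypeInfo(const ToyClass* klass, bool is_exact_source) {
  TypeInfo info;
  info.klass = klass;
  info.is_exact = is_exact_source || (klass != nullptr && klass->is_final);
  return info;
}

int main() {
  ToyClass string_class{"java.lang.String", /* is_final */ true};
  ToyClass object_class{"java.lang.Object", /* is_final */ false};

  // Return type of an invoke: not exact by itself, but String is final.
  assert(MakeTypeInfo(&string_class, /* is_exact_source */ false).is_exact);
  // An Object-typed field get stays inexact: any subclass may flow in.
  assert(!MakeTypeInfo(&object_class, /* is_exact_source */ false).is_exact);
  // A new-instance is exact regardless of finality.
  assert(MakeTypeInfo(&object_class, /* is_exact_source */ true).is_exact);
  return 0;
}

Exact type info is what lets the new InstructionSimplifier code earlier in this patch treat an instanceof or checkcast against such a receiver as having a statically known outcome.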
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 74e425fb3e..0d687d25cb 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -40,23 +40,12 @@ class ReferenceTypePropagation : public HOptimization { static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation"; private: - void VisitNewInstance(HNewInstance* new_instance); - void VisitLoadClass(HLoadClass* load_class); - void VisitNewArray(HNewArray* instr); void VisitPhi(HPhi* phi); void VisitBasicBlock(HBasicBlock* block); - void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); - void SetClassAsTypeInfo(HInstruction* instr, mirror::Class* klass); - void UpdateBoundType(HBoundType* bound_type) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void UpdatePhi(HPhi* phi) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void BoundTypeForIfNotNull(HBasicBlock* block); void BoundTypeForIfInstanceOf(HBasicBlock* block); - void UpdateReferenceTypeInfo(HInstruction* instr, uint16_t type_idx, const DexFile& dex_file); - void VisitInstanceFieldGet(HInstanceFieldGet* instr); - void VisitStaticFieldGet(HStaticFieldGet* instr); - void ProcessWorklist(); void AddToWorklist(HInstruction* instr); void AddDependentInstructionsToWorklist(HInstruction* instr); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index c4612af393..2a86e60e14 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -184,22 +184,24 @@ void SsaBuilder::FixNullConstantType() { } HInstruction* left = equality_instr->InputAt(0); HInstruction* right = equality_instr->InputAt(1); - HInstruction* null_instr = nullptr; + HInstruction* int_operand = nullptr; - if ((left->GetType() == Primitive::kPrimNot) && right->IsIntConstant()) { - null_instr = right; - } else if ((right->GetType() == Primitive::kPrimNot) && left->IsIntConstant()) { - null_instr = left; + if ((left->GetType() == Primitive::kPrimNot) && (right->GetType() == Primitive::kPrimInt)) { + int_operand = right; + } else if ((right->GetType() == Primitive::kPrimNot) + && (left->GetType() == Primitive::kPrimInt)) { + int_operand = left; } else { continue; } // If we got here, we are comparing against a reference and the int constant // should be replaced with a null constant. - if (null_instr->IsIntConstant()) { - DCHECK_EQ(0, null_instr->AsIntConstant()->GetValue()); - equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), null_instr == right ? 1 : 0); - } + // Both type propagation and redundant phi elimination ensure `int_operand` + // can only be the 0 constant. + DCHECK(int_operand->IsIntConstant()); + DCHECK_EQ(0, int_operand->AsIntConstant()->GetValue()); + equality_instr->ReplaceInput(GetGraph()->GetNullConstant(), int_operand == right ? 1 : 0); } } } @@ -255,21 +257,18 @@ void SsaBuilder::BuildSsa() { PrimitiveTypePropagation type_propagation(GetGraph()); type_propagation.Run(); - // 5) Fix the type for null constants which are part of an equality comparison. - FixNullConstantType(); - - // 6) When creating equivalent phis we copy the inputs of the original phi which - // may be improperly typed. This will be fixed during the type propagation but + // 5) When creating equivalent phis we copy the inputs of the original phi which + // may be improperly typed. 
This was fixed during the type propagation in 4) but // as a result we may end up with two equivalent phis with the same type for // the same dex register. This pass cleans them up. EquivalentPhisCleanup(); - // 7) Mark dead phis again. Step 4) may have introduced new phis. - // Step 6) might enable the death of new phis. + // 6) Mark dead phis again. Step 4) may have introduced new phis. + // Step 5) might enable the death of new phis. SsaDeadPhiElimination dead_phis(GetGraph()); dead_phis.MarkDeadPhis(); - // 8) Now that the graph is correctly typed, we can get rid of redundant phis. + // 7) Now that the graph is correctly typed, we can get rid of redundant phis. // Note that we cannot do this phase before type propagation, otherwise // we could get rid of phi equivalents, whose presence is a requirement for the // type propagation phase. Note that this is to satisfy statement (a) of the @@ -277,6 +276,13 @@ void SsaBuilder::BuildSsa() { SsaRedundantPhiElimination redundant_phi(GetGraph()); redundant_phi.Run(); + // 8) Fix the type for null constants which are part of an equality comparison. + // We need to do this after redundant phi elimination, to ensure the only cases + // that we can see are reference comparison against 0. The redundant phi + // elimination ensures we do not see a phi taking two 0 constants in a HEqual + // or HNotEqual. + FixNullConstantType(); + // 9) Make sure environments use the right phi "equivalent": a phi marked dead // can have a phi equivalent that is not dead. We must therefore update // all environment uses of the dead phi to use its equivalent. Note that there diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 42b9182d55..65610d54a6 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -49,7 +49,6 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, } dex_pc_max_ = std::max(dex_pc_max_, dex_pc); - native_pc_offset_max_ = std::max(native_pc_offset_max_, native_pc_offset); register_mask_max_ = std::max(register_mask_max_, register_mask); current_dex_register_ = 0; } @@ -128,16 +127,25 @@ void StackMapStream::EndInlineInfoEntry() { current_inline_info_ = InlineInfoEntry(); } +uint32_t StackMapStream::ComputeMaxNativePcOffset() const { + uint32_t max_native_pc_offset = 0u; + for (size_t i = 0, size = stack_maps_.Size(); i != size; ++i) { + max_native_pc_offset = std::max(max_native_pc_offset, stack_maps_.Get(i).native_pc_offset); + } + return max_native_pc_offset; +} + size_t StackMapStream::PrepareForFillIn() { int stack_mask_number_of_bits = stack_mask_max_ + 1; // Need room for max element too. 
stack_mask_size_ = RoundUp(stack_mask_number_of_bits, kBitsPerByte) / kBitsPerByte; inline_info_size_ = ComputeInlineInfoSize(); dex_register_maps_size_ = ComputeDexRegisterMapsSize(); + uint32_t max_native_pc_offset = ComputeMaxNativePcOffset(); stack_map_encoding_ = StackMapEncoding::CreateFromSizes(stack_mask_size_, inline_info_size_, dex_register_maps_size_, dex_pc_max_, - native_pc_offset_max_, + max_native_pc_offset, register_mask_max_); stack_maps_size_ = stack_maps_.Size() * stack_map_encoding_.ComputeStackMapSize(); dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize(); diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 274d573350..550ed70e0f 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -67,7 +67,6 @@ class StackMapStream : public ValueObject { inline_infos_(allocator, 2), stack_mask_max_(-1), dex_pc_max_(0), - native_pc_offset_max_(0), register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()), @@ -126,6 +125,22 @@ class StackMapStream : public ValueObject { uint32_t num_dex_registers); void EndInlineInfoEntry(); + size_t GetNumberOfStackMaps() const { + return stack_maps_.Size(); + } + + const StackMapEntry& GetStackMap(size_t i) const { + DCHECK_LT(i, stack_maps_.Size()); + return stack_maps_.GetRawStorage()[i]; + } + + void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) { + DCHECK_LT(i, stack_maps_.Size()); + stack_maps_.GetRawStorage()[i].native_pc_offset = native_pc_offset; + } + + uint32_t ComputeMaxNativePcOffset() const; + // Prepares the stream to fill in a memory region. Must be called before FillIn. // Returns the size (in bytes) needed to store this stream. size_t PrepareForFillIn(); @@ -163,7 +178,6 @@ class StackMapStream : public ValueObject { GrowableArray<InlineInfoEntry> inline_infos_; int stack_mask_max_; uint32_t dex_pc_max_; - uint32_t native_pc_offset_max_; uint32_t register_mask_max_; size_t number_of_stack_maps_with_inline_info_; diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc index cb51ed8fc8..facc6304e5 100644 --- a/compiler/trampolines/trampoline_compiler.cc +++ b/compiler/trampolines/trampoline_compiler.cc @@ -17,21 +17,21 @@ #include "trampoline_compiler.h" #include "jni_env_ext.h" -#include "utils/arm/assembler_arm.h" +#include "utils/arm/assembler_thumb2.h" #include "utils/arm64/assembler_arm64.h" #include "utils/mips/assembler_mips.h" #include "utils/mips64/assembler_mips64.h" #include "utils/x86/assembler_x86.h" #include "utils/x86_64/assembler_x86_64.h" -#define __ assembler-> +#define __ assembler. namespace art { namespace arm { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<4> offset) { - std::unique_ptr<ArmAssembler> assembler(static_cast<ArmAssembler*>(Assembler::Create(kThumb2))); + Thumb2Assembler assembler; switch (abi) { case kInterpreterAbi: // Thread* is first argument (R0) in interpreter ABI. 
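Illustrative aside, not part of the patch: the trampoline_compiler.cc hunks around this point switch from heap-allocating assemblers via Assembler::Create(...) to stack-allocated concrete assemblers and call FinalizeCode() before reading CodeSize(), because the reworked Thumb2 assembler only knows its final size once branch fixups and literals have been resolved. Below is a standalone sketch of that finalize-then-measure-then-copy flow; the names are toy stand-ins, not ART's Assembler API.

// Standalone sketch: finalize first, then measure, then copy into an
// exactly-sized buffer, mirroring the new trampoline emission flow above.
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

class ToyAssembler {
 public:
  void Emit16(uint16_t value) {
    pending_.push_back(static_cast<uint8_t>(value & 0xff));
    pending_.push_back(static_cast<uint8_t>(value >> 8));
  }

  // Resolves branches/literals; only after this is the code size final.
  void FinalizeCode() {
    // In the real Thumb2 assembler this expands fixups and appends the
    // literal pool; the toy version just freezes the buffer.
    finalized_ = true;
  }

  size_t CodeSize() const { return finalized_ ? pending_.size() : 0u; }

  void FinalizeInstructions(uint8_t* out, size_t size) const {
    std::memcpy(out, pending_.data(), size);
  }

 private:
  std::vector<uint8_t> pending_;
  bool finalized_ = false;
};

static const std::vector<uint8_t>* CreateToyTrampoline() {
  ToyAssembler assembler;    // Stack-allocated, like Thumb2Assembler above.
  assembler.Emit16(0xbe00);  // Placeholder encoding (bkpt #0 in Thumb).
  assembler.FinalizeCode();  // Must come before CodeSize().
  size_t cs = assembler.CodeSize();
  std::unique_ptr<std::vector<uint8_t>> stub(new std::vector<uint8_t>(cs));
  assembler.FinalizeInstructions(stub->data(), stub->size());
  return stub.release();
}

int main() {
  std::unique_ptr<const std::vector<uint8_t>> stub(CreateToyTrampoline());
  return stub->size() == 2 ? 0 : 1;
}

Measuring before finalization is exactly what this calling convention guards against, since the Thumb2 fixup pass later in this patch can still widen 16-bit encodings to 32-bit ones until FinalizeCode() has run.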
@@ -46,10 +46,11 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention } __ bkpt(0); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -58,7 +59,7 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention namespace arm64 { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<8> offset) { - std::unique_ptr<Arm64Assembler> assembler(static_cast<Arm64Assembler*>(Assembler::Create(kArm64))); + Arm64Assembler assembler; switch (abi) { case kInterpreterAbi: // Thread* is first argument (X0) in interpreter ABI. @@ -82,11 +83,11 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention break; } - assembler->EmitSlowPaths(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -95,7 +96,7 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention namespace mips { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<4> offset) { - std::unique_ptr<MipsAssembler> assembler(static_cast<MipsAssembler*>(Assembler::Create(kMips))); + MipsAssembler assembler; switch (abi) { case kInterpreterAbi: // Thread* is first argument (A0) in interpreter ABI. @@ -112,10 +113,11 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention __ Nop(); __ Break(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -124,7 +126,7 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention namespace mips64 { static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention abi, ThreadOffset<8> offset) { - std::unique_ptr<Mips64Assembler> assembler(static_cast<Mips64Assembler*>(Assembler::Create(kMips64))); + Mips64Assembler assembler; switch (abi) { case kInterpreterAbi: // Thread* is first argument (A0) in interpreter ABI. @@ -141,10 +143,11 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention __ Nop(); __ Break(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -152,16 +155,17 @@ static const std::vector<uint8_t>* CreateTrampoline(EntryPointCallingConvention namespace x86 { static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) { - std::unique_ptr<X86Assembler> assembler(static_cast<X86Assembler*>(Assembler::Create(kX86))); + X86Assembler assembler; // All x86 trampolines call via the Thread* held in fs. 
__ fs()->jmp(Address::Absolute(offset)); __ int3(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } @@ -169,17 +173,17 @@ static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<4> offset) { namespace x86_64 { static const std::vector<uint8_t>* CreateTrampoline(ThreadOffset<8> offset) { - std::unique_ptr<x86_64::X86_64Assembler> - assembler(static_cast<x86_64::X86_64Assembler*>(Assembler::Create(kX86_64))); + x86_64::X86_64Assembler assembler; // All x86 trampolines call via the Thread* held in gs. __ gs()->jmp(x86_64::Address::Absolute(offset, true)); __ int3(); - size_t cs = assembler->CodeSize(); + __ FinalizeCode(); + size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*entry_stub)[0], entry_stub->size()); - assembler->FinalizeInstructions(code); + __ FinalizeInstructions(code); return entry_stub.release(); } diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 350efca3e2..f8ca48ef57 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_H_ #define ART_COMPILER_UTILS_ARM_ASSEMBLER_ARM_H_ +#include <type_traits> #include <vector> #include "base/bit_utils.h" @@ -33,14 +34,47 @@ namespace arm { class Arm32Assembler; class Thumb2Assembler; -// This class indicates that the label and its uses -// will fall into a range that is encodable in 16bits on thumb2. -class NearLabel : public Label { +// Assembler literal is a value embedded in code, retrieved using a PC-relative load. +class Literal { public: - NearLabel() {} + static constexpr size_t kMaxSize = 8; + + Literal(uint32_t size, const uint8_t* data) + : label_(), size_(size) { + DCHECK_LE(size, Literal::kMaxSize); + memcpy(data_, data, size); + } + + template <typename T> + T GetValue() const { + DCHECK_EQ(size_, sizeof(T)); + T value; + memcpy(&value, data_, sizeof(T)); + return value; + } + + uint32_t GetSize() const { + return size_; + } + + const uint8_t* GetData() const { + return data_; + } + + Label* GetLabel() { + return &label_; + } + + const Label* GetLabel() const { + return &label_; + } private: - DISALLOW_COPY_AND_ASSIGN(NearLabel); + Label label_; + const uint32_t size_; + uint8_t data_[kMaxSize]; + + DISALLOW_COPY_AND_ASSIGN(Literal); }; class ShifterOperand { @@ -529,9 +563,6 @@ class ArmAssembler : public Assembler { // Branch instructions. virtual void b(Label* label, Condition cond = AL) = 0; - virtual void b(NearLabel* label, Condition cond = AL) { - b(static_cast<Label*>(label), cond); - } virtual void bl(Label* label, Condition cond = AL) = 0; virtual void blx(Register rm, Condition cond = AL) = 0; virtual void bx(Register rm, Condition cond = AL) = 0; @@ -541,9 +572,41 @@ class ArmAssembler : public Assembler { void Pad(uint32_t bytes); + // Adjust label position. 
+ void AdjustLabelPosition(Label* label) { + DCHECK(label->IsBound()); + uint32_t old_position = static_cast<uint32_t>(label->Position()); + uint32_t new_position = GetAdjustedPosition(old_position); + label->Reinitialize(); + DCHECK_GE(static_cast<int>(new_position), 0); + label->BindTo(static_cast<int>(new_position)); + } + + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + virtual uint32_t GetAdjustedPosition(uint32_t old_position) = 0; + // Macros. // Most of these are pure virtual as they need to be implemented per instruction set. + // Create a new literal with a given value. + // NOTE: Force the template parameter to be explicitly specified. In the absence of + // std::omit_from_type_deduction<T> or std::identity<T>, use std::decay<T>. + template <typename T> + Literal* NewLiteral(typename std::decay<T>::type value) { + static_assert(std::is_integral<T>::value, "T must be an integral type."); + return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); + } + + // Create a new literal with the given data. + virtual Literal* NewLiteral(size_t size, const uint8_t* data) = 0; + + // Load literal. + virtual void LoadLiteral(Register rt, Literal* literal) = 0; + virtual void LoadLiteral(Register rt, Register rt2, Literal* literal) = 0; + virtual void LoadLiteral(SRegister sd, Literal* literal) = 0; + virtual void LoadLiteral(DRegister dd, Literal* literal) = 0; + // Add signed constant value to rd. May clobber IP. virtual void AddConstant(Register rd, int32_t value, Condition cond = AL) = 0; virtual void AddConstant(Register rd, Register rn, int32_t value, @@ -667,9 +730,6 @@ class ArmAssembler : public Assembler { virtual void Bind(Label* label) = 0; virtual void CompareAndBranchIfZero(Register r, Label* label) = 0; - virtual void CompareAndBranchIfZero(Register r, NearLabel* label) { - CompareAndBranchIfZero(r, static_cast<Label*>(label)); - } virtual void CompareAndBranchIfNonZero(Register r, Label* label) = 0; // diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index cdf62bf885..6e60ddc260 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -1354,6 +1354,41 @@ int Arm32Assembler::DecodeBranchOffset(int32_t inst) { } +uint32_t Arm32Assembler::GetAdjustedPosition(uint32_t old_position ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +Literal* Arm32Assembler::NewLiteral(size_t size ATTRIBUTE_UNUSED, + const uint8_t* data ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +void Arm32Assembler::LoadLiteral(Register rt ATTRIBUTE_UNUSED, + Literal* literal ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +void Arm32Assembler::LoadLiteral(Register rt ATTRIBUTE_UNUSED, Register rt2 ATTRIBUTE_UNUSED, + Literal* literal ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +void Arm32Assembler::LoadLiteral(SRegister sd ATTRIBUTE_UNUSED, + Literal* literal ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + +void Arm32Assembler::LoadLiteral(DRegister dd ATTRIBUTE_UNUSED, + Literal* literal ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unimplemented."; + UNREACHABLE(); +} + void Arm32Assembler::AddConstant(Register rd, int32_t value, Condition cond) { AddConstant(rd, rd, value, cond); } diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index 3164623fd9..1c38eec12c 100644 
--- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -238,7 +238,16 @@ class Arm32Assembler FINAL : public ArmAssembler { // Memory barriers. void dmb(DmbOptions flavor) OVERRIDE; - // Macros. + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE; + + Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE; + void LoadLiteral(Register rt, Literal* literal) OVERRIDE; + void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE; + void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE; + void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE; + // Add signed constant value to rd. May clobber IP. void AddConstant(Register rd, int32_t value, Condition cond = AL) OVERRIDE; void AddConstant(Register rd, Register rn, int32_t value, diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 26cb6c3739..f9e1ac672e 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -25,6 +25,309 @@ namespace art { namespace arm { +void Thumb2Assembler::BindLabel(Label* label, uint32_t bound_pc) { + CHECK(!label->IsBound()); + + while (label->IsLinked()) { + FixupId fixup_id = label->Position(); // The id for linked Fixup. + Fixup* fixup = GetFixup(fixup_id); // Get the Fixup at this id. + fixup->Resolve(bound_pc); // Fixup can be resolved now. + // Add this fixup as a dependency of all later fixups. + for (FixupId id = fixup_id + 1u, end = fixups_.size(); id != end; ++id) { + GetFixup(id)->AddDependent(fixup_id); + } + uint32_t fixup_location = fixup->GetLocation(); + uint16_t next = buffer_.Load<uint16_t>(fixup_location); // Get next in chain. + buffer_.Store<int16_t>(fixup_location, 0); + label->position_ = next; // Move to next. + } + label->BindTo(bound_pc); +} + +void Thumb2Assembler::BindLiterals() { + // We don't add the padding here, that's done only after adjusting the Fixup sizes. + uint32_t code_size = buffer_.Size(); + for (Literal& lit : literals_) { + Label* label = lit.GetLabel(); + BindLabel(label, code_size); + code_size += lit.GetSize(); + } +} + +void Thumb2Assembler::AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size, + std::deque<FixupId>* fixups_to_recalculate) { + uint32_t adjustment = fixup->AdjustSizeIfNeeded(*current_code_size); + if (adjustment != 0u) { + *current_code_size += adjustment; + for (FixupId dependent_id : fixup->Dependents()) { + Fixup* dependent = GetFixup(dependent_id); + dependent->IncreaseAdjustment(adjustment); + if (buffer_.Load<int16_t>(dependent->GetLocation()) == 0) { + buffer_.Store<int16_t>(dependent->GetLocation(), 1); + fixups_to_recalculate->push_back(dependent_id); + } + } + } +} + +uint32_t Thumb2Assembler::AdjustFixups() { + uint32_t current_code_size = buffer_.Size(); + std::deque<FixupId> fixups_to_recalculate; + if (kIsDebugBuild) { + // We will use the placeholders in the buffer_ to mark whether the fixup has + // been added to the fixups_to_recalculate. Make sure we start with zeros. + for (Fixup& fixup : fixups_) { + CHECK_EQ(buffer_.Load<int16_t>(fixup.GetLocation()), 0); + } + } + for (Fixup& fixup : fixups_) { + AdjustFixupIfNeeded(&fixup, ¤t_code_size, &fixups_to_recalculate); + } + while (!fixups_to_recalculate.empty()) { + // Pop the fixup. 
+ FixupId fixup_id = fixups_to_recalculate.front(); + fixups_to_recalculate.pop_front(); + Fixup* fixup = GetFixup(fixup_id); + DCHECK_NE(buffer_.Load<int16_t>(fixup->GetLocation()), 0); + buffer_.Store<int16_t>(fixup->GetLocation(), 0); + // See if it needs adjustment. + AdjustFixupIfNeeded(fixup, ¤t_code_size, &fixups_to_recalculate); + } + if (kIsDebugBuild) { + // Check that no fixup is marked as being in fixups_to_recalculate anymore. + for (Fixup& fixup : fixups_) { + CHECK_EQ(buffer_.Load<int16_t>(fixup.GetLocation()), 0); + } + } + + // Adjust literal pool labels for padding. + DCHECK_EQ(current_code_size & 1u, 0u); + uint32_t literals_adjustment = current_code_size + (current_code_size & 2) - buffer_.Size(); + if (literals_adjustment != 0u) { + for (Literal& literal : literals_) { + Label* label = literal.GetLabel(); + DCHECK(label->IsBound()); + int old_position = label->Position(); + label->Reinitialize(); + label->BindTo(old_position + literals_adjustment); + } + } + + return current_code_size; +} + +void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) { + // Move non-fixup code to its final place and emit fixups. + // Process fixups in reverse order so that we don't repeatedly move the same data. + size_t src_end = buffer_.Size(); + size_t dest_end = adjusted_code_size; + buffer_.Resize(dest_end); + DCHECK_GE(dest_end, src_end); + for (auto i = fixups_.rbegin(), end = fixups_.rend(); i != end; ++i) { + Fixup* fixup = &*i; + if (fixup->GetOriginalSize() == fixup->GetSize()) { + // The size of this Fixup didn't change. To avoid moving the data + // in small chunks, emit the code to its original position. + fixup->Emit(&buffer_, adjusted_code_size); + fixup->Finalize(dest_end - src_end); + } else { + // Move the data between the end of the fixup and src_end to its final location. + size_t old_fixup_location = fixup->GetLocation(); + size_t src_begin = old_fixup_location + fixup->GetOriginalSizeInBytes(); + size_t data_size = src_end - src_begin; + size_t dest_begin = dest_end - data_size; + buffer_.Move(dest_begin, src_begin, data_size); + src_end = old_fixup_location; + dest_end = dest_begin - fixup->GetSizeInBytes(); + // Finalize the Fixup and emit the data to the new location. + fixup->Finalize(dest_end - src_end); + fixup->Emit(&buffer_, adjusted_code_size); + } + } + CHECK_EQ(src_end, dest_end); +} + +void Thumb2Assembler::EmitLiterals() { + if (!literals_.empty()) { + // Load literal instructions (LDR, LDRD, VLDR) require 4-byte alignment. + // We don't support byte and half-word literals. 
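// Editor's note (illustrative): code_size is always even at this point; when it is not a
// multiple of 4 (e.g. 0x1f2) a single zero half-word is emitted below so that the first
// literal starts at a 4-byte aligned offset (0x1f4 in that example).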
+ uint32_t code_size = buffer_.Size(); + DCHECK_EQ(code_size & 1u, 0u); + if ((code_size & 2u) != 0u) { + Emit16(0); + } + for (Literal& literal : literals_) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(static_cast<size_t>(literal.GetLabel()->Position()), buffer_.Size()); + DCHECK(literal.GetSize() == 4u || literal.GetSize() == 8u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } +} + +inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) { + DCHECK_EQ(offset & 1, 0); + int16_t encoding = B15 | B14; + if (cond != AL) { + DCHECK(IsInt<9>(offset)); + encoding |= B12 | (static_cast<int32_t>(cond) << 8) | ((offset >> 1) & 0xff); + } else { + DCHECK(IsInt<12>(offset)); + encoding |= B13 | ((offset >> 1) & 0x7ff); + } + return encoding; +} + +inline int32_t Thumb2Assembler::BEncoding32(int32_t offset, Condition cond) { + DCHECK_EQ(offset & 1, 0); + int32_t s = (offset >> 31) & 1; // Sign bit. + int32_t encoding = B31 | B30 | B29 | B28 | B15 | + (s << 26) | // Sign bit goes to bit 26. + ((offset >> 1) & 0x7ff); // imm11 goes to bits 0-10. + if (cond != AL) { + DCHECK(IsInt<21>(offset)); + // Encode cond, move imm6 from bits 12-17 to bits 16-21 and move J1 and J2. + encoding |= (static_cast<int32_t>(cond) << 22) | ((offset & 0x3f000) << (16 - 12)) | + ((offset & (1 << 19)) >> (19 - 13)) | // Extract J1 from bit 19 to bit 13. + ((offset & (1 << 18)) >> (18 - 11)); // Extract J2 from bit 18 to bit 11. + } else { + DCHECK(IsInt<25>(offset)); + int32_t j1 = ((offset >> 23) ^ s ^ 1) & 1; // Calculate J1 from I1 extracted from bit 23. + int32_t j2 = ((offset >> 22)^ s ^ 1) & 1; // Calculate J2 from I2 extracted from bit 22. + // Move imm10 from bits 12-21 to bits 16-25 and add J1 and J2. + encoding |= B12 | ((offset & 0x3ff000) << (16 - 12)) | + (j1 << 13) | (j2 << 11); + } + return encoding; +} + +inline int16_t Thumb2Assembler::CbxzEncoding16(Register rn, int32_t offset, Condition cond) { + DCHECK(!IsHighRegister(rn)); + DCHECK_EQ(offset & 1, 0); + DCHECK(IsUint<7>(offset)); + DCHECK(cond == EQ || cond == NE); + return B15 | B13 | B12 | B8 | (cond == NE ? B11 : 0) | static_cast<int32_t>(rn) | + ((offset & 0x3e) << (3 - 1)) | // Move imm5 from bits 1-5 to bits 3-7. + ((offset & 0x40) << (9 - 6)); // Move i from bit 6 to bit 11 +} + +inline int16_t Thumb2Assembler::CmpRnImm8Encoding16(Register rn, int32_t value) { + DCHECK(!IsHighRegister(rn)); + DCHECK(IsUint<8>(value)); + return B13 | B11 | (rn << 8) | value; +} + +inline int16_t Thumb2Assembler::AddRdnRmEncoding16(Register rdn, Register rm) { + // The high bit of rn is moved across 4-bit rm. + return B14 | B10 | (static_cast<int32_t>(rm) << 3) | + (static_cast<int32_t>(rdn) & 7) | ((static_cast<int32_t>(rdn) & 8) << 4); +} + +inline int32_t Thumb2Assembler::MovwEncoding32(Register rd, int32_t value) { + DCHECK(IsUint<16>(value)); + return B31 | B30 | B29 | B28 | B25 | B22 | + (static_cast<int32_t>(rd) << 8) | + ((value & 0xf000) << (16 - 12)) | // Move imm4 from bits 12-15 to bits 16-19. + ((value & 0x0800) << (26 - 11)) | // Move i from bit 11 to bit 26. + ((value & 0x0700) << (12 - 8)) | // Move imm3 from bits 8-10 to bits 12-14. + (value & 0xff); // Keep imm8 in bits 0-7. 
+} + +inline int32_t Thumb2Assembler::MovtEncoding32(Register rd, int32_t value) { + DCHECK_EQ(value & 0xffff, 0); + int32_t movw_encoding = MovwEncoding32(rd, (value >> 16) & 0xffff); + return movw_encoding | B25 | B23; +} + +inline int32_t Thumb2Assembler::MovModImmEncoding32(Register rd, int32_t value) { + uint32_t mod_imm = ModifiedImmediate(value); + DCHECK_NE(mod_imm, kInvalidModifiedImmediate); + return B31 | B30 | B29 | B28 | B22 | B19 | B18 | B17 | B16 | + (static_cast<int32_t>(rd) << 8) | static_cast<int32_t>(mod_imm); +} + +inline int16_t Thumb2Assembler::LdrLitEncoding16(Register rt, int32_t offset) { + DCHECK(!IsHighRegister(rt)); + DCHECK_EQ(offset & 3, 0); + DCHECK(IsUint<10>(offset)); + return B14 | B11 | (static_cast<int32_t>(rt) << 8) | (offset >> 2); +} + +inline int32_t Thumb2Assembler::LdrLitEncoding32(Register rt, int32_t offset) { + // NOTE: We don't support negative offset, i.e. U=0 (B23). + return LdrRtRnImm12Encoding(rt, PC, offset); +} + +inline int32_t Thumb2Assembler::LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset) { + DCHECK_EQ(offset & 3, 0); + CHECK(IsUint<10>(offset)); + return B31 | B30 | B29 | B27 | + B24 /* P = 1 */ | B23 /* U = 1 */ | B22 | 0 /* W = 0 */ | B20 | + (static_cast<int32_t>(rn) << 16) | (static_cast<int32_t>(rt) << 12) | + (static_cast<int32_t>(rt2) << 8) | (offset >> 2); +} + +inline int32_t Thumb2Assembler::VldrsEncoding32(SRegister sd, Register rn, int32_t offset) { + DCHECK_EQ(offset & 3, 0); + CHECK(IsUint<10>(offset)); + return B31 | B30 | B29 | B27 | B26 | B24 | + B23 /* U = 1 */ | B20 | B11 | B9 | + (static_cast<int32_t>(rn) << 16) | + ((static_cast<int32_t>(sd) & 0x01) << (22 - 0)) | // Move D from bit 0 to bit 22. + ((static_cast<int32_t>(sd) & 0x1e) << (12 - 1)) | // Move Vd from bits 1-4 to bits 12-15. + (offset >> 2); +} + +inline int32_t Thumb2Assembler::VldrdEncoding32(DRegister dd, Register rn, int32_t offset) { + DCHECK_EQ(offset & 3, 0); + CHECK(IsUint<10>(offset)); + return B31 | B30 | B29 | B27 | B26 | B24 | + B23 /* U = 1 */ | B20 | B11 | B9 | B8 | + (rn << 16) | + ((static_cast<int32_t>(dd) & 0x10) << (22 - 4)) | // Move D from bit 4 to bit 22. + ((static_cast<int32_t>(dd) & 0x0f) << (12 - 0)) | // Move Vd from bits 0-3 to bits 12-15. + (offset >> 2); +} + +inline int16_t Thumb2Assembler::LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset) { + DCHECK(!IsHighRegister(rt)); + DCHECK(!IsHighRegister(rn)); + DCHECK_EQ(offset & 3, 0); + DCHECK(IsUint<7>(offset)); + return B14 | B13 | B11 | + (static_cast<int32_t>(rn) << 3) | static_cast<int32_t>(rt) | + (offset << (6 - 2)); // Move imm5 from bits 2-6 to bits 6-10. 
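// Editor's note (worked example, not part of the patch): with these bit moves,
// LdrRtRnImm5Encoding16(R0, R1, 4) yields 0x6848, i.e. the 16-bit Thumb encoding of
// "ldr r0, [r1, #4]".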
+} + +int32_t Thumb2Assembler::Fixup::LoadWideOrFpEncoding(Register rbase, int32_t offset) const { + switch (type_) { + case kLoadLiteralWide: + return LdrdEncoding32(rn_, rt2_, rbase, offset); + case kLoadFPLiteralSingle: + return VldrsEncoding32(sd_, rbase, offset); + case kLoadFPLiteralDouble: + return VldrdEncoding32(dd_, rbase, offset); + default: + LOG(FATAL) << "Unexpected type: " << static_cast<int>(type_); + UNREACHABLE(); + } +} + +inline int32_t Thumb2Assembler::LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset) { + DCHECK(IsUint<12>(offset)); + return B31 | B30 | B29 | B28 | B27 | B23 | B22 | B20 | (rn << 16) | (rt << 12) | offset; +} + +void Thumb2Assembler::FinalizeCode() { + ArmAssembler::FinalizeCode(); + BindLiterals(); + uint32_t adjusted_code_size = AdjustFixups(); + EmitFixups(adjusted_code_size); + EmitLiterals(); +} + bool Thumb2Assembler::ShifterOperandCanHold(Register rd ATTRIBUTE_UNUSED, Register rn ATTRIBUTE_UNUSED, Opcode opcode, @@ -671,17 +974,11 @@ void Thumb2Assembler::vcmpdz(DRegister dd, Condition cond) { EmitVFPddd(cond, B23 | B21 | B20 | B18 | B16 | B6, dd, D0, D0); } - void Thumb2Assembler::b(Label* label, Condition cond) { EmitBranch(cond, label, false, false); } -void Thumb2Assembler::b(NearLabel* label, Condition cond) { - EmitBranch(cond, label, false, false, /* is_near */ true); -} - - void Thumb2Assembler::bl(Label* label, Condition cond) { CheckCondition(cond); EmitBranch(cond, label, true, false); @@ -1308,80 +1605,359 @@ void Thumb2Assembler::EmitShift(Register rd, Register rn, Shift shift, Register } } +inline size_t Thumb2Assembler::Fixup::SizeInBytes(Size size) { + switch (size) { + case kBranch16Bit: + return 2u; + case kBranch32Bit: + return 4u; + + case kCbxz16Bit: + return 2u; + case kCbxz32Bit: + return 4u; + case kCbxz48Bit: + return 6u; + + case kLiteral1KiB: + return 2u; + case kLiteral4KiB: + return 4u; + case kLiteral64KiB: + return 8u; + case kLiteral1MiB: + return 10u; + case kLiteralFar: + return 14u; + + case kLongOrFPLiteral1KiB: + return 4u; + case kLongOrFPLiteral256KiB: + return 10u; + case kLongOrFPLiteralFar: + return 14u; + } + LOG(FATAL) << "Unexpected size: " << static_cast<int>(size); + UNREACHABLE(); +} + +inline uint32_t Thumb2Assembler::Fixup::GetOriginalSizeInBytes() const { + return SizeInBytes(original_size_); +} + +inline uint32_t Thumb2Assembler::Fixup::GetSizeInBytes() const { + return SizeInBytes(size_); +} + +inline size_t Thumb2Assembler::Fixup::LiteralPoolPaddingSize(uint32_t current_code_size) { + // The code size must be a multiple of 2. + DCHECK_EQ(current_code_size & 1u, 0u); + // If it isn't a multiple of 4, we need to add a 2-byte padding before the literal pool. 
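// Editor's note (worked example): LiteralPoolPaddingSize(6u) == 2u, i.e. the pool is pushed
// to offset 8, while LiteralPoolPaddingSize(8u) == 0u since offset 8 is already 4-byte aligned.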
+ return current_code_size & 2; +} + +inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) const { + static constexpr int32_t int32_min = std::numeric_limits<int32_t>::min(); + static constexpr int32_t int32_max = std::numeric_limits<int32_t>::max(); + DCHECK_LE(target_, static_cast<uint32_t>(int32_max)); + DCHECK_LE(location_, static_cast<uint32_t>(int32_max)); + DCHECK_LE(adjustment_, static_cast<uint32_t>(int32_max)); + int32_t diff = static_cast<int32_t>(target_) - static_cast<int32_t>(location_); + if (target_ > location_) { + DCHECK_LE(adjustment_, static_cast<uint32_t>(int32_max - diff)); + diff += static_cast<int32_t>(adjustment_); + } else { + DCHECK_LE(int32_min + static_cast<int32_t>(adjustment_), diff); + diff -= static_cast<int32_t>(adjustment_); + } + // The default PC adjustment for Thumb2 is 4 bytes. + DCHECK_GE(diff, int32_min + 4); + diff -= 4; + // Add additional adjustment for instructions preceding the PC usage, padding + // before the literal pool and rounding down the PC for literal loads. + switch (GetSize()) { + case kBranch16Bit: + case kBranch32Bit: + break; + case kCbxz16Bit: + break; + case kCbxz32Bit: + case kCbxz48Bit: + DCHECK_GE(diff, int32_min + 2); + diff -= 2; // Extra CMP Rn, #0, 16-bit. + break; -void Thumb2Assembler::Branch::Emit(AssemblerBuffer* buffer) const { - bool link = type_ == kUnconditionalLinkX || type_ == kUnconditionalLink; - bool x = type_ == kUnconditionalX || type_ == kUnconditionalLinkX; - int32_t offset = target_ - location_; + case kLiteral1KiB: + case kLiteral4KiB: + case kLongOrFPLiteral1KiB: + DCHECK(diff >= 0 || (GetSize() == kLiteral1KiB && diff == -2)); + diff += LiteralPoolPaddingSize(current_code_size); + // Load literal instructions round down the PC+4 to a multiple of 4, so if the PC + // isn't a multiple of 2, we need to adjust. Since we already adjusted for the target + // being aligned, current PC alignment can be inferred from diff. + DCHECK_EQ(diff & 1, 0); + diff = diff + (diff & 2); + DCHECK_GE(diff, 0); + break; + case kLiteral1MiB: + case kLiteral64KiB: + case kLongOrFPLiteral256KiB: + DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC. + diff -= 4; // One extra 32-bit MOV. + diff += LiteralPoolPaddingSize(current_code_size); + break; + case kLiteralFar: + case kLongOrFPLiteralFar: + DCHECK_GE(diff, 8); // The target must be at least 4 bytes after the ADD rX, PC. + diff -= 8; // Extra MOVW+MOVT; both 32-bit. + diff += LiteralPoolPaddingSize(current_code_size); + break; + } + return diff; +} - if (size_ == k32Bit) { - int32_t encoding = B31 | B30 | B29 | B28 | B15; - if (link) { - // BL or BLX immediate. - encoding |= B14; - if (!x) { - encoding |= B12; - } else { - // Bottom bit of offset must be 0. - CHECK_EQ((offset & 1), 0); +inline size_t Thumb2Assembler::Fixup::IncreaseSize(Size new_size) { + DCHECK_NE(target_, kUnresolved); + Size old_size = size_; + size_ = new_size; + DCHECK_GT(SizeInBytes(new_size), SizeInBytes(old_size)); + size_t adjustment = SizeInBytes(new_size) - SizeInBytes(old_size); + if (target_ > location_) { + adjustment_ += adjustment; + } + return adjustment; +} + +uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) { + uint32_t old_code_size = current_code_size; + switch (GetSize()) { + case kBranch16Bit: + if (IsInt(cond_ != AL ? 
9 : 12, GetOffset(current_code_size))) { + break; } - } else { - if (x) { - LOG(FATAL) << "Invalid use of BX"; - UNREACHABLE(); - } else { - if (cond_ == AL) { - // Can use the T4 encoding allowing a 24 bit offset. - if (!x) { - encoding |= B12; - } - } else { - // Must be T3 encoding with a 20 bit offset. - encoding |= cond_ << 22; - } + current_code_size += IncreaseSize(kBranch32Bit); + FALLTHROUGH_INTENDED; + case kBranch32Bit: + // We don't support conditional branches beyond +-1MiB + // or unconditional branches beyond +-16MiB. + break; + + case kCbxz16Bit: + if (IsUint<7>(GetOffset(current_code_size))) { + break; } - } - encoding = Thumb2Assembler::EncodeBranchOffset(offset, encoding); - buffer->Store<int16_t>(location_, static_cast<int16_t>(encoding >> 16)); - buffer->Store<int16_t>(location_+2, static_cast<int16_t>(encoding & 0xffff)); - } else { - if (IsCompareAndBranch()) { - offset -= 4; - uint16_t i = (offset >> 6) & 1; - uint16_t imm5 = (offset >> 1) & 31U /* 0b11111 */; - int16_t encoding = B15 | B13 | B12 | - (type_ == kCompareAndBranchNonZero ? B11 : 0) | - static_cast<uint32_t>(rn_) | - B8 | - i << 9 | - imm5 << 3; + current_code_size += IncreaseSize(kCbxz32Bit); + FALLTHROUGH_INTENDED; + case kCbxz32Bit: + if (IsInt<9>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kCbxz48Bit); + FALLTHROUGH_INTENDED; + case kCbxz48Bit: + // We don't support conditional branches beyond +-1MiB. + break; + + case kLiteral1KiB: + DCHECK(!IsHighRegister(rn_)); + if (IsUint<10>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteral4KiB); + FALLTHROUGH_INTENDED; + case kLiteral4KiB: + if (IsUint<12>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteral64KiB); + FALLTHROUGH_INTENDED; + case kLiteral64KiB: + // Can't handle high register which we can encounter by fall-through from kLiteral4KiB. + if (!IsHighRegister(rn_) && IsUint<16>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteral1MiB); + FALLTHROUGH_INTENDED; + case kLiteral1MiB: + if (IsUint<20>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLiteralFar); + FALLTHROUGH_INTENDED; + case kLiteralFar: + // This encoding can reach any target. + break; + + case kLongOrFPLiteral1KiB: + if (IsUint<10>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLongOrFPLiteral256KiB); + FALLTHROUGH_INTENDED; + case kLongOrFPLiteral256KiB: + if (IsUint<18>(GetOffset(current_code_size))) { + break; + } + current_code_size += IncreaseSize(kLongOrFPLiteralFar); + FALLTHROUGH_INTENDED; + case kLongOrFPLiteralFar: + // This encoding can reach any target. + break; + } + return current_code_size - old_code_size; +} + +void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) const { + switch (GetSize()) { + case kBranch16Bit: { + DCHECK(type_ == kUnconditional || type_ == kConditional); + DCHECK_EQ(type_ == kConditional, cond_ != AL); + int16_t encoding = BEncoding16(GetOffset(code_size), cond_); buffer->Store<int16_t>(location_, encoding); - } else { - offset -= 4; // Account for PC offset. - int16_t encoding; - // 16 bit. 
- if (cond_ == AL) { - encoding = B15 | B14 | B13 | - ((offset >> 1) & 0x7ff); - } else { - encoding = B15 | B14 | B12 | - cond_ << 8 | ((offset >> 1) & 0xff); + break; + } + case kBranch32Bit: { + DCHECK(type_ == kConditional || type_ == kUnconditional || + type_ == kUnconditionalLink || type_ == kUnconditionalLinkX); + DCHECK_EQ(type_ == kConditional, cond_ != AL); + int32_t encoding = BEncoding32(GetOffset(code_size), cond_); + if (type_ == kUnconditionalLink) { + DCHECK_NE(encoding & B12, 0); + encoding |= B14; + } else if (type_ == kUnconditionalLinkX) { + DCHECK_NE(encoding & B12, 0); + encoding ^= B14 | B12; } + buffer->Store<int16_t>(location_, encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + break; + } + + case kCbxz16Bit: { + DCHECK(type_ == kCompareAndBranchXZero); + int16_t encoding = CbxzEncoding16(rn_, GetOffset(code_size), cond_); + buffer->Store<int16_t>(location_, encoding); + break; + } + case kCbxz32Bit: { + DCHECK(type_ == kCompareAndBranchXZero); + DCHECK(cond_ == EQ || cond_ == NE); + int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); + int16_t b_encoding = BEncoding16(GetOffset(code_size), cond_); + buffer->Store<int16_t>(location_, cmp_encoding); + buffer->Store<int16_t>(location_ + 2, b_encoding); + break; + } + case kCbxz48Bit: { + DCHECK(type_ == kCompareAndBranchXZero); + DCHECK(cond_ == EQ || cond_ == NE); + int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); + int32_t b_encoding = BEncoding32(GetOffset(code_size), cond_); + buffer->Store<int16_t>(location_, cmp_encoding); + buffer->Store<int16_t>(location_ + 2u, b_encoding >> 16); + buffer->Store<int16_t>(location_ + 4u, static_cast<int16_t>(b_encoding & 0xffff)); + break; + } + + case kLiteral1KiB: { + DCHECK(type_ == kLoadLiteralNarrow); + int16_t encoding = LdrLitEncoding16(rn_, GetOffset(code_size)); buffer->Store<int16_t>(location_, encoding); + break; + } + case kLiteral4KiB: { + DCHECK(type_ == kLoadLiteralNarrow); + // GetOffset() uses PC+4 but load literal uses AlignDown(PC+4, 4). Adjust offset accordingly. 
+ int32_t encoding = LdrLitEncoding32(rn_, GetOffset(code_size)); + buffer->Store<int16_t>(location_, encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + break; + } + case kLiteral64KiB: { + DCHECK(type_ == kLoadLiteralNarrow); + int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size)); + int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); + int16_t ldr_encoding = LdrRtRnImm5Encoding16(rn_, rn_, 0); + buffer->Store<int16_t>(location_, mov_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 6u, ldr_encoding); + break; + } + case kLiteral1MiB: { + DCHECK(type_ == kLoadLiteralNarrow); + int32_t offset = GetOffset(code_size); + int32_t mov_encoding = MovModImmEncoding32(rn_, offset & ~0xfff); + int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); + int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, offset & 0xfff); + buffer->Store<int16_t>(location_, mov_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + break; + } + case kLiteralFar: { + DCHECK(type_ == kLoadLiteralNarrow); + int32_t offset = GetOffset(code_size); + int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff); + int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff); + int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); + int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, 0); + buffer->Store<int16_t>(location_, movw_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + break; + } + + case kLongOrFPLiteral1KiB: { + int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size)); // DCHECKs type_. + buffer->Store<int16_t>(location_, encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + break; + } + case kLongOrFPLiteral256KiB: { + int32_t offset = GetOffset(code_size); + int32_t mov_encoding = MovModImmEncoding32(IP, offset & ~0x3ff); + int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); + int32_t ldr_encoding = LoadWideOrFpEncoding(IP, offset & 0x3ff); // DCHECKs type_. + buffer->Store<int16_t>(location_, mov_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + break; + } + case kLongOrFPLiteralFar: { + int32_t offset = GetOffset(code_size); + int32_t movw_encoding = MovwEncoding32(IP, offset & 0xffff); + int32_t movt_encoding = MovtEncoding32(IP, offset & ~0xffff); + int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); + int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0); // DCHECKs type_. 
+ buffer->Store<int16_t>(location_, movw_encoding >> 16); + buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); + buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + break; } } } - uint16_t Thumb2Assembler::EmitCompareAndBranch(Register rn, uint16_t prev, bool n) { CHECK(IsLowRegister(rn)); uint32_t location = buffer_.Size(); // This is always unresolved as it must be a forward branch. Emit16(prev); // Previous link. - return AddBranch(n ? Branch::kCompareAndBranchNonZero : Branch::kCompareAndBranchZero, - location, rn); + return AddFixup(Fixup::CompareAndBranch(location, rn, n ? NE : EQ)); } @@ -1619,47 +2195,53 @@ void Thumb2Assembler::EmitMultiMemOp(Condition cond, } } - -void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x, bool is_near) { +void Thumb2Assembler::EmitBranch(Condition cond, Label* label, bool link, bool x) { + bool use32bit = IsForced32Bit() || !CanRelocateBranches(); uint32_t pc = buffer_.Size(); - Branch::Type branch_type; + Fixup::Type branch_type; if (cond == AL) { if (link) { + use32bit = true; if (x) { - branch_type = Branch::kUnconditionalLinkX; // BLX. + branch_type = Fixup::kUnconditionalLinkX; // BLX. } else { - branch_type = Branch::kUnconditionalLink; // BX. + branch_type = Fixup::kUnconditionalLink; // BX. } } else { - branch_type = Branch::kUnconditional; // B. + branch_type = Fixup::kUnconditional; // B. } } else { - branch_type = Branch::kConditional; // B<cond>. + branch_type = Fixup::kConditional; // B<cond>. } + Fixup::Size size = use32bit ? Fixup::kBranch32Bit : Fixup::kBranch16Bit; + FixupId branch_id = AddFixup(Fixup::Branch(pc, branch_type, size, cond)); + if (label->IsBound()) { - Branch::Size size = AddBranch(branch_type, pc, label->Position(), cond); // Resolved branch. - - // The branch is to a bound label which means that it's a backwards branch. We know the - // current size of it so we can emit the appropriate space. Note that if it's a 16 bit - // branch the size may change if it so happens that other branches change size that change - // the distance to the target and that distance puts this branch over the limit for 16 bits. - if (size == Branch::k16Bit) { - Emit16(0); // Space for a 16 bit branch. - } else { - Emit32(0); // Space for a 32 bit branch. + // The branch is to a bound label which means that it's a backwards branch. + // Record this branch as a dependency of all Fixups between the label and the branch. + GetFixup(branch_id)->Resolve(label->Position()); + for (FixupId fixup_id = branch_id; fixup_id != 0u; ) { + --fixup_id; + Fixup* fixup = GetFixup(fixup_id); + DCHECK_GE(label->Position(), 0); + if (fixup->GetLocation() < static_cast<uint32_t>(label->Position())) { + break; + } + fixup->AddDependent(branch_id); } + Emit16(0); } else { - // Branch is to an unbound label. Emit space for it. - uint16_t branch_id = AddBranch(branch_type, pc, cond, is_near); // Unresolved branch. - if (force_32bit_ || (!CanRelocateBranches() && !is_near)) { - Emit16(static_cast<uint16_t>(label->position_)); // Emit current label link. - Emit16(0); // another 16 bits. - } else { - Emit16(static_cast<uint16_t>(label->position_)); // Emit current label link. 
- } - label->LinkTo(branch_id); // Link to the branch ID. + // Branch target is an unbound label. Add it to a singly-linked list maintained within + // the code with the label serving as the head. + Emit16(static_cast<uint16_t>(label->position_)); + label->LinkTo(branch_id); } + + if (use32bit) { + Emit16(0); + } + DCHECK_EQ(buffer_.Size() - pc, GetFixup(branch_id)->GetSizeInBytes()); } @@ -2274,82 +2856,8 @@ void Thumb2Assembler::Mov(Register rd, Register rm, Condition cond) { } -// A branch has changed size. Make a hole for it. -void Thumb2Assembler::MakeHoleForBranch(uint32_t location, uint32_t delta) { - // Move the contents of the buffer using: Move(newposition, oldposition) - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - buffer_.Move(location + delta, location); -} - - void Thumb2Assembler::Bind(Label* label) { - CHECK(!label->IsBound()); - uint32_t bound_pc = buffer_.Size(); - std::vector<Branch*> changed_branches; - - while (label->IsLinked()) { - uint16_t position = label->Position(); // Branch id for linked branch. - Branch* branch = GetBranch(position); // Get the branch at this id. - bool changed = branch->Resolve(bound_pc); // Branch can be resolved now. - uint32_t branch_location = branch->GetLocation(); - uint16_t next = buffer_.Load<uint16_t>(branch_location); // Get next in chain. - if (changed) { - DCHECK(CanRelocateBranches()); - MakeHoleForBranch(branch->GetLocation(), 2); - if (branch->IsCompareAndBranch()) { - // A cbz/cbnz instruction has changed size. There is no valid encoding for - // a 32 bit cbz/cbnz so we need to change this to an instruction pair: - // cmp rn, #0 - // b<eq|ne> target - bool n = branch->GetType() == Branch::kCompareAndBranchNonZero; - Condition cond = n ? NE : EQ; - branch->Move(2); // Move the branch forward by 2 bytes. - branch->ResetTypeAndCondition(Branch::kConditional, cond); - branch->ResetSize(Branch::k16Bit); - - // Now add a compare instruction in the place the branch was. - buffer_.Store<int16_t>(branch_location, - B13 | B11 | static_cast<int16_t>(branch->GetRegister()) << 8); - - // Since have moved made a hole in the code we need to reload the - // current pc. - bound_pc = buffer_.Size(); - - // Now resolve the newly added branch. - changed = branch->Resolve(bound_pc); - if (changed) { - MakeHoleForBranch(branch->GetLocation(), 2); - changed_branches.push_back(branch); - } - } else { - changed_branches.push_back(branch); - } - } - label->position_ = next; // Move to next. - } - label->BindTo(bound_pc); - - // Now relocate any changed branches. Do this until there are no more changes. - std::vector<Branch*> branches_to_process = changed_branches; - while (branches_to_process.size() != 0) { - changed_branches.clear(); - for (auto& changed_branch : branches_to_process) { - for (auto& branch : branches_) { - bool changed = branch->Relocate(changed_branch->GetLocation(), 2); - if (changed) { - changed_branches.push_back(branch); - } - } - branches_to_process = changed_branches; - } - } -} - - -void Thumb2Assembler::EmitBranches() { - for (auto& branch : branches_) { - branch->Emit(&buffer_); - } + BindLabel(label, buffer_.Size()); } @@ -2487,6 +2995,85 @@ int Thumb2Assembler::DecodeBranchOffset(int32_t instr) { return imm32; } +uint32_t Thumb2Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the fixups from the beginning + // up to the old_position. 
Since we expect AdjustedPosition() to be called in a loop + // with increasing old_position, we can use the data from last AdjustedPosition() to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of fixups. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0u; + last_old_position_ = 0u; + last_fixup_id_ = 0u; + } + while (last_fixup_id_ != fixups_.size()) { + Fixup* fixup = GetFixup(last_fixup_id_); + if (fixup->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + if (fixup->GetSize() != fixup->GetOriginalSize()) { + last_position_adjustment_ += fixup->GetSizeInBytes() - fixup->GetOriginalSizeInBytes(); + } + ++last_fixup_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +Literal* Thumb2Assembler::NewLiteral(size_t size, const uint8_t* data) { + DCHECK(size == 4u || size == 8u) << size; + literals_.emplace_back(size, data); + return &literals_.back(); +} + +void Thumb2Assembler::LoadLiteral(Register rt, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + DCHECK(!literal->GetLabel()->IsBound()); + bool use32bit = IsForced32Bit() || IsHighRegister(rt); + uint32_t location = buffer_.Size(); + Fixup::Size size = use32bit ? Fixup::kLiteral4KiB : Fixup::kLiteral1KiB; + FixupId fixup_id = AddFixup(Fixup::LoadNarrowLiteral(location, rt, size)); + Emit16(static_cast<uint16_t>(literal->GetLabel()->position_)); + literal->GetLabel()->LinkTo(fixup_id); + if (use32bit) { + Emit16(0); + } + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); +} + +void Thumb2Assembler::LoadLiteral(Register rt, Register rt2, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + DCHECK(!literal->GetLabel()->IsBound()); + uint32_t location = buffer_.Size(); + FixupId fixup_id = + AddFixup(Fixup::LoadWideLiteral(location, rt, rt2, Fixup::kLongOrFPLiteral1KiB)); + Emit16(static_cast<uint16_t>(literal->GetLabel()->position_)); + literal->GetLabel()->LinkTo(fixup_id); + Emit16(0); + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); +} + +void Thumb2Assembler::LoadLiteral(SRegister sd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + DCHECK(!literal->GetLabel()->IsBound()); + uint32_t location = buffer_.Size(); + FixupId fixup_id = AddFixup(Fixup::LoadSingleLiteral(location, sd, Fixup::kLongOrFPLiteral1KiB)); + Emit16(static_cast<uint16_t>(literal->GetLabel()->position_)); + literal->GetLabel()->LinkTo(fixup_id); + Emit16(0); + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); +} + +void Thumb2Assembler::LoadLiteral(DRegister dd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + DCHECK(!literal->GetLabel()->IsBound()); + uint32_t location = buffer_.Size(); + FixupId fixup_id = AddFixup(Fixup::LoadDoubleLiteral(location, dd, Fixup::kLongOrFPLiteral1KiB)); + Emit16(static_cast<uint16_t>(literal->GetLabel()->position_)); + literal->GetLabel()->LinkTo(fixup_id); + Emit16(0); + DCHECK_EQ(location + GetFixup(fixup_id)->GetSizeInBytes(), buffer_.Size()); +} void Thumb2Assembler::AddConstant(Register rd, int32_t value, Condition cond) { AddConstant(rd, rd, value, cond); @@ -2763,16 +3350,6 @@ void Thumb2Assembler::CompareAndBranchIfZero(Register r, Label* label) { } -void Thumb2Assembler::CompareAndBranchIfZero(Register r, NearLabel* label) { - if (IsLowRegister(r)) { - cbz(r, label); - } else { - cmp(r, ShifterOperand(0)); - b(label, EQ); - } -} - 
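// Editor's note: a minimal end-to-end sketch (not part of the patch) of the literal/fixup flow
// implemented above; the driver function, include path and the chosen registers/values are
// assumptions made purely for illustration.
#include "utils/arm/assembler_thumb2.h"

namespace art {
namespace arm {

void SketchLiteralAndFixupFlow() {
  Thumb2Assembler assembler(/* can_relocate_branches */ true);
  Literal* literal = assembler.NewLiteral<int32_t>(42);  // 4-byte literal; pool emitted later.
  assembler.LoadLiteral(R0, literal);                    // Starts as a 16-bit LDR-literal Fixup.
  Label done;
  assembler.CompareAndBranchIfZero(R0, &done);           // May grow to CMP + Bcc if out of range.
  assembler.Bind(&done);
  uint32_t raw_pos = static_cast<uint32_t>(done.Position());  // Position before final fixup.
  assembler.FinalizeCode();                    // Binds literals, adjusts Fixups, emits the pool.
  uint32_t final_pos = assembler.GetAdjustedPosition(raw_pos);  // Translate to the final offset.
  static_cast<void>(final_pos);
}

}  // namespace arm
}  // namespace art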
- void Thumb2Assembler::CompareAndBranchIfNonZero(Register r, Label* label) { if (CanRelocateBranches() && IsLowRegister(r)) { cbnz(r, label); diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 2382b74c30..5e6969b4c2 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_ #define ART_COMPILER_UTILS_ARM_ASSEMBLER_THUMB2_H_ +#include <deque> #include <vector> #include "base/logging.h" @@ -34,13 +35,15 @@ class Thumb2Assembler FINAL : public ArmAssembler { : can_relocate_branches_(can_relocate_branches), force_32bit_(false), it_cond_index_(kNoItCondition), - next_condition_(AL) { + next_condition_(AL), + fixups_(), + literals_(), + last_position_adjustment_(0u), + last_old_position_(0u), + last_fixup_id_(0u) { } virtual ~Thumb2Assembler() { - for (auto& branch : branches_) { - delete branch; - } } bool IsThumb() const OVERRIDE { @@ -55,10 +58,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { return can_relocate_branches_; } - void FinalizeInstructions(const MemoryRegion& region) OVERRIDE { - EmitBranches(); - Assembler::FinalizeInstructions(region); - } + void FinalizeCode() OVERRIDE; // Data-processing instructions. void and_(Register rd, Register rn, const ShifterOperand& so, Condition cond = AL) OVERRIDE; @@ -238,7 +238,6 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Branch instructions. void b(Label* label, Condition cond = AL); - void b(NearLabel* label, Condition cond = AL); void bl(Label* label, Condition cond = AL); void blx(Label* label); void blx(Register rm, Condition cond = AL) OVERRIDE; @@ -273,13 +272,23 @@ class Thumb2Assembler FINAL : public ArmAssembler { void Mov(Register rd, Register rm, Condition cond = AL) OVERRIDE; void CompareAndBranchIfZero(Register r, Label* label) OVERRIDE; - void CompareAndBranchIfZero(Register r, NearLabel* label) OVERRIDE; void CompareAndBranchIfNonZero(Register r, Label* label) OVERRIDE; // Memory barriers. void dmb(DmbOptions flavor) OVERRIDE; - // Macros. + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position) OVERRIDE; + + using ArmAssembler::NewLiteral; // Make the helper template visible. + + Literal* NewLiteral(size_t size, const uint8_t* data) OVERRIDE; + void LoadLiteral(Register rt, Literal* literal) OVERRIDE; + void LoadLiteral(Register rt, Register rt2, Literal* literal) OVERRIDE; + void LoadLiteral(SRegister sd, Literal* literal) OVERRIDE; + void LoadLiteral(DRegister dd, Literal* literal) OVERRIDE; + // Add signed constant value to rd. May clobber IP. void AddConstant(Register rd, int32_t value, Condition cond = AL) OVERRIDE; void AddConstant(Register rd, Register rn, int32_t value, @@ -340,6 +349,244 @@ class Thumb2Assembler FINAL : public ArmAssembler { } private: + typedef uint16_t FixupId; + + // Fixup: branches and literal pool references. + // + // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This + // depends on both the type of branch and the offset to which it is branching. The 16-bit + // cbz and cbnz instructions may also need to be replaced with a separate 16-bit compare + // instruction and a 16- or 32-bit branch instruction. 
A load from a literal pool can also be + // a 16-bit or 32-bit instruction and, if the method is large, we may need to use a sequence + // of instructions to make up for the limited range of load literal instructions (up to + // 4KiB for the 32-bit variant). When generating code for these insns we don't know the + // size beforehand, so we assume it is the smallest available size and determine the final + // code offsets and sizes and emit code in FinalizeCode(). + // + // To handle this, we keep a record of every branch and literal pool load in the program. + // The actual instruction encoding for these is delayed until we know the final size of + // every instruction. When we bind a label to a branch we don't know the final location yet + // as some preceding instructions may need to be expanded, so we record a non-final offset. + // In FinalizeCode(), we expand the sizes of branches and literal loads that are out of + // range. With each expansion, we need to update dependent Fixups, i.e. instructions with + // a target on the other side of the expanded insn, as their offsets change and this may + // trigger further expansion. + // + // All Fixups have a 'fixup id' which is a 16-bit unsigned number used to identify the + // Fixup. For each unresolved label we keep a singly-linked list of all Fixups pointing + // to it, using the fixup ids as links. The first link is stored in the label's position + // (the label is linked but not bound), the following links are stored in the code buffer, + // in the placeholder where we will eventually emit the actual code. + + class Fixup { + public: + // Branch type. + enum Type : uint8_t { + kConditional, // B<cond>. + kUnconditional, // B. + kUnconditionalLink, // BL. + kUnconditionalLinkX, // BLX. + kCompareAndBranchXZero, // cbz/cbnz. + kLoadLiteralNarrow, // Load narrow integer literal. + kLoadLiteralWide, // Load wide integer literal. + kLoadFPLiteralSingle, // Load FP literal single. + kLoadFPLiteralDouble, // Load FP literal double. + }; + + // Calculated size of branch instruction based on type and offset. + enum Size : uint8_t { + // Branch variants. + kBranch16Bit, + kBranch32Bit, + // NOTE: We don't support branches which would require multiple instructions, i.e. + // conditional branches beyond +-1MiB and unconditional branches beyond +-16MiB. + + // CBZ/CBNZ variants. + kCbxz16Bit, // CBZ/CBNZ rX, label; X < 8; 7-bit positive offset. + kCbxz32Bit, // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset. + kCbxz48Bit, // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset. + + // Load integer literal variants. + // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes. + kLiteral1KiB, + // LDR rX, label; 32-bit variant up to 4KiB offset; 4 bytes. + kLiteral4KiB, + // MOV rX, imm16 + ADD rX, pc + LDR rX, [rX]; X < 8; up to 64KiB offset; 8 bytes. + kLiteral64KiB, + // MOV rX, modimm + ADD rX, pc + LDR rX, [rX, #imm12]; up to 1MiB offset; 10 bytes. + kLiteral1MiB, + // NOTE: We don't provide the 12-byte version of kLiteralFar below where the LDR is 16-bit. + // MOV rX, imm16 + MOVT rX, imm16 + ADD rX, pc + LDR rX, [rX]; any offset; 14 bytes. + kLiteralFar, + + // Load long or FP literal variants. + // VLDR s/dX, label; 32-bit insn, up to 1KiB offset; 4 bytes. + kLongOrFPLiteral1KiB, + // MOV ip, modimm + ADD ip, pc + VLDR s/dX, [IP, #imm8*4]; up to 256KiB offset; 10 bytes. + kLongOrFPLiteral256KiB, + // MOV ip, imm16 + MOVT ip, imm16 + ADD ip, pc + VLDR s/dX, [IP]; any offset; 14 bytes.
+ kLongOrFPLiteralFar, + }; + + // Unresolved branch possibly with a condition. + static Fixup Branch(uint32_t location, Type type, Size size = kBranch16Bit, + Condition cond = AL) { + DCHECK(type == kConditional || type == kUnconditional || + type == kUnconditionalLink || type == kUnconditionalLinkX); + DCHECK(size == kBranch16Bit || size == kBranch32Bit); + DCHECK(size == kBranch32Bit || (type == kConditional || type == kUnconditional)); + return Fixup(kNoRegister, kNoRegister, kNoSRegister, kNoDRegister, + cond, type, size, location); + } + + // Unresolved compare-and-branch instruction with a register and condition (EQ or NE). + static Fixup CompareAndBranch(uint32_t location, Register rn, Condition cond) { + DCHECK(cond == EQ || cond == NE); + return Fixup(rn, kNoRegister, kNoSRegister, kNoDRegister, + cond, kCompareAndBranchXZero, kCbxz16Bit, location); + } + + // Load narrow literal. + static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size = kLiteral1KiB) { + DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB || + size == kLiteral1MiB || size == kLiteralFar); + DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB)); + return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister, + AL, kLoadLiteralNarrow, size, location); + } + + // Load wide literal. + static Fixup LoadWideLiteral(uint32_t location, Register rt, Register rt2, + Size size = kLongOrFPLiteral1KiB) { + DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB || + size == kLongOrFPLiteralFar); + DCHECK(!IsHighRegister(rt) || (size != kLiteral1KiB && size != kLiteral64KiB)); + return Fixup(rt, rt2, kNoSRegister, kNoDRegister, + AL, kLoadLiteralWide, size, location); + } + + // Load FP single literal. + static Fixup LoadSingleLiteral(uint32_t location, SRegister sd, + Size size = kLongOrFPLiteral1KiB) { + DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB || + size == kLongOrFPLiteralFar); + return Fixup(kNoRegister, kNoRegister, sd, kNoDRegister, + AL, kLoadFPLiteralSingle, size, location); + } + + // Load FP double literal. + static Fixup LoadDoubleLiteral(uint32_t location, DRegister dd, + Size size = kLongOrFPLiteral1KiB) { + DCHECK(size == kLongOrFPLiteral1KiB || size == kLongOrFPLiteral256KiB || + size == kLongOrFPLiteralFar); + return Fixup(kNoRegister, kNoRegister, kNoSRegister, dd, + AL, kLoadFPLiteralDouble, size, location); + } + + Type GetType() const { + return type_; + } + + Size GetOriginalSize() const { + return original_size_; + } + + Size GetSize() const { + return size_; + } + + uint32_t GetOriginalSizeInBytes() const; + + uint32_t GetSizeInBytes() const; + + uint32_t GetLocation() const { + return location_; + } + + uint32_t GetAdjustment() const { + return adjustment_; + } + + const std::vector<FixupId>& Dependents() const { + return dependents_; + } + + void AddDependent(FixupId dependent_id) { + dependents_.push_back(dependent_id); + } + + // Resolve a branch when the target is known. + void Resolve(uint32_t target) { + DCHECK_EQ(target_, kUnresolved); + DCHECK_NE(target, kUnresolved); + target_ = target; + } + + // Check if the current size is OK for current location_, target_ and adjustment_. + // If not, increase the size. Return the size increase, 0 if unchanged. + // If the target if after this Fixup, also add the difference to adjustment_, + // so that we don't need to consider forward Fixups as their own dependencies. 
+ uint32_t AdjustSizeIfNeeded(uint32_t current_code_size); + + // Increase adjustments. This is called for dependents of a Fixup when its size changes. + void IncreaseAdjustment(uint32_t increase) { + adjustment_ += increase; + } + + // Finalize the branch with an adjustment to the location. Both location and target are updated. + void Finalize(uint32_t location_adjustment) { + DCHECK_NE(target_, kUnresolved); + location_ += location_adjustment; + target_ += location_adjustment; + } + + // Emit the branch instruction into the assembler buffer. This does the + // encoding into the thumb instruction. + void Emit(AssemblerBuffer* buffer, uint32_t code_size) const; + + private: + Fixup(Register rn, Register rt2, SRegister sd, DRegister dd, + Condition cond, Type type, Size size, uint32_t location) + : rn_(rn), + rt2_(rt2), + sd_(sd), + dd_(dd), + cond_(cond), + type_(type), + original_size_(size), size_(size), + location_(location), + target_(kUnresolved), + adjustment_(0u), + dependents_() { + } + static size_t SizeInBytes(Size size); + + // The size of padding added before the literal pool. + static size_t LiteralPoolPaddingSize(uint32_t current_code_size); + + // Returns the offset from the PC-using insn to the target. + int32_t GetOffset(uint32_t current_code_size) const; + + size_t IncreaseSize(Size new_size); + + int32_t LoadWideOrFpEncoding(Register rbase, int32_t offset) const; + + static constexpr uint32_t kUnresolved = 0xffffffff; // Value for target_ for unresolved. + + const Register rn_; // Rn for cbnz/cbz, Rt for literal loads. + Register rt2_; // For kLoadLiteralWide. + SRegister sd_; // For kLoadFPLiteralSingle. + DRegister dd_; // For kLoadFPLiteralDouble. + const Condition cond_; + const Type type_; + Size original_size_; + Size size_; + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + uint32_t adjustment_; // The number of extra bytes inserted between location_ and target_. + std::vector<FixupId> dependents_; // Fixups that require adjustment when current size changes. + }; + // Emit a single 32 or 16 bit data processing instruction. void EmitDataProcessing(Condition cond, Opcode opcode, @@ -432,7 +679,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { void EmitVPushPop(uint32_t reg, int nregs, bool push, bool dbl, Condition cond); - void EmitBranch(Condition cond, Label* label, bool link, bool x, bool is_near = false); + void EmitBranch(Condition cond, Label* label, bool link, bool x); static int32_t EncodeBranchOffset(int32_t offset, int32_t inst); static int DecodeBranchOffset(int32_t inst); int32_t EncodeTstOffset(int offset, int32_t inst); @@ -475,275 +722,53 @@ class Thumb2Assembler FINAL : public ArmAssembler { CheckCondition(cond); } - // Branches. - // - // The thumb2 architecture allows branches to be either 16 or 32 bit instructions. This - // depends on both the type of branch and the offset to which it is branching. When - // generating code for branches we don't know the size before hand (if the branch is - // going forward, because we haven't seen the target address yet), so we need to assume - // that it is going to be one of 16 or 32 bits. When we know the target (the label is 'bound') - // we can determine the actual size of the branch. However, if we had guessed wrong before - // we knew the target there will be no room in the instruction sequence for the new - // instruction (assume that we never decrease the size of a branch). 
- // - // To handle this, we keep a record of every branch in the program. The actual instruction - // encoding for these is delayed until we know the final size of every branch. When we - // bind a label to a branch (we then know the target address) we determine if the branch - // has changed size. If it has we need to move all the instructions in the buffer after - // the branch point forward by the change in size of the branch. This will create a gap - // in the code big enough for the new branch encoding. However, since we have moved - // a chunk of code we need to relocate the branches in that code to their new address. - // - // Creating a hole in the code for the new branch encoding might cause another branch that was - // 16 bits to become 32 bits, so we need to find this in another pass. - // - // We also need to deal with a cbz/cbnz instruction that becomes too big for its offset - // range. We do this by converting it to two instructions: - // cmp Rn, #0 - // b<cond> target - // But we also need to handle the case where the conditional branch is out of range and - // becomes a 32 bit conditional branch. - // - // All branches have a 'branch id' which is a 16 bit unsigned number used to identify - // the branch. Unresolved labels use the branch id to link to the next unresolved branch. - - class Branch { - public: - // Branch type. - enum Type { - kUnconditional, // B. - kConditional, // B<cond>. - kCompareAndBranchZero, // cbz. - kCompareAndBranchNonZero, // cbnz. - kUnconditionalLink, // BL. - kUnconditionalLinkX, // BLX. - kUnconditionalX // BX. - }; - - // Calculated size of branch instruction based on type and offset. - enum Size { - k16Bit, - k32Bit - }; - - // Unresolved branch possibly with a condition. - Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, Condition cond = AL) : - assembler_(assembler), type_(type), location_(location), - target_(kUnresolved), - cond_(cond), rn_(R0) { - CHECK(!IsCompareAndBranch()); - size_ = CalculateSize(); - } - - // Unresolved compare-and-branch instruction with a register. - Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, Register rn) : - assembler_(assembler), type_(type), location_(location), - target_(kUnresolved), cond_(AL), rn_(rn) { - CHECK(IsCompareAndBranch()); - size_ = CalculateSize(); - } - - // Resolved branch (can't be compare-and-branch) with a target and possibly a condition. - Branch(const Thumb2Assembler* assembler, Type type, uint32_t location, uint32_t target, - Condition cond = AL) : - assembler_(assembler), type_(type), location_(location), - target_(target), cond_(cond), rn_(R0) { - CHECK(!IsCompareAndBranch()); - // Resolved branch. - size_ = CalculateSize(); - } - - bool IsCompareAndBranch() const { - return type_ == kCompareAndBranchNonZero || type_ == kCompareAndBranchZero; - } - - // Resolve a branch when the target is known. If this causes the - // size of the branch to change return true. Otherwise return false. - bool Resolve(uint32_t target) { - uint32_t old_target = target_; - target_ = target; - if (assembler_->CanRelocateBranches()) { - Size new_size = CalculateSize(); - if (size_ != new_size) { - size_ = new_size; - return true; - } - return false; - } else { - if (kIsDebugBuild) { - if (old_target == kUnresolved) { - // Check that the size has not increased. - DCHECK(!(CalculateSize() == k32Bit && size_ == k16Bit)); - } else { - DCHECK(CalculateSize() == size_); - } - } - return false; - } - } - - // Move a cbz/cbnz branch. This is always forward. 
- void Move(int32_t delta) { - CHECK(IsCompareAndBranch()); - CHECK_GT(delta, 0); - location_ += delta; - target_ += delta; - } - - // Relocate a branch by a given delta. This changed the location and - // target if they need to be changed. It also recalculates the - // size of the branch instruction. It returns true if the branch - // has changed size. - bool Relocate(uint32_t oldlocation, int32_t delta) { - DCHECK(assembler_->CanRelocateBranches()); - if (location_ > oldlocation) { - location_ += delta; - } - if (target_ != kUnresolved) { - if (target_ > oldlocation) { - target_ += delta; - } - } else { - return false; // Don't know the size yet. - } - - // Calculate the new size. - Size new_size = CalculateSize(); - if (size_ != new_size) { - size_ = new_size; - return true; - } - return false; - } - - Size GetSize() const { - return size_; - } - - Type GetType() const { - return type_; - } - - uint32_t GetLocation() const { - return location_; - } - - // Emit the branch instruction into the assembler buffer. This does the - // encoding into the thumb instruction. - void Emit(AssemblerBuffer* buffer) const; - - // Reset the type and condition to those given. This used for - // cbz/cbnz instructions when they are converted to cmp/b<cond> - void ResetTypeAndCondition(Type type, Condition cond) { - CHECK(IsCompareAndBranch()); - CHECK(cond == EQ || cond == NE); - type_ = type; - cond_ = cond; - } - - Register GetRegister() const { - return rn_; - } - - void ResetSize(Size size) { - size_ = size; - } - - private: - // Calculate the size of the branch instruction based on its type and offset. - Size CalculateSize() const { - if (target_ == kUnresolved) { - if (assembler_->IsForced32Bit() && (type_ == kUnconditional || type_ == kConditional)) { - return k32Bit; - } - if (IsCompareAndBranch()) { - // Compare and branch instructions can only be encoded on 16 bits. - return k16Bit; - } - return assembler_->CanRelocateBranches() ? k16Bit : k32Bit; - } - // When the target is resolved, we know the best encoding for it. - int32_t delta = target_ - location_ - 4; - if (delta < 0) { - delta = -delta; - } - switch (type_) { - case kUnconditional: - if (assembler_->IsForced32Bit() || delta >= (1 << 11)) { - return k32Bit; - } else { - return k16Bit; - } - case kConditional: - if (assembler_->IsForced32Bit() || delta >= (1 << 8)) { - return k32Bit; - } else { - return k16Bit; - } - case kCompareAndBranchZero: - case kCompareAndBranchNonZero: - if (delta >= (1 << 7)) { - return k32Bit; // Will cause this branch to become invalid. - } - return k16Bit; - - case kUnconditionalX: - case kUnconditionalLinkX: - return k16Bit; - case kUnconditionalLink: - return k32Bit; - } - LOG(FATAL) << "Cannot reach"; - return k16Bit; - } - - static constexpr uint32_t kUnresolved = 0xffffffff; // Value for target_ for unresolved. - const Thumb2Assembler* assembler_; - Type type_; - uint32_t location_; // Offset into assembler buffer in bytes. - uint32_t target_; // Offset into assembler buffer in bytes. - Size size_; - Condition cond_; - const Register rn_; - }; - - std::vector<Branch*> branches_; - - // Add a resolved branch and return its size. - Branch::Size AddBranch(Branch::Type type, uint32_t location, uint32_t target, - Condition cond = AL) { - branches_.push_back(new Branch(this, type, location, target, cond)); - return branches_[branches_.size()-1]->GetSize(); - } - - // Add a compare and branch (with a register) and return its id. 
- uint16_t AddBranch(Branch::Type type, uint32_t location, Register rn) { - branches_.push_back(new Branch(this, type, location, rn)); - return branches_.size() - 1; + FixupId AddFixup(Fixup fixup) { + FixupId fixup_id = static_cast<FixupId>(fixups_.size()); + fixups_.push_back(fixup); + // For iterating using FixupId, we need the next id to be representable. + DCHECK_EQ(static_cast<size_t>(static_cast<FixupId>(fixups_.size())), fixups_.size()); + return fixup_id; } - // Add an unresolved branch and return its id. - uint16_t AddBranch(Branch::Type type, - uint32_t location, - Condition cond = AL, - bool is_near = false) { - Branch* branch = new Branch(this, type, location, cond); - if (is_near) { - branch->ResetSize(Branch::k16Bit); - } - branches_.push_back(branch); - return branches_.size() - 1; - } - - Branch* GetBranch(uint16_t branchid) { - if (branchid >= branches_.size()) { - return nullptr; - } - return branches_[branchid]; + Fixup* GetFixup(FixupId fixup_id) { + DCHECK_LT(fixup_id, fixups_.size()); + return &fixups_[fixup_id]; } - void EmitBranches(); - void MakeHoleForBranch(uint32_t location, uint32_t size); + void BindLabel(Label* label, uint32_t bound_pc); + void BindLiterals(); + void AdjustFixupIfNeeded(Fixup* fixup, uint32_t* current_code_size, + std::deque<FixupId>* fixups_to_recalculate); + uint32_t AdjustFixups(); + void EmitFixups(uint32_t adjusted_code_size); + void EmitLiterals(); + + static int16_t BEncoding16(int32_t offset, Condition cond); + static int32_t BEncoding32(int32_t offset, Condition cond); + static int16_t CbxzEncoding16(Register rn, int32_t offset, Condition cond); + static int16_t CmpRnImm8Encoding16(Register rn, int32_t value); + static int16_t AddRdnRmEncoding16(Register rdn, Register rm); + static int32_t MovwEncoding32(Register rd, int32_t value); + static int32_t MovtEncoding32(Register rd, int32_t value); + static int32_t MovModImmEncoding32(Register rd, int32_t value); + static int16_t LdrLitEncoding16(Register rt, int32_t offset); + static int32_t LdrLitEncoding32(Register rt, int32_t offset); + static int32_t LdrdEncoding32(Register rt, Register rt2, Register rn, int32_t offset); + static int32_t VldrsEncoding32(SRegister sd, Register rn, int32_t offset); + static int32_t VldrdEncoding32(DRegister dd, Register rn, int32_t offset); + static int16_t LdrRtRnImm5Encoding16(Register rt, Register rn, int32_t offset); + static int32_t LdrRtRnImm12Encoding(Register rt, Register rn, int32_t offset); + + std::vector<Fixup> fixups_; + + // Use std::deque<> for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. + std::deque<Literal> literals_; + + // Data for AdjustedPosition(), see the description there. 
+ uint32_t last_position_adjustment_; + uint32_t last_old_position_; + FixupId last_fixup_id_; }; } // namespace arm diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index 733441b889..68b7931a0c 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -78,13 +78,20 @@ class AssemblerThumb2Test : public AssemblerTest<arm::Thumb2Assembler, return imm_value; } + std::string RepeatInsn(size_t count, const std::string& insn) { + std::string result; + for (; count != 0u; --count) { + result += insn; + } + return result; + } + private: std::vector<arm::Register*> registers_; static constexpr const char* kThumb2AssemblyHeader = ".syntax unified\n.thumb\n"; }; - TEST_F(AssemblerThumb2Test, Toolchain) { EXPECT_TRUE(CheckTools()); } @@ -370,4 +377,577 @@ TEST_F(AssemblerThumb2Test, StoreWordPairToNonThumbOffset) { DriverStr(expected, "StoreWordPairToNonThumbOffset"); } +TEST_F(AssemblerThumb2Test, TwoCbzMaxOffset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 63; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 64; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cbz r0, 1f\n" + // cbz r0, label1 + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cbz r0, 2f\n" // cbz r0, label2 + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzMaxOffset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 0u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 0u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 0u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzBeyondMaxOffset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 63; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 65; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cmp r0, #0\n" // cbz r0, label1 + "beq.n 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.n 2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzBeyondMaxOffset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 2u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 4u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 4u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzSecondAtMaxB16Offset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 62; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 128; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ 
ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cbz r0, 1f\n" + // cbz r0, label1 + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.n 2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzSecondAtMaxB16Offset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 0u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 2u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 2u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzSecondBeyondMaxB16Offset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 62; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 129; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cmp r0, #0\n" // cbz r0, label1 + "beq.n 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.w 2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzSecondBeyondMaxB16Offset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 2u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 6u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 6u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzFirstAtMaxB16Offset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 127; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 64; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cmp r0, #0\n" // cbz r0, label1 + "beq.n 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cbz r0, 2f\n" // cbz r0, label2 + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzFirstAtMaxB16Offset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 2u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 2u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 2u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, TwoCbzFirstBeyondMaxB16Offset) { + Label label0, label1, label2; + __ cbz(arm::R0, &label1); + constexpr size_t kLdrR0R0Count1 = 127; + for (size_t i = 0; i != kLdrR0R0Count1; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label0); + __ cbz(arm::R0, &label2); + __ Bind(&label1); + constexpr size_t kLdrR0R0Count2 = 65; + for (size_t i = 0; i != kLdrR0R0Count2; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + __ Bind(&label2); + + std::string expected = + "cmp r0, #0\n" // cbz r0, label1 + "beq.w 1f\n" + + RepeatInsn(kLdrR0R0Count1, "ldr r0, [r0]\n") + + "0:\n" + "cmp r0, #0\n" // cbz r0, label2 + "beq.n 
2f\n" + "1:\n" + + RepeatInsn(kLdrR0R0Count2, "ldr r0, [r0]\n") + + "2:\n"; + DriverStr(expected, "TwoCbzFirstBeyondMaxB16Offset"); + + EXPECT_EQ(static_cast<uint32_t>(label0.Position()) + 4u, + __ GetAdjustedPosition(label0.Position())); + EXPECT_EQ(static_cast<uint32_t>(label1.Position()) + 6u, + __ GetAdjustedPosition(label1.Position())); + EXPECT_EQ(static_cast<uint32_t>(label2.Position()) + 6u, + __ GetAdjustedPosition(label2.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralMax1KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R0, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 511; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "1:\n" + "ldr.n r0, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 0u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax1KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R0, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 512; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "1:\n" + "ldr.w r0, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralBeyondMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 2u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralMax4KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 2046; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "1:\n" + "ldr.w r1, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax4KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 2u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax4KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 2047; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "movw r1, #4096\n" // "as" does not consider (2f - 1f - 4) a constant expression for movw. 
+ "1:\n" + "add r1, pc\n" + "ldr r1, [r1, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralBeyondMax4KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralMax64KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 15) - 2u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "movw r1, #0xfffc\n" // "as" does not consider (2f - 1f - 4) a constant expression for movw. + "1:\n" + "add r1, pc\n" + "ldr r1, [r1, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax64KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax64KiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 15) - 1u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "mov.w r1, #((2f - 1f - 4) & ~0xfff)\n" + "1:\n" + "add r1, pc\n" + "ldr r1, [r1, #((2f - 1b - 4) & 0xfff)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralBeyondMax64KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 8u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralMax1MiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 19) - 3u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "mov.w r1, #((2f - 1f - 4) & ~0xfff)\n" + "1:\n" + "add r1, pc\n" + "ldr r1, [r1, #((2f - 1b - 4) & 0xfff)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralMax1MiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 8u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralBeyondMax1MiB) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 19) - 2u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw r1, #(0x100000 & 0xffff)\n" + // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. 
+ "movt r1, #(0x100000 >> 16)\n" + "1:\n" + "add r1, pc\n" + "ldr.w r1, [r1, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralBeyondMax1MiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 12u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralFar) { + arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::R1, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1u << 19) - 2u + 0x1234; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw r1, #((0x100000 + 2 * 0x1234) & 0xffff)\n" + // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. + "movt r1, #((0x100000 + 2 * 0x1234) >> 16)\n" + "1:\n" + "add r1, pc\n" + "ldr.w r1, [r1, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralFar"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 12u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralWideMax1KiB) { + arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); + __ LoadLiteral(arm::R1, arm::R3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 510; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "1:\n" + "ldrd r1, r3, [pc, #((2f - 1b - 2) & ~2)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x87654321\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralWideMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 0u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralWideBeyondMax1KiB) { + arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); + __ LoadLiteral(arm::R1, arm::R3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = 511; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n" + "1:\n" + "add ip, pc\n" + "ldrd r1, r3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x87654321\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralWideBeyondMax1KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralSingleMax256KiB) { + // The literal size must match but the type doesn't, so use an int32_t rather than float. 
+ arm::Literal* literal = __ NewLiteral<int32_t>(0x12345678); + __ LoadLiteral(arm::S3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1 << 17) - 3u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + "mov.w ip, #((2f - 1f - 4) & ~0x3ff)\n" + "1:\n" + "add ip, pc\n" + "vldr s3, [ip, #((2f - 1b - 4) & 0x3ff)]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralSingleMax256KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 6u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralDoubleBeyondMax256KiB) { + // The literal size must match but the type doesn't, so use an int64_t rather than double. + arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); + __ LoadLiteral(arm::D3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1 << 17) - 2u; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw ip, #(0x40000 & 0xffff)\n" + // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. + "movt ip, #(0x40000 >> 16)\n" + "1:\n" + "add ip, pc\n" + "vldr d3, [ip, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x87654321\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralDoubleBeyondMax256KiB"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u, + __ GetAdjustedPosition(label.Position())); +} + +TEST_F(AssemblerThumb2Test, LoadLiteralDoubleFar) { + // The literal size must match but the type doesn't, so use an int64_t rather than double. + arm::Literal* literal = __ NewLiteral<int64_t>(INT64_C(0x1234567887654321)); + __ LoadLiteral(arm::D3, literal); + Label label; + __ Bind(&label); + constexpr size_t kLdrR0R0Count = (1 << 17) - 2u + 0x1234; + for (size_t i = 0; i != kLdrR0R0Count; ++i) { + __ ldr(arm::R0, arm::Address(arm::R0)); + } + + std::string expected = + // "as" does not consider ((2f - 1f - 4) & 0xffff) a constant expression for movw. + "movw ip, #((0x40000 + 2 * 0x1234) & 0xffff)\n" + // "as" does not consider ((2f - 1f - 4) >> 16) a constant expression for movt. 
+ "movt ip, #((0x40000 + 2 * 0x1234) >> 16)\n" + "1:\n" + "add ip, pc\n" + "vldr d3, [ip, #0]\n" + + RepeatInsn(kLdrR0R0Count, "ldr r0, [r0]\n") + + ".align 2, 0\n" + "2:\n" + ".word 0x87654321\n" + ".word 0x12345678\n"; + DriverStr(expected, "LoadLiteralDoubleFar"); + + EXPECT_EQ(static_cast<uint32_t>(label.Position()) + 10u, + __ GetAdjustedPosition(label.Position())); +} + } // namespace art diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index cc78002ab0..eb8de0620b 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -31,7 +31,7 @@ namespace arm64 { #define ___ vixl_masm_-> #endif -void Arm64Assembler::EmitSlowPaths() { +void Arm64Assembler::FinalizeCode() { if (!exception_blocks_.empty()) { for (size_t i = 0; i < exception_blocks_.size(); i++) { EmitExceptionPoll(exception_blocks_.at(i)); diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index fa9faed66b..b53c11bc24 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -73,8 +73,8 @@ class Arm64Assembler FINAL : public Assembler { delete vixl_masm_; } - // Emit slow paths queued during assembly. - void EmitSlowPaths(); + // Finalize the code. + void FinalizeCode() OVERRIDE; // Size of generated code. size_t CodeSize() const; diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index b016e74aba..6d8a98931f 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -80,10 +80,11 @@ void AssemblerBuffer::FinalizeInstructions(const MemoryRegion& instructions) { } -void AssemblerBuffer::ExtendCapacity() { +void AssemblerBuffer::ExtendCapacity(size_t min_capacity) { size_t old_size = Size(); size_t old_capacity = Capacity(); size_t new_capacity = std::min(old_capacity * 2, old_capacity + 1 * MB); + new_capacity = std::max(new_capacity, min_capacity); // Allocate the new data area and copy contents of the old one to it. uint8_t* new_contents = NewContents(new_capacity); diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 672e1503be..0381af3956 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -199,13 +199,18 @@ class AssemblerBuffer { *reinterpret_cast<T*>(contents_ + position) = value; } - void Move(size_t newposition, size_t oldposition) { - CHECK(HasEnsuredCapacity()); - // Move the contents of the buffer from oldposition to - // newposition by nbytes. - size_t nbytes = Size() - oldposition; - memmove(contents_ + newposition, contents_ + oldposition, nbytes); - cursor_ += newposition - oldposition; + void Resize(size_t new_size) { + if (new_size > Capacity()) { + ExtendCapacity(new_size); + } + cursor_ = contents_ + new_size; + } + + void Move(size_t newposition, size_t oldposition, size_t size) { + // Move a chunk of the buffer from oldposition to newposition. + DCHECK_LE(oldposition + size, Size()); + DCHECK_LE(newposition + size, Size()); + memmove(contents_ + newposition, contents_ + oldposition, size); } // Emit a fixup at the current location. 
@@ -350,7 +355,7 @@ class AssemblerBuffer { return data + capacity - kMinimumGap; } - void ExtendCapacity(); + void ExtendCapacity(size_t min_capacity = 0u); friend class AssemblerFixup; }; @@ -376,8 +381,8 @@ class Assembler { public: static Assembler* Create(InstructionSet instruction_set); - // Emit slow paths queued during assembly - virtual void EmitSlowPaths() { buffer_.EmitSlowPaths(this); } + // Finalize the code; emit slow paths, fixup branches, add literal pool, etc. + virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); } // Size of generated code virtual size_t CodeSize() const { return buffer_.Size(); } diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index a339633efe..017402dbd3 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -544,6 +544,7 @@ class AssemblerTest : public testing::Test { } void DriverWrapper(std::string assembly_text, std::string test_name) { + assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 1a2c9a9000..20f61f942b 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -65,20 +65,33 @@ int CompareIgnoringSpace(const char* s1, const char* s2) { return *s1 - *s2; } -void dump(std::vector<uint8_t>& code, const char* testname) { - // This will only work on the host. There is no as, objcopy or objdump on the - // device. +void InitResults() { + if (test_results.empty()) { + setup_results(); + } +} + +std::string GetToolsDir() { #ifndef HAVE_ANDROID_OS - static bool results_ok = false; + // This will only work on the host. There is no as, objcopy or objdump on the device. static std::string toolsdir; - if (!results_ok) { + if (toolsdir.empty()) { setup_results(); toolsdir = CommonRuntimeTest::GetAndroidTargetToolsDir(kThumb2); SetAndroidData(); - results_ok = true; } + return toolsdir; +#else + return std::string(); +#endif +} + +void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* const* results) { +#ifndef HAVE_ANDROID_OS + static std::string toolsdir = GetToolsDir(); + ScratchFile file; const char* filename = file.GetFilename().c_str(); @@ -130,9 +143,6 @@ void dump(std::vector<uint8_t>& code, const char* testname) { FILE *fp = popen(cmd, "r"); ASSERT_TRUE(fp != nullptr); - std::map<std::string, const char**>::iterator results = test_results.find(testname); - ASSERT_NE(results, test_results.end()); - uint32_t lineindex = 0; while (!feof(fp)) { @@ -141,14 +151,14 @@ void dump(std::vector<uint8_t>& code, const char* testname) { if (s == nullptr) { break; } - if (CompareIgnoringSpace(results->second[lineindex], testline) != 0) { + if (CompareIgnoringSpace(results[lineindex], testline) != 0) { LOG(FATAL) << "Output is not as expected at line: " << lineindex - << results->second[lineindex] << "/" << testline; + << results[lineindex] << "/" << testline; } ++lineindex; } // Check that we are at the end. 
- ASSERT_TRUE(results->second[lineindex] == nullptr); + ASSERT_TRUE(results[lineindex] == nullptr); fclose(fp); } @@ -163,8 +173,31 @@ void dump(std::vector<uint8_t>& code, const char* testname) { #define __ assembler-> +void EmitAndCheck(arm::Thumb2Assembler* assembler, const char* testname, + const char* const* results) { + __ FinalizeCode(); + size_t cs = __ CodeSize(); + std::vector<uint8_t> managed_code(cs); + MemoryRegion code(&managed_code[0], managed_code.size()); + __ FinalizeInstructions(code); + + DumpAndCheck(managed_code, testname, results); +} + +void EmitAndCheck(arm::Thumb2Assembler* assembler, const char* testname) { + InitResults(); + std::map<std::string, const char* const*>::iterator results = test_results.find(testname); + ASSERT_NE(results, test_results.end()); + + EmitAndCheck(assembler, testname, results->second); +} + +#undef __ + +#define __ assembler. + TEST(Thumb2AssemblerTest, SimpleMov) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(R1)); __ mov(R8, ShifterOperand(R9)); @@ -172,46 +205,31 @@ TEST(Thumb2AssemblerTest, SimpleMov) { __ mov(R0, ShifterOperand(1)); __ mov(R8, ShifterOperand(9)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleMov"); - delete assembler; + EmitAndCheck(&assembler, "SimpleMov"); } TEST(Thumb2AssemblerTest, SimpleMov32) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); - assembler->Force32Bit(); + arm::Thumb2Assembler assembler; + __ Force32Bit(); __ mov(R0, ShifterOperand(R1)); __ mov(R8, ShifterOperand(R9)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleMov32"); - delete assembler; + EmitAndCheck(&assembler, "SimpleMov32"); } TEST(Thumb2AssemblerTest, SimpleMovAdd) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(R1)); __ add(R0, R1, ShifterOperand(R2)); __ add(R0, R1, ShifterOperand()); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleMovAdd"); - delete assembler; + EmitAndCheck(&assembler, "SimpleMovAdd"); } TEST(Thumb2AssemblerTest, DataProcessingRegister) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(R1)); __ mvn(R0, ShifterOperand(R1)); @@ -249,16 +267,11 @@ TEST(Thumb2AssemblerTest, DataProcessingRegister) { // 32 bit variants. 
__ add(R12, R1, ShifterOperand(R0)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingRegister"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingRegister"); } TEST(Thumb2AssemblerTest, DataProcessingImmediate) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(0x55)); __ mvn(R0, ShifterOperand(0x55)); @@ -283,16 +296,11 @@ TEST(Thumb2AssemblerTest, DataProcessingImmediate) { __ movs(R0, ShifterOperand(0x55)); __ mvns(R0, ShifterOperand(0x55)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingImmediate"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingImmediate"); } TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediate) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(0x550055)); __ mvn(R0, ShifterOperand(0x550055)); @@ -311,17 +319,12 @@ TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediate) { __ cmp(R0, ShifterOperand(0x550055)); __ cmn(R0, ShifterOperand(0x550055)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingModifiedImmediate"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingModifiedImmediate"); } TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediates) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R0, ShifterOperand(0x550055)); __ mov(R0, ShifterOperand(0x55005500)); @@ -331,16 +334,11 @@ TEST(Thumb2AssemblerTest, DataProcessingModifiedImmediates) { __ mov(R0, ShifterOperand(0x350)); // rotated to 2nd last position __ mov(R0, ShifterOperand(0x1a8)); // rotated to last position - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingModifiedImmediates"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingModifiedImmediates"); } TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mov(R3, ShifterOperand(R4, LSL, 4)); __ mov(R3, ShifterOperand(R4, LSR, 5)); @@ -355,17 +353,12 @@ TEST(Thumb2AssemblerTest, DataProcessingShiftedRegister) { __ mov(R8, ShifterOperand(R4, ROR, 7)); __ mov(R8, ShifterOperand(R4, RRX)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "DataProcessingShiftedRegister"); - delete assembler; + EmitAndCheck(&assembler, "DataProcessingShiftedRegister"); } TEST(Thumb2AssemblerTest, BasicLoad) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R3, Address(R4, 24)); __ ldrb(R3, Address(R4, 24)); @@ -382,17 +375,12 @@ TEST(Thumb2AssemblerTest, 
BasicLoad) { __ ldrsb(R8, Address(R4, 24)); __ ldrsh(R8, Address(R4, 24)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "BasicLoad"); - delete assembler; + EmitAndCheck(&assembler, "BasicLoad"); } TEST(Thumb2AssemblerTest, BasicStore) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ str(R3, Address(R4, 24)); __ strb(R3, Address(R4, 24)); @@ -405,16 +393,11 @@ TEST(Thumb2AssemblerTest, BasicStore) { __ strb(R8, Address(R4, 24)); __ strh(R8, Address(R4, 24)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "BasicStore"); - delete assembler; + EmitAndCheck(&assembler, "BasicStore"); } TEST(Thumb2AssemblerTest, ComplexLoad) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R3, Address(R4, 24, Address::Mode::Offset)); __ ldr(R3, Address(R4, 24, Address::Mode::PreIndex)); @@ -451,17 +434,12 @@ TEST(Thumb2AssemblerTest, ComplexLoad) { __ ldrsh(R3, Address(R4, 24, Address::Mode::NegPreIndex)); __ ldrsh(R3, Address(R4, 24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "ComplexLoad"); - delete assembler; + EmitAndCheck(&assembler, "ComplexLoad"); } TEST(Thumb2AssemblerTest, ComplexStore) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ str(R3, Address(R4, 24, Address::Mode::Offset)); __ str(R3, Address(R4, 24, Address::Mode::PreIndex)); @@ -484,16 +462,11 @@ TEST(Thumb2AssemblerTest, ComplexStore) { __ strh(R3, Address(R4, 24, Address::Mode::NegPreIndex)); __ strh(R3, Address(R4, 24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "ComplexStore"); - delete assembler; + EmitAndCheck(&assembler, "ComplexStore"); } TEST(Thumb2AssemblerTest, NegativeLoadStore) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R3, Address(R4, -24, Address::Mode::Offset)); __ ldr(R3, Address(R4, -24, Address::Mode::PreIndex)); @@ -551,30 +524,20 @@ TEST(Thumb2AssemblerTest, NegativeLoadStore) { __ strh(R3, Address(R4, -24, Address::Mode::NegPreIndex)); __ strh(R3, Address(R4, -24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "NegativeLoadStore"); - delete assembler; + EmitAndCheck(&assembler, "NegativeLoadStore"); } TEST(Thumb2AssemblerTest, SimpleLoadStoreDual) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ strd(R2, Address(R0, 24, Address::Mode::Offset)); __ ldrd(R2, Address(R0, 24, Address::Mode::Offset)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion 
code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleLoadStoreDual"); - delete assembler; + EmitAndCheck(&assembler, "SimpleLoadStoreDual"); } TEST(Thumb2AssemblerTest, ComplexLoadStoreDual) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ strd(R2, Address(R0, 24, Address::Mode::Offset)); __ strd(R2, Address(R0, 24, Address::Mode::PreIndex)); @@ -590,16 +553,11 @@ TEST(Thumb2AssemblerTest, ComplexLoadStoreDual) { __ ldrd(R2, Address(R0, 24, Address::Mode::NegPreIndex)); __ ldrd(R2, Address(R0, 24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "ComplexLoadStoreDual"); - delete assembler; + EmitAndCheck(&assembler, "ComplexLoadStoreDual"); } TEST(Thumb2AssemblerTest, NegativeLoadStoreDual) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ strd(R2, Address(R0, -24, Address::Mode::Offset)); __ strd(R2, Address(R0, -24, Address::Mode::PreIndex)); @@ -615,16 +573,11 @@ TEST(Thumb2AssemblerTest, NegativeLoadStoreDual) { __ ldrd(R2, Address(R0, -24, Address::Mode::NegPreIndex)); __ ldrd(R2, Address(R0, -24, Address::Mode::NegPostIndex)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "NegativeLoadStoreDual"); - delete assembler; + EmitAndCheck(&assembler, "NegativeLoadStoreDual"); } TEST(Thumb2AssemblerTest, SimpleBranch) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ mov(R0, ShifterOperand(2)); @@ -658,17 +611,12 @@ TEST(Thumb2AssemblerTest, SimpleBranch) { __ Bind(&l5); __ mov(R0, ShifterOperand(6)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SimpleBranch"); - delete assembler; + EmitAndCheck(&assembler, "SimpleBranch"); } TEST(Thumb2AssemblerTest, LongBranch) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); - assembler->Force32Bit(); + arm::Thumb2Assembler assembler; + __ Force32Bit(); // 32 bit branches. Label l1; __ mov(R0, ShifterOperand(2)); @@ -703,16 +651,11 @@ TEST(Thumb2AssemblerTest, LongBranch) { __ Bind(&l5); __ mov(R0, ShifterOperand(6)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LongBranch"); - delete assembler; + EmitAndCheck(&assembler, "LongBranch"); } TEST(Thumb2AssemblerTest, LoadMultiple) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; // 16 bit. 
__ ldm(DB_W, R4, (1 << R0 | 1 << R3)); @@ -724,16 +667,11 @@ TEST(Thumb2AssemblerTest, LoadMultiple) { // Single reg is converted to ldr __ ldm(DB_W, R4, (1 << R5)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LoadMultiple"); - delete assembler; + EmitAndCheck(&assembler, "LoadMultiple"); } TEST(Thumb2AssemblerTest, StoreMultiple) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; // 16 bit. __ stm(IA_W, R4, (1 << R0 | 1 << R3)); @@ -746,16 +684,11 @@ TEST(Thumb2AssemblerTest, StoreMultiple) { __ stm(IA_W, R4, (1 << R5)); __ stm(IA, R4, (1 << R5)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "StoreMultiple"); - delete assembler; + EmitAndCheck(&assembler, "StoreMultiple"); } TEST(Thumb2AssemblerTest, MovWMovT) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ movw(R4, 0); // 16 bit. __ movw(R4, 0x34); // 16 bit. @@ -768,16 +701,11 @@ TEST(Thumb2AssemblerTest, MovWMovT) { __ movt(R0, 0x1234); __ movt(R1, 0xffff); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "MovWMovT"); - delete assembler; + EmitAndCheck(&assembler, "MovWMovT"); } TEST(Thumb2AssemblerTest, SpecialAddSub) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ add(R2, SP, ShifterOperand(0x50)); // 16 bit. __ add(SP, SP, ShifterOperand(0x50)); // 16 bit. @@ -792,16 +720,11 @@ TEST(Thumb2AssemblerTest, SpecialAddSub) { __ sub(SP, SP, ShifterOperand(0xf00)); // 32 bit due to imm size - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "SpecialAddSub"); - delete assembler; + EmitAndCheck(&assembler, "SpecialAddSub"); } TEST(Thumb2AssemblerTest, StoreToOffset) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ StoreToOffset(kStoreWord, R2, R4, 12); // Simple __ StoreToOffset(kStoreWord, R2, R4, 0x2000); // Offset too big. 
@@ -809,17 +732,12 @@ TEST(Thumb2AssemblerTest, StoreToOffset) { __ StoreToOffset(kStoreHalfword, R0, R12, 12); __ StoreToOffset(kStoreByte, R2, R12, 12); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "StoreToOffset"); - delete assembler; + EmitAndCheck(&assembler, "StoreToOffset"); } TEST(Thumb2AssemblerTest, IfThen) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ it(EQ); __ mov(R1, ShifterOperand(1), EQ); @@ -848,16 +766,11 @@ TEST(Thumb2AssemblerTest, IfThen) { __ mov(R3, ShifterOperand(3), EQ); __ mov(R4, ShifterOperand(4), NE); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "IfThen"); - delete assembler; + EmitAndCheck(&assembler, "IfThen"); } TEST(Thumb2AssemblerTest, CbzCbnz) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ cbz(R2, &l1); @@ -873,16 +786,11 @@ TEST(Thumb2AssemblerTest, CbzCbnz) { __ Bind(&l2); __ mov(R2, ShifterOperand(4)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CbzCbnz"); - delete assembler; + EmitAndCheck(&assembler, "CbzCbnz"); } TEST(Thumb2AssemblerTest, Multiply) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ mul(R0, R1, R0); __ mul(R0, R1, R2); @@ -898,16 +806,11 @@ TEST(Thumb2AssemblerTest, Multiply) { __ umull(R0, R1, R2, R3); __ umull(R8, R9, R10, R11); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Multiply"); - delete assembler; + EmitAndCheck(&assembler, "Multiply"); } TEST(Thumb2AssemblerTest, Divide) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ sdiv(R0, R1, R2); __ sdiv(R8, R9, R10); @@ -915,16 +818,11 @@ TEST(Thumb2AssemblerTest, Divide) { __ udiv(R0, R1, R2); __ udiv(R8, R9, R10); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Divide"); - delete assembler; + EmitAndCheck(&assembler, "Divide"); } TEST(Thumb2AssemblerTest, VMov) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vmovs(S1, 1.0); __ vmovd(D1, 1.0); @@ -932,17 +830,12 @@ TEST(Thumb2AssemblerTest, VMov) { __ vmovs(S1, S2); __ vmovd(D1, D2); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "VMov"); - delete assembler; + EmitAndCheck(&assembler, "VMov"); } TEST(Thumb2AssemblerTest, BasicFloatingPoint) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vadds(S0, S1, S2); __ vsubs(S0, S1, S2); @@ -964,16 +857,11 @@ 
TEST(Thumb2AssemblerTest, BasicFloatingPoint) { __ vnegd(D0, D1); __ vsqrtd(D0, D1); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "BasicFloatingPoint"); - delete assembler; + EmitAndCheck(&assembler, "BasicFloatingPoint"); } TEST(Thumb2AssemblerTest, FloatingPointConversions) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vcvtsd(S2, D2); __ vcvtds(D2, S2); @@ -990,16 +878,11 @@ TEST(Thumb2AssemblerTest, FloatingPointConversions) { __ vcvtud(S1, D2); __ vcvtdu(D1, S2); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "FloatingPointConversions"); - delete assembler; + EmitAndCheck(&assembler, "FloatingPointConversions"); } TEST(Thumb2AssemblerTest, FloatingPointComparisons) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vcmps(S0, S1); __ vcmpd(D0, D1); @@ -1007,57 +890,37 @@ TEST(Thumb2AssemblerTest, FloatingPointComparisons) { __ vcmpsz(S2); __ vcmpdz(D2); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "FloatingPointComparisons"); - delete assembler; + EmitAndCheck(&assembler, "FloatingPointComparisons"); } TEST(Thumb2AssemblerTest, Calls) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ blx(LR); __ bx(LR); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Calls"); - delete assembler; + EmitAndCheck(&assembler, "Calls"); } TEST(Thumb2AssemblerTest, Breakpoint) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ bkpt(0); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Breakpoint"); - delete assembler; + EmitAndCheck(&assembler, "Breakpoint"); } TEST(Thumb2AssemblerTest, StrR1) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ str(R1, Address(SP, 68)); __ str(R1, Address(SP, 1068)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "StrR1"); - delete assembler; + EmitAndCheck(&assembler, "StrR1"); } TEST(Thumb2AssemblerTest, VPushPop) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ vpushs(S2, 4); __ vpushd(D2, 4); @@ -1065,16 +928,11 @@ TEST(Thumb2AssemblerTest, VPushPop) { __ vpops(S2, 4); __ vpopd(D2, 4); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "VPushPop"); - delete assembler; + EmitAndCheck(&assembler, "VPushPop"); } 
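With the boilerplate folded into EmitAndCheck(), a new test in this file reduces to a stack-allocated Thumb2Assembler, instructions emitted through the __ macro, and one call to the helper. A hypothetical example, not part of this change: "ExampleAddSub" has no expected-output array, so a real test would also add one to assembler_thumb_test_expected.cc.inc and register it in setup_results().

TEST(Thumb2AssemblerTest, ExampleAddSub) {
  arm::Thumb2Assembler assembler;

  __ mov(R0, ShifterOperand(R1));
  __ add(R0, R1, ShifterOperand(R2));
  __ add(R2, SP, ShifterOperand(0x50));

  EmitAndCheck(&assembler, "ExampleAddSub");  // FinalizeCode() + objdump comparison.
}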
TEST(Thumb2AssemblerTest, Max16BitBranch) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ b(&l1); @@ -1084,16 +942,11 @@ TEST(Thumb2AssemblerTest, Max16BitBranch) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Max16BitBranch"); - delete assembler; + EmitAndCheck(&assembler, "Max16BitBranch"); } TEST(Thumb2AssemblerTest, Branch32) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ b(&l1); @@ -1103,16 +956,11 @@ TEST(Thumb2AssemblerTest, Branch32) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Branch32"); - delete assembler; + EmitAndCheck(&assembler, "Branch32"); } TEST(Thumb2AssemblerTest, CompareAndBranchMax) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ cbz(R4, &l1); @@ -1122,16 +970,11 @@ TEST(Thumb2AssemblerTest, CompareAndBranchMax) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CompareAndBranchMax"); - delete assembler; + EmitAndCheck(&assembler, "CompareAndBranchMax"); } TEST(Thumb2AssemblerTest, CompareAndBranchRelocation16) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ cbz(R4, &l1); @@ -1141,16 +984,11 @@ TEST(Thumb2AssemblerTest, CompareAndBranchRelocation16) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CompareAndBranchRelocation16"); - delete assembler; + EmitAndCheck(&assembler, "CompareAndBranchRelocation16"); } TEST(Thumb2AssemblerTest, CompareAndBranchRelocation32) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; __ cbz(R4, &l1); @@ -1160,16 +998,11 @@ TEST(Thumb2AssemblerTest, CompareAndBranchRelocation32) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CompareAndBranchRelocation32"); - delete assembler; + EmitAndCheck(&assembler, "CompareAndBranchRelocation32"); } TEST(Thumb2AssemblerTest, MixedBranch32) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; Label l1; Label l2; @@ -1184,16 +1017,11 @@ TEST(Thumb2AssemblerTest, MixedBranch32) { __ Bind(&l1); __ mov(R1, ShifterOperand(R2)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "MixedBranch32"); - 
delete assembler; + EmitAndCheck(&assembler, "MixedBranch32"); } TEST(Thumb2AssemblerTest, Shifts) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; // 16 bit __ Lsl(R0, R1, 5); @@ -1240,16 +1068,11 @@ TEST(Thumb2AssemblerTest, Shifts) { __ Lsr(R0, R8, R2, true); __ Asr(R0, R1, R8, true); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "Shifts"); - delete assembler; + EmitAndCheck(&assembler, "Shifts"); } TEST(Thumb2AssemblerTest, LoadStoreRegOffset) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; // 16 bit. __ ldr(R0, Address(R1, R2)); @@ -1272,16 +1095,11 @@ TEST(Thumb2AssemblerTest, LoadStoreRegOffset) { __ ldr(R0, Address(R1, R8)); __ str(R0, Address(R1, R8)); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LoadStoreRegOffset"); - delete assembler; + EmitAndCheck(&assembler, "LoadStoreRegOffset"); } TEST(Thumb2AssemblerTest, LoadStoreLiteral) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R0, Address(4)); __ str(R0, Address(4)); @@ -1295,16 +1113,11 @@ TEST(Thumb2AssemblerTest, LoadStoreLiteral) { __ str(R0, Address(0x3ff)); // 32 bit (no 16 bit str(literal)). __ str(R0, Address(0x7ff)); // 11 bits (32 bit). - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LoadStoreLiteral"); - delete assembler; + EmitAndCheck(&assembler, "LoadStoreLiteral"); } TEST(Thumb2AssemblerTest, LoadStoreLimits) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; __ ldr(R0, Address(R4, 124)); // 16 bit. __ ldr(R0, Address(R4, 128)); // 32 bit. @@ -1330,30 +1143,20 @@ TEST(Thumb2AssemblerTest, LoadStoreLimits) { __ strh(R0, Address(R4, 62)); // 16 bit. __ strh(R0, Address(R4, 64)); // 32 bit. 
- size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "LoadStoreLimits"); - delete assembler; + EmitAndCheck(&assembler, "LoadStoreLimits"); } TEST(Thumb2AssemblerTest, CompareAndBranch) { - arm::Thumb2Assembler* assembler = static_cast<arm::Thumb2Assembler*>(Assembler::Create(kThumb2)); + arm::Thumb2Assembler assembler; - arm::NearLabel label; + Label label; __ CompareAndBranchIfZero(arm::R0, &label); __ CompareAndBranchIfZero(arm::R11, &label); __ CompareAndBranchIfNonZero(arm::R0, &label); __ CompareAndBranchIfNonZero(arm::R11, &label); __ Bind(&label); - size_t cs = __ CodeSize(); - std::vector<uint8_t> managed_code(cs); - MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); - dump(managed_code, "CompareAndBranch"); - delete assembler; + EmitAndCheck(&assembler, "CompareAndBranch"); } #undef __ diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 841d6a00c0..280ed779b3 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -4832,7 +4832,7 @@ const char* CompareAndBranchResults[] = { nullptr }; -std::map<std::string, const char**> test_results; +std::map<std::string, const char* const*> test_results; void setup_results() { test_results["SimpleMov"] = SimpleMovResults; test_results["SimpleMov32"] = SimpleMov32Results; diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 96d5654d65..9e9dea64c6 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -1610,6 +1610,8 @@ class ImageDumper { const auto& bitmap_section = image_header_.GetImageSection(ImageHeader::kSectionImageBitmap); const auto& field_section = image_header_.GetImageSection(ImageHeader::kSectionArtFields); const auto& method_section = image_header_.GetMethodsSection(); + const auto& intern_section = image_header_.GetImageSection( + ImageHeader::kSectionInternedStrings); stats_.header_bytes = header_bytes; size_t alignment_bytes = RoundUp(header_bytes, kObjectAlignment) - header_bytes; stats_.alignment_bytes += alignment_bytes; @@ -1617,6 +1619,7 @@ class ImageDumper { stats_.bitmap_bytes += bitmap_section.Size(); stats_.art_field_bytes += field_section.Size(); stats_.art_method_bytes += method_section.Size(); + stats_.interned_strings_bytes += intern_section.Size(); stats_.Dump(os); os << "\n"; @@ -1945,6 +1948,7 @@ class ImageDumper { size_t object_bytes; size_t art_field_bytes; size_t art_method_bytes; + size_t interned_strings_bytes; size_t bitmap_bytes; size_t alignment_bytes; @@ -1974,6 +1978,7 @@ class ImageDumper { object_bytes(0), art_field_bytes(0), art_method_bytes(0), + interned_strings_bytes(0), bitmap_bytes(0), alignment_bytes(0), managed_code_bytes(0), @@ -2131,21 +2136,24 @@ class ImageDumper { << "art_file_bytes = header_bytes + object_bytes + alignment_bytes\n"; Indenter indent_filter(os.rdbuf(), kIndentChar, kIndentBy1Count); std::ostream indent_os(&indent_filter); - indent_os << StringPrintf("header_bytes = %8zd (%2.0f%% of art file bytes)\n" - "object_bytes = %8zd (%2.0f%% of art file bytes)\n" - "art_field_bytes = %8zd (%2.0f%% of art file bytes)\n" - "art_method_bytes = %8zd (%2.0f%% of art file bytes)\n" - "bitmap_bytes = %8zd (%2.0f%% of art file bytes)\n" - "alignment_bytes = %8zd (%2.0f%% of art file bytes)\n\n", + indent_os << StringPrintf("header_bytes = %8zd (%2.0f%% of art file 
bytes)\n" + "object_bytes = %8zd (%2.0f%% of art file bytes)\n" + "art_field_bytes = %8zd (%2.0f%% of art file bytes)\n" + "art_method_bytes = %8zd (%2.0f%% of art file bytes)\n" + "interned_string_bytes = %8zd (%2.0f%% of art file bytes)\n" + "bitmap_bytes = %8zd (%2.0f%% of art file bytes)\n" + "alignment_bytes = %8zd (%2.0f%% of art file bytes)\n\n", header_bytes, PercentOfFileBytes(header_bytes), object_bytes, PercentOfFileBytes(object_bytes), art_field_bytes, PercentOfFileBytes(art_field_bytes), art_method_bytes, PercentOfFileBytes(art_method_bytes), + interned_strings_bytes, + PercentOfFileBytes(interned_strings_bytes), bitmap_bytes, PercentOfFileBytes(bitmap_bytes), alignment_bytes, PercentOfFileBytes(alignment_bytes)) << std::flush; CHECK_EQ(file_bytes, header_bytes + object_bytes + art_field_bytes + art_method_bytes + - bitmap_bytes + alignment_bytes); + interned_strings_bytes + bitmap_bytes + alignment_bytes); } os << "object_bytes breakdown:\n"; diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc index 007125cfbe..04017273a8 100644 --- a/patchoat/patchoat.cc +++ b/patchoat/patchoat.cc @@ -437,6 +437,41 @@ void PatchOat::PatchArtMethods(const ImageHeader* image_header) { } } +class FixupRootVisitor : public RootVisitor { + public: + explicit FixupRootVisitor(const PatchOat* patch_oat) : patch_oat_(patch_oat) { + } + + void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + *roots[i] = patch_oat_->RelocatedAddressOfPointer(*roots[i]); + } + } + + void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, + const RootInfo& info ATTRIBUTE_UNUSED) + OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + for (size_t i = 0; i < count; ++i) { + roots[i]->Assign(patch_oat_->RelocatedAddressOfPointer(roots[i]->AsMirrorPtr())); + } + } + + private: + const PatchOat* const patch_oat_; +}; + +void PatchOat::PatchInternedStrings(const ImageHeader* image_header) { + const auto& section = image_header->GetImageSection(ImageHeader::kSectionInternedStrings); + InternTable temp_table; + // Note that we require that ReadFromMemory does not make an internal copy of the elements. + // This also relies on visit roots not doing any verification which could fail after we update + // the roots to be the image addresses. + temp_table.ReadFromMemory(image_->Begin() + section.Offset()); + FixupRootVisitor visitor(this); + temp_table.VisitRoots(&visitor, kVisitRootFlagAllRoots); +} + void PatchOat::PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots) { auto* dex_caches = down_cast<mirror::ObjectArray<mirror::DexCache>*>( img_roots->Get(ImageHeader::kDexCaches)); @@ -483,12 +518,9 @@ bool PatchOat::PatchImage() { auto* img_roots = image_header->GetImageRoots(); image_header->RelocateImage(delta_); - // Patch and update ArtFields. PatchArtFields(image_header); - - // Patch and update ArtMethods. PatchArtMethods(image_header); - + PatchInternedStrings(image_header); // Patch dex file int/long arrays which point to ArtFields. 
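PatchInternedStrings above reads the intern table in place from the image (no copy) and rewrites each root through RelocatedAddressOfPointer. As a rough standalone illustration of the delta-relocation idea (a hypothetical helper, not the commit's implementation, whose body lies outside this hunk):

#include <cstddef>
#include <cstdint>

template <typename T>
T* RelocateByDelta(T* obj, uintptr_t image_begin, size_t image_size, intptr_t delta) {
  // Only pointers into the original image move; null and out-of-image pointers
  // are returned unchanged.
  if (obj == nullptr) {
    return nullptr;
  }
  uintptr_t raw = reinterpret_cast<uintptr_t>(obj);
  if (raw < image_begin || raw >= image_begin + image_size) {
    return obj;
  }
  return reinterpret_cast<T*>(raw + delta);
}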
PatchDexFileArrays(img_roots); diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h index 7b9c8bd508..23abca8c7e 100644 --- a/patchoat/patchoat.h +++ b/patchoat/patchoat.h @@ -116,6 +116,8 @@ class PatchOat { bool PatchImage() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void PatchArtFields(const ImageHeader* image_header) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void PatchArtMethods(const ImageHeader* image_header) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void PatchInternedStrings(const ImageHeader* image_header) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void PatchDexFileArrays(mirror::ObjectArray<mirror::Object>* img_roots) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -123,7 +125,7 @@ class PatchOat { bool WriteImage(File* out); template <typename T> - T* RelocatedCopyOf(T* obj) { + T* RelocatedCopyOf(T* obj) const { if (obj == nullptr) { return nullptr; } @@ -136,7 +138,7 @@ class PatchOat { } template <typename T> - T* RelocatedAddressOfPointer(T* obj) { + T* RelocatedAddressOfPointer(T* obj) const { if (obj == nullptr) { return obj; } @@ -149,7 +151,7 @@ class PatchOat { } template <typename T> - T RelocatedAddressOfIntPointer(T obj) { + T RelocatedAddressOfIntPointer(T obj) const { if (obj == 0) { return obj; } @@ -199,6 +201,7 @@ class PatchOat { TimingLogger* timings_; + friend class FixupRootVisitor; DISALLOW_IMPLICIT_CONSTRUCTORS(PatchOat); }; diff --git a/runtime/Android.mk b/runtime/Android.mk index c1e6e09728..5ed6955185 100644 --- a/runtime/Android.mk +++ b/runtime/Android.mk @@ -45,6 +45,7 @@ LIBART_COMMON_SRC_FILES := \ dex_file_verifier.cc \ dex_instruction.cc \ elf_file.cc \ + gc/allocation_record.cc \ gc/allocator/dlmalloc.cc \ gc/allocator/rosalloc.cc \ gc/accounting/bitmap.cc \ diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index 3a0ea646e1..cc1de43723 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -380,7 +380,7 @@ END art_quick_do_long_jump /* * Called by managed code, saves most registers (forms basis of long jump context) and passes * the bottom of the stack. artDeliverExceptionFromCode will place the callee save Method* at - * the bottom of the thread. On entry r0 holds Throwable* + * the bottom of the thread. On entry a0 holds Throwable* */ ENTRY art_quick_deliver_exception SETUP_SAVE_ALL_CALLEE_SAVE_FRAME diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index b2cd7f26c7..37c6c5b3f9 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -87,11 +87,11 @@ s.d $f24, 8($sp) # load appropriate callee-save-method - ld $v0, %got(_ZN3art7Runtime9instance_E)($gp) - ld $v0, 0($v0) + ld $t1, %got(_ZN3art7Runtime9instance_E)($gp) + ld $t1, 0($t1) THIS_LOAD_REQUIRES_READ_BARRIER - ld $v0, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($v0) - sd $v0, 0($sp) # Place ArtMethod* at bottom of stack. + ld $t1, RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET($t1) + sd $t1, 0($sp) # Place ArtMethod* at bottom of stack. sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. 
.endm @@ -130,11 +130,11 @@ sd $s2, 8($sp) .cfi_rel_offset 18, 8 # load appropriate callee-save-method - ld $v0, %got(_ZN3art7Runtime9instance_E)($gp) - ld $v0, 0($v0) + ld $t1, %got(_ZN3art7Runtime9instance_E)($gp) + ld $t1, 0($t1) THIS_LOAD_REQUIRES_READ_BARRIER - ld $v0, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($v0) - sd $v0, 0($sp) # Place Method* at bottom of stack. + ld $t1, RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET($t1) + sd $t1, 0($sp) # Place Method* at bottom of stack. sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. .endm @@ -253,11 +253,11 @@ .macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_INTERNAL # load appropriate callee-save-method - ld $v0, %got(_ZN3art7Runtime9instance_E)($gp) - ld $v0, 0($v0) + ld $t1, %got(_ZN3art7Runtime9instance_E)($gp) + ld $t1, 0($t1) THIS_LOAD_REQUIRES_READ_BARRIER - ld $v0, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($v0) - sd $v0, 0($sp) # Place Method* at bottom of stack. + ld $t1, RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET($t1) + sd $t1, 0($sp) # Place Method* at bottom of stack. sd $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # Place sp in Thread::Current()->top_quick_frame. .endm @@ -442,7 +442,7 @@ END art_quick_do_long_jump * Called by managed code, saves most registers (forms basis of long jump * context) and passes the bottom of the stack. * artDeliverExceptionFromCode will place the callee save Method* at - * the bottom of the thread. On entry v0 holds Throwable* + * the bottom of the thread. On entry a0 holds Throwable* */ ENTRY art_quick_deliver_exception SETUP_SAVE_ALL_CALLEE_SAVE_FRAME diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h index ee51ec9f1c..73beb1f168 100644 --- a/runtime/art_field-inl.h +++ b/runtime/art_field-inl.h @@ -34,7 +34,8 @@ namespace art { inline mirror::Class* ArtField::GetDeclaringClass() { - mirror::Class* result = declaring_class_.Read(); + GcRootSource gc_root_source(this); + mirror::Class* result = declaring_class_.Read(&gc_root_source); DCHECK(result != nullptr); DCHECK(result->IsLoaded() || result->IsErroneous()); return result; diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h index 5cfce41cc0..8712bdbbf5 100644 --- a/runtime/art_method-inl.h +++ b/runtime/art_method-inl.h @@ -36,7 +36,8 @@ namespace art { inline mirror::Class* ArtMethod::GetDeclaringClassUnchecked() { - return declaring_class_.Read(); + GcRootSource gc_root_source(this); + return declaring_class_.Read(&gc_root_source); } inline mirror::Class* ArtMethod::GetDeclaringClassNoBarrier() { @@ -84,7 +85,8 @@ inline uint32_t ArtMethod::GetDexMethodIndex() { } inline mirror::PointerArray* ArtMethod::GetDexCacheResolvedMethods() { - return dex_cache_resolved_methods_.Read(); + GcRootSource gc_root_source(this); + return dex_cache_resolved_methods_.Read(&gc_root_source); } inline ArtMethod* ArtMethod::GetDexCacheResolvedMethod(uint16_t method_index, size_t ptr_size) { @@ -118,7 +120,8 @@ inline bool ArtMethod::HasSameDexCacheResolvedMethods(ArtMethod* other) { } inline mirror::ObjectArray<mirror::Class>* ArtMethod::GetDexCacheResolvedTypes() { - return dex_cache_resolved_types_.Read(); + GcRootSource gc_root_source(this); + return dex_cache_resolved_types_.Read(&gc_root_source); } template <bool kWithCheck> diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h index ab63dddaff..8daf6d4c9e 100644 --- a/runtime/base/hash_set.h +++ b/runtime/base/hash_set.h @@ -22,6 +22,7 @@ #include <stdint.h> #include <utility> +#include 
"bit_utils.h" #include "logging.h" namespace art { @@ -121,6 +122,7 @@ class HashSet { typedef BaseIterator<T, HashSet> Iterator; typedef BaseIterator<const T, const HashSet> ConstIterator; + // If we don't own the data, this will create a new array which owns the data. void Clear() { DeallocateStorage(); AllocateStorage(1); @@ -128,19 +130,70 @@ class HashSet { elements_until_expand_ = 0; } - HashSet() : num_elements_(0), num_buckets_(0), data_(nullptr), + HashSet() : num_elements_(0), num_buckets_(0), owns_data_(false), data_(nullptr), min_load_factor_(kDefaultMinLoadFactor), max_load_factor_(kDefaultMaxLoadFactor) { Clear(); } - HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), data_(nullptr) { + HashSet(const HashSet& other) : num_elements_(0), num_buckets_(0), owns_data_(false), + data_(nullptr) { *this = other; } - HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), data_(nullptr) { + HashSet(HashSet&& other) : num_elements_(0), num_buckets_(0), owns_data_(false), + data_(nullptr) { *this = std::move(other); } + // Construct from existing data. + // Read from a block of memory, if make_copy_of_data is false, then data_ points to within the + // passed in ptr_. + HashSet(const uint8_t* ptr, bool make_copy_of_data, size_t* read_count) { + uint64_t temp; + size_t offset = 0; + offset = ReadFromBytes(ptr, offset, &temp); + num_elements_ = static_cast<uint64_t>(temp); + offset = ReadFromBytes(ptr, offset, &temp); + num_buckets_ = static_cast<uint64_t>(temp); + CHECK_LE(num_elements_, num_buckets_); + offset = ReadFromBytes(ptr, offset, &temp); + elements_until_expand_ = static_cast<uint64_t>(temp); + offset = ReadFromBytes(ptr, offset, &min_load_factor_); + offset = ReadFromBytes(ptr, offset, &max_load_factor_); + if (!make_copy_of_data) { + owns_data_ = false; + data_ = const_cast<T*>(reinterpret_cast<const T*>(ptr + offset)); + offset += sizeof(*data_) * num_buckets_; + } else { + AllocateStorage(num_buckets_); + // Write elements, not that this may not be safe for cross compilation if the elements are + // pointer sized. + for (size_t i = 0; i < num_buckets_; ++i) { + offset = ReadFromBytes(ptr, offset, &data_[i]); + } + } + // Caller responsible for aligning. + *read_count = offset; + } + + // Returns how large the table is after being written. If target is null, then no writing happens + // but the size is still returned. Target must be 8 byte aligned. + size_t WriteToMemory(uint8_t* ptr) { + size_t offset = 0; + offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_elements_)); + offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(num_buckets_)); + offset = WriteToBytes(ptr, offset, static_cast<uint64_t>(elements_until_expand_)); + offset = WriteToBytes(ptr, offset, min_load_factor_); + offset = WriteToBytes(ptr, offset, max_load_factor_); + // Write elements, not that this may not be safe for cross compilation if the elements are + // pointer sized. + for (size_t i = 0; i < num_buckets_; ++i) { + offset = WriteToBytes(ptr, offset, data_[i]); + } + // Caller responsible for aligning. 
+ return offset; + } + ~HashSet() { DeallocateStorage(); } @@ -152,6 +205,7 @@ class HashSet { std::swap(elements_until_expand_, other.elements_until_expand_); std::swap(min_load_factor_, other.min_load_factor_); std::swap(max_load_factor_, other.max_load_factor_); + std::swap(owns_data_, other.owns_data_); return *this; } @@ -386,6 +440,7 @@ class HashSet { void AllocateStorage(size_t num_buckets) { num_buckets_ = num_buckets; data_ = allocfn_.allocate(num_buckets_); + owns_data_ = true; for (size_t i = 0; i < num_buckets_; ++i) { allocfn_.construct(allocfn_.address(data_[i])); emptyfn_.MakeEmpty(data_[i]); @@ -394,10 +449,13 @@ class HashSet { void DeallocateStorage() { if (num_buckets_ != 0) { - for (size_t i = 0; i < NumBuckets(); ++i) { - allocfn_.destroy(allocfn_.address(data_[i])); + if (owns_data_) { + for (size_t i = 0; i < NumBuckets(); ++i) { + allocfn_.destroy(allocfn_.address(data_[i])); + } + allocfn_.deallocate(data_, NumBuckets()); + owns_data_ = false; } - allocfn_.deallocate(data_, NumBuckets()); data_ = nullptr; num_buckets_ = 0; } @@ -418,18 +476,23 @@ class HashSet { // Expand / shrink the table to the new specified size. void Resize(size_t new_size) { DCHECK_GE(new_size, Size()); - T* old_data = data_; + T* const old_data = data_; size_t old_num_buckets = num_buckets_; // Reinsert all of the old elements. + const bool owned_data = owns_data_; AllocateStorage(new_size); for (size_t i = 0; i < old_num_buckets; ++i) { T& element = old_data[i]; if (!emptyfn_.IsEmpty(element)) { data_[FirstAvailableSlot(IndexForHash(hashfn_(element)))] = std::move(element); } - allocfn_.destroy(allocfn_.address(element)); + if (owned_data) { + allocfn_.destroy(allocfn_.address(element)); + } + } + if (owned_data) { + allocfn_.deallocate(old_data, old_num_buckets); } - allocfn_.deallocate(old_data, old_num_buckets); } ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const { @@ -439,6 +502,24 @@ class HashSet { return index; } + // Return new offset. + template <typename Elem> + static size_t WriteToBytes(uint8_t* ptr, size_t offset, Elem n) { + DCHECK_ALIGNED(ptr + offset, sizeof(n)); + if (ptr != nullptr) { + *reinterpret_cast<Elem*>(ptr + offset) = n; + } + return offset + sizeof(n); + } + + template <typename Elem> + static size_t ReadFromBytes(const uint8_t* ptr, size_t offset, Elem* out) { + DCHECK(ptr != nullptr); + DCHECK_ALIGNED(ptr + offset, sizeof(*out)); + *out = *reinterpret_cast<const Elem*>(ptr + offset); + return offset + sizeof(*out); + } + Alloc allocfn_; // Allocator function. HashFn hashfn_; // Hashing function. EmptyFn emptyfn_; // IsEmpty/SetEmpty function. @@ -446,6 +527,7 @@ class HashSet { size_t num_elements_; // Number of inserted elements. size_t num_buckets_; // Number of hash table buckets. size_t elements_until_expand_; // Maxmimum number of elements until we expand the table. + bool owns_data_; // If we own data_ and are responsible for freeing it. T* data_; // Backing storage. 
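The serialized layout produced by WriteToMemory above is three 64-bit counters (element count, bucket count, expansion threshold), the two load factors, and then the raw bucket array, with the caller handling alignment. A small sketch of the same byte helpers as free functions (names invented here; memcpy stands in for the aligned casts used above):

#include <cstddef>
#include <cstdint>
#include <cstring>

// A null ptr means "compute the size only", matching the WriteToMemory contract.
template <typename Elem>
size_t WriteElem(uint8_t* ptr, size_t offset, Elem n) {
  if (ptr != nullptr) {
    std::memcpy(ptr + offset, &n, sizeof(n));
  }
  return offset + sizeof(n);
}

template <typename Elem>
size_t ReadElem(const uint8_t* ptr, size_t offset, Elem* out) {
  std::memcpy(out, ptr + offset, sizeof(*out));
  return offset + sizeof(*out);
}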
double min_load_factor_; double max_load_factor_; diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index f2be85e277..0ab148e37e 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -94,7 +94,6 @@ enum LockLevel { kMonitorListLock, kJniLoadLibraryLock, kThreadListLock, - kBreakpointInvokeLock, kAllocTrackerLock, kDeoptimizationLock, kProfilerLock, diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 429fa5bfe0..98fa897637 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -808,18 +808,11 @@ static void FreeDexFilesInHeap(std::priority_queue<DexFileAndClassPair>* heap) { } const OatFile* ClassLinker::GetBootOatFile() { - // To grab the boot oat, look at the dex files in the boot classpath. Any of those is fine, as - // they were all compiled into the same oat file. So grab the first one, which is guaranteed to - // exist if the boot class-path isn't empty. - if (boot_class_path_.empty()) { + gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace(); + if (image_space == nullptr) { return nullptr; } - const DexFile* boot_dex_file = boot_class_path_[0]; - // Is it from an oat file? - if (boot_dex_file->GetOatDexFile() != nullptr) { - return boot_dex_file->GetOatDexFile()->GetOatFile(); - } - return nullptr; + return image_space->GetOatFile(); } const OatFile* ClassLinker::GetPrimaryOatFile() { @@ -1055,7 +1048,7 @@ static void SanityCheckArtMethodPointerArray( static void SanityCheckObjectsCallback(mirror::Object* obj, void* arg ATTRIBUTE_UNUSED) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { DCHECK(obj != nullptr); - CHECK(obj->GetClass() != nullptr) << "Null class " << obj; + CHECK(obj->GetClass() != nullptr) << "Null class in object " << obj; CHECK(obj->GetClass()->GetClass() != nullptr) << "Null class class " << obj; if (obj->IsClass()) { auto klass = obj->AsClass(); @@ -4901,6 +4894,9 @@ bool ClassLinker::LinkInterfaceMethods(Thread* self, Handle<mirror::Class> klass if (interface_name_comparator.HasSameNameAndSignature( vtable_method_for_name_comparison)) { if (!vtable_method->IsAbstract() && !vtable_method->IsPublic()) { + // Must do EndAssertNoThreadSuspension before throw since the throw can cause + // allocations. + self->EndAssertNoThreadSuspension(old_cause); ThrowIllegalAccessError(klass.Get(), "Method '%s' implementing interface method '%s' is not public", PrettyMethod(vtable_method).c_str(), PrettyMethod(interface_method).c_str()); diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 24615e2a66..5918c10515 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -29,9 +29,11 @@ #include "dex_file-inl.h" #include "dex_instruction.h" #include "gc/accounting/card_table-inl.h" +#include "gc/allocation_record.h" #include "gc/space/large_object_space.h" #include "gc/space/space-inl.h" #include "handle_scope.h" +#include "jdwp/jdwp_priv.h" #include "jdwp/object_registry.h" #include "mirror/class.h" #include "mirror/class-inl.h" @@ -61,127 +63,30 @@ namespace art { // The key identifying the debugger to update instrumentation. static constexpr const char* kDbgInstrumentationKey = "Debugger"; -static const size_t kMaxAllocRecordStackDepth = 16; // Max 255. -static const size_t kDefaultNumAllocRecords = 64*1024; // Must be a power of 2. 2BE can hold 64k-1. - -// Limit alloc_record_count to the 2BE value that is the limit of the current protocol. +// Limit alloc_record_count to the 2BE value (64k-1) that is the limit of the current protocol. 
static uint16_t CappedAllocRecordCount(size_t alloc_record_count) { - if (alloc_record_count > 0xffff) { - return 0xffff; - } - return alloc_record_count; -} - -class AllocRecordStackTraceElement { - public: - AllocRecordStackTraceElement() : method_(nullptr), dex_pc_(0) { - } - - int32_t LineNumber() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - ArtMethod* method = Method(); - DCHECK(method != nullptr); - return method->GetLineNumFromDexPC(DexPc()); - } - - ArtMethod* Method() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - ScopedObjectAccessUnchecked soa(Thread::Current()); - return soa.DecodeMethod(method_); - } - - void SetMethod(ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - ScopedObjectAccessUnchecked soa(Thread::Current()); - method_ = soa.EncodeMethod(m); - } - - uint32_t DexPc() const { - return dex_pc_; - } - - void SetDexPc(uint32_t pc) { - dex_pc_ = pc; - } - - private: - jmethodID method_; - uint32_t dex_pc_; -}; - -jobject Dbg::TypeCache::Add(mirror::Class* t) { - ScopedObjectAccessUnchecked soa(Thread::Current()); - JNIEnv* const env = soa.Env(); - ScopedLocalRef<jobject> local_ref(soa.Env(), soa.AddLocalReference<jobject>(t)); - const int32_t hash_code = soa.Decode<mirror::Class*>(local_ref.get())->IdentityHashCode(); - auto range = objects_.equal_range(hash_code); - for (auto it = range.first; it != range.second; ++it) { - if (soa.Decode<mirror::Class*>(it->second) == soa.Decode<mirror::Class*>(local_ref.get())) { - // Found a matching weak global, return it. - return it->second; + size_t cap = 0xffff; +#ifdef HAVE_ANDROID_OS + // Check whether there's a system property overriding the number of recent records. + const char* propertyName = "dalvik.vm.recentAllocMax"; + char recentAllocMaxString[PROPERTY_VALUE_MAX]; + if (property_get(propertyName, recentAllocMaxString, "") > 0) { + char* end; + size_t value = strtoul(recentAllocMaxString, &end, 10); + if (*end != '\0') { + LOG(ERROR) << "Ignoring " << propertyName << " '" << recentAllocMaxString + << "' --- invalid"; + } else { + cap = value; } } - const jobject weak_global = env->NewWeakGlobalRef(local_ref.get()); - objects_.insert(std::make_pair(hash_code, weak_global)); - return weak_global; -} - -void Dbg::TypeCache::Clear() { - JavaVMExt* vm = Runtime::Current()->GetJavaVM(); - Thread* self = Thread::Current(); - for (const auto& p : objects_) { - vm->DeleteWeakGlobalRef(self, p.second); +#endif + if (alloc_record_count > cap) { + return cap; } - objects_.clear(); + return alloc_record_count; } -class AllocRecord { - public: - AllocRecord() : type_(nullptr), byte_count_(0), thin_lock_id_(0) {} - - mirror::Class* Type() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - return down_cast<mirror::Class*>(Thread::Current()->DecodeJObject(type_)); - } - - void SetType(mirror::Class* t) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, - Locks::alloc_tracker_lock_) { - type_ = Dbg::type_cache_.Add(t); - } - - size_t GetDepth() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - size_t depth = 0; - while (depth < kMaxAllocRecordStackDepth && stack_[depth].Method() != nullptr) { - ++depth; - } - return depth; - } - - size_t ByteCount() const { - return byte_count_; - } - - void SetByteCount(size_t count) { - byte_count_ = count; - } - - uint16_t ThinLockId() const { - return thin_lock_id_; - } - - void SetThinLockId(uint16_t id) { - thin_lock_id_ = id; - } - - AllocRecordStackTraceElement* StackElement(size_t index) { - DCHECK_LT(index, kMaxAllocRecordStackDepth); - return &stack_[index]; - } - - private: - jobject type_; 
// This is a weak global. - size_t byte_count_; - uint16_t thin_lock_id_; - // Unused entries have null method. - AllocRecordStackTraceElement stack_[kMaxAllocRecordStackDepth]; -}; - class Breakpoint { public: Breakpoint(ArtMethod* method, uint32_t dex_pc, @@ -382,13 +287,6 @@ bool Dbg::gDebuggerActive = false; bool Dbg::gDisposed = false; ObjectRegistry* Dbg::gRegistry = nullptr; -// Recent allocation tracking. -AllocRecord* Dbg::recent_allocation_records_ = nullptr; // TODO: CircularBuffer<AllocRecord> -size_t Dbg::alloc_record_max_ = 0; -size_t Dbg::alloc_record_head_ = 0; -size_t Dbg::alloc_record_count_ = 0; -Dbg::TypeCache Dbg::type_cache_; - // Deoptimization support. std::vector<DeoptimizationRequest> Dbg::deoptimization_requests_; size_t Dbg::full_deoptimization_event_count_ = 0; @@ -1761,6 +1659,51 @@ JDWP::JdwpTag Dbg::GetStaticFieldBasicTag(JDWP::FieldId field_id) { return BasicTagFromDescriptor(FromFieldId(field_id)->GetTypeDescriptor()); } +static JValue GetArtFieldValue(ArtField* f, mirror::Object* o) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + Primitive::Type fieldType = f->GetTypeAsPrimitiveType(); + JValue field_value; + switch (fieldType) { + case Primitive::kPrimBoolean: + field_value.SetZ(f->GetBoolean(o)); + return field_value; + + case Primitive::kPrimByte: + field_value.SetB(f->GetByte(o)); + return field_value; + + case Primitive::kPrimChar: + field_value.SetC(f->GetChar(o)); + return field_value; + + case Primitive::kPrimShort: + field_value.SetS(f->GetShort(o)); + return field_value; + + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + // Int and Float must be treated as 32-bit values in JDWP. + field_value.SetI(f->GetInt(o)); + return field_value; + + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + // Long and Double must be treated as 64-bit values in JDWP. 
+ field_value.SetJ(f->GetLong(o)); + return field_value; + + case Primitive::kPrimNot: + field_value.SetL(f->GetObject(o)); + return field_value; + + case Primitive::kPrimVoid: + LOG(FATAL) << "Attempt to read from field of type 'void'"; + UNREACHABLE(); + } + LOG(FATAL) << "Attempt to read from field of unknown type"; + UNREACHABLE(); +} + static JDWP::JdwpError GetFieldValueImpl(JDWP::RefTypeId ref_type_id, JDWP::ObjectId object_id, JDWP::FieldId field_id, JDWP::ExpandBuf* pReply, bool is_static) @@ -1795,27 +1738,17 @@ static JDWP::JdwpError GetFieldValueImpl(JDWP::RefTypeId ref_type_id, JDWP::Obje } } else { if (f->IsStatic()) { - LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.SetValues on static field " - << PrettyField(f); + LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.GetValues" + << " on static field " << PrettyField(f); } } if (f->IsStatic()) { o = f->GetDeclaringClass(); } + JValue field_value(GetArtFieldValue(f, o)); JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor()); - JValue field_value; - if (tag == JDWP::JT_VOID) { - LOG(FATAL) << "Unknown tag: " << tag; - } else if (!IsPrimitiveTag(tag)) { - field_value.SetL(f->GetObject(o)); - } else if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) { - field_value.SetJ(f->Get64(o)); - } else { - field_value.SetI(f->Get32(o)); - } Dbg::OutputJValue(tag, &field_value, pReply); - return JDWP::ERR_NONE; } @@ -1829,6 +1762,76 @@ JDWP::JdwpError Dbg::GetStaticFieldValue(JDWP::RefTypeId ref_type_id, JDWP::Fiel return GetFieldValueImpl(ref_type_id, 0, field_id, pReply, true); } +static JDWP::JdwpError SetArtFieldValue(ArtField* f, mirror::Object* o, uint64_t value, int width) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + Primitive::Type fieldType = f->GetTypeAsPrimitiveType(); + // Debugging only happens at runtime so we know we are not running in a transaction. + static constexpr bool kNoTransactionMode = false; + switch (fieldType) { + case Primitive::kPrimBoolean: + CHECK_EQ(width, 1); + f->SetBoolean<kNoTransactionMode>(o, static_cast<uint8_t>(value)); + return JDWP::ERR_NONE; + + case Primitive::kPrimByte: + CHECK_EQ(width, 1); + f->SetByte<kNoTransactionMode>(o, static_cast<uint8_t>(value)); + return JDWP::ERR_NONE; + + case Primitive::kPrimChar: + CHECK_EQ(width, 2); + f->SetChar<kNoTransactionMode>(o, static_cast<uint16_t>(value)); + return JDWP::ERR_NONE; + + case Primitive::kPrimShort: + CHECK_EQ(width, 2); + f->SetShort<kNoTransactionMode>(o, static_cast<int16_t>(value)); + return JDWP::ERR_NONE; + + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + CHECK_EQ(width, 4); + // Int and Float must be treated as 32-bit values in JDWP. + f->SetInt<kNoTransactionMode>(o, static_cast<int32_t>(value)); + return JDWP::ERR_NONE; + + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + CHECK_EQ(width, 8); + // Long and Double must be treated as 64-bit values in JDWP. 
+ f->SetLong<kNoTransactionMode>(o, value); + return JDWP::ERR_NONE; + + case Primitive::kPrimNot: { + JDWP::JdwpError error; + mirror::Object* v = Dbg::GetObjectRegistry()->Get<mirror::Object*>(value, &error); + if (error != JDWP::ERR_NONE) { + return JDWP::ERR_INVALID_OBJECT; + } + if (v != nullptr) { + mirror::Class* field_type; + { + StackHandleScope<2> hs(Thread::Current()); + HandleWrapper<mirror::Object> h_v(hs.NewHandleWrapper(&v)); + HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o)); + field_type = f->GetType<true>(); + } + if (!field_type->IsAssignableFrom(v->GetClass())) { + return JDWP::ERR_INVALID_OBJECT; + } + } + f->SetObject<kNoTransactionMode>(o, v); + return JDWP::ERR_NONE; + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Attempt to write to field of type 'void'"; + UNREACHABLE(); + } + LOG(FATAL) << "Attempt to write to field of unknown type"; + UNREACHABLE(); +} + static JDWP::JdwpError SetFieldValueImpl(JDWP::ObjectId object_id, JDWP::FieldId field_id, uint64_t value, int width, bool is_static) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -1847,47 +1850,14 @@ static JDWP::JdwpError SetFieldValueImpl(JDWP::ObjectId object_id, JDWP::FieldId } } else { if (f->IsStatic()) { - LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.SetValues on static field " << PrettyField(f); + LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.SetValues" + << " on static field " << PrettyField(f); } } if (f->IsStatic()) { o = f->GetDeclaringClass(); } - - JDWP::JdwpTag tag = BasicTagFromDescriptor(f->GetTypeDescriptor()); - - if (IsPrimitiveTag(tag)) { - if (tag == JDWP::JT_DOUBLE || tag == JDWP::JT_LONG) { - CHECK_EQ(width, 8); - // Debugging can't use transactional mode (runtime only). - f->Set64<false>(o, value); - } else { - CHECK_LE(width, 4); - // Debugging can't use transactional mode (runtime only). - f->Set32<false>(o, value); - } - } else { - mirror::Object* v = Dbg::GetObjectRegistry()->Get<mirror::Object*>(value, &error); - if (error != JDWP::ERR_NONE) { - return JDWP::ERR_INVALID_OBJECT; - } - if (v != nullptr) { - mirror::Class* field_type; - { - StackHandleScope<2> hs(Thread::Current()); - HandleWrapper<mirror::Object> h_v(hs.NewHandleWrapper(&v)); - HandleWrapper<mirror::Object> h_o(hs.NewHandleWrapper(&o)); - field_type = f->GetType<true>(); - } - if (!field_type->IsAssignableFrom(v->GetClass())) { - return JDWP::ERR_INVALID_OBJECT; - } - } - // Debugging can't use transactional mode (runtime only). 
- f->SetObject<false>(o, v); - } - - return JDWP::ERR_NONE; + return SetArtFieldValue(f, o, value, width); } JDWP::JdwpError Dbg::SetFieldValue(JDWP::ObjectId object_id, JDWP::FieldId field_id, uint64_t value, @@ -3763,17 +3733,16 @@ static char JdwpTagToShortyChar(JDWP::JdwpTag tag) { } } -JDWP::JdwpError Dbg::InvokeMethod(JDWP::ObjectId thread_id, JDWP::ObjectId object_id, - JDWP::RefTypeId class_id, JDWP::MethodId method_id, - uint32_t arg_count, uint64_t* arg_values, - JDWP::JdwpTag* arg_types, uint32_t options, - JDWP::JdwpTag* pResultTag, uint64_t* pResultValue, - JDWP::ObjectId* pExceptionId) { - ThreadList* thread_list = Runtime::Current()->GetThreadList(); +JDWP::JdwpError Dbg::PrepareInvokeMethod(uint32_t request_id, JDWP::ObjectId thread_id, + JDWP::ObjectId object_id, JDWP::RefTypeId class_id, + JDWP::MethodId method_id, uint32_t arg_count, + uint64_t arg_values[], JDWP::JdwpTag* arg_types, + uint32_t options) { + Thread* const self = Thread::Current(); + CHECK_EQ(self, GetDebugThread()) << "This must be called by the JDWP thread"; + ThreadList* thread_list = Runtime::Current()->GetThreadList(); Thread* targetThread = nullptr; - std::unique_ptr<DebugInvokeReq> req; - Thread* self = Thread::Current(); { ScopedObjectAccessUnchecked soa(self); JDWP::JdwpError error; @@ -3883,99 +3852,82 @@ JDWP::JdwpError Dbg::InvokeMethod(JDWP::ObjectId thread_id, JDWP::ObjectId objec } // Allocates a DebugInvokeReq. - req.reset(new (std::nothrow) DebugInvokeReq(receiver, c, m, options, arg_values, arg_count)); - if (req.get() == nullptr) { + DebugInvokeReq* req = new (std::nothrow) DebugInvokeReq(request_id, thread_id, receiver, c, m, + options, arg_values, arg_count); + if (req == nullptr) { LOG(ERROR) << "Failed to allocate DebugInvokeReq"; return JDWP::ERR_OUT_OF_MEMORY; } - // Attach the DebugInvokeReq to the target thread so it executes the method when - // it is resumed. Once the invocation completes, it will detach it and signal us - // before suspending itself. - targetThread->SetDebugInvokeReq(req.get()); + // Attaches the DebugInvokeReq to the target thread so it executes the method when + // it is resumed. Once the invocation completes, the target thread will delete it before + // suspending itself (see ThreadList::SuspendSelfForDebugger). + targetThread->SetDebugInvokeReq(req); } // The fact that we've released the thread list lock is a bit risky --- if the thread goes - // away we're sitting high and dry -- but we must release this before the ResumeAllThreads - // call, and it's unwise to hold it during WaitForSuspend. - - { - /* - * We change our (JDWP thread) status, which should be THREAD_RUNNING, - * so we can suspend for a GC if the invoke request causes us to - * run out of memory. It's also a good idea to change it before locking - * the invokeReq mutex, although that should never be held for long. - */ - self->TransitionFromRunnableToSuspended(kWaitingForDebuggerSend); - - VLOG(jdwp) << " Transferring control to event thread"; - { - MutexLock mu(self, req->lock); - - if ((options & JDWP::INVOKE_SINGLE_THREADED) == 0) { - VLOG(jdwp) << " Resuming all threads"; - thread_list->UndoDebuggerSuspensions(); - } else { - VLOG(jdwp) << " Resuming event thread only"; - thread_list->Resume(targetThread, true); - } - - // The target thread is resumed but needs the JDWP token we're holding. - // We release it now and will acquire it again when the invocation is - // complete and the target thread suspends itself. 
- gJdwpState->ReleaseJdwpTokenForCommand(); - - // Wait for the request to finish executing. - while (targetThread->GetInvokeReq() != nullptr) { - req->cond.Wait(self); - } - } - VLOG(jdwp) << " Control has returned from event thread"; - - /* wait for thread to re-suspend itself */ - SuspendThread(thread_id, false /* request_suspension */); - - // Now the thread is suspended again, we can re-acquire the JDWP token. - gJdwpState->AcquireJdwpTokenForCommand(); - - self->TransitionFromSuspendedToRunnable(); - } + // away we're sitting high and dry -- but we must release this before the UndoDebuggerSuspensions + // call. - /* - * Suspend the threads. We waited for the target thread to suspend - * itself, so all we need to do is suspend the others. - * - * The SuspendAllForDebugger() call will double-suspend the event thread, - * so we want to resume the target thread once to keep the books straight. - */ if ((options & JDWP::INVOKE_SINGLE_THREADED) == 0) { - self->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension); - VLOG(jdwp) << " Suspending all threads"; - thread_list->SuspendAllForDebugger(); - self->TransitionFromSuspendedToRunnable(); - VLOG(jdwp) << " Resuming event thread to balance the count"; + VLOG(jdwp) << " Resuming all threads"; + thread_list->UndoDebuggerSuspensions(); + } else { + VLOG(jdwp) << " Resuming event thread only"; thread_list->Resume(targetThread, true); } - // Copy the result. - *pResultTag = req->result_tag; - *pResultValue = req->result_value; - *pExceptionId = req->exception; - return req->error; + return JDWP::ERR_NONE; } void Dbg::ExecuteMethod(DebugInvokeReq* pReq) { - ScopedObjectAccess soa(Thread::Current()); + Thread* const self = Thread::Current(); + CHECK_NE(self, GetDebugThread()) << "This must be called by the event thread"; + + ScopedObjectAccess soa(self); // We can be called while an exception is pending. We need // to preserve that across the method invocation. - StackHandleScope<3> hs(soa.Self()); - auto old_exception = hs.NewHandle<mirror::Throwable>(soa.Self()->GetException()); + StackHandleScope<1> hs(soa.Self()); + Handle<mirror::Throwable> old_exception = hs.NewHandle(soa.Self()->GetException()); soa.Self()->ClearException(); + // Execute the method, then send the reply to the debugger. + ExecuteMethodWithoutPendingException(soa, pReq); + + // If an exception was pending before the invoke, restore it now. + if (old_exception.Get() != nullptr) { + soa.Self()->SetException(old_exception.Get()); + } +} + +// Helper function: write a variable-width value into the output buffer. +static void WriteValue(JDWP::ExpandBuf* pReply, int width, uint64_t value) { + switch (width) { + case 1: + expandBufAdd1(pReply, value); + break; + case 2: + expandBufAdd2BE(pReply, value); + break; + case 4: + expandBufAdd4BE(pReply, value); + break; + case 8: + expandBufAdd8BE(pReply, value); + break; + default: + LOG(FATAL) << width; + UNREACHABLE(); + } +} + +void Dbg::ExecuteMethodWithoutPendingException(ScopedObjectAccess& soa, DebugInvokeReq* pReq) { + soa.Self()->AssertNoPendingException(); + // Translate the method through the vtable, unless the debugger wants to suppress it. 
- auto* m = pReq->method; - auto image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); + ArtMethod* m = pReq->method; + size_t image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); if ((pReq->options & JDWP::INVOKE_NONVIRTUAL) == 0 && pReq->receiver.Read() != nullptr) { ArtMethod* actual_method = pReq->klass.Read()->FindVirtualMethodForVirtualOrInterface(m, image_pointer_size); @@ -3992,39 +3944,133 @@ void Dbg::ExecuteMethod(DebugInvokeReq* pReq) { CHECK_EQ(sizeof(jvalue), sizeof(uint64_t)); + // Invoke the method. ScopedLocalRef<jobject> ref(soa.Env(), soa.AddLocalReference<jobject>(pReq->receiver.Read())); JValue result = InvokeWithJValues(soa, ref.get(), soa.EncodeMethod(m), - reinterpret_cast<jvalue*>(pReq->arg_values)); + reinterpret_cast<jvalue*>(pReq->arg_values.get())); - pReq->result_tag = BasicTagFromDescriptor(m->GetShorty()); - const bool is_object_result = (pReq->result_tag == JDWP::JT_OBJECT); + // Prepare JDWP ids for the reply. + JDWP::JdwpTag result_tag = BasicTagFromDescriptor(m->GetShorty()); + const bool is_object_result = (result_tag == JDWP::JT_OBJECT); + StackHandleScope<2> hs(soa.Self()); Handle<mirror::Object> object_result = hs.NewHandle(is_object_result ? result.GetL() : nullptr); Handle<mirror::Throwable> exception = hs.NewHandle(soa.Self()->GetException()); soa.Self()->ClearException(); - pReq->exception = gRegistry->Add(exception); - if (pReq->exception != 0) { + + if (!IsDebuggerActive()) { + // The debugger detached: we must not re-suspend threads. We also don't need to fill the reply + // because it won't be sent either. + return; + } + + JDWP::ObjectId exceptionObjectId = gRegistry->Add(exception); + uint64_t result_value = 0; + if (exceptionObjectId != 0) { VLOG(jdwp) << " JDWP invocation returning with exception=" << exception.Get() << " " << exception->Dump(); - pReq->result_value = 0; + result_value = 0; } else if (is_object_result) { - /* if no exception thrown, examine object result more closely */ + /* if no exception was thrown, examine object result more closely */ JDWP::JdwpTag new_tag = TagFromObject(soa, object_result.Get()); - if (new_tag != pReq->result_tag) { - VLOG(jdwp) << " JDWP promoted result from " << pReq->result_tag << " to " << new_tag; - pReq->result_tag = new_tag; + if (new_tag != result_tag) { + VLOG(jdwp) << " JDWP promoted result from " << result_tag << " to " << new_tag; + result_tag = new_tag; } // Register the object in the registry and reference its ObjectId. This ensures // GC safety and prevents from accessing stale reference if the object is moved. - pReq->result_value = gRegistry->Add(object_result.Get()); + result_value = gRegistry->Add(object_result.Get()); } else { // Primitive result. - DCHECK(IsPrimitiveTag(pReq->result_tag)); - pReq->result_value = result.GetJ(); + DCHECK(IsPrimitiveTag(result_tag)); + result_value = result.GetJ(); + } + const bool is_constructor = m->IsConstructor() && !m->IsStatic(); + if (is_constructor) { + // If we invoked a constructor (which actually returns void), return the receiver, + // unless we threw, in which case we return null. + result_tag = JDWP::JT_OBJECT; + if (exceptionObjectId == 0) { + // TODO we could keep the receiver ObjectId in the DebugInvokeReq to avoid looking into the + // object registry. 
+ result_value = GetObjectRegistry()->Add(pReq->receiver.Read()); + } else { + result_value = 0; + } } - if (old_exception.Get() != nullptr) { - soa.Self()->SetException(old_exception.Get()); + // Suspend other threads if the invoke is not single-threaded. + if ((pReq->options & JDWP::INVOKE_SINGLE_THREADED) == 0) { + soa.Self()->TransitionFromRunnableToSuspended(kWaitingForDebuggerSuspension); + VLOG(jdwp) << " Suspending all threads"; + Runtime::Current()->GetThreadList()->SuspendAllForDebugger(); + soa.Self()->TransitionFromSuspendedToRunnable(); + } + + VLOG(jdwp) << " --> returned " << result_tag + << StringPrintf(" %#" PRIx64 " (except=%#" PRIx64 ")", result_value, + exceptionObjectId); + + // Show detailed debug output. + if (result_tag == JDWP::JT_STRING && exceptionObjectId == 0) { + if (result_value != 0) { + if (VLOG_IS_ON(jdwp)) { + std::string result_string; + JDWP::JdwpError error = Dbg::StringToUtf8(result_value, &result_string); + CHECK_EQ(error, JDWP::ERR_NONE); + VLOG(jdwp) << " string '" << result_string << "'"; + } + } else { + VLOG(jdwp) << " string (null)"; + } + } + + // Attach the reply to DebugInvokeReq so it can be sent to the debugger when the event thread + // is ready to suspend. + BuildInvokeReply(pReq->reply, pReq->request_id, result_tag, result_value, exceptionObjectId); +} + +void Dbg::BuildInvokeReply(JDWP::ExpandBuf* pReply, uint32_t request_id, JDWP::JdwpTag result_tag, + uint64_t result_value, JDWP::ObjectId exception) { + // Make room for the JDWP header since we do not know the size of the reply yet. + JDWP::expandBufAddSpace(pReply, kJDWPHeaderLen); + + size_t width = GetTagWidth(result_tag); + JDWP::expandBufAdd1(pReply, result_tag); + if (width != 0) { + WriteValue(pReply, width, result_value); + } + JDWP::expandBufAdd1(pReply, JDWP::JT_OBJECT); + JDWP::expandBufAddObjectId(pReply, exception); + + // Now we know the size, we can complete the JDWP header. + uint8_t* buf = expandBufGetBuffer(pReply); + JDWP::Set4BE(buf + kJDWPHeaderSizeOffset, expandBufGetLength(pReply)); + JDWP::Set4BE(buf + kJDWPHeaderIdOffset, request_id); + JDWP::Set1(buf + kJDWPHeaderFlagsOffset, kJDWPFlagReply); // flags + JDWP::Set2BE(buf + kJDWPHeaderErrorCodeOffset, JDWP::ERR_NONE); +} + +void Dbg::FinishInvokeMethod(DebugInvokeReq* pReq) { + CHECK_NE(Thread::Current(), GetDebugThread()) << "This must be called by the event thread"; + + JDWP::ExpandBuf* const pReply = pReq->reply; + CHECK(pReply != nullptr) << "No reply attached to DebugInvokeReq"; + + // We need to prevent other threads (including JDWP thread) from interacting with the debugger + // while we send the reply but are not yet suspended. The JDWP token will be released just before + // we suspend ourself again (see ThreadList::SuspendSelfForDebugger). + gJdwpState->AcquireJdwpTokenForEvent(pReq->thread_id); + + // Send the reply unless the debugger detached before the completion of the method. + if (IsDebuggerActive()) { + const size_t replyDataLength = expandBufGetLength(pReply) - kJDWPHeaderLen; + VLOG(jdwp) << StringPrintf("REPLY INVOKE id=0x%06x (length=%zu)", + pReq->request_id, replyDataLength); + + gJdwpState->SendRequest(pReply); + } else { + VLOG(jdwp) << "Not sending invoke reply because debugger detached"; } } @@ -4665,177 +4711,41 @@ void Dbg::DdmSendHeapSegments(bool native) { Dbg::DdmSendChunk(native ? 
CHUNK_TYPE("NHEN") : CHUNK_TYPE("HPEN"), sizeof(heap_id), heap_id); } -static size_t GetAllocTrackerMax() { -#ifdef HAVE_ANDROID_OS - // Check whether there's a system property overriding the number of records. - const char* propertyName = "dalvik.vm.allocTrackerMax"; - char allocRecordMaxString[PROPERTY_VALUE_MAX]; - if (property_get(propertyName, allocRecordMaxString, "") > 0) { - char* end; - size_t value = strtoul(allocRecordMaxString, &end, 10); - if (*end != '\0') { - LOG(ERROR) << "Ignoring " << propertyName << " '" << allocRecordMaxString - << "' --- invalid"; - return kDefaultNumAllocRecords; - } - if (!IsPowerOfTwo(value)) { - LOG(ERROR) << "Ignoring " << propertyName << " '" << allocRecordMaxString - << "' --- not power of two"; - return kDefaultNumAllocRecords; - } - return value; - } -#endif - return kDefaultNumAllocRecords; -} - void Dbg::SetAllocTrackingEnabled(bool enable) { - Thread* self = Thread::Current(); - if (enable) { - { - MutexLock mu(self, *Locks::alloc_tracker_lock_); - if (recent_allocation_records_ != nullptr) { - return; // Already enabled, bail. - } - alloc_record_max_ = GetAllocTrackerMax(); - LOG(INFO) << "Enabling alloc tracker (" << alloc_record_max_ << " entries of " - << kMaxAllocRecordStackDepth << " frames, taking " - << PrettySize(sizeof(AllocRecord) * alloc_record_max_) << ")"; - DCHECK_EQ(alloc_record_head_, 0U); - DCHECK_EQ(alloc_record_count_, 0U); - recent_allocation_records_ = new AllocRecord[alloc_record_max_]; - CHECK(recent_allocation_records_ != nullptr); - } - Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(); - } else { - { - ScopedObjectAccess soa(self); // For type_cache_.Clear(); - MutexLock mu(self, *Locks::alloc_tracker_lock_); - if (recent_allocation_records_ == nullptr) { - return; // Already disabled, bail. - } - LOG(INFO) << "Disabling alloc tracker"; - delete[] recent_allocation_records_; - recent_allocation_records_ = nullptr; - alloc_record_head_ = 0; - alloc_record_count_ = 0; - type_cache_.Clear(); - } - // If an allocation comes in before we uninstrument, we will safely drop it on the floor. - Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints(); - } -} - -struct AllocRecordStackVisitor : public StackVisitor { - AllocRecordStackVisitor(Thread* thread, AllocRecord* record_in) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), - record(record_in), - depth(0) {} - - // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses - // annotalysis. - bool VisitFrame() NO_THREAD_SAFETY_ANALYSIS { - if (depth >= kMaxAllocRecordStackDepth) { - return false; - } - ArtMethod* m = GetMethod(); - if (!m->IsRuntimeMethod()) { - record->StackElement(depth)->SetMethod(m); - record->StackElement(depth)->SetDexPc(GetDexPc()); - ++depth; - } - return true; - } - - ~AllocRecordStackVisitor() { - // Clear out any unused stack trace elements. - for (; depth < kMaxAllocRecordStackDepth; ++depth) { - record->StackElement(depth)->SetMethod(nullptr); - record->StackElement(depth)->SetDexPc(0); - } - } - - AllocRecord* record; - size_t depth; -}; - -void Dbg::RecordAllocation(Thread* self, mirror::Class* type, size_t byte_count) { - MutexLock mu(self, *Locks::alloc_tracker_lock_); - if (recent_allocation_records_ == nullptr) { - // In the process of shutting down recording, bail. - return; - } - - // Advance and clip. 
- if (++alloc_record_head_ == alloc_record_max_) { - alloc_record_head_ = 0; - } - - // Fill in the basics. - AllocRecord* record = &recent_allocation_records_[alloc_record_head_]; - record->SetType(type); - record->SetByteCount(byte_count); - record->SetThinLockId(self->GetThreadId()); - - // Fill in the stack trace. - AllocRecordStackVisitor visitor(self, record); - visitor.WalkStack(); - - if (alloc_record_count_ < alloc_record_max_) { - ++alloc_record_count_; - } -} - -// Returns the index of the head element. -// -// We point at the most-recently-written record, so if alloc_record_count_ is 1 -// we want to use the current element. Take "head+1" and subtract count -// from it. -// -// We need to handle underflow in our circular buffer, so we add -// alloc_record_max_ and then mask it back down. -size_t Dbg::HeadIndex() { - return (Dbg::alloc_record_head_ + 1 + Dbg::alloc_record_max_ - Dbg::alloc_record_count_) & - (Dbg::alloc_record_max_ - 1); + gc::AllocRecordObjectMap::SetAllocTrackingEnabled(enable); } void Dbg::DumpRecentAllocations() { ScopedObjectAccess soa(Thread::Current()); MutexLock mu(soa.Self(), *Locks::alloc_tracker_lock_); - if (recent_allocation_records_ == nullptr) { + if (!Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()) { LOG(INFO) << "Not recording tracked allocations"; return; } + gc::AllocRecordObjectMap* records = Runtime::Current()->GetHeap()->GetAllocationRecords(); + CHECK(records != nullptr); - // "i" is the head of the list. We want to start at the end of the - // list and move forward to the tail. - size_t i = HeadIndex(); - const uint16_t capped_count = CappedAllocRecordCount(Dbg::alloc_record_count_); + const uint16_t capped_count = CappedAllocRecordCount(records->Size()); uint16_t count = capped_count; - LOG(INFO) << "Tracked allocations, (head=" << alloc_record_head_ << " count=" << count << ")"; - while (count--) { - AllocRecord* record = &recent_allocation_records_[i]; + LOG(INFO) << "Tracked allocations, (count=" << count << ")"; + for (auto it = records->RBegin(), end = records->REnd(); + count > 0 && it != end; count--, it++) { + const gc::AllocRecord* record = it->second; - LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->ThinLockId(), record->ByteCount()) - << PrettyClass(record->Type()); + LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->GetTid(), record->ByteCount()) + << PrettyClass(it->first.Read()->GetClass()); - for (size_t stack_frame = 0; stack_frame < kMaxAllocRecordStackDepth; ++stack_frame) { - AllocRecordStackTraceElement* stack_element = record->StackElement(stack_frame); - ArtMethod* m = stack_element->Method(); - if (m == nullptr) { - break; - } - LOG(INFO) << " " << PrettyMethod(m) << " line " << stack_element->LineNumber(); + for (size_t stack_frame = 0, depth = record->GetDepth(); stack_frame < depth; ++stack_frame) { + const gc::AllocRecordStackTraceElement& stack_element = record->StackElement(stack_frame); + ArtMethod* m = stack_element.GetMethod(); + LOG(INFO) << " " << PrettyMethod(m) << " line " << stack_element.ComputeLineNumber(); } // pause periodically to help logcat catch up if ((count % 5) == 0) { usleep(40000); } - - i = (i + 1) & (alloc_record_max_ - 1); } } @@ -4937,6 +4847,15 @@ jbyteArray Dbg::GetRecentAllocations() { std::vector<uint8_t> bytes; { MutexLock mu(self, *Locks::alloc_tracker_lock_); + gc::AllocRecordObjectMap* records = Runtime::Current()->GetHeap()->GetAllocationRecords(); + // In case this method is called when allocation tracker is disabled, + // we should still 
send some data back. + gc::AllocRecordObjectMap dummy; + if (records == nullptr) { + CHECK(!Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()); + records = &dummy; + } + // // Part 1: generate string tables. // @@ -4944,26 +4863,23 @@ jbyteArray Dbg::GetRecentAllocations() { StringTable method_names; StringTable filenames; - const uint16_t capped_count = CappedAllocRecordCount(Dbg::alloc_record_count_); + const uint16_t capped_count = CappedAllocRecordCount(records->Size()); uint16_t count = capped_count; - size_t idx = HeadIndex(); - while (count--) { - AllocRecord* record = &recent_allocation_records_[idx]; + for (auto it = records->RBegin(), end = records->REnd(); + count > 0 && it != end; count--, it++) { + const gc::AllocRecord* record = it->second; std::string temp; - class_names.Add(record->Type()->GetDescriptor(&temp)); - for (size_t i = 0; i < kMaxAllocRecordStackDepth; i++) { - ArtMethod* m = record->StackElement(i)->Method(); - if (m != nullptr) { - class_names.Add(m->GetDeclaringClassDescriptor()); - method_names.Add(m->GetName()); - filenames.Add(GetMethodSourceFile(m)); - } + class_names.Add(it->first.Read()->GetClass()->GetDescriptor(&temp)); + for (size_t i = 0, depth = record->GetDepth(); i < depth; i++) { + ArtMethod* m = record->StackElement(i).GetMethod(); + class_names.Add(m->GetDeclaringClassDescriptor()); + method_names.Add(m->GetName()); + filenames.Add(GetMethodSourceFile(m)); } - - idx = (idx + 1) & (alloc_record_max_ - 1); } - LOG(INFO) << "allocation records: " << capped_count; + LOG(INFO) << "recent allocation records: " << capped_count; + LOG(INFO) << "allocation records all objects: " << records->Size(); // // Part 2: Generate the output and store it in the buffer. @@ -4991,20 +4907,23 @@ jbyteArray Dbg::GetRecentAllocations() { JDWP::Append2BE(bytes, method_names.Size()); JDWP::Append2BE(bytes, filenames.Size()); - idx = HeadIndex(); std::string temp; - for (count = capped_count; count != 0; --count) { + count = capped_count; + // The last "count" number of allocation records in "records" are the most recent "count" number + // of allocations. Reverse iterate to get them. The most recent allocation is sent first. 
+ for (auto it = records->RBegin(), end = records->REnd(); + count > 0 && it != end; count--, it++) { // For each entry: // (4b) total allocation size // (2b) thread id // (2b) allocated object's class name index // (1b) stack depth - AllocRecord* record = &recent_allocation_records_[idx]; + const gc::AllocRecord* record = it->second; size_t stack_depth = record->GetDepth(); size_t allocated_object_class_name_index = - class_names.IndexOf(record->Type()->GetDescriptor(&temp)); + class_names.IndexOf(it->first.Read()->GetClass()->GetDescriptor(&temp)); JDWP::Append4BE(bytes, record->ByteCount()); - JDWP::Append2BE(bytes, record->ThinLockId()); + JDWP::Append2BE(bytes, static_cast<uint16_t>(record->GetTid())); JDWP::Append2BE(bytes, allocated_object_class_name_index); JDWP::Append1BE(bytes, stack_depth); @@ -5014,16 +4933,15 @@ jbyteArray Dbg::GetRecentAllocations() { // (2b) method name // (2b) method source file // (2b) line number, clipped to 32767; -2 if native; -1 if no source - ArtMethod* m = record->StackElement(stack_frame)->Method(); + ArtMethod* m = record->StackElement(stack_frame).GetMethod(); size_t class_name_index = class_names.IndexOf(m->GetDeclaringClassDescriptor()); size_t method_name_index = method_names.IndexOf(m->GetName()); size_t file_name_index = filenames.IndexOf(GetMethodSourceFile(m)); JDWP::Append2BE(bytes, class_name_index); JDWP::Append2BE(bytes, method_name_index); JDWP::Append2BE(bytes, file_name_index); - JDWP::Append2BE(bytes, record->StackElement(stack_frame)->LineNumber()); + JDWP::Append2BE(bytes, record->StackElement(stack_frame).ComputeLineNumber()); } - idx = (idx + 1) & (alloc_record_max_ - 1); } // (xb) class name strings diff --git a/runtime/debugger.h b/runtime/debugger.h index 7c586a4ff9..fd7d46c37e 100644 --- a/runtime/debugger.h +++ b/runtime/debugger.h @@ -23,7 +23,6 @@ #include <pthread.h> -#include <map> #include <set> #include <string> #include <vector> @@ -32,7 +31,6 @@ #include "jdwp/jdwp.h" #include "jni.h" #include "jvalue.h" -#include "object_callbacks.h" #include "thread_state.h" namespace art { @@ -41,10 +39,10 @@ class Class; class Object; class Throwable; } // namespace mirror -class AllocRecord; class ArtField; class ArtMethod; class ObjectRegistry; +class ScopedObjectAccess; class ScopedObjectAccessUnchecked; class StackVisitor; class Thread; @@ -53,33 +51,32 @@ class Thread; * Invoke-during-breakpoint support. 
*/ struct DebugInvokeReq { - DebugInvokeReq(mirror::Object* invoke_receiver, mirror::Class* invoke_class, + DebugInvokeReq(uint32_t invoke_request_id, JDWP::ObjectId invoke_thread_id, + mirror::Object* invoke_receiver, mirror::Class* invoke_class, ArtMethod* invoke_method, uint32_t invoke_options, - uint64_t* args, uint32_t args_count) - : receiver(invoke_receiver), klass(invoke_class), method(invoke_method), - arg_count(args_count), arg_values(args), options(invoke_options), - error(JDWP::ERR_NONE), result_tag(JDWP::JT_VOID), result_value(0), exception(0), - lock("a DebugInvokeReq lock", kBreakpointInvokeLock), - cond("a DebugInvokeReq condition variable", lock) { + uint64_t args[], uint32_t args_count) + : request_id(invoke_request_id), thread_id(invoke_thread_id), receiver(invoke_receiver), + klass(invoke_class), method(invoke_method), arg_count(args_count), arg_values(args), + options(invoke_options), reply(JDWP::expandBufAlloc()) { } - /* request */ - GcRoot<mirror::Object> receiver; // not used for ClassType.InvokeMethod + ~DebugInvokeReq() { + JDWP::expandBufFree(reply); + } + + // Request + const uint32_t request_id; + const JDWP::ObjectId thread_id; + GcRoot<mirror::Object> receiver; // not used for ClassType.InvokeMethod. GcRoot<mirror::Class> klass; - ArtMethod* method; + ArtMethod* const method; const uint32_t arg_count; - uint64_t* const arg_values; // will be null if arg_count_ == 0 + std::unique_ptr<uint64_t[]> arg_values; // will be null if arg_count_ == 0. We take ownership + // of this array so we must delete it upon destruction. const uint32_t options; - /* result */ - JDWP::JdwpError error; - JDWP::JdwpTag result_tag; - uint64_t result_value; // either a primitive value or an ObjectId - JDWP::ObjectId exception; - - /* condition variable to wait on while the method executes */ - Mutex lock DEFAULT_MUTEX_ACQUIRED_AFTER; - ConditionVariable cond GUARDED_BY(lock); + // Reply + JDWP::ExpandBuf* const reply; void VisitRoots(RootVisitor* visitor, const RootInfo& root_info) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -202,19 +199,6 @@ std::ostream& operator<<(std::ostream& os, const DeoptimizationRequest::Kind& rh class Dbg { public: - class TypeCache { - public: - // Returns a weak global for the input type. Deduplicates. - jobject Add(mirror::Class* t) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, - Locks::alloc_tracker_lock_); - // Clears the type cache and deletes all the weak global refs. - void Clear() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_, - Locks::alloc_tracker_lock_); - - private: - std::multimap<int32_t, jobject> objects_; - }; - static void SetJdwpAllowed(bool allowed); static void StartJdwp(); @@ -621,19 +605,39 @@ class Dbg { LOCKS_EXCLUDED(Locks::thread_list_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - // Invoke support for commands ClassType.InvokeMethod, ClassType.NewInstance and - // ObjectReference.InvokeMethod. - static JDWP::JdwpError InvokeMethod(JDWP::ObjectId thread_id, JDWP::ObjectId object_id, - JDWP::RefTypeId class_id, JDWP::MethodId method_id, - uint32_t arg_count, uint64_t* arg_values, - JDWP::JdwpTag* arg_types, uint32_t options, - JDWP::JdwpTag* pResultTag, uint64_t* pResultValue, - JDWP::ObjectId* pExceptObj) + /* + * Invoke support + */ + + // Called by the JDWP thread to prepare invocation in the event thread (suspended on an event). + // If the information sent by the debugger is incorrect, it will send a reply with the + // appropriate error code. 
Otherwise, it will attach a DebugInvokeReq object to the event thread + // and resume it (and possibly other threads depending on the invoke options). + // Unlike other commands, the JDWP thread will not send the reply to the debugger (see + // JdwpState::ProcessRequest). The reply will be sent by the event thread itself after method + // invocation completes (see FinishInvokeMethod). This is required to allow the JDWP thread to + // process incoming commands from the debugger while the invocation is still in progress in the + // event thread, especially if it gets suspended by a debug event occurring in another thread. + static JDWP::JdwpError PrepareInvokeMethod(uint32_t request_id, JDWP::ObjectId thread_id, + JDWP::ObjectId object_id, JDWP::RefTypeId class_id, + JDWP::MethodId method_id, uint32_t arg_count, + uint64_t arg_values[], JDWP::JdwpTag* arg_types, + uint32_t options) LOCKS_EXCLUDED(Locks::thread_list_lock_, Locks::thread_suspend_count_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + // Called by the event thread to execute a method prepared by the JDWP thread in the given + // DebugInvokeReq object. Once the invocation completes, the event thread attaches a reply + // to that DebugInvokeReq object so it can be sent to the debugger only when the event thread + // is ready to suspend (see FinishInvokeMethod). static void ExecuteMethod(DebugInvokeReq* pReq); + // Called by the event thread to send the reply of the invoke (created in ExecuteMethod) + // before suspending itself. This is to ensure the thread is ready to suspend before the + // debugger receives the reply. + static void FinishInvokeMethod(DebugInvokeReq* pReq); + /* * DDM support. */ @@ -655,19 +659,12 @@ class Dbg { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); /* - * Recent allocation tracking support. + * Allocation tracking support. 
*/ - static void RecordAllocation(Thread* self, mirror::Class* type, size_t byte_count) - LOCKS_EXCLUDED(Locks::alloc_tracker_lock_) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static void SetAllocTrackingEnabled(bool enabled) LOCKS_EXCLUDED(Locks::alloc_tracker_lock_); - static bool IsAllocTrackingEnabled() { - return recent_allocation_records_ != nullptr; - } static jbyteArray GetRecentAllocations() LOCKS_EXCLUDED(Locks::alloc_tracker_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - static size_t HeadIndex() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); static void DumpRecentAllocations() LOCKS_EXCLUDED(Locks::alloc_tracker_lock_); enum HpifWhen { @@ -717,6 +714,14 @@ class Dbg { } private: + static void ExecuteMethodWithoutPendingException(ScopedObjectAccess& soa, DebugInvokeReq* pReq) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + static void BuildInvokeReply(JDWP::ExpandBuf* pReply, uint32_t request_id, + JDWP::JdwpTag result_tag, uint64_t result_value, + JDWP::ObjectId exception) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + static JDWP::JdwpError GetLocalValue(const StackVisitor& visitor, ScopedObjectAccessUnchecked& soa, int slot, JDWP::JdwpTag tag, uint8_t* buf, size_t width) @@ -755,11 +760,6 @@ class Dbg { static bool IsForcedInterpreterNeededForUpcallImpl(Thread* thread, ArtMethod* m) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - static AllocRecord* recent_allocation_records_ PT_GUARDED_BY(Locks::alloc_tracker_lock_); - static size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_); - static size_t alloc_record_head_ GUARDED_BY(Locks::alloc_tracker_lock_); - static size_t alloc_record_count_ GUARDED_BY(Locks::alloc_tracker_lock_); - // Indicates whether the debugger is making requests. static bool gDebuggerActive; @@ -784,9 +784,6 @@ class Dbg { static size_t* GetReferenceCounterForEvent(uint32_t instrumentation_event); - // Weak global type cache, TODO improve this. - static TypeCache type_cache_ GUARDED_BY(Locks::alloc_tracker_lock_); - // Instrumentation event reference counters. // TODO we could use an array instead of having all these dedicated counters. Instrumentation // events are bits of a mask so we could convert them to array index. @@ -798,7 +795,6 @@ class Dbg { static size_t exception_catch_event_ref_count_ GUARDED_BY(Locks::deoptimization_lock_); static uint32_t instrumentation_events_ GUARDED_BY(Locks::mutator_lock_); - friend class AllocRecord; // For type_cache_ with proper annotalysis. 
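The comments above describe the new invoke handshake: the JDWP thread prepares a DebugInvokeReq and resumes the suspended event thread, which executes the method, builds the reply, and sends it itself before suspending again. What follows is a minimal standalone sketch of that handoff pattern only; InvokeReq, EventThread and the std::thread/condition_variable plumbing are illustrative stand-ins, not ART's types or locking.

// Simplified model of the JDWP invoke handoff described above. All names here
// (InvokeReq, EventThread, AttachAndResume) are illustrative, not ART's real API.
#include <condition_variable>
#include <cstdint>
#include <functional>
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <utility>

struct InvokeReq {                      // Roughly mirrors DebugInvokeReq: request plus reply buffer.
  uint32_t request_id;
  std::function<uint64_t()> method;     // Stand-in for (receiver, method, args).
  std::string reply;                    // Stand-in for the JDWP::ExpandBuf reply.
};

class EventThread {
 public:
  // "JDWP thread" side: attach the request and wake the suspended event thread.
  void AttachAndResume(std::unique_ptr<InvokeReq> req) {
    std::lock_guard<std::mutex> lock(mu_);
    req_ = std::move(req);
    cv_.notify_one();
  }
  // "Event thread" side: execute the method, build the reply, send it itself,
  // then go back to being suspended on the event.
  void Run() {
    std::unique_lock<std::mutex> lock(mu_);
    cv_.wait(lock, [this] { return req_ != nullptr; });
    uint64_t result = req_->method();                           // ExecuteMethod.
    req_->reply = "reply:" + std::to_string(result);            // BuildInvokeReply.
    std::cout << "event thread sends " << req_->reply << "\n";  // FinishInvokeMethod.
    req_.reset();
  }

 private:
  std::mutex mu_;
  std::condition_variable cv_;
  std::unique_ptr<InvokeReq> req_;
};

int main() {
  EventThread event_thread;
  std::thread t(&EventThread::Run, &event_thread);
  // The JDWP-side call returns immediately, leaving that thread free to keep
  // processing debugger commands while the invoke runs in the event thread.
  event_thread.AttachAndResume(std::unique_ptr<InvokeReq>(
      new InvokeReq{42u, [] { return 7ull; }, ""}));
  t.join();
  return 0;
}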
DISALLOW_COPY_AND_ASSIGN(Dbg); }; diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc index a66c38e0fe..5fa58f754f 100644 --- a/runtime/dex_file_verifier.cc +++ b/runtime/dex_file_verifier.cc @@ -465,7 +465,9 @@ bool DexFileVerifier::CheckClassDataItemField(uint32_t idx, uint32_t access_flag } bool DexFileVerifier::CheckClassDataItemMethod(uint32_t idx, uint32_t access_flags, - uint32_t code_offset, bool expect_direct) { + uint32_t code_offset, + std::unordered_set<uint32_t>& direct_method_indexes, + bool expect_direct) { if (!CheckIndex(idx, header_->method_ids_size_, "class_data_item method_idx")) { return false; } @@ -480,6 +482,13 @@ bool DexFileVerifier::CheckClassDataItemMethod(uint32_t idx, uint32_t access_fla return false; } + if (expect_direct) { + direct_method_indexes.insert(idx); + } else if (direct_method_indexes.find(idx) != direct_method_indexes.end()) { + ErrorStringPrintf("Found virtual method with same index as direct method: %d", idx); + return false; + } + constexpr uint32_t access_method_mask = kAccJavaFlagsMask | kAccConstructor | kAccDeclaredSynchronized; if (UNLIKELY(((access_flags & ~access_method_mask) != 0) || @@ -682,6 +691,7 @@ bool DexFileVerifier::CheckEncodedAnnotation() { bool DexFileVerifier::CheckIntraClassDataItem() { ClassDataItemIterator it(*dex_file_, ptr_); + std::unordered_set<uint32_t> direct_method_indexes; // These calls use the raw access flags to check whether the whole dex field is valid. @@ -697,13 +707,13 @@ bool DexFileVerifier::CheckIntraClassDataItem() { } for (; it.HasNextDirectMethod(); it.Next()) { if (!CheckClassDataItemMethod(it.GetMemberIndex(), it.GetRawMemberAccessFlags(), - it.GetMethodCodeItemOffset(), true)) { + it.GetMethodCodeItemOffset(), direct_method_indexes, true)) { return false; } } for (; it.HasNextVirtualMethod(); it.Next()) { if (!CheckClassDataItemMethod(it.GetMemberIndex(), it.GetRawMemberAccessFlags(), - it.GetMethodCodeItemOffset(), false)) { + it.GetMethodCodeItemOffset(), direct_method_indexes, false)) { return false; } } diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h index 877dfc2efd..ccc40d4442 100644 --- a/runtime/dex_file_verifier.h +++ b/runtime/dex_file_verifier.h @@ -59,6 +59,7 @@ class DexFileVerifier { uint32_t* handler_offsets, uint32_t handlers_size); bool CheckClassDataItemField(uint32_t idx, uint32_t access_flags, bool expect_static); bool CheckClassDataItemMethod(uint32_t idx, uint32_t access_flags, uint32_t code_offset, + std::unordered_set<uint32_t>& direct_method_indexes, bool expect_direct); bool CheckPadding(size_t offset, uint32_t aligned_offset); bool CheckEncodedValue(); diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index b0cbd02880..de925b7e8c 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -39,9 +39,12 @@ namespace art { inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method, - uint32_t method_index, - InvokeType invoke_type) + const InlineInfo& inline_info, + uint8_t inlining_depth) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + uint32_t method_index = inline_info.GetMethodIndexAtDepth(inlining_depth); + InvokeType invoke_type = static_cast<InvokeType>( + inline_info.GetInvokeTypeAtDepth(inlining_depth)); ArtMethod* caller = outer_method->GetDexCacheResolvedMethod(method_index, sizeof(void*)); if (!caller->IsRuntimeMethod()) { return caller; @@ -51,10 +54,19 @@ inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method, // 
the stub that will then update the dex cache. Therefore, we need to do the // resolution ourselves. + // We first find the class loader of our caller. If it is the outer method, we can directly + // use its class loader. Otherwise, we also need to resolve our caller. StackHandleScope<2> hs(Thread::Current()); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - Handle<mirror::ClassLoader> class_loader(hs.NewHandle(outer_method->GetClassLoader())); + MutableHandle<mirror::ClassLoader> class_loader(hs.NewHandle<mirror::Class>(nullptr)); Handle<mirror::DexCache> dex_cache(hs.NewHandle(outer_method->GetDexCache())); + if (inlining_depth == 0) { + class_loader.Assign(outer_method->GetClassLoader()); + } else { + caller = GetResolvedMethod(outer_method, inline_info, inlining_depth - 1); + class_loader.Assign(caller->GetClassLoader()); + } + return class_linker->ResolveMethod( *outer_method->GetDexFile(), method_index, dex_cache, class_loader, nullptr, invoke_type); } @@ -82,10 +94,7 @@ inline ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp, DCHECK(stack_map.IsValid()); if (stack_map.HasInlineInfo(encoding)) { InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); - uint32_t method_index = inline_info.GetMethodIndexAtDepth(inline_info.GetDepth() - 1); - InvokeType invoke_type = static_cast<InvokeType>( - inline_info.GetInvokeTypeAtDepth(inline_info.GetDepth() - 1)); - caller = GetResolvedMethod(outer_method, method_index, invoke_type); + caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1); } } diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc index 4a352ddf9a..762f061ce2 100644 --- a/runtime/fault_handler.cc +++ b/runtime/fault_handler.cc @@ -72,12 +72,10 @@ namespace art { // Static fault manger object accessed by signal handler. FaultManager fault_manager; -extern "C" { -void art_sigsegv_fault() { +extern "C" __attribute__((visibility("default"))) void art_sigsegv_fault() { // Set a breakpoint here to be informed when a SIGSEGV is unhandled by ART. VLOG(signals)<< "Caught unknown SIGSEGV in ART fault handler - chaining to next handler."; } -} // Signal handler called on SIGSEGV. 
static void art_fault_handler(int sig, siginfo_t* info, void* context) { diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h index c16f5d35e0..006d2c7d30 100644 --- a/runtime/gc/accounting/space_bitmap-inl.h +++ b/runtime/gc/accounting/space_bitmap-inl.h @@ -159,6 +159,7 @@ template<size_t kAlignment> template<bool kSetBit> inline bool SpaceBitmap<kAlignment>::Modify(const mirror::Object* obj) { uintptr_t addr = reinterpret_cast<uintptr_t>(obj); DCHECK_GE(addr, heap_begin_); + DCHECK(HasAddress(obj)) << obj; const uintptr_t offset = addr - heap_begin_; const size_t index = OffsetToIndex(offset); const uintptr_t mask = OffsetToMask(offset); diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc index fe2b284fcb..6546eb4245 100644 --- a/runtime/gc/accounting/space_bitmap.cc +++ b/runtime/gc/accounting/space_bitmap.cc @@ -35,6 +35,11 @@ size_t SpaceBitmap<kAlignment>::ComputeBitmapSize(uint64_t capacity) { } template<size_t kAlignment> +size_t SpaceBitmap<kAlignment>::ComputeHeapSize(uint64_t bitmap_bytes) { + return bitmap_bytes * kBitsPerByte * kAlignment; +} + +template<size_t kAlignment> SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::CreateFromMemMap( const std::string& name, MemMap* mem_map, uint8_t* heap_begin, size_t heap_capacity) { CHECK(mem_map != nullptr); diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h index d6b3ed4f26..35faff3774 100644 --- a/runtime/gc/accounting/space_bitmap.h +++ b/runtime/gc/accounting/space_bitmap.h @@ -188,15 +188,16 @@ class SpaceBitmap { std::string Dump() const; + // Helper function for computing bitmap size based on a 64 bit capacity. + static size_t ComputeBitmapSize(uint64_t capacity); + static size_t ComputeHeapSize(uint64_t bitmap_bytes); + private: // TODO: heap_end_ is initialized so that the heap bitmap is empty, this doesn't require the -1, // however, we document that this is expected on heap_end_ SpaceBitmap(const std::string& name, MemMap* mem_map, uintptr_t* bitmap_begin, size_t bitmap_size, const void* heap_begin); - // Helper function for computing bitmap size based on a 64 bit capacity. - static size_t ComputeBitmapSize(uint64_t capacity); - template<bool kSetBit> bool Modify(const mirror::Object* obj); diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc new file mode 100644 index 0000000000..a385363428 --- /dev/null +++ b/runtime/gc/allocation_record.cc @@ -0,0 +1,206 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "allocation_record.h" + +#include "art_method-inl.h" +#include "base/stl_util.h" +#include "stack.h" + +#ifdef HAVE_ANDROID_OS +#include "cutils/properties.h" +#endif + +namespace art { +namespace gc { + +int32_t AllocRecordStackTraceElement::ComputeLineNumber() const { + DCHECK(method_ != nullptr); + return method_->GetLineNumFromDexPC(dex_pc_); +} + +void AllocRecordObjectMap::SetProperties() { +#ifdef HAVE_ANDROID_OS + // Check whether there's a system property overriding the max number of records. + const char* propertyName = "dalvik.vm.allocTrackerMax"; + char allocMaxString[PROPERTY_VALUE_MAX]; + if (property_get(propertyName, allocMaxString, "") > 0) { + char* end; + size_t value = strtoul(allocMaxString, &end, 10); + if (*end != '\0') { + LOG(ERROR) << "Ignoring " << propertyName << " '" << allocMaxString + << "' --- invalid"; + } else { + alloc_record_max_ = value; + } + } + // Check whether there's a system property overriding the max depth of stack trace. + propertyName = "dalvik.vm.allocStackDepth"; + char stackDepthString[PROPERTY_VALUE_MAX]; + if (property_get(propertyName, stackDepthString, "") > 0) { + char* end; + size_t value = strtoul(stackDepthString, &end, 10); + if (*end != '\0') { + LOG(ERROR) << "Ignoring " << propertyName << " '" << stackDepthString + << "' --- invalid"; + } else { + max_stack_depth_ = value; + } + } +#endif +} + +AllocRecordObjectMap::~AllocRecordObjectMap() { + STLDeleteValues(&entries_); +} + +void AllocRecordObjectMap::SweepAllocationRecords(IsMarkedCallback* callback, void* arg) { + VLOG(heap) << "Start SweepAllocationRecords()"; + size_t count_deleted = 0, count_moved = 0; + for (auto it = entries_.begin(), end = entries_.end(); it != end;) { + // This does not need a read barrier because this is called by GC. + mirror::Object* old_object = it->first.Read<kWithoutReadBarrier>(); + AllocRecord* record = it->second; + mirror::Object* new_object = callback(old_object, arg); + if (new_object == nullptr) { + delete record; + it = entries_.erase(it); + ++count_deleted; + } else { + if (old_object != new_object) { + it->first = GcRoot<mirror::Object>(new_object); + ++count_moved; + } + ++it; + } + } + VLOG(heap) << "Deleted " << count_deleted << " allocation records"; + VLOG(heap) << "Updated " << count_moved << " allocation records"; +} + +struct AllocRecordStackVisitor : public StackVisitor { + AllocRecordStackVisitor(Thread* thread, AllocRecordStackTrace* trace_in, size_t max) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames), + trace(trace_in), + depth(0), + max_depth(max) {} + + // TODO: Enable annotalysis. We know lock is held in constructor, but abstraction confuses + // annotalysis. + bool VisitFrame() OVERRIDE NO_THREAD_SAFETY_ANALYSIS { + if (depth >= max_depth) { + return false; + } + ArtMethod* m = GetMethod(); + if (!m->IsRuntimeMethod()) { + trace->SetStackElementAt(depth, m, GetDexPc()); + ++depth; + } + return true; + } + + ~AllocRecordStackVisitor() { + trace->SetDepth(depth); + } + + AllocRecordStackTrace* trace; + size_t depth; + const size_t max_depth; +}; + +void AllocRecordObjectMap::SetAllocTrackingEnabled(bool enable) { + Thread* self = Thread::Current(); + Heap* heap = Runtime::Current()->GetHeap(); + if (enable) { + { + MutexLock mu(self, *Locks::alloc_tracker_lock_); + if (heap->IsAllocTrackingEnabled()) { + return; // Already enabled, bail. 
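SweepAllocationRecords() above is the usual pattern for sweeping a weak container: ask the collector what became of each object, drop the record if the object died, and rewrite the entry if it moved. Below is a standalone sketch of that loop over a std::list, assuming plain pointers and a local IsMarked() in place of GcRoot<> and the GC's IsMarkedCallback; none of these names are ART's.

// Illustrative sweep of weak (object, record) entries; Object, Record and
// IsMarked are hypothetical stand-ins for mirror::Object, AllocRecord and the
// collector's IsMarkedCallback.
#include <cstddef>
#include <iostream>
#include <list>
#include <utility>

struct Object { bool alive; Object* moved_to; };
struct Record { size_t bytes; };

// Returns nullptr if dead, the new address if the object moved, else the same pointer.
Object* IsMarked(Object* obj) {
  if (!obj->alive) return nullptr;
  return obj->moved_to != nullptr ? obj->moved_to : obj;
}

using EntryList = std::list<std::pair<Object*, Record*>>;

void SweepRecords(EntryList* entries) {
  size_t deleted = 0, moved = 0;
  for (auto it = entries->begin(); it != entries->end();) {
    Object* old_obj = it->first;
    Object* new_obj = IsMarked(old_obj);
    if (new_obj == nullptr) {        // Object died: free and drop the record.
      delete it->second;
      it = entries->erase(it);
      ++deleted;
    } else {
      if (new_obj != old_obj) {      // Object moved: update the weak reference.
        it->first = new_obj;
        ++moved;
      }
      ++it;
    }
  }
  std::cout << "deleted " << deleted << ", updated " << moved << "\n";
}

int main() {
  Object live{true, nullptr}, dead{false, nullptr}, target{true, nullptr};
  Object relocated{true, &target};
  EntryList entries;
  entries.emplace_back(&live, new Record{16});
  entries.emplace_back(&dead, new Record{32});
  entries.emplace_back(&relocated, new Record{64});
  SweepRecords(&entries);            // Leaves two entries; the dead object's record is freed.
  for (auto& e : entries) delete e.second;
  return 0;
}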
+ } + AllocRecordObjectMap* records = new AllocRecordObjectMap(); + CHECK(records != nullptr); + records->SetProperties(); + std::string self_name; + self->GetThreadName(self_name); + if (self_name == "JDWP") { + records->alloc_ddm_thread_id_ = self->GetTid(); + } + size_t sz = sizeof(AllocRecordStackTraceElement) * records->max_stack_depth_ + + sizeof(AllocRecord) + sizeof(AllocRecordStackTrace); + LOG(INFO) << "Enabling alloc tracker (" << records->alloc_record_max_ << " entries of " + << records->max_stack_depth_ << " frames, taking up to " + << PrettySize(sz * records->alloc_record_max_) << ")"; + heap->SetAllocationRecords(records); + heap->SetAllocTrackingEnabled(true); + } + Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints(); + } else { + { + MutexLock mu(self, *Locks::alloc_tracker_lock_); + if (!heap->IsAllocTrackingEnabled()) { + return; // Already disabled, bail. + } + heap->SetAllocTrackingEnabled(false); + LOG(INFO) << "Disabling alloc tracker"; + heap->SetAllocationRecords(nullptr); + } + // If an allocation comes in before we uninstrument, we will safely drop it on the floor. + Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints(); + } +} + +void AllocRecordObjectMap::RecordAllocation(Thread* self, mirror::Object* obj, size_t byte_count) { + MutexLock mu(self, *Locks::alloc_tracker_lock_); + Heap* heap = Runtime::Current()->GetHeap(); + if (!heap->IsAllocTrackingEnabled()) { + // In the process of shutting down recording, bail. + return; + } + + AllocRecordObjectMap* records = heap->GetAllocationRecords(); + DCHECK(records != nullptr); + + // Do not record for DDM thread + if (records->alloc_ddm_thread_id_ == self->GetTid()) { + return; + } + + DCHECK_LE(records->Size(), records->alloc_record_max_); + + // Remove oldest record. + if (records->Size() == records->alloc_record_max_) { + records->RemoveOldest(); + } + + // Get stack trace. + const size_t max_depth = records->max_stack_depth_; + AllocRecordStackTrace* trace = new AllocRecordStackTrace(self->GetTid(), max_depth); + // add scope to make "visitor" destroyed promptly, in order to set the trace->depth_ + { + AllocRecordStackVisitor visitor(self, trace, max_depth); + visitor.WalkStack(); + } + + // Fill in the basics. + AllocRecord* record = new AllocRecord(byte_count, trace); + + records->Put(obj, record); + DCHECK_LE(records->Size(), records->alloc_record_max_); +} + +} // namespace gc +} // namespace art diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h new file mode 100644 index 0000000000..45b3406cea --- /dev/null +++ b/runtime/gc/allocation_record.h @@ -0,0 +1,271 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_RUNTIME_GC_ALLOCATION_RECORD_H_ +#define ART_RUNTIME_GC_ALLOCATION_RECORD_H_ + +#include <list> + +#include "base/mutex.h" +#include "object_callbacks.h" +#include "gc_root.h" + +namespace art { + +class ArtMethod; +class Thread; + +namespace mirror { + class Class; + class Object; +} + +namespace gc { + +class AllocRecordStackTraceElement { + public: + AllocRecordStackTraceElement() : method_(nullptr), dex_pc_(0) {} + + int32_t ComputeLineNumber() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + ArtMethod* GetMethod() const { + return method_; + } + + void SetMethod(ArtMethod* m) { + method_ = m; + } + + uint32_t GetDexPc() const { + return dex_pc_; + } + + void SetDexPc(uint32_t pc) { + dex_pc_ = pc; + } + + bool operator==(const AllocRecordStackTraceElement& other) const { + if (this == &other) return true; + return method_ == other.method_ && dex_pc_ == other.dex_pc_; + } + + private: + ArtMethod* method_; + uint32_t dex_pc_; +}; + +class AllocRecordStackTrace { + public: + static constexpr size_t kHashMultiplier = 17; + + AllocRecordStackTrace(pid_t tid, size_t max_depth) + : tid_(tid), depth_(0), stack_(new AllocRecordStackTraceElement[max_depth]) {} + + ~AllocRecordStackTrace() { + delete[] stack_; + } + + pid_t GetTid() const { + return tid_; + } + + size_t GetDepth() const { + return depth_; + } + + void SetDepth(size_t depth) { + depth_ = depth; + } + + const AllocRecordStackTraceElement& GetStackElement(size_t index) const { + DCHECK_LT(index, depth_); + return stack_[index]; + } + + void SetStackElementAt(size_t index, ArtMethod* m, uint32_t dex_pc) { + stack_[index].SetMethod(m); + stack_[index].SetDexPc(dex_pc); + } + + bool operator==(const AllocRecordStackTrace& other) const { + if (this == &other) return true; + if (depth_ != other.depth_) return false; + for (size_t i = 0; i < depth_; ++i) { + if (!(stack_[i] == other.stack_[i])) return false; + } + return true; + } + + private: + const pid_t tid_; + size_t depth_; + AllocRecordStackTraceElement* const stack_; +}; + +struct HashAllocRecordTypes { + size_t operator()(const AllocRecordStackTraceElement& r) const { + return std::hash<void*>()(reinterpret_cast<void*>(r.GetMethod())) * + AllocRecordStackTrace::kHashMultiplier + std::hash<uint32_t>()(r.GetDexPc()); + } + + size_t operator()(const AllocRecordStackTrace& r) const { + size_t depth = r.GetDepth(); + size_t result = r.GetTid() * AllocRecordStackTrace::kHashMultiplier + depth; + for (size_t i = 0; i < depth; ++i) { + result = result * AllocRecordStackTrace::kHashMultiplier + (*this)(r.GetStackElement(i)); + } + return result; + } +}; + +template <typename T> struct HashAllocRecordTypesPtr { + size_t operator()(const T* r) const { + if (r == nullptr) return 0; + return HashAllocRecordTypes()(*r); + } +}; + +template <typename T> struct EqAllocRecordTypesPtr { + bool operator()(const T* r1, const T* r2) const { + if (r1 == r2) return true; + if (r1 == nullptr || r2 == nullptr) return false; + return *r1 == *r2; + } +}; + +class AllocRecord { + public: + // All instances of AllocRecord should be managed by an instance of AllocRecordObjectMap. 
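The trace and element classes above compare element-by-element, and HashAllocRecordTypes folds the tid, the depth and each (method, dex pc) pair into one value with kHashMultiplier = 17; this is what later lets unordered containers key on deep trace content rather than pointer identity. Below is a compact standalone sketch of the same hash-and-compare scheme, with an int id standing in for ArtMethod* and a std::vector standing in for the fixed max_depth array.

// Hash/equality sketch mirroring the kHashMultiplier folding above; Frame and
// Trace are stand-ins for AllocRecordStackTraceElement/AllocRecordStackTrace.
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

struct Frame {
  int method_id;       // Stand-in for ArtMethod*.
  uint32_t dex_pc;
  bool operator==(const Frame& o) const {
    return method_id == o.method_id && dex_pc == o.dex_pc;
  }
};

struct Trace {
  int tid;
  std::vector<Frame> frames;  // The real class stores a fixed max_depth array plus a depth field.
  bool operator==(const Trace& o) const { return tid == o.tid && frames == o.frames; }
};

constexpr size_t kHashMultiplier = 17;

size_t HashFrame(const Frame& f) {
  return std::hash<int>()(f.method_id) * kHashMultiplier + std::hash<uint32_t>()(f.dex_pc);
}

size_t HashTrace(const Trace& t) {
  size_t result = t.tid * kHashMultiplier + t.frames.size();
  for (const Frame& f : t.frames) {
    result = result * kHashMultiplier + HashFrame(f);
  }
  return result;
}

int main() {
  Trace a{1, {{100, 4}, {200, 8}}};
  Trace b{1, {{100, 4}, {200, 8}}};
  Trace c{1, {{100, 4}, {200, 12}}};
  std::cout << (a == b) << " " << (HashTrace(a) == HashTrace(b)) << "\n";  // 1 1
  std::cout << (a == c) << "\n";                                           // 0
  return 0;
}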
+ AllocRecord(size_t count, AllocRecordStackTrace* trace) + : byte_count_(count), trace_(trace) {} + + ~AllocRecord() { + delete trace_; + } + + size_t GetDepth() const { + return trace_->GetDepth(); + } + + const AllocRecordStackTrace* GetStackTrace() const { + return trace_; + } + + size_t ByteCount() const { + return byte_count_; + } + + pid_t GetTid() const { + return trace_->GetTid(); + } + + const AllocRecordStackTraceElement& StackElement(size_t index) const { + return trace_->GetStackElement(index); + } + + private: + const size_t byte_count_; + // TODO: Currently trace_ is like a std::unique_ptr, + // but in future with deduplication it could be a std::shared_ptr. + const AllocRecordStackTrace* const trace_; +}; + +class AllocRecordObjectMap { + public: + // Since the entries contain weak roots, they need a read barrier. Do not directly access + // the mirror::Object pointers in it. Use functions that contain read barriers. + // No need for "const AllocRecord*" in the list, because all fields of AllocRecord are const. + typedef std::list<std::pair<GcRoot<mirror::Object>, AllocRecord*>> EntryList; + + // "static" because it is part of double-checked locking. It needs to check a bool first, + // in order to make sure the AllocRecordObjectMap object is not null. + static void RecordAllocation(Thread* self, mirror::Object* obj, size_t byte_count) + LOCKS_EXCLUDED(Locks::alloc_tracker_lock_) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + static void SetAllocTrackingEnabled(bool enabled) LOCKS_EXCLUDED(Locks::alloc_tracker_lock_); + + AllocRecordObjectMap() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) + : alloc_record_max_(kDefaultNumAllocRecords), + max_stack_depth_(kDefaultAllocStackDepth), + alloc_ddm_thread_id_(0) {} + + ~AllocRecordObjectMap(); + + void Put(mirror::Object* obj, AllocRecord* record) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + entries_.emplace_back(GcRoot<mirror::Object>(obj), record); + } + + size_t Size() const SHARED_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return entries_.size(); + } + + void SweepAllocationRecords(IsMarkedCallback* callback, void* arg) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); + + void RemoveOldest() + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + DCHECK(!entries_.empty()); + delete entries_.front().second; + entries_.pop_front(); + } + + // TODO: Is there a better way to hide the entries_'s type? + EntryList::iterator Begin() + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return entries_.begin(); + } + + EntryList::iterator End() + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return entries_.end(); + } + + EntryList::reverse_iterator RBegin() + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return entries_.rbegin(); + } + + EntryList::reverse_iterator REnd() + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return entries_.rend(); + } + + private: + static constexpr size_t kDefaultNumAllocRecords = 512 * 1024; + static constexpr size_t kDefaultAllocStackDepth = 4; + size_t alloc_record_max_ GUARDED_BY(Locks::alloc_tracker_lock_); + // The implementation always allocates max_stack_depth_ number of frames for each stack trace. 
+ // As long as the max depth is not very large, this is not a waste of memory since most stack + // traces will fill up the max depth number of the frames. + size_t max_stack_depth_ GUARDED_BY(Locks::alloc_tracker_lock_); + pid_t alloc_ddm_thread_id_ GUARDED_BY(Locks::alloc_tracker_lock_); + EntryList entries_ GUARDED_BY(Locks::alloc_tracker_lock_); + + void SetProperties() EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); +}; + +} // namespace gc +} // namespace art +#endif // ART_RUNTIME_GC_ALLOCATION_RECORD_H_ diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 658390dd2d..6984c1624f 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -1002,97 +1002,167 @@ void ConcurrentCopying::AssertToSpaceInvariant(mirror::Object* obj, MemberOffset } else if (region_space_->IsInFromSpace(ref)) { // Not OK. Do extra logging. if (obj != nullptr) { - if (kUseBakerReadBarrier) { - LOG(INFO) << "holder=" << obj << " " << PrettyTypeOf(obj) - << " holder rb_ptr=" << obj->GetReadBarrierPointer(); - } else { - LOG(INFO) << "holder=" << obj << " " << PrettyTypeOf(obj); - } - if (region_space_->IsInFromSpace(obj)) { - LOG(INFO) << "holder is in the from-space."; - } else if (region_space_->IsInToSpace(obj)) { - LOG(INFO) << "holder is in the to-space."; - } else if (region_space_->IsInUnevacFromSpace(obj)) { - LOG(INFO) << "holder is in the unevac from-space."; - if (region_space_bitmap_->Test(obj)) { - LOG(INFO) << "holder is marked in the region space bitmap."; - } else { - LOG(INFO) << "holder is not marked in the region space bitmap."; - } - } else { - // In a non-moving space. - if (immune_region_.ContainsObject(obj)) { - LOG(INFO) << "holder is in the image or the zygote space."; - accounting::ContinuousSpaceBitmap* cc_bitmap = - cc_heap_bitmap_->GetContinuousSpaceBitmap(obj); - CHECK(cc_bitmap != nullptr) - << "An immune space object must have a bitmap."; - if (cc_bitmap->Test(obj)) { - LOG(INFO) << "holder is marked in the bit map."; - } else { - LOG(INFO) << "holder is NOT marked in the bit map."; - } - } else { - LOG(INFO) << "holder is in a non-moving (or main) space."; - accounting::ContinuousSpaceBitmap* mark_bitmap = - heap_mark_bitmap_->GetContinuousSpaceBitmap(obj); - accounting::LargeObjectBitmap* los_bitmap = - heap_mark_bitmap_->GetLargeObjectBitmap(obj); - CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range"; - bool is_los = mark_bitmap == nullptr; - if (!is_los && mark_bitmap->Test(obj)) { - LOG(INFO) << "holder is marked in the mark bit map."; - } else if (is_los && los_bitmap->Test(obj)) { - LOG(INFO) << "holder is marked in the los bit map."; - } else { - // If ref is on the allocation stack, then it is considered - // mark/alive (but not necessarily on the live stack.) - if (IsOnAllocStack(obj)) { - LOG(INFO) << "holder is on the alloc stack."; - } else { - LOG(INFO) << "holder is not marked or on the alloc stack."; - } - } - } - } - LOG(INFO) << "offset=" << offset.SizeValue(); + LogFromSpaceRefHolder(obj, offset); } + ref->GetLockWord(false).Dump(LOG(INTERNAL_FATAL)); CHECK(false) << "Found from-space ref " << ref << " " << PrettyTypeOf(ref); } else { - // In a non-moving spaces. Check that the ref is marked. - if (immune_region_.ContainsObject(ref)) { - accounting::ContinuousSpaceBitmap* cc_bitmap = - cc_heap_bitmap_->GetContinuousSpaceBitmap(ref); - CHECK(cc_bitmap != nullptr) - << "An immune space ref must have a bitmap. 
" << ref; - if (kUseBakerReadBarrier) { - CHECK(cc_bitmap->Test(ref)) - << "Unmarked immune space ref. obj=" << obj << " rb_ptr=" - << obj->GetReadBarrierPointer() << " ref=" << ref; - } else { - CHECK(cc_bitmap->Test(ref)) - << "Unmarked immune space ref. obj=" << obj << " ref=" << ref; - } + AssertToSpaceInvariantInNonMovingSpace(obj, ref); + } + } +} + +class RootPrinter { + public: + RootPrinter() { } + + template <class MirrorType> + ALWAYS_INLINE void VisitRootIfNonNull(mirror::CompressedReference<MirrorType>* root) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + if (!root->IsNull()) { + VisitRoot(root); + } + } + + template <class MirrorType> + void VisitRoot(mirror::Object** root) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + LOG(INTERNAL_FATAL) << "root=" << root << " ref=" << *root; + } + + template <class MirrorType> + void VisitRoot(mirror::CompressedReference<MirrorType>* root) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + LOG(INTERNAL_FATAL) << "root=" << root << " ref=" << root->AsMirrorPtr(); + } +}; + +void ConcurrentCopying::AssertToSpaceInvariant(GcRootSource* gc_root_source, + mirror::Object* ref) { + CHECK(heap_->collector_type_ == kCollectorTypeCC) << static_cast<size_t>(heap_->collector_type_); + if (is_asserting_to_space_invariant_) { + if (region_space_->IsInToSpace(ref)) { + // OK. + return; + } else if (region_space_->IsInUnevacFromSpace(ref)) { + CHECK(region_space_bitmap_->Test(ref)) << ref; + } else if (region_space_->IsInFromSpace(ref)) { + // Not OK. Do extra logging. + if (gc_root_source == nullptr) { + // No info. + } else if (gc_root_source->HasArtField()) { + ArtField* field = gc_root_source->GetArtField(); + LOG(INTERNAL_FATAL) << "gc root in field " << field << " " << PrettyField(field); + RootPrinter root_printer; + field->VisitRoots(root_printer); + } else if (gc_root_source->HasArtMethod()) { + ArtMethod* method = gc_root_source->GetArtMethod(); + LOG(INTERNAL_FATAL) << "gc root in method " << method << " " << PrettyMethod(method); + RootPrinter root_printer; + method->VisitRoots(root_printer); + } + ref->GetLockWord(false).Dump(LOG(INTERNAL_FATAL)); + region_space_->DumpNonFreeRegions(LOG(INTERNAL_FATAL)); + PrintFileToLog("/proc/self/maps", LogSeverity::INTERNAL_FATAL); + MemMap::DumpMaps(LOG(INTERNAL_FATAL), true); + CHECK(false) << "Found from-space ref " << ref << " " << PrettyTypeOf(ref); + } else { + AssertToSpaceInvariantInNonMovingSpace(nullptr, ref); + } + } +} + +void ConcurrentCopying::LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset offset) { + if (kUseBakerReadBarrier) { + LOG(INFO) << "holder=" << obj << " " << PrettyTypeOf(obj) + << " holder rb_ptr=" << obj->GetReadBarrierPointer(); + } else { + LOG(INFO) << "holder=" << obj << " " << PrettyTypeOf(obj); + } + if (region_space_->IsInFromSpace(obj)) { + LOG(INFO) << "holder is in the from-space."; + } else if (region_space_->IsInToSpace(obj)) { + LOG(INFO) << "holder is in the to-space."; + } else if (region_space_->IsInUnevacFromSpace(obj)) { + LOG(INFO) << "holder is in the unevac from-space."; + if (region_space_bitmap_->Test(obj)) { + LOG(INFO) << "holder is marked in the region space bitmap."; + } else { + LOG(INFO) << "holder is not marked in the region space bitmap."; + } + } else { + // In a non-moving space. 
+ if (immune_region_.ContainsObject(obj)) { + LOG(INFO) << "holder is in the image or the zygote space."; + accounting::ContinuousSpaceBitmap* cc_bitmap = + cc_heap_bitmap_->GetContinuousSpaceBitmap(obj); + CHECK(cc_bitmap != nullptr) + << "An immune space object must have a bitmap."; + if (cc_bitmap->Test(obj)) { + LOG(INFO) << "holder is marked in the bit map."; } else { - accounting::ContinuousSpaceBitmap* mark_bitmap = - heap_mark_bitmap_->GetContinuousSpaceBitmap(ref); - accounting::LargeObjectBitmap* los_bitmap = - heap_mark_bitmap_->GetLargeObjectBitmap(ref); - CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range"; - bool is_los = mark_bitmap == nullptr; - if ((!is_los && mark_bitmap->Test(ref)) || - (is_los && los_bitmap->Test(ref))) { - // OK. + LOG(INFO) << "holder is NOT marked in the bit map."; + } + } else { + LOG(INFO) << "holder is in a non-moving (or main) space."; + accounting::ContinuousSpaceBitmap* mark_bitmap = + heap_mark_bitmap_->GetContinuousSpaceBitmap(obj); + accounting::LargeObjectBitmap* los_bitmap = + heap_mark_bitmap_->GetLargeObjectBitmap(obj); + CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range"; + bool is_los = mark_bitmap == nullptr; + if (!is_los && mark_bitmap->Test(obj)) { + LOG(INFO) << "holder is marked in the mark bit map."; + } else if (is_los && los_bitmap->Test(obj)) { + LOG(INFO) << "holder is marked in the los bit map."; + } else { + // If ref is on the allocation stack, then it is considered + // mark/alive (but not necessarily on the live stack.) + if (IsOnAllocStack(obj)) { + LOG(INFO) << "holder is on the alloc stack."; } else { - // If ref is on the allocation stack, then it may not be - // marked live, but considered marked/alive (but not - // necessarily on the live stack). - CHECK(IsOnAllocStack(ref)) << "Unmarked ref that's not on the allocation stack. " - << "obj=" << obj << " ref=" << ref; + LOG(INFO) << "holder is not marked or on the alloc stack."; } } } } + LOG(INFO) << "offset=" << offset.SizeValue(); +} + +void ConcurrentCopying::AssertToSpaceInvariantInNonMovingSpace(mirror::Object* obj, + mirror::Object* ref) { + // In a non-moving spaces. Check that the ref is marked. + if (immune_region_.ContainsObject(ref)) { + accounting::ContinuousSpaceBitmap* cc_bitmap = + cc_heap_bitmap_->GetContinuousSpaceBitmap(ref); + CHECK(cc_bitmap != nullptr) + << "An immune space ref must have a bitmap. " << ref; + if (kUseBakerReadBarrier) { + CHECK(cc_bitmap->Test(ref)) + << "Unmarked immune space ref. obj=" << obj << " rb_ptr=" + << obj->GetReadBarrierPointer() << " ref=" << ref; + } else { + CHECK(cc_bitmap->Test(ref)) + << "Unmarked immune space ref. obj=" << obj << " ref=" << ref; + } + } else { + accounting::ContinuousSpaceBitmap* mark_bitmap = + heap_mark_bitmap_->GetContinuousSpaceBitmap(ref); + accounting::LargeObjectBitmap* los_bitmap = + heap_mark_bitmap_->GetLargeObjectBitmap(ref); + CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range"; + bool is_los = mark_bitmap == nullptr; + if ((!is_los && mark_bitmap->Test(ref)) || + (is_los && los_bitmap->Test(ref))) { + // OK. + } else { + // If ref is on the allocation stack, then it may not be + // marked live, but considered marked/alive (but not + // necessarily on the live stack). + CHECK(IsOnAllocStack(ref)) << "Unmarked ref that's not on the allocation stack. " + << "obj=" << obj << " ref=" << ref; + } + } } // Used to scan ref fields of an object. 
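AssertToSpaceInvariantInNonMovingSpace() above boils down to one decision: a reference counts as marked if its continuous-space or large-object bitmap says so, or if it is still on the allocation stack; an immune-space ref must be marked in its own bitmap. The reduced sketch below captures that decision, with booleans standing in for the bitmap and allocation-stack queries (the field names are assumptions, not ART's).

// Reduced form of the non-moving-space liveness check above.
#include <cassert>

struct RefState {
  bool in_immune_region;   // Image or zygote space.
  bool marked_in_bitmap;   // Continuous-space bitmap, or LOS bitmap for large objects.
  bool on_alloc_stack;     // Allocated since the last allocation-stack flush.
};

// True when the reference must be treated as marked/alive.
bool ConsideredMarked(const RefState& s) {
  if (s.in_immune_region) {
    return s.marked_in_bitmap;   // An immune-space ref must be marked in its bitmap.
  }
  if (s.marked_in_bitmap) {
    return true;                 // Marked in the mark bitmap or the LOS bitmap.
  }
  return s.on_alloc_stack;       // Not yet swept into a bitmap, but still considered alive.
}

int main() {
  assert(ConsideredMarked({false, true, false}));    // Marked in a bitmap.
  assert(ConsideredMarked({false, false, true}));    // Unmarked but on the alloc stack.
  assert(!ConsideredMarked({false, false, false}));  // The CHECK in the real code would fire.
  return 0;
}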
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index 60ea6b6444..b1897b82f4 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -169,6 +169,8 @@ class ConcurrentCopying : public GarbageCollector { } void AssertToSpaceInvariant(mirror::Object* obj, MemberOffset offset, mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void AssertToSpaceInvariant(GcRootSource* gc_root_source, mirror::Object* ref) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); bool IsInToSpace(mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { DCHECK(ref != nullptr); return IsMarked(ref) == ref; @@ -236,6 +238,10 @@ class ConcurrentCopying : public GarbageCollector { void SwapStacks(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void RecordLiveStackFreezeSize(Thread* self); void ComputeUnevacFromSpaceLiveRatio(); + void LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset offset) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + void AssertToSpaceInvariantInNonMovingSpace(mirror::Object* obj, mirror::Object* ref) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); space::RegionSpace* region_space_; // The underlying region space. std::unique_ptr<Barrier> gc_barrier_; diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index 2d5433032d..ee4568ecea 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -22,6 +22,7 @@ #include "base/time_utils.h" #include "debugger.h" #include "gc/accounting/card_table-inl.h" +#include "gc/allocation_record.h" #include "gc/collector/semi_space.h" #include "gc/space/bump_pointer_space-inl.h" #include "gc/space/dlmalloc_space-inl.h" @@ -168,11 +169,11 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas PushOnAllocationStack(self, &obj); } if (kInstrumented) { - if (Dbg::IsAllocTrackingEnabled()) { - Dbg::RecordAllocation(self, klass, bytes_allocated); + if (IsAllocTrackingEnabled()) { + AllocRecordObjectMap::RecordAllocation(self, obj, bytes_allocated); } } else { - DCHECK(!Dbg::IsAllocTrackingEnabled()); + DCHECK(!IsAllocTrackingEnabled()); } // IsConcurrentGc() isn't known at compile time so we can optimize by not checking it for // the BumpPointer or TLAB allocators. 
This is nice since it allows the entire if statement to be diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index aeab7d80b4..3c020e2f2e 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -209,7 +209,8 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max blocking_gc_count_last_window_(0U), gc_count_rate_histogram_("gc count rate histogram", 1U, kGcCountRateMaxBucketCount), blocking_gc_count_rate_histogram_("blocking gc count rate histogram", 1U, - kGcCountRateMaxBucketCount) { + kGcCountRateMaxBucketCount), + alloc_tracking_enabled_(false) { if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { LOG(INFO) << "Heap() entering"; } @@ -502,6 +503,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max bool no_gap = MemMap::CheckNoGaps(GetImageSpace()->GetMemMap(), non_moving_space_->GetMemMap()); if (!no_gap) { + PrintFileToLog("/proc/self/maps", LogSeverity::ERROR); MemMap::DumpMaps(LOG(ERROR), true); LOG(FATAL) << "There's a gap between the image space and the non-moving space"; } @@ -1000,6 +1002,27 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) { BaseMutex::DumpAll(os); } +void Heap::ResetGcPerformanceInfo() { + for (auto& collector : garbage_collectors_) { + collector->ResetMeasurements(); + } + total_allocation_time_.StoreRelaxed(0); + total_bytes_freed_ever_ = 0; + total_objects_freed_ever_ = 0; + total_wait_time_ = 0; + blocking_gc_count_ = 0; + blocking_gc_time_ = 0; + gc_count_last_window_ = 0; + blocking_gc_count_last_window_ = 0; + last_update_time_gc_count_rate_histograms_ = // Round down by the window duration. + (NanoTime() / kGcCountRateHistogramWindowDuration) * kGcCountRateHistogramWindowDuration; + { + MutexLock mu(Thread::Current(), *gc_complete_lock_); + gc_count_rate_histogram_.Reset(); + blocking_gc_count_rate_histogram_.Reset(); + } +} + uint64_t Heap::GetGcCount() const { uint64_t gc_count = 0U; for (auto& collector : garbage_collectors_) { @@ -1043,6 +1066,7 @@ Heap::~Heap() { STLDeleteElements(&garbage_collectors_); // If we don't reset then the mark stack complains in its destructor. allocation_stack_->Reset(); + allocation_records_.reset(); live_stack_->Reset(); STLDeleteValues(&mod_union_tables_); STLDeleteValues(&remembered_sets_); @@ -3653,5 +3677,18 @@ void Heap::ClearMarkedObjects() { } } +void Heap::SetAllocationRecords(AllocRecordObjectMap* records) { + allocation_records_.reset(records); +} + +void Heap::SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const { + if (IsAllocTrackingEnabled()) { + MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_); + if (IsAllocTrackingEnabled()) { + GetAllocationRecords()->SweepAllocationRecords(visitor, arg); + } + } +} + } // namespace gc } // namespace art diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 81a97414ba..18244c856b 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -58,6 +58,7 @@ namespace mirror { namespace gc { +class AllocRecordObjectMap; class ReferenceProcessor; class TaskProcessor; @@ -597,6 +598,7 @@ class Heap { // GC performance measuring void DumpGcPerformanceInfo(std::ostream& os); + void ResetGcPerformanceInfo(); // Returns true if we currently care about pause times. 
bool CareAboutPauseTimes() const { @@ -683,6 +685,27 @@ class Heap { void DumpGcCountRateHistogram(std::ostream& os) const; void DumpBlockingGcCountRateHistogram(std::ostream& os) const; + // Allocation tracking support + // Callers to this function use double-checked locking to ensure safety on allocation_records_ + bool IsAllocTrackingEnabled() const { + return alloc_tracking_enabled_.LoadRelaxed(); + } + + void SetAllocTrackingEnabled(bool enabled) EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + alloc_tracking_enabled_.StoreRelaxed(enabled); + } + + AllocRecordObjectMap* GetAllocationRecords() const + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_) { + return allocation_records_.get(); + } + + void SetAllocationRecords(AllocRecordObjectMap* records) + EXCLUSIVE_LOCKS_REQUIRED(Locks::alloc_tracker_lock_); + + void SweepAllocationRecords(IsMarkedCallback* visitor, void* arg) const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + private: class ConcurrentGCTask; class CollectorTransitionTask; @@ -1191,6 +1214,11 @@ class Heap { // The histogram of the number of blocking GC invocations per window duration. Histogram<uint64_t> blocking_gc_count_rate_histogram_ GUARDED_BY(gc_complete_lock_); + // Allocation tracking support + Atomic<bool> alloc_tracking_enabled_; + std::unique_ptr<AllocRecordObjectMap> allocation_records_ + GUARDED_BY(Locks::alloc_tracker_lock_); + friend class CollectorTransitionTask; friend class collector::GarbageCollector; friend class collector::MarkCompact; diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index 437fd8c5c9..1923d24805 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -694,7 +694,7 @@ ImageSpace* ImageSpace::Init(const char* image_filename, const char* image_locat const auto section_idx = static_cast<ImageHeader::ImageSections>(i); auto& section = image_header.GetImageSection(section_idx); LOG(INFO) << section_idx << " start=" - << reinterpret_cast<void*>(image_header.GetImageBegin() + section.Offset()) + << reinterpret_cast<void*>(image_header.GetImageBegin() + section.Offset()) << " " << section; } } @@ -730,9 +730,9 @@ ImageSpace* ImageSpace::Init(const char* image_filename, const char* image_locat std::string bitmap_name(StringPrintf("imagespace %s live-bitmap %u", image_filename, bitmap_index)); std::unique_ptr<accounting::ContinuousSpaceBitmap> bitmap( - accounting::ContinuousSpaceBitmap::CreateFromMemMap(bitmap_name, image_map.release(), - reinterpret_cast<uint8_t*>(map->Begin()), - map->Size())); + accounting::ContinuousSpaceBitmap::CreateFromMemMap( + bitmap_name, image_map.release(), reinterpret_cast<uint8_t*>(map->Begin()), + accounting::ContinuousSpaceBitmap::ComputeHeapSize(bitmap_section.Size()))); if (bitmap.get() == nullptr) { *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str()); return nullptr; @@ -755,6 +755,7 @@ ImageSpace* ImageSpace::Init(const char* image_filename, const char* image_locat DCHECK(!error_msg->empty()); return nullptr; } + space->oat_file_non_owned_ = space->oat_file_.get(); if (validate_oat_file && !space->ValidateOatFile(error_msg)) { DCHECK(!error_msg->empty()); @@ -838,10 +839,12 @@ bool ImageSpace::ValidateOatFile(std::string* error_msg) const { return true; } + const OatFile* ImageSpace::GetOatFile() const { - return oat_file_.get(); + return oat_file_non_owned_; } + OatFile* ImageSpace::ReleaseOatFile() { CHECK(oat_file_.get() != nullptr); return oat_file_.release(); diff --git 
a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h index 54dc7a61dd..93ff8aaff7 100644 --- a/runtime/gc/space/image_space.h +++ b/runtime/gc/space/image_space.h @@ -152,6 +152,10 @@ class ImageSpace : public MemMapSpace { // the ClassLinker during it's initialization. std::unique_ptr<OatFile> oat_file_; + // There are times when we need to find the boot image oat file. As + // we release ownership during startup, keep a non-owned reference. + const OatFile* oat_file_non_owned_; + const std::string image_location_; DISALLOW_COPY_AND_ASSIGN(ImageSpace); diff --git a/runtime/gc_root-inl.h b/runtime/gc_root-inl.h index 57d5689e56..ae8a38f43e 100644 --- a/runtime/gc_root-inl.h +++ b/runtime/gc_root-inl.h @@ -27,9 +27,9 @@ namespace art { template<class MirrorType> template<ReadBarrierOption kReadBarrierOption> -inline MirrorType* GcRoot<MirrorType>::Read() const { +inline MirrorType* GcRoot<MirrorType>::Read(GcRootSource* gc_root_source) const { return down_cast<MirrorType*>( - ReadBarrier::BarrierForRoot<mirror::Object, kReadBarrierOption>(&root_)); + ReadBarrier::BarrierForRoot<mirror::Object, kReadBarrierOption>(&root_, gc_root_source)); } template<class MirrorType> inline GcRoot<MirrorType>::GcRoot(MirrorType* ref) diff --git a/runtime/gc_root.h b/runtime/gc_root.h index b67e9c29b4..d6146f348f 100644 --- a/runtime/gc_root.h +++ b/runtime/gc_root.h @@ -22,6 +22,8 @@ #include "mirror/object_reference.h" namespace art { +class ArtField; +class ArtMethod; namespace mirror { class Object; @@ -129,11 +131,43 @@ class SingleRootVisitor : public RootVisitor { virtual void VisitRoot(mirror::Object* root, const RootInfo& info) = 0; }; +class GcRootSource { + public: + GcRootSource() + : field_(nullptr), method_(nullptr) { + } + explicit GcRootSource(ArtField* field) + : field_(field), method_(nullptr) { + } + explicit GcRootSource(ArtMethod* method) + : field_(nullptr), method_(method) { + } + ArtField* GetArtField() const { + return field_; + } + ArtMethod* GetArtMethod() const { + return method_; + } + bool HasArtField() const { + return field_ != nullptr; + } + bool HasArtMethod() const { + return method_ != nullptr; + } + + private: + ArtField* const field_; + ArtMethod* const method_; + + DISALLOW_COPY_AND_ASSIGN(GcRootSource); +}; + template<class MirrorType> class GcRoot { public: template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier> - ALWAYS_INLINE MirrorType* Read() const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + ALWAYS_INLINE MirrorType* Read(GcRootSource* gc_root_source = nullptr) const + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void VisitRoot(RootVisitor* visitor, const RootInfo& info) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc index 6e0e56e82a..f32d5a1b81 100644 --- a/runtime/hprof/hprof.cc +++ b/runtime/hprof/hprof.cc @@ -48,6 +48,7 @@ #include "dex_file-inl.h" #include "gc_root.h" #include "gc/accounting/heap_bitmap.h" +#include "gc/allocation_record.h" #include "gc/heap.h" #include "gc/space/space.h" #include "globals.h" @@ -68,14 +69,13 @@ namespace hprof { static constexpr bool kDirectStream = true; static constexpr uint32_t kHprofTime = 0; -static constexpr uint32_t kHprofNullStackTrace = 0; static constexpr uint32_t kHprofNullThread = 0; static constexpr size_t kMaxObjectsPerSegment = 128; static constexpr size_t kMaxBytesPerSegment = 4096; // The static field-name for the synthetic object generated to account for class static overhead. 
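GcRootSource above exists purely for diagnostics: a caller that knows which ArtField or ArtMethod holds the root can pass that along through GcRoot<>::Read(), and the defaulted nullptr argument leaves every existing call site untouched. Below is a small sketch of that optional-provenance pattern; Field, Method, RootSource and ReadRoot are stand-ins, not the real read-barrier machinery.

// Optional provenance for diagnostics, in the spirit of GcRootSource: callers
// that know where a root lives pass a source, everyone else compiles unchanged
// thanks to the defaulted nullptr parameter. All types here are stand-ins.
#include <iostream>
#include <string>

struct Field  { std::string name; };
struct Method { std::string name; };

class RootSource {
 public:
  explicit RootSource(const Field* f) : field_(f), method_(nullptr) {}
  explicit RootSource(const Method* m) : field_(nullptr), method_(m) {}
  std::string Describe() const {
    if (field_ != nullptr)  return "field " + field_->name;
    if (method_ != nullptr) return "method " + method_->name;
    return "unknown root";
  }

 private:
  const Field* const field_;
  const Method* const method_;
};

// Stand-in for GcRoot<T>::Read(): on a (simulated) invariant failure the
// source, if provided, names the culprit in the log.
int* ReadRoot(int* root, bool invariant_ok, const RootSource* source = nullptr) {
  if (!invariant_ok) {
    std::cerr << "bad root " << root << " from "
              << (source != nullptr ? source->Describe() : std::string("unknown root")) << "\n";
  }
  return root;
}

int main() {
  int value = 42;
  Field f{"Foo.bar"};
  RootSource source(&f);
  ReadRoot(&value, true);              // Old-style call site: no source needed.
  ReadRoot(&value, false, &source);    // Diagnostic path names the holding field.
  return 0;
}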
-static constexpr const char* kStaticOverheadName = "$staticOverhead"; +static constexpr const char* kClassOverheadName = "$classOverhead"; enum HprofTag { HPROF_TAG_STRING = 0x01, @@ -144,6 +144,10 @@ enum HprofBasicType { typedef uint32_t HprofStringId; typedef uint32_t HprofClassObjectId; +typedef uint32_t HprofClassSerialNumber; +typedef uint32_t HprofStackTraceSerialNumber; +typedef uint32_t HprofStackFrameId; +static constexpr HprofStackTraceSerialNumber kHprofNullStackTrace = 0; class EndianOutput { public: @@ -194,6 +198,10 @@ class EndianOutput { AddU4(PointerToLowMemUInt32(value)); } + void AddStackTraceSerialNumber(HprofStackTraceSerialNumber value) { + AddU4(value); + } + // The ID for the synthetic object generated to account for class static overhead. void AddClassStaticsId(const mirror::Class* value) { AddU4(1 | PointerToLowMemUInt32(value)); @@ -415,13 +423,21 @@ class Hprof : public SingleRootVisitor { start_ns_(NanoTime()), current_heap_(HPROF_HEAP_DEFAULT), objects_in_segment_(0), - next_string_id_(0x400000) { + next_string_id_(0x400000), + next_class_serial_number_(1) { LOG(INFO) << "hprof: heap dump \"" << filename_ << "\" starting..."; } void Dump() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) - LOCKS_EXCLUDED(Locks::heap_bitmap_lock_) { + LOCKS_EXCLUDED(Locks::heap_bitmap_lock_, Locks::alloc_tracker_lock_) { + { + MutexLock mu(Thread::Current(), *Locks::alloc_tracker_lock_); + if (Runtime::Current()->GetHeap()->IsAllocTrackingEnabled()) { + PopulateAllocationTrackingTraces(); + } + } + // First pass to measure the size of the dump. size_t overall_size; size_t max_length; @@ -480,11 +496,11 @@ class Hprof : public SingleRootVisitor { objects_in_segment_ = 0; if (header_first) { - ProcessHeader(); + ProcessHeader(true); ProcessBody(); } else { ProcessBody(); - ProcessHeader(); + ProcessHeader(false); } } @@ -501,21 +517,29 @@ class Hprof : public SingleRootVisitor { output_->EndRecord(); } - void ProcessHeader() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { + void ProcessHeader(bool string_first) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { // Write the header. WriteFixedHeader(); // Write the string and class tables, and any stack traces, to the header. // (jhat requires that these appear before any of the data in the body that refers to them.) - WriteStringTable(); + // jhat also requires the string table appear before class table and stack traces. + // However, WriteStackTraces() can modify the string table, so it's necessary to call + // WriteStringTable() last in the first pass, to compute the correct length of the output. + if (string_first) { + WriteStringTable(); + } WriteClassTable(); WriteStackTraces(); + if (!string_first) { + WriteStringTable(); + } output_->EndRecord(); } void WriteClassTable() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { - uint32_t nextSerialNumber = 1; - - for (mirror::Class* c : classes_) { + for (const auto& p : classes_) { + mirror::Class* c = p.first; + HprofClassSerialNumber sn = p.second; CHECK(c != nullptr); output_->StartNewRecord(HPROF_TAG_LOAD_CLASS, kHprofTime); // LOAD CLASS format: @@ -523,9 +547,9 @@ class Hprof : public SingleRootVisitor { // ID: class object ID. We use the address of the class object structure as its ID. 
// U4: stack trace serial number // ID: class name string ID - __ AddU4(nextSerialNumber++); + __ AddU4(sn); __ AddObjectId(c); - __ AddU4(kHprofNullStackTrace); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(c)); __ AddStringId(LookupClassNameId(c)); } } @@ -567,15 +591,31 @@ class Hprof : public SingleRootVisitor { HprofClassObjectId LookupClassId(mirror::Class* c) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { if (c != nullptr) { - auto result = classes_.insert(c); - const mirror::Class* present = *result.first; - CHECK_EQ(present, c); - // Make sure that we've assigned a string ID for this class' name - LookupClassNameId(c); + auto it = classes_.find(c); + if (it == classes_.end()) { + // first time to see this class + HprofClassSerialNumber sn = next_class_serial_number_++; + classes_.Put(c, sn); + // Make sure that we've assigned a string ID for this class' name + LookupClassNameId(c); + } } return PointerToLowMemUInt32(c); } + HprofStackTraceSerialNumber LookupStackTraceSerialNumber(const mirror::Object* obj) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + auto r = allocation_records_.find(obj); + if (r == allocation_records_.end()) { + return kHprofNullStackTrace; + } else { + const gc::AllocRecordStackTrace* trace = r->second; + auto result = traces_.find(trace); + CHECK(result != traces_.end()); + return result->second; + } + } + HprofStringId LookupStringId(mirror::String* string) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { return LookupStringId(string->ToModifiedUtf8()); } @@ -622,12 +662,66 @@ class Hprof : public SingleRootVisitor { __ AddU4(static_cast<uint32_t>(nowMs & 0xFFFFFFFF)); } - void WriteStackTraces() { + void WriteStackTraces() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { // Write a dummy stack trace record so the analysis tools don't freak out. output_->StartNewRecord(HPROF_TAG_STACK_TRACE, kHprofTime); - __ AddU4(kHprofNullStackTrace); + __ AddStackTraceSerialNumber(kHprofNullStackTrace); __ AddU4(kHprofNullThread); __ AddU4(0); // no frames + + // TODO: jhat complains "WARNING: Stack trace not found for serial # -1", but no trace should + // have -1 as its serial number (as long as HprofStackTraceSerialNumber doesn't overflow). + for (const auto& it : traces_) { + const gc::AllocRecordStackTrace* trace = it.first; + HprofStackTraceSerialNumber trace_sn = it.second; + size_t depth = trace->GetDepth(); + + // First write stack frames of the trace + for (size_t i = 0; i < depth; ++i) { + const gc::AllocRecordStackTraceElement* frame = &trace->GetStackElement(i); + ArtMethod* method = frame->GetMethod(); + CHECK(method != nullptr); + output_->StartNewRecord(HPROF_TAG_STACK_FRAME, kHprofTime); + // STACK FRAME format: + // ID: stack frame ID. We use the address of the AllocRecordStackTraceElement object as its ID. 
+ // ID: method name string ID + // ID: method signature string ID + // ID: source file name string ID + // U4: class serial number + // U4: >0, line number; 0, no line information available; -1, unknown location + auto frame_result = frames_.find(frame); + CHECK(frame_result != frames_.end()); + __ AddU4(frame_result->second); + __ AddStringId(LookupStringId(method->GetName())); + __ AddStringId(LookupStringId(method->GetSignature().ToString())); + const char* source_file = method->GetDeclaringClassSourceFile(); + if (source_file == nullptr) { + source_file = ""; + } + __ AddStringId(LookupStringId(source_file)); + auto class_result = classes_.find(method->GetDeclaringClass()); + CHECK(class_result != classes_.end()); + __ AddU4(class_result->second); + __ AddU4(frame->ComputeLineNumber()); + } + + // Then write the trace itself + output_->StartNewRecord(HPROF_TAG_STACK_TRACE, kHprofTime); + // STACK TRACE format: + // U4: stack trace serial number. We use the address of the AllocRecordStackTrace object as its serial number. + // U4: thread serial number. We use Thread::GetTid(). + // U4: number of frames + // [ID]*: series of stack frame ID's + __ AddStackTraceSerialNumber(trace_sn); + __ AddU4(trace->GetTid()); + __ AddU4(depth); + for (size_t i = 0; i < depth; ++i) { + const gc::AllocRecordStackTraceElement* frame = &trace->GetStackElement(i); + auto frame_result = frames_.find(frame); + CHECK(frame_result != frames_.end()); + __ AddU4(frame_result->second); + } + } } bool DumpToDdmsBuffered(size_t overall_size ATTRIBUTE_UNUSED, size_t max_length ATTRIBUTE_UNUSED) @@ -723,6 +817,40 @@ class Hprof : public SingleRootVisitor { return true; } + void PopulateAllocationTrackingTraces() + EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, Locks::alloc_tracker_lock_) { + gc::AllocRecordObjectMap* records = Runtime::Current()->GetHeap()->GetAllocationRecords(); + CHECK(records != nullptr); + HprofStackTraceSerialNumber next_trace_sn = kHprofNullStackTrace + 1; + HprofStackFrameId next_frame_id = 0; + + for (auto it = records->Begin(), end = records->End(); it != end; ++it) { + const mirror::Object* obj = it->first.Read(); + const gc::AllocRecordStackTrace* trace = it->second->GetStackTrace(); + + // Copy the pair into a real hash map to speed up look up. + auto records_result = allocation_records_.emplace(obj, trace); + // The insertion should always succeed, i.e. no duplicate object pointers in "records" + CHECK(records_result.second); + + // Generate serial numbers for traces, and IDs for frames. + auto traces_result = traces_.find(trace); + if (traces_result == traces_.end()) { + traces_.emplace(trace, next_trace_sn++); + // only check frames if the trace is newly discovered + for (size_t i = 0, depth = trace->GetDepth(); i < depth; ++i) { + const gc::AllocRecordStackTraceElement* frame = &trace->GetStackElement(i); + auto frames_result = frames_.find(frame); + if (frames_result == frames_.end()) { + frames_.emplace(frame, next_frame_id++); + } + } + } + } + CHECK_EQ(traces_.size(), next_trace_sn - kHprofNullStackTrace - 1); + CHECK_EQ(frames_.size(), next_frame_id); + } + // If direct_to_ddms_ is set, "filename_" and "fd" will be ignored. // Otherwise, "filename_" must be valid, though if "fd" >= 0 it will // only be used for debug messages. @@ -737,9 +865,18 @@ class Hprof : public SingleRootVisitor { HprofHeapId current_heap_; // Which heap we're currently dumping. 
size_t objects_in_segment_; - std::set<mirror::Class*> classes_; HprofStringId next_string_id_; SafeMap<std::string, HprofStringId> strings_; + HprofClassSerialNumber next_class_serial_number_; + SafeMap<mirror::Class*, HprofClassSerialNumber> classes_; + + std::unordered_map<const gc::AllocRecordStackTrace*, HprofStackTraceSerialNumber, + gc::HashAllocRecordTypesPtr<gc::AllocRecordStackTrace>, + gc::EqAllocRecordTypesPtr<gc::AllocRecordStackTrace>> traces_; + std::unordered_map<const gc::AllocRecordStackTraceElement*, HprofStackFrameId, + gc::HashAllocRecordTypesPtr<gc::AllocRecordStackTraceElement>, + gc::EqAllocRecordTypesPtr<gc::AllocRecordStackTraceElement>> frames_; + std::unordered_map<const mirror::Object*, const gc::AllocRecordStackTrace*> allocation_records_; DISALLOW_COPY_AND_ASSIGN(Hprof); }; @@ -881,10 +1018,6 @@ void Hprof::MarkRootObject(const mirror::Object* obj, jobject jni_obj, HprofHeap ++objects_in_segment_; } -static int StackTraceSerialNumber(const mirror::Object* /*obj*/) { - return kHprofNullStackTrace; -} - void Hprof::DumpHeapObject(mirror::Object* obj) { // Ignore classes that are retired. if (obj->IsClass() && obj->AsClass()->IsRetired()) { @@ -959,24 +1092,30 @@ void Hprof::DumpHeapClass(mirror::Class* klass) { // Class is allocated but not yet loaded: we cannot access its fields or super class. return; } - size_t sFieldCount = klass->NumStaticFields(); - if (sFieldCount != 0) { - int byteLength = sFieldCount * sizeof(JValue); // TODO bogus; fields are packed + const size_t num_static_fields = klass->NumStaticFields(); + // Total class size including embedded IMT, embedded vtable, and static fields. + const size_t class_size = klass->GetClassSize(); + // Class size excluding static fields (relies on reference fields being the first static fields). + const size_t class_size_without_overhead = sizeof(mirror::Class); + CHECK_LE(class_size_without_overhead, class_size); + const size_t overhead_size = class_size - class_size_without_overhead; + + if (overhead_size != 0) { // Create a byte array to reflect the allocation of the // StaticField array at the end of this class. __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP); __ AddClassStaticsId(klass); - __ AddU4(StackTraceSerialNumber(klass)); - __ AddU4(byteLength); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(klass)); + __ AddU4(overhead_size); __ AddU1(hprof_basic_byte); - for (int i = 0; i < byteLength; ++i) { + for (size_t i = 0; i < overhead_size; ++i) { __ AddU1(0); } } __ AddU1(HPROF_CLASS_DUMP); __ AddClassId(LookupClassId(klass)); - __ AddU4(StackTraceSerialNumber(klass)); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(klass)); __ AddClassId(LookupClassId(klass->GetSuperClass())); __ AddObjectId(klass->GetClassLoader()); __ AddObjectId(nullptr); // no signer @@ -986,7 +1125,7 @@ void Hprof::DumpHeapClass(mirror::Class* klass) { if (klass->IsClassClass()) { // ClassObjects have their static fields appended, so aren't all the same size. // But they're at least this size. - __ AddU4(sizeof(mirror::Class)); // instance size + __ AddU4(class_size_without_overhead); // instance size } else if (klass->IsStringClass()) { // Strings are variable length with character data at the end like arrays. // This outputs the size of an empty string. 
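The DumpHeapClass hunk above replaces the old num_static_fields * sizeof(JValue) guess with GetClassSize() minus sizeof(mirror::Class), and reports that overhead as a synthetic byte array so analysis tools account for the embedded IMT, embedded vtable and static fields. A sketch of the record shape being emitted, assuming 4-byte HPROF identifiers and the standard HPROF sub-record tag and basic-type values; the Emit helpers are local to the sketch:

#include <cstdint>
#include <vector>

// Append big-endian integers to a byte sink (HPROF is big-endian).
void EmitU1(std::vector<uint8_t>* out, uint8_t v) { out->push_back(v); }
void EmitU4(std::vector<uint8_t>* out, uint32_t v) {
  for (int shift = 24; shift >= 0; shift -= 8) {
    out->push_back(static_cast<uint8_t>((v >> shift) & 0xFF));
  }
}

// Synthetic byte[] standing in for the class overhead (vtable/imt + statics).
void EmitClassOverhead(std::vector<uint8_t>* out, uint32_t statics_id,
                       uint32_t trace_sn, uint32_t overhead_size) {
  EmitU1(out, 0x23);           // HPROF_PRIMITIVE_ARRAY_DUMP
  EmitU4(out, statics_id);     // pseudo object id for the statics area
  EmitU4(out, trace_sn);       // stack trace serial number
  EmitU4(out, overhead_size);  // element count == byte count
  EmitU1(out, 8);              // hprof_basic_byte
  out->insert(out->end(), overhead_size, 0);  // contents are irrelevant, zero-fill
}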
@@ -1000,15 +1139,15 @@ void Hprof::DumpHeapClass(mirror::Class* klass) { __ AddU2(0); // empty const pool // Static fields - if (sFieldCount == 0) { - __ AddU2((uint16_t)0); + if (overhead_size == 0) { + __ AddU2(static_cast<uint16_t>(0)); } else { - __ AddU2((uint16_t)(sFieldCount+1)); - __ AddStringId(LookupStringId(kStaticOverheadName)); + __ AddU2(static_cast<uint16_t>(num_static_fields + 1)); + __ AddStringId(LookupStringId(kClassOverheadName)); __ AddU1(hprof_basic_object); __ AddClassStaticsId(klass); - for (size_t i = 0; i < sFieldCount; ++i) { + for (size_t i = 0; i < num_static_fields; ++i) { ArtField* f = klass->GetStaticField(i); size_t size; @@ -1072,7 +1211,7 @@ void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass) { __ AddU1(HPROF_OBJECT_ARRAY_DUMP); __ AddObjectId(obj); - __ AddU4(StackTraceSerialNumber(obj)); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); __ AddU4(length); __ AddClassId(LookupClassId(klass)); @@ -1087,7 +1226,7 @@ void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass) { __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP); __ AddObjectId(obj); - __ AddU4(StackTraceSerialNumber(obj)); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); __ AddU4(length); __ AddU1(t); @@ -1108,7 +1247,7 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { // obj is an instance object. __ AddU1(HPROF_INSTANCE_DUMP); __ AddObjectId(obj); - __ AddU4(StackTraceSerialNumber(obj)); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); __ AddClassId(LookupClassId(klass)); // Reserve some space for the length of the instance data, which we won't @@ -1170,7 +1309,7 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP); __ AddObjectId(value); - __ AddU4(StackTraceSerialNumber(obj)); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); __ AddU4(s->GetLength()); __ AddU1(hprof_basic_char); __ AddU2List(s->GetValue(), s->GetLength()); diff --git a/runtime/image.cc b/runtime/image.cc index 947c914de6..44193da4ee 100644 --- a/runtime/image.cc +++ b/runtime/image.cc @@ -24,7 +24,7 @@ namespace art { const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' }; -const uint8_t ImageHeader::kImageVersion[] = { '0', '1', '6', '\0' }; +const uint8_t ImageHeader::kImageVersion[] = { '0', '1', '7', '\0' }; ImageHeader::ImageHeader(uint32_t image_begin, uint32_t image_size, diff --git a/runtime/image.h b/runtime/image.h index c6be7ef3f7..d856f218af 100644 --- a/runtime/image.h +++ b/runtime/image.h @@ -142,6 +142,7 @@ class PACKED(4) ImageHeader { kSectionObjects, kSectionArtFields, kSectionArtMethods, + kSectionInternedStrings, kSectionImageBitmap, kSectionCount, // Number of elements in enum. 
}; diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc index 9abbca8460..2a962784ca 100644 --- a/runtime/intern_table.cc +++ b/runtime/intern_table.cc @@ -152,20 +152,28 @@ void InternTable::AddImageStringsToTable(gc::space::ImageSpace* image_space) { CHECK(image_space != nullptr); MutexLock mu(Thread::Current(), *Locks::intern_table_lock_); if (!image_added_to_intern_table_) { - mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches); - mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>(); - for (int32_t i = 0; i < dex_caches->GetLength(); ++i) { - mirror::DexCache* dex_cache = dex_caches->Get(i); - const DexFile* dex_file = dex_cache->GetDexFile(); - const size_t num_strings = dex_file->NumStringIds(); - for (size_t j = 0; j < num_strings; ++j) { - mirror::String* image_string = dex_cache->GetResolvedString(j); - if (image_string != nullptr) { - mirror::String* found = LookupStrong(image_string); - if (found == nullptr) { - InsertStrong(image_string); - } else { - DCHECK_EQ(found, image_string); + const ImageHeader* const header = &image_space->GetImageHeader(); + // Check if we have the interned strings section. + const ImageSection& section = header->GetImageSection(ImageHeader::kSectionInternedStrings); + if (section.Size() > 0) { + ReadFromMemoryLocked(image_space->Begin() + section.Offset()); + } else { + // TODO: Delete this logic? + mirror::Object* root = header->GetImageRoot(ImageHeader::kDexCaches); + mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>(); + for (int32_t i = 0; i < dex_caches->GetLength(); ++i) { + mirror::DexCache* dex_cache = dex_caches->Get(i); + const DexFile* dex_file = dex_cache->GetDexFile(); + const size_t num_strings = dex_file->NumStringIds(); + for (size_t j = 0; j < num_strings; ++j) { + mirror::String* image_string = dex_cache->GetResolvedString(j); + if (image_string != nullptr) { + mirror::String* found = LookupStrong(image_string); + if (found == nullptr) { + InsertStrong(image_string); + } else { + DCHECK_EQ(found, image_string); + } } } } @@ -285,6 +293,29 @@ void InternTable::SweepInternTableWeaks(IsMarkedCallback* callback, void* arg) { weak_interns_.SweepWeaks(callback, arg); } +void InternTable::AddImageInternTable(gc::space::ImageSpace* image_space) { + const ImageSection& intern_section = image_space->GetImageHeader().GetImageSection( + ImageHeader::kSectionInternedStrings); + // Read the string tables from the image. 
+ const uint8_t* ptr = image_space->Begin() + intern_section.Offset(); + const size_t offset = ReadFromMemory(ptr); + CHECK_LE(offset, intern_section.Size()); +} + +size_t InternTable::ReadFromMemory(const uint8_t* ptr) { + MutexLock mu(Thread::Current(), *Locks::intern_table_lock_); + return ReadFromMemoryLocked(ptr); +} + +size_t InternTable::ReadFromMemoryLocked(const uint8_t* ptr) { + return strong_interns_.ReadIntoPreZygoteTable(ptr); +} + +size_t InternTable::WriteToMemory(uint8_t* ptr) { + MutexLock mu(Thread::Current(), *Locks::intern_table_lock_); + return strong_interns_.WriteFromPostZygoteTable(ptr); +} + std::size_t InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& root) const { if (kIsDebugBuild) { Locks::mutator_lock_->AssertSharedHeld(Thread::Current()); @@ -300,6 +331,17 @@ bool InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& a, return a.Read()->Equals(b.Read()); } +size_t InternTable::Table::ReadIntoPreZygoteTable(const uint8_t* ptr) { + CHECK_EQ(pre_zygote_table_.Size(), 0u); + size_t read_count = 0; + pre_zygote_table_ = UnorderedSet(ptr, false /* make copy */, &read_count); + return read_count; +} + +size_t InternTable::Table::WriteFromPostZygoteTable(uint8_t* ptr) { + return post_zygote_table_.WriteToMemory(ptr); +} + void InternTable::Table::Remove(mirror::String* s) { auto it = post_zygote_table_.Find(GcRoot<mirror::String>(s)); if (it != post_zygote_table_.end()) { @@ -325,9 +367,13 @@ mirror::String* InternTable::Table::Find(mirror::String* s) { } void InternTable::Table::SwapPostZygoteWithPreZygote() { - CHECK(pre_zygote_table_.Empty()); - std::swap(pre_zygote_table_, post_zygote_table_); - VLOG(heap) << "Swapping " << pre_zygote_table_.Size() << " interns to the pre zygote table"; + if (pre_zygote_table_.Empty()) { + std::swap(pre_zygote_table_, post_zygote_table_); + VLOG(heap) << "Swapping " << pre_zygote_table_.Size() << " interns to the pre zygote table"; + } else { + // This case happens if read the intern table from the image. + VLOG(heap) << "Not swapping due to non-empty pre_zygote_table_"; + } } void InternTable::Table::Insert(mirror::String* s) { diff --git a/runtime/intern_table.h b/runtime/intern_table.h index 1e5d3c22c9..97ce73c52e 100644 --- a/runtime/intern_table.h +++ b/runtime/intern_table.h @@ -97,6 +97,20 @@ class InternTable { void SwapPostZygoteWithPreZygote() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::intern_table_lock_); + // Add an intern table which was serialized to the image. + void AddImageInternTable(gc::space::ImageSpace* image_space) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::intern_table_lock_); + + // Read the intern table from memory. The elements aren't copied, the intern hash set data will + // point to somewhere within ptr. Only reads the strong interns. + size_t ReadFromMemory(const uint8_t* ptr) LOCKS_EXCLUDED(Locks::intern_table_lock_) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + + // Write the post zygote intern table to a pointer. Only writes the strong interns since it is + // expected that there is no weak interns since this is called from the image writer. 
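WriteFromPostZygoteTable and ReadIntoPreZygoteTable above push the strong intern table through flat memory: the image writer serializes the post-zygote HashSet into the kSectionInternedStrings section, and at runtime the pre-zygote table is constructed directly over that data without copying (the false /* make copy */ argument). A rough sketch of the same write/read-in-place round trip, using a plain vector and a hypothetical [count][entries] layout instead of ART's HashSet of GcRoot<mirror::String>:

#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical flat layout: a u64 entry count followed by the entries.
struct FlatTableView {
  const uint32_t* entries = nullptr;  // points into the mapped image, not owned
  uint64_t count = 0;
};

// Returns the number of bytes written, like InternTable::WriteToMemory.
size_t WriteFlatTable(const std::vector<uint32_t>& table, uint8_t* ptr) {
  uint64_t count = table.size();
  std::memcpy(ptr, &count, sizeof(count));
  std::memcpy(ptr + sizeof(count), table.data(), count * sizeof(uint32_t));
  return sizeof(count) + count * sizeof(uint32_t);
}

// Returns the number of bytes read; the view aliases ptr, nothing is copied.
// Assumes ptr is suitably aligned for the entry type.
size_t ReadFlatTable(const uint8_t* ptr, FlatTableView* out) {
  std::memcpy(&out->count, ptr, sizeof(out->count));
  out->entries = reinterpret_cast<const uint32_t*>(ptr + sizeof(out->count));
  return sizeof(out->count) + out->count * sizeof(uint32_t);
}

Reading in place is also why SwapPostZygoteWithPreZygote above now tolerates a non-empty pre-zygote table: when the image supplied the data, that table is already populated before the first zygote fork.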
+ size_t WriteToMemory(uint8_t* ptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) + LOCKS_EXCLUDED(Locks::intern_table_lock_); + private: class StringHashEquals { public: @@ -133,6 +147,16 @@ class InternTable { EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_); void SwapPostZygoteWithPreZygote() EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_); size_t Size() const EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_); + // Read pre zygote table is called from ReadFromMemory which happens during runtime creation + // when we load the image intern table. Returns how many bytes were read. + size_t ReadIntoPreZygoteTable(const uint8_t* ptr) + EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // The image writer calls WritePostZygoteTable through WriteToMemory, it writes the interns in + // the post zygote table. Returns how many bytes were written. + size_t WriteFromPostZygoteTable(uint8_t* ptr) + EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); private: typedef HashSet<GcRoot<mirror::String>, GcRootEmptyFn, StringHashEquals, StringHashEquals, @@ -192,6 +216,10 @@ class InternTable { EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_); friend class Transaction; + size_t ReadFromMemoryLocked(const uint8_t* ptr) + EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + bool image_added_to_intern_table_ GUARDED_BY(Locks::intern_table_lock_); bool log_new_roots_ GUARDED_BY(Locks::intern_table_lock_); bool allow_new_interns_ GUARDED_BY(Locks::intern_table_lock_); diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index 2a15087f7e..b21103b1fe 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -416,7 +416,10 @@ static inline int32_t DoPackedSwitch(const Instruction* inst, const ShadowFrame& int32_t test_val = shadow_frame.GetVReg(inst->VRegA_31t(inst_data)); DCHECK_EQ(switch_data[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature)); uint16_t size = switch_data[1]; - DCHECK_GT(size, 0); + if (size == 0) { + // Empty packed switch, move forward by 3 (size of PACKED_SWITCH). + return 3; + } const int32_t* keys = reinterpret_cast<const int32_t*>(&switch_data[2]); DCHECK(IsAligned<4>(keys)); int32_t first_key = keys[0]; diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h index e18d10fa0a..7c48985dfe 100644 --- a/runtime/jdwp/jdwp.h +++ b/runtime/jdwp/jdwp.h @@ -297,7 +297,7 @@ struct JdwpState { private: explicit JdwpState(const JdwpOptions* options); - size_t ProcessRequest(Request* request, ExpandBuf* pReply); + size_t ProcessRequest(Request* request, ExpandBuf* pReply, bool* skip_reply); bool InvokeInProgress(); bool IsConnected(); void SuspendByPolicy(JdwpSuspendPolicy suspend_policy, JDWP::ObjectId thread_self_id) diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc index 612af8bc99..14f097f72a 100644 --- a/runtime/jdwp/jdwp_event.cc +++ b/runtime/jdwp/jdwp_event.cc @@ -99,10 +99,6 @@ put ourselves to sleep. That way we don't interfere with anyone else and don't allow anyone else to interfere with us. 
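The DoPackedSwitch change above lets an empty packed-switch fall through instead of tripping the DCHECK: with size == 0 nothing can match, and 3 is the width of the 31t PACKED_SWITCH instruction in 16-bit code units, so returning 3 resumes at the next instruction. A standalone sketch of the lookup over the dex packed-switch-payload layout (ident, size, first_key, then size relative branch targets); 4-byte alignment of the payload is assumed, as the interpreter's DCHECK does:

#include <cstdint>

// payload[0] = ident (0x0100), payload[1] = size, then a 4-byte first_key and
// size 4-byte relative branch targets.
int32_t PackedSwitch(const uint16_t* payload, int32_t test_val) {
  uint16_t size = payload[1];
  if (size == 0) {
    return 3;  // empty switch: nothing can match, fall through
  }
  const int32_t* keys = reinterpret_cast<const int32_t*>(&payload[2]);
  int32_t first_key = keys[0];
  const int32_t* targets = &keys[1];
  int32_t index = test_val - first_key;
  if (index < 0 || index >= size) {
    return 3;  // value outside the packed range, fall through
  }
  return targets[index];  // relative offset of the matching case
}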
*/ - -#define kJdwpEventCommandSet 64 -#define kJdwpCompositeCommand 100 - namespace art { namespace JDWP { @@ -612,13 +608,10 @@ void JdwpState::SuspendByPolicy(JdwpSuspendPolicy suspend_policy, JDWP::ObjectId */ DebugInvokeReq* const pReq = Dbg::GetInvokeReq(); if (pReq == nullptr) { - /*LOGD("SuspendByPolicy: no invoke needed");*/ break; } - /* grab this before posting/suspending again */ - AcquireJdwpTokenForEvent(thread_self_id); - + // Execute method. Dbg::ExecuteMethod(pReq); } } @@ -749,11 +742,11 @@ static ExpandBuf* eventPrep() { void JdwpState::EventFinish(ExpandBuf* pReq) { uint8_t* buf = expandBufGetBuffer(pReq); - Set4BE(buf, expandBufGetLength(pReq)); - Set4BE(buf + 4, NextRequestSerial()); - Set1(buf + 8, 0); /* flags */ - Set1(buf + 9, kJdwpEventCommandSet); - Set1(buf + 10, kJdwpCompositeCommand); + Set4BE(buf + kJDWPHeaderSizeOffset, expandBufGetLength(pReq)); + Set4BE(buf + kJDWPHeaderIdOffset, NextRequestSerial()); + Set1(buf + kJDWPHeaderFlagsOffset, 0); /* flags */ + Set1(buf + kJDWPHeaderCmdSetOffset, kJDWPEventCmdSet); + Set1(buf + kJDWPHeaderCmdOffset, kJDWPEventCompositeCmd); SendRequest(pReq); diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc index f7f70f6ed7..d4e2656b7e 100644 --- a/runtime/jdwp/jdwp_handler.cc +++ b/runtime/jdwp/jdwp_handler.cc @@ -52,17 +52,6 @@ std::string DescribeRefTypeId(const RefTypeId& ref_type_id) { return StringPrintf("%#" PRIx64 " (%s)", ref_type_id, signature.c_str()); } -// Helper function: write a variable-width value into the output input buffer. -static void WriteValue(ExpandBuf* pReply, int width, uint64_t value) { - switch (width) { - case 1: expandBufAdd1(pReply, value); break; - case 2: expandBufAdd2BE(pReply, value); break; - case 4: expandBufAdd4BE(pReply, value); break; - case 8: expandBufAdd8BE(pReply, value); break; - default: LOG(FATAL) << width; break; - } -} - static JdwpError WriteTaggedObject(ExpandBuf* reply, ObjectId object_id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { uint8_t tag; @@ -92,7 +81,7 @@ static JdwpError WriteTaggedObjectList(ExpandBuf* reply, const std::vector<Objec * If "is_constructor" is set, this returns "object_id" rather than the * expected-to-be-void return value of the called function. */ -static JdwpError RequestInvoke(JdwpState*, Request* request, ExpandBuf* pReply, +static JdwpError RequestInvoke(JdwpState*, Request* request, ObjectId thread_id, ObjectId object_id, RefTypeId class_id, MethodId method_id, bool is_constructor) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { @@ -122,49 +111,15 @@ static JdwpError RequestInvoke(JdwpState*, Request* request, ExpandBuf* pReply, (options & INVOKE_SINGLE_THREADED) ? " (SINGLE_THREADED)" : "", (options & INVOKE_NONVIRTUAL) ? " (NONVIRTUAL)" : ""); - JdwpTag resultTag; - uint64_t resultValue; - ObjectId exceptObjId; - JdwpError err = Dbg::InvokeMethod(thread_id, object_id, class_id, method_id, arg_count, - argValues.get(), argTypes.get(), options, &resultTag, - &resultValue, &exceptObjId); - if (err != ERR_NONE) { - return err; - } - - if (is_constructor) { - // If we invoked a constructor (which actually returns void), return the receiver, - // unless we threw, in which case we return null. - resultTag = JT_OBJECT; - resultValue = (exceptObjId == 0) ? 
object_id : 0; - } - - size_t width = Dbg::GetTagWidth(resultTag); - expandBufAdd1(pReply, resultTag); - if (width != 0) { - WriteValue(pReply, width, resultValue); - } - expandBufAdd1(pReply, JT_OBJECT); - expandBufAddObjectId(pReply, exceptObjId); - - VLOG(jdwp) << " --> returned " << resultTag - << StringPrintf(" %#" PRIx64 " (except=%#" PRIx64 ")", resultValue, exceptObjId); - - /* show detailed debug output */ - if (resultTag == JT_STRING && exceptObjId == 0) { - if (resultValue != 0) { - if (VLOG_IS_ON(jdwp)) { - std::string result_string; - JDWP::JdwpError error = Dbg::StringToUtf8(resultValue, &result_string); - CHECK_EQ(error, JDWP::ERR_NONE); - VLOG(jdwp) << " string '" << result_string << "'"; - } - } else { - VLOG(jdwp) << " string (null)"; - } + JDWP::JdwpError error = Dbg::PrepareInvokeMethod(request->GetId(), thread_id, object_id, + class_id, method_id, arg_count, + argValues.get(), argTypes.get(), options); + if (error == JDWP::ERR_NONE) { + // We successfully requested the invoke. The event thread now owns the arguments array in its + // DebugInvokeReq mailbox. + argValues.release(); } - - return err; + return error; } static JdwpError VM_Version(JdwpState*, Request*, ExpandBuf* pReply) @@ -684,13 +639,14 @@ static JdwpError CT_SetValues(JdwpState* , Request* request, ExpandBuf*) * Example: Eclipse sometimes uses java/lang/Class.forName(String s) on * values in the "variables" display. */ -static JdwpError CT_InvokeMethod(JdwpState* state, Request* request, ExpandBuf* pReply) +static JdwpError CT_InvokeMethod(JdwpState* state, Request* request, + ExpandBuf* pReply ATTRIBUTE_UNUSED) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { RefTypeId class_id = request->ReadRefTypeId(); ObjectId thread_id = request->ReadThreadId(); MethodId method_id = request->ReadMethodId(); - return RequestInvoke(state, request, pReply, thread_id, 0, class_id, method_id, false); + return RequestInvoke(state, request, thread_id, 0, class_id, method_id, false); } /* @@ -700,7 +656,8 @@ static JdwpError CT_InvokeMethod(JdwpState* state, Request* request, ExpandBuf* * Example: in IntelliJ, create a watch on "new String(myByteArray)" to * see the contents of a byte[] as a string. */ -static JdwpError CT_NewInstance(JdwpState* state, Request* request, ExpandBuf* pReply) +static JdwpError CT_NewInstance(JdwpState* state, Request* request, + ExpandBuf* pReply ATTRIBUTE_UNUSED) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { RefTypeId class_id = request->ReadRefTypeId(); ObjectId thread_id = request->ReadThreadId(); @@ -711,7 +668,7 @@ static JdwpError CT_NewInstance(JdwpState* state, Request* request, ExpandBuf* p if (status != ERR_NONE) { return status; } - return RequestInvoke(state, request, pReply, thread_id, object_id, class_id, method_id, true); + return RequestInvoke(state, request, thread_id, object_id, class_id, method_id, true); } /* @@ -863,14 +820,15 @@ static JdwpError OR_MonitorInfo(JdwpState*, Request* request, ExpandBuf* reply) * object), it will try to invoke the object's toString() function. This * feature becomes crucial when examining ArrayLists with Eclipse. 
*/ -static JdwpError OR_InvokeMethod(JdwpState* state, Request* request, ExpandBuf* pReply) +static JdwpError OR_InvokeMethod(JdwpState* state, Request* request, + ExpandBuf* pReply ATTRIBUTE_UNUSED) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { ObjectId object_id = request->ReadObjectId(); ObjectId thread_id = request->ReadThreadId(); RefTypeId class_id = request->ReadRefTypeId(); MethodId method_id = request->ReadMethodId(); - return RequestInvoke(state, request, pReply, thread_id, object_id, class_id, method_id, false); + return RequestInvoke(state, request, thread_id, object_id, class_id, method_id, false); } static JdwpError OR_DisableCollection(JdwpState*, Request* request, ExpandBuf*) @@ -1602,13 +1560,27 @@ static std::string DescribeCommand(Request* request) { return result; } +// Returns true if the given command_set and command identify an "invoke" command. +static bool IsInvokeCommand(uint8_t command_set, uint8_t command) { + if (command_set == kJDWPClassTypeCmdSet) { + return command == kJDWPClassTypeInvokeMethodCmd || command == kJDWPClassTypeNewInstanceCmd; + } else if (command_set == kJDWPObjectReferenceCmdSet) { + return command == kJDWPObjectReferenceInvokeCmd; + } else { + return false; + } +} + /* - * Process a request from the debugger. + * Process a request from the debugger. The skip_reply flag is set to true to indicate to the + * caller the reply must not be sent to the debugger. This is used for invoke commands where the + * reply is sent by the event thread after completing the invoke. * * On entry, the JDWP thread is in VMWAIT. */ -size_t JdwpState::ProcessRequest(Request* request, ExpandBuf* pReply) { +size_t JdwpState::ProcessRequest(Request* request, ExpandBuf* pReply, bool* skip_reply) { JdwpError result = ERR_NONE; + *skip_reply = false; if (request->GetCommandSet() != kJDWPDdmCmdSet) { /* @@ -1661,24 +1633,31 @@ size_t JdwpState::ProcessRequest(Request* request, ExpandBuf* pReply) { result = ERR_NOT_IMPLEMENTED; } - /* - * Set up the reply header. - * - * If we encountered an error, only send the header back. - */ - uint8_t* replyBuf = expandBufGetBuffer(pReply); - size_t replyLength = (result == ERR_NONE) ? expandBufGetLength(pReply) : kJDWPHeaderLen; - Set4BE(replyBuf + 0, replyLength); - Set4BE(replyBuf + 4, request->GetId()); - Set1(replyBuf + 8, kJDWPFlagReply); - Set2BE(replyBuf + 9, result); - - CHECK_GT(expandBufGetLength(pReply), 0U) << GetCommandName(request) << " " << request->GetId(); - - size_t respLen = expandBufGetLength(pReply) - kJDWPHeaderLen; - VLOG(jdwp) << "REPLY: " << GetCommandName(request) << " " << result << " (length=" << respLen << ")"; - if (false) { - VLOG(jdwp) << HexDump(expandBufGetBuffer(pReply) + kJDWPHeaderLen, respLen, false, ""); + size_t replyLength = 0U; + if (result == ERR_NONE && IsInvokeCommand(request->GetCommandSet(), request->GetCommand())) { + // We successfully request an invoke in the event thread. It will send the reply once the + // invoke completes so we must not send it now. + *skip_reply = true; + } else { + /* + * Set up the reply header. + * + * If we encountered an error, only send the header back. + */ + uint8_t* replyBuf = expandBufGetBuffer(pReply); + replyLength = (result == ERR_NONE) ? 
expandBufGetLength(pReply) : kJDWPHeaderLen; + Set4BE(replyBuf + kJDWPHeaderSizeOffset, replyLength); + Set4BE(replyBuf + kJDWPHeaderIdOffset, request->GetId()); + Set1(replyBuf + kJDWPHeaderFlagsOffset, kJDWPFlagReply); + Set2BE(replyBuf + kJDWPHeaderErrorCodeOffset, result); + + CHECK_GT(expandBufGetLength(pReply), 0U) << GetCommandName(request) << " " << request->GetId(); + + size_t respLen = expandBufGetLength(pReply) - kJDWPHeaderLen; + VLOG(jdwp) << "REPLY: " << GetCommandName(request) << " " << result << " (length=" << respLen << ")"; + if (false) { + VLOG(jdwp) << HexDump(expandBufGetBuffer(pReply) + kJDWPHeaderLen, respLen, false, ""); + } } VLOG(jdwp) << "----------"; diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc index e6b97a2083..6bc5e27f85 100644 --- a/runtime/jdwp/jdwp_main.cc +++ b/runtime/jdwp/jdwp_main.cc @@ -395,8 +395,15 @@ bool JdwpState::HandlePacket() { JDWP::Request request(netStateBase->input_buffer_, netStateBase->input_count_); ExpandBuf* pReply = expandBufAlloc(); - size_t replyLength = ProcessRequest(&request, pReply); - ssize_t cc = netStateBase->WritePacket(pReply, replyLength); + bool skip_reply = false; + size_t replyLength = ProcessRequest(&request, pReply, &skip_reply); + ssize_t cc = 0; + if (!skip_reply) { + cc = netStateBase->WritePacket(pReply, replyLength); + } else { + DCHECK_EQ(replyLength, 0U); + } + expandBufFree(pReply); /* * We processed this request and sent its reply so we can release the JDWP token. @@ -405,10 +412,8 @@ bool JdwpState::HandlePacket() { if (cc != static_cast<ssize_t>(replyLength)) { PLOG(ERROR) << "Failed sending reply to debugger"; - expandBufFree(pReply); return false; } - expandBufFree(pReply); netStateBase->ConsumeBytes(request.GetLength()); { MutexLock mu(self, shutdown_lock_); diff --git a/runtime/jdwp/jdwp_priv.h b/runtime/jdwp/jdwp_priv.h index f290be0f52..d58467d108 100644 --- a/runtime/jdwp/jdwp_priv.h +++ b/runtime/jdwp/jdwp_priv.h @@ -29,15 +29,32 @@ /* * JDWP constants. 
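The ProcessRequest/HandlePacket changes above defer the reply for invoke commands: the JDWP thread only queues the invoke, the event thread executes it and sends the reply itself, and skip_reply tells the JDWP thread not to write a packet of its own. A condensed sketch of that control flow with the transport and request types reduced to stand-ins; the command-set/command numbers are the ClassType and ObjectReference invoke commands also named in jdwp_priv.h below:

#include <cstddef>
#include <cstdint>

struct Request { uint8_t command_set = 0; uint8_t command = 0; };
struct Reply { size_t length = 0; };

bool IsInvoke(const Request& r) {
  // ClassType.InvokeMethod / ClassType.NewInstance / ObjectReference.InvokeMethod
  return (r.command_set == 3 && (r.command == 3 || r.command == 4)) ||
         (r.command_set == 9 && r.command == 6);
}

// Returns the number of reply bytes to send now; zero with *skip_reply set
// means the event thread owns the reply and will send it after the invoke.
size_t ProcessRequest(const Request& req, Reply* reply, bool* skip_reply) {
  *skip_reply = false;
  bool handler_succeeded = true;  // stand-in for the command dispatch above
  if (handler_succeeded && IsInvoke(req)) {
    *skip_reply = true;
    return 0;
  }
  reply->length = 11;  // header-only reply in this sketch (kJDWPHeaderLen)
  return reply->length;
}

void HandlePacket(const Request& req) {
  Reply reply;
  bool skip_reply = false;
  size_t len = ProcessRequest(req, &reply, &skip_reply);
  if (!skip_reply && len != 0) {
    // WritePacket(reply, len) would go here in the real code.
  }
  // The reply buffer is freed exactly once after this point in either case.
}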
*/ -#define kJDWPHeaderLen 11 -#define kJDWPFlagReply 0x80 - -#define kMagicHandshake "JDWP-Handshake" -#define kMagicHandshakeLen (sizeof(kMagicHandshake)-1) +static constexpr size_t kJDWPHeaderSizeOffset = 0U; +static constexpr size_t kJDWPHeaderIdOffset = 4U; +static constexpr size_t kJDWPHeaderFlagsOffset = 8U; +static constexpr size_t kJDWPHeaderErrorCodeOffset = 9U; +static constexpr size_t kJDWPHeaderCmdSetOffset = 9U; +static constexpr size_t kJDWPHeaderCmdOffset = 10U; +static constexpr size_t kJDWPHeaderLen = 11U; +static constexpr uint8_t kJDWPFlagReply = 0x80; + +static constexpr const char kMagicHandshake[] = "JDWP-Handshake"; +static constexpr size_t kMagicHandshakeLen = sizeof(kMagicHandshake) - 1; + +/* Invoke commands */ +static constexpr uint8_t kJDWPClassTypeCmdSet = 3U; +static constexpr uint8_t kJDWPClassTypeInvokeMethodCmd = 3U; +static constexpr uint8_t kJDWPClassTypeNewInstanceCmd = 4U; +static constexpr uint8_t kJDWPObjectReferenceCmdSet = 9U; +static constexpr uint8_t kJDWPObjectReferenceInvokeCmd = 6U; + +/* Event command */ +static constexpr uint8_t kJDWPEventCmdSet = 64U; +static constexpr uint8_t kJDWPEventCompositeCmd = 100U; /* DDM support */ -#define kJDWPDdmCmdSet 199 /* 0xc7, or 'G'+128 */ -#define kJDWPDdmCmd 1 +static constexpr uint8_t kJDWPDdmCmdSet = 199U; // 0xc7, or 'G'+128 +static constexpr uint8_t kJDWPDdmCmd = 1U; namespace art { diff --git a/runtime/lock_word.h b/runtime/lock_word.h index aafbfe4159..a290575bac 100644 --- a/runtime/lock_word.h +++ b/runtime/lock_word.h @@ -210,6 +210,10 @@ class LockWord { return lw1.GetValueWithoutReadBarrierState() == lw2.GetValueWithoutReadBarrierState(); } + void Dump(std::ostream& os) { + os << "LockWord:" << std::hex << value_; + } + private: // Default constructor with no lock ownership. LockWord(); diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc index 189306df5b..7e640c65f1 100644 --- a/runtime/mem_map.cc +++ b/runtime/mem_map.cc @@ -436,7 +436,15 @@ MemMap* MemMap::MapAnonymous(const char* name, uint8_t* expected_ptr, size_t byt return nullptr; } return new MemMap(name, reinterpret_cast<uint8_t*>(actual), byte_count, actual, - page_aligned_byte_count, prot, false); + page_aligned_byte_count, prot, reuse); +} + +MemMap* MemMap::MapDummy(const char* name, uint8_t* addr, size_t byte_count) { + if (byte_count == 0) { + return new MemMap(name, nullptr, 0, nullptr, 0, 0, false); + } + const size_t page_aligned_byte_count = RoundUp(byte_count, kPageSize); + return new MemMap(name, addr, byte_count, addr, page_aligned_byte_count, 0, true /* reuse */); } MemMap* MemMap::MapFileAtAddress(uint8_t* expected_ptr, size_t byte_count, int prot, int flags, diff --git a/runtime/mem_map.h b/runtime/mem_map.h index 6023a703c5..14387eed28 100644 --- a/runtime/mem_map.h +++ b/runtime/mem_map.h @@ -64,6 +64,12 @@ class MemMap { static MemMap* MapAnonymous(const char* ashmem_name, uint8_t* addr, size_t byte_count, int prot, bool low_4gb, bool reuse, std::string* error_msg); + // Create placeholder for a region allocated by direct call to mmap. + // This is useful when we do not have control over the code calling mmap, + // but when we still want to keep track of it in the list. + // The region is not considered to be owned and will not be unmmaped. + static MemMap* MapDummy(const char* name, uint8_t* addr, size_t byte_count); + // Map part of a file, taking care of non-page aligned offsets. The // "start" offset is absolute, not relative. 
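The named offsets above pin down the 11-byte JDWP header: a 4-byte big-endian length, a 4-byte id, a flags byte, then either a 2-byte error code (replies) or a command-set byte and a command byte (commands). A small sketch that assembles both header kinds, with the offsets restated locally:

#include <cstddef>
#include <cstdint>

void Set4BE(uint8_t* buf, uint32_t v) {
  buf[0] = v >> 24; buf[1] = v >> 16; buf[2] = v >> 8; buf[3] = v;
}
void Set2BE(uint8_t* buf, uint16_t v) { buf[0] = v >> 8; buf[1] = v; }

// Offsets restated from jdwp_priv.h above.
constexpr size_t kSizeOff = 0, kIdOff = 4, kFlagsOff = 8;
constexpr size_t kErrorOff = 9, kCmdSetOff = 9, kCmdOff = 10;

// Reply header: length, id, the 0x80 reply flag, 2-byte error code.
void FillReplyHeader(uint8_t* buf, uint32_t total_len, uint32_t id, uint16_t error) {
  Set4BE(buf + kSizeOff, total_len);
  Set4BE(buf + kIdOff, id);
  buf[kFlagsOff] = 0x80;   // kJDWPFlagReply
  Set2BE(buf + kErrorOff, error);
}

// Command header for the composite event: length, id, flags = 0, 64, 100.
void FillEventHeader(uint8_t* buf, uint32_t total_len, uint32_t id) {
  Set4BE(buf + kSizeOff, total_len);
  Set4BE(buf + kIdOff, id);
  buf[kFlagsOff] = 0;
  buf[kCmdSetOff] = 64;    // kJDWPEventCmdSet
  buf[kCmdOff] = 100;      // kJDWPEventCompositeCmd
}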
// diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc index 94024ef4b2..a41aed6f29 100644 --- a/runtime/native/java_lang_Class.cc +++ b/runtime/native/java_lang_Class.cc @@ -282,11 +282,11 @@ static ALWAYS_INLINE inline bool MethodMatchesConstructor(ArtMethod* m, bool pub static jobjectArray Class_getDeclaredConstructorsInternal( JNIEnv* env, jobject javaThis, jboolean publicOnly) { ScopedFastNativeObjectAccess soa(env); - auto* klass = DecodeClass(soa, javaThis); - StackHandleScope<1> hs(soa.Self()); + StackHandleScope<2> hs(soa.Self()); + Handle<mirror::Class> h_klass = hs.NewHandle(DecodeClass(soa, javaThis)); size_t constructor_count = 0; // Two pass approach for speed. - for (auto& m : klass->GetDirectMethods(sizeof(void*))) { + for (auto& m : h_klass->GetDirectMethods(sizeof(void*))) { constructor_count += MethodMatchesConstructor(&m, publicOnly != JNI_FALSE) ? 1u : 0u; } auto h_constructors = hs.NewHandle(mirror::ObjectArray<mirror::Constructor>::Alloc( @@ -296,7 +296,7 @@ static jobjectArray Class_getDeclaredConstructorsInternal( return nullptr; } constructor_count = 0; - for (auto& m : klass->GetDirectMethods(sizeof(void*))) { + for (auto& m : h_klass->GetDirectMethods(sizeof(void*))) { if (MethodMatchesConstructor(&m, publicOnly != JNI_FALSE)) { auto* constructor = mirror::Constructor::CreateFromArtMethod(soa.Self(), &m); if (UNLIKELY(constructor == nullptr)) { @@ -319,16 +319,16 @@ static jobject Class_getDeclaredMethodInternal(JNIEnv* env, jobject javaThis, // were synthesized by the runtime. constexpr uint32_t kSkipModifiers = kAccMiranda | kAccSynthetic; ScopedFastNativeObjectAccess soa(env); - StackHandleScope<4> hs(soa.Self()); + StackHandleScope<3> hs(soa.Self()); auto h_method_name = hs.NewHandle(soa.Decode<mirror::String*>(name)); if (UNLIKELY(h_method_name.Get() == nullptr)) { ThrowNullPointerException("name == null"); return nullptr; } auto h_args = hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Class>*>(args)); - auto* klass = DecodeClass(soa, javaThis); + Handle<mirror::Class> h_klass = hs.NewHandle(DecodeClass(soa, javaThis)); ArtMethod* result = nullptr; - for (auto& m : klass->GetVirtualMethods(sizeof(void*))) { + for (auto& m : h_klass->GetVirtualMethods(sizeof(void*))) { auto* np_method = m.GetInterfaceMethodIfProxy(sizeof(void*)); // May cause thread suspension. 
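The java_lang_Class.cc changes above stop caching the decoded class in a raw mirror::Class* because the loop bodies call code that may suspend the thread, and a suspension point can run a moving collection that relocates the object behind the pointer. A Handle adds a GC-visible slot that the collector fixes up, so every access re-reads the current address. A toy illustration of that indirection only; ToyHandle is a stand-in, not ART's Handle/StackHandleScope from handle_scope.h:

// Stand-in for a heap object that a moving GC may relocate.
struct MirrorClass { int num_methods = 0; };

// A handle is an extra level of indirection registered with the GC. When the
// collector moves the object at a suspend point it rewrites *slot_, so code
// that dereferences through the handle never sees a stale address.
template <typename T>
class ToyHandle {
 public:
  explicit ToyHandle(T** slot) : slot_(slot) {}
  T* Get() const { return *slot_; }
  T* operator->() const { return *slot_; }
 private:
  T** slot_;  // owned by the enclosing handle scope, visited as a GC root
};

// Usage shape mirroring the patched code:
//   Handle<mirror::Class> h_klass = hs.NewHandle(DecodeClass(soa, javaThis));
//   for (...) { CallThatMayCauseThreadSuspension(); Use(h_klass->...); }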
mirror::String* np_name = np_method->GetNameAsString(soa.Self()); @@ -347,7 +347,7 @@ static jobject Class_getDeclaredMethodInternal(JNIEnv* env, jobject javaThis, } } if (result == nullptr) { - for (auto& m : klass->GetDirectMethods(sizeof(void*))) { + for (auto& m : h_klass->GetDirectMethods(sizeof(void*))) { auto modifiers = m.GetAccessFlags(); if ((modifiers & kAccConstructor) != 0) { continue; @@ -380,8 +380,8 @@ static jobject Class_getDeclaredMethodInternal(JNIEnv* env, jobject javaThis, static jobjectArray Class_getDeclaredMethodsUnchecked(JNIEnv* env, jobject javaThis, jboolean publicOnly) { ScopedFastNativeObjectAccess soa(env); - StackHandleScope<3> hs(soa.Self()); - auto* klass = DecodeClass(soa, javaThis); + StackHandleScope<2> hs(soa.Self()); + Handle<mirror::Class> klass = hs.NewHandle(DecodeClass(soa, javaThis)); size_t num_methods = 0; for (auto& m : klass->GetVirtualMethods(sizeof(void*))) { auto modifiers = m.GetAccessFlags(); @@ -432,7 +432,7 @@ static jobjectArray Class_getDeclaredMethodsUnchecked(JNIEnv* env, jobject javaT static jobject Class_newInstance(JNIEnv* env, jobject javaThis) { ScopedFastNativeObjectAccess soa(env); StackHandleScope<4> hs(soa.Self()); - auto klass = hs.NewHandle(DecodeClass(soa, javaThis)); + Handle<mirror::Class> klass = hs.NewHandle(DecodeClass(soa, javaThis)); if (UNLIKELY(klass->GetPrimitiveType() != 0 || klass->IsInterface() || klass->IsArrayClass() || klass->IsAbstract())) { soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;", diff --git a/runtime/native/java_lang_Runtime.cc b/runtime/native/java_lang_Runtime.cc index bd043a84c6..abac8153b3 100644 --- a/runtime/native/java_lang_Runtime.cc +++ b/runtime/native/java_lang_Runtime.cc @@ -52,52 +52,29 @@ NO_RETURN static void Runtime_nativeExit(JNIEnv*, jclass, jint status) { exit(status); } -static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPathJstr, jstring javaDexPathJstr) { +static void SetLdLibraryPath(JNIEnv* env, jstring javaLdLibraryPathJstr) { #ifdef HAVE_ANDROID_OS - std::stringstream ss; if (javaLdLibraryPathJstr != nullptr) { - ScopedUtfChars javaLdLibraryPath(env, javaLdLibraryPathJstr); - if (javaLdLibraryPath.c_str() != nullptr) { - ss << javaLdLibraryPath.c_str(); + ScopedUtfChars ldLibraryPath(env, javaLdLibraryPathJstr); + if (ldLibraryPath.c_str() != nullptr) { + android_update_LD_LIBRARY_PATH(ldLibraryPath.c_str()); } } - if (javaDexPathJstr != nullptr) { - ScopedUtfChars javaDexPath(env, javaDexPathJstr); - if (javaDexPath.c_str() != nullptr) { - std::vector<std::string> dexPathVector; - Split(javaDexPath.c_str(), ':', &dexPathVector); - - for (auto abi : art::Runtime::Current()->GetCpuAbilist()) { - for (auto zip_path : dexPathVector) { - // Native libraries live under lib/<abi>/ inside .apk file. - ss << ":" << zip_path << "!" 
<< "lib/" << abi; - } - } - } - } - - std::string ldLibraryPathStr = ss.str(); - const char* ldLibraryPath = ldLibraryPathStr.c_str(); - if (*ldLibraryPath == ':') { - ++ldLibraryPath; - } - - android_update_LD_LIBRARY_PATH(ldLibraryPath); #else LOG(WARNING) << "android_update_LD_LIBRARY_PATH not found; .so dependencies will not work!"; - UNUSED(javaLdLibraryPathJstr, javaDexPathJstr, env); + UNUSED(javaLdLibraryPathJstr, env); #endif } static jstring Runtime_nativeLoad(JNIEnv* env, jclass, jstring javaFilename, jobject javaLoader, - jstring javaLdLibraryPathJstr, jstring javaDexPathJstr) { + jstring javaLdLibraryPathJstr) { ScopedUtfChars filename(env, javaFilename); if (filename.c_str() == nullptr) { return nullptr; } - SetLdLibraryPath(env, javaLdLibraryPathJstr, javaDexPathJstr); + SetLdLibraryPath(env, javaLdLibraryPathJstr); std::string error_msg; { @@ -130,7 +107,7 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(Runtime, gc, "()V"), NATIVE_METHOD(Runtime, maxMemory, "!()J"), NATIVE_METHOD(Runtime, nativeExit, "(I)V"), - NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"), + NATIVE_METHOD(Runtime, nativeLoad, "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;"), NATIVE_METHOD(Runtime, totalMemory, "!()J"), }; diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc index b96ddc8102..9ce4a02f1b 100644 --- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc +++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc @@ -38,7 +38,7 @@ static jbyteArray DdmVmInternal_getRecentAllocations(JNIEnv* env, jclass) { } static jboolean DdmVmInternal_getRecentAllocationStatus(JNIEnv*, jclass) { - return Dbg::IsAllocTrackingEnabled(); + return Runtime::Current()->GetHeap()->IsAllocTrackingEnabled(); } /* diff --git a/runtime/oat.h b/runtime/oat.h index 604e16171d..000ae8ed5d 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '6', '3', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '6', '4', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc index 6fda790697..ad5741e475 100644 --- a/runtime/oat_file.cc +++ b/runtime/oat_file.cc @@ -21,6 +21,9 @@ #include <unistd.h> #include <cstdlib> +#ifndef __APPLE__ +#include <link.h> // for dl_iterate_phdr. +#endif #include <sstream> // dlopen_ext support from bionic. @@ -35,6 +38,7 @@ #include "elf_file.h" #include "elf_utils.h" #include "oat.h" +#include "mem_map.h" #include "mirror/class.h" #include "mirror/object-inl.h" #include "os.h" @@ -45,13 +49,13 @@ namespace art { // Whether OatFile::Open will try DlOpen() first. Fallback is our own ELF loader. -static constexpr bool kUseDlopen = false; +static constexpr bool kUseDlopen = true; // Whether OatFile::Open will try DlOpen() on the host. On the host we're not linking against // bionic, so cannot take advantage of the support for changed semantics (loading the same soname // multiple times). However, if/when we switch the above, we likely want to switch this, too, // to get test coverage of the code paths. 
-static constexpr bool kUseDlopenOnHost = false; +static constexpr bool kUseDlopenOnHost = true; // For debugging, Open will print DlOpen error message if set to true. static constexpr bool kPrintDlOpenErrorMessage = false; @@ -210,6 +214,15 @@ OatFile::~OatFile() { bool OatFile::Dlopen(const std::string& elf_filename, uint8_t* requested_base, const char* abs_dex_location, std::string* error_msg) { +#ifdef __APPLE__ + // The dl_iterate_phdr syscall is missing. There is similar API on OSX, + // but let's fallback to the custom loading code for the time being. + UNUSED(elf_filename); + UNUSED(requested_base); + UNUSED(abs_dex_location); + UNUSED(error_msg); + return false; +#else std::unique_ptr<char> absolute_path(realpath(elf_filename.c_str(), nullptr)); if (absolute_path == nullptr) { *error_msg = StringPrintf("Failed to find absolute path for '%s'", elf_filename.c_str()); @@ -217,7 +230,7 @@ bool OatFile::Dlopen(const std::string& elf_filename, uint8_t* requested_base, } #ifdef HAVE_ANDROID_OS android_dlextinfo extinfo; - extinfo.flags = ANDROID_DLEXT_FORCE_LOAD; + extinfo.flags = ANDROID_DLEXT_FORCE_LOAD | ANDROID_DLEXT_FORCE_FIXED_VADDR; dlopen_handle_ = android_dlopen_ext(absolute_path.get(), RTLD_NOW, &extinfo); #else dlopen_handle_ = dlopen(absolute_path.get(), RTLD_NOW); @@ -264,7 +277,49 @@ bool OatFile::Dlopen(const std::string& elf_filename, uint8_t* requested_base, bss_end_ += sizeof(uint32_t); } + // Ask the linker where it mmaped the file and notify our mmap wrapper of the regions. + struct dl_iterate_context { + static int callback(struct dl_phdr_info *info, size_t /* size */, void *data) { + auto* context = reinterpret_cast<dl_iterate_context*>(data); + // See whether this callback corresponds to the file which we have just loaded. + bool contains_begin = false; + for (int i = 0; i < info->dlpi_phnum; i++) { + if (info->dlpi_phdr[i].p_type == PT_LOAD) { + uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr + + info->dlpi_phdr[i].p_vaddr); + size_t memsz = info->dlpi_phdr[i].p_memsz; + if (vaddr <= context->begin_ && context->begin_ < vaddr + memsz) { + contains_begin = true; + break; + } + } + } + // Add dummy mmaps for this file. + if (contains_begin) { + for (int i = 0; i < info->dlpi_phnum; i++) { + if (info->dlpi_phdr[i].p_type == PT_LOAD) { + uint8_t* vaddr = reinterpret_cast<uint8_t*>(info->dlpi_addr + + info->dlpi_phdr[i].p_vaddr); + size_t memsz = info->dlpi_phdr[i].p_memsz; + MemMap* mmap = MemMap::MapDummy(info->dlpi_name, vaddr, memsz); + context->dlopen_mmaps_->push_back(std::unique_ptr<MemMap>(mmap)); + } + } + return 1; // Stop iteration and return 1 from dl_iterate_phdr. + } + return 0; // Continue iteration and return 0 from dl_iterate_phdr when finished. + } + const uint8_t* const begin_; + std::vector<std::unique_ptr<MemMap>>* const dlopen_mmaps_; + } context = { begin_, &dlopen_mmaps_ }; + + if (dl_iterate_phdr(dl_iterate_context::callback, &context) == 0) { + PrintFileToLog("/proc/self/maps", LogSeverity::WARNING); + LOG(ERROR) << "File " << elf_filename << " loaded with dlopen but can not find its mmaps."; + } + return Setup(abs_dex_location, error_msg); +#endif // __APPLE__ } bool OatFile::ElfFileOpen(File* file, uint8_t* requested_base, uint8_t* oat_file_begin, diff --git a/runtime/oat_file.h b/runtime/oat_file.h index c58b0294c9..1a782deb82 100644 --- a/runtime/oat_file.h +++ b/runtime/oat_file.h @@ -321,6 +321,9 @@ class OatFile FINAL { // dlopen handle during runtime. 
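The dl_iterate_context callback above is the usual dl_iterate_phdr pattern: visit every loaded ELF object, test whether one of its PT_LOAD segments covers an address of interest, and stop the walk by returning non-zero. A standalone sketch reduced to "find the loaded object containing an address"; Linux/Bionic only, which is why the hunk bails out early under __APPLE__:

#ifndef __APPLE__
#include <link.h>   // dl_iterate_phdr
#include <cstddef>

struct FindContext {
  const char* addr = nullptr;        // address we are looking for
  const char* found_name = nullptr;  // path of the object that contains it
};

static int FindCallback(struct dl_phdr_info* info, size_t /* size */, void* data) {
  FindContext* ctx = static_cast<FindContext*>(data);
  for (int i = 0; i < info->dlpi_phnum; ++i) {
    if (info->dlpi_phdr[i].p_type != PT_LOAD) {
      continue;
    }
    const char* start =
        reinterpret_cast<const char*>(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr);
    const char* end = start + info->dlpi_phdr[i].p_memsz;
    if (start <= ctx->addr && ctx->addr < end) {
      ctx->found_name = info->dlpi_name;
      return 1;  // non-zero stops the iteration
    }
  }
  return 0;  // keep iterating over the remaining loaded objects
}

// Returns the path of the loaded object whose PT_LOAD segments contain addr.
const char* FindMappedObject(const void* addr) {
  FindContext ctx;
  ctx.addr = static_cast<const char*>(addr);
  dl_iterate_phdr(FindCallback, &ctx);
  return ctx.found_name;
}
#endif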
void* dlopen_handle_; + // Dummy memory map objects corresponding to the regions mapped by dlopen. + std::vector<std::unique_ptr<MemMap>> dlopen_mmaps_; + // Owning storage for the OatDexFile objects. std::vector<const OatDexFile*> oat_dex_files_storage_; diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h index 8d84c35bd9..701481392f 100644 --- a/runtime/read_barrier-inl.h +++ b/runtime/read_barrier-inl.h @@ -74,7 +74,8 @@ inline MirrorType* ReadBarrier::Barrier( } template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup> -inline MirrorType* ReadBarrier::BarrierForRoot(MirrorType** root) { +inline MirrorType* ReadBarrier::BarrierForRoot(MirrorType** root, + GcRootSource* gc_root_source) { MirrorType* ref = *root; const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier; if (with_read_barrier && kUseBakerReadBarrier) { @@ -87,7 +88,7 @@ inline MirrorType* ReadBarrier::BarrierForRoot(MirrorType** root) { if (Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarking()) { ref = reinterpret_cast<MirrorType*>(Mark(ref)); } - AssertToSpaceInvariant(nullptr, MemberOffset(0), ref); + AssertToSpaceInvariant(gc_root_source, ref); return ref; } else if (with_read_barrier && kUseBrooksReadBarrier) { // To be implemented. @@ -105,7 +106,7 @@ inline MirrorType* ReadBarrier::BarrierForRoot(MirrorType** root) { Atomic<mirror::Object*>* atomic_root = reinterpret_cast<Atomic<mirror::Object*>*>(root); atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, ref); } - AssertToSpaceInvariant(nullptr, MemberOffset(0), ref); + AssertToSpaceInvariant(gc_root_source, ref); return ref; } else { return ref; @@ -114,7 +115,8 @@ inline MirrorType* ReadBarrier::BarrierForRoot(MirrorType** root) { // TODO: Reduce copy paste template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kMaybeDuringStartup> -inline MirrorType* ReadBarrier::BarrierForRoot(mirror::CompressedReference<MirrorType>* root) { +inline MirrorType* ReadBarrier::BarrierForRoot(mirror::CompressedReference<MirrorType>* root, + GcRootSource* gc_root_source) { MirrorType* ref = root->AsMirrorPtr(); const bool with_read_barrier = kReadBarrierOption == kWithReadBarrier; if (with_read_barrier && kUseBakerReadBarrier) { @@ -127,7 +129,7 @@ inline MirrorType* ReadBarrier::BarrierForRoot(mirror::CompressedReference<Mirro if (Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarking()) { ref = reinterpret_cast<MirrorType*>(Mark(ref)); } - AssertToSpaceInvariant(nullptr, MemberOffset(0), ref); + AssertToSpaceInvariant(gc_root_source, ref); return ref; } else if (with_read_barrier && kUseBrooksReadBarrier) { // To be implemented. 
@@ -147,7 +149,7 @@ inline MirrorType* ReadBarrier::BarrierForRoot(mirror::CompressedReference<Mirro reinterpret_cast<Atomic<mirror::CompressedReference<MirrorType>>*>(root); atomic_root->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref); } - AssertToSpaceInvariant(nullptr, MemberOffset(0), ref); + AssertToSpaceInvariant(gc_root_source, ref); return ref; } else { return ref; @@ -183,6 +185,17 @@ inline void ReadBarrier::AssertToSpaceInvariant(mirror::Object* obj, MemberOffse } } +inline void ReadBarrier::AssertToSpaceInvariant(GcRootSource* gc_root_source, + mirror::Object* ref) { + if (kEnableToSpaceInvariantChecks || kIsDebugBuild) { + if (ref == nullptr || IsDuringStartup()) { + return; + } + Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()-> + AssertToSpaceInvariant(gc_root_source, ref); + } +} + inline mirror::Object* ReadBarrier::Mark(mirror::Object* obj) { return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->Mark(obj); } diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h index aa72e97328..55cef6826a 100644 --- a/runtime/read_barrier.h +++ b/runtime/read_barrier.h @@ -19,6 +19,7 @@ #include "base/mutex.h" #include "base/macros.h" +#include "gc_root.h" #include "jni.h" #include "mirror/object_reference.h" #include "offsets.h" @@ -54,14 +55,16 @@ class ReadBarrier { // whereas the return value must be an updated reference. template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier, bool kMaybeDuringStartup = false> - ALWAYS_INLINE static MirrorType* BarrierForRoot(MirrorType** root) + ALWAYS_INLINE static MirrorType* BarrierForRoot(MirrorType** root, + GcRootSource* gc_root_source = nullptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // It's up to the implementation whether the given root gets updated // whereas the return value must be an updated reference. template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier, bool kMaybeDuringStartup = false> - ALWAYS_INLINE static MirrorType* BarrierForRoot(mirror::CompressedReference<MirrorType>* root) + ALWAYS_INLINE static MirrorType* BarrierForRoot(mirror::CompressedReference<MirrorType>* root, + GcRootSource* gc_root_source = nullptr) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static bool IsDuringStartup(); @@ -75,6 +78,9 @@ class ReadBarrier { static void AssertToSpaceInvariant(mirror::Object* obj, MemberOffset offset, mirror::Object* ref) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + // With GcRootSource. + static void AssertToSpaceInvariant(GcRootSource* gc_root_source, mirror::Object* ref) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); static mirror::Object* Mark(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 103332afa3..7a78928cee 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -403,6 +403,7 @@ void Runtime::SweepSystemWeaks(IsMarkedCallback* visitor, void* arg) { GetInternTable()->SweepInternTableWeaks(visitor, arg); GetMonitorList()->SweepMonitorList(visitor, arg); GetJavaVM()->SweepJniWeakGlobals(visitor, arg); + GetHeap()->SweepAllocationRecords(visitor, arg); } bool Runtime::Create(const RuntimeOptions& options, bool ignore_unrecognized) { @@ -646,6 +647,10 @@ void Runtime::DidForkFromZygote(JNIEnv* env, NativeBridgeAction action, const ch // Create the thread pools. heap_->CreateThreadPool(); + // Reset the gc performance data at zygote fork so that the GCs + // before fork aren't attributed to an app. 
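SweepSystemWeaks above now sweeps the allocation-record table alongside the other weak containers, using the same IsMarkedCallback convention: the callback returns nullptr for a dead object and otherwise the object's current, possibly relocated, address. A sketch of a sweep over a weak keyed table under that convention, with an ordinary map standing in for ART's AllocRecordObjectMap:

#include <unordered_map>
#include <utility>
#include <vector>

struct Object {};
struct Record { int bytes = 0; };

// Shape of IsMarkedCallback: nullptr means dead, otherwise the new address.
using IsMarkedFn = Object* (*)(Object* obj, void* arg);

void SweepWeakTable(std::unordered_map<Object*, Record>* table,
                    IsMarkedFn is_marked, void* arg) {
  std::vector<std::pair<Object*, Record>> moved;
  for (auto it = table->begin(); it != table->end(); ) {
    Object* new_addr = is_marked(it->first, arg);
    if (new_addr == nullptr) {
      it = table->erase(it);                     // object died: drop its record
    } else if (new_addr != it->first) {
      moved.emplace_back(new_addr, it->second);  // object moved: re-key it later
      it = table->erase(it);
    } else {
      ++it;                                      // object survived in place
    }
  }
  for (auto& entry : moved) {
    (*table)[entry.first] = entry.second;        // safe to insert after the loop
  }
}

The TODO added in DisallowNewSystemWeaks below points at the other half of this contract: without a disallow/allow pair for allocation records, newly allocated objects that are not yet marked can be swept out of the table.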
+ heap_->ResetGcPerformanceInfo(); + if (jit_.get() == nullptr && jit_options_->UseJIT()) { // Create the JIT if the flag is set and we haven't already create it (happens for run-tests). CreateJit(); @@ -1473,6 +1478,11 @@ void Runtime::DisallowNewSystemWeaks() { monitor_list_->DisallowNewMonitors(); intern_table_->DisallowNewInterns(); java_vm_->DisallowNewWeakGlobals(); + // TODO: add a similar call for heap.allocation_records_, otherwise some of the newly allocated + // objects that are not marked might be swept from the records, making the records incomplete. + // It is safe for now since the only effect is that those objects do not have allocation records. + // The number of such objects should be small, and current allocation tracker cannot collect + // allocation records for all objects anyway. } void Runtime::AllowNewSystemWeaks() { diff --git a/runtime/stack.cc b/runtime/stack.cc index 5aeca98a88..11c94dbbb8 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -126,10 +126,7 @@ ArtMethod* StackVisitor::GetMethod() const { if (IsInInlinedFrame()) { size_t depth_in_stack_map = current_inlining_depth_ - 1; InlineInfo inline_info = GetCurrentInlineInfo(); - uint32_t method_index = inline_info.GetMethodIndexAtDepth(depth_in_stack_map); - InvokeType invoke_type = - static_cast<InvokeType>(inline_info.GetInvokeTypeAtDepth(depth_in_stack_map)); - return GetResolvedMethod(*GetCurrentQuickFrame(), method_index, invoke_type); + return GetResolvedMethod(*GetCurrentQuickFrame(), inline_info, depth_in_stack_map); } else { return *cur_quick_frame_; } diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc index f8fc2a9e90..741cd906a8 100644 --- a/runtime/stack_map.cc +++ b/runtime/stack_map.cc @@ -19,6 +19,7 @@ #include <stdint.h> #include "indenter.h" +#include "invoke_type.h" namespace art { @@ -207,6 +208,13 @@ void StackMap::Dump(std::ostream& os, *this, encoding, number_of_dex_registers); dex_register_map.Dump(os, code_info, number_of_dex_registers); } + if (HasInlineInfo(encoding)) { + InlineInfo inline_info = code_info.GetInlineInfoOf(*this, encoding); + // We do not know the length of the dex register maps of inlined frames + // at this level, so we just pass null to `InlineInfo::Dump` to tell + // it not to look at these maps. 
+ inline_info.Dump(os, code_info, nullptr); + } } void InlineInfo::Dump(std::ostream& os, @@ -220,9 +228,11 @@ void InlineInfo::Dump(std::ostream& os, indented_os << " At depth " << i << std::hex << " (dex_pc=0x" << GetDexPcAtDepth(i) - << ", method_index=0x" << GetMethodIndexAtDepth(i) + << std::dec + << ", method_index=" << GetMethodIndexAtDepth(i) + << ", invoke_type=" << static_cast<InvokeType>(GetInvokeTypeAtDepth(i)) << ")\n"; - if (HasDexRegisterMapAtDepth(i)) { + if (HasDexRegisterMapAtDepth(i) && (number_of_dex_registers != nullptr)) { StackMapEncoding encoding = code_info.ExtractEncoding(); DexRegisterMap dex_register_map = code_info.GetDexRegisterMapAtDepth(i, *this, encoding, number_of_dex_registers[i]); diff --git a/runtime/thread.cc b/runtime/thread.cc index fe98b0a98a..fe8b0d8c60 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -2578,12 +2578,11 @@ void Thread::SetDebugInvokeReq(DebugInvokeReq* req) { } void Thread::ClearDebugInvokeReq() { - CHECK(Dbg::IsDebuggerActive()); CHECK(GetInvokeReq() != nullptr) << "Debug invoke req not active in thread " << *this; CHECK(Thread::Current() == this) << "Debug invoke must be finished by the thread itself"; - // We do not own the DebugInvokeReq* so we must not delete it, it is the responsibility of - // the owner (the JDWP thread). + DebugInvokeReq* req = tlsPtr_.debug_invoke_req; tlsPtr_.debug_invoke_req = nullptr; + delete req; } void Thread::PushVerifier(verifier::MethodVerifier* verifier) { diff --git a/runtime/thread.h b/runtime/thread.h index 9311bef48a..0e71c08b07 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -781,15 +781,14 @@ class Thread { void DeactivateSingleStepControl(); // Sets debug invoke request for debugging. When the thread is resumed, - // it executes the method described by this request then suspends itself. - // The thread does not take ownership of the given DebugInvokeReq*, it is - // owned by the JDWP thread which is waiting for the execution of the - // method. + // it executes the method described by this request then sends the reply + // before suspending itself. The thread takes the ownership of the given + // DebugInvokeReq*. It is deleted by a call to ClearDebugInvokeReq. void SetDebugInvokeReq(DebugInvokeReq* req); // Clears debug invoke request for debugging. When the thread completes - // method invocation, it clears its debug invoke request, signals the - // JDWP thread and suspends itself. + // method invocation, it deletes its debug invoke request and suspends + // itself. void ClearDebugInvokeReq(); // Returns the fake exception used to activate deoptimization. diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index af9ba6848b..b697b43a77 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -875,31 +875,36 @@ void ThreadList::SuspendSelfForDebugger() { // The debugger thread must not suspend itself due to debugger activity! Thread* debug_thread = Dbg::GetDebugThread(); - CHECK(debug_thread != nullptr); CHECK(self != debug_thread); CHECK_NE(self->GetState(), kRunnable); Locks::mutator_lock_->AssertNotHeld(self); - { + // The debugger may have detached while we were executing an invoke request. In that case, we + // must not suspend ourself. + DebugInvokeReq* pReq = self->GetInvokeReq(); + const bool skip_thread_suspension = (pReq != nullptr && !Dbg::IsDebuggerActive()); + if (!skip_thread_suspension) { // Collisions with other suspends aren't really interesting. We want // to ensure that we're the only one fiddling with the suspend count // though. 
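The thread.cc/thread.h hunks above hand ownership of the DebugInvokeReq to the event thread: it executes the method, sends the deferred reply, and then deletes the request in ClearDebugInvokeReq. A compact sketch of that handoff; std::unique_ptr is used here only to make the ownership explicit, the real code keeps a raw pointer in thread-local storage:

#include <memory>

struct DebugInvokeReq { int request_id = 0; /* method, receiver, arguments... */ };

struct EventThread {
  std::unique_ptr<DebugInvokeReq> invoke_req;  // mailbox slot, normally empty

  // JDWP thread side: hand the request over; the event thread now owns it.
  void SetDebugInvokeReq(DebugInvokeReq* req) { invoke_req.reset(req); }

  // Event thread side: runs when the thread is resumed by the debugger.
  void ProcessInvoke() {
    if (invoke_req == nullptr) {
      return;                  // plain resume, nothing to invoke
    }
    // ExecuteMethod(invoke_req.get()) and FinishInvokeMethod(...), i.e. sending
    // the deferred reply, happen here in the real code.
    invoke_req.reset();        // ClearDebugInvokeReq: delete the request
  }
};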
MutexLock mu(self, *Locks::thread_suspend_count_lock_); self->ModifySuspendCount(self, +1, true); CHECK_GT(self->GetSuspendCount(), 0); - } - VLOG(threads) << *self << " self-suspending (debugger)"; + VLOG(threads) << *self << " self-suspending (debugger)"; + } else { + // We must no longer be subject to debugger suspension. + MutexLock mu(self, *Locks::thread_suspend_count_lock_); + CHECK_EQ(self->GetDebugSuspendCount(), 0) << "Debugger detached without resuming us"; - // Tell JDWP we've completed invocation and are ready to suspend. - DebugInvokeReq* const pReq = self->GetInvokeReq(); + VLOG(threads) << *self << " not self-suspending because debugger detached during invoke"; + } + + // If the debugger requested an invoke, we need to send the reply and clear the request. if (pReq != nullptr) { - // Clear debug invoke request before signaling. + Dbg::FinishInvokeMethod(pReq); self->ClearDebugInvokeReq(); - - VLOG(jdwp) << "invoke complete, signaling"; - MutexLock mu(self, pReq->lock); - pReq->cond.Signal(self); + pReq = nullptr; // object has been deleted, clear it for safety. } // Tell JDWP that we've completed suspension. The JDWP thread can't diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index 33b09355c5..91e63f5b11 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -3394,11 +3394,27 @@ ArtMethod* MethodVerifier::VerifyInvocationArgsFromIterator( << " but expected " << reg_type; return nullptr; } - } else if (!work_line_->VerifyRegisterType(this, get_reg, reg_type)) { - // Continue on soft failures. We need to find possible hard failures to avoid problems in the - // compiler. - if (have_pending_hard_failure_) { - return nullptr; + } else { + if (!work_line_->VerifyRegisterType(this, get_reg, reg_type)) { + // Continue on soft failures. We need to find possible hard failures to avoid problems in + // the compiler. + if (have_pending_hard_failure_) { + return nullptr; + } + } else if (reg_type.IsLongOrDoubleTypes()) { + // Check that registers are consecutive (for non-range invokes). Invokes are the only + // instructions not specifying register pairs by the first component, but require them + // nonetheless. Only check when there's an actual register in the parameters. If there's + // none, this will fail below. + if (!is_range && sig_registers + 1 < expected_args) { + uint32_t second_reg = arg[sig_registers + 1]; + if (second_reg != get_reg + 1) { + Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation, long or double parameter " + "at index " << sig_registers << " is not a pair: " << get_reg << " + " + << second_reg << "."; + return nullptr; + } + } } } sig_registers += reg_type.IsLongOrDoubleTypes() ? 
2 : 1; diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc index e7857a0602..0c7cce908c 100644 --- a/runtime/well_known_classes.cc +++ b/runtime/well_known_classes.cc @@ -369,7 +369,7 @@ void WellKnownClasses::Init(JNIEnv* env) { void WellKnownClasses::LateInit(JNIEnv* env) { ScopedLocalRef<jclass> java_lang_Runtime(env, env->FindClass("java/lang/Runtime")); - java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"); + java_lang_Runtime_nativeLoad = CacheMethod(env, java_lang_Runtime.get(), true, "nativeLoad", "(Ljava/lang/String;Ljava/lang/ClassLoader;Ljava/lang/String;)Ljava/lang/String;"); } mirror::Class* WellKnownClasses::ToClass(jclass global_jclass) { diff --git a/test/004-UnsafeTest/src/Main.java b/test/004-UnsafeTest/src/Main.java index 708f61f028..2554b76f73 100644 --- a/test/004-UnsafeTest/src/Main.java +++ b/test/004-UnsafeTest/src/Main.java @@ -36,6 +36,13 @@ public class Main { } } + private static void check(Object actual, Object expected, String msg) { + if (actual != expected) { + System.out.println(msg + " : " + actual + " != " + expected); + System.exit(1); + } + } + private static Unsafe getUnsafe() throws Exception { Class<?> unsafeClass = Class.forName("sun.misc.Unsafe"); Field f = unsafeClass.getDeclaredField("theUnsafe"); @@ -80,6 +87,7 @@ public class Main { "Unsafe.arrayIndexScale(Object[])"); TestClass t = new TestClass(); + int intValue = 12345678; Field intField = TestClass.class.getDeclaredField("intVar"); long intOffset = unsafe.objectFieldOffset(intField); @@ -87,14 +95,23 @@ public class Main { unsafe.putInt(t, intOffset, intValue); check(t.intVar, intValue, "Unsafe.putInt(Object, long, int)"); check(unsafe.getInt(t, intOffset), intValue, "Unsafe.getInt(Object, long)"); + + long longValue = 1234567887654321L; Field longField = TestClass.class.getDeclaredField("longVar"); long longOffset = unsafe.objectFieldOffset(longField); - long longValue = 1234567887654321L; check(unsafe.getLong(t, longOffset), 0, "Unsafe.getLong(Object, long) - initial"); unsafe.putLong(t, longOffset, longValue); check(t.longVar, longValue, "Unsafe.putLong(Object, long, long)"); check(unsafe.getLong(t, longOffset), longValue, "Unsafe.getLong(Object, long)"); + Object objectValue = new Object(); + Field objectField = TestClass.class.getDeclaredField("objectVar"); + long objectOffset = unsafe.objectFieldOffset(objectField); + check(unsafe.getObject(t, objectOffset), null, "Unsafe.getObject(Object, long) - initial"); + unsafe.putObject(t, objectOffset, objectValue); + check(t.objectVar, objectValue, "Unsafe.putObject(Object, long, Object)"); + check(unsafe.getObject(t, objectOffset), objectValue, "Unsafe.getObject(Object, long)"); + if (unsafe.compareAndSwapInt(t, intOffset, 0, 1)) { System.out.println("Unexpectedly succeeding compareAndSwap..."); } @@ -119,6 +136,7 @@ public class Main { private static class TestClass { public int intVar = 0; public long longVar = 0; + public Object objectVar = null; } private static native int vmArrayBaseOffset(Class clazz); diff --git a/test/098-ddmc/src/Main.java b/test/098-ddmc/src/Main.java index f41ff2a94a..4914ba2289 100644 --- a/test/098-ddmc/src/Main.java +++ b/test/098-ddmc/src/Main.java @@ -43,14 +43,24 @@ public class Main { System.out.println("Confirm when we overflow, we don't roll over to zero. 
b/17392248"); final int overflowAllocations = 64 * 1024; // Won't fit in unsigned 16-bit value. + // TODO: Temporary fix. Keep the new objects live so they are not garbage collected. + // This will cause OOM exception for GC stress tests. The root cause is changed behaviour of + // getRecentAllocations(). Working on restoring its old behaviour. b/20037135 + Object[] objects = new Object[overflowAllocations]; for (int i = 0; i < overflowAllocations; i++) { - new Object(); + objects[i] = new Object(); } Allocations after = new Allocations(DdmVmInternal.getRecentAllocations()); System.out.println("before < overflowAllocations=" + (before.numberOfEntries < overflowAllocations)); System.out.println("after > before=" + (after.numberOfEntries > before.numberOfEntries)); System.out.println("after.numberOfEntries=" + after.numberOfEntries); + // TODO: Temporary fix as above. b/20037135 + objects = null; + Runtime.getRuntime().gc(); + final int fillerStrings = 16 * 1024; + String[] strings = new String[fillerStrings]; + System.out.println("Disable and confirm back to empty"); DdmVmInternal.enableRecentAllocations(false); System.out.println("status=" + DdmVmInternal.getRecentAllocationStatus()); @@ -66,8 +76,8 @@ public class Main { System.out.println("Confirm we can reenable twice in a row without losing allocations"); DdmVmInternal.enableRecentAllocations(true); System.out.println("status=" + DdmVmInternal.getRecentAllocationStatus()); - for (int i = 0; i < 16 * 1024; i++) { - new String("fnord"); + for (int i = 0; i < fillerStrings; i++) { + strings[i] = new String("fnord"); } Allocations first = new Allocations(DdmVmInternal.getRecentAllocations()); DdmVmInternal.enableRecentAllocations(true); diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc index b2d7e55214..59722ad00d 100644 --- a/test/137-cfi/cfi.cc +++ b/test/137-cfi/cfi.cc @@ -29,6 +29,9 @@ #include "base/logging.h" #include "base/macros.h" +#include "gc/heap.h" +#include "gc/space/image_space.h" +#include "oat_file.h" #include "utils.h" namespace art { @@ -73,18 +76,45 @@ static bool CheckStack(Backtrace* bt, const std::vector<std::string>& seq) { } } + printf("Can not find %s in backtrace:\n", seq[cur_search_index].c_str()); + for (Backtrace::const_iterator it = bt->begin(); it != bt->end(); ++it) { + if (BacktraceMap::IsValid(it->map)) { + printf(" %s\n", it->func_name.c_str()); + } + } + return false; } #endif +// Currently we have to fall back to our own loader for the boot image when it's compiled PIC +// because its base is zero. Thus in-process unwinding through it won't work. This is a helper +// detecting this. +#if __linux__ +static bool IsPicImage() { + gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace(); + CHECK(image_space != nullptr); // We should be running with an image. + const OatFile* oat_file = image_space->GetOatFile(); + CHECK(oat_file != nullptr); // We should have an oat file to go with the image. + return oat_file->IsPic(); +} +#endif + extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindInProcess(JNIEnv*, jobject, jint, jboolean) { #if __linux__ + if (IsPicImage()) { + LOG(INFO) << "Image is pic, in-process unwinding check bypassed."; + return JNI_TRUE; + } + // TODO: What to do on Valgrind? 
std::unique_ptr<Backtrace> bt(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, GetTid())); if (!bt->Unwind(0, nullptr)) { + printf("Can not unwind in process.\n"); return JNI_FALSE; } else if (bt->NumFrames() == 0) { + printf("No frames for unwind in process.\n"); return JNI_FALSE; } @@ -94,6 +124,7 @@ extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindInProcess(JNIEnv*, jobject std::vector<std::string> seq = { "Java_Main_unwindInProcess", // This function. "boolean Main.unwindInProcess(int, boolean)", // The corresponding Java native method frame. + "int java.util.Arrays.binarySearch(java.lang.Object[], int, int, java.lang.Object, java.util.Comparator)", // Framework method. "void Main.main(java.lang.String[])" // The Java entry method. }; @@ -155,8 +186,9 @@ extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindOtherProcess(JNIEnv*, jobj if (ptrace(PTRACE_ATTACH, pid, 0, 0)) { // Were not able to attach, bad. + printf("Failed to attach to other process.\n"); PLOG(ERROR) << "Failed to attach."; - kill(pid, SIGCONT); + kill(pid, SIGKILL); return JNI_FALSE; } @@ -172,8 +204,10 @@ extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindOtherProcess(JNIEnv*, jobj std::unique_ptr<Backtrace> bt(Backtrace::Create(pid, BACKTRACE_CURRENT_THREAD)); bool result = true; if (!bt->Unwind(0, nullptr)) { + printf("Can not unwind other process.\n"); result = false; } else if (bt->NumFrames() == 0) { + printf("No frames for unwind of other process.\n"); result = false; } @@ -185,6 +219,7 @@ extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindOtherProcess(JNIEnv*, jobj // Note: For some reason, the name isn't // resolved, so don't look for it right now. "boolean Main.sleep(int, boolean, double)", // The corresponding Java native method frame. + "int java.util.Arrays.binarySearch(java.lang.Object[], int, int, java.lang.Object, java.util.Comparator)", // Framework method. "void Main.main(java.lang.String[])" // The Java entry method. }; @@ -195,8 +230,8 @@ extern "C" JNIEXPORT jboolean JNICALL Java_Main_unwindOtherProcess(JNIEnv*, jobj PLOG(ERROR) << "Detach failed"; } - // Continue the process so we can kill it on the Java side. - kill(pid, SIGCONT); + // Kill the other process once we are done with it. + kill(pid, SIGKILL); return result ? JNI_TRUE : JNI_FALSE; #else diff --git a/test/137-cfi/run b/test/137-cfi/run new file mode 100755 index 0000000000..78cf2aaf8d --- /dev/null +++ b/test/137-cfi/run @@ -0,0 +1,21 @@ +#!/bin/bash +# +# Copyright (C) 2008 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Temporarily disable address space layout randomization (ASLR). +# This is needed on host so that the linker loads core.oat at a fixed address. 
+export LD_USE_LOAD_BIAS=1 + +exec ${RUN} "$@" diff --git a/test/137-cfi/src/Main.java b/test/137-cfi/src/Main.java index e184e66e6f..6cd187a033 100644 --- a/test/137-cfi/src/Main.java +++ b/test/137-cfi/src/Main.java @@ -20,11 +20,13 @@ import java.io.InputStream; import java.io.OutputStream; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.util.Arrays; +import java.util.Comparator; -public class Main { +public class Main implements Comparator<Main> { // Whether to test local unwinding. Libunwind uses linker info to find executables. As we do // not dlopen at the moment, this doesn't work, so keep it off for now. - public final static boolean TEST_LOCAL_UNWINDING = false; + public final static boolean TEST_LOCAL_UNWINDING = true; // Unwinding another process, modelling debuggerd. This doesn't use the linker, so should work // no matter whether we're using dlopen or not. @@ -32,6 +34,8 @@ public class Main { private boolean secondary; + private boolean passed; + public Main(boolean secondary) { this.secondary = secondary; } @@ -60,13 +64,13 @@ public class Main { } private void runSecondary() { - foo(true); + foo(); throw new RuntimeException("Didn't expect to get back..."); } private void runPrimary() { // First do the in-process unwinding. - if (TEST_LOCAL_UNWINDING && !foo(false)) { + if (TEST_LOCAL_UNWINDING && !foo()) { System.out.println("Unwinding self failed."); } @@ -99,7 +103,7 @@ public class Main { System.out.println("Unwinding other process failed."); } } finally { - // Kill the forked process. + // Kill the forked process if it is not already dead. p.destroy(); } } @@ -134,8 +138,19 @@ public class Main { } } - public boolean foo(boolean b) { - return bar(b); + public boolean foo() { + // Call bar via Arrays.binarySearch. + // This tests that we can unwind from framework code. + Main[] array = { this, this, this }; + Arrays.binarySearch(array, 0, 3, this /* value */, this /* comparator */); + return passed; + } + + public int compare(Main lhs, Main rhs) { + passed = bar(secondary); + // Returning "equal" ensures that we terminate search + // after first item and thus call bar() only once. 
+ return 0; } public boolean bar(boolean b) { diff --git a/test/444-checker-nce/src/Main.java b/test/444-checker-nce/src/Main.java index 6ac0cad7e8..32122e4dcd 100644 --- a/test/444-checker-nce/src/Main.java +++ b/test/444-checker-nce/src/Main.java @@ -27,37 +27,37 @@ public class Main { return m.g(); } - /// CHECK-START: Main Main.thisTest() instruction_simplifier (before) + /// CHECK-START: Main Main.thisTest() ssa_builder (after) /// CHECK: NullCheck /// CHECK: InvokeStaticOrDirect - /// CHECK-START: Main Main.thisTest() instruction_simplifier (after) + /// CHECK-START: Main Main.thisTest() instruction_simplifier_after_types (after) /// CHECK-NOT: NullCheck /// CHECK: InvokeStaticOrDirect public Main thisTest() { return g(); } - /// CHECK-START: Main Main.newInstanceRemoveTest() instruction_simplifier (before) + /// CHECK-START: Main Main.newInstanceRemoveTest() ssa_builder (after) /// CHECK: NewInstance /// CHECK: NullCheck /// CHECK: InvokeStaticOrDirect /// CHECK: NullCheck /// CHECK: InvokeStaticOrDirect - /// CHECK-START: Main Main.newInstanceRemoveTest() instruction_simplifier (after) + /// CHECK-START: Main Main.newInstanceRemoveTest() instruction_simplifier_after_types (after) /// CHECK-NOT: NullCheck public Main newInstanceRemoveTest() { Main m = new Main(); return m.g(); } - /// CHECK-START: Main Main.newArrayRemoveTest() instruction_simplifier (before) + /// CHECK-START: Main Main.newArrayRemoveTest() ssa_builder (after) /// CHECK: NewArray /// CHECK: NullCheck /// CHECK: ArrayGet - /// CHECK-START: Main Main.newArrayRemoveTest() instruction_simplifier (after) + /// CHECK-START: Main Main.newArrayRemoveTest() instruction_simplifier_after_types (after) /// CHECK: NewArray /// CHECK-NOT: NullCheck /// CHECK: ArrayGet @@ -178,10 +178,10 @@ public class Main { return n.g(); } - /// CHECK-START: Main Main.scopeRemoveTest(int, Main) instruction_simplifier (before) + /// CHECK-START: Main Main.scopeRemoveTest(int, Main) ssa_builder (after) /// CHECK: NullCheck - /// CHECK-START: Main Main.scopeRemoveTest(int, Main) instruction_simplifier (after) + /// CHECK-START: Main Main.scopeRemoveTest(int, Main) instruction_simplifier_after_types (after) /// CHECK-NOT: NullCheck public Main scopeRemoveTest(int count, Main a) { Main m = null; diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java index 8960df896b..ed6fc1ee2b 100644 --- a/test/449-checker-bce/src/Main.java +++ b/test/449-checker-bce/src/Main.java @@ -617,15 +617,21 @@ public class Main { /// CHECK: ArrayGet /// CHECK-START: void Main.foo1(int[], int, int) BCE (after) - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto void foo1(int[] array, int start, int end) { // Three HDeoptimize will be added. One for @@ -646,15 +652,21 @@ public class Main { /// CHECK: ArrayGet /// CHECK-START: void Main.foo2(int[], int, int) BCE (after) - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet + // Added blocks for deoptimization. 
+ /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto void foo2(int[] array, int start, int end) { // Three HDeoptimize will be added. One for @@ -675,14 +687,20 @@ public class Main { /// CHECK: ArrayGet /// CHECK-START: void Main.foo3(int[], int) BCE (after) - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto void foo3(int[] array, int end) { // Two HDeoptimize will be added. One for end < array.length, @@ -694,6 +712,7 @@ public class Main { } } + /// CHECK-START: void Main.foo4(int[], int) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet @@ -701,14 +720,20 @@ public class Main { /// CHECK: ArrayGet /// CHECK-START: void Main.foo4(int[], int) BCE (after) - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto void foo4(int[] array, int end) { // Two HDeoptimize will be added. One for end <= array.length, @@ -734,8 +759,6 @@ public class Main { /// CHECK-START: void Main.foo5(int[], int) BCE (after) /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet @@ -743,6 +766,15 @@ public class Main { /// CHECK: ArrayGet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + // array.length is defined before the loop header so no phi is needed. + /// CHECK-NOT: Phi + /// CHECK: Goto void foo5(int[] array, int end) { // Bounds check in this loop can be eliminated without deoptimization. @@ -774,10 +806,6 @@ public class Main { /// CHECK: ArraySet /// CHECK-START: void Main.foo6(int[], int, int) BCE (after) - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet @@ -791,6 +819,17 @@ public class Main { /// CHECK: ArrayGet /// CHECK-NOT: BoundsCheck /// CHECK: ArraySet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto + /// CHECK-NOT: Deoptimize void foo6(int[] array, int start, int end) { // Three HDeoptimize will be added. One for @@ -810,15 +849,21 @@ public class Main { /// CHECK: ArrayGet /// CHECK-START: void Main.foo7(int[], int, int, boolean) BCE (after) - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK: Deoptimize - /// CHECK-NOT: Deoptimize /// CHECK: Phi /// CHECK: BoundsCheck /// CHECK: ArrayGet /// CHECK-NOT: BoundsCheck /// CHECK: ArrayGet + // Added blocks for deoptimization. 
+ /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto void foo7(int[] array, int start, int end, boolean lowEnd) { // Three HDeoptimize will be added. One for @@ -837,6 +882,73 @@ public class Main { } + /// CHECK-START: void Main.foo8(int[][], int, int) BCE (before) + /// CHECK: BoundsCheck + /// CHECK: ArrayGet + /// CHECK: BoundsCheck + /// CHECK: ArraySet + + /// CHECK-START: void Main.foo8(int[][], int, int) BCE (after) + /// CHECK: Phi + /// CHECK-NOT: BoundsCheck + /// CHECK: ArrayGet + /// CHECK: Phi + /// CHECK-NOT: BoundsCheck + /// CHECK: ArraySet + // Added blocks for deoptimization. + /// CHECK: If + /// CHECK: Goto + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Goto + /// CHECK: Phi + /// CHECK: Goto + + void foo8(int[][] matrix, int start, int end) { + // Three HDeoptimize will be added for the outer loop. + // start >= 0, end <= matrix.length, and null check on matrix. + // Three HDeoptimize will be added for the inner loop + // start >= 0 (TODO: this may be optimized away), + // end <= row.length, and null check on row. + for (int i = start; i < end; i++) { + int[] row = matrix[i]; + for (int j = start; j < end; j++) { + row[j] = 1; + } + } + } + + + /// CHECK-START: void Main.foo9(int[]) BCE (before) + /// CHECK: NullCheck + /// CHECK: BoundsCheck + /// CHECK: ArrayGet + + /// CHECK-START: void Main.foo9(int[]) BCE (after) + // The loop is guaranteed to be entered. No need to transform the + // loop for loop body entry test. + /// CHECK: Deoptimize + /// CHECK: Deoptimize + /// CHECK-NOT: Deoptimize + /// CHECK: Phi + /// CHECK-NOT: NullCheck + /// CHECK-NOT: BoundsCheck + /// CHECK: ArrayGet + + void foo9(int[] array) { + // Two HDeoptimize will be added. One for + // 10 <= array.length, and one for null check on array. 
+ for (int i = 0 ; i < 10; i++) { + sum += array[i]; + } + } + + /// CHECK-START: void Main.partialLooping(int[], int, int) BCE (before) /// CHECK: BoundsCheck /// CHECK: ArraySet @@ -951,6 +1063,13 @@ public class Main { main.foo6(new int[10], 2, 7); main = new Main(); + int[] array9 = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + main.foo9(array9); + if (main.sum != 45) { + System.out.println("foo9 failed!"); + } + + main = new Main(); int[] array = new int[4]; main.partialLooping(new int[3], 0, 4); if ((array[0] != 1) && (array[1] != 1) && diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java index 4056275d3d..9070627f1c 100644 --- a/test/450-checker-types/src/Main.java +++ b/test/450-checker-types/src/Main.java @@ -364,6 +364,37 @@ public class Main { ((SubclassA)b).$noinline$g(); } + public SubclassA $noinline$getSubclass() { throw new RuntimeException(); } + + /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier_after_types (before) + /// CHECK: CheckCast + + /// CHECK-START: void Main.testArraySimpleRemove() instruction_simplifier_after_types (after) + /// CHECK-NOT: CheckCast + public void testArraySimpleRemove() { + Super[] b = new SubclassA[10]; + SubclassA[] c = (SubclassA[])b; + } + + /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier_after_types (before) + /// CHECK: CheckCast + + /// CHECK-START: void Main.testInvokeSimpleRemove() instruction_simplifier_after_types (after) + /// CHECK-NOT: CheckCast + public void testInvokeSimpleRemove() { + Super b = $noinline$getSubclass(); + ((SubclassA)b).$noinline$g(); + } + /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier_after_types (before) + /// CHECK: CheckCast + + /// CHECK-START: void Main.testArrayGetSimpleRemove() instruction_simplifier_after_types (after) + /// CHECK-NOT: CheckCast + public void testArrayGetSimpleRemove() { + Super[] a = new SubclassA[10]; + ((SubclassA)a[0]).$noinline$g(); + } + public static void main(String[] args) { } } diff --git a/test/458-checker-instruction-simplification/src/Main.java b/test/458-checker-instruction-simplification/src/Main.java index ad5fc8ef93..3c3b939aeb 100644 --- a/test/458-checker-instruction-simplification/src/Main.java +++ b/test/458-checker-instruction-simplification/src/Main.java @@ -933,18 +933,18 @@ public class Main { * remove the second. 
*/ - /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_types (before) + /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (before) /// CHECK-DAG: <<Arg:z\d+>> ParameterValue /// CHECK-DAG: <<NotArg:z\d+>> BooleanNot [<<Arg>>] /// CHECK-DAG: <<NotNotArg:z\d+>> BooleanNot [<<NotArg>>] /// CHECK-DAG: Return [<<NotNotArg>>] - /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_types (after) + /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (after) /// CHECK-DAG: <<Arg:z\d+>> ParameterValue /// CHECK-DAG: BooleanNot [<<Arg>>] /// CHECK-DAG: Return [<<Arg>>] - /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_types (after) + /// CHECK-START: boolean Main.NotNotBool(boolean) instruction_simplifier_after_bce (after) /// CHECK: BooleanNot /// CHECK-NOT: BooleanNot diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java index 5754723d8a..a4280de749 100644 --- a/test/482-checker-loop-back-edge-use/src/Main.java +++ b/test/482-checker-loop-back-edge-use/src/Main.java @@ -36,8 +36,8 @@ public class Main { } /// CHECK-START: void Main.loop3(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,62)} uses:[58,62] - /// CHECK: Goto liveness:60 + /// CHECK: ParameterValue liveness:4 ranges:{[4,64)} uses:[60,64] + /// CHECK: Goto liveness:62 /// CHECK-START: void Main.loop3(boolean) liveness (after) /// CHECK-NOT: Goto liveness:56 @@ -63,9 +63,9 @@ public class Main { } /// CHECK-START: void Main.loop5(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,52)} uses:[35,44,48,52] - /// CHECK: Goto liveness:46 - /// CHECK: Goto liveness:50 + /// CHECK: ParameterValue liveness:4 ranges:{[4,54)} uses:[37,46,50,54] + /// CHECK: Goto liveness:48 + /// CHECK: Goto liveness:52 public static void loop5(boolean incoming) { // 'incoming' must have a use at both back edges. while (Runtime.getRuntime() != null) { @@ -76,8 +76,8 @@ public class Main { } /// CHECK-START: void Main.loop6(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,48)} uses:[26,48] - /// CHECK: Goto liveness:46 + /// CHECK: ParameterValue liveness:4 ranges:{[4,50)} uses:[26,50] + /// CHECK: Goto liveness:48 /// CHECK-START: void Main.loop6(boolean) liveness (after) /// CHECK-NOT: Goto liveness:24 @@ -90,9 +90,9 @@ public class Main { } /// CHECK-START: void Main.loop7(boolean) liveness (after) - /// CHECK: ParameterValue liveness:4 ranges:{[4,52)} uses:[34,43,48,52] - /// CHECK: Goto liveness:46 - /// CHECK: Goto liveness:50 + /// CHECK: ParameterValue liveness:4 ranges:{[4,54)} uses:[36,45,50,54] + /// CHECK: Goto liveness:48 + /// CHECK: Goto liveness:52 public static void loop7(boolean incoming) { // 'incoming' must have a use at both back edges. while (Runtime.getRuntime() != null) { @@ -102,9 +102,9 @@ public class Main { } /// CHECK-START: void Main.loop8() liveness (after) - /// CHECK: StaticFieldGet liveness:14 ranges:{[14,46)} uses:[37,42,46] - /// CHECK: Goto liveness:40 - /// CHECK: Goto liveness:44 + /// CHECK: StaticFieldGet liveness:14 ranges:{[14,48)} uses:[39,44,48] + /// CHECK: Goto liveness:42 + /// CHECK: Goto liveness:46 public static void loop8() { // 'incoming' must have a use at both back edges. 
boolean incoming = field; @@ -114,8 +114,8 @@ public class Main { } /// CHECK-START: void Main.loop9() liveness (after) - /// CHECK: StaticFieldGet liveness:24 ranges:{[24,38)} uses:[33,38] - /// CHECK: Goto liveness:40 + /// CHECK: StaticFieldGet liveness:26 ranges:{[26,40)} uses:[35,40] + /// CHECK: Goto liveness:42 public static void loop9() { while (Runtime.getRuntime() != null) { // 'incoming' must only have a use in the inner loop. diff --git a/test/490-checker-inline/expected.txt b/test/490-checker-inline/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/490-checker-inline/expected.txt diff --git a/test/490-checker-inline/info.txt b/test/490-checker-inline/info.txt new file mode 100644 index 0000000000..0e42d771fe --- /dev/null +++ b/test/490-checker-inline/info.txt @@ -0,0 +1 @@ +Check that we inline virtual and interface calls. diff --git a/test/490-checker-inline/src/Main.java b/test/490-checker-inline/src/Main.java new file mode 100644 index 0000000000..21a01897e2 --- /dev/null +++ b/test/490-checker-inline/src/Main.java @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +interface Itf { + public void invokeInterface(); +} + +public class Main implements Itf { + + public void invokeInterface () { + } + + public void invokeVirtual() { + } + + public static Main createMain() { + return new Main(); + } + + public static Itf createItf() { + return new Main(); + } + + /// CHECK-START: void Main.testMethod() inliner (before) + /// CHECK-DAG: InvokeVirtual + /// CHECK-DAG: InvokeInterface + + /// CHECK-START: void Main.testMethod() inliner (after) + /// CHECK-NOT: Invoke{{.*}} + + public static void testMethod() { + createMain().invokeVirtual(); + createItf().invokeInterface(); + } + + public static void main(String[] args) { + testMethod(); + } +} diff --git a/test/493-checker-inline-invoke-interface/expected.txt b/test/493-checker-inline-invoke-interface/expected.txt new file mode 100644 index 0000000000..93620a6fb5 --- /dev/null +++ b/test/493-checker-inline-invoke-interface/expected.txt @@ -0,0 +1,5 @@ +Hello from clinit +java.lang.Exception + at ForceStatic.<clinit>(Main.java:24) + at Main.foo(Main.java:31) + at Main.main(Main.java:42) diff --git a/test/493-checker-inline-invoke-interface/info.txt b/test/493-checker-inline-invoke-interface/info.txt new file mode 100644 index 0000000000..bac9c82c9d --- /dev/null +++ b/test/493-checker-inline-invoke-interface/info.txt @@ -0,0 +1,2 @@ +Check that we can optimize interface calls without +requiring the verifier to sharpen them. 
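The 493-checker-inline-invoke-interface test above relies on Java's lazy class-initialization rule: a static initializer runs exactly when the class's static state is first touched, and an inlining compiler must not move that point. A minimal standalone sketch of that rule follows (LazyInit, ClinitTimingDemo and their members are invented for illustration and are not part of this change):

class LazyInit {
  static {
    // Runs only when LazyInit is first initialized, not at program start.
    System.out.println("LazyInit.<clinit> runs now");
  }
  static int field = 42;
}

public class ClinitTimingDemo {
  // Reading LazyInit.field is the first and only trigger for LazyInit.<clinit>;
  // an inlining compiler has to preserve that ordering.
  static int readField() {
    return LazyInit.field;
  }

  public static void main(String[] args) {
    System.out.println("before first access");
    System.out.println("field = " + readField()); // <clinit> message prints here, not earlier
  }
}

Run on a stock JVM or an ART host build, this should print "before first access" before the initializer message, which is the same ordering the test's expected.txt pins down for ForceStatic.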
diff --git a/test/493-checker-inline-invoke-interface/src/Main.java b/test/493-checker-inline-invoke-interface/src/Main.java new file mode 100644 index 0000000000..44b727fe55 --- /dev/null +++ b/test/493-checker-inline-invoke-interface/src/Main.java @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +interface Itf { + public void foo(); +} + +class ForceStatic { + static { + System.out.println("Hello from clinit"); + new Exception().printStackTrace(); + } + static int field; +} + +public class Main implements Itf { + public void foo() { + int a = ForceStatic.field; + } + + /// CHECK-START: void Main.main(java.lang.String[]) inliner (before) + /// CHECK: InvokeStaticOrDirect + /// CHECK: InvokeInterface + + /// CHECK-START: void Main.main(java.lang.String[]) inliner (after) + /// CHECK-NOT: Invoke{{.*}} + public static void main(String[] args) { + Itf itf = bar(); + itf.foo(); + } + + public static Itf bar() { + return new Main(); + } +} diff --git a/test/494-checker-instanceof-tests/expected.txt b/test/494-checker-instanceof-tests/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/494-checker-instanceof-tests/expected.txt diff --git a/test/494-checker-instanceof-tests/info.txt b/test/494-checker-instanceof-tests/info.txt new file mode 100644 index 0000000000..59e20bd6a9 --- /dev/null +++ b/test/494-checker-instanceof-tests/info.txt @@ -0,0 +1 @@ +Checker test for optimizations on instanceof. diff --git a/test/494-checker-instanceof-tests/src/Main.java b/test/494-checker-instanceof-tests/src/Main.java new file mode 100644 index 0000000000..bff9c72ded --- /dev/null +++ b/test/494-checker-instanceof-tests/src/Main.java @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class Main { + public static boolean $inline$classTypeTest(Object o) { + return o instanceof SubMain; + } + + public static boolean $inline$interfaceTypeTest(Object o) { + return o instanceof Itf; + } + + public static SubMain subMain; + public static Main mainField; + public static Unrelated unrelatedField; + public static FinalUnrelated finalUnrelatedField; + + /// CHECK-START: boolean Main.classTypeTestNull() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean classTypeTestNull() { + return $inline$classTypeTest(null); + } + + /// CHECK-START: boolean Main.classTypeTestExactMain() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean classTypeTestExactMain() { + return $inline$classTypeTest(new Main()); + } + + /// CHECK-START: boolean Main.classTypeTestExactSubMain() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 1 + /// CHECK-DAG: Return [<<Const>>] + public static boolean classTypeTestExactSubMain() { + return $inline$classTypeTest(new SubMain()); + } + + /// CHECK-START: boolean Main.classTypeTestSubMainOrNull() register (after) + /// CHECK-DAG: <<Value:z\d+>> NotEqual + /// CHECK-DAG: Return [<<Value>>] + public static boolean classTypeTestSubMainOrNull() { + return $inline$classTypeTest(subMain); + } + + /// CHECK-START: boolean Main.classTypeTestMainOrNull() register (after) + /// CHECK-DAG: <<Value:z\d+>> InstanceOf + /// CHECK-DAG: Return [<<Value>>] + public static boolean classTypeTestMainOrNull() { + return $inline$classTypeTest(mainField); + } + + /// CHECK-START: boolean Main.classTypeTestUnrelated() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean classTypeTestUnrelated() { + return $inline$classTypeTest(unrelatedField); + } + + /// CHECK-START: boolean Main.classTypeTestFinalUnrelated() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean classTypeTestFinalUnrelated() { + return $inline$classTypeTest(finalUnrelatedField); + } + + /// CHECK-START: boolean Main.interfaceTypeTestNull() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean interfaceTypeTestNull() { + return $inline$interfaceTypeTest(null); + } + + /// CHECK-START: boolean Main.interfaceTypeTestExactMain() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean interfaceTypeTestExactMain() { + return $inline$interfaceTypeTest(new Main()); + } + + /// CHECK-START: boolean Main.interfaceTypeTestExactSubMain() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 1 + /// CHECK-DAG: Return [<<Const>>] + public static boolean interfaceTypeTestExactSubMain() { + return $inline$interfaceTypeTest(new SubMain()); + } + + /// CHECK-START: boolean Main.interfaceTypeTestSubMainOrNull() register (after) + /// CHECK-DAG: <<Value:z\d+>> NotEqual + /// CHECK-DAG: Return [<<Value>>] + public static boolean interfaceTypeTestSubMainOrNull() { + return $inline$interfaceTypeTest(subMain); + } + + /// CHECK-START: boolean Main.interfaceTypeTestMainOrNull() register (after) + /// CHECK-DAG: <<Value:z\d+>> InstanceOf + /// CHECK-DAG: Return [<<Value>>] + public static boolean interfaceTypeTestMainOrNull() { + return $inline$interfaceTypeTest(mainField); + } + + /// CHECK-START: boolean 
Main.interfaceTypeTestUnrelated() register (after) + /// CHECK-DAG: <<Value:z\d+>> InstanceOf + /// CHECK-DAG: Return [<<Value>>] + public static boolean interfaceTypeTestUnrelated() { + // This method is the main difference between doing an instanceof on an interface + // or a class. We have to keep the instanceof in case a subclass of Unrelated + // implements the interface. + return $inline$interfaceTypeTest(unrelatedField); + } + + /// CHECK-START: boolean Main.interfaceTypeTestFinalUnrelated() register (after) + /// CHECK-DAG: <<Const:i\d+>> IntConstant 0 + /// CHECK-DAG: Return [<<Const>>] + public static boolean interfaceTypeTestFinalUnrelated() { + return $inline$interfaceTypeTest(finalUnrelatedField); + } + + public static void expect(boolean expected, boolean actual) { + if (expected != actual) { + throw new Error("Unexpected result"); + } + } + + public static void main(String[] args) { + expect(false, classTypeTestNull()); + expect(false, classTypeTestExactMain()); + expect(true, classTypeTestExactSubMain()); + + subMain = null; + expect(false, classTypeTestSubMainOrNull()); + subMain = new SubMain(); + expect(true, classTypeTestSubMainOrNull()); + + mainField = null; + expect(false, classTypeTestMainOrNull()); + mainField = new Main(); + expect(false, classTypeTestMainOrNull()); + mainField = new SubMain(); + expect(true, classTypeTestMainOrNull()); + + unrelatedField = null; + expect(false, classTypeTestUnrelated()); + unrelatedField = new Unrelated(); + expect(false, classTypeTestUnrelated()); + + finalUnrelatedField = null; + expect(false, classTypeTestFinalUnrelated()); + finalUnrelatedField = new FinalUnrelated(); + expect(false, classTypeTestFinalUnrelated()); + + expect(false, interfaceTypeTestNull()); + expect(false, interfaceTypeTestExactMain()); + expect(true, interfaceTypeTestExactSubMain()); + + subMain = null; + expect(false, interfaceTypeTestSubMainOrNull()); + subMain = new SubMain(); + expect(true, interfaceTypeTestSubMainOrNull()); + + mainField = null; + expect(false, interfaceTypeTestMainOrNull()); + mainField = new Main(); + expect(false, interfaceTypeTestMainOrNull()); + mainField = new SubMain(); + expect(true, interfaceTypeTestMainOrNull()); + + unrelatedField = null; + expect(false, interfaceTypeTestUnrelated()); + unrelatedField = new Unrelated(); + expect(false, interfaceTypeTestUnrelated()); + + finalUnrelatedField = null; + expect(false, interfaceTypeTestFinalUnrelated()); + finalUnrelatedField = new FinalUnrelated(); + expect(false, interfaceTypeTestFinalUnrelated()); + } +} + +interface Itf { +} + +class SubMain extends Main implements Itf { +} + +class Unrelated { +} + +final class FinalUnrelated { +} diff --git a/test/495-checker-checkcast-tests/expected.txt b/test/495-checker-checkcast-tests/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/495-checker-checkcast-tests/expected.txt diff --git a/test/495-checker-checkcast-tests/info.txt b/test/495-checker-checkcast-tests/info.txt new file mode 100644 index 0000000000..4517b22c69 --- /dev/null +++ b/test/495-checker-checkcast-tests/info.txt @@ -0,0 +1 @@ +Checker tests for optimizations on checkcast. 
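As background for the 495 checkcast sources that follow (and the 494 instanceof tests just above): these optimizations may only remove a check whose outcome the language already guarantees. A small self-contained sketch of those guarantees, using invented types (Base, Derived and CastDemo are illustrative only, not part of the change):

class Base {}
class Derived extends Base {}

public class CastDemo {
  public static void main(String[] args) {
    Base knownDerived = new Derived();
    // Statically provable: an optimizing compiler may drop the InstanceOf/CheckCast here.
    System.out.println(knownDerived instanceof Derived); // true
    Derived d = (Derived) knownDerived;                  // never throws

    Base justBase = new Base();
    System.out.println(justBase instanceof Derived);     // false
    try {
      Derived bad = (Derived) justBase;                  // must still throw
    } catch (ClassCastException expected) {
      System.out.println("cast of a plain Base throws");
    }

    // null fails any instanceof but passes any cast.
    Base nothing = null;
    System.out.println(nothing instanceof Derived);      // false
    Derived ok = (Derived) nothing;                      // no exception
    System.out.println("null cast gave " + ok);
  }
}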
diff --git a/test/495-checker-checkcast-tests/src/Main.java b/test/495-checker-checkcast-tests/src/Main.java new file mode 100644 index 0000000000..aa6d5a75f7 --- /dev/null +++ b/test/495-checker-checkcast-tests/src/Main.java @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + public static boolean $inline$classTypeTest(Object o) { + return ((SubMain)o) == o; + } + + public static boolean $inline$interfaceTypeTest(Object o) { + return ((Itf)o) == o; + } + + public static SubMain subMain; + public static Main mainField; + public static Unrelated unrelatedField; + public static FinalUnrelated finalUnrelatedField; + + /// CHECK-START: boolean Main.classTypeTestNull() register (after) + /// CHECK-NOT: CheckCast + public static boolean classTypeTestNull() { + return $inline$classTypeTest(null); + } + + /// CHECK-START: boolean Main.classTypeTestExactMain() register (after) + /// CHECK: CheckCast + public static boolean classTypeTestExactMain() { + return $inline$classTypeTest(new Main()); + } + + /// CHECK-START: boolean Main.classTypeTestExactSubMain() register (after) + /// CHECK-NOT: CheckCast + public static boolean classTypeTestExactSubMain() { + return $inline$classTypeTest(new SubMain()); + } + + /// CHECK-START: boolean Main.classTypeTestSubMainOrNull() register (after) + /// CHECK-NOT: CheckCast + public static boolean classTypeTestSubMainOrNull() { + return $inline$classTypeTest(subMain); + } + + /// CHECK-START: boolean Main.classTypeTestMainOrNull() register (after) + /// CHECK: CheckCast + public static boolean classTypeTestMainOrNull() { + return $inline$classTypeTest(mainField); + } + + /// CHECK-START: boolean Main.classTypeTestUnrelated() register (after) + /// CHECK: CheckCast + public static boolean classTypeTestUnrelated() { + return $inline$classTypeTest(unrelatedField); + } + + /// CHECK-START: boolean Main.classTypeTestFinalUnrelated() register (after) + /// CHECK: CheckCast + public static boolean classTypeTestFinalUnrelated() { + return $inline$classTypeTest(finalUnrelatedField); + } + + /// CHECK-START: boolean Main.interfaceTypeTestNull() register (after) + /// CHECK-NOT: CheckCast + public static boolean interfaceTypeTestNull() { + return $inline$interfaceTypeTest(null); + } + + /// CHECK-START: boolean Main.interfaceTypeTestExactMain() register (after) + /// CHECK: CheckCast + public static boolean interfaceTypeTestExactMain() { + return $inline$interfaceTypeTest(new Main()); + } + + /// CHECK-START: boolean Main.interfaceTypeTestExactSubMain() register (after) + /// CHECK-NOT: CheckCast + public static boolean interfaceTypeTestExactSubMain() { + return $inline$interfaceTypeTest(new SubMain()); + } + + /// CHECK-START: boolean Main.interfaceTypeTestSubMainOrNull() register (after) + /// CHECK-NOT: CheckCast + public static boolean interfaceTypeTestSubMainOrNull() { + return $inline$interfaceTypeTest(subMain); + } + + /// CHECK-START: boolean 
Main.interfaceTypeTestMainOrNull() register (after) + /// CHECK: CheckCast + public static boolean interfaceTypeTestMainOrNull() { + return $inline$interfaceTypeTest(mainField); + } + + /// CHECK-START: boolean Main.interfaceTypeTestUnrelated() register (after) + /// CHECK: CheckCast + public static boolean interfaceTypeTestUnrelated() { + return $inline$interfaceTypeTest(unrelatedField); + } + + /// CHECK-START: boolean Main.interfaceTypeTestFinalUnrelated() register (after) + /// CHECK: CheckCast + public static boolean interfaceTypeTestFinalUnrelated() { + return $inline$interfaceTypeTest(finalUnrelatedField); + } + + public static void main(String[] args) { + classTypeTestNull(); + try { + classTypeTestExactMain(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + classTypeTestExactSubMain(); + + subMain = null; + classTypeTestSubMainOrNull(); + subMain = new SubMain(); + classTypeTestSubMainOrNull(); + + mainField = null; + classTypeTestMainOrNull(); + mainField = new Main(); + try { + classTypeTestMainOrNull(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + mainField = new SubMain(); + classTypeTestMainOrNull(); + + unrelatedField = null; + classTypeTestUnrelated(); + unrelatedField = new Unrelated(); + try { + classTypeTestUnrelated(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + + finalUnrelatedField = null; + classTypeTestFinalUnrelated(); + finalUnrelatedField = new FinalUnrelated(); + try { + classTypeTestFinalUnrelated(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + + interfaceTypeTestNull(); + try { + interfaceTypeTestExactMain(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + interfaceTypeTestExactSubMain(); + + subMain = null; + interfaceTypeTestSubMainOrNull(); + subMain = new SubMain(); + interfaceTypeTestSubMainOrNull(); + + mainField = null; + interfaceTypeTestMainOrNull(); + mainField = new Main(); + try { + interfaceTypeTestMainOrNull(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + mainField = new SubMain(); + interfaceTypeTestMainOrNull(); + + unrelatedField = null; + interfaceTypeTestUnrelated(); + unrelatedField = new Unrelated(); + try { + interfaceTypeTestUnrelated(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + + finalUnrelatedField = null; + interfaceTypeTestFinalUnrelated(); + finalUnrelatedField = new FinalUnrelated(); + try { + interfaceTypeTestFinalUnrelated(); + throw new Error("ClassCastException expected"); + } catch (ClassCastException e) {} + } +} + +interface Itf { +} + +class SubMain extends Main implements Itf { +} + +class Unrelated { +} + +final class FinalUnrelated { +} diff --git a/test/496-checker-inlining-and-class-loader/expected.txt b/test/496-checker-inlining-and-class-loader/expected.txt new file mode 100644 index 0000000000..c6fcb51ecf --- /dev/null +++ b/test/496-checker-inlining-and-class-loader/expected.txt @@ -0,0 +1,4 @@ +Request for LoadedByMyClassLoader +Request for Main +In between the two calls. +In $noinline$bar diff --git a/test/496-checker-inlining-and-class-loader/info.txt b/test/496-checker-inlining-and-class-loader/info.txt new file mode 100644 index 0000000000..aa4b256207 --- /dev/null +++ b/test/496-checker-inlining-and-class-loader/info.txt @@ -0,0 +1,2 @@ +Regression test to ensure compilers preserve JLS +semantics of class loading. 
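The 496 and 497 sources that follow build a fairly involved MyClassLoader; the piece that matters for the regression is the standard loadClass delegation pattern, where a loader serves some classes itself and hands the rest to its parent. A stripped-down, runnable sketch of just that pattern (LoggingLoader and LoaderDemo are invented names; a real test would additionally define classes from its own dex or class bytes):

import java.util.ArrayList;
import java.util.List;

class LoggingLoader extends ClassLoader {
  final List<String> requests = new ArrayList<String>();

  LoggingLoader(ClassLoader parent) {
    super(parent);
  }

  @Override
  protected Class<?> loadClass(String name, boolean resolve) throws ClassNotFoundException {
    requests.add(name);                    // observe which classes are requested, and when
    return super.loadClass(name, resolve); // delegate everything to the parent loader
  }
}

public class LoaderDemo {
  public static void main(String[] args) throws Exception {
    LoggingLoader loader = new LoggingLoader(LoaderDemo.class.getClassLoader());
    Class<?> c = Class.forName("java.util.HashMap", true, loader);
    System.out.println("loaded " + c.getName() + "; requests seen: " + loader.requests);
  }
}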
diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java new file mode 100644 index 0000000000..f6d0b41a58 --- /dev/null +++ b/test/496-checker-inlining-and-class-loader/src/Main.java @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.List; + +class MyClassLoader extends ClassLoader { + MyClassLoader() throws Exception { + super(MyClassLoader.class.getClassLoader()); + + // Some magic to get access to the pathList field of BaseDexClassLoader. + ClassLoader loader = getClass().getClassLoader(); + Class<?> baseDexClassLoader = loader.getClass().getSuperclass(); + Field f = baseDexClassLoader.getDeclaredField("pathList"); + f.setAccessible(true); + Object pathList = f.get(loader); + + // Some magic to get access to the dexField field of pathList. + f = pathList.getClass().getDeclaredField("dexElements"); + f.setAccessible(true); + dexElements = (Object[]) f.get(pathList); + dexFileField = dexElements[0].getClass().getDeclaredField("dexFile"); + dexFileField.setAccessible(true); + } + + Object[] dexElements; + Field dexFileField; + + protected Class<?> loadClass(String className, boolean resolve) throws ClassNotFoundException { + System.out.println("Request for " + className); + + // We're only going to handle LoadedByMyClassLoader. + if (className != "LoadedByMyClassLoader") { + return getParent().loadClass(className); + } + + // Mimic what DexPathList.findClass is doing. 
+ try { + for (Object element : dexElements) { + Object dex = dexFileField.get(element); + Method method = dex.getClass().getDeclaredMethod( + "loadClassBinaryName", String.class, ClassLoader.class, List.class); + + if (dex != null) { + Class clazz = (Class)method.invoke(dex, className, this, null); + if (clazz != null) { + return clazz; + } + } + } + } catch (Exception e) { /* Ignore */ } + return null; + } +} + +class LoadedByMyClassLoader { + /// CHECK-START: void LoadedByMyClassLoader.bar() inliner (before) + /// CHECK: LoadClass + /// CHECK-NEXT: ClinitCheck + /// CHECK-NEXT: InvokeStaticOrDirect + /// CHECK-NEXT: LoadClass + /// CHECK-NEXT: ClinitCheck + /// CHECK-NEXT: StaticFieldGet + /// CHECK-NEXT: LoadString + /// CHECK-NEXT: NullCheck + /// CHECK-NEXT: InvokeVirtual + + /// CHECK-START: void LoadedByMyClassLoader.bar() inliner (after) + /// CHECK: LoadClass + /// CHECK-NEXT: ClinitCheck + /* We inlined Main.$inline$bar */ + /// CHECK-NEXT: LoadClass + /// CHECK-NEXT: ClinitCheck + /// CHECK-NEXT: StaticFieldGet + /// CHECK-NEXT: LoadString + /// CHECK-NEXT: NullCheck + /// CHECK-NEXT: InvokeVirtual + + /// CHECK-START: void LoadedByMyClassLoader.bar() register (before) + /* Load and initialize Main */ + /// CHECK: LoadClass gen_clinit_check:true + /* Load and initialize System */ + /// CHECK-NEXT: LoadClass gen_clinit_check:true + /// CHECK-NEXT: StaticFieldGet + /// CHECK-NEXT: LoadString + /// CHECK-NEXT: NullCheck + /// CHECK-NEXT: InvokeVirtual + public static void bar() { + Main.$inline$bar(); + System.out.println("In between the two calls."); + Main.$noinline$bar(); + } +} + +class Main { + public static void main(String[] args) throws Exception { + MyClassLoader o = new MyClassLoader(); + Class foo = o.loadClass("LoadedByMyClassLoader"); + Method m = foo.getDeclaredMethod("bar"); + m.invoke(null); + } + + public static void $inline$bar() { + } + + public static void $noinline$bar() { + try { + System.out.println("In $noinline$bar"); + } catch (Throwable t) { /* Ignore */ } + } +} diff --git a/test/497-inlining-and-class-loader/clear_dex_cache.cc b/test/497-inlining-and-class-loader/clear_dex_cache.cc new file mode 100644 index 0000000000..f9b33a2874 --- /dev/null +++ b/test/497-inlining-and-class-loader/clear_dex_cache.cc @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "art_method-inl.h" +#include "jni.h" +#include "scoped_thread_state_change.h" +#include "stack.h" +#include "thread.h" + +namespace art { + +namespace { + +extern "C" JNIEXPORT jobject JNICALL Java_Main_cloneResolvedMethods(JNIEnv*, jclass, jclass cls) { + ScopedObjectAccess soa(Thread::Current()); + return soa.Vm()->AddGlobalRef( + soa.Self(), + soa.Decode<mirror::Class*>(cls)->GetDexCache()->GetResolvedMethods()->Clone(soa.Self())); +} + +extern "C" JNIEXPORT void JNICALL Java_Main_restoreResolvedMethods( + JNIEnv*, jclass, jclass cls, jobject old_cache) { + ScopedObjectAccess soa(Thread::Current()); + mirror::PointerArray* now = soa.Decode<mirror::Class*>(cls)->GetDexCache()->GetResolvedMethods(); + mirror::PointerArray* old = soa.Decode<mirror::PointerArray*>(old_cache); + for (size_t i = 0, e = old->GetLength(); i < e; ++i) { + now->SetElementPtrSize(i, old->GetElementPtrSize<void*>(i, sizeof(void*)), sizeof(void*)); + } +} + +} // namespace + +} // namespace art diff --git a/test/497-inlining-and-class-loader/expected.txt b/test/497-inlining-and-class-loader/expected.txt new file mode 100644 index 0000000000..3e1d85e309 --- /dev/null +++ b/test/497-inlining-and-class-loader/expected.txt @@ -0,0 +1,7 @@ +java.lang.Exception + at Main.$noinline$bar(Main.java:127) + at Level2.$inline$bar(Level1.java:25) + at Level1.$inline$bar(Level1.java:19) + at LoadedByMyClassLoader.bar(Main.java:82) + at java.lang.reflect.Method.invoke(Native Method) + at Main.main(Main.java:101) diff --git a/test/497-inlining-and-class-loader/info.txt b/test/497-inlining-and-class-loader/info.txt new file mode 100644 index 0000000000..e7f02aaf34 --- /dev/null +++ b/test/497-inlining-and-class-loader/info.txt @@ -0,0 +1,2 @@ +Regression test for optimizing to ensure it is using +the correct class loader when walking inlined frames. diff --git a/test/497-inlining-and-class-loader/src/Level1.java b/test/497-inlining-and-class-loader/src/Level1.java new file mode 100644 index 0000000000..977af8321e --- /dev/null +++ b/test/497-inlining-and-class-loader/src/Level1.java @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Level1 { + public static void $inline$bar() { + Level2.$inline$bar(); + } +} + +class Level2 { + public static void $inline$bar() { + Main.$noinline$bar(); + } +} diff --git a/test/497-inlining-and-class-loader/src/Main.java b/test/497-inlining-and-class-loader/src/Main.java new file mode 100644 index 0000000000..0f7eb599cb --- /dev/null +++ b/test/497-inlining-and-class-loader/src/Main.java @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.util.List; + +class MyClassLoader extends ClassLoader { + MyClassLoader() throws Exception { + super(MyClassLoader.class.getClassLoader()); + + // Some magic to get access to the pathList field of BaseDexClassLoader. + ClassLoader loader = getClass().getClassLoader(); + Class<?> baseDexClassLoader = loader.getClass().getSuperclass(); + Field f = baseDexClassLoader.getDeclaredField("pathList"); + f.setAccessible(true); + Object pathList = f.get(loader); + + // Some magic to get access to the dexField field of pathList. + f = pathList.getClass().getDeclaredField("dexElements"); + f.setAccessible(true); + dexElements = (Object[]) f.get(pathList); + dexFileField = dexElements[0].getClass().getDeclaredField("dexFile"); + dexFileField.setAccessible(true); + } + + Object[] dexElements; + Field dexFileField; + + static ClassLoader level1ClassLoader; + + protected Class<?> loadClass(String className, boolean resolve) throws ClassNotFoundException { + if (this != level1ClassLoader) { + if (className.equals("Level1")) { + return level1ClassLoader.loadClass(className); + } else if (className.equals("Level2")) { + throw new ClassNotFoundException("None of my methods require Level2!"); + } else if (!className.equals("LoadedByMyClassLoader")) { + // We're only going to handle LoadedByMyClassLoader. + return getParent().loadClass(className); + } + } else { + if (className != "Level1" && className != "Level2") { + return getParent().loadClass(className); + } + } + + // Mimic what DexPathList.findClass is doing. + try { + for (Object element : dexElements) { + Object dex = dexFileField.get(element); + Method method = dex.getClass().getDeclaredMethod( + "loadClassBinaryName", String.class, ClassLoader.class, List.class); + + if (dex != null) { + Class clazz = (Class)method.invoke(dex, className, this, null); + if (clazz != null) { + return clazz; + } + } + } + } catch (Exception e) { /* Ignore */ } + return null; + } +} + +class LoadedByMyClassLoader { + public static void bar() { + Level1.$inline$bar(); + } +} + +class Main { + static { + System.loadLibrary("arttest"); + } + + public static void main(String[] args) throws Exception { + // Clone resolved methods, to restore the original version just + // before we walk the stack in $noinline$bar. + savedResolvedMethods = cloneResolvedMethods(Main.class); + + MyClassLoader o = new MyClassLoader(); + MyClassLoader.level1ClassLoader = new MyClassLoader(); + Class foo = o.loadClass("LoadedByMyClassLoader"); + Method m = foo.getDeclaredMethod("bar"); + try { + m.invoke(null); + } catch (Error e) { /* Ignore */ } + } + + public static void $inline$bar() { + } + + public static void $noinline$bar() { + try { + // Be evil and clear all dex cache entries. 
+ Field f = Class.class.getDeclaredField("dexCache"); + f.setAccessible(true); + Object dexCache = f.get(Main.class); + f = dexCache.getClass().getDeclaredField("resolvedTypes"); + f.setAccessible(true); + Object[] array = (Object[]) f.get(dexCache); + for (int i = 0; i < array.length; i++) { + array[i] = null; + } + restoreResolvedMethods(Main.class, savedResolvedMethods); + } catch (Throwable t) { /* Ignore */ } + + // This will walk the stack, trying to resolve methods in it. + // Because we cleared dex cache entries, we will have to find + // classes again, which requires using the correct class loader + // in the presence of inlining. + new Exception().printStackTrace(); + } + static Object savedResolvedMethods; + + static native Object cloneResolvedMethods(Class<?> cls); + static native void restoreResolvedMethods(Class<?> cls, Object saved); +} diff --git a/test/498-type-propagation/expected.txt b/test/498-type-propagation/expected.txt new file mode 100644 index 0000000000..ccaf6f8f0f --- /dev/null +++ b/test/498-type-propagation/expected.txt @@ -0,0 +1 @@ +Enter diff --git a/test/498-type-propagation/info.txt b/test/498-type-propagation/info.txt new file mode 100644 index 0000000000..b895e91f9d --- /dev/null +++ b/test/498-type-propagation/info.txt @@ -0,0 +1,2 @@ +Regression test for the SSA building of the optimizing +compiler. See comment in smali file. diff --git a/test/498-type-propagation/smali/TypePropagation.smali b/test/498-type-propagation/smali/TypePropagation.smali new file mode 100644 index 0000000000..088ca89985 --- /dev/null +++ b/test/498-type-propagation/smali/TypePropagation.smali @@ -0,0 +1,30 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LTypePropagation; + +.super Ljava/lang/Object; + +.method public static method([I)V + .registers 2 + const/4 v0, 0 + # When building the SSA graph, we will create a phi for v0, which will be of type + # integer. Only when we get rid of that phi in the redundant phi elimination will + # we realize it's just null. + :start + if-eq v1, v0, :end + if-eq v1, v0, :start + :end + return-void +.end method diff --git a/test/498-type-propagation/src/Main.java b/test/498-type-propagation/src/Main.java new file mode 100644 index 0000000000..7a14172bf5 --- /dev/null +++ b/test/498-type-propagation/src/Main.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.lang.reflect.Method; + +public class Main { + public static void main(String[] args) throws Exception { + // Workaround for b/18051191. + System.out.println("Enter"); + Class<?> c = Class.forName("TypePropagation"); + Method m = c.getMethod("method", int[].class); + int[] array = new int[7]; + Object[] arguments = { array }; + m.invoke(null, arguments); + } +} diff --git a/test/499-bce-phi-array-length/expected.txt b/test/499-bce-phi-array-length/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/499-bce-phi-array-length/expected.txt diff --git a/test/499-bce-phi-array-length/info.txt b/test/499-bce-phi-array-length/info.txt new file mode 100644 index 0000000000..ee52699579 --- /dev/null +++ b/test/499-bce-phi-array-length/info.txt @@ -0,0 +1,2 @@ +Regression test for BCE phase of optimizing. See Main.java +for a description of the bug. diff --git a/test/499-bce-phi-array-length/src/Main.java b/test/499-bce-phi-array-length/src/Main.java new file mode 100644 index 0000000000..c8c84a1a8f --- /dev/null +++ b/test/499-bce-phi-array-length/src/Main.java @@ -0,0 +1,42 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main { + public static int foo(int start, int[] array) { + int result = 0; + // We will create HDeoptimize nodes for this first loop, and a phi + // for the array length which will only be used within the loop. + for (int i = start; i < 3; i++) { + result += array[i]; + for (int j = 0; j < 2; ++j) { + // The HBoundsCheck for this array access will be updated to access + // the array length phi created for the deoptimization checks of the + // first loop. This crashed the compiler, which used to DCHECK that an array + // length in a bounds check cannot be a phi. + result += array[j]; + } + } + return result; + } + + public static void main(String[] args) { + int[] a = new int[] { 1, 2, 3, 4, 5 }; + int result = foo(1, a); + if (result != 11) { + throw new Error("Got " + result + ", expected " + 11); + } + } +} diff --git a/test/500-instanceof/expected.txt b/test/500-instanceof/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/500-instanceof/expected.txt diff --git a/test/500-instanceof/info.txt b/test/500-instanceof/info.txt new file mode 100644 index 0000000000..b28756aa34 --- /dev/null +++ b/test/500-instanceof/info.txt @@ -0,0 +1,2 @@ +Regression test for optimizing which used to be +too aggressive in removing instanceof checks. diff --git a/test/500-instanceof/src/Main.java b/test/500-instanceof/src/Main.java new file mode 100644 index 0000000000..80fdb0288d --- /dev/null +++ b/test/500-instanceof/src/Main.java @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +interface Itf { +} + +class Foo { +} + +class Main extends Foo implements Itf { + public static void main(String[] args) { + Itf parent = getParent(); + if (!(parent instanceof Foo)) { + throw new Error("Instanceof should have succeeded"); + } + } + + static Itf getParent() { return new Main(); } +} diff --git a/test/501-null-constant-dce/expected.txt b/test/501-null-constant-dce/expected.txt new file mode 100644 index 0000000000..ccaf6f8f0f --- /dev/null +++ b/test/501-null-constant-dce/expected.txt @@ -0,0 +1 @@ +Enter diff --git a/test/501-null-constant-dce/info.txt b/test/501-null-constant-dce/info.txt new file mode 100644 index 0000000000..2c4a686b5f --- /dev/null +++ b/test/501-null-constant-dce/info.txt @@ -0,0 +1 @@ +Regression test for the optimizing compiler. See comment in smali file. diff --git a/test/501-null-constant-dce/smali/DCE.smali b/test/501-null-constant-dce/smali/DCE.smali new file mode 100644 index 0000000000..4a1765e796 --- /dev/null +++ b/test/501-null-constant-dce/smali/DCE.smali @@ -0,0 +1,37 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LDCE; + +.super Ljava/lang/Object; + +.method public static method([I)LDCE; + .registers 2 + const/4 v0, 0 + # Jump over the code that requires the null constant + # so that the compiler sees the null constant as dead code. + if-eq v0, v0, :end + invoke-static {v0}, LDCE;->method([I)LDCE; + :end + invoke-static {}, LDCE;->$inline$returnNull()LDCE; + move-result-object v0 + return-object v0 +.end method + +.method public static $inline$returnNull()LDCE; + .registers 2 + const/4 v0, 0 + # Return null to make `method` call GetConstantNull again. + return-object v0 +.end method diff --git a/test/501-null-constant-dce/src/Main.java b/test/501-null-constant-dce/src/Main.java new file mode 100644 index 0000000000..3a2d491b15 --- /dev/null +++ b/test/501-null-constant-dce/src/Main.java @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.lang.reflect.Method; + +public class Main { + public static void main(String[] args) throws Exception { + // Workaround for b/18051191. + System.out.println("Enter"); + Class<?> c = Class.forName("DCE"); + Method m = c.getMethod("method", int[].class); + int[] array = new int[7]; + Object[] arguments = { array }; + Object result = m.invoke(null, arguments); + if (result != null) { + throw new Error("Expected null, got " + result); + } + } +} diff --git a/test/501-regression-packed-switch/expected.txt b/test/501-regression-packed-switch/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/501-regression-packed-switch/expected.txt diff --git a/test/501-regression-packed-switch/info.txt b/test/501-regression-packed-switch/info.txt new file mode 100644 index 0000000000..fbd93fa815 --- /dev/null +++ b/test/501-regression-packed-switch/info.txt @@ -0,0 +1,2 @@ +Regression test for the interpreter and optimizing's builder which used +to trip when compiled code contained a packed switch with no targets. diff --git a/test/501-regression-packed-switch/smali/Test.smali b/test/501-regression-packed-switch/smali/Test.smali new file mode 100644 index 0000000000..8756ed5f23 --- /dev/null +++ b/test/501-regression-packed-switch/smali/Test.smali @@ -0,0 +1,29 @@ +# +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LTest; + +.super Ljava/lang/Object; + +.method public static EmptyPackedSwitch(I)I + .registers 1 + packed-switch v0, :pswitch_data_6a + const/4 v0, 0x5 + return v0 + + :pswitch_data_6a + .packed-switch 0x0 + .end packed-switch +.end method diff --git a/test/501-regression-packed-switch/src/Main.java b/test/501-regression-packed-switch/src/Main.java new file mode 100644 index 0000000000..b80bc62c50 --- /dev/null +++ b/test/501-regression-packed-switch/src/Main.java @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; +import java.lang.reflect.Type; + +public class Main { + + // Workaround for b/18051191. 
+ class InnerClass {} + + public static void main(String args[]) throws Exception { + Class<?> c = Class.forName("Test"); + Method m = c.getMethod("EmptyPackedSwitch", new Class[] { int.class }); + Integer result = (Integer) m.invoke(null, new Integer(42)); + if (result != 5) { + throw new Error("Expected 5, got " + result); + } + } +} diff --git a/test/503-dead-instructions/expected.txt b/test/503-dead-instructions/expected.txt new file mode 100644 index 0000000000..ccaf6f8f0f --- /dev/null +++ b/test/503-dead-instructions/expected.txt @@ -0,0 +1 @@ +Enter diff --git a/test/503-dead-instructions/info.txt b/test/503-dead-instructions/info.txt new file mode 100644 index 0000000000..7e3f1aba34 --- /dev/null +++ b/test/503-dead-instructions/info.txt @@ -0,0 +1,2 @@ +Regression test for the building phase of the optimizing +compiler. See comment in smali file. diff --git a/test/503-dead-instructions/smali/DeadInstructions.smali b/test/503-dead-instructions/smali/DeadInstructions.smali new file mode 100644 index 0000000000..9f6c5653fa --- /dev/null +++ b/test/503-dead-instructions/smali/DeadInstructions.smali @@ -0,0 +1,63 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LDeadInstructions; + +.super Ljava/lang/Object; + +.method public static method1()V + .registers 2 + return-void + # Create a label and a branch to that label to trick the + # optimizing compiler into thinking the invoke is live. + :start + const/4 v0, 0 + const/4 v1, 0 + # Provide more arguments than we should. Because this is dead + # code, the verifier will not check the argument count. So + # the compilers must do the same. + invoke-static {v0, v1}, LDeadInstructions;->method1()V + goto :start +.end method + +.method public static method2(J)V + .registers 3 + return-void + :start + const/4 v0, 0 + const/4 v1, 0 + const/4 v2, 0 + # Give a non-sequential pair for the long argument. + invoke-static {v0, v2}, LDeadInstructions;->method2(J)V + goto :start +.end method + +.method public static method3()V + .registers 1 + return-void + :start + const/4 v0, 0 + # Give one half of a pair. + invoke-static {v0}, LDeadInstructions;->method2(J)V + goto :start +.end method + +.method public static method4()V + .registers 2 + return-void + :start + # Provide fewer arguments than we should. + invoke-static {}, LDeadInstructions;->method3(J)V + goto :start +.end method diff --git a/test/503-dead-instructions/src/Main.java b/test/503-dead-instructions/src/Main.java new file mode 100644 index 0000000000..6249dc79fa --- /dev/null +++ b/test/503-dead-instructions/src/Main.java @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; + +public class Main { + public static void main(String[] args) throws Exception { + // Workaround for b/18051191. + System.out.println("Enter"); + Class<?> c = Class.forName("DeadInstructions"); + Method m = c.getMethod("method1"); + Object[] arguments1 = { }; + m.invoke(null, arguments1); + + Object[] arguments2 = { (long)4 }; + m = c.getMethod("method2", long.class); + m.invoke(null, arguments2); + + Object[] arguments3 = { }; + m = c.getMethod("method3"); + m.invoke(null, arguments3); + + Object[] arguments4 = { }; + m = c.getMethod("method4"); + m.invoke(null, arguments4); + } +} diff --git a/test/504-regression-baseline-entry/expected.txt b/test/504-regression-baseline-entry/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/504-regression-baseline-entry/expected.txt diff --git a/test/504-regression-baseline-entry/info.txt b/test/504-regression-baseline-entry/info.txt new file mode 100644 index 0000000000..26cc9ce75b --- /dev/null +++ b/test/504-regression-baseline-entry/info.txt @@ -0,0 +1,2 @@ +Regression test for the baseline compiler which required the entry block to fall +through to the next block.
\ No newline at end of file diff --git a/test/504-regression-baseline-entry/smali/Test.smali b/test/504-regression-baseline-entry/smali/Test.smali new file mode 100644 index 0000000000..06412e7618 --- /dev/null +++ b/test/504-regression-baseline-entry/smali/Test.smali @@ -0,0 +1,30 @@ +# +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LTest; + +.super Ljava/lang/Object; + +.method public static SingleGotoStart()I + .registers 1 + goto :second + + :first + return v0 + + :second + const/4 v0, 0x5 + goto :first +.end method diff --git a/test/504-regression-baseline-entry/src/Main.java b/test/504-regression-baseline-entry/src/Main.java new file mode 100644 index 0000000000..2c9df28342 --- /dev/null +++ b/test/504-regression-baseline-entry/src/Main.java @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.lang.reflect.Method; +import java.lang.reflect.Type; + +public class Main { + + // Workaround for b/18051191. + class InnerClass {} + + public static void main(String args[]) throws Exception { + Class<?> c = Class.forName("Test"); + Method m = c.getMethod("SingleGotoStart", (Class[]) null); + Integer result = (Integer) m.invoke(null); + if (result != 5) { + throw new Error("Expected 5, got " + result); + } + } +} diff --git a/test/505-simplifier-type-propagation/expected.txt b/test/505-simplifier-type-propagation/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/505-simplifier-type-propagation/expected.txt diff --git a/test/505-simplifier-type-propagation/info.txt b/test/505-simplifier-type-propagation/info.txt new file mode 100644 index 0000000000..cd84432ff1 --- /dev/null +++ b/test/505-simplifier-type-propagation/info.txt @@ -0,0 +1,3 @@ +Regression test for the optimizing compiler, where +the code generators did not expect type conversion +instructions from one type to the same type. diff --git a/test/505-simplifier-type-propagation/src/Main.java b/test/505-simplifier-type-propagation/src/Main.java new file mode 100644 index 0000000000..780cb3469e --- /dev/null +++ b/test/505-simplifier-type-propagation/src/Main.java @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +class Main { + public static void main(String[] args) { + byte result = bar((byte)2); + if (result != 2) { + throw new Error("Expected 2, got " + result); + } + } + + public static byte bar(byte myByte) { + int a = 0; + // The following call will be inlined, which will make + // the type conversion below a conversion from byte to byte. + if ($inline$foo()) { + a = myByte; + } + return (byte)a; + } + + public static boolean $inline$foo() { + return true; + } +} diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt index a6b216bf3a..3c6506b6b4 100644 --- a/test/800-smali/expected.txt +++ b/test/800-smali/expected.txt @@ -16,4 +16,7 @@ MoveExc MoveExceptionOnEntry EmptySparseSwitch b/20224106 +b/17410612 +b/21865464 +b/21873167 Done! diff --git a/test/800-smali/smali/b_17410612.smali b/test/800-smali/smali/b_17410612.smali new file mode 100644 index 0000000000..17718cbf60 --- /dev/null +++ b/test/800-smali/smali/b_17410612.smali @@ -0,0 +1,14 @@ +.class public LB17410612; + +# Test that an invoke with a long parameter has the long parameter in +# a pair. This should fail in the verifier and not cause an abort in the compiler. + +.super Ljava/lang/Object; + +.method public static run()V + .registers 4 + const-wide v0, 0 # Make (v0, v1) a long + const-wide v2, 0 # Make (v2, v3) a long + invoke-static {v0, v3}, Ljava/lang/Long;->valueOf(J)Ljava/lang/Long; + return-void +.end method diff --git a/test/800-smali/smali/b_21865464.smali b/test/800-smali/smali/b_21865464.smali new file mode 100644 index 0000000000..df56a54aaa --- /dev/null +++ b/test/800-smali/smali/b_21865464.smali @@ -0,0 +1,29 @@ +# Copyright (C) 2015 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.class public LB21865464; + +.super Ljava/lang/Object; + +.method public static run()V + .registers 2 + return-void + goto :start + :start + # The following is dead code but used to crash the compiler. + const/4 v0, 0 + return-wide v0 + return v0 + return-object v0 +.end method diff --git a/test/800-smali/smali/b_21873167.smali b/test/800-smali/smali/b_21873167.smali new file mode 100644 index 0000000000..c0c09cbbf2 --- /dev/null +++ b/test/800-smali/smali/b_21873167.smali @@ -0,0 +1,18 @@ +.class public LB21873167; +.super Ljava/lang/Object; + +.method public constructor <init>()V + .registers 1 + invoke-direct {p0}, Ljava/lang/Object;-><init>()V + return-void +.end method + +.method public test()V + .registers 1 + :start + monitor-enter p0 + monitor-exit p0 + :end + return-void + .catchall {:start .. 
:end} :end +.end method diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java index 3e88364089..d1c275cfef 100644 --- a/test/800-smali/src/Main.java +++ b/test/800-smali/src/Main.java @@ -81,6 +81,11 @@ public class Main { null)); testCases.add(new TestCase("b/20224106", "B20224106", "run", null, new VerifyError(), 0)); + testCases.add(new TestCase("b/17410612", "B17410612", "run", null, new VerifyError(), + 0)); + testCases.add(new TestCase("b/21865464", "B21865464", "run", null, null, + null)); + testCases.add(new TestCase("b/21873167", "B21873167", "test", null, null, null)); } public void runTests() { diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk index 847ad0d2f7..fcb9f8a779 100644 --- a/test/Android.libarttest.mk +++ b/test/Android.libarttest.mk @@ -34,7 +34,8 @@ LIBARTTEST_COMMON_SRC_FILES := \ 455-set-vreg/set_vreg_jni.cc \ 457-regs/regs_jni.cc \ 461-get-reference-vreg/get_reference_vreg_jni.cc \ - 466-get-live-vreg/get_live_vreg_jni.cc + 466-get-live-vreg/get_live_vreg_jni.cc \ + 497-inlining-and-class-loader/clear_dex_cache.cc ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so ifdef TARGET_2ND_ARCH diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index e95f147e65..c2380cc668 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -229,16 +229,10 @@ endif TEST_ART_BROKEN_NO_RELOCATE_TESTS := -# Tests that are broken with GC stress. -TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := - -ifneq (,$(filter gcstress,$(GC_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),gcstress,$(JNI_TYPES), \ - $(IMAGE_TYPES), $(PICTEST_TYPES), $(DBEUGGABLE_TYPES), $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := +# 098-ddmc is broken until we restore the old behavior of getRecentAllocation() of DDMS. b/20037135 +ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ + $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ + $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), 098-ddmc, $(ALL_ADDRESS_SIZES)) # 115-native-bridge setup is complicated. Need to implement it correctly for the target. ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \ @@ -344,6 +338,7 @@ TEST_ART_BROKEN_NDEBUG_TESTS := \ 457-regs \ 461-get-reference-vreg \ 466-get-live-vreg \ + 497-inlining-and-class-loader \ ifneq (,$(filter ndebug,$(RUN_TYPES))) ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),ndebug,$(PREBUILD_TYPES), \ diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar index 240ed41ff2..09841bfcec 100755 --- a/test/etc/run-test-jar +++ b/test/etc/run-test-jar @@ -296,6 +296,10 @@ if [ "$RELOCATE" = "y" ]; then else FLAGS="$FLAGS -Xnorelocate" COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xnorelocate" + if [ "$HOST" = "y" ]; then + # Increase ulimit to 64MB in case we are running hprof test. 
+ ulimit -S 64000 || exit 1 + fi fi if [ "$HOST" = "n" ]; then diff --git a/test/run-test b/test/run-test index ed3309923b..ed033217b8 100755 --- a/test/run-test +++ b/test/run-test @@ -96,6 +96,7 @@ basic_verify="false" gc_verify="false" gc_stress="false" always_clean="no" +never_clean="no" have_dex2oat="yes" have_patchoat="yes" have_image="yes" @@ -270,6 +271,9 @@ while true; do elif [ "x$1" = "x--always-clean" ]; then always_clean="yes" shift + elif [ "x$1" = "x--never-clean" ]; then + never_clean="yes" + shift elif [ "x$1" = "x--dex2oat-swap" ]; then run_args="${run_args} --dex2oat-swap" shift @@ -472,6 +476,7 @@ if [ "$usage" = "yes" ]; then echo " --gcstress Run with gc stress testing" echo " --gcverify Run with gc verification" echo " --always-clean Delete the test files even if the test fails." + echo " --never-clean Keep the test files even if the test succeeds." echo " --android-root [path] The path on target for the android root. (/system by default)." echo " --dex2oat-swap Use a dex2oat swap file." ) 1>&2 @@ -668,7 +673,7 @@ fi ) 1>&2 # Clean up test files. -if [ "$always_clean" = "yes" -o "$good" = "yes" ]; then +if [ "$always_clean" = "yes" -o "$good" = "yes" ] && [ "$never_clean" = "no" ]; then cd "$oldwd" rm -rf "$tmp_dir" if [ "$target_mode" = "yes" -a "$build_exit" = "0" ]; then diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh index 77e6b1ad14..62fd67bfd7 100755 --- a/tools/buildbot-build.sh +++ b/tools/buildbot-build.sh @@ -60,7 +60,7 @@ while true; do done if [[ $mode == "host" ]]; then - make_command="make $j_arg build-art-host-tests $common_targets" + make_command="make $j_arg build-art-host-tests $common_targets out/host/linux-x86/lib/libjavacoretests.so out/host/linux-x86/lib64/libjavacoretests.so" echo "Executing $make_command" $make_command elif [[ $mode == "target" ]]; then @@ -70,7 +70,7 @@ elif [[ $mode == "target" ]]; then # Use '-e' to force the override of TARGET_GLOBAL_LDFLAGS. # Also, we build extra tools that will be used by tests, so that # they are compiled with our own linker. - make_command="make -e $j_arg build-art-target-tests $common_targets libjavacrypto linker toybox toolbox sh" + make_command="make -e $j_arg build-art-target-tests $common_targets libjavacrypto libjavacoretests linker toybox toolbox sh" echo "Executing env $env $make_command" env $env $make_command fi diff --git a/tools/checker/match/line.py b/tools/checker/match/line.py index 711d814b10..ce11e2a528 100644 --- a/tools/checker/match/line.py +++ b/tools/checker/match/line.py @@ -41,7 +41,7 @@ def matchWords(checkerWord, stringWord, variables, pos): if expression.name in variables: pattern = re.escape(variables[expression.name]) else: - Logger.testFailed("Multiple definitions of variable \"{}\"".format(expression.name), + Logger.testFailed("Missing definition of variable \"{}\"".format(expression.name), pos.fileName, pos.lineNo) else: pattern = expression.pattern diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh index 77e800411c..7135dbaf55 100755 --- a/tools/run-jdwp-tests.sh +++ b/tools/run-jdwp-tests.sh @@ -39,6 +39,7 @@ device_dir="--device-dir=/data/local/tmp" # image. vm_command="--vm-command=$art" image_compiler_option="" +debug="no" while true; do if [[ "$1" == "--mode=host" ]]; then @@ -53,12 +54,17 @@ while true; do # Vogar knows which VM to use on host. vm_command="" # We only compile the image on the host. Note that not providing this option - # puts us below the adb command limit for vogar. 
+ # for target testing puts us below the adb command limit for vogar. image_compiler_option="--vm-arg -Ximage-compiler-option --vm-arg --debuggable" shift elif [[ $1 == -Ximage:* ]]; then image="$1" shift + elif [[ $1 == "--debug" ]]; then + debug="yes" + # Remove the --debug from the arguments. + args=${args/$1} + shift elif [[ "$1" == "" ]]; then break else @@ -66,9 +72,16 @@ while true; do fi done +vm_args="--vm-arg $image" +if [[ $debug == "yes" ]]; then + art="$art -d" + art_debugee="$art_debugee -d" + vm_args="$vm_args --vm-arg -XXlib:libartd.so" +fi + # Run the tests using vogar. vogar $vm_command \ - --vm-arg $image \ + $vm_args \ --verbose \ $args \ $device_dir \ diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh index 344d2dedb3..1b8748bd99 100755 --- a/tools/run-libcore-tests.sh +++ b/tools/run-libcore-tests.sh @@ -33,7 +33,8 @@ if [ ! -f $test_jar ]; then fi # Packages that currently work correctly with the expectation files. -working_packages=("libcore.icu" +working_packages=("dalvik.system" + "libcore.icu" "libcore.io" "libcore.java.lang" "libcore.java.math" @@ -62,7 +63,37 @@ working_packages=("libcore.icu" "tests.java.lang.String" "jsr166") +vogar_args=$@ +while true; do + if [[ "$1" == "--mode=device" ]]; then + vogar_args="$vogar_args --device-dir=/data/local/tmp" + vogar_args="$vogar_args --vm-command=/data/local/tmp/system/bin/art" + vogar_args="$vogar_args --vm-arg -Ximage:/data/art-test/core-optimizing.art" + shift + elif [[ "$1" == "--mode=host" ]]; then + # We explicitly give a wrong path for the image, to ensure vogar + # will create a boot image with the default compiler. Note that + # giving an existing image on host does not work because of + # classpath/resources differences when compiling the boot image. + vogar_args="$vogar_args --vm-arg -Ximage:/non/existent" + shift + elif [[ "$1" == "--debug" ]]; then + # Remove the --debug from the arguments. + vogar_args=${vogar_args/$1} + vogar_args="$vogar_args --vm-arg -XXlib:libartd.so" + # Increase the timeout, as vogar cannot set individual test + # timeout when being asked to run packages, and some tests go above + # the default timeout. + vogar_args="$vogar_args --timeout 180" + shift + elif [[ "$1" == "" ]]; then + break + else + shift + fi +done + # Run the tests using vogar. echo "Running tests for the following test packages:" echo ${working_packages[@]} | tr " " "\n" -vogar $@ --expectations art/tools/libcore_failures.txt --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]} +vogar $vogar_args --expectations art/tools/libcore_failures.txt --classpath $jsr166_test_jar --classpath $test_jar ${working_packages[@]} |
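A minimal usage sketch for the new test-runner flags introduced in this change; the test name chosen and the invocation paths are illustrative assumptions, not part of the patch:
    # Keep the files of a passing run around for inspection, using the new --never-clean flag of run-test.
    art/test/run-test --host --never-clean 497-inlining-and-class-loader
    # Run the JDWP and libcore suites against the debug build of ART via the new --debug flag.
    art/tools/run-jdwp-tests.sh --mode=host --debug
    art/tools/run-libcore-tests.sh --mode=host --debug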