author     2019-03-07 22:40:36 +0000
committer  2019-03-08 17:44:58 +0000
commit     10d0c96a3539d91d085d63b3ccad80d4fd68c386 (patch)
tree       336cd7633e91d84b731d61d5738adb6534920116
parent     bdc6241921b5a46273e4ffb18d3fd12c3dd1b39a (diff)
Revert^2 "Add peak RSS stats to GC perf dump"
This reverts commit cc292c611af7cdea6a2d9196fc347468b9233f71.
Reason for revert: RSS code is enabled only on linux now.
Test: art/test/testrunner/testrunner.py --target --runtime-option=-XX:DumpGCPerformanceOnShutdown
Bug: b/112187497
Change-Id: Iea5926d3dd4f6248f85422627b6ee0da559beb39
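
The patch reports peak RSS by asking the kernel, via mincore(), which pages of the GC's memory ranges are currently resident. As a rough standalone illustration of that primitive (this is not the ART code; ResidentBytes and the mmap'd test range are made up for the example, and it assumes Linux), counting resident pages for one page-aligned range looks like this:

// Minimal sketch (Linux only): count resident bytes of one page-aligned range
// with mincore(). Illustrative only; not the ART implementation.
#include <sys/mman.h>   // mincore(), mmap()
#include <unistd.h>     // sysconf()
#include <cerrno>
#include <cstdio>
#include <cstring>
#include <memory>

// Hypothetical helper, not part of ART.
static size_t ResidentBytes(void* begin, size_t length) {
  const size_t page_size = static_cast<size_t>(sysconf(_SC_PAGESIZE));
  const size_t num_pages = length / page_size;
  std::unique_ptr<unsigned char[]> vec(new unsigned char[num_pages]);
  if (mincore(begin, length, vec.get()) != 0) {
    std::fprintf(stderr, "mincore() failed: %s\n", std::strerror(errno));
    return 0;
  }
  size_t resident_pages = 0;
  for (size_t i = 0; i < num_pages; ++i) {
    resident_pages += vec[i] & 0x1;  // Bit 0 of each entry: page is resident.
  }
  return resident_pages * page_size;
}

int main() {
  const size_t length = 64 * static_cast<size_t>(sysconf(_SC_PAGESIZE));
  void* range = mmap(nullptr, length, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (range == MAP_FAILED) {
    return 1;
  }
  std::memset(range, 1, length / 2);  // Touch half of the pages.
  std::printf("resident: %zu of %zu bytes\n", ResidentBytes(range, length), length);
  munmap(range, length);
  return 0;
}

ExtractRssFromMincore() in the diff below applies the same idea to a sorted, merged list of GC-owned ranges and records the result in a per-collector histogram.
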
-rw-r--r--  runtime/gc/accounting/card_table.h          |  8
-rw-r--r--  runtime/gc/collector/concurrent_copying.cc  | 74
-rw-r--r--  runtime/gc/collector/concurrent_copying.h   |  1
-rw-r--r--  runtime/gc/collector/garbage_collector.cc   | 70
-rw-r--r--  runtime/gc/collector/garbage_collector.h    |  6
-rw-r--r--  runtime/gc/space/large_object_space.cc      | 13
-rw-r--r--  runtime/gc/space/large_object_space.h       |  5
-rw-r--r--  runtime/gc/space/space.h                    |  4
-rw-r--r--  runtime/runtime.h                           |  4
9 files changed, 182 insertions, 3 deletions
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 30c438614b..5f4675d1cf 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -94,6 +94,14 @@ class CardTable {
     return biased_begin_;
   }
 
+  void* MemMapBegin() const {
+    return mem_map_.BaseBegin();
+  }
+
+  size_t MemMapSize() const {
+    return mem_map_.BaseSize();
+  }
+
   /*
    * Modify cards in the range from scan_begin (inclusive) to scan_end (exclusive). Each card
    * value v is replaced by visitor(v). Visitor() should not have side-effects.
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 642b12e9b7..5483364600 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -2528,6 +2528,72 @@ void ConcurrentCopying::SweepLargeObjects(bool swap_bitmaps) {
   }
 }
 
+void ConcurrentCopying::CaptureRssAtPeak() {
+  using range_t = std::pair<void*, void*>;
+  // This operation is expensive as several calls to mincore() are performed.
+  // Also, this must be called before clearing regions in ReclaimPhase().
+  // Therefore, we make it conditional on the flag that enables dumping GC
+  // performance info on shutdown.
+  if (Runtime::Current()->GetDumpGCPerformanceOnShutdown()) {
+    std::list<range_t> gc_ranges;
+    auto add_gc_range = [&gc_ranges](void* start, size_t size) {
+      void* end = static_cast<char*>(start) + RoundUp(size, kPageSize);
+      gc_ranges.emplace_back(range_t(start, end));
+    };
+
+    // region space
+    DCHECK(IsAligned<kPageSize>(region_space_->Limit()));
+    gc_ranges.emplace_back(range_t(region_space_->Begin(), region_space_->Limit()));
+    // mark bitmap
+    add_gc_range(region_space_bitmap_->Begin(), region_space_bitmap_->Size());
+
+    // non-moving space
+    {
+      DCHECK(IsAligned<kPageSize>(heap_->non_moving_space_->Limit()));
+      gc_ranges.emplace_back(range_t(heap_->non_moving_space_->Begin(),
+                                     heap_->non_moving_space_->Limit()));
+      // mark bitmap
+      accounting::ContinuousSpaceBitmap *bitmap = heap_->non_moving_space_->GetMarkBitmap();
+      add_gc_range(bitmap->Begin(), bitmap->Size());
+      // live bitmap. Deal with bound bitmaps.
+      ReaderMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_);
+      if (heap_->non_moving_space_->HasBoundBitmaps()) {
+        DCHECK_EQ(bitmap, heap_->non_moving_space_->GetLiveBitmap());
+        bitmap = heap_->non_moving_space_->GetTempBitmap();
+      } else {
+        bitmap = heap_->non_moving_space_->GetLiveBitmap();
+      }
+      add_gc_range(bitmap->Begin(), bitmap->Size());
+    }
+    // large-object space
+    if (heap_->GetLargeObjectsSpace()) {
+      heap_->GetLargeObjectsSpace()->ForEachMemMap([&add_gc_range](const MemMap& map) {
+        DCHECK(IsAligned<kPageSize>(map.BaseSize()));
+        add_gc_range(map.BaseBegin(), map.BaseSize());
+      });
+      // mark bitmap
+      accounting::LargeObjectBitmap* bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
+      add_gc_range(bitmap->Begin(), bitmap->Size());
+      // live bitmap
+      bitmap = heap_->GetLargeObjectsSpace()->GetLiveBitmap();
+      add_gc_range(bitmap->Begin(), bitmap->Size());
+    }
+    // card table
+    add_gc_range(heap_->GetCardTable()->MemMapBegin(), heap_->GetCardTable()->MemMapSize());
+    // inter-region refs
+    if (use_generational_cc_ && !young_gen_) {
+      // region space
+      add_gc_range(region_space_inter_region_bitmap_->Begin(),
+                   region_space_inter_region_bitmap_->Size());
+      // non-moving space
+      add_gc_range(non_moving_space_inter_region_bitmap_->Begin(),
+                   non_moving_space_inter_region_bitmap_->Size());
+    }
+    // Extract RSS using mincore(). Updates the cummulative RSS counter.
+    ExtractRssFromMincore(&gc_ranges);
+  }
+}
+
 void ConcurrentCopying::ReclaimPhase() {
   TimingLogger::ScopedTiming split("ReclaimPhase", GetTimings());
   if (kVerboseMode) {
@@ -2552,6 +2618,14 @@ void ConcurrentCopying::ReclaimPhase() {
     CheckEmptyMarkStack();
   }
 
+  // Capture RSS at the time when memory usage is at its peak. All GC related
+  // memory ranges like java heap, card table, bitmap etc. are taken into
+  // account.
+  // TODO: We can fetch resident memory for region space directly by going
+  // through list of allocated regions. This way we can avoid calling mincore on
+  // the biggest memory range, thereby reducing the cost of this function.
+  CaptureRssAtPeak();
+
   {
     // Record freed objects.
     TimingLogger::ScopedTiming split2("RecordFree", GetTimings());
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 124713c17c..44ee7c2023 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -88,6 +88,7 @@ class ConcurrentCopying : public GarbageCollector {
                !rb_slow_path_histogram_lock_,
                !skipped_blocks_lock_);
 
+  void CaptureRssAtPeak() REQUIRES(!mark_stack_lock_);
   void BindBitmaps() REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::heap_bitmap_lock_);
   GcType GetGcType() const override {
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index ec0ac6fbf7..e4ae10a05f 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -15,6 +15,8 @@
  */
 
 #include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
 
 #include "garbage_collector.h"
 
@@ -65,6 +67,9 @@ GarbageCollector::GarbageCollector(Heap* heap, const std::string& name)
     : heap_(heap),
      name_(name),
      pause_histogram_((name_ + " paused").c_str(), kPauseBucketSize, kPauseBucketCount),
+      rss_histogram_((name_ + " peak-rss").c_str(),
+                     /*initial_bucket_width=*/ 10,
+                     /*max_buckets=*/ 20),
      freed_bytes_histogram_((name_ + " freed-bytes").c_str(),
                             /*initial_bucket_width=*/ 10,
                             /*max_buckets=*/ 20),
@@ -84,11 +89,64 @@ void GarbageCollector::ResetCumulativeStatistics() {
   total_time_ns_ = 0u;
   total_freed_objects_ = 0u;
   total_freed_bytes_ = 0;
+  rss_histogram_.Reset();
   freed_bytes_histogram_.Reset();
   MutexLock mu(Thread::Current(), pause_histogram_lock_);
   pause_histogram_.Reset();
 }
 
+uint64_t GarbageCollector::ExtractRssFromMincore(
+    std::list<std::pair<void*, void*>>* gc_ranges) {
+  using range_t = std::pair<void*, void*>;
+  uint64_t rss = 0;
+  if (gc_ranges->empty()) {
+    return 0;
+  }
+  // mincore() is linux-specific syscall.
+#if defined(__linux__)
+  // Sort gc_ranges
+  gc_ranges->sort([](const range_t& a, const range_t& b) {
+    return std::less()(a.first, b.first);
+  });
+  // Merge gc_ranges. It's necessary because the kernel may merge contiguous
+  // regions if their properties match. This is sufficient as kernel doesn't
+  // merge those adjoining ranges which differ only in name.
+  size_t vec_len = 0;
+  for (auto it = gc_ranges->begin(); it != gc_ranges->end(); it++) {
+    auto next_it = it;
+    next_it++;
+    while (next_it != gc_ranges->end()) {
+      if (it->second == next_it->first) {
+        it->second = next_it->second;
+        next_it = gc_ranges->erase(next_it);
+      } else {
+        break;
+      }
+    }
+    size_t length = static_cast<uint8_t*>(it->second) - static_cast<uint8_t*>(it->first);
+    // Compute max length for vector allocation later.
+    vec_len = std::max(vec_len, length / kPageSize);
+  }
+  std::unique_ptr<unsigned char[]> vec(new unsigned char[vec_len]);
+  for (const auto it : *gc_ranges) {
+    size_t length = static_cast<uint8_t*>(it.second) - static_cast<uint8_t*>(it.first);
+    if (mincore(it.first, length, vec.get()) == 0) {
+      for (size_t i = 0; i < length / kPageSize; i++) {
+        // Least significant bit represents residency of a page. Other bits are
+        // reserved.
+        rss += vec[i] & 0x1;
+      }
+    } else {
+      LOG(WARNING) << "Call to mincore() on memory range [0x" << std::hex << it.first
+                   << ", 0x" << it.second << std::dec << ") failed: " << strerror(errno);
+    }
+  }
+  rss *= kPageSize;
+  rss_histogram_.AddValue(rss / KB);
+#endif
+  return rss;
+}
+
 void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) {
   ScopedTrace trace(android::base::StringPrintf("%s %s GC", PrettyCause(gc_cause), GetName()));
   Thread* self = Thread::Current();
@@ -171,6 +229,7 @@ void GarbageCollector::ResetMeasurements() {
     pause_histogram_.Reset();
   }
   cumulative_timings_.Reset();
+  rss_histogram_.Reset();
   freed_bytes_histogram_.Reset();
   total_thread_cpu_time_ns_ = 0u;
   total_time_ns_ = 0u;
@@ -243,6 +302,17 @@ void GarbageCollector::DumpPerformanceInfo(std::ostream& os) {
       pause_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data);
     }
   }
+#if defined(__linux__)
+  if (rss_histogram_.SampleSize() > 0) {
+    os << rss_histogram_.Name()
+       << ": Avg: " << PrettySize(rss_histogram_.Mean() * KB)
+       << " Max: " << PrettySize(rss_histogram_.Max() * KB)
+       << " Min: " << PrettySize(rss_histogram_.Min() * KB) << "\n";
+    os << "Peak-rss Histogram: ";
+    rss_histogram_.DumpBins(os);
+    os << "\n";
+  }
+#endif
   if (freed_bytes_histogram_.SampleSize() > 0) {
     os << freed_bytes_histogram_.Name()
        << ": Avg: " << PrettySize(freed_bytes_histogram_.Mean() * KB)
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 22afac60a4..a4f94675dc 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -18,7 +18,7 @@
 #define ART_RUNTIME_GC_COLLECTOR_GARBAGE_COLLECTOR_H_
 
 #include <stdint.h>
-#include <vector>
+#include <list>
 
 #include "base/histogram.h"
 #include "base/mutex.h"
@@ -111,6 +111,9 @@ class GarbageCollector : public RootVisitor, public IsMarkedVisitor, public Mark
   void RecordFreeLOS(const ObjectBytePair& freed);
   virtual void DumpPerformanceInfo(std::ostream& os) REQUIRES(!pause_histogram_lock_);
 
+  // Extract RSS for GC-specific memory ranges using mincore().
+  uint64_t ExtractRssFromMincore(std::list<std::pair<void*, void*>>* gc_ranges);
+
   // Helper functions for querying if objects are marked. These are used for processing references,
   // and will be used for reading system weaks while the GC is running.
   virtual mirror::Object* IsMarked(mirror::Object* obj)
@@ -149,6 +152,7 @@ class GarbageCollector : public RootVisitor, public IsMarkedVisitor, public Mark
   std::string name_;
   // Cumulative statistics.
   Histogram<uint64_t> pause_histogram_ GUARDED_BY(pause_histogram_lock_);
+  Histogram<uint64_t> rss_histogram_;
   Histogram<size_t> freed_bytes_histogram_;
   uint64_t total_thread_cpu_time_ns_;
   uint64_t total_time_ns_;
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 1658dba413..2c18888c5f 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -231,6 +231,13 @@ void LargeObjectMapSpace::Walk(DlMallocSpace::WalkCallback callback, void* arg)
   }
 }
 
+void LargeObjectMapSpace::ForEachMemMap(std::function<void(const MemMap&)> func) const {
+  MutexLock mu(Thread::Current(), lock_);
+  for (auto& pair : large_objects_) {
+    func(pair.second.mem_map);
+  }
+}
+
 bool LargeObjectMapSpace::Contains(const mirror::Object* obj) const {
   Thread* self = Thread::Current();
   if (lock_.IsExclusiveHeld(self)) {
@@ -398,6 +405,12 @@ void FreeListSpace::Walk(DlMallocSpace::WalkCallback callback, void* arg) {
   CHECK_EQ(cur_info, end_info);
 }
 
+void FreeListSpace::ForEachMemMap(std::function<void(const MemMap&)> func) const {
+  MutexLock mu(Thread::Current(), lock_);
+  func(allocation_info_map_);
+  func(mem_map_);
+}
+
 void FreeListSpace::RemoveFreePrev(AllocationInfo* info) {
   CHECK_GT(info->GetPrevFree(), 0U);
   auto it = free_blocks_.lower_bound(info);
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index a4d6a24263..86ecd85c83 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -110,6 +110,7 @@ class LargeObjectSpace : public DiscontinuousSpace, public AllocSpace {
   // objects.
   virtual void SetAllLargeObjectsAsZygoteObjects(Thread* self) = 0;
 
+  virtual void ForEachMemMap(std::function<void(const MemMap&)> func) const = 0;
   // GetRangeAtomic returns Begin() and End() atomically, that is, it never returns Begin() and
   // End() from different allocations.
   virtual std::pair<uint8_t*, uint8_t*> GetBeginEndAtomic() const = 0;
@@ -160,7 +161,7 @@ class LargeObjectMapSpace : public LargeObjectSpace {
   void Walk(DlMallocSpace::WalkCallback, void* arg) override REQUIRES(!lock_);
   // TODO: disabling thread safety analysis as this may be called when we already hold lock_.
   bool Contains(const mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS;
-
+  void ForEachMemMap(std::function<void(const MemMap&)> func) const override REQUIRES(!lock_);
   std::pair<uint8_t*, uint8_t*> GetBeginEndAtomic() const override REQUIRES(!lock_);
 
 protected:
@@ -193,7 +194,7 @@ class FreeListSpace final : public LargeObjectSpace {
   size_t Free(Thread* self, mirror::Object* obj) override REQUIRES(!lock_);
   void Walk(DlMallocSpace::WalkCallback callback, void* arg) override REQUIRES(!lock_);
   void Dump(std::ostream& os) const REQUIRES(!lock_);
-
+  void ForEachMemMap(std::function<void(const MemMap&)> func) const override REQUIRES(!lock_);
   std::pair<uint8_t*, uint8_t*> GetBeginEndAtomic() const override REQUIRES(!lock_);
 
 protected:
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 903263f26a..3a42f9847c 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -443,6 +443,10 @@ class ContinuousMemMapAllocSpace : public MemMapSpace, public AllocSpace {
     return mark_bitmap_.get();
   }
 
+  accounting::ContinuousSpaceBitmap* GetTempBitmap() const {
+    return temp_bitmap_.get();
+  }
+
   collector::ObjectBytePair Sweep(bool swap_bitmaps);
   virtual accounting::ContinuousSpaceBitmap::SweepCallback* GetSweepCallback() = 0;
 
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 564a1b918b..6a9e1e9fec 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -775,6 +775,10 @@ class Runtime {
     dump_gc_performance_on_shutdown_ = value;
   }
 
+  bool GetDumpGCPerformanceOnShutdown() const {
+    return dump_gc_performance_on_shutdown_;
+  }
+
   void IncrementDeoptimizationCount(DeoptimizationKind kind) {
     DCHECK_LE(kind, DeoptimizationKind::kLast);
     deoptimization_counts_[static_cast<size_t>(kind)]++;
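
One detail worth noting from the garbage_collector.cc hunk above: before calling mincore(), ExtractRssFromMincore() sorts the ranges and coalesces adjoining [begin, end) pairs, both to keep the number of mincore() calls down and because the kernel may have merged contiguous mappings into a single VMA. The fragment below is a standalone sketch of just that sort-and-merge step; MergeAdjacentRanges and the stand-in block array are hypothetical, not ART code.

// Standalone sketch of the sort-and-merge step: coalesce touching [begin, end)
// ranges so each merged range can be covered by a single mincore() call.
#include <cstdio>
#include <functional>
#include <iterator>
#include <list>
#include <utility>

// [begin, end) pairs, assumed page-aligned, mirroring range_t in the patch.
using Range = std::pair<void*, void*>;

// Hypothetical helper: sort by start address, then absorb every range that
// begins exactly where the previous one ends.
static void MergeAdjacentRanges(std::list<Range>* ranges) {
  ranges->sort([](const Range& a, const Range& b) {
    return std::less<void*>()(a.first, b.first);
  });
  for (auto it = ranges->begin(); it != ranges->end(); ++it) {
    auto next_it = std::next(it);
    while (next_it != ranges->end() && it->second == next_it->first) {
      it->second = next_it->second;
      next_it = ranges->erase(next_it);
    }
  }
}

int main() {
  static char block[0x4000];  // Stand-in memory, purely for demonstration.
  std::list<Range> ranges = {
      {block + 0x3000, block + 0x4000},
      {block + 0x1000, block + 0x2000},
      {block + 0x2000, block + 0x3000},
  };
  MergeAdjacentRanges(&ranges);
  // The three touching ranges collapse into one: [block + 0x1000, block + 0x4000).
  std::printf("ranges after merge: %zu\n", ranges.size());
  return 0;
}

With the ranges coalesced, the per-range loop in ExtractRssFromMincore() only needs to size its mincore() vector once, to the largest merged range.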