| /* |
| * Copyright (C) 2012 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <stdio.h> |
| #include <unistd.h> |
| #include <sys/mman.h> |
| |
| #include "garbage_collector.h" |
| |
| #include "android-base/stringprintf.h" |
| |
| #include "base/dumpable.h" |
| #include "base/histogram-inl.h" |
| #include "base/logging.h" // For VLOG_IS_ON. |
| #include "base/mutex-inl.h" |
| #include "base/systrace.h" |
| #include "base/time_utils.h" |
| #include "base/utils.h" |
| #include "gc/accounting/heap_bitmap.h" |
| #include "gc/gc_pause_listener.h" |
| #include "gc/heap.h" |
| #include "gc/space/large_object_space.h" |
| #include "gc/space/space-inl.h" |
| #include "runtime.h" |
| #include "thread-current-inl.h" |
| #include "thread_list.h" |
| |
| namespace art { |
| namespace gc { |
| namespace collector { |
| |
| namespace { |
| |
| // Report a GC metric via the ATrace interface. |
| void TraceGCMetric(const char* name, int64_t value) { |
| // ART's interface with systrace (through libartpalette) only supports |
| // reporting 32-bit (signed) integer values at the moment. Upon |
| // underflows/overflows, clamp metric values at `int32_t` min/max limits and |
| // report these events via a corresponding underflow/overflow counter; also |
| // log a warning about the first underflow/overflow occurrence. |
| // |
| // TODO(b/300015145): Consider extending libarpalette to allow reporting this |
| // value as a 64-bit (signed) integer (instead of a 32-bit (signed) integer). |
| // Note that this is likely unnecessary at the moment (November 2023) for any |
| // size-related GC metric, given the maximum theoretical size of a managed |
| // heap (4 GiB). |
| if (UNLIKELY(value < std::numeric_limits<int32_t>::min())) { |
| ATraceIntegerValue(name, std::numeric_limits<int32_t>::min()); |
| std::string underflow_counter_name = std::string(name) + " int32_t underflow"; |
| ATraceIntegerValue(underflow_counter_name.c_str(), 1); |
| static bool int32_underflow_reported = false; |
| if (!int32_underflow_reported) { |
| LOG(WARNING) << "GC Metric \"" << name << "\" with value " << value |
| << " causing a 32-bit integer underflow"; |
| int32_underflow_reported = true; |
| } |
| return; |
| } |
| if (UNLIKELY(value > std::numeric_limits<int32_t>::max())) { |
| ATraceIntegerValue(name, std::numeric_limits<int32_t>::max()); |
| std::string overflow_counter_name = std::string(name) + " int32_t overflow"; |
| ATraceIntegerValue(overflow_counter_name.c_str(), 1); |
| static bool int32_overflow_reported = false; |
| if (!int32_overflow_reported) { |
| LOG(WARNING) << "GC Metric \"" << name << "\" with value " << value |
| << " causing a 32-bit integer overflow"; |
| int32_overflow_reported = true; |
| } |
| return; |
| } |
| ATraceIntegerValue(name, value); |
| } |
| |
| } // namespace |
| |
| Iteration::Iteration() |
| : duration_ns_(0), timings_("GC iteration timing logger", true, VLOG_IS_ON(heap)) { |
| Reset(kGcCauseBackground, false); // Reset to some place holder values. |
| } |
| |
| void Iteration::Reset(GcCause gc_cause, bool clear_soft_references) { |
| timings_.Reset(); |
| pause_times_.clear(); |
| duration_ns_ = 0; |
| bytes_scanned_ = 0; |
| clear_soft_references_ = clear_soft_references; |
| gc_cause_ = gc_cause; |
| freed_ = ObjectBytePair(); |
| freed_los_ = ObjectBytePair(); |
| freed_bytes_revoke_ = 0; |
| } |
| |
| uint64_t Iteration::GetEstimatedThroughput() const { |
| // Add 1ms to prevent possible division by 0. |
| return (static_cast<uint64_t>(freed_.bytes) * 1000) / (NsToMs(GetDurationNs()) + 1); |
| } |
| |
| GarbageCollector::GarbageCollector(Heap* heap, const std::string& name) |
| : heap_(heap), |
| name_(name), |
| pause_histogram_((name_ + " paused").c_str(), kPauseBucketSize, kPauseBucketCount), |
| rss_histogram_((name_ + " peak-rss").c_str(), kMemBucketSize, kMemBucketCount), |
| freed_bytes_histogram_((name_ + " freed-bytes").c_str(), kMemBucketSize, kMemBucketCount), |
| gc_time_histogram_(nullptr), |
| metrics_gc_count_(nullptr), |
| metrics_gc_count_delta_(nullptr), |
| gc_throughput_histogram_(nullptr), |
| gc_tracing_throughput_hist_(nullptr), |
| gc_throughput_avg_(nullptr), |
| gc_tracing_throughput_avg_(nullptr), |
| gc_scanned_bytes_(nullptr), |
| gc_scanned_bytes_delta_(nullptr), |
| gc_freed_bytes_(nullptr), |
| gc_freed_bytes_delta_(nullptr), |
| gc_duration_(nullptr), |
| gc_duration_delta_(nullptr), |
| cumulative_timings_(name), |
| pause_histogram_lock_("pause histogram lock", kDefaultMutexLevel, true), |
| is_transaction_active_(false), |
| are_metrics_initialized_(false) { |
| ResetCumulativeStatistics(); |
| } |
| |
| void GarbageCollector::RegisterPause(uint64_t nano_length) { |
| GetCurrentIteration()->pause_times_.push_back(nano_length); |
| } |
| |
| void GarbageCollector::ResetCumulativeStatistics() { |
| cumulative_timings_.Reset(); |
| total_thread_cpu_time_ns_ = 0u; |
| total_time_ns_ = 0u; |
| total_freed_objects_ = 0u; |
| total_freed_bytes_ = 0; |
| total_scanned_bytes_ = 0; |
| rss_histogram_.Reset(); |
| freed_bytes_histogram_.Reset(); |
| MutexLock mu(Thread::Current(), pause_histogram_lock_); |
| pause_histogram_.Reset(); |
| } |
| |
| uint64_t GarbageCollector::ExtractRssFromMincore( |
| std::list<std::pair<void*, void*>>* gc_ranges) { |
| uint64_t rss = 0; |
| if (gc_ranges->empty()) { |
| return 0; |
| } |
| // mincore() is linux-specific syscall. |
| #if defined(__linux__) |
| using range_t = std::pair<void*, void*>; |
| // Sort gc_ranges |
| gc_ranges->sort([](const range_t& a, const range_t& b) { |
| return std::less()(a.first, b.first); |
| }); |
| // Merge gc_ranges. It's necessary because the kernel may merge contiguous |
| // regions if their properties match. This is sufficient as kernel doesn't |
| // merge those adjoining ranges which differ only in name. |
| size_t vec_len = 0; |
| for (auto it = gc_ranges->begin(); it != gc_ranges->end(); it++) { |
| auto next_it = it; |
| next_it++; |
| while (next_it != gc_ranges->end()) { |
| if (it->second == next_it->first) { |
| it->second = next_it->second; |
| next_it = gc_ranges->erase(next_it); |
| } else { |
| break; |
| } |
| } |
| size_t length = static_cast<uint8_t*>(it->second) - static_cast<uint8_t*>(it->first); |
| // Compute max length for vector allocation later. |
| vec_len = std::max(vec_len, length / kPageSize); |
| } |
| std::unique_ptr<unsigned char[]> vec(new unsigned char[vec_len]); |
| for (const auto it : *gc_ranges) { |
| size_t length = static_cast<uint8_t*>(it.second) - static_cast<uint8_t*>(it.first); |
| if (mincore(it.first, length, vec.get()) == 0) { |
| for (size_t i = 0; i < length / kPageSize; i++) { |
| // Least significant bit represents residency of a page. Other bits are |
| // reserved. |
| rss += vec[i] & 0x1; |
| } |
| } else { |
| LOG(WARNING) << "Call to mincore() on memory range [0x" << std::hex << it.first |
| << ", 0x" << it.second << std::dec << ") failed: " << strerror(errno); |
| } |
| } |
| rss *= kPageSize; |
| rss_histogram_.AddValue(rss / KB); |
| #endif |
| return rss; |
| } |
| |
| void GarbageCollector::Run(GcCause gc_cause, bool clear_soft_references) { |
| ScopedTrace trace(android::base::StringPrintf("%s %s GC", PrettyCause(gc_cause), GetName())); |
| Thread* self = Thread::Current(); |
| Runtime* runtime = Runtime::Current(); |
| uint64_t start_time = NanoTime(); |
| uint64_t thread_cpu_start_time = ThreadCpuNanoTime(); |
| GetHeap()->CalculatePreGcWeightedAllocatedBytes(); |
| Iteration* current_iteration = GetCurrentIteration(); |
| current_iteration->Reset(gc_cause, clear_soft_references); |
| // Note transaction mode is single-threaded and there's no asynchronous GC and this flag doesn't |
| // change in the middle of a GC. |
| is_transaction_active_ = runtime->IsActiveTransaction(); |
| RunPhases(); // Run all the GC phases. |
| GetHeap()->CalculatePostGcWeightedAllocatedBytes(); |
| // Add the current timings to the cumulative timings. |
| cumulative_timings_.AddLogger(*GetTimings()); |
| // Update cumulative statistics with how many bytes the GC iteration freed. |
| total_freed_objects_ += current_iteration->GetFreedObjects() + |
| current_iteration->GetFreedLargeObjects(); |
| total_scanned_bytes_ += current_iteration->GetScannedBytes(); |
| int64_t freed_bytes = current_iteration->GetFreedBytes() + |
| current_iteration->GetFreedLargeObjectBytes(); |
| total_freed_bytes_ += freed_bytes; |
| // Rounding negative freed bytes to 0 as we are not interested in such corner cases. |
| freed_bytes_histogram_.AddValue(std::max<int64_t>(freed_bytes / KB, 0)); |
| uint64_t end_time = NanoTime(); |
| uint64_t thread_cpu_end_time = ThreadCpuNanoTime(); |
| total_thread_cpu_time_ns_ += thread_cpu_end_time - thread_cpu_start_time; |
| uint64_t duration_ns = end_time - start_time; |
| current_iteration->SetDurationNs(duration_ns); |
| if (Locks::mutator_lock_->IsExclusiveHeld(self)) { |
| // The entire GC was paused, clear the fake pauses which might be in the pause times and add |
| // the whole GC duration. |
| current_iteration->pause_times_.clear(); |
| RegisterPause(duration_ns); |
| } |
| total_time_ns_ += duration_ns; |
| uint64_t total_pause_time_ns = 0; |
| for (uint64_t pause_time : current_iteration->GetPauseTimes()) { |
| MutexLock mu(self, pause_histogram_lock_); |
| pause_histogram_.AdjustAndAddValue(pause_time); |
| total_pause_time_ns += pause_time; |
| } |
| metrics::ArtMetrics* metrics = runtime->GetMetrics(); |
| // Report STW pause time in microseconds. |
| const uint64_t total_pause_time_us = total_pause_time_ns / 1'000; |
| metrics->WorldStopTimeDuringGCAvg()->Add(total_pause_time_us); |
| metrics->GcWorldStopTime()->Add(total_pause_time_us); |
| metrics->GcWorldStopTimeDelta()->Add(total_pause_time_us); |
| metrics->GcWorldStopCount()->AddOne(); |
| metrics->GcWorldStopCountDelta()->AddOne(); |
| // Report total collection time of all GCs put together. |
| metrics->TotalGcCollectionTime()->Add(NsToMs(duration_ns)); |
| metrics->TotalGcCollectionTimeDelta()->Add(NsToMs(duration_ns)); |
| if (are_metrics_initialized_) { |
| metrics_gc_count_->Add(1); |
| metrics_gc_count_delta_->Add(1); |
| // Report GC time in milliseconds. |
| gc_time_histogram_->Add(NsToMs(duration_ns)); |
| // Throughput in bytes/s. Add 1us to prevent possible division by 0. |
| uint64_t throughput = (current_iteration->GetScannedBytes() * 1'000'000) |
| / (NsToUs(duration_ns) + 1); |
| // Report in MB/s. |
| throughput /= MB; |
| gc_tracing_throughput_hist_->Add(throughput); |
| gc_tracing_throughput_avg_->Add(throughput); |
| |
| // Report GC throughput in MB/s. |
| throughput = current_iteration->GetEstimatedThroughput() / MB; |
| gc_throughput_histogram_->Add(throughput); |
| gc_throughput_avg_->Add(throughput); |
| |
| gc_scanned_bytes_->Add(current_iteration->GetScannedBytes()); |
| gc_scanned_bytes_delta_->Add(current_iteration->GetScannedBytes()); |
| gc_freed_bytes_->Add(current_iteration->GetFreedBytes()); |
| gc_freed_bytes_delta_->Add(current_iteration->GetFreedBytes()); |
| gc_duration_->Add(NsToMs(current_iteration->GetDurationNs())); |
| gc_duration_delta_->Add(NsToMs(current_iteration->GetDurationNs())); |
| } |
| |
| // Report some metrics via the ATrace interface, to surface them in Perfetto. |
| TraceGCMetric("freed_normal_object_bytes", current_iteration->GetFreedBytes()); |
| TraceGCMetric("freed_large_object_bytes", current_iteration->GetFreedLargeObjectBytes()); |
| TraceGCMetric("freed_bytes", freed_bytes); |
| |
| is_transaction_active_ = false; |
| } |
| |
| void GarbageCollector::SwapBitmaps() { |
| TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); |
| // Swap the live and mark bitmaps for each alloc space. This is needed since sweep re-swaps |
| // these bitmaps. The bitmap swapping is an optimization so that we do not need to clear the live |
| // bits of dead objects in the live bitmap. |
| const GcType gc_type = GetGcType(); |
| for (const auto& space : GetHeap()->GetContinuousSpaces()) { |
| // We never allocate into zygote spaces. |
| if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect || |
| (gc_type == kGcTypeFull && |
| space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect)) { |
| if (space->GetLiveBitmap() != nullptr && !space->HasBoundBitmaps()) { |
| CHECK(space->IsContinuousMemMapAllocSpace()); |
| space->AsContinuousMemMapAllocSpace()->SwapBitmaps(); |
| } |
| } |
| } |
| for (const auto& disc_space : GetHeap()->GetDiscontinuousSpaces()) { |
| disc_space->AsLargeObjectSpace()->SwapBitmaps(); |
| } |
| } |
| |
| uint64_t GarbageCollector::GetEstimatedMeanThroughput() const { |
| // Add 1ms to prevent possible division by 0. |
| return (total_freed_bytes_ * 1000) / (NsToMs(GetCumulativeTimings().GetTotalNs()) + 1); |
| } |
| |
| void GarbageCollector::ResetMeasurements() { |
| { |
| MutexLock mu(Thread::Current(), pause_histogram_lock_); |
| pause_histogram_.Reset(); |
| } |
| cumulative_timings_.Reset(); |
| rss_histogram_.Reset(); |
| freed_bytes_histogram_.Reset(); |
| total_thread_cpu_time_ns_ = 0u; |
| total_time_ns_ = 0u; |
| total_freed_objects_ = 0u; |
| total_freed_bytes_ = 0; |
| total_scanned_bytes_ = 0u; |
| } |
| |
| GarbageCollector::ScopedPause::ScopedPause(GarbageCollector* collector, bool with_reporting) |
| : start_time_(NanoTime()), collector_(collector), with_reporting_(with_reporting) { |
| Runtime* runtime = Runtime::Current(); |
| runtime->GetThreadList()->SuspendAll(__FUNCTION__); |
| if (with_reporting) { |
| GcPauseListener* pause_listener = runtime->GetHeap()->GetGcPauseListener(); |
| if (pause_listener != nullptr) { |
| pause_listener->StartPause(); |
| } |
| } |
| } |
| |
| GarbageCollector::ScopedPause::~ScopedPause() { |
| collector_->RegisterPause(NanoTime() - start_time_); |
| Runtime* runtime = Runtime::Current(); |
| if (with_reporting_) { |
| GcPauseListener* pause_listener = runtime->GetHeap()->GetGcPauseListener(); |
| if (pause_listener != nullptr) { |
| pause_listener->EndPause(); |
| } |
| } |
| runtime->GetThreadList()->ResumeAll(); |
| } |
| |
| // Returns the current GC iteration and assocated info. |
| Iteration* GarbageCollector::GetCurrentIteration() { |
| return heap_->GetCurrentGcIteration(); |
| } |
| const Iteration* GarbageCollector::GetCurrentIteration() const { |
| return heap_->GetCurrentGcIteration(); |
| } |
| |
| void GarbageCollector::RecordFree(const ObjectBytePair& freed) { |
| GetCurrentIteration()->freed_.Add(freed); |
| heap_->RecordFree(freed.objects, freed.bytes); |
| } |
| void GarbageCollector::RecordFreeLOS(const ObjectBytePair& freed) { |
| GetCurrentIteration()->freed_los_.Add(freed); |
| heap_->RecordFree(freed.objects, freed.bytes); |
| } |
| |
| uint64_t GarbageCollector::GetTotalPausedTimeNs() { |
| MutexLock mu(Thread::Current(), pause_histogram_lock_); |
| return pause_histogram_.AdjustedSum(); |
| } |
| |
| void GarbageCollector::DumpPerformanceInfo(std::ostream& os) { |
| const CumulativeLogger& logger = GetCumulativeTimings(); |
| const size_t iterations = logger.GetIterations(); |
| if (iterations == 0) { |
| return; |
| } |
| os << Dumpable<CumulativeLogger>(logger); |
| const uint64_t total_ns = logger.GetTotalNs(); |
| const double seconds = NsToMs(total_ns) / 1000.0; |
| const uint64_t freed_bytes = GetTotalFreedBytes(); |
| const uint64_t freed_objects = GetTotalFreedObjects(); |
| const uint64_t scanned_bytes = GetTotalScannedBytes(); |
| { |
| MutexLock mu(Thread::Current(), pause_histogram_lock_); |
| if (pause_histogram_.SampleSize() > 0) { |
| Histogram<uint64_t>::CumulativeData cumulative_data; |
| pause_histogram_.CreateHistogram(&cumulative_data); |
| pause_histogram_.PrintConfidenceIntervals(os, 0.99, cumulative_data); |
| } |
| } |
| #if defined(__linux__) |
| if (rss_histogram_.SampleSize() > 0) { |
| os << rss_histogram_.Name() |
| << ": Avg: " << PrettySize(rss_histogram_.Mean() * KB) |
| << " Max: " << PrettySize(rss_histogram_.Max() * KB) |
| << " Min: " << PrettySize(rss_histogram_.Min() * KB) << "\n"; |
| os << "Peak-rss Histogram: "; |
| rss_histogram_.DumpBins(os); |
| os << "\n"; |
| } |
| #endif |
| if (freed_bytes_histogram_.SampleSize() > 0) { |
| os << freed_bytes_histogram_.Name() |
| << ": Avg: " << PrettySize(freed_bytes_histogram_.Mean() * KB) |
| << " Max: " << PrettySize(freed_bytes_histogram_.Max() * KB) |
| << " Min: " << PrettySize(freed_bytes_histogram_.Min() * KB) << "\n"; |
| os << "Freed-bytes histogram: "; |
| freed_bytes_histogram_.DumpBins(os); |
| os << "\n"; |
| } |
| const double cpu_seconds = NsToMs(GetTotalCpuTime()) / 1000.0; |
| os << GetName() << " total time: " << PrettyDuration(total_ns) |
| << " mean time: " << PrettyDuration(total_ns / iterations) << "\n" |
| << GetName() << " freed: " << freed_objects |
| << " objects with total size " << PrettySize(freed_bytes) << "\n" |
| << GetName() << " throughput: " << freed_objects / seconds << "/s / " |
| << PrettySize(freed_bytes / seconds) << "/s" |
| << " per cpu-time: " |
| << static_cast<uint64_t>(freed_bytes / cpu_seconds) << "/s / " |
| << PrettySize(freed_bytes / cpu_seconds) << "/s\n" |
| << GetName() << " tracing throughput: " |
| << PrettySize(scanned_bytes / seconds) << "/s " |
| << " per cpu-time: " |
| << PrettySize(scanned_bytes / cpu_seconds) << "/s\n"; |
| } |
| |
| } // namespace collector |
| } // namespace gc |
| } // namespace art |