-rw-r--r--  runtime/gc/heap-inl.h                       50
-rw-r--r--  runtime/gc/heap.cc                         253
-rw-r--r--  runtime/gc/heap.h                          139
-rw-r--r--  runtime/native/dalvik_system_VMRuntime.cc   26
-rw-r--r--  test/175-alloc-big-bignums/expected.txt      1
-rw-r--r--  test/175-alloc-big-bignums/info.txt         11
-rw-r--r--  test/175-alloc-big-bignums/src/Main.java    38
7 files changed, 369 insertions, 149 deletions
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index 9e1ba35a23..1c09b5c9bf 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -214,7 +214,7 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, if (AllocatorMayHaveConcurrentGC(allocator) && IsGcConcurrent()) { // New_num_bytes_allocated is zero if we didn't update num_bytes_allocated_. // That's fine. - CheckConcurrentGC(self, new_num_bytes_allocated, &obj); + CheckConcurrentGCForJava(self, new_num_bytes_allocated, &obj); } VerifyObject(obj); self->VerifyStack(); @@ -254,8 +254,8 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, size_t* bytes_allocated, size_t* usable_size, size_t* bytes_tl_bulk_allocated) { - if (allocator_type != kAllocatorTypeTLAB && - allocator_type != kAllocatorTypeRegionTLAB && + if (allocator_type != kAllocatorTypeRegionTLAB && + allocator_type != kAllocatorTypeTLAB && allocator_type != kAllocatorTypeRosAlloc && UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, alloc_size, kGrow))) { return nullptr; @@ -396,30 +396,46 @@ inline bool Heap::ShouldAllocLargeObject(ObjPtr<mirror::Class> c, size_t byte_co inline bool Heap::IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size, bool grow) { - size_t new_footprint = num_bytes_allocated_.load(std::memory_order_relaxed) + alloc_size; - if (UNLIKELY(new_footprint > max_allowed_footprint_)) { - if (UNLIKELY(new_footprint > growth_limit_)) { + size_t old_target = target_footprint_.load(std::memory_order_relaxed); + while (true) { + size_t old_allocated = num_bytes_allocated_.load(std::memory_order_relaxed); + size_t new_footprint = old_allocated + alloc_size; + // Tests against heap limits are inherently approximate, since multiple allocations may + // race, and this is not atomic with the allocation. + if (UNLIKELY(new_footprint <= old_target)) { + return false; + } else if (UNLIKELY(new_footprint > growth_limit_)) { return true; } - if (!AllocatorMayHaveConcurrentGC(allocator_type) || !IsGcConcurrent()) { - if (!grow) { + // We are between target_footprint_ and growth_limit_ . + if (AllocatorMayHaveConcurrentGC(allocator_type) && IsGcConcurrent()) { + return false; + } else { + if (grow) { + if (target_footprint_.compare_exchange_weak(/*inout ref*/old_target, new_footprint, + std::memory_order_relaxed)) { + VlogHeapGrowth(old_target, new_footprint, alloc_size); + return false; + } // else try again. + } else { return true; } - // TODO: Grow for allocation is racy, fix it. - VlogHeapGrowth(max_allowed_footprint_, new_footprint, alloc_size); - max_allowed_footprint_ = new_footprint; } } - return false; } -// Request a GC if new_num_bytes_allocated is sufficiently large. -// A call with new_num_bytes_allocated == 0 is a fast no-op. -inline void Heap::CheckConcurrentGC(Thread* self, +inline bool Heap::ShouldConcurrentGCForJava(size_t new_num_bytes_allocated) { + // For a Java allocation, we only check whether the number of Java allocated bytes excceeds a + // threshold. By not considering native allocation here, we (a) ensure that Java heap bounds are + // maintained, and (b) reduce the cost of the check here. 
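The rewritten IsOutOfMemoryOnAllocation() above replaces the racy, TODO-marked store to max_allowed_footprint_ with a compare-exchange retry loop on the new atomic target_footprint_. Below is a minimal standalone sketch of that retry pattern; the allocator and concurrent-GC checks are stubbed out, and all sizes are illustrative rather than ART's values.

```cpp
#include <atomic>
#include <cstddef>
#include <cstdio>

std::atomic<size_t> target_footprint{4 * 1024 * 1024};     // stands in for target_footprint_
std::atomic<size_t> num_bytes_allocated{3 * 1024 * 1024};  // stands in for num_bytes_allocated_
constexpr size_t growth_limit = 64 * 1024 * 1024;

// Returns true on "out of memory"; may raise the target when grow is set.
bool IsOutOfMemorySketch(size_t alloc_size, bool grow) {
  size_t old_target = target_footprint.load(std::memory_order_relaxed);
  while (true) {
    size_t new_footprint = num_bytes_allocated.load(std::memory_order_relaxed) + alloc_size;
    if (new_footprint <= old_target) {
      return false;                     // Fits under the current target.
    } else if (new_footprint > growth_limit) {
      return true;                      // Hard limit reached: report OOM.
    } else if (!grow) {
      return true;                      // Between target and limit, not allowed to grow.
    }
    // Try to publish the larger target. On failure old_target is refreshed and we retry,
    // so a racing grow by another thread is observed instead of being overwritten.
    if (target_footprint.compare_exchange_weak(old_target, new_footprint,
                                               std::memory_order_relaxed)) {
      return false;
    }
  }
}

int main() {
  bool oom = IsOutOfMemorySketch(2 * 1024 * 1024, /*grow=*/true);
  std::printf("oom=%d new target=%zu\n", oom, target_footprint.load());
}
```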
+ return new_num_bytes_allocated >= concurrent_start_bytes_; +} + +inline void Heap::CheckConcurrentGCForJava(Thread* self, size_t new_num_bytes_allocated, ObjPtr<mirror::Object>* obj) { - if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) { - RequestConcurrentGCAndSaveObject(self, false, obj); + if (UNLIKELY(ShouldConcurrentGCForJava(new_num_bytes_allocated))) { + RequestConcurrentGCAndSaveObject(self, false /* force_full */, obj); } } diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index dc79731ab6..77254ce8b8 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -17,6 +17,7 @@ #include "heap.h" #include <limits> +#include <malloc.h> // For mallinfo() #include <memory> #include <vector> @@ -187,7 +188,7 @@ Heap::Heap(size_t initial_size, bool low_memory_mode, size_t long_pause_log_threshold, size_t long_gc_log_threshold, - bool ignore_max_footprint, + bool ignore_target_footprint, bool use_tlab, bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, @@ -218,7 +219,7 @@ Heap::Heap(size_t initial_size, post_gc_last_process_cpu_time_ns_(process_cpu_start_time_ns_), pre_gc_weighted_allocated_bytes_(0.0), post_gc_weighted_allocated_bytes_(0.0), - ignore_max_footprint_(ignore_max_footprint), + ignore_target_footprint_(ignore_target_footprint), zygote_creation_lock_("zygote creation lock", kZygoteCreationLock), zygote_space_(nullptr), large_object_threshold_(large_object_threshold), @@ -231,13 +232,14 @@ Heap::Heap(size_t initial_size, next_gc_type_(collector::kGcTypePartial), capacity_(capacity), growth_limit_(growth_limit), - max_allowed_footprint_(initial_size), + target_footprint_(initial_size), concurrent_start_bytes_(std::numeric_limits<size_t>::max()), total_bytes_freed_ever_(0), total_objects_freed_ever_(0), num_bytes_allocated_(0), - new_native_bytes_allocated_(0), + native_bytes_registered_(0), old_native_bytes_allocated_(0), + native_objects_notified_(0), num_bytes_freed_revoke_(0), verify_missing_card_marks_(false), verify_system_weaks_(false), @@ -616,11 +618,11 @@ Heap::Heap(size_t initial_size, task_processor_.reset(new TaskProcessor()); reference_processor_.reset(new ReferenceProcessor()); pending_task_lock_ = new Mutex("Pending task lock"); - if (ignore_max_footprint_) { + if (ignore_target_footprint_) { SetIdealFootprint(std::numeric_limits<size_t>::max()); concurrent_start_bytes_ = std::numeric_limits<size_t>::max(); } - CHECK_NE(max_allowed_footprint_, 0U); + CHECK_NE(target_footprint_.load(std::memory_order_relaxed), 0U); // Create our garbage collectors. 
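ChangeCollector() above now derives concurrent_start_bytes_ via UnsignedDifference(), a helper added in the heap.h hunk further down. It is simply saturating subtraction on size_t, which keeps these thresholds from wrapping around to a huge value when the subtrahend is larger. A minimal illustration (the constant shown is illustrative, not ART's actual kMinConcurrentRemainingBytes):

```cpp
#include <cstddef>
#include <cstdio>

static size_t UnsignedDifference(size_t x, size_t y) {
  return x > y ? x - y : 0;
}

int main() {
  const size_t kMinConcurrentRemainingBytes = 128 * 1024;  // illustrative value only
  std::printf("%zu\n", UnsignedDifference(8 * 1024 * 1024, kMinConcurrentRemainingBytes));
  std::printf("%zu\n", UnsignedDifference(64 * 1024, kMinConcurrentRemainingBytes));  // 0, no wrap
}
```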
for (size_t i = 0; i < 2; ++i) { const bool concurrent = i != 0; @@ -1158,10 +1160,11 @@ void Heap::DumpGcPerformanceInfo(std::ostream& os) { rosalloc_space_->DumpStats(os); } - os << "Registered native bytes allocated: " - << (old_native_bytes_allocated_.load(std::memory_order_relaxed) + - new_native_bytes_allocated_.load(std::memory_order_relaxed)) - << "\n"; + os << "Native bytes total: " << GetNativeBytes() + << " registered: " << native_bytes_registered_.load(std::memory_order_relaxed) << "\n"; + + os << "Total native bytes at last GC: " + << old_native_bytes_allocated_.load(std::memory_order_relaxed) << "\n"; BaseMutex::DumpAll(os); } @@ -1337,7 +1340,8 @@ void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType size_t total_bytes_free = GetFreeMemory(); oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free << " free bytes and " << PrettySize(GetFreeMemoryUntilOOME()) << " until OOM," - << " max allowed footprint " << max_allowed_footprint_ << ", growth limit " + << " target footprint " << target_footprint_.load(std::memory_order_relaxed) + << ", growth limit " << growth_limit_; // If the allocation failed due to fragmentation, print out the largest continuous allocation. if (total_bytes_free >= byte_count) { @@ -1872,7 +1876,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, } void Heap::SetTargetHeapUtilization(float target) { - DCHECK_GT(target, 0.0f); // asserted in Java code + DCHECK_GT(target, 0.1f); // asserted in Java code DCHECK_LT(target, 1.0f); target_utilization_ = target; } @@ -2286,8 +2290,8 @@ void Heap::ChangeCollector(CollectorType collector_type) { } if (IsGcConcurrent()) { concurrent_start_bytes_ = - std::max(max_allowed_footprint_, kMinConcurrentRemainingBytes) - - kMinConcurrentRemainingBytes; + UnsignedDifference(target_footprint_.load(std::memory_order_relaxed), + kMinConcurrentRemainingBytes); } else { concurrent_start_bytes_ = std::numeric_limits<size_t>::max(); } @@ -2616,6 +2620,39 @@ void Heap::TraceHeapSize(size_t heap_size) { ATRACE_INT("Heap size (KB)", heap_size / KB); } +size_t Heap::GetNativeBytes() { + size_t malloc_bytes; + size_t mmapped_bytes; +#if defined(__BIONIC__) || defined(__GLIBC__) + struct mallinfo mi = mallinfo(); + // In spite of the documentation, the jemalloc version of this call seems to do what we want, + // and it is thread-safe. + if (sizeof(size_t) > sizeof(mi.uordblks) && sizeof(size_t) > sizeof(mi.hblkhd)) { + // Shouldn't happen, but glibc declares uordblks as int. + // Avoiding sign extension gets us correct behavior for another 2 GB. + malloc_bytes = (unsigned int)mi.uordblks; + mmapped_bytes = (unsigned int)mi.hblkhd; + } else { + malloc_bytes = mi.uordblks; + mmapped_bytes = mi.hblkhd; + } + // From the spec, we clearly have mmapped_bytes <= malloc_bytes. Reality is sometimes + // dramatically different. (b/119580449) If so, fudge it. + if (mmapped_bytes > malloc_bytes) { + malloc_bytes = mmapped_bytes; + } +#else + // We should hit this case only in contexts in which GC triggering is not critical. Effectively + // disable GC triggering based on malloc(). + malloc_bytes = 1000; +#endif + return malloc_bytes + native_bytes_registered_.load(std::memory_order_relaxed); + // An alternative would be to get RSS from /proc/self/statm. Empirically, that's no + // more expensive, and it would allow us to count memory allocated by means other than malloc. 
+ // However it would change as pages are unmapped and remapped due to memory pressure, among + // other things. It seems risky to trigger GCs as a result of such changes. +} + collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, GcCause gc_cause, bool clear_soft_references) { @@ -2666,16 +2703,7 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, ++runtime->GetStats()->gc_for_alloc_count; ++self->GetStats()->gc_for_alloc_count; } - const uint64_t bytes_allocated_before_gc = GetBytesAllocated(); - - if (gc_type == NonStickyGcType()) { - // Move all bytes from new_native_bytes_allocated_ to - // old_native_bytes_allocated_ now that GC has been triggered, resetting - // new_native_bytes_allocated_ to zero in the process. - old_native_bytes_allocated_.fetch_add( - new_native_bytes_allocated_.exchange(0, std::memory_order_relaxed), - std::memory_order_relaxed); - } + const size_t bytes_allocated_before_gc = GetBytesAllocated(); DCHECK_LT(gc_type, collector::kGcTypeMax); DCHECK_NE(gc_type, collector::kGcTypeNone); @@ -2747,6 +2775,9 @@ collector::GcType Heap::CollectGarbageInternal(collector::GcType gc_type, FinishGC(self, gc_type); // Inform DDMS that a GC completed. Dbg::GcDidFinish(); + + old_native_bytes_allocated_.store(GetNativeBytes()); + // Unload native libraries for class unloading. We do this after calling FinishGC to prevent // deadlocks in case the JNI_OnUnload function does allocations. { @@ -3521,16 +3552,17 @@ void Heap::DumpForSigQuit(std::ostream& os) { } size_t Heap::GetPercentFree() { - return static_cast<size_t>(100.0f * static_cast<float>(GetFreeMemory()) / max_allowed_footprint_); + return static_cast<size_t>(100.0f * static_cast<float>( + GetFreeMemory()) / target_footprint_.load(std::memory_order_relaxed)); } -void Heap::SetIdealFootprint(size_t max_allowed_footprint) { - if (max_allowed_footprint > GetMaxMemory()) { - VLOG(gc) << "Clamp target GC heap from " << PrettySize(max_allowed_footprint) << " to " +void Heap::SetIdealFootprint(size_t target_footprint) { + if (target_footprint > GetMaxMemory()) { + VLOG(gc) << "Clamp target GC heap from " << PrettySize(target_footprint) << " to " << PrettySize(GetMaxMemory()); - max_allowed_footprint = GetMaxMemory(); + target_footprint = GetMaxMemory(); } - max_allowed_footprint_ = max_allowed_footprint; + target_footprint_.store(target_footprint, std::memory_order_relaxed); } bool Heap::IsMovableObject(ObjPtr<mirror::Object> obj) const { @@ -3563,10 +3595,10 @@ double Heap::HeapGrowthMultiplier() const { } void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran, - uint64_t bytes_allocated_before_gc) { + size_t bytes_allocated_before_gc) { // We know what our utilization is at this moment. // This doesn't actually resize any memory. It just lets the heap grow more when necessary. - const uint64_t bytes_allocated = GetBytesAllocated(); + const size_t bytes_allocated = GetBytesAllocated(); // Trace the new heap size after the GC is finished. TraceHeapSize(bytes_allocated); uint64_t target_size; @@ -3574,16 +3606,18 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran, // Use the multiplier to grow more for foreground. const double multiplier = HeapGrowthMultiplier(); // Use the multiplier to grow more for // foreground. 
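GetNativeBytes() above is the basis of the new native-triggered GC heuristic. The following is a minimal standalone sketch of the mallinfo() usage, including the width check and unsigned cast that work around glibc declaring uordblks and hblkhd as int (newer glibc also offers mallinfo2(), which postdates this patch); sizes and behavior outside bionic/glibc are simplified.

```cpp
#include <cstdio>
#include <cstdlib>
#include <malloc.h>

// Rough "bytes in use by the native heap", mirroring the shape of GetNativeBytes().
size_t NativeHeapBytesSketch() {
#if defined(__GLIBC__) || defined(__BIONIC__)
  struct mallinfo mi = mallinfo();
  size_t malloc_bytes;
  size_t mmapped_bytes;
  if (sizeof(size_t) > sizeof(mi.uordblks)) {
    // glibc declares the fields as int; go through unsigned to avoid sign extension.
    malloc_bytes = static_cast<unsigned int>(mi.uordblks);
    mmapped_bytes = static_cast<unsigned int>(mi.hblkhd);
  } else {
    malloc_bytes = mi.uordblks;
    mmapped_bytes = mi.hblkhd;
  }
  // The patch fudges the case where mmapped bytes exceed the total (b/119580449).
  return mmapped_bytes > malloc_bytes ? mmapped_bytes : malloc_bytes;
#else
  return 0;  // No cheap estimate on this libc; GC triggering would ignore malloc usage.
#endif
}

int main() {
  void* p = std::malloc(1 << 20);
  std::printf("native heap ~ %zu bytes\n", NativeHeapBytesSketch());
  std::free(p);
}
```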
- const uint64_t adjusted_min_free = static_cast<uint64_t>(min_free_ * multiplier); - const uint64_t adjusted_max_free = static_cast<uint64_t>(max_free_ * multiplier); + const size_t adjusted_min_free = static_cast<size_t>(min_free_ * multiplier); + const size_t adjusted_max_free = static_cast<size_t>(max_free_ * multiplier); if (gc_type != collector::kGcTypeSticky) { // Grow the heap for non sticky GC. - ssize_t delta = bytes_allocated / GetTargetHeapUtilization() - bytes_allocated; - CHECK_GE(delta, 0) << "bytes_allocated=" << bytes_allocated - << " target_utilization_=" << target_utilization_; + uint64_t delta = bytes_allocated * (1.0 / GetTargetHeapUtilization() - 1.0); + DCHECK_LE(delta, std::numeric_limits<size_t>::max()) << "bytes_allocated=" << bytes_allocated + << " target_utilization_=" << target_utilization_; target_size = bytes_allocated + delta * multiplier; - target_size = std::min(target_size, bytes_allocated + adjusted_max_free); - target_size = std::max(target_size, bytes_allocated + adjusted_min_free); + target_size = std::min(target_size, + static_cast<uint64_t>(bytes_allocated + adjusted_max_free)); + target_size = std::max(target_size, + static_cast<uint64_t>(bytes_allocated + adjusted_min_free)); next_gc_type_ = collector::kGcTypeSticky; } else { collector::GcType non_sticky_gc_type = NonStickyGcType(); @@ -3600,22 +3634,24 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran, // We also check that the bytes allocated aren't over the footprint limit in order to prevent a // pathological case where dead objects which aren't reclaimed by sticky could get accumulated // if the sticky GC throughput always remained >= the full/partial throughput. + size_t target_footprint = target_footprint_.load(std::memory_order_relaxed); if (current_gc_iteration_.GetEstimatedThroughput() * kStickyGcThroughputAdjustment >= non_sticky_collector->GetEstimatedMeanThroughput() && non_sticky_collector->NumberOfIterations() > 0 && - bytes_allocated <= max_allowed_footprint_) { + bytes_allocated <= target_footprint) { next_gc_type_ = collector::kGcTypeSticky; } else { next_gc_type_ = non_sticky_gc_type; } // If we have freed enough memory, shrink the heap back down. - if (bytes_allocated + adjusted_max_free < max_allowed_footprint_) { + if (bytes_allocated + adjusted_max_free < target_footprint) { target_size = bytes_allocated + adjusted_max_free; } else { - target_size = std::max(bytes_allocated, static_cast<uint64_t>(max_allowed_footprint_)); + target_size = std::max(bytes_allocated, target_footprint); } } - if (!ignore_max_footprint_) { + CHECK_LE(target_size, std::numeric_limits<size_t>::max()); + if (!ignore_target_footprint_) { SetIdealFootprint(target_size); if (IsGcConcurrent()) { const uint64_t freed_bytes = current_gc_iteration_.GetFreedBytes() + @@ -3624,26 +3660,25 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran, // Bytes allocated will shrink by freed_bytes after the GC runs, so if we want to figure out // how many bytes were allocated during the GC we need to add freed_bytes back on. CHECK_GE(bytes_allocated + freed_bytes, bytes_allocated_before_gc); - const uint64_t bytes_allocated_during_gc = bytes_allocated + freed_bytes - + const size_t bytes_allocated_during_gc = bytes_allocated + freed_bytes - bytes_allocated_before_gc; // Calculate when to perform the next ConcurrentGC. // Estimate how many remaining bytes we will have when we need to start the next GC. 
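The non-sticky branch above recasts the old signed delta as an unsigned value derived from the utilization target, then clamps growth into [min_free, max_free] scaled by the foreground multiplier. A worked example of that computation, with purely illustrative values:

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const size_t bytes_allocated = 48u * 1024 * 1024;  // bytes retained right after the GC
  const double target_utilization = 0.75;            // SetTargetHeapUtilization()
  const double multiplier = 2.0;                     // HeapGrowthMultiplier() for foreground
  const size_t min_free = 512 * 1024;                // illustrative values
  const size_t max_free = 8 * 1024 * 1024;

  const size_t adjusted_min_free = static_cast<size_t>(min_free * multiplier);
  const size_t adjusted_max_free = static_cast<size_t>(max_free * multiplier);
  uint64_t delta = bytes_allocated * (1.0 / target_utilization - 1.0);   // 16 MB here
  uint64_t target_size = bytes_allocated + delta * multiplier;           // 80 MB before clamping
  target_size = std::min<uint64_t>(target_size, bytes_allocated + adjusted_max_free);
  target_size = std::max<uint64_t>(target_size, bytes_allocated + adjusted_min_free);
  std::printf("next target footprint = %llu MB\n",
              static_cast<unsigned long long>(target_size / (1024 * 1024)));  // 64 MB
}
```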
size_t remaining_bytes = bytes_allocated_during_gc; remaining_bytes = std::min(remaining_bytes, kMaxConcurrentRemainingBytes); remaining_bytes = std::max(remaining_bytes, kMinConcurrentRemainingBytes); - if (UNLIKELY(remaining_bytes > max_allowed_footprint_)) { + size_t target_footprint = target_footprint_.load(std::memory_order_relaxed); + if (UNLIKELY(remaining_bytes > target_footprint)) { // A never going to happen situation that from the estimated allocation rate we will exceed // the applications entire footprint with the given estimated allocation rate. Schedule // another GC nearly straight away. - remaining_bytes = kMinConcurrentRemainingBytes; + remaining_bytes = std::min(kMinConcurrentRemainingBytes, target_footprint); } - DCHECK_LE(remaining_bytes, max_allowed_footprint_); - DCHECK_LE(max_allowed_footprint_, GetMaxMemory()); + DCHECK_LE(target_footprint_.load(std::memory_order_relaxed), GetMaxMemory()); // Start a concurrent GC when we get close to the estimated remaining bytes. When the // allocation rate is very high, remaining_bytes could tell us that we should start a GC // right away. - concurrent_start_bytes_ = std::max(max_allowed_footprint_ - remaining_bytes, - static_cast<size_t>(bytes_allocated)); + concurrent_start_bytes_ = std::max(target_footprint - remaining_bytes, bytes_allocated); } } } @@ -3671,11 +3706,11 @@ void Heap::ClampGrowthLimit() { } void Heap::ClearGrowthLimit() { - if (max_allowed_footprint_ == growth_limit_ && growth_limit_ < capacity_) { - max_allowed_footprint_ = capacity_; + if (target_footprint_.load(std::memory_order_relaxed) == growth_limit_ + && growth_limit_ < capacity_) { + target_footprint_.store(capacity_, std::memory_order_relaxed); concurrent_start_bytes_ = - std::max(max_allowed_footprint_, kMinConcurrentRemainingBytes) - - kMinConcurrentRemainingBytes; + UnsignedDifference(capacity_, kMinConcurrentRemainingBytes); } growth_limit_ = capacity_; ScopedObjectAccess soa(Thread::Current()); @@ -3915,40 +3950,101 @@ void Heap::RunFinalization(JNIEnv* env, uint64_t timeout) { static_cast<jlong>(timeout)); } -void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) { - size_t old_value = new_native_bytes_allocated_.fetch_add(bytes, std::memory_order_relaxed); +// For GC triggering purposes, we count old (pre-last-GC) and new native allocations as +// different fractions of Java allocations. +// For now, we essentially do not count old native allocations at all, so that we can preserve the +// existing behavior of not limiting native heap size. If we seriously considered it, we would +// have to adjust collection thresholds when we encounter large amounts of old native memory, +// and handle native out-of-memory situations. + +static constexpr size_t kOldNativeDiscountFactor = 65536; // Approximately infinite for now. +static constexpr size_t kNewNativeDiscountFactor = 2; + +// If weighted java + native memory use exceeds our target by kStopForNativeFactor, and +// newly allocated memory exceeds kHugeNativeAlloc, we wait for GC to complete to avoid +// running out of memory. +static constexpr float kStopForNativeFactor = 2.0; +static constexpr size_t kHugeNativeAllocs = 200*1024*1024; + +// Return the ratio of the weighted native + java allocated bytes to its target value. +// A return value > 1.0 means we should collect. Significantly larger values mean we're falling +// behind. +inline float Heap::NativeMemoryOverTarget(size_t current_native_bytes) { + // Collection check for native allocation. Does not enforce Java heap bounds. 
+ // With adj_start_bytes defined below, effectively checks + // <java bytes allocd> + c1*<old native allocd> + c2*<new native allocd) >= adj_start_bytes, + // where c3 > 1, and currently c1 and c2 are 1 divided by the values defined above. + size_t old_native_bytes = old_native_bytes_allocated_.load(std::memory_order_relaxed); + if (old_native_bytes > current_native_bytes) { + // Net decrease; skip the check, but update old value. + // It's OK to lose an update if two stores race. + old_native_bytes_allocated_.store(current_native_bytes, std::memory_order_relaxed); + return 0.0; + } else { + size_t new_native_bytes = UnsignedDifference(current_native_bytes, old_native_bytes); + size_t weighted_native_bytes = new_native_bytes / kNewNativeDiscountFactor + + old_native_bytes / kOldNativeDiscountFactor; + size_t adj_start_bytes = concurrent_start_bytes_ + + NativeAllocationGcWatermark() / kNewNativeDiscountFactor; + return static_cast<float>(GetBytesAllocated() + weighted_native_bytes) + / static_cast<float>(adj_start_bytes); + } +} - if (old_value > NativeAllocationGcWatermark() * HeapGrowthMultiplier() && - !IsGCRequestPending()) { - // Trigger another GC because there have been enough native bytes - // allocated since the last GC. +inline void Heap::CheckConcurrentGCForNative(Thread* self) { + size_t current_native_bytes = GetNativeBytes(); + float gc_urgency = NativeMemoryOverTarget(current_native_bytes); + if (UNLIKELY(gc_urgency >= 1.0)) { if (IsGcConcurrent()) { - RequestConcurrentGC(ThreadForEnv(env), kGcCauseForNativeAlloc, /*force_full=*/true); + RequestConcurrentGC(self, kGcCauseForNativeAlloc, /*force_full=*/true); + if (gc_urgency > kStopForNativeFactor + && current_native_bytes > kHugeNativeAllocs) { + // We're in danger of running out of memory due to rampant native allocation. + if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) { + LOG(INFO) << "Stopping for native allocation, urgency: " << gc_urgency; + } + WaitForGcToComplete(kGcCauseForAlloc, self); + } } else { CollectGarbageInternal(NonStickyGcType(), kGcCauseForNativeAlloc, false); } } } +// About kNotifyNativeInterval allocations have occurred. Check whether we should garbage collect. +void Heap::NotifyNativeAllocations(JNIEnv* env) { + native_objects_notified_.fetch_add(kNotifyNativeInterval, std::memory_order_relaxed); + CheckConcurrentGCForNative(ThreadForEnv(env)); +} + +// Register a native allocation with an explicit size. +// This should only be done for large allocations of non-malloc memory, which we wouldn't +// otherwise see. +void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) { + native_bytes_registered_.fetch_add(bytes, std::memory_order_relaxed); + uint32_t objects_notified = + native_objects_notified_.fetch_add(1, std::memory_order_relaxed); + if (objects_notified % kNotifyNativeInterval == kNotifyNativeInterval - 1 + || bytes > kCheckImmediatelyThreshold) { + CheckConcurrentGCForNative(ThreadForEnv(env)); + } +} + void Heap::RegisterNativeFree(JNIEnv*, size_t bytes) { - // Take the bytes freed out of new_native_bytes_allocated_ first. If - // new_native_bytes_allocated_ reaches zero, take the remaining bytes freed - // out of old_native_bytes_allocated_ to ensure all freed bytes are - // accounted for. 
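NativeMemoryOverTarget() and CheckConcurrentGCForNative() above boil down to a weighted comparison: with the patch's constants, new native bytes are discounted by 2 and pre-GC native bytes by 65536, a gc_urgency at or above 1.0 requests a concurrent GC, and above kStopForNativeFactor with more than kHugeNativeAllocs of native memory the allocating thread also waits for that GC. A standalone sketch of the arithmetic with illustrative sizes:

```cpp
#include <cstddef>
#include <cstdio>

int main() {
  const size_t java_allocated = 30u * 1024 * 1024;        // GetBytesAllocated()
  const size_t concurrent_start_bytes = 40u * 1024 * 1024;
  const size_t native_watermark = 16u * 1024 * 1024;      // NativeAllocationGcWatermark()
  const size_t old_native = 5u * 1024 * 1024;             // GetNativeBytes() at last GC
  const size_t current_native = 120u * 1024 * 1024;       // GetNativeBytes() now

  const size_t kNewNativeDiscountFactor = 2;      // as in the patch
  const size_t kOldNativeDiscountFactor = 65536;  // effectively ignores old native memory

  size_t new_native = current_native - old_native;
  size_t weighted_native =
      new_native / kNewNativeDiscountFactor + old_native / kOldNativeDiscountFactor;
  size_t adj_start_bytes = concurrent_start_bytes + native_watermark / kNewNativeDiscountFactor;
  float gc_urgency = static_cast<float>(java_allocated + weighted_native) /
                     static_cast<float>(adj_start_bytes);
  std::printf("gc_urgency = %.2f (>= 1.0 requests a native-triggered GC)\n", gc_urgency);
}
```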
size_t allocated; size_t new_freed_bytes; do { - allocated = new_native_bytes_allocated_.load(std::memory_order_relaxed); + allocated = native_bytes_registered_.load(std::memory_order_relaxed); new_freed_bytes = std::min(allocated, bytes); - } while (!new_native_bytes_allocated_.CompareAndSetWeakRelaxed(allocated, - allocated - new_freed_bytes)); - if (new_freed_bytes < bytes) { - old_native_bytes_allocated_.fetch_sub(bytes - new_freed_bytes, std::memory_order_relaxed); - } + // We should not be registering more free than allocated bytes. + // But correctly keep going in non-debug builds. + DCHECK_EQ(new_freed_bytes, bytes); + } while (!native_bytes_registered_.CompareAndSetWeakRelaxed(allocated, + allocated - new_freed_bytes)); } size_t Heap::GetTotalMemory() const { - return std::max(max_allowed_footprint_, GetBytesAllocated()); + return std::max(target_footprint_.load(std::memory_order_relaxed), GetBytesAllocated()); } void Heap::AddModUnionTable(accounting::ModUnionTable* mod_union_table) { @@ -4250,8 +4346,8 @@ const Verification* Heap::GetVerification() const { return verification_.get(); } -void Heap::VlogHeapGrowth(size_t max_allowed_footprint, size_t new_footprint, size_t alloc_size) { - VLOG(heap) << "Growing heap from " << PrettySize(max_allowed_footprint) << " to " +void Heap::VlogHeapGrowth(size_t old_footprint, size_t new_footprint, size_t alloc_size) { + VLOG(heap) << "Growing heap from " << PrettySize(old_footprint) << " to " << PrettySize(new_footprint) << " for a " << PrettySize(alloc_size) << " allocation"; } @@ -4262,20 +4358,21 @@ class Heap::TriggerPostForkCCGcTask : public HeapTask { gc::Heap* heap = Runtime::Current()->GetHeap(); // Trigger a GC, if not already done. The first GC after fork, whenever it // takes place, will adjust the thresholds to normal levels. - if (heap->max_allowed_footprint_ == heap->growth_limit_) { + if (heap->target_footprint_.load(std::memory_order_relaxed) == heap->growth_limit_) { heap->RequestConcurrentGC(self, kGcCauseBackground, false); } } }; void Heap::PostForkChildAction(Thread* self) { - // Temporarily increase max_allowed_footprint_ and concurrent_start_bytes_ to + // Temporarily increase target_footprint_ and concurrent_start_bytes_ to // max values to avoid GC during app launch. if (collector_type_ == kCollectorTypeCC && !IsLowMemoryMode()) { - // Set max_allowed_footprint_ to the largest allowed value. + // Set target_footprint_ to the largest allowed value. SetIdealFootprint(growth_limit_); // Set concurrent_start_bytes_ to half of the heap size. - concurrent_start_bytes_ = std::max(max_allowed_footprint_ / 2, GetBytesAllocated()); + size_t target_footprint = target_footprint_.load(std::memory_order_relaxed); + concurrent_start_bytes_ = std::max(target_footprint / 2, GetBytesAllocated()); GetTaskProcessor()->AddTask( self, new TriggerPostForkCCGcTask(NanoTime() + MsToNs(kPostForkMaxHeapDurationMS))); diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index 504eff2c42..de65f0230e 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -126,7 +126,6 @@ static constexpr bool kUseThreadLocalAllocationStack = true; class Heap { public: - // If true, measure the total allocation time. static constexpr size_t kDefaultStartingSize = kPageSize; static constexpr size_t kDefaultInitialSize = 2 * MB; static constexpr size_t kDefaultMaximumSize = 256 * MB; @@ -155,6 +154,16 @@ class Heap { // Used so that we don't overflow the allocation time atomic integer. 
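The new RegisterNativeFree() above keeps a compare-and-set loop but clamps the decrement so native_bytes_registered_ can never underflow if frees are over-reported; the DCHECK catches over-reporting in debug builds while release builds keep going. A standalone sketch of that saturating decrement:

```cpp
#include <algorithm>
#include <atomic>
#include <cstdio>

std::atomic<size_t> native_bytes_registered{1000};  // stands in for native_bytes_registered_

void RegisterNativeFreeSketch(size_t bytes) {
  size_t allocated;
  size_t new_freed_bytes;
  do {
    allocated = native_bytes_registered.load(std::memory_order_relaxed);
    new_freed_bytes = std::min(allocated, bytes);   // clamp rather than underflow
  } while (!native_bytes_registered.compare_exchange_weak(allocated, allocated - new_freed_bytes,
                                                          std::memory_order_relaxed));
}

int main() {
  RegisterNativeFreeSketch(600);
  RegisterNativeFreeSketch(600);   // over-reported free; the counter saturates at zero
  std::printf("registered = %zu\n", native_bytes_registered.load());
}
```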
static constexpr size_t kTimeAdjust = 1024; + // Client should call NotifyNativeAllocation every kNotifyNativeInterval allocations. + // Should be chosen so that time_to_call_mallinfo / kNotifyNativeInterval is on the same order + // as object allocation time. time_to_call_mallinfo seems to be on the order of 1 usec. + static constexpr uint32_t kNotifyNativeInterval = 32; + + // RegisterNativeAllocation checks immediately whether GC is needed if size exceeds the + // following. kCheckImmediatelyThreshold * kNotifyNativeInterval should be small enough to + // make it safe to allocate that many bytes between checks. + static constexpr size_t kCheckImmediatelyThreshold = 300000; + // How often we allow heap trimming to happen (nanoseconds). static constexpr uint64_t kHeapTrimWait = MsToNs(5000); // How long we wait after a transition request to perform a collector transition (nanoseconds). @@ -187,7 +196,7 @@ class Heap { bool low_memory_mode, size_t long_pause_threshold, size_t long_gc_threshold, - bool ignore_max_footprint, + bool ignore_target_footprint, bool use_tlab, bool verify_pre_gc_heap, bool verify_pre_sweeping_heap, @@ -269,10 +278,22 @@ class Heap { void CheckPreconditionsForAllocObject(ObjPtr<mirror::Class> c, size_t byte_count) REQUIRES_SHARED(Locks::mutator_lock_); + // Inform the garbage collector of a non-malloc allocated native memory that might become + // reclaimable in the future as a result of Java garbage collection. void RegisterNativeAllocation(JNIEnv* env, size_t bytes) REQUIRES(!*gc_complete_lock_, !*pending_task_lock_); void RegisterNativeFree(JNIEnv* env, size_t bytes); + // Notify the garbage collector of malloc allocations that might be reclaimable + // as a result of Java garbage collection. Each such call represents approximately + // kNotifyNativeInterval such allocations. + void NotifyNativeAllocations(JNIEnv* env) + REQUIRES(!*gc_complete_lock_, !*pending_task_lock_); + + uint32_t GetNotifyNativeInterval() { + return kNotifyNativeInterval; + } + // Change the allocator, updates entrypoints. void ChangeAllocator(AllocatorType allocator) REQUIRES(Locks::mutator_lock_, !Locks::runtime_shutdown_lock_); @@ -536,21 +557,20 @@ class Heap { // Returns approximately how much free memory we have until the next GC happens. size_t GetFreeMemoryUntilGC() const { - return max_allowed_footprint_ - GetBytesAllocated(); + return UnsignedDifference(target_footprint_.load(std::memory_order_relaxed), + GetBytesAllocated()); } // Returns approximately how much free memory we have until the next OOME happens. size_t GetFreeMemoryUntilOOME() const { - return growth_limit_ - GetBytesAllocated(); + return UnsignedDifference(growth_limit_, GetBytesAllocated()); } // Returns how much free memory we have until we need to grow the heap to perform an allocation. // Similar to GetFreeMemoryUntilGC. Implements java.lang.Runtime.freeMemory. size_t GetFreeMemory() const { - size_t byte_allocated = num_bytes_allocated_.load(std::memory_order_relaxed); - size_t total_memory = GetTotalMemory(); - // Make sure we don't get a negative number. - return total_memory - std::min(total_memory, byte_allocated); + return UnsignedDifference(GetTotalMemory(), + num_bytes_allocated_.load(std::memory_order_relaxed)); } // Get the space that corresponds to an object's address. Current implementation searches all @@ -877,12 +897,16 @@ class Heap { return main_space_backup_ != nullptr; } + static ALWAYS_INLINE size_t UnsignedDifference(size_t x, size_t y) { + return x > y ? 
x - y : 0; + } + static ALWAYS_INLINE bool AllocatorHasAllocationStack(AllocatorType allocator_type) { return + allocator_type != kAllocatorTypeRegionTLAB && allocator_type != kAllocatorTypeBumpPointer && allocator_type != kAllocatorTypeTLAB && - allocator_type != kAllocatorTypeRegion && - allocator_type != kAllocatorTypeRegionTLAB; + allocator_type != kAllocatorTypeRegion; } static ALWAYS_INLINE bool AllocatorMayHaveConcurrentGC(AllocatorType allocator_type) { if (kUseReadBarrier) { @@ -890,24 +914,30 @@ class Heap { return true; } return - allocator_type != kAllocatorTypeBumpPointer && - allocator_type != kAllocatorTypeTLAB; + allocator_type != kAllocatorTypeTLAB && + allocator_type != kAllocatorTypeBumpPointer; } static bool IsMovingGc(CollectorType collector_type) { return + collector_type == kCollectorTypeCC || collector_type == kCollectorTypeSS || collector_type == kCollectorTypeGSS || - collector_type == kCollectorTypeCC || collector_type == kCollectorTypeCCBackground || collector_type == kCollectorTypeHomogeneousSpaceCompact; } bool ShouldAllocLargeObject(ObjPtr<mirror::Class> c, size_t byte_count) const REQUIRES_SHARED(Locks::mutator_lock_); - ALWAYS_INLINE void CheckConcurrentGC(Thread* self, - size_t new_num_bytes_allocated, - ObjPtr<mirror::Object>* obj) + + // Checks whether we should garbage collect: + ALWAYS_INLINE bool ShouldConcurrentGCForJava(size_t new_num_bytes_allocated); + float NativeMemoryOverTarget(size_t current_native_bytes); + ALWAYS_INLINE void CheckConcurrentGCForJava(Thread* self, + size_t new_num_bytes_allocated, + ObjPtr<mirror::Object>* obj) REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!*pending_task_lock_, !*gc_complete_lock_); + void CheckConcurrentGCForNative(Thread* self) + REQUIRES(!*pending_task_lock_, !*gc_complete_lock_); accounting::ObjectStack* GetMarkStack() { return mark_stack_.get(); @@ -968,6 +998,11 @@ class Heap { void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) REQUIRES_SHARED(Locks::mutator_lock_); + // Are we out of memory, and thus should force a GC or fail? + // For concurrent collectors, out of memory is defined by growth_limit_. + // For nonconcurrent collectors it is defined by target_footprint_ unless grow is + // set. If grow is set, the limit is growth_limit_ and we adjust target_footprint_ + // to accomodate the allocation. ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size, bool grow); @@ -1031,7 +1066,7 @@ class Heap { // collection. bytes_allocated_before_gc is used to measure bytes / second for the period which // the GC was run. void GrowForUtilization(collector::GarbageCollector* collector_ran, - uint64_t bytes_allocated_before_gc = 0); + size_t bytes_allocated_before_gc = 0); size_t GetPercentFree(); @@ -1065,8 +1100,8 @@ class Heap { // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark // sweep GC, false for other GC types. bool IsGcConcurrent() const ALWAYS_INLINE { - return collector_type_ == kCollectorTypeCMS || - collector_type_ == kCollectorTypeCC || + return collector_type_ == kCollectorTypeCC || + collector_type_ == kCollectorTypeCMS || collector_type_ == kCollectorTypeCCBackground; } @@ -1095,15 +1130,19 @@ class Heap { return HasZygoteSpace() ? collector::kGcTypePartial : collector::kGcTypeFull; } - // How large new_native_bytes_allocated_ can grow before we trigger a new - // GC. + // Return the amount of space we allow for native memory when deciding whether to + // collect. 
We collect when a weighted sum of Java memory plus native memory exceeds + // the similarly weighted sum of the Java heap size target and this value. ALWAYS_INLINE size_t NativeAllocationGcWatermark() const { - // Reuse max_free_ for the native allocation gc watermark, so that the - // native heap is treated in the same way as the Java heap in the case - // where the gc watermark update would exceed max_free_. Using max_free_ - // instead of the target utilization means the watermark doesn't depend on - // the current number of registered native allocations. - return max_free_; + // It probably makes most sense to use a constant multiple of target_footprint_ . + // This is a good indication of the live data size, together with the + // intended space-time trade-off, as expressed by SetTargetHeapUtilization. + // For a fixed target utilization, the amount of GC effort per native + // allocated byte remains roughly constant as the Java heap size changes. + // But we previously triggered on max_free_ native allocation which is often much + // smaller. To avoid unexpected growth, we partially keep that limit in place for now. + // TODO: Consider HeapGrowthMultiplier(). Maybe. + return std::min(target_footprint_.load(std::memory_order_relaxed), 2 * max_free_); } ALWAYS_INLINE void IncrementNumberOfBytesFreedRevoke(size_t freed_bytes_revoke); @@ -1113,6 +1152,11 @@ class Heap { // Remove a vlog code from heap-inl.h which is transitively included in half the world. static void VlogHeapGrowth(size_t max_allowed_footprint, size_t new_footprint, size_t alloc_size); + // Return our best approximation of the number of bytes of native memory that + // are currently in use, and could possibly be reclaimed as an indirect result + // of a garbage collection. + size_t GetNativeBytes(); + // All-known continuous spaces, where objects lie within fixed bounds. std::vector<space::ContinuousSpace*> continuous_spaces_ GUARDED_BY(Locks::mutator_lock_); @@ -1192,9 +1236,9 @@ class Heap { double pre_gc_weighted_allocated_bytes_; double post_gc_weighted_allocated_bytes_; - // If we ignore the max footprint it lets the heap grow until it hits the heap capacity, this is - // useful for benchmarking since it reduces time spent in GC to a low %. - const bool ignore_max_footprint_; + // If we ignore the target footprint it lets the heap grow until it hits the heap capacity, this + // is useful for benchmarking since it reduces time spent in GC to a low %. + const bool ignore_target_footprint_; // Lock which guards zygote space creation. Mutex zygote_creation_lock_; @@ -1243,14 +1287,18 @@ class Heap { // The size the heap is limited to. This is initially smaller than capacity, but for largeHeap // programs it is "cleared" making it the same as capacity. + // Only weakly enforced for simultaneous allocations. size_t growth_limit_; - // When the number of bytes allocated exceeds the footprint TryAllocate returns null indicating - // a GC should be triggered. - size_t max_allowed_footprint_; + // Target size (as in maximum allocatable bytes) for the heap. Weakly enforced as a limit for + // non-concurrent GC. Used as a guideline for computing concurrent_start_bytes_ in the + // concurrent GC case. + Atomic<size_t> target_footprint_; // When num_bytes_allocated_ exceeds this amount then a concurrent GC should be requested so that // it completes ahead of an allocation failing. + // A multiple of this is also used to determine when to trigger a GC in response to native + // allocation. 
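The comment above explains the sizing of NativeAllocationGcWatermark(): scale the native allowance with the heap target so GC effort per native byte stays roughly constant, but cap it at 2 * max_free_ to stay close to the old max_free_-based trigger and avoid unexpected growth. A quick illustration of the cap (the max_free_ value is illustrative):

```cpp
#include <algorithm>
#include <cstddef>
#include <cstdio>

int main() {
  const size_t max_free = 8u * 1024 * 1024;  // illustrative value
  for (size_t target_mb : {8u, 16u, 64u, 256u}) {
    size_t target_footprint = target_mb * 1024 * 1024;
    size_t watermark = std::min(target_footprint, 2 * max_free);  // NativeAllocationGcWatermark()
    std::printf("target %3zu MB -> native allowance %2zu MB\n",
                target_mb, watermark / (1024 * 1024));
  }
}
```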
size_t concurrent_start_bytes_; // Since the heap was created, how many bytes have been freed. @@ -1263,19 +1311,18 @@ class Heap { // TLABS in their entirety, even if they have not yet been parceled out. Atomic<size_t> num_bytes_allocated_; - // Number of registered native bytes allocated since the last time GC was - // triggered. Adjusted after each RegisterNativeAllocation and - // RegisterNativeFree. Used to determine when to trigger GC for native - // allocations. - // See the REDESIGN section of go/understanding-register-native-allocation. - Atomic<size_t> new_native_bytes_allocated_; - - // Number of registered native bytes allocated prior to the last time GC was - // triggered, for debugging purposes. The current number of registered - // native bytes is determined by taking the sum of - // old_native_bytes_allocated_ and new_native_bytes_allocated_. + // Number of registered native bytes allocated. Adjusted after each RegisterNativeAllocation and + // RegisterNativeFree. Used to help determine when to trigger GC for native allocations. Should + // not include bytes allocated through the system malloc, since those are implicitly included. + Atomic<size_t> native_bytes_registered_; + + // Approximately the smallest value of GetNativeBytes() we've seen since the last GC. Atomic<size_t> old_native_bytes_allocated_; + // Total number of native objects of which we were notified since the beginning of time, mod 2^32. + // Allows us to check for GC only roughly every kNotifyNativeInterval allocations. + Atomic<uint32_t> native_objects_notified_; + // Number of bytes freed by thread local buffer revokes. This will // cancel out the ahead-of-time bulk counting of bytes allocated in // rosalloc thread-local buffers. It is temporarily accumulated @@ -1360,10 +1407,10 @@ class Heap { // Minimum free guarantees that you always have at least min_free_ free bytes after growing for // utilization, regardless of target utilization ratio. - size_t min_free_; + const size_t min_free_; // The ideal maximum free size, when we grow the heap for utilization. - size_t max_free_; + const size_t max_free_; // Target ideal heap utilization ratio. 
double target_utilization_; diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc index 3e5003ce13..892d4cc9e1 100644 --- a/runtime/native/dalvik_system_VMRuntime.cc +++ b/runtime/native/dalvik_system_VMRuntime.cc @@ -271,7 +271,7 @@ static void VMRuntime_setTargetSdkVersionNative(JNIEnv*, jobject, jint target_sd #endif } -static void VMRuntime_registerNativeAllocation(JNIEnv* env, jobject, jint bytes) { +static void VMRuntime_registerNativeAllocationInternal(JNIEnv* env, jobject, jint bytes) { if (UNLIKELY(bytes < 0)) { ScopedObjectAccess soa(env); ThrowRuntimeException("allocation size negative %d", bytes); @@ -280,11 +280,7 @@ static void VMRuntime_registerNativeAllocation(JNIEnv* env, jobject, jint bytes) Runtime::Current()->GetHeap()->RegisterNativeAllocation(env, static_cast<size_t>(bytes)); } -static void VMRuntime_registerSensitiveThread(JNIEnv*, jobject) { - Runtime::Current()->RegisterSensitiveThread(); -} - -static void VMRuntime_registerNativeFree(JNIEnv* env, jobject, jint bytes) { +static void VMRuntime_registerNativeFreeInternal(JNIEnv* env, jobject, jint bytes) { if (UNLIKELY(bytes < 0)) { ScopedObjectAccess soa(env); ThrowRuntimeException("allocation size negative %d", bytes); @@ -293,6 +289,18 @@ static void VMRuntime_registerNativeFree(JNIEnv* env, jobject, jint bytes) { Runtime::Current()->GetHeap()->RegisterNativeFree(env, static_cast<size_t>(bytes)); } +static jint VMRuntime_getNotifyNativeInterval(JNIEnv*, jclass) { + return Runtime::Current()->GetHeap()->GetNotifyNativeInterval(); +} + +static void VMRuntime_notifyNativeAllocationsInternal(JNIEnv* env, jobject) { + Runtime::Current()->GetHeap()->NotifyNativeAllocations(env); +} + +static void VMRuntime_registerSensitiveThread(JNIEnv*, jobject) { + Runtime::Current()->RegisterSensitiveThread(); +} + static void VMRuntime_updateProcessState(JNIEnv*, jobject, jint process_state) { Runtime* runtime = Runtime::Current(); runtime->UpdateProcessState(static_cast<ProcessState>(process_state)); @@ -710,9 +718,11 @@ static JNINativeMethod gMethods[] = { FAST_NATIVE_METHOD(VMRuntime, newUnpaddedArray, "(Ljava/lang/Class;I)Ljava/lang/Object;"), NATIVE_METHOD(VMRuntime, properties, "()[Ljava/lang/String;"), NATIVE_METHOD(VMRuntime, setTargetSdkVersionNative, "(I)V"), - NATIVE_METHOD(VMRuntime, registerNativeAllocation, "(I)V"), + NATIVE_METHOD(VMRuntime, registerNativeAllocationInternal, "(I)V"), + NATIVE_METHOD(VMRuntime, registerNativeFreeInternal, "(I)V"), + NATIVE_METHOD(VMRuntime, getNotifyNativeInterval, "()I"), + NATIVE_METHOD(VMRuntime, notifyNativeAllocationsInternal, "()V"), NATIVE_METHOD(VMRuntime, registerSensitiveThread, "()V"), - NATIVE_METHOD(VMRuntime, registerNativeFree, "(I)V"), NATIVE_METHOD(VMRuntime, requestConcurrentGC, "()V"), NATIVE_METHOD(VMRuntime, requestHeapTrim, "()V"), NATIVE_METHOD(VMRuntime, runHeapTasks, "()V"), diff --git a/test/175-alloc-big-bignums/expected.txt b/test/175-alloc-big-bignums/expected.txt new file mode 100644 index 0000000000..f75da10caf --- /dev/null +++ b/test/175-alloc-big-bignums/expected.txt @@ -0,0 +1 @@ +Test complete diff --git a/test/175-alloc-big-bignums/info.txt b/test/175-alloc-big-bignums/info.txt new file mode 100644 index 0000000000..8f6bcc3a55 --- /dev/null +++ b/test/175-alloc-big-bignums/info.txt @@ -0,0 +1,11 @@ +Allocate large numbers of huge BigIntegers in rapid succession. Most of the +associated memory will be in the C++ heap. 
This makes sure that we trigger +the garbage collector often enough to prevent us from running out of memory. + +The test allocates roughly 10GB of native memory, approximately 1MB of which +will be live at any point. Basically all native memory deallocation is +triggered by Java garbage collection. + +This test is a lot nastier than it looks. In particular, failure on target tends +to exhaust device memory, and kill off all processes on the device, including the +adb daemon :-( . diff --git a/test/175-alloc-big-bignums/src/Main.java b/test/175-alloc-big-bignums/src/Main.java new file mode 100644 index 0000000000..5fbeb46068 --- /dev/null +++ b/test/175-alloc-big-bignums/src/Main.java @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.math.BigInteger; + +// This is motivated by the assumption that BigInteger allocates malloc memory +// underneath. That's true (in 2018) on Android. + +public class Main { + public static void main(String[] args) throws Exception { + final int nIters = 20_000; // Presumed < 1_000_000. + final BigInteger big2_20 = BigInteger.valueOf(1024*1024); // 2^20 + BigInteger huge = BigInteger.valueOf(1).shiftLeft(4_000_000); // ~0.5MB + for (int i = 0; i < nIters; ++i) { // 10 GB total + huge = huge.add(BigInteger.ONE); + } + if (huge.bitLength() != 4_000_001) { + System.out.println("Wrong answer length: " + huge.bitLength()); + } else if (huge.mod(big2_20).compareTo(BigInteger.valueOf(nIters)) != 0) { + System.out.println("Wrong answer: ..." + huge.mod(big2_20)); + } else { + System.out.println("Test complete"); + } + } +} |
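For reference, the arithmetic behind the test: each add() on the 4,000,000-bit value produces a fresh BigInteger whose magnitude is about 0.5 MB and, per the comment in Main.java, lives in malloc memory on Android, so 20,000 iterations churn through roughly 10 GB of native memory while only the latest value stays live. Because nIters is presumed smaller than 2^20, the low 20 bits of the final value must equal nIters, which is exactly what the mod-2^20 comparison verifies alongside the bit-length check. A quick check of the volume estimate, assuming one full magnitude copy per add:

```cpp
#include <cstdio>

int main() {
  const long long bits_per_value = 4'000'000;             // huge = ONE.shiftLeft(4_000_000)
  const long long bytes_per_value = bits_per_value / 8;   // ~0.5 MB per intermediate result
  const long long iterations = 20'000;                    // nIters in Main.java
  std::printf("total native churn ~= %.1f GB\n",
              static_cast<double>(bytes_per_value * iterations) / (1024.0 * 1024.0 * 1024.0));
}
```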