author      2022-10-12 10:25:23 -0700
committer   2022-12-05 18:17:40 +0000
commit      485a714cbfa146528f7db9395197d855af43e188 (patch)
tree        914b9c764f1cc81938fb09d94e6a27bbe6e5562e
parent      13c3ce1edf7fa9e8e97fb60625a62cb97a13f9a9 (diff)
Make native GC-root updating concurrent with userfaultfd
Additionally, use userfaultfd's minor-fault feature for the moving
space.
Bug: 160737021
Test: ART_USE_READ_BARRIER=false art/test/testrunner/testrunner.py and module install
Change-Id: I98b0c69fba4aec1263b1f38cc9f31494fd5c8cf5
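
For context, the concurrent compaction this change builds on is driven by userfaultfd in copy mode: the moving space is registered for missing-page faults, and a faulting page is resolved by compacting its contents into a buffer and handing that buffer to the kernel with UFFDIO_COPY. Below is a minimal, hedged sketch of that flow (illustrative only, not the ART implementation; function names are chosen here and error handling is reduced to early returns):

    #include <cstdint>
    #include <fcntl.h>
    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef UFFD_USER_MODE_ONLY
    #define UFFD_USER_MODE_ONLY 1  // Needs 5.11+ kernel headers otherwise.
    #endif

    // Create a userfaultfd and register [range_start, range_start + range_len)
    // for missing-page faults (the copy-mode setup used for the moving space).
    static int RegisterForMissingFaults(void* range_start, size_t range_len) {
      int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
      if (uffd == -1) {
        return -1;
      }
      struct uffdio_api api = {.api = UFFD_API, .features = 0};
      if (ioctl(uffd, UFFDIO_API, &api) != 0) {
        close(uffd);
        return -1;
      }
      struct uffdio_register reg;
      reg.range.start = reinterpret_cast<uintptr_t>(range_start);
      reg.range.len = range_len;
      reg.mode = UFFDIO_REGISTER_MODE_MISSING;
      if (ioctl(uffd, UFFDIO_REGISTER, &reg) != 0) {
        close(uffd);
        return -1;
      }
      return uffd;
    }

    // Resolve a missing-page fault at page-aligned 'fault_page' by copying in a
    // page whose contents were already prepared (compacted) in 'prepared_page'.
    static bool ResolveWithCopy(int uffd, void* fault_page, void* prepared_page, size_t page_size) {
      struct uffdio_copy copy;
      copy.src = reinterpret_cast<uintptr_t>(prepared_page);
      copy.dst = reinterpret_cast<uintptr_t>(fault_page);
      copy.len = page_size;
      copy.mode = 0;
      return ioctl(uffd, UFFDIO_COPY, &copy) == 0;
    }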
-rw-r--r--  libartbase/base/arena_allocator.cc     |    3
-rw-r--r--  libartbase/base/arena_allocator.h      |   11
-rw-r--r--  libartbase/base/mem_map.cc             |    4
-rw-r--r--  libartbase/base/mem_map.h              |    8
-rw-r--r--  runtime/Android.bp                     |    1
-rw-r--r--  runtime/art_field-inl.h                |    2
-rw-r--r--  runtime/barrier.cc                     |    5
-rw-r--r--  runtime/barrier.h                      |    3
-rw-r--r--  runtime/base/gc_visited_arena_pool.cc  |   60
-rw-r--r--  runtime/base/gc_visited_arena_pool.h   |   50
-rw-r--r--  runtime/class_linker.cc                |    2
-rw-r--r--  runtime/gc/collector/mark_compact.cc   | 1427
-rw-r--r--  runtime/gc/collector/mark_compact.h    |  178
-rw-r--r--  runtime/linear_alloc-inl.h             |    3
-rw-r--r--  runtime/linear_alloc.h                 |    2

15 files changed, 1400 insertions, 359 deletions
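
The ConcurrentCompactionGcTask changes in the diff below keep the existing worker model: each worker blocks in read() on the uffd and serves one page per fault event, now choosing between copy mode and minor-fault mode. A reduced, hedged sketch of such an event loop follows (CompactIntoBuffer() is a hypothetical stand-in for the collector's CompactPage()/SlideBlackPage() work):

    #include <cerrno>
    #include <cstdint>
    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>
    #include <unistd.h>

    // Hypothetical stand-in: compact the contents destined for 'fault_page'
    // into the worker-local buffer 'buf'.
    void CompactIntoBuffer(uint8_t* fault_page, uint8_t* buf);

    static void WorkerLoop(int uffd, uint8_t* buf, size_t page_size) {
      while (true) {
        struct uffd_msg msg;
        ssize_t nread = read(uffd, &msg, sizeof(msg));
        if (nread <= 0 || msg.event != UFFD_EVENT_PAGEFAULT) {
          // A real loop also needs a termination condition (the diff uses the
          // conc_compaction_termination_page_ trick for that).
          continue;
        }
        uintptr_t addr = static_cast<uintptr_t>(msg.arg.pagefault.address) & ~(page_size - 1);
        uint8_t* fault_page = reinterpret_cast<uint8_t*>(addr);
        CompactIntoBuffer(fault_page, buf);
        struct uffdio_copy copy;
        copy.src = reinterpret_cast<uintptr_t>(buf);
        copy.dst = addr;
        copy.len = page_size;
        copy.mode = 0;
        // EEXIST just means another thread already resolved this page.
        if (ioctl(uffd, UFFDIO_COPY, &copy) != 0 && errno != EEXIST) {
          break;
        }
      }
    }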
diff --git a/libartbase/base/arena_allocator.cc b/libartbase/base/arena_allocator.cc index 250a3d9b9c..e5f254288e 100644 --- a/libartbase/base/arena_allocator.cc +++ b/libartbase/base/arena_allocator.cc @@ -185,9 +185,6 @@ void ArenaAllocatorMemoryTool::DoMakeInaccessible(void* ptr, size_t size) { MEMORY_TOOL_MAKE_NOACCESS(ptr, size); } -Arena::Arena() : bytes_allocated_(0), memory_(nullptr), size_(0), next_(nullptr) { -} - size_t ArenaAllocator::BytesAllocated() const { return ArenaAllocatorStats::BytesAllocated(); } diff --git a/libartbase/base/arena_allocator.h b/libartbase/base/arena_allocator.h index e34099403a..49c14614a3 100644 --- a/libartbase/base/arena_allocator.h +++ b/libartbase/base/arena_allocator.h @@ -178,7 +178,8 @@ class ArenaAllocatorMemoryTool { class Arena { public: - Arena(); + Arena() : bytes_allocated_(0), memory_(nullptr), size_(0), next_(nullptr) {} + virtual ~Arena() { } // Reset is for pre-use and uses memset for performance. void Reset(); @@ -188,9 +189,7 @@ class Arena { return memory_; } - uint8_t* End() { - return memory_ + size_; - } + uint8_t* End() const { return memory_ + size_; } size_t Size() const { return size_; @@ -205,9 +204,7 @@ class Arena { } // Return true if ptr is contained in the arena. - bool Contains(const void* ptr) const { - return memory_ <= ptr && ptr < memory_ + bytes_allocated_; - } + bool Contains(const void* ptr) const { return memory_ <= ptr && ptr < memory_ + size_; } Arena* Next() const { return next_; } diff --git a/libartbase/base/mem_map.cc b/libartbase/base/mem_map.cc index aa07f1c6ef..688325db2c 100644 --- a/libartbase/base/mem_map.cc +++ b/libartbase/base/mem_map.cc @@ -777,11 +777,11 @@ MemMap MemMap::RemapAtEnd(uint8_t* new_end, return MemMap(tail_name, actual, tail_size, actual, tail_base_size, tail_prot, false); } -MemMap MemMap::TakeReservedMemory(size_t byte_count) { +MemMap MemMap::TakeReservedMemory(size_t byte_count, bool reuse) { uint8_t* begin = Begin(); ReleaseReservedMemory(byte_count); // Performs necessary DCHECK()s on this reservation. size_t base_size = RoundUp(byte_count, kPageSize); - return MemMap(name_, begin, byte_count, begin, base_size, prot_, /* reuse= */ false); + return MemMap(name_, begin, byte_count, begin, base_size, prot_, reuse); } void MemMap::ReleaseReservedMemory(size_t byte_count) { diff --git a/libartbase/base/mem_map.h b/libartbase/base/mem_map.h index 4c41388121..28d1058e02 100644 --- a/libartbase/base/mem_map.h +++ b/libartbase/base/mem_map.h @@ -290,8 +290,9 @@ class MemMap { // exceed the size of this reservation. // // Returns a mapping owning `byte_count` bytes rounded up to entire pages - // with size set to the passed `byte_count`. - MemMap TakeReservedMemory(size_t byte_count); + // with size set to the passed `byte_count`. If 'reuse' is true then the caller + // is responsible for unmapping the taken pages. + MemMap TakeReservedMemory(size_t byte_count, bool reuse = false); static bool CheckNoGaps(MemMap& begin_map, MemMap& end_map) REQUIRES(!MemMap::mem_maps_lock_); @@ -321,6 +322,9 @@ class MemMap { // in the parent process. void ResetInForkedProcess(); + // 'redzone_size_ == 0' indicates that we are not using memory-tool on this mapping. 
+ size_t GetRedzoneSize() const { return redzone_size_; } + private: MemMap(const std::string& name, uint8_t* begin, diff --git a/runtime/Android.bp b/runtime/Android.bp index dbe11ab3dd..fc9226e6db 100644 --- a/runtime/Android.bp +++ b/runtime/Android.bp @@ -581,6 +581,7 @@ gensrcs { "gc/allocator/rosalloc.h", "gc/collector_type.h", "gc/collector/gc_type.h", + "gc/collector/mark_compact.h", "gc/space/region_space.h", "gc/space/space.h", "gc/weak_root_state.h", diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h index d57110f945..f6a99ac44e 100644 --- a/runtime/art_field-inl.h +++ b/runtime/art_field-inl.h @@ -70,6 +70,8 @@ void ArtField::VisitArrayRoots(RootVisitorType& visitor, ArtField* first_field = &array->At(0); DCHECK_LE(static_cast<void*>(end_boundary), static_cast<void*>(first_field + array->size())); static constexpr size_t kFieldSize = sizeof(ArtField); + // Confirm the assumption that ArtField size is power of two. It's important + // as we assume so below (RoundUp). static_assert(IsPowerOfTwo(kFieldSize)); uint8_t* declaring_class = reinterpret_cast<uint8_t*>(first_field) + DeclaringClassOffset().Int32Value(); diff --git a/runtime/barrier.cc b/runtime/barrier.cc index d144591daf..a6cc9ba053 100644 --- a/runtime/barrier.cc +++ b/runtime/barrier.cc @@ -40,6 +40,11 @@ void Barrier::Pass(Thread* self) { SetCountLocked(self, count_ - 1); } +void Barrier::IncrementNoWait(Thread* self) { + MutexLock mu(self, *GetLock()); + SetCountLocked(self, count_ + 1); +} + void Barrier::Wait(Thread* self) { Increment(self, -1); } diff --git a/runtime/barrier.h b/runtime/barrier.h index 432df76e16..4c94a144bd 100644 --- a/runtime/barrier.h +++ b/runtime/barrier.h @@ -51,6 +51,9 @@ class Barrier { // Pass through the barrier, decrement the count but do not block. void Pass(Thread* self) REQUIRES(!GetLock()); + // Increment the barrier but do not block. The caller should ensure that it + // decrements/passes it eventually. + void IncrementNoWait(Thread* self) REQUIRES(!GetLock()); // Decrement the count, then wait until the count is zero. void Wait(Thread* self) REQUIRES(!GetLock()); diff --git a/runtime/base/gc_visited_arena_pool.cc b/runtime/base/gc_visited_arena_pool.cc index dd29c7f5b6..938dcfaebe 100644 --- a/runtime/base/gc_visited_arena_pool.cc +++ b/runtime/base/gc_visited_arena_pool.cc @@ -16,23 +16,16 @@ #include "base/gc_visited_arena_pool.h" -#include "base/arena_allocator-inl.h" -#include "base/utils.h" - #include <sys/mman.h> #include <sys/types.h> #include <unistd.h> -namespace art { +#include "base/arena_allocator-inl.h" +#include "base/memfd.h" +#include "base/utils.h" +#include "gc/collector/mark_compact-inl.h" -#if defined(__LP64__) -// Use a size in multiples of 1GB as that can utilize the optimized mremap -// page-table move. -static constexpr size_t kLinearAllocPoolSize = 1 * GB; -static constexpr size_t kLow4GBLinearAllocPoolSize = 32 * MB; -#else -static constexpr size_t kLinearAllocPoolSize = 32 * MB; -#endif +namespace art { TrackedArena::TrackedArena(uint8_t* start, size_t size) : Arena(), first_obj_array_(nullptr) { static_assert(ArenaAllocator::kArenaAlignment <= kPageSize, @@ -48,7 +41,15 @@ TrackedArena::TrackedArena(uint8_t* start, size_t size) : Arena(), first_obj_arr void TrackedArena::Release() { if (bytes_allocated_ > 0) { - ZeroAndReleasePages(Begin(), Size()); + // Userfaultfd GC uses memfd mappings for linear-alloc and therefore + // MADV_DONTNEED will not free the pages from page cache. 
Therefore use + // MADV_REMOVE instead, which is meant for this purpose. + if (!gUseUserfaultfd || (madvise(Begin(), Size(), MADV_REMOVE) == -1 && errno == EINVAL)) { + // MADV_REMOVE fails if invoked on anonymous mapping, which could happen + // if the arena is released before userfaultfd-GC starts using memfd. So + // use MADV_DONTNEED. + ZeroAndReleasePages(Begin(), Size()); + } std::fill_n(first_obj_array_.get(), Size() / kPageSize, nullptr); bytes_allocated_ = 0; } @@ -76,18 +77,36 @@ void GcVisitedArenaPool::AddMap(size_t min_size) { size = std::max(min_size, kLow4GBLinearAllocPoolSize); } #endif + Runtime* runtime = Runtime::Current(); + gc::collector::MarkCompact* mark_compact = runtime->GetHeap()->MarkCompactCollector(); std::string err_msg; - maps_.emplace_back(MemMap::MapAnonymous(name_, - size, - PROT_READ | PROT_WRITE, - low_4gb_, - &err_msg)); + bool mapped_shared; + // We use MAP_SHARED on non-zygote processes for leveraging userfaultfd's minor-fault feature. + if (gUseUserfaultfd && !runtime->IsZygote() && mark_compact->IsUffdMinorFaultSupported()) { + maps_.emplace_back(MemMap::MapFile(size, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_SHARED, + -1, + /*start=*/0, + low_4gb_, + name_, + &err_msg)); + mapped_shared = true; + } else { + maps_.emplace_back( + MemMap::MapAnonymous(name_, size, PROT_READ | PROT_WRITE, low_4gb_, &err_msg)); + mapped_shared = false; + } + MemMap& map = maps_.back(); if (!map.IsValid()) { - LOG(FATAL) << "Failed to allocate " << name_ - << ": " << err_msg; + LOG(FATAL) << "Failed to allocate " << name_ << ": " << err_msg; UNREACHABLE(); } + if (gUseUserfaultfd) { + // Create a shadow-map for the map being added for userfaultfd GC + mark_compact->AddLinearAllocSpaceData(map.Begin(), map.Size(), mapped_shared); + } Chunk* chunk = new Chunk(map.Begin(), map.Size()); best_fit_allocs_.insert(chunk); free_chunks_.insert(chunk); @@ -251,4 +270,3 @@ void GcVisitedArenaPool::FreeArenaChain(Arena* first) { } } // namespace art - diff --git a/runtime/base/gc_visited_arena_pool.h b/runtime/base/gc_visited_arena_pool.h index 7dc79afcce..7a5f334f84 100644 --- a/runtime/base/gc_visited_arena_pool.h +++ b/runtime/base/gc_visited_arena_pool.h @@ -32,6 +32,8 @@ namespace art { // An Arena which tracks its allocations. class TrackedArena final : public Arena { public: + // Used for searching in maps. Only arena's starting address is relevant. + explicit TrackedArena(uint8_t* addr) { memory_ = addr; } TrackedArena(uint8_t* start, size_t size); template <typename PageVisitor> @@ -45,6 +47,28 @@ class TrackedArena final : public Arena { } } + // Return the page addr of the first page with first_obj set to nullptr. + uint8_t* GetLastUsedByte() const REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK_ALIGNED(Begin(), kPageSize); + DCHECK_ALIGNED(End(), kPageSize); + // Jump past bytes-allocated for arenas which are not currently being used + // by arena-allocator. This helps in reducing loop iterations below. + uint8_t* last_byte = AlignUp(Begin() + GetBytesAllocated(), kPageSize); + DCHECK_LE(last_byte, End()); + for (size_t i = (last_byte - Begin()) / kPageSize; + last_byte < End() && first_obj_array_[i] != nullptr; + last_byte += kPageSize, i++) { + // No body. 
+ } + return last_byte; + } + + uint8_t* GetFirstObject(uint8_t* addr) const REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK_LE(Begin(), addr); + DCHECK_GT(End(), addr); + return first_obj_array_[(addr - Begin()) / kPageSize]; + } + // Set 'obj_begin' in first_obj_array_ in every element for which it's the // first object. void SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end); @@ -62,6 +86,15 @@ class TrackedArena final : public Arena { // range to avoid multiple calls to mremapped/mprotected syscalls. class GcVisitedArenaPool final : public ArenaPool { public: +#if defined(__LP64__) + // Use a size in multiples of 1GB as that can utilize the optimized mremap + // page-table move. + static constexpr size_t kLinearAllocPoolSize = 1 * GB; + static constexpr size_t kLow4GBLinearAllocPoolSize = 32 * MB; +#else + static constexpr size_t kLinearAllocPoolSize = 32 * MB; +#endif + explicit GcVisitedArenaPool(bool low_4gb = false, const char* name = "LinearAlloc"); virtual ~GcVisitedArenaPool(); Arena* AllocArena(size_t size) override; @@ -79,6 +112,14 @@ class GcVisitedArenaPool final : public ArenaPool { } } + template <typename Callback> + void ForEachAllocatedArena(Callback cb) REQUIRES_SHARED(Locks::mutator_lock_) { + std::lock_guard<std::mutex> lock(lock_); + for (auto& arena : allocated_arenas_) { + cb(arena); + } + } + private: void FreeRangeLocked(uint8_t* range_begin, size_t range_size) REQUIRES(lock_); // Add a map to the pool of at least min_size @@ -102,9 +143,8 @@ class GcVisitedArenaPool final : public ArenaPool { public: // Since two chunks could have the same size, use addr when that happens. bool operator()(const Chunk* a, const Chunk* b) const { - return std::less<size_t>{}(a->size_, b->size_) - || (std::equal_to<size_t>{}(a->size_, b->size_) - && std::less<uint8_t*>{}(a->addr_, b->addr_)); + return a->size_ < b->size_ || + (a->size_ == b->size_ && std::less<uint8_t*>{}(a->addr_, b->addr_)); } }; @@ -123,9 +163,7 @@ class GcVisitedArenaPool final : public ArenaPool { std::set<Chunk*, LessByChunkAddr> free_chunks_ GUARDED_BY(lock_); // Set of allocated arenas. It's required to be able to find the arena // corresponding to a given address. - // TODO: We can manage without this set if we decide to have a large - // 'first-object' array for the entire space, instead of per arena. Analyse - // which approach is better. + // TODO: consider using HashSet, which is more memory efficient. std::set<TrackedArena, LessByArenaAddr> allocated_arenas_ GUARDED_BY(lock_); // Number of bytes allocated so far. size_t bytes_allocated_ GUARDED_BY(lock_); diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 7a68863595..dc2ccb40f5 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -3515,7 +3515,7 @@ static void LinkCode(ClassLinker* class_linker, } // Method shouldn't have already been linked. - DCHECK(method->GetEntryPointFromQuickCompiledCode() == nullptr); + DCHECK_EQ(method->GetEntryPointFromQuickCompiledCode(), nullptr); DCHECK(!method->GetDeclaringClass()->IsVisiblyInitialized()); // Actually ClassStatus::Idx. if (!method->IsInvokable()) { diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index 71e5a13638..4dfba3cc82 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -14,10 +14,11 @@ * limitations under the License. 
*/ -#include "mark_compact-inl.h" +#include <fcntl.h> #include "android-base/file.h" #include "android-base/properties.h" +#include "base/memfd.h" #include "base/quasi_atomic.h" #include "base/systrace.h" #include "base/utils.h" @@ -28,16 +29,21 @@ #include "gc/task_processor.h" #include "gc/verification-inl.h" #include "jit/jit_code_cache.h" +#include "mark_compact-inl.h" #include "mirror/object-refvisitor-inl.h" #include "read_barrier_config.h" #include "scoped_thread_state_change-inl.h" #include "sigchain.h" #include "thread_list.h" - +// Glibc v2.19 doesn't include these in fcntl.h so host builds will fail without. +#if !defined(FALLOC_FL_PUNCH_HOLE) || !defined(FALLOC_FL_KEEP_SIZE) +#include <linux/falloc.h> +#endif #include <linux/userfaultfd.h> #include <poll.h> #include <sys/ioctl.h> #include <sys/mman.h> +#include <sys/resource.h> #include <unistd.h> #include <fstream> @@ -47,6 +53,9 @@ #ifndef MREMAP_DONTUNMAP #define MREMAP_DONTUNMAP 4 #endif +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE 0x100000 +#endif #ifndef __NR_userfaultfd #if defined(__x86_64__) #define __NR_userfaultfd 323 @@ -70,8 +79,6 @@ using ::android::base::GetBoolProperty; namespace art { -// We require MREMAP_DONTUNMAP functionality of the mremap syscall, which was -// introduced in 5.13 kernel version. static bool HaveMremapDontunmap() { void* old = mmap(nullptr, kPageSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); CHECK_NE(old, MAP_FAILED); @@ -84,14 +91,9 @@ static bool HaveMremapDontunmap() { return false; } } +// We require MREMAP_DONTUNMAP functionality of the mremap syscall, which was +// introduced in 5.13 kernel version. But it was backported to GKI kernels. static bool gHaveMremapDontunmap = IsKernelVersionAtLeast(5, 13) || HaveMremapDontunmap(); - -// Concurrent compaction termination logic depends on the kernel having -// the fault-retry feature (allowing repeated faults on the same page), which was -// introduced in 5.7. On Android this feature is backported on all the kernels where -// userfaultfd is enabled. -static const bool gKernelHasFaultRetry = kIsTargetAndroid || IsKernelVersionAtLeast(5, 7); - // The other cases are defined as constexpr in runtime/read_barrier_config.h #if !defined(ART_FORCE_USE_READ_BARRIER) && defined(ART_USE_READ_BARRIER) // Returns collector type asked to be used on the cmdline. @@ -114,19 +116,20 @@ static bool SysPropSaysUffdGc() { } static bool KernelSupportsUffd() { - int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY); - // On non-android devices we may not have the kernel patches that restrict - // userfaultfd to user mode. But that is not a security concern as we are - // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY. - if (!kIsTargetAndroid && fd == -1 && errno == EINVAL) { - fd = syscall(__NR_userfaultfd, O_CLOEXEC); - } - if (fd >= 0) { - close(fd); - return true; - } else { - return false; + if (gHaveMremapDontunmap) { + int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY); + // On non-android devices we may not have the kernel patches that restrict + // userfaultfd to user mode. But that is not a security concern as we are + // on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY. + if (!kIsTargetAndroid && fd == -1 && errno == EINVAL) { + fd = syscall(__NR_userfaultfd, O_CLOEXEC); + } + if (fd >= 0) { + close(fd); + return true; + } } + return false; } static bool ShouldUseUserfaultfd() { @@ -157,9 +160,17 @@ namespace collector { // significantly. 
static constexpr bool kCheckLocks = kDebugLocking; static constexpr bool kVerifyRootsMarked = kIsDebugBuild; +// Two threads should suffice on devices. +static constexpr size_t kMaxNumUffdWorkers = 2; +// Concurrent compaction termination logic works if the kernel has the fault-retry feature +// (allowing repeated faults on the same page), which was introduced in 5.7. +// Otherwise, kernel only retries pagefaults once, therefore having 2 or less +// workers will also suffice as the termination logic requires (n-1) pagefault +// retries. +static const bool gKernelHasFaultRetry = kMaxNumUffdWorkers <= 2 || IsKernelVersionAtLeast(5, 7); bool MarkCompact::CreateUserfaultfd(bool post_fork) { - if (post_fork || uffd_ == -1) { + if (post_fork || uffd_ == kFdUnused) { // Don't use O_NONBLOCK as we rely on read waiting on uffd_ if there isn't // any read event available. We don't use poll. if (gKernelHasFaultRetry) { @@ -175,11 +186,18 @@ bool MarkCompact::CreateUserfaultfd(bool post_fork) { LOG(WARNING) << "Userfaultfd isn't supported (reason: " << strerror(errno) << ") and therefore falling back to stop-the-world compaction."; } else { - DCHECK_GE(uffd_, 0); + DCHECK(IsValidFd(uffd_)); // Get/update the features that we want in userfaultfd - struct uffdio_api api = {.api = UFFD_API, .features = 0}; + struct uffdio_api api = {.api = UFFD_API, + .features = UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM}; CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0) << "ioctl_userfaultfd: API: " << strerror(errno); + // Missing userfaults on shmem should always be available. + DCHECK_NE(api.features & UFFD_FEATURE_MISSING_SHMEM, 0u); + uffd_minor_fault_supported_ = + gHaveMremapDontunmap && (api.features & UFFD_FEATURE_MINOR_SHMEM) != 0; + // TODO: Assert that minor-fault support isn't available only on 32-bit + // kernel. } } else { // Without fault-retry feature in the kernel we can't terminate concurrent @@ -188,7 +206,7 @@ bool MarkCompact::CreateUserfaultfd(bool post_fork) { } } uffd_initialized_ = !post_fork || uffd_ == kFallbackMode; - return uffd_ >= 0; + return IsValidFd(uffd_); } template <size_t kAlignment> @@ -199,14 +217,19 @@ MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignmen } MarkCompact::MarkCompact(Heap* heap) - : GarbageCollector(heap, "concurrent mark compact"), - gc_barrier_(0), - mark_stack_lock_("mark compact mark stack lock", kMarkSweepMarkStackLock), - bump_pointer_space_(heap->GetBumpPointerSpace()), - uffd_(-1), - thread_pool_counter_(0), - compacting_(false), - uffd_initialized_(false) { + : GarbageCollector(heap, "concurrent mark compact"), + gc_barrier_(0), + mark_stack_lock_("mark compact mark stack lock", kMarkSweepMarkStackLock), + bump_pointer_space_(heap->GetBumpPointerSpace()), + moving_to_space_fd_(kFdUnused), + moving_from_space_fd_(kFdUnused), + uffd_(kFdUnused), + thread_pool_counter_(0), + compaction_in_progress_count_(0), + compacting_(false), + uffd_initialized_(false), + uffd_minor_fault_supported_(false), + minor_fault_initialized_(false) { // TODO: Depending on how the bump-pointer space move is implemented. If we // switch between two virtual memories each time, then we will have to // initialize live_words_bitmap_ accordingly. 
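
CreateUserfaultfd() above now requests UFFD_FEATURE_MISSING_SHMEM and UFFD_FEATURE_MINOR_SHMEM in the UFFDIO_API handshake and records whether minor-fault mode is usable. The handshake both requests and reports features, so support can be probed roughly as in this hedged sketch (a kernel that does not recognise a requested feature bit fails the ioctl with EINVAL, in which case the collector stays in copy mode):

    #include <linux/userfaultfd.h>
    #include <sys/ioctl.h>

    // Assumes 'uffd' is freshly created and UFFDIO_API has not been issued yet
    // (the handshake is one-shot per fd). Illustrative only.
    static bool EnableMinorFaultOnShmem(int uffd) {
      struct uffdio_api api = {.api = UFFD_API,
                               .features = UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM};
      if (ioctl(uffd, UFFDIO_API, &api) != 0) {
        return false;  // e.g. EINVAL on kernels without minor-fault support on shmem.
      }
      // On success the kernel echoes back the feature set it enabled.
      return (api.features & UFFD_FEATURE_MINOR_SHMEM) != 0;
    }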
@@ -229,7 +252,7 @@ MarkCompact::MarkCompact(Heap* heap) /*low_4gb=*/ false, &err_msg); if (UNLIKELY(!info_map_.IsValid())) { - LOG(ERROR) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg; + LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg; } else { uint8_t* p = info_map_.Begin(); chunk_info_vec_ = reinterpret_cast<uint32_t*>(p); @@ -245,36 +268,79 @@ MarkCompact::MarkCompact(Heap* heap) pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p); } + // NOTE: PROT_NONE is used here as these mappings are for address space reservation + // only and will be used only after appropriately remapping them. from_space_map_ = MemMap::MapAnonymous("Concurrent mark-compact from-space", bump_pointer_space_->Capacity(), PROT_NONE, /*low_4gb=*/ kObjPtrPoisoning, &err_msg); if (UNLIKELY(!from_space_map_.IsValid())) { - LOG(ERROR) << "Failed to allocate concurrent mark-compact from-space" << err_msg; + LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space" << err_msg; } else { from_space_begin_ = from_space_map_.Begin(); } - // poisoning requires 32-bit pointers and therefore compaction buffers on - // the stack can't be used. We also use the first page-sized buffer for the - // purpose of terminating concurrent compaction. - const size_t num_pages = 1 + std::max(heap_->GetParallelGCThreadCount(), - heap_->GetConcGCThreadCount()); + // In some cases (32-bit or kObjPtrPoisoning) it's too much to ask for 3 + // heap-sized mappings in low-4GB. So tolerate failure here by attempting to + // mmap again right before the compaction pause. And if even that fails, then + // running the GC cycle in copy-mode rather than minor-fault. + // + // This map doesn't have to be aligned to 2MB as we don't mremap on it. + shadow_to_space_map_ = MemMap::MapAnonymous("Concurrent mark-compact moving-space shadow", + bump_pointer_space_->Capacity(), + PROT_NONE, + /*low_4gb=*/kObjPtrPoisoning, + &err_msg); + if (!shadow_to_space_map_.IsValid()) { + LOG(WARNING) << "Failed to allocate concurrent mark-compact moving-space shadow: " << err_msg; + } + const size_t num_pages = 1 + std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers); compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers", - kPageSize * (kObjPtrPoisoning ? num_pages : 1), + kPageSize * num_pages, PROT_READ | PROT_WRITE, - /*low_4gb=*/ kObjPtrPoisoning, + /*low_4gb=*/kObjPtrPoisoning, &err_msg); if (UNLIKELY(!compaction_buffers_map_.IsValid())) { - LOG(ERROR) << "Failed to allocate concurrent mark-compact compaction buffers" << err_msg; + LOG(FATAL) << "Failed to allocate concurrent mark-compact compaction buffers" << err_msg; } + // We also use the first page-sized buffer for the purpose of terminating concurrent compaction. conc_compaction_termination_page_ = compaction_buffers_map_.Begin(); - if (kObjPtrPoisoning) { - // Touch the page deliberately to avoid userfaults on it. We madvise it in - // CompactionPhase() before using it to terminate concurrent compaction. - CHECK_EQ(*conc_compaction_termination_page_, 0); - } + // Touch the page deliberately to avoid userfaults on it. We madvise it in + // CompactionPhase() before using it to terminate concurrent compaction. + CHECK_EQ(*conc_compaction_termination_page_, 0); + // In most of the cases, we don't expect more than one LinearAlloc space. 
+ linear_alloc_spaces_data_.reserve(1); +} + +void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len, bool already_shared) { + DCHECK_ALIGNED(begin, kPageSize); + DCHECK_ALIGNED(len, kPageSize); + std::string err_msg; + MemMap shadow(MemMap::MapAnonymous("linear-alloc shadow map", + len, + PROT_NONE, + /*low_4gb=*/false, + &err_msg)); + if (!shadow.IsValid()) { + LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg; + UNREACHABLE(); + } + + MemMap page_status_map(MemMap::MapAnonymous("linear-alloc page-status map", + len / kPageSize, + PROT_READ | PROT_WRITE, + /*low_4gb=*/false, + &err_msg)); + if (!page_status_map.IsValid()) { + LOG(FATAL) << "Failed to allocate linear-alloc page-status shadow map: " << err_msg; + UNREACHABLE(); + } + linear_alloc_spaces_data_.emplace_back(std::forward<MemMap>(shadow), + std::forward<MemMap>(page_status_map), + begin, + begin + len, + already_shared); } void MarkCompact::BindAndResetBitmaps() { @@ -342,6 +408,9 @@ void MarkCompact::InitializePhase() { from_space_slide_diff_ = from_space_begin_ - bump_pointer_space_->Begin(); black_allocations_begin_ = bump_pointer_space_->Limit(); compacting_ = false; + // TODO: Would it suffice to read it once in the constructor, which is called + // in zygote process? + pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); } void MarkCompact::RunPhases() { @@ -381,7 +450,7 @@ void MarkCompact::RunPhases() { heap_->ThreadFlipEnd(self); } - if (uffd_ >= 0) { + if (IsValidFd(uffd_)) { ReaderMutexLock mu(self, *Locks::mutator_lock_); CompactionPhase(); } @@ -544,26 +613,28 @@ void MarkCompact::InitNonMovingSpaceFirstObjects() { non_moving_first_objs_count_ = page_idx; } +bool MarkCompact::CanCompactMovingSpaceWithMinorFault() { + size_t min_size = (moving_first_objs_count_ + black_page_count_) * kPageSize; + return minor_fault_initialized_ && shadow_to_space_map_.IsValid() && + shadow_to_space_map_.Size() >= min_size; +} + class MarkCompact::ConcurrentCompactionGcTask : public SelfDeletingTask { public: explicit ConcurrentCompactionGcTask(MarkCompact* collector, size_t idx) : collector_(collector), index_(idx) {} -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wframe-larger-than=" void Run(Thread* self ATTRIBUTE_UNUSED) override REQUIRES_SHARED(Locks::mutator_lock_) { - // The passed page/buf to ConcurrentCompaction is used by the thread as a - // kPageSize buffer for compacting and updating objects into and then - // passing the buf to uffd ioctls. - if (kObjPtrPoisoning) { - uint8_t* page = collector_->compaction_buffers_map_.Begin() + index_ * kPageSize; - collector_->ConcurrentCompaction(page); + if (collector_->CanCompactMovingSpaceWithMinorFault()) { + collector_->ConcurrentCompaction<MarkCompact::kMinorFaultMode>(/*buf=*/nullptr); } else { - uint8_t buf[kPageSize]; - collector_->ConcurrentCompaction(buf); + // The passed page/buf to ConcurrentCompaction is used by the thread as a + // kPageSize buffer for compacting and updating objects into and then + // passing the buf to uffd ioctls. + uint8_t* buf = collector_->compaction_buffers_map_.Begin() + index_ * kPageSize; + collector_->ConcurrentCompaction<MarkCompact::kCopyMode>(buf); } } -#pragma clang diagnostic pop private: MarkCompact* const collector_; @@ -635,6 +706,7 @@ void MarkCompact::PrepareForCompaction() { // The chunk-info vector entries for the post marking-pause allocations will be // also updated in the pre-compaction pause. 
+ bool is_zygote = Runtime::Current()->IsZygote(); if (!uffd_initialized_ && CreateUserfaultfd(/*post_fork*/false)) { // Register the buffer that we use for terminating concurrent compaction struct uffdio_register uffd_register; @@ -643,6 +715,18 @@ void MarkCompact::PrepareForCompaction() { uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0) << "ioctl_userfaultfd: register compaction termination page: " << strerror(errno); + + // uffd_minor_fault_supported_ would be set appropriately in + // CreateUserfaultfd() above. + if (!uffd_minor_fault_supported_ && shadow_to_space_map_.IsValid()) { + // A valid shadow-map for moving space is only possible if we + // were able to map it in the constructor. That also means that its size + // matches the moving-space. + CHECK_EQ(shadow_to_space_map_.Size(), bump_pointer_space_->Capacity()); + // Release the shadow map for moving-space if we don't support minor-fault + // as it's not required. + shadow_to_space_map_.Reset(); + } } // For zygote we create the thread pool each time before starting compaction, // and get rid of it when finished. This is expected to happen rarely as @@ -650,15 +734,191 @@ void MarkCompact::PrepareForCompaction() { if (uffd_ != kFallbackMode) { ThreadPool* pool = heap_->GetThreadPool(); if (UNLIKELY(pool == nullptr)) { - heap_->CreateThreadPool(); + // On devices with 2 cores, GetParallelGCThreadCount() will return 1, + // which is desired number of workers on such devices. + heap_->CreateThreadPool(std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers)); pool = heap_->GetThreadPool(); } - const size_t num_threads = pool->GetThreadCount(); + size_t num_threads = pool->GetThreadCount(); thread_pool_counter_ = num_threads; for (size_t i = 0; i < num_threads; i++) { pool->AddTask(thread_running_gc_, new ConcurrentCompactionGcTask(this, i + 1)); } CHECK_EQ(pool->GetTaskCount(thread_running_gc_), num_threads); + + /* + * Possible scenarios for mappings: + * A) All zygote GCs (or if minor-fault feature isn't available): uses + * uffd's copy mode + * 1) For moving-space ('to' space is same as the moving-space): + * a) Private-anonymous mappings for 'to' and 'from' space are created in + * the constructor. + * b) In the compaction pause, we mremap(dontunmap) from 'to' space to + * 'from' space. This results in moving all pages to 'from' space and + * emptying the 'to' space, thereby preparing it for userfaultfd + * registration. + * + * 2) For linear-alloc space: + * a) Private-anonymous mappings for the linear-alloc and its 'shadow' + * are created by the arena-pool. + * b) In the compaction pause, we mremap(dontumap) with similar effect as + * (A.1.b) above. + * + * B) First GC after zygote: uses uffd's copy-mode + * 1) For moving-space: + * a) If the mmap for shadow-map has been successful in the constructor, + * then we remap it (mmap with MAP_FIXED) to get a shared-anonymous + * mapping. + * b) Else, we create two memfd and ftruncate them to the moving-space + * size. + * c) Same as (A.1.b) + * d) If (B.1.a), then mremap(dontunmap) from shadow-map to + * 'to' space. This will make both of them map to the same pages + * e) If (B.1.b), then mmap with the first memfd in shared mode on the + * 'to' space. + * f) At the end of compaction, we will have moved the moving-space + * objects to a MAP_SHARED mapping, readying it for minor-fault from next + * GC cycle. 
+ * + * 2) For linear-alloc space: + * a) Same as (A.2.b) + * b) mmap a shared-anonymous mapping onto the linear-alloc space. + * c) Same as (B.1.f) + * + * C) All subsequent GCs: preferable minor-fault mode. But may also require + * using copy-mode. + * 1) For moving-space: + * a) If the shadow-map is created and no memfd was used, then that means + * we are using shared-anonymous. Therefore, mmap a shared-anonymous on + * the shadow-space. + * b) If the shadow-map is not mapped yet, then mmap one with a size + * big enough to hold the compacted moving space. This may fail, in which + * case we will use uffd's copy-mode. + * c) If (b) is successful, then mmap the free memfd onto shadow-map. + * d) Same as (A.1.b) + * e) In compaction pause, if the shadow-map was not created, then use + * copy-mode. + * f) Else, if the created map is smaller than the required-size, then + * use mremap (without dontunmap) to expand the size. If failed, then use + * copy-mode. + * g) Otherwise, same as (B.1.d) and use minor-fault mode. + * + * 2) For linear-alloc space: + * a) Same as (A.2.b) + * b) Use minor-fault mode + */ + auto mmap_shadow_map = [this](int flags, int fd) { + void* ret = mmap(shadow_to_space_map_.Begin(), + shadow_to_space_map_.Size(), + PROT_READ | PROT_WRITE, + flags, + fd, + /*offset=*/0); + DCHECK_NE(ret, MAP_FAILED) << "mmap for moving-space shadow failed:" << strerror(errno); + }; + // Setup all the virtual memory ranges required for concurrent compaction. + if (minor_fault_initialized_) { + DCHECK(!is_zygote); + if (UNLIKELY(!shadow_to_space_map_.IsValid())) { + // This case happens only once on the first GC in minor-fault mode, if + // we were unable to reserve shadow-map for moving-space in the + // beginning. + DCHECK_GE(moving_to_space_fd_, 0); + // Take extra 4MB to reduce the likelihood of requiring resizing this + // map in the pause due to black allocations. + size_t reqd_size = std::min(moving_first_objs_count_ * kPageSize + 4 * MB, + bump_pointer_space_->Capacity()); + // We cannot support memory-tool with shadow-map (as it requires + // appending a redzone) in this case because the mapping may have to be expanded + // using mremap (in KernelPreparation()), which would ignore the redzone. + // MemMap::MapFile() appends a redzone, but MemMap::MapAnonymous() doesn't. + std::string err_msg; + shadow_to_space_map_ = MemMap::MapAnonymous("moving-space-shadow", + reqd_size, + PROT_NONE, + /*low_4gb=*/kObjPtrPoisoning, + &err_msg); + + if (shadow_to_space_map_.IsValid()) { + CHECK(!kMemoryToolAddsRedzones || shadow_to_space_map_.GetRedzoneSize() == 0u); + // We want to use MemMap to get low-4GB mapping, if required, but then also + // want to have its ownership as we may grow it (in + // KernelPreparation()). If the ownership is not taken and we try to + // resize MemMap, then it unmaps the virtual range. + MemMap temp = shadow_to_space_map_.TakeReservedMemory(shadow_to_space_map_.Size(), + /*reuse*/ true); + std::swap(temp, shadow_to_space_map_); + DCHECK(!temp.IsValid()); + } else { + LOG(WARNING) << "Failed to create moving space's shadow map of " << PrettySize(reqd_size) + << " size. " << err_msg; + } + } + + if (LIKELY(shadow_to_space_map_.IsValid())) { + int fd = moving_to_space_fd_; + int mmap_flags = MAP_SHARED | MAP_FIXED; + if (fd == kFdUnused) { + // Unused moving-to-space fd means we are using anonymous shared + // mapping. 
+ DCHECK_EQ(shadow_to_space_map_.Size(), bump_pointer_space_->Capacity()); + mmap_flags |= MAP_ANONYMOUS; + fd = -1; + } + // If the map is smaller than required, then we'll do mremap in the + // compaction pause to increase the size. + mmap_shadow_map(mmap_flags, fd); + } + + for (auto& data : linear_alloc_spaces_data_) { + DCHECK_EQ(mprotect(data.shadow_.Begin(), data.shadow_.Size(), PROT_READ | PROT_WRITE), 0) + << "mprotect failed: " << strerror(errno); + } + } else if (!is_zygote && uffd_minor_fault_supported_) { + // First GC after zygote-fork. We will still use uffd's copy mode but will + // use it to move objects to MAP_SHARED (to prepare for subsequent GCs, which + // will use uffd's minor-fault feature). + if (shadow_to_space_map_.IsValid() && + shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) { + mmap_shadow_map(MAP_SHARED | MAP_FIXED | MAP_ANONYMOUS, /*fd=*/-1); + } else { + size_t size = bump_pointer_space_->Capacity(); + DCHECK_EQ(moving_to_space_fd_, kFdUnused); + DCHECK_EQ(moving_from_space_fd_, kFdUnused); + const char* name = bump_pointer_space_->GetName(); + moving_to_space_fd_ = memfd_create(name, MFD_CLOEXEC); + CHECK_NE(moving_to_space_fd_, -1) + << "memfd_create: failed for " << name << ": " << strerror(errno); + moving_from_space_fd_ = memfd_create(name, MFD_CLOEXEC); + CHECK_NE(moving_from_space_fd_, -1) + << "memfd_create: failed for " << name << ": " << strerror(errno); + + // memfds are considered as files from resource limits point of view. + // And the moving space could be several hundred MBs. So increase the + // limit, if it's lower than moving-space size. + bool rlimit_changed = false; + rlimit rlim_read; + CHECK_EQ(getrlimit(RLIMIT_FSIZE, &rlim_read), 0) << "getrlimit failed: " << strerror(errno); + if (rlim_read.rlim_cur < size) { + rlimit_changed = true; + rlimit rlim = rlim_read; + rlim.rlim_cur = size; + CHECK_EQ(setrlimit(RLIMIT_FSIZE, &rlim), 0) << "setrlimit failed: " << strerror(errno); + } + + // moving-space will map this fd so that we compact objects into it. + int ret = ftruncate(moving_to_space_fd_, size); + CHECK_EQ(ret, 0) << "ftruncate failed for moving-space:" << strerror(errno); + ret = ftruncate(moving_from_space_fd_, size); + CHECK_EQ(ret, 0) << "ftruncate failed for moving-space:" << strerror(errno); + + if (rlimit_changed) { + // reset the rlimit to the original limits. + CHECK_EQ(setrlimit(RLIMIT_FSIZE, &rlim_read), 0) + << "setrlimit failed: " << strerror(errno); + } + } + } } } @@ -941,7 +1201,10 @@ void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const { } } -void MarkCompact::CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* addr) { +void MarkCompact::CompactPage(mirror::Object* obj, + uint32_t offset, + uint8_t* addr, + bool needs_memset_zero) { DCHECK(moving_space_bitmap_->Test(obj) && live_words_bitmap_->Test(obj)); DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj @@ -1084,7 +1347,7 @@ void MarkCompact::CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* add } // The last page that we compact may have some bytes left untouched in the // end, we should zero them as the kernel copies at page granularity. 
- if (UNLIKELY(bytes_done < kPageSize)) { + if (needs_memset_zero && UNLIKELY(bytes_done < kPageSize)) { std::memset(addr + bytes_done, 0x0, kPageSize - bytes_done); } } @@ -1097,7 +1360,8 @@ void MarkCompact::CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* add void MarkCompact::SlideBlackPage(mirror::Object* first_obj, const size_t page_idx, uint8_t* const pre_compact_page, - uint8_t* dest) { + uint8_t* dest, + bool needs_memset_zero) { DCHECK(IsAligned<kPageSize>(pre_compact_page)); size_t bytes_copied; const uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx]; @@ -1119,7 +1383,9 @@ void MarkCompact::SlideBlackPage(mirror::Object* first_obj, if (pre_compact_addr > pre_compact_page) { bytes_copied = pre_compact_addr - pre_compact_page; DCHECK_LT(bytes_copied, kPageSize); - std::memset(dest, 0x0, bytes_copied); + if (needs_memset_zero) { + std::memset(dest, 0x0, bytes_copied); + } dest += bytes_copied; } else { bytes_copied = 0; @@ -1230,8 +1496,10 @@ void MarkCompact::SlideBlackPage(mirror::Object* first_obj, }); size_t remaining_bytes = kPageSize - bytes_copied; if (found_obj == nullptr) { - // No more black objects in this page. Zero the remaining bytes and return. - std::memset(dest, 0x0, remaining_bytes); + if (needs_memset_zero) { + // No more black objects in this page. Zero the remaining bytes and return. + std::memset(dest, 0x0, remaining_bytes); + } return; } // Copy everything in this page, which includes any zeroed regions @@ -1271,20 +1539,100 @@ void MarkCompact::SlideBlackPage(mirror::Object* first_obj, } } -template <bool kFallback> -void MarkCompact::CompactMovingSpace(uint8_t* page) { - // For every page we have a starting object, which may have started in some - // preceding page, and an offset within that object from where we must start - // copying. - // Consult the live-words bitmap to copy all contiguously live words at a - // time. These words may constitute multiple objects. To avoid the need for - // consulting mark-bitmap to find where does the next live object start, we - // use the object-size returned by VisitRefsForCompaction. - // - // TODO: Should we do this in reverse? If the probability of accessing an object - // is inversely proportional to the object's age, then it may make sense. - TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); - uint8_t* to_space = bump_pointer_space_->Begin(); +template <bool kFirstPageMapping> +void MarkCompact::MapProcessedPages(uint8_t* to_space_start, + Atomic<PageState>* state_arr, + size_t arr_idx, + size_t arr_len) { + DCHECK(minor_fault_initialized_); + DCHECK_LT(arr_idx, arr_len); + DCHECK_ALIGNED(to_space_start, kPageSize); + // Claim all the contiguous pages, which are ready to be mapped, and then do + // so in a single ioctl. This helps avoid the overhead of invoking syscall + // several times and also maps the already-processed pages, avoiding + // unnecessary faults on them. + size_t length = kFirstPageMapping ? kPageSize : 0; + if (kFirstPageMapping) { + arr_idx++; + } + // We need to guarantee that we don't end up sucsessfully marking a later + // page 'mapping' and then fail to mark an earlier page. To guarantee that + // we use acq_rel order. 
+ for (; arr_idx < arr_len; arr_idx++, length += kPageSize) { + PageState expected_state = PageState::kProcessed; + if (!state_arr[arr_idx].compare_exchange_strong( + expected_state, PageState::kProcessedAndMapping, std::memory_order_acq_rel)) { + break; + } + } + if (length > 0) { + // Note: We need the first page to be attempted (to be mapped) by the ioctl + // as this function is called due to some mutator thread waiting on the + // 'to_space_start' page. Therefore, the ioctl must always be called + // with 'to_space_start' as the 'start' address because it can bail out in + // the middle (not attempting to map the subsequent pages) if it finds any + // page either already mapped in between, or missing on the shadow-map. + struct uffdio_continue uffd_continue; + uffd_continue.range.start = reinterpret_cast<uintptr_t>(to_space_start); + uffd_continue.range.len = length; + uffd_continue.mode = 0; + int ret = ioctl(uffd_, UFFDIO_CONTINUE, &uffd_continue); + if (UNLIKELY(ret == -1 && errno == EAGAIN)) { + // This can happen only in linear-alloc. + DCHECK(linear_alloc_spaces_data_.end() != + std::find_if(linear_alloc_spaces_data_.begin(), + linear_alloc_spaces_data_.end(), + [to_space_start](const LinearAllocSpaceData& data) { + return data.begin_ <= to_space_start && to_space_start < data.end_; + })); + + // This could happen if userfaultfd couldn't find any pages mapped in the + // shadow map. For instance, if there are certain (contiguous) pages on + // linear-alloc which are allocated and have first-object set-up but have + // not been accessed yet. + // Bail out by setting the remaining pages' state back to kProcessed and + // then waking up any waiting threads. + DCHECK_GE(uffd_continue.mapped, 0); + DCHECK_ALIGNED(uffd_continue.mapped, kPageSize); + DCHECK_LT(uffd_continue.mapped, static_cast<ssize_t>(length)); + if (kFirstPageMapping) { + // In this case the first page must be mapped. + DCHECK_GE(uffd_continue.mapped, static_cast<ssize_t>(kPageSize)); + } + // Nobody would modify these pages' state simultaneously so only atomic + // store is sufficient. Use 'release' order to ensure that all states are + // modified sequentially. + for (size_t remaining_len = length - uffd_continue.mapped; remaining_len > 0; + remaining_len -= kPageSize) { + arr_idx--; + DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed), + PageState::kProcessedAndMapping); + state_arr[arr_idx].store(PageState::kProcessed, std::memory_order_release); + } + uffd_continue.range.start = + reinterpret_cast<uintptr_t>(to_space_start) + uffd_continue.mapped; + uffd_continue.range.len = length - uffd_continue.mapped; + ret = ioctl(uffd_, UFFDIO_WAKE, &uffd_continue.range); + CHECK_EQ(ret, 0) << "ioctl_userfaultfd: wake failed: " << strerror(errno); + } else { + // We may receive ENOENT if gc-thread unregisters the + // range behind our back, which is fine because that + // happens only when it knows compaction is done. 
+ CHECK(ret == 0 || !kFirstPageMapping || errno == ENOENT) + << "ioctl_userfaultfd: continue failed: " << strerror(errno); + if (ret == 0) { + DCHECK_EQ(uffd_continue.mapped, static_cast<ssize_t>(length)); + } + } + } +} + +template <int kMode, typename CompactionFn> +void MarkCompact::DoPageCompactionWithStateChange(size_t page_idx, + size_t status_arr_len, + uint8_t* to_space_page, + uint8_t* page, + CompactionFn func) { auto copy_ioctl = [this] (void* dst, void* buffer) { struct uffdio_copy uffd_copy; uffd_copy.src = reinterpret_cast<uintptr_t>(buffer); @@ -1292,50 +1640,111 @@ void MarkCompact::CompactMovingSpace(uint8_t* page) { uffd_copy.len = kPageSize; uffd_copy.mode = 0; CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0) - << "ioctl: copy " << strerror(errno); + << "ioctl_userfaultfd: copy failed: " << strerror(errno) + << ". src:" << buffer << " dst:" << dst; DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize)); }; - size_t idx = 0; - while (idx < moving_first_objs_count_) { - // Relaxed memory-order is used as the subsequent ioctl syscall will act as a fence. - // In the concurrent case (!kFallback) we need to ensure that the update to - // moving_spaces_status_[idx] is released before the contents of the page. - if (kFallback - || moving_pages_status_[idx].exchange(PageState::kCompacting, std::memory_order_relaxed) - == PageState::kUncompacted) { - CompactPage(first_objs_moving_space_[idx].AsMirrorPtr(), - pre_compact_offset_moving_space_[idx], - kFallback ? to_space : page); - if (!kFallback) { - copy_ioctl(to_space, page); + PageState expected_state = PageState::kUnprocessed; + PageState desired_state = + kMode == kCopyMode ? PageState::kProcessingAndMapping : PageState::kProcessing; + // In the concurrent case (kMode != kFallbackMode) we need to ensure that the update + // to moving_spaces_status_[page_idx] is released before the contents of the page are + // made accessible to other threads. + // + // In minor-fault case, we need acquire ordering here to ensure that when the + // CAS fails, another thread has completed processing the page, which is guaranteed + // by the release below. + // Relaxed memory-order is used in copy mode as the subsequent ioctl syscall acts as a fence. + std::memory_order order = + kMode == kCopyMode ? std::memory_order_relaxed : std::memory_order_acquire; + if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong( + expected_state, desired_state, order)) { + func(); + if (kMode == kCopyMode) { + copy_ioctl(to_space_page, page); + } else if (kMode == kMinorFaultMode) { + expected_state = PageState::kProcessing; + desired_state = PageState::kProcessed; + // the CAS needs to be with release order to ensure that stores to the + // page makes it to memory *before* other threads observe that it's + // ready to be mapped. + if (!moving_pages_status_[page_idx].compare_exchange_strong( + expected_state, desired_state, std::memory_order_release)) { + // Some mutator has requested to map the page after processing it. 
+ DCHECK_EQ(expected_state, PageState::kProcessingAndMapping); + MapProcessedPages</*kFirstPageMapping=*/true>( + to_space_page, moving_pages_status_, page_idx, status_arr_len); } } - to_space += kPageSize; - idx++; + } else { + DCHECK_GT(expected_state, PageState::kProcessed); + } +} + +template <int kMode> +void MarkCompact::CompactMovingSpace(uint8_t* page) { + // For every page we have a starting object, which may have started in some + // preceding page, and an offset within that object from where we must start + // copying. + // Consult the live-words bitmap to copy all contiguously live words at a + // time. These words may constitute multiple objects. To avoid the need for + // consulting mark-bitmap to find where does the next live object start, we + // use the object-size returned by VisitRefsForCompaction. + // + // We do the compaction in reverse direction so that the pages containing + // TLAB and latest allocations are processed first. + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + size_t page_status_arr_len = moving_first_objs_count_ + black_page_count_; + size_t idx = page_status_arr_len; + uint8_t* to_space_end = bump_pointer_space_->Begin() + page_status_arr_len * kPageSize; + uint8_t* shadow_space_end = nullptr; + if (kMode == kMinorFaultMode) { + shadow_space_end = shadow_to_space_map_.Begin() + page_status_arr_len * kPageSize; } // Allocated-black pages - size_t count = moving_first_objs_count_ + black_page_count_; - uint8_t* pre_compact_page = black_allocations_begin_; + uint8_t* pre_compact_page = black_allocations_begin_ + (black_page_count_ * kPageSize); + DCHECK(IsAligned<kPageSize>(pre_compact_page)); - while (idx < count) { + while (idx > moving_first_objs_count_) { + idx--; + pre_compact_page -= kPageSize; + to_space_end -= kPageSize; + if (kMode == kMinorFaultMode) { + shadow_space_end -= kPageSize; + page = shadow_space_end; + } else if (kMode == kFallbackMode) { + page = to_space_end; + } mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr(); - if (first_obj != nullptr - && (kFallback - || moving_pages_status_[idx].exchange(PageState::kCompacting, std::memory_order_relaxed) - == PageState::kUncompacted)) { - DCHECK_GT(black_alloc_pages_first_chunk_size_[idx], 0u); - SlideBlackPage(first_obj, - idx, - pre_compact_page, - kFallback ? 
to_space : page); - if (!kFallback) { - copy_ioctl(to_space, page); - } + if (first_obj != nullptr) { + DoPageCompactionWithStateChange<kMode>( + idx, + page_status_arr_len, + to_space_end, + page, + [&]() REQUIRES_SHARED(Locks::mutator_lock_) { + SlideBlackPage(first_obj, idx, pre_compact_page, page, kMode == kCopyMode); + }); } - pre_compact_page += kPageSize; - to_space += kPageSize; - idx++; } + DCHECK_EQ(pre_compact_page, black_allocations_begin_); + + while (idx > 0) { + idx--; + to_space_end -= kPageSize; + if (kMode == kMinorFaultMode) { + shadow_space_end -= kPageSize; + page = shadow_space_end; + } else if (kMode == kFallbackMode) { + page = to_space_end; + } + mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr(); + DoPageCompactionWithStateChange<kMode>( + idx, page_status_arr_len, to_space_end, page, [&]() REQUIRES_SHARED(Locks::mutator_lock_) { + CompactPage(first_obj, pre_compact_offset_moving_space_[idx], page, kMode == kCopyMode); + }); + } + DCHECK_EQ(to_space_end, bump_pointer_space_->Begin()); } void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page) { @@ -1572,11 +1981,9 @@ class MarkCompact::ImmuneSpaceUpdateObjVisitor { MarkCompact* const collector_; }; -class MarkCompact::NativeRootsUpdateVisitor : public ClassLoaderVisitor { +class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor { public: - explicit NativeRootsUpdateVisitor(MarkCompact* collector) - : collector_(collector), - pointer_size_(Runtime::Current()->GetClassLinker()->GetImagePointerSize()) {} + explicit ClassLoaderRootsUpdater(MarkCompact* collector) : collector_(collector) {} void Visit(ObjPtr<mirror::ClassLoader> class_loader) override REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) { @@ -1586,8 +1993,28 @@ class MarkCompact::NativeRootsUpdateVisitor : public ClassLoaderVisitor { } } - void operator()(uint8_t* page_begin, uint8_t* first_obj) - ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) { + void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const + REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) { + if (!root->IsNull()) { + VisitRoot(root); + } + } + + void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const + REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) { + collector_->VisitRoots(&root, 1, RootInfo(RootType::kRootVMInternal)); + } + + private: + MarkCompact* collector_; +}; + +class MarkCompact::LinearAllocPageUpdater { + public: + explicit LinearAllocPageUpdater(MarkCompact* collector) : collector_(collector) {} + + void operator()(uint8_t* page_begin, uint8_t* first_obj) const ALWAYS_INLINE + REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK_ALIGNED(page_begin, kPageSize); uint8_t* page_end = page_begin + kPageSize; uint32_t obj_size; @@ -1595,9 +2022,9 @@ class MarkCompact::NativeRootsUpdateVisitor : public ClassLoaderVisitor { TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte); obj_size = header->GetSize(); LinearAllocKind kind = header->GetKind(); - if (obj_size == 0) { + if (UNLIKELY(obj_size == 0)) { // No more objects in this page to visit. 
- DCHECK_EQ(static_cast<uint32_t>(kind), 0u); + DCHECK_EQ(kind, LinearAllocKind::kNoGCRoots); break; } uint8_t* obj = byte + sizeof(TrackingHeader); @@ -1605,10 +2032,11 @@ class MarkCompact::NativeRootsUpdateVisitor : public ClassLoaderVisitor { if (header->Is16Aligned()) { obj = AlignUp(obj, 16); } - if (UNLIKELY(obj >= page_end)) { - break; + uint8_t* begin_boundary = std::max(obj, page_begin); + uint8_t* end_boundary = std::min(obj_end, page_end); + if (begin_boundary < end_boundary) { + VisitObject(kind, obj, begin_boundary, end_boundary); } - VisitObject(kind, obj, std::max(obj, page_begin), std::min(obj_end, page_end)); if (ArenaAllocator::IsRunningOnMemoryTool()) { obj_size += ArenaAllocator::kMemoryToolRedZoneBytes; } @@ -1628,12 +2056,14 @@ class MarkCompact::NativeRootsUpdateVisitor : public ClassLoaderVisitor { mirror::Object* old_ref = root->AsMirrorPtr(); DCHECK_NE(old_ref, nullptr); if (collector_->live_words_bitmap_->HasAddress(old_ref)) { + mirror::Object* new_ref = old_ref; if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) { - mirror::Object* new_ref = collector_->PostCompactBlackObjAddr(old_ref); - root->Assign(new_ref); + new_ref = collector_->PostCompactBlackObjAddr(old_ref); } else if (collector_->live_words_bitmap_->Test(old_ref)) { DCHECK(collector_->moving_space_bitmap_->Test(old_ref)) << old_ref; - mirror::Object* new_ref = collector_->PostCompactOldObjAddr(old_ref); + new_ref = collector_->PostCompactOldObjAddr(old_ref); + } + if (old_ref != new_ref) { root->Assign(new_ref); } } @@ -1643,9 +2073,10 @@ class MarkCompact::NativeRootsUpdateVisitor : public ClassLoaderVisitor { void VisitObject(LinearAllocKind kind, void* obj, uint8_t* start_boundary, - uint8_t* end_boundary) - REQUIRES_SHARED(Locks::mutator_lock_) { + uint8_t* end_boundary) const REQUIRES_SHARED(Locks::mutator_lock_) { switch (kind) { + case LinearAllocKind::kNoGCRoots: + break; case LinearAllocKind::kGCRootArray: { GcRoot<mirror::Object>* root = reinterpret_cast<GcRoot<mirror::Object>*>(start_boundary); @@ -1661,17 +2092,13 @@ class MarkCompact::NativeRootsUpdateVisitor : public ClassLoaderVisitor { // Old methods are clobbered in debug builds. Check size to confirm if the array // has any GC roots to visit. 
See ClassLinker::LinkMethodsHelper::ClobberOldMethods() if (array->size() > 0) { - if (pointer_size_ == PointerSize::k64) { - ArtMethod::VisitArrayRoots<PointerSize::k64>(*this, - start_boundary, - end_boundary, - array); + if (collector_->pointer_size_ == PointerSize::k64) { + ArtMethod::VisitArrayRoots<PointerSize::k64>( + *this, start_boundary, end_boundary, array); } else { - DCHECK_EQ(pointer_size_, PointerSize::k32); - ArtMethod::VisitArrayRoots<PointerSize::k32>(*this, - start_boundary, - end_boundary, - array); + DCHECK_EQ(collector_->pointer_size_, PointerSize::k32); + ArtMethod::VisitArrayRoots<PointerSize::k32>( + *this, start_boundary, end_boundary, array); } } } @@ -1692,15 +2119,11 @@ class MarkCompact::NativeRootsUpdateVisitor : public ClassLoaderVisitor { mirror::DexCachePair<mirror::Object>* last = reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(end_boundary); mirror::DexCache::VisitDexCachePairRoots(*this, first, last); - } - break; - case LinearAllocKind::kNoGCRoots: - break; + } } } MarkCompact* const collector_; - const PointerSize pointer_size_; }; void MarkCompact::PreCompactionPhase() { @@ -1744,7 +2167,8 @@ void MarkCompact::PreCompactionPhase() { if (kIsDebugBuild) { size_t len = moving_first_objs_count_ + black_page_count_; for (size_t i = 0; i < len; i++) { - CHECK_EQ(moving_pages_status_[i].load(std::memory_order_relaxed), PageState::kUncompacted); + CHECK_EQ(moving_pages_status_[i].load(std::memory_order_relaxed), + PageState::kUnprocessed); } } // Iterate over the allocation_stack_, for every object in the non-moving @@ -1774,19 +2198,28 @@ void MarkCompact::PreCompactionPhase() { } } { - TimingLogger::ScopedTiming t2("(Paused)UpdateNativeRoots", GetTimings()); - NativeRootsUpdateVisitor visitor(this); + TimingLogger::ScopedTiming t2("(Paused)UpdateClassLoaderRoots", GetTimings()); + ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_); { - ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_); - runtime->GetClassLinker()->VisitClassLoaders(&visitor); + ClassLoaderRootsUpdater updater(this); + runtime->GetClassLinker()->VisitClassLoaders(&updater); } - GcVisitedArenaPool *arena_pool = - static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool()); - arena_pool->VisitRoots(visitor); } - SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/true); - KernelPreparation(); + GcVisitedArenaPool* arena_pool = + static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool()); + if (uffd_ == kFallbackMode) { + LinearAllocPageUpdater updater(this); + arena_pool->VisitRoots(updater); + } else { + arena_pool->ForEachAllocatedArena( + [this](const TrackedArena& arena) REQUIRES_SHARED(Locks::mutator_lock_) { + uint8_t* last_byte = arena.GetLastUsedByte(); + CHECK(linear_alloc_arenas_.insert({&arena, last_byte}).second); + }); + } + + SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ true); { TimingLogger::ScopedTiming t2("(Paused)UpdateConcurrentRoots", GetTimings()); @@ -1825,75 +2258,177 @@ void MarkCompact::PreCompactionPhase() { } } + KernelPreparation(); UpdateNonMovingSpace(); // fallback mode if (uffd_ == kFallbackMode) { - CompactMovingSpace</*kFallback*/true>(); + CompactMovingSpace<kFallbackMode>(nullptr); int32_t freed_bytes = black_objs_slide_diff_; bump_pointer_space_->RecordFree(freed_objects_, freed_bytes); RecordFree(ObjectBytePair(freed_objects_, freed_bytes)); } else { + DCHECK_EQ(compaction_in_progress_count_.load(std::memory_order_relaxed), 0u); // We must start worker threads 
before resuming mutators to avoid deadlocks. heap_->GetThreadPool()->StartWorkers(thread_running_gc_); } stack_end_ = nullptr; } -void MarkCompact::KernelPreparation() { - TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); +void MarkCompact::KernelPrepareRange(uint8_t* to_addr, + uint8_t* from_addr, + size_t map_size, + size_t uffd_size, + int fd, + int uffd_mode, + uint8_t* shadow_addr) { // TODO: Create mappings at 2MB aligned addresses to benefit from optimized // mremap. - size_t size = bump_pointer_space_->Capacity(); - uint8_t* begin = bump_pointer_space_->Begin(); - int flags = MREMAP_MAYMOVE | MREMAP_FIXED; + int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED; if (gHaveMremapDontunmap) { - flags |= MREMAP_DONTUNMAP; - } - - void* ret = mremap(begin, size, size, flags, from_space_begin_); - CHECK_EQ(ret, static_cast<void*>(from_space_begin_)) - << "mremap to move pages from moving space to from-space failed: " << strerror(errno) - << ". moving-space-addr=" << reinterpret_cast<void*>(begin) - << " size=" << size; - - // Without MREMAP_DONTUNMAP the source mapping is unmapped by mremap. So mmap - // the moving space again. - if (!gHaveMremapDontunmap) { - ret = mmap(begin, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); - CHECK_EQ(ret, static_cast<void*>(begin)) << "mmap for moving space failed: " << strerror(errno); + mremap_flags |= MREMAP_DONTUNMAP; + } + + void* ret = mremap(to_addr, map_size, map_size, mremap_flags, from_addr); + CHECK_EQ(ret, static_cast<void*>(from_addr)) + << "mremap to move pages failed: " << strerror(errno) + << ". space-addr=" << reinterpret_cast<void*>(to_addr) << " size=" << PrettySize(map_size); + + if (shadow_addr != nullptr) { + DCHECK_EQ(fd, kFdUnused); + DCHECK(gHaveMremapDontunmap); + ret = mremap(shadow_addr, map_size, map_size, mremap_flags, to_addr); + CHECK_EQ(ret, static_cast<void*>(to_addr)) + << "mremap from shadow to to-space map failed: " << strerror(errno); + } else if (!gHaveMremapDontunmap || fd > kFdUnused) { + // Without MREMAP_DONTUNMAP the source mapping is unmapped by mremap. So mmap + // the moving space again. + int mmap_flags = MAP_FIXED; + if (fd == kFdUnused) { + // Use MAP_FIXED_NOREPLACE so that if someone else reserves the 'to_addr' + // mapping in the meantime, which can happen when MREMAP_DONTUNMAP isn't + // available, we avoid unmapping someone else's mapping and then causing + // crashes elsewhere. + mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE; + // On some platforms MAP_ANONYMOUS expects fd to be -1.
+ fd = -1; + } else if (IsValidFd(fd)) { + mmap_flags |= MAP_SHARED; + } else { + DCHECK_EQ(fd, kFdSharedAnon); + mmap_flags |= MAP_SHARED | MAP_ANONYMOUS; + } + ret = mmap(to_addr, map_size, PROT_READ | PROT_WRITE, mmap_flags, fd, 0); + CHECK_EQ(ret, static_cast<void*>(to_addr)) + << "mmap for moving space failed: " << strerror(errno); } - - DCHECK_EQ(mprotect(from_space_begin_, size, PROT_READ), 0) - << "mprotect failed: " << strerror(errno); - - if (uffd_ >= 0) { + if (IsValidFd(uffd_)) { // Userfaultfd registration struct uffdio_register uffd_register; - uffd_register.range.start = reinterpret_cast<uintptr_t>(begin); - uffd_register.range.len = size; + uffd_register.range.start = reinterpret_cast<uintptr_t>(to_addr); + uffd_register.range.len = uffd_size; uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (uffd_mode == kMinorFaultMode) { + uffd_register.mode |= UFFDIO_REGISTER_MODE_MINOR; + } CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0) - << "ioctl_userfaultfd: register moving-space: " << strerror(errno); + << "ioctl_userfaultfd: register failed: " << strerror(errno) + << ". start:" << static_cast<void*>(to_addr) << " len:" << PrettySize(uffd_size); } } -void MarkCompact::ConcurrentCompaction(uint8_t* page) { - struct uffd_msg msg; - uint8_t* unused_space_begin = bump_pointer_space_->Begin() - + (moving_first_objs_count_ + black_page_count_) * kPageSize; - DCHECK(IsAligned<kPageSize>(unused_space_begin)); - auto zeropage_ioctl = [this] (void* addr, bool tolerate_eexist) { - struct uffdio_zeropage uffd_zeropage; - DCHECK(IsAligned<kPageSize>(addr)); - uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr); - uffd_zeropage.range.len = kPageSize; - uffd_zeropage.mode = 0; - int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage); - CHECK(ret == 0 || (tolerate_eexist && ret == -1 && errno == EEXIST)) - << "ioctl: zeropage: " << strerror(errno); - DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(kPageSize)); - }; +void MarkCompact::KernelPreparation() { + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + uint8_t* moving_space_begin = bump_pointer_space_->Begin(); + size_t moving_space_size = bump_pointer_space_->Capacity(); + int mode = kCopyMode; + size_t moving_space_register_sz; + if (minor_fault_initialized_) { + moving_space_register_sz = (moving_first_objs_count_ + black_page_count_) * kPageSize; + if (shadow_to_space_map_.IsValid()) { + size_t shadow_size = shadow_to_space_map_.Size(); + void* addr = shadow_to_space_map_.Begin(); + if (shadow_size < moving_space_register_sz) { + addr = mremap(addr, + shadow_size, + moving_space_register_sz, + // Don't allow moving with obj-ptr poisoning as the + // mapping needs to be in <4GB address space. + kObjPtrPoisoning ? 0 : MREMAP_MAYMOVE, + /*new_address=*/nullptr); + if (addr != MAP_FAILED) { + // Succeeded in expanding the mapping. Update the MemMap entry for shadow map. + MemMap temp = MemMap::MapPlaceholder( + "moving-space-shadow", static_cast<uint8_t*>(addr), moving_space_register_sz); + std::swap(shadow_to_space_map_, temp); + } + } + if (addr != MAP_FAILED) { + mode = kMinorFaultMode; + } else { + // We are not going to use shadow map. So protect it to catch any + // potential bugs. 
+ DCHECK_EQ(mprotect(shadow_to_space_map_.Begin(), shadow_to_space_map_.Size(), PROT_NONE), 0) + << "mprotect failed: " << strerror(errno); + } + } + } else { + moving_space_register_sz = moving_space_size; + } + + bool map_shared = + minor_fault_initialized_ || (!Runtime::Current()->IsZygote() && uffd_minor_fault_supported_); + uint8_t* shadow_addr = nullptr; + if (moving_to_space_fd_ == kFdUnused && map_shared) { + DCHECK(gHaveMremapDontunmap); + DCHECK(shadow_to_space_map_.IsValid()); + DCHECK_EQ(shadow_to_space_map_.Size(), moving_space_size); + shadow_addr = shadow_to_space_map_.Begin(); + } + + KernelPrepareRange(moving_space_begin, + from_space_begin_, + moving_space_size, + moving_space_register_sz, + moving_to_space_fd_, + mode, + shadow_addr); + DCHECK_EQ(mprotect(from_space_begin_, moving_space_size, PROT_READ), 0) + << "mprotect failed: " << strerror(errno); + + if (IsValidFd(uffd_)) { + for (auto& data : linear_alloc_spaces_data_) { + KernelPrepareRange(data.begin_, + data.shadow_.Begin(), + data.shadow_.Size(), + data.shadow_.Size(), + map_shared && !data.already_shared_ ? kFdSharedAnon : kFdUnused, + minor_fault_initialized_ ? kMinorFaultMode : kCopyMode); + if (map_shared) { + data.already_shared_ = true; + } + } + } +} + +template <int kMode> +void MarkCompact::ConcurrentCompaction(uint8_t* buf) { + DCHECK_NE(kMode, kFallbackMode); + DCHECK(kMode != kCopyMode || buf != nullptr); + auto zeropage_ioctl = [this](void* addr, bool tolerate_eexist, bool tolerate_enoent) { + struct uffdio_zeropage uffd_zeropage; + DCHECK(IsAligned<kPageSize>(addr)); + uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr); + uffd_zeropage.range.len = kPageSize; + uffd_zeropage.mode = 0; + int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage); + if (LIKELY(ret == 0)) { + DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(kPageSize)); + } else { + CHECK((tolerate_enoent && errno == ENOENT) || (tolerate_eexist && errno == EEXIST)) + << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr; + } + }; auto copy_ioctl = [this] (void* fault_page, void* src) { struct uffdio_copy uffd_copy; @@ -1901,12 +2436,14 @@ void MarkCompact::ConcurrentCompaction(uint8_t* page) { uffd_copy.dst = reinterpret_cast<uintptr_t>(fault_page); uffd_copy.len = kPageSize; uffd_copy.mode = 0; - CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0) - << "ioctl: copy: " << strerror(errno); + int ret = ioctl(uffd_, UFFDIO_COPY, &uffd_copy); + CHECK_EQ(ret, 0) << "ioctl_userfaultfd: copy failed: " << strerror(errno) + << ". src:" << src << " fault_page:" << fault_page; DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize)); }; - + size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_; while (true) { + struct uffd_msg msg; ssize_t nread = read(uffd_, &msg, sizeof(msg)); CHECK_GT(nread, 0); CHECK_EQ(msg.event, UFFD_EVENT_PAGEFAULT); @@ -1923,70 +2460,340 @@ void MarkCompact::ConcurrentCompaction(uint8_t* page) { // Only the last thread should map the zeropage so that the gc-thread can // proceed. 
if (ret == 1) { - zeropage_ioctl(fault_addr, /*tolerate_eexist*/ false); + zeropage_ioctl(fault_addr, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false); } else { struct uffdio_range uffd_range; uffd_range.start = msg.arg.pagefault.address; uffd_range.len = kPageSize; CHECK_EQ(ioctl(uffd_, UFFDIO_WAKE, &uffd_range), 0) - << "ioctl: wake: " << strerror(errno); + << "ioctl_userfaultfd: wake failed for concurrent-compaction termination page: " + << strerror(errno); } break; } - DCHECK(bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_addr))); uint8_t* fault_page = AlignDown(fault_addr, kPageSize); - if (fault_addr >= unused_space_begin) { - // There is a race which allows more than one thread to install a - // zero-page. But we can tolerate that. So absorb the EEXIST returned by - // the ioctl and move on. - zeropage_ioctl(fault_page, /*tolerate_eexist*/ true); - continue; - } - size_t page_idx = (fault_page - bump_pointer_space_->Begin()) / kPageSize; - PageState state = moving_pages_status_[page_idx].load(std::memory_order_relaxed); - if (state == PageState::kUncompacted) { - // Relaxed memory-order is fine as the subsequent ioctl syscall guarantees - // status to be flushed before this thread attempts to copy/zeropage the - // fault_page. - state = moving_pages_status_[page_idx].exchange(PageState::kCompacting, - std::memory_order_relaxed); - } - if (state == PageState::kCompacting) { - // Somebody else took (or taking) care of the page, so nothing to do. - continue; - } - - if (fault_page < post_compact_end_) { - // The page has to be compacted. - CompactPage(first_objs_moving_space_[page_idx].AsMirrorPtr(), - pre_compact_offset_moving_space_[page_idx], - page); - copy_ioctl(fault_page, page); + if (bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_addr))) { + ConcurrentlyProcessMovingPage<kMode>( + zeropage_ioctl, copy_ioctl, fault_page, buf, nr_moving_space_used_pages); + } else if (minor_fault_initialized_) { + ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>( + zeropage_ioctl, + copy_ioctl, + fault_page, + (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0); } else { - // The page either has to be slid, or if it's an empty page then a - // zeropage needs to be mapped. 
- mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr(); - if (first_obj != nullptr) { - DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u); - uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_); - DCHECK(IsAligned<kPageSize>(pre_compact_page)); - SlideBlackPage(first_obj, - page_idx, - pre_compact_page, - page); - copy_ioctl(fault_page, page); + ConcurrentlyProcessLinearAllocPage<kCopyMode>( + zeropage_ioctl, + copy_ioctl, + fault_page, + (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0); + } + } +} + +template <int kMode, typename ZeropageType, typename CopyType> +void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl, + CopyType& copy_ioctl, + uint8_t* fault_page, + uint8_t* buf, + size_t nr_moving_space_used_pages) { + class ScopedInProgressCount { + public: + explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) { + collector_->compaction_in_progress_count_.fetch_add(1, std::memory_order_relaxed); + } + + ~ScopedInProgressCount() { + collector_->compaction_in_progress_count_.fetch_add(-1, std::memory_order_relaxed); + } + + private: + MarkCompact* collector_; + }; + + uint8_t* unused_space_begin = + bump_pointer_space_->Begin() + nr_moving_space_used_pages * kPageSize; + DCHECK(IsAligned<kPageSize>(unused_space_begin)); + DCHECK(kMode == kCopyMode || fault_page < unused_space_begin); + if (kMode == kCopyMode && fault_page >= unused_space_begin) { + // There is a race which allows more than one thread to install a + // zero-page. But we can tolerate that. So absorb the EEXIST returned by + // the ioctl and move on. + zeropage_ioctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true); + return; + } + size_t page_idx = (fault_page - bump_pointer_space_->Begin()) / kPageSize; + mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr(); + if (first_obj == nullptr) { + // We should never have a case where two workers are trying to install a + // zeropage in this range as we synchronize using moving_pages_status_[page_idx]. + PageState expected_state = PageState::kUnprocessed; + if (moving_pages_status_[page_idx].compare_exchange_strong( + expected_state, PageState::kProcessingAndMapping, std::memory_order_relaxed)) { + zeropage_ioctl(fault_page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/true); + } else { + DCHECK_EQ(expected_state, PageState::kProcessingAndMapping); + } + return; + } + + PageState state = moving_pages_status_[page_idx].load(std::memory_order_relaxed); + while (true) { + switch (state) { + case PageState::kUnprocessed: { + // The increment to the in-progress counter must be done before updating + // the page's state. Otherwise, we will end up leaving a window wherein + // the GC-thread could observe that no worker is working on compaction + // and could end up unregistering the moving space from userfaultfd. + ScopedInProgressCount in_progress(this); + // Acquire order to ensure we don't start writing to shadow map, which is + // shared, before the CAS is successful. Release order to ensure that the + // increment to moving_compactions_in_progress above is not re-ordered + // after the CAS. 
+ if (moving_pages_status_[page_idx].compare_exchange_strong( + state, PageState::kProcessingAndMapping, std::memory_order_acquire)) { + if (kMode == kMinorFaultMode) { + DCHECK_EQ(buf, nullptr); + buf = shadow_to_space_map_.Begin() + page_idx * kPageSize; + } + + if (fault_page < post_compact_end_) { + // The page has to be compacted. + CompactPage( + first_obj, pre_compact_offset_moving_space_[page_idx], buf, kMode == kCopyMode); + } else { + DCHECK_NE(first_obj, nullptr); + DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u); + uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_); + DCHECK(IsAligned<kPageSize>(pre_compact_page)); + SlideBlackPage(first_obj, page_idx, pre_compact_page, buf, kMode == kCopyMode); + } + if (kMode == kCopyMode) { + copy_ioctl(fault_page, buf); + return; + } else { + break; + } + } + } + continue; + case PageState::kProcessing: + DCHECK_EQ(kMode, kMinorFaultMode); + if (moving_pages_status_[page_idx].compare_exchange_strong( + state, PageState::kProcessingAndMapping, std::memory_order_relaxed)) { + // Somebody else took or will take care of finishing the compaction and + // then mapping the page. + return; + } + continue; + case PageState::kProcessed: + // The page is processed but not mapped. We should map it. + break; + default: + // Somebody else took care of the page. + return; + } + break; + } + + DCHECK_EQ(kMode, kMinorFaultMode); + if (state == PageState::kUnprocessed) { + MapProcessedPages</*kFirstPageMapping=*/true>( + fault_page, moving_pages_status_, page_idx, nr_moving_space_used_pages); + } else { + DCHECK_EQ(state, PageState::kProcessed); + MapProcessedPages</*kFirstPageMapping=*/false>( + fault_page, moving_pages_status_, page_idx, nr_moving_space_used_pages); + } +} + +template <int kMode, typename ZeropageType, typename CopyType> +void MarkCompact::ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioctl, + CopyType& copy_ioctl, + uint8_t* fault_page, + bool is_minor_fault) { + DCHECK(!is_minor_fault || kMode == kMinorFaultMode); + auto arena_iter = linear_alloc_arenas_.end(); + { + TrackedArena temp_arena(fault_page); + arena_iter = linear_alloc_arenas_.upper_bound(&temp_arena); + arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter) + : linear_alloc_arenas_.end(); + } + if (arena_iter == linear_alloc_arenas_.end() || arena_iter->second <= fault_page) { + // Fault page isn't in any of the arenas that existed before we started + // compaction. So map zeropage and return. + zeropage_ioctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false); + } else { + // fault_page should always belong to some arena. 
+ DCHECK(arena_iter != linear_alloc_arenas_.end()) + << "fault_page:" << static_cast<void*>(fault_page) << "is_minor_fault:" << is_minor_fault; + // Find the linear-alloc space containing fault-page + LinearAllocSpaceData* space_data = nullptr; + for (auto& data : linear_alloc_spaces_data_) { + if (data.begin_ <= fault_page && fault_page < data.end_) { + space_data = &data; + break; + } + } + DCHECK_NE(space_data, nullptr); + ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_; + size_t page_idx = (fault_page - space_data->begin_) / kPageSize; + Atomic<PageState>* state_arr = + reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin()); + PageState state = state_arr[page_idx].load(std::memory_order_relaxed); + while (true) { + switch (state) { + case PageState::kUnprocessed: + if (state_arr[page_idx].compare_exchange_strong( + state, PageState::kProcessingAndMapping, std::memory_order_acquire)) { + if (kMode == kCopyMode || is_minor_fault) { + uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page); + DCHECK_NE(first_obj, nullptr); + LinearAllocPageUpdater updater(this); + updater(fault_page + diff, first_obj + diff); + if (kMode == kCopyMode) { + copy_ioctl(fault_page, fault_page + diff); + return; + } + } else { + // Don't touch the page in this case (there is no reason to do so + // anyways) as it would mean reading from first_obj, which could be on + // another missing page and hence may cause this thread to block, leading + // to deadlocks. + // Force read the page if it is missing so that a zeropage gets mapped on + // the shadow map and then CONTINUE ioctl will map it on linear-alloc. + ForceRead(fault_page + diff); + } + MapProcessedPages</*kFirstPageMapping=*/true>( + fault_page, state_arr, page_idx, space_data->page_status_map_.Size()); + return; + } + continue; + case PageState::kProcessing: + DCHECK_EQ(kMode, kMinorFaultMode); + if (state_arr[page_idx].compare_exchange_strong( + state, PageState::kProcessingAndMapping, std::memory_order_relaxed)) { + // Somebody else took or will take care of finishing the updates and + // then mapping the page. + return; + } + continue; + case PageState::kProcessed: + // The page is processed but not mapped. We should map it. + break; + default: + // Somebody else took care of the page. + return; + } + break; + } + + DCHECK_EQ(kMode, kMinorFaultMode); + DCHECK_EQ(state, PageState::kProcessed); + if (!is_minor_fault) { + // Force read the page if it is missing so that a zeropage gets mapped on + // the shadow map and then CONTINUE ioctl will map it on linear-alloc. 
+ ForceRead(fault_page + diff); + } + MapProcessedPages</*kFirstPageMapping=*/false>( + fault_page, state_arr, page_idx, space_data->page_status_map_.Size()); + } +} + +void MarkCompact::ProcessLinearAlloc() { + for (auto& pair : linear_alloc_arenas_) { + const TrackedArena* arena = pair.first; + uint8_t* last_byte = pair.second; + DCHECK_ALIGNED(last_byte, kPageSize); + bool others_processing = false; + // Find the linear-alloc space containing the arena + LinearAllocSpaceData* space_data = nullptr; + for (auto& data : linear_alloc_spaces_data_) { + if (data.begin_ <= arena->Begin() && arena->Begin() < data.end_) { + space_data = &data; + break; + } + } + DCHECK_NE(space_data, nullptr); + ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_; + auto visitor = [space_data, last_byte, diff, this, &others_processing]( + uint8_t* page_begin, + uint8_t* first_obj) REQUIRES_SHARED(Locks::mutator_lock_) { + // No need to process pages past last_byte as they already have updated + // gc-roots, if any. + if (page_begin >= last_byte) { + return; + } + LinearAllocPageUpdater updater(this); + size_t page_idx = (page_begin - space_data->begin_) / kPageSize; + DCHECK_LT(page_idx, space_data->page_status_map_.Size()); + Atomic<PageState>* state_arr = + reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin()); + PageState expected_state = PageState::kUnprocessed; + PageState desired_state = + minor_fault_initialized_ ? PageState::kProcessing : PageState::kProcessingAndMapping; + // Acquire order to ensure that we don't start accessing the shadow page, + // which is shared with other threads, prior to CAS. Also, for same + // reason, we used 'release' order for changing the state to 'processed'. + if (state_arr[page_idx].compare_exchange_strong( + expected_state, desired_state, std::memory_order_acquire)) { + updater(page_begin + diff, first_obj + diff); + expected_state = PageState::kProcessing; + if (!minor_fault_initialized_) { + struct uffdio_copy uffd_copy; + uffd_copy.src = reinterpret_cast<uintptr_t>(page_begin + diff); + uffd_copy.dst = reinterpret_cast<uintptr_t>(page_begin); + uffd_copy.len = kPageSize; + uffd_copy.mode = 0; + CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0) + << "ioctl_userfaultfd: linear-alloc copy failed:" << strerror(errno) + << ". dst:" << static_cast<void*>(page_begin); + DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize)); + } else if (!state_arr[page_idx].compare_exchange_strong( + expected_state, PageState::kProcessed, std::memory_order_release)) { + DCHECK_EQ(expected_state, PageState::kProcessingAndMapping); + // Force read in case the page was missing and updater didn't touch it + // as there was nothing to do. This will ensure that a zeropage is + // faulted on the shadow map. + ForceRead(page_begin + diff); + MapProcessedPages</*kFirstPageMapping=*/true>( + page_begin, state_arr, page_idx, space_data->page_status_map_.Size()); + } } else { - // We should never have a case where two workers are trying to install a - // zeropage in this range as we synchronize using - // moving_pages_status_[page_idx]. - zeropage_ioctl(fault_page, /*tolerate_eexist*/ false); + others_processing = true; } + }; + + arena->VisitRoots(visitor); + // If we are not in minor-fault mode and if no other thread was found to be + // processing any pages in this arena, then we can madvise the shadow size. + // Otherwise, we will double the memory use for linear-alloc. 
+ if (!minor_fault_initialized_ && !others_processing) { + ZeroAndReleasePages(arena->Begin() + diff, arena->Size()); } } } -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wframe-larger-than=" +void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) { + struct uffdio_range range; + range.start = reinterpret_cast<uintptr_t>(start); + range.len = len; + CHECK_EQ(ioctl(uffd_, UFFDIO_UNREGISTER, &range), 0) + << "ioctl_userfaultfd: unregister failed: " << strerror(errno) + << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len); + // Due to an oversight in the kernel implementation of 'unregister', the + // waiting threads are woken up only for copy uffds. Therefore, for now, we + // have to explicitly wake up the threads in minor-fault case. + // TODO: The fix in the kernel is being worked on. Once the kernel version + // containing the fix is known, make it conditional on that as well. + if (minor_fault_initialized_) { + CHECK_EQ(ioctl(uffd_, UFFDIO_WAKE, &range), 0) + << "ioctl_userfaultfd: wake failed: " << strerror(errno) + << ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len); + } +} + void MarkCompact::CompactionPhase() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); { @@ -1995,45 +2802,66 @@ void MarkCompact::CompactionPhase() { RecordFree(ObjectBytePair(freed_objects_, freed_bytes)); } - if (kObjPtrPoisoning) { - CompactMovingSpace</*kFallback*/false>(compaction_buffers_map_.Begin()); - // madvise the page so that we can get userfaults on it. We don't need to - // do this when not using poisoning as in that case the address location is - // untouched during compaction. - ZeroAndReleasePages(conc_compaction_termination_page_, kPageSize); + if (CanCompactMovingSpaceWithMinorFault()) { + CompactMovingSpace<kMinorFaultMode>(/*page=*/nullptr); } else { - uint8_t buf[kPageSize]; - CompactMovingSpace</*kFallback*/false>(buf); + CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin()); } - // The following triggers 'special' userfaults. When received by the + // madvise the page so that we can get userfaults on it. + ZeroAndReleasePages(conc_compaction_termination_page_, kPageSize); + + // TODO: add more sophisticated logic here wherein we sleep after attempting + // yield a couple of times. + while (compaction_in_progress_count_.load(std::memory_order_relaxed) > 0) { + sched_yield(); + } + + size_t moving_space_size = bump_pointer_space_->Capacity(); + UnregisterUffd(bump_pointer_space_->Begin(), + minor_fault_initialized_ ? + (moving_first_objs_count_ + black_page_count_) * kPageSize : + moving_space_size); + + // Release all of the memory taken by moving-space's from-map + if (minor_fault_initialized_) { + // Give write permission for the madvise(REMOVE) to succeed. + DCHECK_EQ(mprotect(from_space_begin_, moving_space_size, PROT_WRITE), 0) + << "mprotect failed: " << strerror(errno); + int ret = madvise(from_space_begin_, moving_space_size, MADV_REMOVE); + CHECK_EQ(ret, 0) << "madvise(MADV_REMOVE) failed for from-space map:" << strerror(errno); + } else { + from_space_map_.MadviseDontNeedAndZero(); + } + + ProcessLinearAlloc(); + + // The following load triggers 'special' userfaults. When received by the // thread-pool workers, they will exit out of the compaction task. This fault // happens because we madvise info_map_ above and it is at least kPageSize in length. 
DCHECK(IsAligned<kPageSize>(conc_compaction_termination_page_)); CHECK_EQ(*reinterpret_cast<volatile uint8_t*>(conc_compaction_termination_page_), 0); DCHECK_EQ(thread_pool_counter_, 0); - struct uffdio_range unregister_range; - unregister_range.start = reinterpret_cast<uintptr_t>(bump_pointer_space_->Begin()); - unregister_range.len = bump_pointer_space_->Capacity(); - CHECK_EQ(ioctl(uffd_, UFFDIO_UNREGISTER, &unregister_range), 0) - << "ioctl_userfaultfd: unregister moving-space: " << strerror(errno); - - // When poisoning ObjPtr, we are forced to use buffers for page compaction in - // lower 4GB. Now that the usage is done, madvise them. But skip the first - // page, which is used by the gc-thread for the next iteration. Otherwise, we - // get into a deadlock due to userfault on it in the next iteration. This page - // is not consuming any physical memory because we already madvised it above - // and then we triggered a read userfault, which maps a special zero-page. - if (kObjPtrPoisoning) { - ZeroAndReleasePages(compaction_buffers_map_.Begin() + kPageSize, - compaction_buffers_map_.Size() - kPageSize); - } else { - ZeroAndReleasePages(conc_compaction_termination_page_, kPageSize); + // Unregister linear-alloc spaces + for (auto& data : linear_alloc_spaces_data_) { + DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size())); + UnregisterUffd(data.begin_, data.shadow_.Size()); + // madvise linear-allocs's page-status array + data.page_status_map_.MadviseDontNeedAndZero(); + // Madvise the entire linear-alloc space's shadow. In copy-mode it gets rid + // of the pages which are still mapped. In minor-fault mode this unmaps all + // pages, which is good in reducing the mremap (done in STW pause) time in + // next GC cycle. + data.shadow_.MadviseDontNeedAndZero(); + if (minor_fault_initialized_) { + DCHECK_EQ(mprotect(data.shadow_.Begin(), data.shadow_.Size(), PROT_NONE), 0) + << "mprotect failed: " << strerror(errno); + } } + heap_->GetThreadPool()->StopWorkers(thread_running_gc_); } -#pragma clang diagnostic pop template <size_t kBufferSize> class MarkCompact::ThreadRootsVisitor : public RootVisitor { @@ -2630,23 +3458,46 @@ void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass, } void MarkCompact::FinishPhase() { + bool is_zygote = Runtime::Current()->IsZygote(); + minor_fault_initialized_ = !is_zygote && uffd_minor_fault_supported_; + // When poisoning ObjPtr, we are forced to use buffers for page compaction in + // lower 4GB. Now that the usage is done, madvise them. But skip the first + // page, which is used by the gc-thread for the next iteration. Otherwise, we + // get into a deadlock due to userfault on it in the next iteration. This page + // is not consuming any physical memory because we already madvised it above + // and then we triggered a read userfault, which maps a special zero-page. + if (!minor_fault_initialized_ || !shadow_to_space_map_.IsValid() || + shadow_to_space_map_.Size() < (moving_first_objs_count_ + black_page_count_) * kPageSize) { + ZeroAndReleasePages(compaction_buffers_map_.Begin() + kPageSize, + compaction_buffers_map_.Size() - kPageSize); + } else if (shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) { + // Now that we are going to use minor-faults from next GC cycle, we can + // unmap the buffers used by worker threads. 
+ compaction_buffers_map_.SetSize(kPageSize); + } + info_map_.MadviseDontNeedAndZero(); live_words_bitmap_->ClearBitmap(); - from_space_map_.MadviseDontNeedAndZero(); - if (UNLIKELY(Runtime::Current()->IsZygote() && uffd_ >= 0)) { + + if (UNLIKELY(is_zygote && IsValidFd(uffd_))) { heap_->DeleteThreadPool(); + // This unregisters all ranges as a side-effect. close(uffd_); - uffd_ = -1; + uffd_ = kFdUnused; uffd_initialized_ = false; } CHECK(mark_stack_->IsEmpty()); // Ensure that the mark stack is empty. mark_stack_->Reset(); updated_roots_.clear(); delete[] moving_pages_status_; - DCHECK_EQ(thread_running_gc_, Thread::Current()); - ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_); - WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_); - heap_->ClearMarkedObjects(); + linear_alloc_arenas_.clear(); + { + DCHECK_EQ(thread_running_gc_, Thread::Current()); + ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_); + WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_); + heap_->ClearMarkedObjects(); + } + std::swap(moving_to_space_fd_, moving_from_space_fd_); } } // namespace collector diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h index cb7440ceff..9931059bd4 100644 --- a/runtime/gc/collector/mark_compact.h +++ b/runtime/gc/collector/mark_compact.h @@ -17,11 +17,13 @@ #ifndef ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_ #define ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_ +#include <map> #include <memory> #include <unordered_set> -#include "base/atomic.h" #include "barrier.h" +#include "base/atomic.h" +#include "base/gc_visited_arena_pool.h" #include "base/macros.h" #include "base/mutex.h" #include "garbage_collector.h" @@ -36,7 +38,7 @@ namespace art { namespace mirror { class DexCache; -} +} // namespace mirror namespace gc { @@ -47,11 +49,16 @@ class BumpPointerSpace; } // namespace space namespace collector { -class MarkCompact : public GarbageCollector { +class MarkCompact final : public GarbageCollector { public: static constexpr size_t kAlignment = kObjectAlignment; - // Fake file descriptor for fall back mode - static constexpr int kFallbackMode = -2; + static constexpr int kCopyMode = -1; + static constexpr int kMinorFaultMode = -2; + // Fake file descriptor for fall back mode (when uffd isn't available) + static constexpr int kFallbackMode = -3; + + static constexpr int kFdSharedAnon = -1; + static constexpr int kFdUnused = -2; explicit MarkCompact(Heap* heap); @@ -130,6 +137,23 @@ class MarkCompact : public GarbageCollector { // created or was already done. bool CreateUserfaultfd(bool post_fork); + bool IsUffdMinorFaultSupported() const { return uffd_minor_fault_supported_; } + + // Add linear-alloc space data when a new space is added to + // GcVisitedArenaPool, which mostly happens only once. + void AddLinearAllocSpaceData(uint8_t* begin, size_t len, bool already_shared); + + // In copy-mode of userfaultfd, we don't need to reach a 'processed' state as + // it's given that processing thread also copies the page, thereby mapping it. + // The order is important as we may treat them as integers. 
+ enum class PageState : uint8_t { + kUnprocessed = 0, // Not processed yet + kProcessing = 1, // Being processed by GC thread and will not be mapped + kProcessed = 2, // Processed but not mapped + kProcessingAndMapping = 3, // Being processed by GC or mutator and will be mapped + kProcessedAndMapping = 4 // Processed and will be mapped + }; + private: using ObjReference = mirror::ObjectReference</*kPoisonReferences*/ false, mirror::Object>; // Number of bits (live-words) covered by a single chunk-info (below) @@ -276,12 +300,23 @@ class MarkCompact : public GarbageCollector { // Then update the references within the copied objects. The boundary objects are // partially updated such that only the references that lie in the page are updated. // This is necessary to avoid cascading userfaults. - void CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* addr) + void CompactPage(mirror::Object* obj, uint32_t offset, uint8_t* addr, bool needs_memset_zero) REQUIRES_SHARED(Locks::mutator_lock_); // Compact the bump-pointer space. Pass page that should be used as buffer for // userfaultfd. - template <bool kFallback> - void CompactMovingSpace(uint8_t* page = nullptr) REQUIRES_SHARED(Locks::mutator_lock_); + template <int kMode> + void CompactMovingSpace(uint8_t* page) REQUIRES_SHARED(Locks::mutator_lock_); + + // Compact the given page as per func and change its state. Also map/copy the + // page, if required. + template <int kMode, typename CompactionFn> + ALWAYS_INLINE void DoPageCompactionWithStateChange(size_t page_idx, + size_t status_arr_len, + uint8_t* to_space_page, + uint8_t* page, + CompactionFn func) + REQUIRES_SHARED(Locks::mutator_lock_); + // Update all the objects in the given non-moving space page. 'first' object // could have started in some preceding page. void UpdateNonMovingPage(mirror::Object* first, uint8_t* page) @@ -315,8 +350,8 @@ class MarkCompact : public GarbageCollector { void SlideBlackPage(mirror::Object* first_obj, const size_t page_idx, uint8_t* const pre_compact_page, - uint8_t* dest) - REQUIRES_SHARED(Locks::mutator_lock_); + uint8_t* dest, + bool needs_memset_zero) REQUIRES_SHARED(Locks::mutator_lock_); // Perform reference-processing and the like before sweeping the non-movable // spaces. @@ -403,25 +438,61 @@ class MarkCompact : public GarbageCollector { void SweepLargeObjects(bool swap_bitmaps) REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_); - // Store all the dex-cache objects visited during marking phase. - // This is required during compaction phase to ensure that we don't miss any - // of them from visiting (to update references). Somehow, iterating over - // class-tables to fetch these misses some of them, leading to memory - // corruption. - // TODO: once we implement concurrent compaction of classes and dex-caches, - // which will visit all of them, we should remove this. - void RememberDexCaches(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_); // Perform all kernel operations required for concurrent compaction. Includes // mremap to move pre-compact pages to from-space, followed by userfaultfd - // registration on the moving space. + // registration on the moving space and linear-alloc. void KernelPreparation(); + // Called by KernelPreparation() for every memory range being prepared. + void KernelPrepareRange(uint8_t* to_addr, + uint8_t* from_addr, + size_t map_size, + size_t uffd_size, + int fd, + int uffd_mode, + uint8_t* shadow_addr = nullptr); + // Unregister given range from userfaultfd.
+ void UnregisterUffd(uint8_t* start, size_t len); + // Called by thread-pool workers to read uffd_ and process fault events. - void ConcurrentCompaction(uint8_t* page) REQUIRES_SHARED(Locks::mutator_lock_); + template <int kMode> + void ConcurrentCompaction(uint8_t* buf) REQUIRES_SHARED(Locks::mutator_lock_); + // Called by thread-pool workers to compact and copy/map the fault page in + // moving space. + template <int kMode, typename ZeropageType, typename CopyType> + void ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl, + CopyType& copy_ioctl, + uint8_t* fault_page, + uint8_t* buf, + size_t nr_moving_space_used_pages) + REQUIRES_SHARED(Locks::mutator_lock_); + // Called by thread-pool workers to process and copy/map the fault page in + // linear-alloc. + template <int kMode, typename ZeropageType, typename CopyType> + void ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioctl, + CopyType& copy_ioctl, + uint8_t* fault_page, + bool is_minor_fault) + REQUIRES_SHARED(Locks::mutator_lock_); - enum PageState : uint8_t { - kUncompacted = 0, // The page has not been compacted yet - kCompacting // Some thread (GC or mutator) is compacting the page - }; + // Process concurrently all the pages in linear-alloc. Called by gc-thread. + void ProcessLinearAlloc() REQUIRES_SHARED(Locks::mutator_lock_); + + // Returns true if the moving space can be compacted using uffd's minor-fault + // feature. + bool CanCompactMovingSpaceWithMinorFault(); + + // Maps processed pages (from moving space and linear-alloc) for uffd's + // minor-fault feature. We try to 'claim' all processed (and unmapped) pages + // contiguous to 'to_space_start'. + // kFirstPageMapping indicates if the first page is already claimed or not. It + // also indicates that the ioctl must succeed in mapping the first page. + template <bool kFirstPageMapping> + void MapProcessedPages(uint8_t* to_space_start, + Atomic<PageState>* state_arr, + size_t arr_idx, + size_t arr_len) REQUIRES_SHARED(Locks::mutator_lock_); + + bool IsValidFd(int fd) const { return fd >= 0; } // Buffers, one per worker thread + gc-thread, to be used when // kObjPtrPoisoning == true as in that case we can't have the buffer on the @@ -450,13 +521,46 @@ class MarkCompact : public GarbageCollector { // TODO: Must be replaced with an efficient mechanism eventually. Or ensure // that double updation doesn't happen in the first place. std::unordered_set<void*> updated_roots_; - // Set of dex-caches visited during marking. See comment above - // RememberDexCaches() for the explanation. - std::unordered_set<uint32_t> dex_caches_; MemMap from_space_map_; + MemMap shadow_to_space_map_; // Any array of live-bytes in logical chunks of kOffsetChunkSize size // in the 'to-be-compacted' space. MemMap info_map_; + + class LessByArenaAddr { + public: + bool operator()(const TrackedArena* a, const TrackedArena* b) const { + return std::less<uint8_t*>{}(a->Begin(), b->Begin()); + } + }; + + // Map of arenas allocated in LinearAlloc arena-pool and last non-zero page, + // captured during compaction pause for concurrent updates. + std::map<const TrackedArena*, uint8_t*, LessByArenaAddr> linear_alloc_arenas_; + // Set of PageStatus arrays, one per arena-pool space. It's extremely rare to + // have more than one, but this is to be ready for the worst case. 
+ class LinearAllocSpaceData { + public: + LinearAllocSpaceData(MemMap&& shadow, + MemMap&& page_status_map, + uint8_t* begin, + uint8_t* end, + bool already_shared) + : shadow_(std::move(shadow)), + page_status_map_(std::move(page_status_map)), + begin_(begin), + end_(end), + already_shared_(already_shared) {} + + MemMap shadow_; + MemMap page_status_map_; + uint8_t* begin_; + uint8_t* end_; + // Indicates if the linear-alloc is already MAP_SHARED. + bool already_shared_; + }; + std::vector<LinearAllocSpaceData> linear_alloc_spaces_data_; + // The main space bitmap accounting::ContinuousSpaceBitmap* moving_space_bitmap_; accounting::ContinuousSpaceBitmap* non_moving_space_bitmap_; @@ -520,16 +624,23 @@ void* stack_end_; uint8_t* conc_compaction_termination_page_; + PointerSize pointer_size_; // Number of objects freed during this GC in moving space. It is decremented // every time an object is discovered. And total-object count is added to it // in MarkingPause(). It reaches the correct count only once the marking phase // is completed. int32_t freed_objects_; + // memfds for moving space for using userfaultfd's minor-fault feature. + // Initialized to kFdUnused to indicate that mmap should be MAP_PRIVATE in + // KernelPrepareRange(). + int moving_to_space_fd_; + int moving_from_space_fd_; // Userfault file descriptor, accessed only by the GC itself. // kFallbackMode value indicates that we are in the fallback mode. int uffd_; // Used to exit from compaction loop at the end of concurrent compaction uint8_t thread_pool_counter_; + std::atomic<uint8_t> compaction_in_progress_count_; // True while compacting. bool compacting_; // Flag indicating whether one-time uffd initialization has been done. It will @@ -538,6 +649,13 @@ // Heap::PostForkChildAction() as it's invoked in app startup path. With // this, we register the compaction-termination page on the first GC. bool uffd_initialized_; + // Flag indicating if userfaultfd supports minor-faults. Set appropriately in + // CreateUserfaultfd(), where we get this information from the kernel. + bool uffd_minor_fault_supported_; + // For non-zygote processes this flag indicates if the spaces are ready to + // start using userfaultfd's minor-fault feature. This initialization involves + // starting to use shmem (memfd_create) for the userfaultfd protected spaces.
+ bool minor_fault_initialized_; class VerifyRootMarkedVisitor; class ScanObjectVisitor; @@ -546,13 +664,17 @@ class MarkCompact : public GarbageCollector { class CardModifiedVisitor; class RefFieldsVisitor; template <bool kCheckBegin, bool kCheckEnd> class RefsUpdateVisitor; - class NativeRootsUpdateVisitor; + class ArenaPoolPageUpdater; + class ClassLoaderRootsUpdater; + class LinearAllocPageUpdater; class ImmuneSpaceUpdateObjVisitor; class ConcurrentCompactionGcTask; DISALLOW_IMPLICIT_CONSTRUCTORS(MarkCompact); }; +std::ostream& operator<<(std::ostream& os, MarkCompact::PageState value); + } // namespace collector } // namespace gc } // namespace art diff --git a/runtime/linear_alloc-inl.h b/runtime/linear_alloc-inl.h index a6b3df3ae6..928bffbc1c 100644 --- a/runtime/linear_alloc-inl.h +++ b/runtime/linear_alloc-inl.h @@ -26,6 +26,9 @@ namespace art { inline void LinearAlloc::SetFirstObject(void* begin, size_t bytes) const { DCHECK(track_allocations_); + if (ArenaAllocator::IsRunningOnMemoryTool()) { + bytes += ArenaAllocator::kMemoryToolRedZoneBytes; + } uint8_t* end = static_cast<uint8_t*>(begin) + bytes; Arena* arena = allocator_.GetHeadArena(); DCHECK_NE(arena, nullptr); diff --git a/runtime/linear_alloc.h b/runtime/linear_alloc.h index 735372151f..12c772b2ba 100644 --- a/runtime/linear_alloc.h +++ b/runtime/linear_alloc.h @@ -26,7 +26,7 @@ namespace art { class ArenaPool; enum class LinearAllocKind : uint32_t { - kNoGCRoots, + kNoGCRoots = 0, // No GC-root kind should always be 0. kGCRootArray, kArtMethodArray, kArtFieldArray, |
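The patch above exercises userfaultfd in several modes (copy, minor-fault, fallback). For readers who have not used the API, the following stand-alone sketch shows the basic missing-fault plus UFFDIO_COPY pattern that the copy-mode compaction path relies on. It is not ART code: every name in it is invented for the example, error handling is trimmed to aborts, and it assumes a kernel with userfaultfd enabled and permission to call userfaultfd(2).

// Minimal sketch: register a page in MISSING mode, block on first access,
// and resolve the fault from another thread with UFFDIO_COPY.
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <thread>

int main() {
  const size_t page_size = sysconf(_SC_PAGESIZE);

  // Create the userfaultfd and negotiate the API version.
  int uffd = static_cast<int>(syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK));
  if (uffd < 0) { perror("userfaultfd"); return 1; }
  struct uffdio_api api = {};
  api.api = UFFD_API;
  if (ioctl(uffd, UFFDIO_API, &api) != 0) { perror("UFFDIO_API"); return 1; }

  // The "to-space" page: reserved but never touched, so the first access
  // raises a missing-page fault that the kernel forwards to uffd.
  uint8_t* region = static_cast<uint8_t*>(mmap(
      nullptr, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
  if (region == MAP_FAILED) { perror("mmap"); return 1; }
  struct uffdio_register reg = {};
  reg.range.start = reinterpret_cast<uintptr_t>(region);
  reg.range.len = page_size;
  reg.mode = UFFDIO_REGISTER_MODE_MISSING;
  if (ioctl(uffd, UFFDIO_REGISTER, &reg) != 0) { perror("UFFDIO_REGISTER"); return 1; }

  // The "compaction buffer": content prepared off to the side, analogous to
  // the per-thread buffers the collector compacts pages into.
  uint8_t* buf = static_cast<uint8_t*>(mmap(
      nullptr, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
  if (buf == MAP_FAILED) { perror("mmap"); return 1; }
  memset(buf, 0xAB, page_size);

  std::thread handler([&] {
    // Wait for the fault event and read it.
    struct pollfd pfd;
    pfd.fd = uffd;
    pfd.events = POLLIN;
    poll(&pfd, 1, /*timeout=*/-1);
    struct uffd_msg msg;
    if (read(uffd, &msg, sizeof(msg)) != static_cast<ssize_t>(sizeof(msg)) ||
        msg.event != UFFD_EVENT_PAGEFAULT) {
      abort();
    }
    // Resolve the fault: atomically install the prepared buffer into the
    // faulting page. The ioctl both maps the page and wakes the blocked thread.
    struct uffdio_copy copy = {};
    copy.dst = msg.arg.pagefault.address & ~static_cast<uint64_t>(page_size - 1);
    copy.src = reinterpret_cast<uintptr_t>(buf);
    copy.len = page_size;
    copy.mode = 0;
    if (ioctl(uffd, UFFDIO_COPY, &copy) != 0) { perror("UFFDIO_COPY"); }
  });

  // This read blocks until the handler thread has installed the page.
  printf("first byte: 0x%x\n", region[0]);
  handler.join();
  return 0;
}

In the patch itself, this handler role is played by the GC thread-pool workers in ConcurrentCompaction(): in kCopyMode they resolve faults with UFFDIO_COPY and UFFDIO_ZEROPAGE as above, while in kMinorFaultMode the shmem-backed ranges are registered with UFFDIO_REGISTER_MODE_MINOR, pages are prepared in the shadow mapping, and they are then mapped through MapProcessedPages(), which the comments describe as using the CONTINUE ioctl rather than a copy.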