 runtime/base/gc_visited_arena_pool.cc | 244
 runtime/base/gc_visited_arena_pool.h  | 207
 runtime/class_linker.cc               |  20
 runtime/class_linker.h                |   5
 runtime/class_table-inl.h             |  36
 runtime/class_table.h                 |  26
 runtime/gc/collector/mark_compact.cc  | 278
 runtime/gc/collector/mark_compact.h   |   5
 runtime/gc/heap.cc                    |   4
 runtime/intern_table.h                |  13
 runtime/runtime.cc                    |  16
 11 files changed, 613 insertions(+), 241 deletions(-)
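The patch below routes ClassTable and InternTable hash-set storage through GcRootArenaAllocator into the GC-visited arena pool, so the userfaultfd mark-compact GC can update their GC roots page by page instead of visiting the tables in the pause. The sketch that follows is a simplified, self-contained illustration of that page-by-page root-update pattern (the real logic is LinearAllocPageUpdater::SingleObjectArena in mark_compact.cc further down); the names UpdateGcRootPage and UpdateCompressedRef, the constant values, and the identity relocation are assumptions made for the sketch, not ART symbols.

```cpp
// Illustrative sketch, not the ART implementation: walk a page of 32-bit
// compressed GC roots and rewrite each reference in place, preserving the
// low bits that ClassTable's TableSlot keeps alongside the reference
// (the diff notes "Least significant bits are used by class-table").
#include <cstddef>
#include <cstdint>

constexpr size_t kPageSize = 4096;          // assumption for the sketch
constexpr uint32_t kObjectAlignment = 8;    // assumption: 8-byte object alignment

// Stand-in for the collector's relocation logic: map an old compressed
// reference to its post-compaction value. Identity here, for illustration.
static uint32_t UpdateCompressedRef(uint32_t compressed_ref) {
  return compressed_ref;
}

// Update every non-null root word in [page_begin, page_begin + page_size).
void UpdateGcRootPage(uint8_t* page_begin, size_t page_size = kPageSize) {
  constexpr uint32_t kMask = kObjectAlignment - 1;
  size_t num_roots = page_size / sizeof(uint32_t);
  uint32_t* root_ptr = reinterpret_cast<uint32_t*>(page_begin);
  for (size_t i = 0; i < num_roots; ++i, ++root_ptr) {
    uint32_t word = *root_ptr;
    if (word != 0) {
      uint32_t lsbs = word & kMask;   // class-table state bits, kept as-is
      uint32_t ref = word & ~kMask;   // the compressed reference itself
      *root_ptr = UpdateCompressedRef(ref) | lsbs;
    }
  }
}
```

Because every word in such a page is either zero or a (possibly tagged) compressed reference, the collector can process these "single object" arenas without per-page first-object bookkeeping, which is why TrackedArena skips building first_obj_array_ for them.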
diff --git a/runtime/base/gc_visited_arena_pool.cc b/runtime/base/gc_visited_arena_pool.cc index 039157a30c..77cc7ae32d 100644 --- a/runtime/base/gc_visited_arena_pool.cc +++ b/runtime/base/gc_visited_arena_pool.cc @@ -27,43 +27,67 @@ namespace art { -TrackedArena::TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork) - : Arena(), first_obj_array_(nullptr), pre_zygote_fork_(pre_zygote_fork) { +TrackedArena::TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork, bool single_obj_arena) + : Arena(), + first_obj_array_(nullptr), + pre_zygote_fork_(pre_zygote_fork), + waiting_for_deletion_(false) { static_assert(ArenaAllocator::kArenaAlignment <= kPageSize, "Arena should not need stronger alignment than kPageSize."); - DCHECK_ALIGNED(size, kPageSize); - DCHECK_ALIGNED(start, kPageSize); memory_ = start; size_ = size; - size_t arr_size = size / kPageSize; - first_obj_array_.reset(new uint8_t*[arr_size]); - std::fill_n(first_obj_array_.get(), arr_size, nullptr); + if (single_obj_arena) { + // We have only one object in this arena and it is expected to consume the + // entire arena. + bytes_allocated_ = size; + } else { + DCHECK_ALIGNED(size, kPageSize); + DCHECK_ALIGNED(start, kPageSize); + size_t arr_size = size / kPageSize; + first_obj_array_.reset(new uint8_t*[arr_size]); + std::fill_n(first_obj_array_.get(), arr_size, nullptr); + } +} + +void TrackedArena::ReleasePages(uint8_t* begin, size_t size, bool pre_zygote_fork) { + DCHECK_ALIGNED(begin, kPageSize); + // Userfaultfd GC uses MAP_SHARED mappings for linear-alloc and therefore + // MADV_DONTNEED will not free the pages from page cache. Therefore use + // MADV_REMOVE instead, which is meant for this purpose. + // Arenas allocated pre-zygote fork are private anonymous and hence must be + // released using MADV_DONTNEED. + if (!gUseUserfaultfd || pre_zygote_fork || + (madvise(begin, size, MADV_REMOVE) == -1 && errno == EINVAL)) { + // MADV_REMOVE fails if invoked on anonymous mapping, which could happen + // if the arena is released before userfaultfd-GC starts using memfd. So + // use MADV_DONTNEED. + ZeroAndReleaseMemory(begin, size); + } } void TrackedArena::Release() { if (bytes_allocated_ > 0) { - // Userfaultfd GC uses MAP_SHARED mappings for linear-alloc and therefore - // MADV_DONTNEED will not free the pages from page cache. Therefore use - // MADV_REMOVE instead, which is meant for this purpose. - // Arenas allocated pre-zygote fork are private anonymous and hence must be - // released using MADV_DONTNEED. - if (!gUseUserfaultfd || pre_zygote_fork_ || - (madvise(Begin(), Size(), MADV_REMOVE) == -1 && errno == EINVAL)) { - // MADV_REMOVE fails if invoked on anonymous mapping, which could happen - // if the arena is released before userfaultfd-GC starts using memfd. So - // use MADV_DONTNEED. 
- ZeroAndReleaseMemory(Begin(), Size()); + ReleasePages(Begin(), Size(), pre_zygote_fork_); + if (first_obj_array_.get() != nullptr) { + std::fill_n(first_obj_array_.get(), Size() / kPageSize, nullptr); } - std::fill_n(first_obj_array_.get(), Size() / kPageSize, nullptr); bytes_allocated_ = 0; } } void TrackedArena::SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end) { + DCHECK(first_obj_array_.get() != nullptr); DCHECK_LE(static_cast<void*>(Begin()), static_cast<void*>(obj_end)); DCHECK_LT(static_cast<void*>(obj_begin), static_cast<void*>(obj_end)); + GcVisitedArenaPool* arena_pool = + static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); size_t idx = static_cast<size_t>(obj_begin - Begin()) / kPageSize; size_t last_byte_idx = static_cast<size_t>(obj_end - 1 - Begin()) / kPageSize; + // Do the update below with arena-pool's lock in shared-mode to serialize with + // the compaction-pause wherein we acquire it exclusively. This is to ensure + // that last-byte read there doesn't change after reading it and before + // userfaultfd registration. + ReaderMutexLock rmu(Thread::Current(), arena_pool->GetLock()); // If the addr is at the beginning of a page, then we set it for that page too. if (IsAligned<kPageSize>(obj_begin)) { first_obj_array_[idx] = obj_begin; @@ -106,7 +130,13 @@ uint8_t* GcVisitedArenaPool::AddMap(size_t min_size) { } GcVisitedArenaPool::GcVisitedArenaPool(bool low_4gb, bool is_zygote, const char* name) - : bytes_allocated_(0), name_(name), low_4gb_(low_4gb), pre_zygote_fork_(is_zygote) {} + : lock_("gc-visited arena-pool", kGenericBottomLock), + bytes_allocated_(0), + unused_arenas_(nullptr), + name_(name), + defer_arena_freeing_(false), + low_4gb_(low_4gb), + pre_zygote_fork_(is_zygote) {} GcVisitedArenaPool::~GcVisitedArenaPool() { for (Chunk* chunk : free_chunks_) { @@ -117,13 +147,12 @@ GcVisitedArenaPool::~GcVisitedArenaPool() { } size_t GcVisitedArenaPool::GetBytesAllocated() const { - std::lock_guard<std::mutex> lock(lock_); + ReaderMutexLock rmu(Thread::Current(), lock_); return bytes_allocated_; } uint8_t* GcVisitedArenaPool::AddPreZygoteForkMap(size_t size) { DCHECK(pre_zygote_fork_); - DCHECK(Runtime::Current()->IsZygote()); std::string pre_fork_name = "Pre-zygote-"; pre_fork_name += name_; std::string err_msg; @@ -137,18 +166,67 @@ uint8_t* GcVisitedArenaPool::AddPreZygoteForkMap(size_t size) { return map.Begin(); } -Arena* GcVisitedArenaPool::AllocArena(size_t size) { +uint8_t* GcVisitedArenaPool::AllocSingleObjArena(size_t size) { + WriterMutexLock wmu(Thread::Current(), lock_); + Arena* arena; + DCHECK(gUseUserfaultfd); + // To minimize private dirty, all class and intern table allocations are + // done outside LinearAlloc range so they are untouched during GC. 
+ if (pre_zygote_fork_) { + uint8_t* begin = static_cast<uint8_t*>(malloc(size)); + auto insert_result = allocated_arenas_.insert( + new TrackedArena(begin, size, /*pre_zygote_fork=*/true, /*single_obj_arena=*/true)); + arena = *insert_result.first; + } else { + arena = AllocArena(size, /*single_obj_arena=*/true); + } + return arena->Begin(); +} + +void GcVisitedArenaPool::FreeSingleObjArena(uint8_t* addr) { + Thread* self = Thread::Current(); + size_t size; + bool zygote_arena; + { + TrackedArena temp_arena(addr); + WriterMutexLock wmu(self, lock_); + auto iter = allocated_arenas_.find(&temp_arena); + DCHECK(iter != allocated_arenas_.end()); + TrackedArena* arena = *iter; + size = arena->Size(); + zygote_arena = arena->IsPreZygoteForkArena(); + DCHECK_EQ(arena->Begin(), addr); + DCHECK(arena->IsSingleObjectArena()); + allocated_arenas_.erase(iter); + if (defer_arena_freeing_) { + arena->SetupForDeferredDeletion(unused_arenas_); + unused_arenas_ = arena; + } else { + delete arena; + } + } + // Refer to the comment in FreeArenaChain() for why the pages are released + // after deleting the arena. + if (zygote_arena) { + free(addr); + } else { + TrackedArena::ReleasePages(addr, size, /*pre_zygote_fork=*/false); + WriterMutexLock wmu(self, lock_); + FreeRangeLocked(addr, size); + } +} + +Arena* GcVisitedArenaPool::AllocArena(size_t size, bool single_obj_arena) { // Return only page aligned sizes so that madvise can be leveraged. size = RoundUp(size, kPageSize); - std::lock_guard<std::mutex> lock(lock_); - if (pre_zygote_fork_) { // The first fork out of zygote hasn't happened yet. Allocate arena in a // private-anonymous mapping to retain clean pages across fork. - DCHECK(Runtime::Current()->IsZygote()); uint8_t* addr = AddPreZygoteForkMap(size); - auto emplace_result = allocated_arenas_.emplace(addr, size, /*pre_zygote_fork=*/true); - return const_cast<TrackedArena*>(&(*emplace_result.first)); + auto insert_result = allocated_arenas_.insert( + new TrackedArena(addr, size, /*pre_zygote_fork=*/true, single_obj_arena)); + DCHECK(insert_result.second); + return *insert_result.first; } Chunk temp_chunk(nullptr, size); @@ -165,19 +243,21 @@ Arena* GcVisitedArenaPool::AllocArena(size_t size) { // if the best-fit chunk < 2x the requested size, then give the whole chunk. if (chunk->size_ < 2 * size) { DCHECK_GE(chunk->size_, size); - auto emplace_result = allocated_arenas_.emplace(chunk->addr_, - chunk->size_, - /*pre_zygote_fork=*/false); - DCHECK(emplace_result.second); + auto insert_result = allocated_arenas_.insert(new TrackedArena(chunk->addr_, + chunk->size_, + /*pre_zygote_fork=*/false, + single_obj_arena)); + DCHECK(insert_result.second); free_chunks_.erase(free_chunks_iter); best_fit_allocs_.erase(best_fit_iter); delete chunk; - return const_cast<TrackedArena*>(&(*emplace_result.first)); + return *insert_result.first; } else { - auto emplace_result = allocated_arenas_.emplace(chunk->addr_, - size, - /*pre_zygote_fork=*/false); - DCHECK(emplace_result.second); + auto insert_result = allocated_arenas_.insert(new TrackedArena(chunk->addr_, + size, + /*pre_zygote_fork=*/false, + single_obj_arena)); + DCHECK(insert_result.second); // Compute next iterators for faster insert later. 
auto next_best_fit_iter = best_fit_iter; next_best_fit_iter++; @@ -190,7 +270,7 @@ Arena* GcVisitedArenaPool::AllocArena(size_t size) { DCHECK_EQ(free_chunks_nh.value()->addr_, chunk->addr_); best_fit_allocs_.insert(next_best_fit_iter, std::move(best_fit_nh)); free_chunks_.insert(next_free_chunks_iter, std::move(free_chunks_nh)); - return const_cast<TrackedArena*>(&(*emplace_result.first)); + return *insert_result.first; } } @@ -266,27 +346,79 @@ void GcVisitedArenaPool::FreeArenaChain(Arena* first) { // TODO: Handle the case when arena_allocator::kArenaAllocatorPreciseTracking // is true. See MemMapArenaPool::FreeArenaChain() for example. CHECK(!arena_allocator::kArenaAllocatorPreciseTracking); + Thread* self = Thread::Current(); + // vector of arena ranges to be freed and whether they are pre-zygote-fork. + std::vector<std::tuple<uint8_t*, size_t, bool>> free_ranges; + + { + WriterMutexLock wmu(self, lock_); + while (first != nullptr) { + TrackedArena* temp = down_cast<TrackedArena*>(first); + DCHECK(!temp->IsSingleObjectArena()); + first = first->Next(); + free_ranges.emplace_back(temp->Begin(), temp->Size(), temp->IsPreZygoteForkArena()); + // In other implementations of ArenaPool this is calculated when asked for, + // thanks to the list of free arenas that is kept around. But in this case, + // we release the freed arena back to the pool and therefore need to + // calculate here. + bytes_allocated_ += temp->GetBytesAllocated(); + auto iter = allocated_arenas_.find(temp); + DCHECK(iter != allocated_arenas_.end()); + allocated_arenas_.erase(iter); + if (defer_arena_freeing_) { + temp->SetupForDeferredDeletion(unused_arenas_); + unused_arenas_ = temp; + } else { + delete temp; + } + } + } - // madvise the arenas before acquiring lock for scalability - for (Arena* temp = first; temp != nullptr; temp = temp->Next()) { - temp->Release(); + // madvise of arenas must be done after the above loop which serializes with + // MarkCompact::ProcessLinearAlloc() so that if it finds an arena to be not + // 'waiting-for-deletion' then it finishes the arena's processing before + // clearing here. Otherwise, we could have a situation wherein arena-pool + // assumes the memory range of the arena(s) to be zero'ed (by madvise), + // whereas GC maps stale arena pages. + for (auto& iter : free_ranges) { + // No need to madvise pre-zygote-fork arenas as they will munmapped below. + if (!std::get<2>(iter)) { + TrackedArena::ReleasePages(std::get<0>(iter), std::get<1>(iter), /*pre_zygote_fork=*/false); + } } - std::lock_guard<std::mutex> lock(lock_); - arenas_freed_ = true; - while (first != nullptr) { - FreeRangeLocked(first->Begin(), first->Size()); - // In other implementations of ArenaPool this is calculated when asked for, - // thanks to the list of free arenas that is kept around. But in this case, - // we release the freed arena back to the pool and therefore need to - // calculate here. - bytes_allocated_ += first->GetBytesAllocated(); - TrackedArena* temp = down_cast<TrackedArena*>(first); - // TODO: Add logic to unmap the maps corresponding to pre-zygote-fork - // arenas, which are expected to be released only during shutdown. 
- first = first->Next(); - size_t erase_count = allocated_arenas_.erase(*temp); - DCHECK_EQ(erase_count, 1u); + WriterMutexLock wmu(self, lock_); + for (auto& iter : free_ranges) { + if (UNLIKELY(std::get<2>(iter))) { + bool found = false; + for (auto map_iter = maps_.begin(); map_iter != maps_.end(); map_iter++) { + if (map_iter->Begin() == std::get<0>(iter)) { + // erase will destruct the MemMap and thereby munmap. But this happens + // very rarely so it's ok to do it with lock acquired. + maps_.erase(map_iter); + found = true; + break; + } + } + CHECK(found); + } else { + FreeRangeLocked(std::get<0>(iter), std::get<1>(iter)); + } + } +} + +void GcVisitedArenaPool::DeleteUnusedArenas() { + TrackedArena* arena; + { + WriterMutexLock wmu(Thread::Current(), lock_); + defer_arena_freeing_ = false; + arena = unused_arenas_; + unused_arenas_ = nullptr; + } + while (arena != nullptr) { + TrackedArena* temp = down_cast<TrackedArena*>(arena->Next()); + delete arena; + arena = temp; } } diff --git a/runtime/base/gc_visited_arena_pool.h b/runtime/base/gc_visited_arena_pool.h index e307147c9e..f3c1abcb5a 100644 --- a/runtime/base/gc_visited_arena_pool.h +++ b/runtime/base/gc_visited_arena_pool.h @@ -17,12 +17,16 @@ #ifndef ART_RUNTIME_BASE_GC_VISITED_ARENA_POOL_H_ #define ART_RUNTIME_BASE_GC_VISITED_ARENA_POOL_H_ -#include "base/casts.h" +#include <set> + +#include "base/allocator.h" #include "base/arena_allocator.h" +#include "base/casts.h" +#include "base/hash_set.h" #include "base/locks.h" #include "base/mem_map.h" - -#include <set> +#include "read_barrier_config.h" +#include "runtime.h" namespace art { @@ -34,27 +38,45 @@ class TrackedArena final : public Arena { public: // Used for searching in maps. Only arena's starting address is relevant. explicit TrackedArena(uint8_t* addr) : pre_zygote_fork_(false) { memory_ = addr; } - TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork); + TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork, bool single_obj_arena); template <typename PageVisitor> void VisitRoots(PageVisitor& visitor) const REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK_ALIGNED(Size(), kPageSize); - DCHECK_ALIGNED(Begin(), kPageSize); - int nr_pages = Size() / kPageSize; uint8_t* page_begin = Begin(); - for (int i = 0; i < nr_pages && first_obj_array_[i] != nullptr; i++, page_begin += kPageSize) { - visitor(page_begin, first_obj_array_[i]); + if (first_obj_array_.get() != nullptr) { + DCHECK_ALIGNED(Size(), kPageSize); + DCHECK_ALIGNED(Begin(), kPageSize); + for (int i = 0, nr_pages = Size() / kPageSize; i < nr_pages; i++, page_begin += kPageSize) { + uint8_t* first = first_obj_array_[i]; + if (first != nullptr) { + visitor(page_begin, first, kPageSize); + } else { + break; + } + } + } else { + size_t page_size = Size(); + while (page_size > kPageSize) { + visitor(page_begin, nullptr, kPageSize); + page_begin += kPageSize; + page_size -= kPageSize; + } + visitor(page_begin, nullptr, page_size); } } // Return the page addr of the first page with first_obj set to nullptr. uint8_t* GetLastUsedByte() const REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK_ALIGNED(Begin(), kPageSize); - DCHECK_ALIGNED(End(), kPageSize); // Jump past bytes-allocated for arenas which are not currently being used // by arena-allocator. This helps in reducing loop iterations below. 
uint8_t* last_byte = AlignUp(Begin() + GetBytesAllocated(), kPageSize); - DCHECK_LE(last_byte, End()); + if (first_obj_array_.get() != nullptr) { + DCHECK_ALIGNED(Begin(), kPageSize); + DCHECK_ALIGNED(End(), kPageSize); + DCHECK_LE(last_byte, End()); + } else { + DCHECK_EQ(last_byte, End()); + } for (size_t i = (last_byte - Begin()) / kPageSize; last_byte < End() && first_obj_array_[i] != nullptr; last_byte += kPageSize, i++) { @@ -66,21 +88,43 @@ class TrackedArena final : public Arena { uint8_t* GetFirstObject(uint8_t* addr) const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK_LE(Begin(), addr); DCHECK_GT(End(), addr); - return first_obj_array_[(addr - Begin()) / kPageSize]; + if (first_obj_array_.get() != nullptr) { + return first_obj_array_[(addr - Begin()) / kPageSize]; + } else { + // The pages of this arena contain array of GC-roots. So we don't need + // first-object of any given page of the arena. + // Returning null helps distinguish which visitor is to be called. + return nullptr; + } } // Set 'obj_begin' in first_obj_array_ in every element for which it's the // first object. void SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end); + // Setup the arena for deferred deletion. + void SetupForDeferredDeletion(TrackedArena* next_arena) { + DCHECK(next_arena->waiting_for_deletion_); + DCHECK(!waiting_for_deletion_); + waiting_for_deletion_ = true; + next_ = next_arena; + } + bool IsWaitingForDeletion() const { return waiting_for_deletion_; } + // Madvise the pages in the given range. 'begin' is expected to be page + // aligned. + // TODO: Remove this once we remove the shmem (minor-fault) code in + // userfaultfd GC and directly use ZeroAndReleaseMemory(). + static void ReleasePages(uint8_t* begin, size_t size, bool pre_zygote_fork); void Release() override; bool IsPreZygoteForkArena() const { return pre_zygote_fork_; } + bool IsSingleObjectArena() const { return first_obj_array_.get() == nullptr; } private: // first_obj_array_[i] is the object that overlaps with the ith page's // beginning, i.e. first_obj_array_[i] <= ith page_begin. std::unique_ptr<uint8_t*[]> first_obj_array_; const bool pre_zygote_fork_; + bool waiting_for_deletion_; }; // An arena-pool wherein allocations can be tracked so that the GC can visit all @@ -101,15 +145,24 @@ class GcVisitedArenaPool final : public ArenaPool { bool is_zygote = false, const char* name = "LinearAlloc"); virtual ~GcVisitedArenaPool(); - Arena* AllocArena(size_t size) override; - void FreeArenaChain(Arena* first) override; - size_t GetBytesAllocated() const override; + + Arena* AllocArena(size_t size, bool need_first_obj_arr) REQUIRES(lock_); + // Use by arena allocator. 
+ Arena* AllocArena(size_t size) override REQUIRES(!lock_) { + WriterMutexLock wmu(Thread::Current(), lock_); + return AllocArena(size, /*single_obj_arena=*/false); + } + void FreeArenaChain(Arena* first) override REQUIRES(!lock_); + size_t GetBytesAllocated() const override REQUIRES(!lock_); void ReclaimMemory() override {} void LockReclaimMemory() override {} void TrimMaps() override {} - bool Contains(void* ptr) { - std::lock_guard<std::mutex> lock(lock_); + uint8_t* AllocSingleObjArena(size_t size) REQUIRES(!lock_); + void FreeSingleObjArena(uint8_t* addr) REQUIRES(!lock_); + + bool Contains(void* ptr) REQUIRES(!lock_) { + ReaderMutexLock rmu(Thread::Current(), lock_); for (auto& map : maps_) { if (map.HasAddress(ptr)) { return true; @@ -119,51 +172,43 @@ class GcVisitedArenaPool final : public ArenaPool { } template <typename PageVisitor> - void VisitRoots(PageVisitor& visitor) REQUIRES_SHARED(Locks::mutator_lock_) { - std::lock_guard<std::mutex> lock(lock_); + void VisitRoots(PageVisitor& visitor) REQUIRES_SHARED(Locks::mutator_lock_, lock_) { for (auto& arena : allocated_arenas_) { - arena.VisitRoots(visitor); + arena->VisitRoots(visitor); } } template <typename Callback> - void ForEachAllocatedArena(Callback cb) REQUIRES_SHARED(Locks::mutator_lock_) { - std::lock_guard<std::mutex> lock(lock_); + void ForEachAllocatedArena(Callback cb) REQUIRES_SHARED(Locks::mutator_lock_, lock_) { + // We should not have any unused arenas when calling this function. + CHECK(unused_arenas_ == nullptr); for (auto& arena : allocated_arenas_) { - cb(arena); + cb(*arena); } } // Called in Heap::PreZygoteFork(). All allocations after this are done in // arena-pool which is visited by userfaultfd. - void SetupPostZygoteMode() { - std::lock_guard<std::mutex> lock(lock_); + void SetupPostZygoteMode() REQUIRES(!lock_) { + WriterMutexLock wmu(Thread::Current(), lock_); DCHECK(pre_zygote_fork_); pre_zygote_fork_ = false; } // For userfaultfd GC to be able to acquire the lock to avoid concurrent // release of arenas when it is visiting them. - std::mutex& GetLock() { return lock_; } - - // Find the given arena in allocated_arenas_. The function is called with - // lock_ acquired. - bool FindAllocatedArena(const TrackedArena* arena) const NO_THREAD_SAFETY_ANALYSIS { - for (auto& allocated_arena : allocated_arenas_) { - if (arena == &allocated_arena) { - return true; - } - } - return false; - } + ReaderWriterMutex& GetLock() const RETURN_CAPABILITY(lock_) { return lock_; } - void ClearArenasFreed() { - std::lock_guard<std::mutex> lock(lock_); - arenas_freed_ = false; + // Called in the compaction pause to indicate that all arenas that will be + // freed until compaction is done shouldn't delete the TrackedArena object to + // avoid ABA problem. Called with lock_ acquired. + void DeferArenaFreeing() REQUIRES(lock_) { + CHECK(unused_arenas_ == nullptr); + defer_arena_freeing_ = true; } - // The function is called with lock_ acquired. - bool AreArenasFreed() const NO_THREAD_SAFETY_ANALYSIS { return arenas_freed_; } + // Clear defer_arena_freeing_ and delete all unused arenas. 
+ void DeleteUnusedArenas() REQUIRES(!lock_); private: void FreeRangeLocked(uint8_t* range_begin, size_t range_size) REQUIRES(lock_); @@ -197,31 +242,40 @@ class GcVisitedArenaPool final : public ArenaPool { } }; - class LessByArenaAddr { + class TrackedArenaEquals { public: - bool operator()(const TrackedArena& a, const TrackedArena& b) const { - return std::less<uint8_t*>{}(a.Begin(), b.Begin()); + bool operator()(const TrackedArena* a, const TrackedArena* b) const { + return std::equal_to<uint8_t*>{}(a->Begin(), b->Begin()); } }; - // Use a std::mutex here as Arenas are second-from-the-bottom when using MemMaps, and MemMap - // itself uses std::mutex scoped to within an allocate/free only. - mutable std::mutex lock_; + class TrackedArenaHash { + public: + size_t operator()(const TrackedArena* arena) const { + return std::hash<size_t>{}(reinterpret_cast<uintptr_t>(arena->Begin()) / kPageSize); + } + }; + using AllocatedArenaSet = + HashSet<TrackedArena*, DefaultEmptyFn<TrackedArena*>, TrackedArenaHash, TrackedArenaEquals>; + + mutable ReaderWriterMutex lock_; std::vector<MemMap> maps_ GUARDED_BY(lock_); std::set<Chunk*, LessByChunkSize> best_fit_allocs_ GUARDED_BY(lock_); std::set<Chunk*, LessByChunkAddr> free_chunks_ GUARDED_BY(lock_); // Set of allocated arenas. It's required to be able to find the arena // corresponding to a given address. - // TODO: consider using HashSet, which is more memory efficient. - std::set<TrackedArena, LessByArenaAddr> allocated_arenas_ GUARDED_BY(lock_); + AllocatedArenaSet allocated_arenas_ GUARDED_BY(lock_); // Number of bytes allocated so far. size_t bytes_allocated_ GUARDED_BY(lock_); + // To hold arenas that are freed while GC is happening. These are kept until + // the end of GC to avoid ABA problem. + TrackedArena* unused_arenas_ GUARDED_BY(lock_); const char* name_; // Flag to indicate that some arenas have been freed. This flag is used as an // optimization by GC to know if it needs to find if the arena being visited // has been freed or not. The flag is cleared in the compaction pause and read // when linear-alloc space is concurrently visited updated to update GC roots. - bool arenas_freed_ GUARDED_BY(lock_); + bool defer_arena_freeing_ GUARDED_BY(lock_); const bool low_4gb_; // Set to true in zygote process so that all linear-alloc allocations are in // private-anonymous mappings and not on userfaultfd visited pages. At @@ -232,6 +286,55 @@ class GcVisitedArenaPool final : public ArenaPool { DISALLOW_COPY_AND_ASSIGN(GcVisitedArenaPool); }; +// Allocator for class-table and intern-table hash-sets. It enables updating the +// roots concurrently page-by-page. +template <class T, AllocatorTag kTag> +class GcRootArenaAllocator : public TrackingAllocator<T, kTag> { + public: + using value_type = typename TrackingAllocator<T, kTag>::value_type; + using size_type = typename TrackingAllocator<T, kTag>::size_type; + using difference_type = typename TrackingAllocator<T, kTag>::difference_type; + using pointer = typename TrackingAllocator<T, kTag>::pointer; + using const_pointer = typename TrackingAllocator<T, kTag>::const_pointer; + using reference = typename TrackingAllocator<T, kTag>::reference; + using const_reference = typename TrackingAllocator<T, kTag>::const_reference; + + // Used internally by STL data structures. + template <class U> + explicit GcRootArenaAllocator( + [[maybe_unused]] const GcRootArenaAllocator<U, kTag>& alloc) noexcept {} + // Used internally by STL data structures. 
+ GcRootArenaAllocator() noexcept : TrackingAllocator<T, kTag>() {} + + // Enables an allocator for objects of one type to allocate storage for objects of another type. + // Used internally by STL data structures. + template <class U> + struct rebind { + using other = GcRootArenaAllocator<U, kTag>; + }; + + pointer allocate(size_type n, [[maybe_unused]] const_pointer hint = 0) { + if (!gUseUserfaultfd) { + return TrackingAllocator<T, kTag>::allocate(n); + } + size_t size = n * sizeof(T); + GcVisitedArenaPool* pool = + down_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); + return reinterpret_cast<pointer>(pool->AllocSingleObjArena(size)); + } + + template <typename PT> + void deallocate(PT p, size_type n) { + if (!gUseUserfaultfd) { + TrackingAllocator<T, kTag>::deallocate(p, n); + return; + } + GcVisitedArenaPool* pool = + down_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); + pool->FreeSingleObjArena(reinterpret_cast<uint8_t*>(p)); + } +}; + } // namespace art #endif // ART_RUNTIME_BASE_GC_VISITED_ARENA_POOL_H_ diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 9d175cdba5..d4666e5f9c 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -2426,17 +2426,9 @@ void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) { boot_class_table_->VisitRoots(root_visitor); // If tracing is enabled, then mark all the class loaders to prevent unloading. if ((flags & kVisitRootFlagClassLoader) != 0 || tracing_enabled) { - gc::Heap* const heap = Runtime::Current()->GetHeap(); - // Don't visit class-loaders if compacting with userfaultfd GC as these - // weaks are updated using Runtime::SweepSystemWeaks() and the GC doesn't - // tolerate double updates. - if (!heap->IsPerformingUffdCompaction()) { - for (const ClassLoaderData& data : class_loaders_) { - GcRoot<mirror::Object> root(GcRoot<mirror::Object>(self->DecodeJObject(data.weak_root))); - root.VisitRoot(visitor, RootInfo(kRootVMInternal)); - } - } else { - DCHECK_EQ(heap->CurrentCollectorType(), gc::CollectorType::kCollectorTypeCMC); + for (const ClassLoaderData& data : class_loaders_) { + GcRoot<mirror::Object> root(GcRoot<mirror::Object>(self->DecodeJObject(data.weak_root))); + root.VisitRoot(visitor, RootInfo(kRootVMInternal)); } } } else if (!gUseReadBarrier && (flags & kVisitRootFlagNewRoots) != 0) { @@ -2476,9 +2468,11 @@ void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) { // Keep in sync with InitCallback. Anything we visit, we need to // reinit references to when reinitializing a ClassLinker from a // mapped image. -void ClassLinker::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) { +void ClassLinker::VisitRoots(RootVisitor* visitor, VisitRootFlags flags, bool visit_class_roots) { class_roots_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal)); - VisitClassRoots(visitor, flags); + if (visit_class_roots) { + VisitClassRoots(visitor, flags); + } // Instead of visiting the find_array_class_cache_ drop it so that it doesn't prevent class // unloading if we are marking roots. 
DropFindArrayClassCache(); diff --git a/runtime/class_linker.h b/runtime/class_linker.h index 3aae0422c1..c81713b116 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -514,7 +514,7 @@ class ClassLinker { void VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_) REQUIRES_SHARED(Locks::mutator_lock_); - void VisitRoots(RootVisitor* visitor, VisitRootFlags flags) + void VisitRoots(RootVisitor* visitor, VisitRootFlags flags, bool visit_class_roots = true) REQUIRES(!Locks::dex_lock_, !Locks::classlinker_classes_lock_, !Locks::trace_lock_) REQUIRES_SHARED(Locks::mutator_lock_); // Visits all dex-files accessible by any class-loader or the BCP. @@ -866,6 +866,9 @@ class ClassLinker { // Enable or disable public sdk checks. virtual void SetEnablePublicSdkChecks(bool enabled); void RemoveDexFromCaches(const DexFile& dex_file); + ClassTable* GetBootClassTable() REQUIRES_SHARED(Locks::classlinker_classes_lock_) { + return boot_class_table_.get(); + } protected: virtual bool InitializeClass(Thread* self, diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h index ecc8a0a620..4ee59a79f7 100644 --- a/runtime/class_table-inl.h +++ b/runtime/class_table-inl.h @@ -68,12 +68,14 @@ inline bool ClassTable::ClassDescriptorEquals::operator()(const TableSlot& a, return a.Read<kWithoutReadBarrier>()->DescriptorEquals(b.first); } -template<class Visitor> -void ClassTable::VisitRoots(Visitor& visitor) { +template <class Visitor> +void ClassTable::VisitRoots(Visitor& visitor, bool skip_classes) { ReaderMutexLock mu(Thread::Current(), lock_); - for (ClassSet& class_set : classes_) { - for (TableSlot& table_slot : class_set) { - table_slot.VisitRoot(visitor); + if (!skip_classes) { + for (ClassSet& class_set : classes_) { + for (TableSlot& table_slot : class_set) { + table_slot.VisitRoot(visitor); + } } } for (GcRoot<mirror::Object>& root : strong_roots_) { @@ -86,12 +88,14 @@ void ClassTable::VisitRoots(Visitor& visitor) { } } -template<class Visitor> -void ClassTable::VisitRoots(const Visitor& visitor) { +template <class Visitor> +void ClassTable::VisitRoots(const Visitor& visitor, bool skip_classes) { ReaderMutexLock mu(Thread::Current(), lock_); - for (ClassSet& class_set : classes_) { - for (TableSlot& table_slot : class_set) { - table_slot.VisitRoot(visitor); + if (!skip_classes) { + for (ClassSet& class_set : classes_) { + for (TableSlot& table_slot : class_set) { + table_slot.VisitRoot(visitor); + } } } for (GcRoot<mirror::Object>& root : strong_roots_) { @@ -104,6 +108,18 @@ void ClassTable::VisitRoots(const Visitor& visitor) { } } +template <class Condition, class Visitor> +void ClassTable::VisitClassesIfConditionMet(Condition& cond, Visitor& visitor) { + ReaderMutexLock mu(Thread::Current(), lock_); + for (ClassSet& class_set : classes_) { + if (cond(class_set)) { + for (TableSlot& table_slot : class_set) { + table_slot.VisitRoot(visitor); + } + } + } +} + template <typename Visitor> class ClassTable::TableSlot::ClassAndRootVisitor { public: diff --git a/runtime/class_table.h b/runtime/class_table.h index 7e263737c3..54e066a18c 100644 --- a/runtime/class_table.h +++ b/runtime/class_table.h @@ -21,7 +21,7 @@ #include <utility> #include <vector> -#include "base/allocator.h" +#include "base/gc_visited_arena_pool.h" #include "base/hash_set.h" #include "base/macros.h" #include "base/mutex.h" @@ -151,7 +151,7 @@ class ClassTable { TableSlotEmptyFn, ClassDescriptorHash, ClassDescriptorEquals, - 
TrackingAllocator<TableSlot, kAllocatorTagClassTable>>; + GcRootArenaAllocator<TableSlot, kAllocatorTagClassTable>>; ClassTable(); @@ -181,7 +181,7 @@ class ClassTable { REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); - // Returns the number of classes in the class table. + // Returns the number of class-sets in the class table. size_t Size() const REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); @@ -194,17 +194,13 @@ class ClassTable { REQUIRES_SHARED(Locks::mutator_lock_); // NO_THREAD_SAFETY_ANALYSIS for object marking requiring heap bitmap lock. - template<class Visitor> - void VisitRoots(Visitor& visitor) - NO_THREAD_SAFETY_ANALYSIS - REQUIRES(!lock_) - REQUIRES_SHARED(Locks::mutator_lock_); + template <class Visitor> + void VisitRoots(Visitor& visitor, bool skip_classes = false) NO_THREAD_SAFETY_ANALYSIS + REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); - template<class Visitor> - void VisitRoots(const Visitor& visitor) - NO_THREAD_SAFETY_ANALYSIS - REQUIRES(!lock_) - REQUIRES_SHARED(Locks::mutator_lock_); + template <class Visitor> + void VisitRoots(const Visitor& visitor, bool skip_classes = false) NO_THREAD_SAFETY_ANALYSIS + REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); template<class Visitor> void VisitClassesAndRoots(Visitor& visitor) @@ -212,6 +208,10 @@ class ClassTable { REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); + // Visit classes in those class-sets which satisfy 'cond'. + template <class Condition, class Visitor> + void VisitClassesIfConditionMet(Condition& cond, Visitor& visitor) REQUIRES(!lock_) + REQUIRES_SHARED(Locks::mutator_lock_); // Stops visit if the visitor returns false. template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor> bool Visit(Visitor& visitor) diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index 6509dbe3ef..8434433efc 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -303,9 +303,8 @@ static constexpr size_t kMaxNumUffdWorkers = 2; // Number of compaction buffers reserved for mutator threads in SIGBUS feature // case. It's extremely unlikely that we will ever have more than these number // of mutator threads trying to access the moving-space during one compaction -// phase. Using a lower number in debug builds to hopefully catch the issue -// before it becomes a problem on user builds. -static constexpr size_t kMutatorCompactionBufferCount = kIsDebugBuild ? 256 : 2048; +// phase. +static constexpr size_t kMutatorCompactionBufferCount = 2048; // Minimum from-space chunk to be madvised (during concurrent compaction) in one go. static constexpr ssize_t kMinFromSpaceMadviseSize = 1 * MB; // Concurrent compaction termination logic is different (and slightly more efficient) if the @@ -2376,7 +2375,7 @@ void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page) { } void MarkCompact::UpdateNonMovingSpace() { - TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + TimingLogger::ScopedTiming t("(Paused)UpdateNonMovingSpace", GetTimings()); // Iterating in reverse ensures that the class pointer in objects which span // across more than one page gets updated in the end. This is necessary for // VisitRefsForCompaction() to work correctly. 
@@ -2597,7 +2596,8 @@ class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor { REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) { ClassTable* const class_table = class_loader->GetClassTable(); if (class_table != nullptr) { - class_table->VisitRoots(*this); + // Classes are updated concurrently. + class_table->VisitRoots(*this, /*skip_classes=*/true); } } @@ -2621,8 +2621,10 @@ class MarkCompact::LinearAllocPageUpdater { public: explicit LinearAllocPageUpdater(MarkCompact* collector) : collector_(collector) {} - void operator()(uint8_t* page_begin, uint8_t* first_obj) ALWAYS_INLINE + // Update a page in multi-object arena. + void MultiObjectArena(uint8_t* page_begin, uint8_t* first_obj) REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(first_obj != nullptr); DCHECK_ALIGNED(page_begin, kPageSize); uint8_t* page_end = page_begin + kPageSize; uint32_t obj_size; @@ -2652,6 +2654,28 @@ class MarkCompact::LinearAllocPageUpdater { last_page_touched_ = true; } + // This version is only used for cases where the entire page is filled with + // GC-roots. For example, class-table and intern-table. + void SingleObjectArena(uint8_t* page_begin, size_t page_size) + REQUIRES_SHARED(Locks::mutator_lock_) { + static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>)); + DCHECK_ALIGNED(page_begin, kAlignment); + // Least significant bits are used by class-table. + static constexpr uint32_t kMask = kObjectAlignment - 1; + size_t num_roots = page_size / sizeof(GcRoot<mirror::Object>); + uint32_t* root_ptr = reinterpret_cast<uint32_t*>(page_begin); + for (size_t i = 0; i < num_roots; root_ptr++, i++) { + uint32_t word = *root_ptr; + if (word != 0) { + uint32_t lsbs = word & kMask; + word &= ~kMask; + VisitRootIfNonNull(reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(&word)); + *root_ptr = word | lsbs; + last_page_touched_ = true; + } + } + } + bool WasLastPageTouched() const { return last_page_touched_; } void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const @@ -2670,7 +2694,8 @@ class MarkCompact::LinearAllocPageUpdater { if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) { new_ref = collector_->PostCompactBlackObjAddr(old_ref); } else if (collector_->live_words_bitmap_->Test(old_ref)) { - DCHECK(collector_->moving_space_bitmap_->Test(old_ref)) << old_ref; + DCHECK(collector_->moving_space_bitmap_->Test(old_ref)) + << "ref:" << old_ref << " root:" << root; new_ref = collector_->PostCompactOldObjAddr(old_ref); } if (old_ref != new_ref) { @@ -2735,9 +2760,34 @@ class MarkCompact::LinearAllocPageUpdater { MarkCompact* const collector_; // Whether the last page was touched or not. - bool last_page_touched_; + bool last_page_touched_ = false; }; +void MarkCompact::UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) { + // If the process is debuggable then redefinition is allowed, which may mean + // pre-zygote-fork class-tables may have pointer to class in moving-space. + // So visit classes from class-sets that are not in linear-alloc arena-pool. + if (UNLIKELY(runtime->IsJavaDebuggableAtInit())) { + ClassLinker* linker = runtime->GetClassLinker(); + ClassLoaderRootsUpdater updater(this); + GcVisitedArenaPool* pool = static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool()); + auto cond = [this, pool, immune_class_table_only](ClassTable::ClassSet& set) -> bool { + if (!set.empty()) { + return immune_class_table_only ? 
+ immune_spaces_.ContainsObject(reinterpret_cast<mirror::Object*>(&*set.begin())) : + !pool->Contains(reinterpret_cast<void*>(&*set.begin())); + } + return false; + }; + linker->VisitClassTables([cond, &updater](ClassTable* table) + REQUIRES_SHARED(Locks::mutator_lock_) { + table->VisitClassesIfConditionMet(cond, updater); + }); + ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_); + linker->GetBootClassTable()->VisitClassesIfConditionMet(cond, updater); + } +} + void MarkCompact::CompactionPause() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Runtime* runtime = Runtime::Current(); @@ -2791,65 +2841,6 @@ void MarkCompact::CompactionPause() { heap_->GetReferenceProcessor()->UpdateRoots(this); } { - TimingLogger::ScopedTiming t2("(Paused)UpdateClassLoaderRoots", GetTimings()); - ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_); - { - ClassLoaderRootsUpdater updater(this); - runtime->GetClassLinker()->VisitClassLoaders(&updater); - } - } - - bool has_zygote_space = heap_->HasZygoteSpace(); - // TODO: Find out why it's not sufficient to visit native roots of immune - // spaces, and why all the pre-zygote fork arenas have to be linearly updated. - // Is it possible that some native root starts getting pointed to by some object - // in moving space after fork? Or are we missing a write-barrier somewhere - // when a native root is updated? - GcVisitedArenaPool* arena_pool = - static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool()); - if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) { - // Besides fallback-mode, visit linear-alloc space in the pause for zygote - // processes prior to first fork (that's when zygote space gets created). - if (kIsDebugBuild && IsValidFd(uffd_)) { - // All arenas allocated so far are expected to be pre-zygote fork. - arena_pool->ForEachAllocatedArena( - [](const TrackedArena& arena) - REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); }); - } - LinearAllocPageUpdater updater(this); - arena_pool->VisitRoots(updater); - } else { - // Clear the flag as we care about this only if arenas are freed during - // concurrent compaction. - arena_pool->ClearArenasFreed(); - arena_pool->ForEachAllocatedArena( - [this](const TrackedArena& arena) REQUIRES_SHARED(Locks::mutator_lock_) { - // The pre-zygote fork arenas are not visited concurrently in the - // zygote children processes. The native roots of the dirty objects - // are visited during immune space visit below. - if (!arena.IsPreZygoteForkArena()) { - uint8_t* last_byte = arena.GetLastUsedByte(); - CHECK(linear_alloc_arenas_.insert({&arena, last_byte}).second); - } else { - LinearAllocPageUpdater updater(this); - arena.VisitRoots(updater); - } - }); - } - - SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ true); - - { - TimingLogger::ScopedTiming t2("(Paused)UpdateConcurrentRoots", GetTimings()); - runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots); - } - { - // TODO: don't visit the transaction roots if it's not active. - TimingLogger::ScopedTiming t2("(Paused)UpdateNonThreadRoots", GetTimings()); - runtime->VisitNonThreadRoots(this); - } - - { // TODO: Immune space updation has to happen either before or after // remapping pre-compact pages to from-space. 
And depending on when it's // done, we have to invoke VisitRefsForCompaction() with or without @@ -2880,11 +2871,98 @@ void MarkCompact::CompactionPause() { } } - if (use_uffd_sigbus_) { - // Release order wrt to mutator threads' SIGBUS handler load. - sigbus_in_progress_count_.store(0, std::memory_order_release); + { + TimingLogger::ScopedTiming t2("(Paused)UpdateRoots", GetTimings()); + runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots); + runtime->VisitNonThreadRoots(this); + { + ClassLinker* linker = runtime->GetClassLinker(); + ClassLoaderRootsUpdater updater(this); + ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_); + linker->VisitClassLoaders(&updater); + linker->GetBootClassTable()->VisitRoots(updater, /*skip_classes=*/true); + } + SweepSystemWeaks(thread_running_gc_, runtime, /*paused=*/true); + + bool has_zygote_space = heap_->HasZygoteSpace(); + GcVisitedArenaPool* arena_pool = + static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool()); + // Update immune/pre-zygote class-tables in case class redefinition took + // place. pre-zygote class-tables that are not in immune spaces are updated + // below if we are in fallback-mode or if there is no zygote space. So in + // that case only visit class-tables that are there in immune-spaces. + UpdateClassTableClasses(runtime, uffd_ == kFallbackMode || !has_zygote_space); + + // Acquire arena-pool's lock, which should be released after the pool is + // userfaultfd registered. This is to ensure that no new arenas are + // allocated and used in between. Since they will not be captured in + // linear_alloc_arenas_ below, we will miss updating their pages. The same + // reason also applies to new allocations within the existing arena which + // may change last_byte. + // Since we are in a STW pause, this shouldn't happen anyways, but holding + // the lock confirms it. + // TODO (b/305779657): Replace with ExclusiveTryLock() and assert that it + // doesn't fail once it is available for ReaderWriterMutex. + WriterMutexLock pool_wmu(thread_running_gc_, arena_pool->GetLock()); + + // TODO: Find out why it's not sufficient to visit native roots of immune + // spaces, and why all the pre-zygote fork arenas have to be linearly updated. + // Is it possible that some native root starts getting pointed to by some object + // in moving space after fork? Or are we missing a write-barrier somewhere + // when a native root is updated? + auto arena_visitor = [this](uint8_t* page_begin, uint8_t* first_obj, size_t page_size) + REQUIRES_SHARED(Locks::mutator_lock_) { + LinearAllocPageUpdater updater(this); + if (first_obj != nullptr) { + updater.MultiObjectArena(page_begin, first_obj); + } else { + updater.SingleObjectArena(page_begin, page_size); + } + }; + if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) { + // Besides fallback-mode, visit linear-alloc space in the pause for zygote + // processes prior to first fork (that's when zygote space gets created). + if (kIsDebugBuild && IsValidFd(uffd_)) { + // All arenas allocated so far are expected to be pre-zygote fork. + arena_pool->ForEachAllocatedArena( + [](const TrackedArena& arena) + REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); }); + } + arena_pool->VisitRoots(arena_visitor); + } else { + // Inform the arena-pool that compaction is going on. So the TrackedArena + // objects corresponding to the arenas that are freed shouldn't be deleted + // immediately. We will do that in FinishPhase(). 
This is to avoid ABA + // problem. + arena_pool->DeferArenaFreeing(); + arena_pool->ForEachAllocatedArena( + [this, arena_visitor, has_zygote_space](const TrackedArena& arena) + REQUIRES_SHARED(Locks::mutator_lock_) { + // The pre-zygote fork arenas are not visited concurrently in the + // zygote children processes. The native roots of the dirty objects + // are visited during immune space visit below. + if (!arena.IsPreZygoteForkArena()) { + uint8_t* last_byte = arena.GetLastUsedByte(); + auto ret = linear_alloc_arenas_.insert({&arena, last_byte}); + CHECK(ret.second); + } else if (!arena.IsSingleObjectArena() || !has_zygote_space) { + // Pre-zygote class-table and intern-table don't need to be updated. + // TODO: Explore the possibility of using /proc/self/pagemap to + // fetch which pages in these arenas are private-dirty and then only + // visit those pages. To optimize it further, we can keep all + // pre-zygote arenas in a single memory range so that just one read + // from pagemap is sufficient. + arena.VisitRoots(arena_visitor); + } + }); + } + if (use_uffd_sigbus_) { + // Release order wrt to mutator threads' SIGBUS handler load. + sigbus_in_progress_count_.store(0, std::memory_order_release); + } + KernelPreparation(); } - KernelPreparation(); + UpdateNonMovingSpace(); // fallback mode if (uffd_ == kFallbackMode) { @@ -2950,7 +3028,7 @@ void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr, } void MarkCompact::KernelPreparation() { - TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); + TimingLogger::ScopedTiming t("(Paused)KernelPreparation", GetTimings()); uint8_t* moving_space_begin = bump_pointer_space_->Begin(); size_t moving_space_size = bump_pointer_space_->Capacity(); int mode = kCopyMode; @@ -3377,14 +3455,22 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter) : linear_alloc_arenas_.end(); } - if (arena_iter == linear_alloc_arenas_.end() || arena_iter->second <= fault_page) { + // Unlike ProcessLinearAlloc(), we don't need to hold arena-pool's lock here + // because a thread trying to access the page and as a result causing this + // userfault confirms that nobody can delete the corresponding arena and + // release its pages. + // NOTE: We may have some memory range be recycled several times during a + // compaction cycle, thereby potentially causing userfault on the same page + // several times. That's not a problem as all of them (except for possibly the + // first one) would require us mapping a zero-page, which we do without updating + // the 'state_arr'. + if (arena_iter == linear_alloc_arenas_.end() || + arena_iter->first->IsWaitingForDeletion() || + arena_iter->second <= fault_page) { // Fault page isn't in any of the arenas that existed before we started // compaction. So map zeropage and return. ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false); } else { - // fault_page should always belong to some arena. 
- DCHECK(arena_iter != linear_alloc_arenas_.end()) - << "fault_page:" << static_cast<void*>(fault_page) << "is_minor_fault:" << is_minor_fault; // Find the linear-alloc space containing fault-page LinearAllocSpaceData* space_data = nullptr; for (auto& data : linear_alloc_spaces_data_) { @@ -3409,10 +3495,15 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i if (state_arr[page_idx].compare_exchange_strong( state, PageState::kProcessingAndMapping, std::memory_order_acquire)) { if (kMode == kCopyMode || is_minor_fault) { - uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page); - DCHECK_NE(first_obj, nullptr); LinearAllocPageUpdater updater(this); - updater(fault_page + diff, first_obj + diff); + uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page); + // null first_obj indicates that it's a page from arena for + // intern-table/class-table. So first object isn't required. + if (first_obj != nullptr) { + updater.MultiObjectArena(fault_page + diff, first_obj + diff); + } else { + updater.SingleObjectArena(fault_page + diff, kPageSize); + } if (kMode == kCopyMode) { MapUpdatedLinearAllocPage(fault_page, fault_page + diff, @@ -3481,6 +3572,7 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i void MarkCompact::ProcessLinearAlloc() { GcVisitedArenaPool* arena_pool = static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); + DCHECK_EQ(thread_running_gc_, Thread::Current()); for (auto& pair : linear_alloc_arenas_) { const TrackedArena* arena = pair.first; size_t arena_size; @@ -3488,12 +3580,15 @@ void MarkCompact::ProcessLinearAlloc() { ptrdiff_t diff; bool others_processing; { - // Acquire arena-pool's lock so that the arena being worked cannot be - // deallocated at the same time. - std::lock_guard<std::mutex> lock(arena_pool->GetLock()); + // Acquire arena-pool's lock (in shared-mode) so that the arena being updated + // does not get deleted at the same time. If this critical section is too + // long and impacts mutator response time, then we get rid of this lock by + // holding onto memory ranges of all deleted (since compaction pause) + // arenas until completion finishes. + ReaderMutexLock rmu(thread_running_gc_, arena_pool->GetLock()); // If any arenas were freed since compaction pause then skip them from // visiting. - if (arena_pool->AreArenasFreed() && !arena_pool->FindAllocatedArena(arena)) { + if (arena->IsWaitingForDeletion()) { continue; } uint8_t* last_byte = pair.second; @@ -3509,11 +3604,12 @@ void MarkCompact::ProcessLinearAlloc() { break; } } - DCHECK_NE(space_data, nullptr); + CHECK_NE(space_data, nullptr); diff = space_data->shadow_.Begin() - space_data->begin_; auto visitor = [space_data, last_byte, diff, this, &others_processing]( uint8_t* page_begin, - uint8_t* first_obj) REQUIRES_SHARED(Locks::mutator_lock_) { + uint8_t* first_obj, + size_t page_size) REQUIRES_SHARED(Locks::mutator_lock_) { // No need to process pages past last_byte as they already have updated // gc-roots, if any. if (page_begin >= last_byte) { @@ -3532,7 +3628,14 @@ void MarkCompact::ProcessLinearAlloc() { // reason, we used 'release' order for changing the state to 'processed'. if (state_arr[page_idx].compare_exchange_strong( expected_state, desired_state, std::memory_order_acquire)) { - updater(page_begin + diff, first_obj + diff); + // null first_obj indicates that it's a page from arena for + // intern-table/class-table. So first object isn't required. 
+ if (first_obj != nullptr) { + updater.MultiObjectArena(page_begin + diff, first_obj + diff); + } else { + DCHECK_EQ(page_size, kPageSize); + updater.SingleObjectArena(page_begin + diff, page_size); + } expected_state = PageState::kProcessing; if (!minor_fault_initialized_) { MapUpdatedLinearAllocPage( @@ -4333,6 +4436,9 @@ void MarkCompact::FinishPhase() { DCHECK_EQ(fstat(moving_to_space_fd_, &buf), 0) << "fstat failed: " << strerror(errno); DCHECK_EQ(buf.st_blocks, 0u); } + GcVisitedArenaPool* arena_pool = + static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); + arena_pool->DeleteUnusedArenas(); } } // namespace collector diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h index 97709b8cd4..c75cfb56bb 100644 --- a/runtime/gc/collector/mark_compact.h +++ b/runtime/gc/collector/mark_compact.h @@ -543,6 +543,11 @@ class MarkCompact final : public GarbageCollector { // Initialize all the info-map related fields of this GC. Returns total size // of all the structures in info-map. size_t InitializeInfoMap(uint8_t* p, size_t moving_space_sz); + // Update class-table classes in compaction pause if we are running in debuggable + // mode. Only visit class-table in image spaces if 'immune_class_table_only' + // is true. + void UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) + REQUIRES_SHARED(Locks::mutator_lock_); // For checkpoints Barrier gc_barrier_; diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 796f8fbf09..329777c7d7 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -2451,9 +2451,11 @@ void Heap::PreZygoteFork() { return; } Runtime* runtime = Runtime::Current(); + // Setup linear-alloc pool for post-zygote fork allocations before freezing + // snapshots of intern-table and class-table. + runtime->SetupLinearAllocForPostZygoteFork(self); runtime->GetInternTable()->AddNewTable(); runtime->GetClassLinker()->MoveClassTableToPreZygote(); - runtime->SetupLinearAllocForPostZygoteFork(self); VLOG(heap) << "Starting PreZygoteFork"; // The end of the non-moving space may be protected, unprotect it so that we can copy the zygote // there. diff --git a/runtime/intern_table.h b/runtime/intern_table.h index d115f51f7e..0fcada1af4 100644 --- a/runtime/intern_table.h +++ b/runtime/intern_table.h @@ -17,8 +17,8 @@ #ifndef ART_RUNTIME_INTERN_TABLE_H_ #define ART_RUNTIME_INTERN_TABLE_H_ -#include "base/allocator.h" #include "base/dchecked_vector.h" +#include "base/gc_visited_arena_pool.h" #include "base/hash_set.h" #include "base/mutex.h" #include "gc/weak_root_state.h" @@ -109,11 +109,12 @@ class InternTable { } }; - using UnorderedSet = HashSet<GcRoot<mirror::String>, - GcRootEmptyFn, - StringHash, - StringEquals, - TrackingAllocator<GcRoot<mirror::String>, kAllocatorTagInternTable>>; + using UnorderedSet = + HashSet<GcRoot<mirror::String>, + GcRootEmptyFn, + StringHash, + StringEquals, + GcRootArenaAllocator<GcRoot<mirror::String>, kAllocatorTagInternTable>>; InternTable(); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 9c3b9de70c..97db5fb0fd 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -859,7 +859,11 @@ void Runtime::CallExitHook(jint status) { } void Runtime::SweepSystemWeaks(IsMarkedVisitor* visitor) { - GetInternTable()->SweepInternTableWeaks(visitor); + // Userfaultfd compaction updates weak intern-table page-by-page via + // LinearAlloc. 
+ if (!GetHeap()->IsPerformingUffdCompaction()) { + GetInternTable()->SweepInternTableWeaks(visitor); + } GetMonitorList()->SweepMonitorList(visitor); GetJavaVM()->SweepJniWeakGlobals(visitor); GetHeap()->SweepAllocationRecords(visitor); @@ -2590,8 +2594,14 @@ void Runtime::VisitConstantRoots(RootVisitor* visitor) { } void Runtime::VisitConcurrentRoots(RootVisitor* visitor, VisitRootFlags flags) { - intern_table_->VisitRoots(visitor, flags); - class_linker_->VisitRoots(visitor, flags); + // Userfaultfd compaction updates intern-tables and class-tables page-by-page + // via LinearAlloc. So don't visit them here. + if (GetHeap()->IsPerformingUffdCompaction()) { + class_linker_->VisitRoots(visitor, flags, /*visit_class_roots=*/false); + } else { + intern_table_->VisitRoots(visitor, flags); + class_linker_->VisitRoots(visitor, flags, /*visit_class_roots=*/true); + } jni_id_manager_->VisitRoots(visitor); heap_->VisitAllocationRecords(visitor); if (jit_ != nullptr) { |