| author | 2014-04-29 14:37:57 -0700 |
|---|---|
| committer | 2014-04-30 15:23:36 -0700 |
| commit | 0651d41e41341fb2e9ef3ee41dc1f1bfc832dbbb (patch) |
| tree | ad060fe7f37f4958d15d15d868e0cf6fb074d2cb |
| parent | adcfc69aa94cc1d406ef78e194b1ac36e389ad95 (diff) |
Add thread unsafe allocation methods to spaces.
These are used by the SS/GSS collectors, since they run with mutators
suspended and only allocate from a single thread. Added AllocThreadUnsafe
to BumpPointerSpace and RosAllocSpace. In RosAlloc, the thread-unsafe path
(AllocFromRunThreadUnsafe) allocates from the shared current runs in the
same way thread-local runs are normally used, without taking the size
bracket locks. Added code to revoke any current run whose size bracket
index is also used for thread-local runs.
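
To illustrate the bump-pointer side of this, here is a minimal sketch; it is
not the ART code itself (the real implementation is in the
bump_pointer_space-inl.h hunk below), and alignment, object accounting, and
the mirror::Object type are omitted. With mutators suspended and a single
allocating thread, the CAS retry loop can be replaced by a plain bump of the
end pointer.

```cpp
#include <atomic>
#include <cstddef>
#include <cstdint>

// Simplified stand-in for a bump-pointer space. Names are illustrative only.
class SketchBumpPointerSpace {
 public:
  SketchBumpPointerSpace(uint8_t* begin, size_t capacity)
      : end_(begin), growth_end_(begin + capacity) {}

  // Thread-safe path: a CAS loop so concurrently running mutators can
  // allocate from the same space.
  void* Alloc(size_t num_bytes) {
    uint8_t* old_end = end_.load(std::memory_order_relaxed);
    uint8_t* new_end;
    do {
      new_end = old_end + num_bytes;
      if (new_end > growth_end_) {
        return nullptr;  // Space exhausted.
      }
      // On CAS failure, old_end is refreshed with the current value of end_.
    } while (!end_.compare_exchange_weak(old_end, new_end,
                                         std::memory_order_relaxed));
    return old_end;
  }

  // Thread-unsafe path: the caller guarantees mutators are suspended and only
  // one thread (the collector) allocates, so a plain load/add/store suffices
  // and the CAS retry loop is avoided entirely.
  void* AllocThreadUnsafe(size_t num_bytes) {
    uint8_t* old_end = end_.load(std::memory_order_relaxed);
    if (old_end + num_bytes > growth_end_) {
      return nullptr;
    }
    end_.store(old_end + num_bytes, std::memory_order_relaxed);
    return old_end;
  }

 private:
  std::atomic<uint8_t*> end_;
  uint8_t* growth_end_;
};
```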
Changed:
The number of thread-local runs stored in each thread is now the number
of thread-local size brackets defined by RosAlloc
(kNumThreadLocalSizeBrackets) instead of the total number of size
brackets.
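
As a hedged sketch of the sizing relationship described above: the constants
mirror RosAlloc::kNumOfSizeBrackets and RosAlloc::kNumThreadLocalSizeBrackets
from the headers in this change, but these declarations are illustrative, not
the real ones.

```cpp
#include <cstddef>

namespace sketch {

constexpr size_t kNumOfSizeBrackets = 34;           // All rosalloc size brackets.
constexpr size_t kNumThreadLocalSizeBrackets = 11;  // Brackets below this index
                                                    // use thread-local runs.

struct ThreadLocalState {
  // Previously sized by the total bracket count (34); now only the brackets
  // that can actually hold a thread-local run get a slot. Larger brackets
  // always go through the shared current runs.
  void* rosalloc_runs[kNumThreadLocalSizeBrackets];
};

// The allocator picks the path by bracket index.
inline bool UsesThreadLocalRun(size_t bracket_idx) {
  return bracket_idx < kNumThreadLocalSizeBrackets;
}

}  // namespace sketch
```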
Total GC time / time on EvaluateAndApplyChanges.
TLAB SS:
Before: 36.7s / 7254
After: 16.1s / 4837
TLAB GSS:
Before: 6.9s / 3973
After: 5.7s / 3778
Bug: 8981901
Change-Id: Id1d264ade3799f431bf7ebbdcca6146aefbeb632
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | runtime/gc/allocator/rosalloc-inl.h | 8 |
| -rw-r--r-- | runtime/gc/allocator/rosalloc.cc | 224 |
| -rw-r--r-- | runtime/gc/allocator/rosalloc.h | 24 |
| -rw-r--r-- | runtime/gc/collector/semi_space.cc | 18 |
| -rw-r--r-- | runtime/gc/collector/semi_space.h | 9 |
| -rw-r--r-- | runtime/gc/heap.cc | 3 |
| -rw-r--r-- | runtime/gc/space/bump_pointer_space-inl.h | 20 |
| -rw-r--r-- | runtime/gc/space/bump_pointer_space.h | 5 |
| -rw-r--r-- | runtime/gc/space/rosalloc_space-inl.h | 6 |
| -rw-r--r-- | runtime/gc/space/rosalloc_space.cc | 2 |
| -rw-r--r-- | runtime/gc/space/rosalloc_space.h | 11 |
| -rw-r--r-- | runtime/gc/space/space.h | 8 |
| -rw-r--r-- | runtime/thread.cc | 3 |
| -rw-r--r-- | runtime/thread.h | 12 |
14 files changed, 233 insertions, 120 deletions
diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h index ac0f67bd69..c69ca48de8 100644 --- a/runtime/gc/allocator/rosalloc-inl.h +++ b/runtime/gc/allocator/rosalloc-inl.h @@ -23,11 +23,17 @@ namespace art { namespace gc { namespace allocator { +template<bool kThreadSafe> inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated) { if (UNLIKELY(size > kLargeSizeThreshold)) { return AllocLargeObject(self, size, bytes_allocated); } - void* m = AllocFromRun(self, size, bytes_allocated); + void* m; + if (kThreadSafe) { + m = AllocFromRun(self, size, bytes_allocated); + } else { + m = AllocFromRunThreadUnsafe(self, size, bytes_allocated); + } // Check if the returned memory is really all zero. if (kCheckZeroMemory && m != nullptr) { byte* bytes = reinterpret_cast<byte*>(m); diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc index ff59016423..f11303002c 100644 --- a/runtime/gc/allocator/rosalloc.cc +++ b/runtime/gc/allocator/rosalloc.cc @@ -67,11 +67,11 @@ RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity, << std::hex << (intptr_t)(base_ + capacity_) << ", capacity=" << std::dec << capacity_ << ", max_capacity=" << std::dec << max_capacity_; - memset(current_runs_, 0, sizeof(current_runs_)); for (size_t i = 0; i < kNumOfSizeBrackets; i++) { size_bracket_lock_names[i] = StringPrintf("an rosalloc size bracket %d lock", static_cast<int>(i)); size_bracket_locks_[i] = new Mutex(size_bracket_lock_names[i].c_str(), kRosAllocBracketLock); + current_runs_[i] = dedicated_full_run_; } DCHECK_EQ(footprint_, capacity_); size_t num_of_pages = footprint_ / kPageSize; @@ -548,7 +548,7 @@ RosAlloc::Run* RosAlloc::AllocRun(Thread* self, size_t idx) { DCHECK(!new_run->IsThreadLocal()); DCHECK_EQ(new_run->first_search_vec_idx_, 0U); DCHECK(!new_run->to_be_bulk_freed_); - if (kUsePrefetchDuringAllocRun && idx <= kMaxThreadLocalSizeBracketIdx) { + if (kUsePrefetchDuringAllocRun && idx < kNumThreadLocalSizeBrackets) { // Take ownership of the cache lines if we are likely to be thread local run. if (kPrefetchNewRunDataByZeroing) { // Zeroing the data is sometimes faster than prefetching but it increases memory usage @@ -584,6 +584,60 @@ RosAlloc::Run* RosAlloc::RefillRun(Thread* self, size_t idx) { return AllocRun(self, idx); } +void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) { + Run* current_run = current_runs_[idx]; + DCHECK(current_run != nullptr); + void* slot_addr = current_run->AllocSlot(); + if (UNLIKELY(slot_addr == nullptr)) { + // The current run got full. Try to refill it. + DCHECK(current_run->IsFull()); + if (kIsDebugBuild && current_run != dedicated_full_run_) { + full_runs_[idx].insert(current_run); + if (kTraceRosAlloc) { + LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(current_run) + << " into full_runs_[" << std::dec << idx << "]"; + } + DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end()); + DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end()); + } + current_run = RefillRun(self, idx); + if (UNLIKELY(current_run == nullptr)) { + // Failed to allocate a new run, make sure that it is the dedicated full run. 
+ current_runs_[idx] = dedicated_full_run_; + return nullptr; + } + DCHECK(current_run != nullptr); + DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end()); + DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end()); + current_run->SetIsThreadLocal(false); + current_runs_[idx] = current_run; + DCHECK(!current_run->IsFull()); + slot_addr = current_run->AllocSlot(); + // Must succeed now with a new run. + DCHECK(slot_addr != nullptr); + } + return slot_addr; +} + +void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) { + DCHECK_LE(size, kLargeSizeThreshold); + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + DCHECK_EQ(idx, SizeToIndex(size)); + DCHECK_EQ(bracket_size, IndexToBracketSize(idx)); + DCHECK_EQ(bracket_size, bracketSizes[idx]); + DCHECK_LE(size, bracket_size); + DCHECK(size > 512 || bracket_size - size < 16); + Locks::mutator_lock_->AssertExclusiveHeld(self); + void* slot_addr = AllocFromCurrentRunUnlocked(self, idx); + if (LIKELY(slot_addr != nullptr)) { + DCHECK(bytes_allocated != nullptr); + *bytes_allocated = bracket_size; + // Caller verifies that it is all 0. + } + return slot_addr; +} + void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) { DCHECK_LE(size, kLargeSizeThreshold); size_t bracket_size; @@ -596,7 +650,7 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) void* slot_addr; - if (LIKELY(idx <= kMaxThreadLocalSizeBracketIdx)) { + if (LIKELY(idx < kNumThreadLocalSizeBrackets)) { // Use a thread-local run. Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx)); // Allow invalid since this will always fail the allocation. @@ -631,7 +685,6 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) // No slots got freed. Try to refill the thread-local run. DCHECK(thread_local_run->IsFull()); if (thread_local_run != dedicated_full_run_) { - self->SetRosAllocRun(idx, dedicated_full_run_); thread_local_run->SetIsThreadLocal(false); if (kIsDebugBuild) { full_runs_[idx].insert(thread_local_run); @@ -646,8 +699,9 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) } thread_local_run = RefillRun(self, idx); - if (UNLIKELY(thread_local_run == NULL)) { - return NULL; + if (UNLIKELY(thread_local_run == nullptr)) { + self->SetRosAllocRun(idx, dedicated_full_run_); + return nullptr; } DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); @@ -656,12 +710,12 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) DCHECK(!thread_local_run->IsFull()); } - DCHECK(thread_local_run != NULL); + DCHECK(thread_local_run != nullptr); DCHECK(!thread_local_run->IsFull()); DCHECK(thread_local_run->IsThreadLocal()); slot_addr = thread_local_run->AllocSlot(); // Must succeed now with a new run. - DCHECK(slot_addr != NULL); + DCHECK(slot_addr != nullptr); } if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) @@ -671,48 +725,7 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) } else { // Use the (shared) current run. 
MutexLock mu(self, *size_bracket_locks_[idx]); - Run* current_run = current_runs_[idx]; - if (UNLIKELY(current_run == NULL)) { - current_run = RefillRun(self, idx); - if (UNLIKELY(current_run == NULL)) { - return NULL; - } - DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end()); - DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end()); - current_run->SetIsThreadLocal(false); - current_runs_[idx] = current_run; - DCHECK(!current_run->IsFull()); - } - DCHECK(current_run != NULL); - slot_addr = current_run->AllocSlot(); - if (UNLIKELY(slot_addr == NULL)) { - // The current run got full. Try to refill it. - DCHECK(current_run->IsFull()); - current_runs_[idx] = NULL; - if (kIsDebugBuild) { - // Insert it into full_runs and set the current run to NULL. - full_runs_[idx].insert(current_run); - if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::AllocFromRun() : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(current_run) - << " into full_runs_[" << std::dec << idx << "]"; - } - } - DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end()); - DCHECK(full_runs_[idx].find(current_run) != full_runs_[idx].end()); - current_run = RefillRun(self, idx); - if (UNLIKELY(current_run == NULL)) { - return NULL; - } - DCHECK(current_run != NULL); - DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end()); - DCHECK(full_runs_[idx].find(current_run) == full_runs_[idx].end()); - current_run->SetIsThreadLocal(false); - current_runs_[idx] = current_run; - DCHECK(!current_run->IsFull()); - slot_addr = current_run->AllocSlot(); - // Must succeed now with a new run. - DCHECK(slot_addr != NULL); - } + slot_addr = AllocFromCurrentRunUnlocked(self, idx); if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size) @@ -741,7 +754,7 @@ size_t RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) { } if (LIKELY(run->IsThreadLocal())) { // It's a thread-local run. Just mark the thread-local free bit map and return. - DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx); + DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets); DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end()); run->MarkThreadLocalFreeBitMap(ptr); @@ -766,7 +779,7 @@ size_t RosAlloc::FreeFromRun(Thread* self, void* ptr, Run* run) { } } if (run == current_runs_[idx]) { - current_runs_[idx] = NULL; + current_runs_[idx] = dedicated_full_run_; } DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end()); @@ -1233,7 +1246,7 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) { size_t idx = run->size_bracket_idx_; MutexLock mu(self, *size_bracket_locks_[idx]); if (run->IsThreadLocal()) { - DCHECK_LE(run->size_bracket_idx_, kMaxThreadLocalSizeBracketIdx); + DCHECK_LT(run->size_bracket_idx_, kNumThreadLocalSizeBrackets); DCHECK(non_full_runs_[idx].find(run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(run) == full_runs_[idx].end()); run->UnionBulkFreeBitMapToThreadLocalFreeBitMap(); @@ -1627,7 +1640,7 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { Thread* self = Thread::Current(); // Avoid race conditions on the bulk free bit maps with BulkFree() (GC). 
WriterMutexLock wmu(self, bulk_free_lock_); - for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) { + for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) { MutexLock mu(self, *size_bracket_locks_[idx]); Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx)); CHECK(thread_local_run != nullptr); @@ -1643,30 +1656,48 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { thread_local_run->MergeBulkFreeBitMapIntoAllocBitMap(); DCHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); DCHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); - if (thread_local_run->IsFull()) { - if (kIsDebugBuild) { - full_runs_[idx].insert(thread_local_run); - DCHECK(full_runs_[idx].find(thread_local_run) != full_runs_[idx].end()); - if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::RevokeThreadLocalRuns() : Inserted run 0x" << std::hex - << reinterpret_cast<intptr_t>(thread_local_run) - << " into full_runs_[" << std::dec << idx << "]"; - } - } - } else if (thread_local_run->IsAllFree()) { - MutexLock mu(self, lock_); - thread_local_run->ZeroHeader(); - FreePages(self, thread_local_run, true); - } else { - non_full_runs_[idx].insert(thread_local_run); - DCHECK(non_full_runs_[idx].find(thread_local_run) != non_full_runs_[idx].end()); - if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::RevokeThreadLocalRuns() : Inserted run 0x" << std::hex - << reinterpret_cast<intptr_t>(thread_local_run) - << " into non_full_runs_[" << std::dec << idx << "]"; - } + RevokeRun(self, idx, thread_local_run); + } + } +} + +void RosAlloc::RevokeRun(Thread* self, size_t idx, Run* run) { + size_bracket_locks_[idx]->AssertHeld(self); + DCHECK(run != dedicated_full_run_); + if (run->IsFull()) { + if (kIsDebugBuild) { + full_runs_[idx].insert(run); + DCHECK(full_runs_[idx].find(run) != full_runs_[idx].end()); + if (kTraceRosAlloc) { + LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex + << reinterpret_cast<intptr_t>(run) + << " into full_runs_[" << std::dec << idx << "]"; } } + } else if (run->IsAllFree()) { + run->ZeroHeader(); + MutexLock mu(self, lock_); + FreePages(self, run, true); + } else { + non_full_runs_[idx].insert(run); + DCHECK(non_full_runs_[idx].find(run) != non_full_runs_[idx].end()); + if (kTraceRosAlloc) { + LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex + << reinterpret_cast<intptr_t>(run) + << " into non_full_runs_[" << std::dec << idx << "]"; + } + } +} + +void RosAlloc::RevokeThreadUnsafeCurrentRuns() { + // Revoke the current runs which share the same idx as thread local runs. + Thread* self = Thread::Current(); + for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) { + MutexLock mu(self, *size_bracket_locks_[idx]); + if (current_runs_[idx] != dedicated_full_run_) { + RevokeRun(self, idx, current_runs_[idx]); + current_runs_[idx] = dedicated_full_run_; + } } } @@ -1679,6 +1710,7 @@ void RosAlloc::RevokeAllThreadLocalRuns() { for (Thread* thread : thread_list) { RevokeThreadLocalRuns(thread); } + RevokeThreadUnsafeCurrentRuns(); } void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) { @@ -1686,7 +1718,7 @@ void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) { Thread* self = Thread::Current(); // Avoid race conditions on the bulk free bit maps with BulkFree() (GC). 
WriterMutexLock wmu(self, bulk_free_lock_); - for (size_t idx = 0; idx < kNumOfSizeBrackets; idx++) { + for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) { MutexLock mu(self, *size_bracket_locks_[idx]); Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx)); DCHECK(thread_local_run == nullptr || thread_local_run == dedicated_full_run_); @@ -1696,18 +1728,21 @@ void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) { void RosAlloc::AssertAllThreadLocalRunsAreRevoked() { if (kIsDebugBuild) { - MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_); - MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_); + Thread* self = Thread::Current(); + MutexLock mu(self, *Locks::runtime_shutdown_lock_); + MutexLock mu2(self, *Locks::thread_list_lock_); std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList(); for (Thread* t : thread_list) { AssertThreadLocalRunsAreRevoked(t); } + for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) { + MutexLock mu(self, *size_bracket_locks_[idx]); + CHECK_EQ(current_runs_[idx], dedicated_full_run_); + } } } void RosAlloc::Initialize() { - // Check the consistency of the number of size brackets. - DCHECK_EQ(Thread::kRosAllocNumOfSizeBrackets, kNumOfSizeBrackets); // bracketSizes. for (size_t i = 0; i < kNumOfSizeBrackets; i++) { if (i < kNumOfSizeBrackets - 2) { @@ -1911,15 +1946,34 @@ void RosAlloc::Verify() { break; } case kPageMapRunPart: - LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap(); - break; + // Fall-through. default: LOG(FATAL) << "Unreachable - page map type: " << pm << std::endl << DumpPageMap(); break; } } } - + std::list<Thread*> threads = Runtime::Current()->GetThreadList()->GetList(); + for (Thread* thread : threads) { + for (size_t i = 0; i < kNumThreadLocalSizeBrackets; ++i) { + MutexLock mu(self, *size_bracket_locks_[i]); + Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i)); + CHECK(thread_local_run != nullptr); + CHECK(thread_local_run->IsThreadLocal()); + CHECK(thread_local_run == dedicated_full_run_ || + thread_local_run->size_bracket_idx_ == i); + } + } + for (size_t i = 0; i < kNumOfSizeBrackets; i++) { + MutexLock mu(self, *size_bracket_locks_[i]); + Run* current_run = current_runs_[i]; + CHECK(current_run != nullptr); + if (current_run != dedicated_full_run_) { + // The dedicated full run is currently marked as thread local. + CHECK(!current_run->IsThreadLocal()); + CHECK_EQ(current_run->size_bracket_idx_, i); + } + } // Call Verify() here for the lock order. for (auto& run : runs) { run->Verify(self, this); @@ -1952,7 +2006,7 @@ void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) { std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList(); for (auto it = thread_list.begin(); it != thread_list.end(); ++it) { Thread* thread = *it; - for (size_t i = 0; i < kNumOfSizeBrackets; i++) { + for (size_t i = 0; i < kNumThreadLocalSizeBrackets; i++) { MutexLock mu(self, *rosalloc->size_bracket_locks_[i]); Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(i)); if (thread_local_run == this) { diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h index f7fa2da236..21044f3b10 100644 --- a/runtime/gc/allocator/rosalloc.h +++ b/runtime/gc/allocator/rosalloc.h @@ -405,11 +405,6 @@ class RosAlloc { // at a page-granularity. 
static const size_t kLargeSizeThreshold = 2048; - // We use use thread-local runs for the size Brackets whose indexes - // are less than or equal to this index. We use shared (current) - // runs for the rest. - static const size_t kMaxThreadLocalSizeBracketIdx = 10; - // If true, check that the returned memory is actually zero. static constexpr bool kCheckZeroMemory = kIsDebugBuild; @@ -442,6 +437,10 @@ class RosAlloc { // The default value for page_release_size_threshold_. static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB; + // We use thread-local runs for the size Brackets whose indexes + // are less than this index. We use shared (current) runs for the rest. + static const size_t kNumThreadLocalSizeBrackets = 11; + private: // The base address of the memory region that's managed by this allocator. byte* base_; @@ -526,6 +525,12 @@ class RosAlloc { // Allocate/free a run slot. void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_); + // Allocate/free a run slot without acquiring locks. + // TODO: EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) + void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) + LOCKS_EXCLUDED(lock_); + void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx); + // Returns the bracket size. size_t FreeFromRun(Thread* self, void* ptr, Run* run) LOCKS_EXCLUDED(lock_); @@ -543,11 +548,20 @@ class RosAlloc { // Allocates large objects. void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_); + // Revoke a run by adding it to non_full_runs_ or freeing the pages. + void RevokeRun(Thread* self, size_t idx, Run* run); + + // Revoke the current runs which share an index with the thread local runs. + void RevokeThreadUnsafeCurrentRuns(); + public: RosAlloc(void* base, size_t capacity, size_t max_capacity, PageReleaseMode page_release_mode, size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold); ~RosAlloc(); + // If kThreadUnsafe is true then the allocator may avoid acquiring some locks as an optimization. + // If used, this may cause race conditions if multiple threads are allocating at the same time. + template<bool kThreadSafe = true> void* Alloc(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_); size_t Free(Thread* self, void* ptr) diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index 0b2601992d..b53ee10136 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -180,7 +180,9 @@ void SemiSpace::MarkingPhase() { runtime->SetFaultMessage(oss.str()); CHECK_EQ(self_->SetStateUnsafe(old_state), kRunnable); } - + // Revoke the thread local buffers since the GC may allocate into a RosAllocSpace and this helps + // to prevent fragmentation. + RevokeAllThreadLocalBuffers(); if (generational_) { if (gc_cause_ == kGcCauseExplicit || gc_cause_ == kGcCauseForNativeAlloc || clear_soft_references_) { @@ -332,11 +334,8 @@ void SemiSpace::UpdateAndMarkModUnion() { class SemiSpaceScanObjectVisitor { public: explicit SemiSpaceScanObjectVisitor(SemiSpace* ss) : semi_space_(ss) {} - void operator()(Object* obj) const SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) - EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) { - // TODO: fix NO_THREAD_SAFETY_ANALYSIS. ScanObject() requires an - // exclusive lock on the mutator lock, but - // SpaceBitmap::VisitMarkedRange() only requires the shared lock. 
+ void operator()(Object* obj) const EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_, + Locks::heap_bitmap_lock_) { DCHECK(obj != nullptr); semi_space_->ScanObject(obj); } @@ -552,10 +551,11 @@ mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) { // (pseudo-promote) it to the main free list space (as sort // of an old generation.) space::MallocSpace* promo_dest_space = GetHeap()->GetPrimaryFreeListSpace(); - forward_address = promo_dest_space->Alloc(self_, object_size, &bytes_allocated, nullptr); + forward_address = promo_dest_space->AllocThreadUnsafe(self_, object_size, &bytes_allocated, + nullptr); if (UNLIKELY(forward_address == nullptr)) { // If out of space, fall back to the to-space. - forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr); + forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr); } else { bytes_promoted_ += bytes_allocated; // Dirty the card at the destionation as it may contain @@ -599,7 +599,7 @@ mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) { DCHECK(forward_address != nullptr); } else { // If it's allocated after the last GC (younger), copy it to the to-space. - forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr); + forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr); } ++objects_moved_; bytes_moved_ += bytes_allocated; diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h index 9b6df16ec5..3b3e1b11d2 100644 --- a/runtime/gc/collector/semi_space.h +++ b/runtime/gc/collector/semi_space.h @@ -98,12 +98,10 @@ class SemiSpace : public GarbageCollector { // Returns the new address of the object. template<bool kPoisonReferences> void MarkObject(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) - EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_); void ScanObject(mirror::Object* obj) - EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_); void VerifyNoFromSpaceReferences(mirror::Object* obj) SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_); @@ -150,8 +148,7 @@ class SemiSpace : public GarbageCollector { SHARED_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_); virtual mirror::Object* MarkNonForwardedObject(mirror::Object* obj) - EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_) - SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + EXCLUSIVE_LOCKS_REQUIRED(Locks::heap_bitmap_lock_, Locks::mutator_lock_); // Schedules an unmarked object for reference processing. 
void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 4d074f1f4b..b913d21b28 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -2396,8 +2396,7 @@ void Heap::PreGcVerificationPaused(collector::GarbageCollector* gc) { } void Heap::PreGcVerification(collector::GarbageCollector* gc) { - if (verify_pre_gc_heap_ || verify_missing_card_marks_ || verify_mod_union_table_ || - verify_pre_gc_rosalloc_) { + if (verify_pre_gc_heap_ || verify_missing_card_marks_ || verify_mod_union_table_) { collector::GarbageCollector::ScopedPause pause(gc); PreGcVerificationPaused(gc); } diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h index 70ab64b90e..497a61f273 100644 --- a/runtime/gc/space/bump_pointer_space-inl.h +++ b/runtime/gc/space/bump_pointer_space-inl.h @@ -36,6 +36,26 @@ inline mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t return ret; } +inline mirror::Object* BumpPointerSpace::AllocThreadUnsafe(Thread* self, size_t num_bytes, + size_t* bytes_allocated, + size_t* usable_size) { + Locks::mutator_lock_->AssertExclusiveHeld(self); + num_bytes = RoundUp(num_bytes, kAlignment); + if (end_ + num_bytes > growth_end_) { + return nullptr; + } + mirror::Object* obj = reinterpret_cast<mirror::Object*>(end_); + end_ += num_bytes; + *bytes_allocated = num_bytes; + // Use the CAS free versions as an optimization. + objects_allocated_ = objects_allocated_ + 1; + bytes_allocated_ = bytes_allocated_ + num_bytes; + if (UNLIKELY(usable_size != nullptr)) { + *usable_size = num_bytes; + } + return obj; +} + inline mirror::Object* BumpPointerSpace::AllocNonvirtualWithoutAccounting(size_t num_bytes) { DCHECK(IsAligned<kAlignment>(num_bytes)); byte* old_end; diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h index e52a9a37f7..9e61f300be 100644 --- a/runtime/gc/space/bump_pointer_space.h +++ b/runtime/gc/space/bump_pointer_space.h @@ -48,6 +48,11 @@ class BumpPointerSpace FINAL : public ContinuousMemMapAllocSpace { // Allocate num_bytes, returns nullptr if the space is full. mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, size_t* usable_size) OVERRIDE; + // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector. 
+ mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, + size_t* usable_size) + OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); + mirror::Object* AllocNonvirtual(size_t num_bytes); mirror::Object* AllocNonvirtualWithoutAccounting(size_t num_bytes); diff --git a/runtime/gc/space/rosalloc_space-inl.h b/runtime/gc/space/rosalloc_space-inl.h index d270885f36..fbfef450fe 100644 --- a/runtime/gc/space/rosalloc_space-inl.h +++ b/runtime/gc/space/rosalloc_space-inl.h @@ -46,11 +46,15 @@ inline size_t RosAllocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_ return size_by_size; } +template<bool kThreadSafe> inline mirror::Object* RosAllocSpace::AllocCommon(Thread* self, size_t num_bytes, size_t* bytes_allocated, size_t* usable_size) { size_t rosalloc_size = 0; + if (!kThreadSafe) { + Locks::mutator_lock_->AssertExclusiveHeld(self); + } mirror::Object* result = reinterpret_cast<mirror::Object*>( - rosalloc_->Alloc(self, num_bytes, &rosalloc_size)); + rosalloc_->Alloc<kThreadSafe>(self, num_bytes, &rosalloc_size)); if (LIKELY(result != NULL)) { if (kDebugSpaces) { CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result) diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc index f5c0e9495f..a1511e749e 100644 --- a/runtime/gc/space/rosalloc_space.cc +++ b/runtime/gc/space/rosalloc_space.cc @@ -159,7 +159,7 @@ mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, } // Note RosAlloc zeroes memory internally. // Return the new allocation or NULL. - CHECK(!kDebugSpaces || result == NULL || Contains(result)); + CHECK(!kDebugSpaces || result == nullptr || Contains(result)); return result; } diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h index a15673884a..2934af87c6 100644 --- a/runtime/gc/space/rosalloc_space.h +++ b/runtime/gc/space/rosalloc_space.h @@ -52,6 +52,11 @@ class RosAllocSpace : public MallocSpace { size_t* usable_size) OVERRIDE { return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size); } + mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, + size_t* usable_size) + OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { + return AllocNonvirtualThreadUnsafe(self, num_bytes, bytes_allocated, usable_size); + } size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE { return AllocationSizeNonvirtual(obj, usable_size); } @@ -65,6 +70,11 @@ class RosAllocSpace : public MallocSpace { // RosAlloc zeroes memory internally. return AllocCommon(self, num_bytes, bytes_allocated, usable_size); } + mirror::Object* AllocNonvirtualThreadUnsafe(Thread* self, size_t num_bytes, + size_t* bytes_allocated, size_t* usable_size) { + // RosAlloc zeroes memory internally. Pass in false for thread unsafe. + return AllocCommon<false>(self, num_bytes, bytes_allocated, usable_size); + } // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that mutator_lock is held. 
size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) @@ -116,6 +126,7 @@ class RosAllocSpace : public MallocSpace { size_t starting_size, size_t initial_size, bool low_memory_mode); private: + template<bool kThreadSafe = true> mirror::Object* AllocCommon(Thread* self, size_t num_bytes, size_t* bytes_allocated, size_t* usable_size); diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h index 0a87a160b3..dcf5357de7 100644 --- a/runtime/gc/space/space.h +++ b/runtime/gc/space/space.h @@ -203,9 +203,17 @@ class AllocSpace { // Allocate num_bytes without allowing growth. If the allocation // succeeds, the output parameter bytes_allocated will be set to the // actually allocated bytes which is >= num_bytes. + // Alloc can be called from multiple threads at the same time and must be thread-safe. virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, size_t* usable_size) = 0; + // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector. + virtual mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, + size_t* usable_size) + EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { + return Alloc(self, num_bytes, bytes_allocated, usable_size); + } + // Return the storage space required by obj. virtual size_t AllocationSize(mirror::Object* obj, size_t* usable_size) = 0; diff --git a/runtime/thread.cc b/runtime/thread.cc index 7470670d0b..e67a64f467 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -1018,7 +1018,8 @@ Thread::Thread(bool daemon) : tls32_(daemon), wait_monitor_(nullptr), interrupte tls32_.state_and_flags.as_struct.flags = 0; tls32_.state_and_flags.as_struct.state = kNative; memset(&tlsPtr_.held_mutexes[0], 0, sizeof(tlsPtr_.held_mutexes)); - std::fill(tlsPtr_.rosalloc_runs, tlsPtr_.rosalloc_runs + kRosAllocNumOfSizeBrackets, + std::fill(tlsPtr_.rosalloc_runs, + tlsPtr_.rosalloc_runs + gc::allocator::RosAlloc::kNumThreadLocalSizeBrackets, gc::allocator::RosAlloc::GetDedicatedFullRun()); for (uint32_t i = 0; i < kMaxCheckpoints; ++i) { tlsPtr_.checkpoint_functions[i] = nullptr; diff --git a/runtime/thread.h b/runtime/thread.h index f8692855cf..8c17082cec 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -29,6 +29,7 @@ #include "entrypoints/jni/jni_entrypoints.h" #include "entrypoints/portable/portable_entrypoints.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "gc/allocator/rosalloc.h" #include "globals.h" #include "jvalue.h" #include "object_callbacks.h" @@ -783,9 +784,6 @@ class Thread { return tlsPtr_.thread_local_objects; } - // ROS alloc TLS. - static constexpr size_t kRosAllocNumOfSizeBrackets = 34; - void* GetRosAllocRun(size_t index) const { return tlsPtr_.rosalloc_runs[index]; } @@ -1060,12 +1058,8 @@ class Thread { byte* thread_local_end; size_t thread_local_objects; - // Thread-local rosalloc runs. There are 34 size brackets in rosalloc - // runs (RosAlloc::kNumOfSizeBrackets). We can't refer to the - // RosAlloc class due to a header file circular dependency issue. - // To compensate, we check that the two values match at RosAlloc - // initialization time. - void* rosalloc_runs[kRosAllocNumOfSizeBrackets]; + // There are RosAlloc::kNumThreadLocalSizeBrackets thread-local size brackets per thread. + void* rosalloc_runs[gc::allocator::RosAlloc::kNumThreadLocalSizeBrackets]; // Thread-local allocation stack data/routines. mirror::Object** thread_local_alloc_stack_top; |