RosAlloc thread-local allocation path without a CAS.
Speedup on N4:
MemAllocTest 3044 -> 2396 (~21% reduction)
BinaryTrees 4101 -> 2929 (~26% reduction)
Bug: 9986565
Change-Id: Ia1d1a37b9e001f903c3c056e8ec68fc8c623a78b
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 3269e10..d1e7ad9 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -230,8 +230,10 @@
static uint32_t GetBitmapLastVectorMask(size_t num_slots, size_t num_vec);
// Returns true if all the slots in the run are not in use.
bool IsAllFree();
+ // Returns the number of free slots.
+ size_t NumberOfFreeSlots();
// Returns true if all the slots in the run are in use.
- bool IsFull();
+ ALWAYS_INLINE bool IsFull();
// Returns true if the bulk free bit map is clean.
bool IsBulkFreeBitmapClean();
// Returns true if the thread local free bit map is clean.
@@ -309,6 +311,15 @@
DCHECK(bracketSizes[idx] == size);
return idx;
+ // Returns true if the given allocation size is for a thread local allocation.
+ static bool IsSizeForThreadLocal(size_t size) {
+ DCHECK_GT(kNumThreadLocalSizeBrackets, 0U);  // Keeps the index computed below from underflowing.
+ size_t max_thread_local_bracket_idx = kNumThreadLocalSizeBrackets - 1;  // Last such bracket.
+ bool is_size_for_thread_local = size <= bracketSizes[max_thread_local_bracket_idx];
+ DCHECK(size > kLargeSizeThreshold ||  // Cross-check is skipped above kLargeSizeThreshold.
+ (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets)));
+ return is_size_for_thread_local;
+ }
// Rounds up the size up the nearest bracket size.
static size_t RoundToBracketSize(size_t size) {
DCHECK(size <= kLargeSizeThreshold);
@@ -504,11 +515,13 @@
size_t FreePages(Thread* self, void* ptr, bool already_zero) EXCLUSIVE_LOCKS_REQUIRED(lock_);
// Allocate/free a run slot.
- void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
+ void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
+ size_t* bytes_tl_bulk_allocated)
// Allocate/free a run slot without acquiring locks.
// TODO: EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
- void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated)
+ void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated,
+ size_t* usable_size, size_t* bytes_tl_bulk_allocated)
void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx);
@@ -527,7 +540,9 @@
size_t FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_);
// Allocates large objects.
- void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
+ void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated,
+ size_t* usable_size, size_t* bytes_tl_bulk_allocated)
// Revoke a run by adding it to non_full_runs_ or freeing the pages.
void RevokeRun(Thread* self, size_t idx, Run* run);
@@ -551,13 +566,26 @@
// If kThreadUnsafe is true then the allocator may avoid acquiring some locks as an optimization.
// If used, this may cause race conditions if multiple threads are allocating at the same time.
template<bool kThreadSafe = true>
- void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
+ void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size,
+ size_t* bytes_tl_bulk_allocated)
size_t Free(Thread* self, void* ptr)
size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
+ // Returns true if the given allocation request can be allocated in
+ // an existing thread local run without allocating a new run.
+ ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size);
+ // Allocate the given allocation request in an existing thread local
+ // run without allocating a new run.
+ ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated);
+ // Returns the maximum bytes that could be allocated for the given
+ // size in bulk, that is the maximum value for the
+ // bytes_tl_bulk_allocated out param returned by RosAlloc::Alloc().
+ ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size);
// Returns the size of the allocated slot for a given allocated memory chunk.
size_t UsableSize(const void* ptr);
// Returns the size of the allocated slot for a given size.
@@ -586,9 +614,13 @@
void SetFootprintLimit(size_t bytes) LOCKS_EXCLUDED(lock_);
// Releases the thread-local runs assigned to the given thread back to the common set of runs.
- void RevokeThreadLocalRuns(Thread* thread);
+ // Returns the total bytes of free slots in the revoked thread local runs. This is to be
+ // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
+ size_t RevokeThreadLocalRuns(Thread* thread);
// Releases the thread-local runs assigned to all the threads back to the common set of runs.
- void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
+ // Returns the total bytes of free slots in the revoked thread local runs. This is to be
+ // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting.
+ size_t RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
// Assert the thread local runs of a thread are revoked.
void AssertThreadLocalRunsAreRevoked(Thread* thread);
// Assert all the thread local runs are revoked.