A custom 'runs-of-slots' memory allocator.
Bug: 9986565
Change-Id: I0eb73b9458752113f519483616536d219d5f798b
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
new file mode 100644
index 0000000..5dda80c
--- /dev/null
+++ b/runtime/gc/allocator/rosalloc.h
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
+#define ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_
+
+#include <set>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string>
+#include <sys/mman.h>
+#include <vector>
+
+#include "base/mutex.h"
+#include "base/logging.h"
+#include "globals.h"
+#include "utils.h"
+
+// Boilerplate to use hash_map/hash_set on both host and device.
+#ifdef HAVE_ANDROID_OS
+#include <hash_map>
+#include <hash_set>
+using std::hash_map;
+using std::hash_set;
+#else // HAVE_ANDROID_OS
+#ifdef __DEPRECATED
+#define ROSALLOC_OLD__DEPRECATED __DEPRECATED
+#undef __DEPRECATED
+#endif
+#include <ext/hash_map>
+#include <ext/hash_set>
+#ifdef ROSALLOC_OLD__DEPRECATED
+#define __DEPRECATED ROSALLOC_OLD__DEPRECATED
+#undef ROSALLOC_OLD__DEPRECATED
+#endif
+using __gnu_cxx::hash_map;
+using __gnu_cxx::hash_set;
+#endif // HAVE_ANDROID_OS
+
+namespace art {
+namespace gc {
+namespace allocator {
+
+// A Runs-of-slots memory allocator.
+class RosAlloc {
+ private:
+  // Represents a run of free pages.
+ class FreePageRun {
+ public:
+ byte magic_num_; // The magic number used for debugging only.
+
+ bool IsFree() const {
+ if (kIsDebugBuild) {
+ return magic_num_ == kMagicNumFree;
+ }
+ return true;
+ }
+ size_t ByteSize(RosAlloc* rosalloc) const EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+ const byte* fpr_base = reinterpret_cast<const byte*>(this);
+ size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
+ size_t byte_size = rosalloc->free_page_run_size_map_[pm_idx];
+ DCHECK_GE(byte_size, static_cast<size_t>(0));
+ DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+ return byte_size;
+ }
+ void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
+ EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+ DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+ byte* fpr_base = reinterpret_cast<byte*>(this);
+ size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
+ rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
+ }
+ void* Begin() {
+ return reinterpret_cast<void*>(this);
+ }
+ void* End(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+ byte* fpr_base = reinterpret_cast<byte*>(this);
+ byte* end = fpr_base + ByteSize(rosalloc);
+ return end;
+ }
+ void ReleasePages(RosAlloc* rosalloc) EXCLUSIVE_LOCKS_REQUIRED(rosalloc->lock_) {
+ size_t byte_size = ByteSize(rosalloc);
+ DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
+ if (kIsDebugBuild) {
+ // Exclude the first page that stores the magic number.
+ DCHECK_GE(byte_size, static_cast<size_t>(kPageSize));
+ byte_size -= kPageSize;
+ if (byte_size > 0) {
+ madvise(reinterpret_cast<byte*>(this) + kPageSize, byte_size, MADV_DONTNEED);
+ }
+ } else {
+ madvise(this, byte_size, MADV_DONTNEED);
+ }
+ }
+ };
+
+ // Represents a run of memory slots of the same size.
+ //
+ // A run's memory layout:
+ //
+ // +-------------------+
+ // | magic_num |
+ // +-------------------+
+ // | size_bracket_idx |
+ // +-------------------+
+ // | is_thread_local |
+ // +-------------------+
+ // | to_be_bulk_freed |
+ // +-------------------+
+ // | top_slot_idx |
+ // +-------------------+
+ // | |
+ // | alloc bit map |
+ // | |
+ // +-------------------+
+ // | |
+ // | bulk free bit map |
+ // | |
+ // +-------------------+
+ // | |
+ // | thread-local free |
+ // | bit map |
+ // | |
+ // +-------------------+
+ // | padding due to |
+ // | alignment |
+ // +-------------------+
+ // | slot 0 |
+ // +-------------------+
+ // | slot 1 |
+ // +-------------------+
+ // | slot 2 |
+ // +-------------------+
+ // ...
+ // +-------------------+
+ // | last slot |
+ // +-------------------+
+ //
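+  // As an illustration of the layout above: the fixed header (magic_num_
+  // through top_slot_idx_) occupies 1 + 1 + 1 + 1 + 4 = 8 bytes (see
+  // fixed_header_size()), and each bit map needs one bit per slot, i.e.
+  // roughly numOfSlots[idx] / 32 uint32_t words. The exact per-bracket
+  // header sizes and bit map offsets are precomputed by Initialize()
+  // into headerSizes[], bulkFreeBitMapOffsets[] and
+  // threadLocalFreeBitMapOffsets[].
+  //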
+ class Run {
+ public:
+ byte magic_num_; // The magic number used for debugging.
+ byte size_bracket_idx_; // The index of the size bracket of this run.
+ byte is_thread_local_; // True if this run is used as a thread-local run.
+ byte to_be_bulk_freed_; // Used within BulkFree() to flag a run that's involved with a bulk free.
+ uint32_t top_slot_idx_; // The top slot index when this run is in bump index mode.
+  uint32_t alloc_bit_map_[0]; // The bit map that indicates whether each slot is in use.
+
+  // bulk_free_bit_map_[] : The bit map that is used for GC to
+  // temporarily mark the slots to free without using a lock. After
+  // all the slots to be freed in a run are marked, all those slots
+  // get freed in bulk with one lock acquisition per run, as opposed
+  // to one per slot, to minimize lock contention. This is used
+  // within BulkFree().
+
+  // thread_local_free_bit_map_[] : The bit map that is used for GC
+  // to temporarily mark the slots to free in a thread-local run
+  // without using a lock (that is, without synchronizing with the
+  // thread that owns the run). When the thread-local run becomes
+  // full, the thread will check this bit map and update the
+  // allocation bit map of the run (that is, the slots get freed).
+
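+  // An illustrative bulk free sequence, pieced together from the
+  // comments in this file: for each pointer, BulkFree() calls
+  // MarkBulkFreeBitMap() to set a bit without holding a lock; then,
+  // once per run and under a lock, the marked bits are applied via
+  // MergeBulkFreeBitMapIntoAllocBitMap() (for a shared run) or
+  // UnionBulkFreeBitMapToThreadLocalFreeBitMap() (for a thread-local
+  // run).
+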
+ // Returns the byte size of the header except for the bit maps.
+ static size_t fixed_header_size() {
+ Run temp;
+ size_t size = reinterpret_cast<byte*>(&temp.alloc_bit_map_) - reinterpret_cast<byte*>(&temp);
+ DCHECK_EQ(size, static_cast<size_t>(8));
+ return size;
+ }
+  // Returns the base address of the bulk free bit map.
+  uint32_t* bulk_free_bit_map() {
+    return reinterpret_cast<uint32_t*>(
+        reinterpret_cast<byte*>(this) + bulkFreeBitMapOffsets[size_bracket_idx_]);
+  }
+  // Returns the base address of the thread-local free bit map.
+  uint32_t* thread_local_free_bit_map() {
+    return reinterpret_cast<uint32_t*>(
+        reinterpret_cast<byte*>(this) + threadLocalFreeBitMapOffsets[size_bracket_idx_]);
+  }
+ void* End() {
+ return reinterpret_cast<byte*>(this) + kPageSize * numOfPages[size_bracket_idx_];
+ }
+ // Frees slots in the allocation bit map with regard to the
+ // thread-local free bit map. Used when a thread-local run becomes
+ // full.
+ bool MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out);
+ // Frees slots in the allocation bit map with regard to the bulk
+ // free bit map. Used in a bulk free.
+ void MergeBulkFreeBitMapIntoAllocBitMap();
+  // Unions the slots to be freed in the bulk free bit map into the
+  // thread-local free bit map. In a bulk free, as a two-step
+  // process, GC will first record all the slots to free in a run in
+  // the bulk free bit map, where it can write without a lock, and
+  // will later acquire a lock once per run to union the bits of the
+  // bulk free bit map into the thread-local free bit map.
+  void UnionBulkFreeBitMapToThreadLocalFreeBitMap();
+ // Allocates a slot in a run.
+ void* AllocSlot();
+ // Frees a slot in a run. This is used in a non-bulk free.
+ void FreeSlot(void* ptr);
+  // Marks the slot at ptr as to be freed in the bulk free bit map.
+  void MarkBulkFreeBitMap(void* ptr);
+  // Marks the slot at ptr as to be freed in the thread-local free bit map.
+  void MarkThreadLocalFreeBitMap(void* ptr);
+ // Returns true if all the slots in the run are not in use.
+ bool IsAllFree();
+ // Returns true if all the slots in the run are in use.
+ bool IsFull();
+  // Clears all the bit maps.
+  void ClearBitMaps();
+  // Iterates over all the slots and applies the given function.
+  void InspectAllSlots(
+      void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+      void* arg);
+  // Dumps the run metadata for debugging.
+  void Dump();
+
+ private:
+  // The common part of MarkBulkFreeBitMap() and MarkThreadLocalFreeBitMap().
+ void MarkFreeBitMapShared(void* ptr, uint32_t* free_bit_map_base, const char* caller_name);
+ };
+
+ // The magic number for a run.
+ static const byte kMagicNum = 42;
+ // The magic number for free pages.
+ static const byte kMagicNumFree = 43;
+ // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
+ static const size_t kNumOfSizeBrackets = 34;
+ // The number of smaller size brackets that are 16 bytes apart.
+ static const size_t kNumOfQuantumSizeBrackets = 32;
+ // The sizes (the slot sizes, in bytes) of the size brackets.
+ static size_t bracketSizes[kNumOfSizeBrackets];
+ // The numbers of pages that are used for runs for each size bracket.
+ static size_t numOfPages[kNumOfSizeBrackets];
+ // The numbers of slots of the runs for each size bracket.
+ static size_t numOfSlots[kNumOfSizeBrackets];
+ // The header sizes in bytes of the runs for each size bracket.
+ static size_t headerSizes[kNumOfSizeBrackets];
+ // The byte offsets of the bulk free bit maps of the runs for each size bracket.
+ static size_t bulkFreeBitMapOffsets[kNumOfSizeBrackets];
+ // The byte offsets of the thread-local free bit maps of the runs for each size bracket.
+ static size_t threadLocalFreeBitMapOffsets[kNumOfSizeBrackets];
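+  // For example, as implied by BracketSizeToIndex() below: bracket 0
+  // holds 16-byte slots, bracket 1 holds 32-byte slots, and so on up to
+  // bracket 31 with 512-byte slots (the 32 quantum brackets, 16 bytes
+  // apart); bracket 32 holds 1 KB slots and bracket 33 holds 2 KB slots.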
+
+  // Initializes the run specs (the above arrays).
+ static void Initialize();
+ static bool initialized_;
+
+  // Returns the bracket size in bytes for the given index.
+ static size_t IndexToBracketSize(size_t idx) {
+ DCHECK(idx < kNumOfSizeBrackets);
+ return bracketSizes[idx];
+ }
+ // Returns the index of the size bracket from the bracket size.
+ static size_t BracketSizeToIndex(size_t size) {
+ DCHECK(16 <= size && ((size < 1 * KB && size % 16 == 0) || size == 1 * KB || size == 2 * KB));
+ size_t idx;
+ if (UNLIKELY(size == 1 * KB)) {
+ idx = kNumOfSizeBrackets - 2;
+ } else if (UNLIKELY(size == 2 * KB)) {
+ idx = kNumOfSizeBrackets - 1;
+ } else {
+ DCHECK(size < 1 * KB);
+ DCHECK_EQ(size % 16, static_cast<size_t>(0));
+ idx = size / 16 - 1;
+ }
+ DCHECK(bracketSizes[idx] == size);
+ return idx;
+ }
+  // Rounds up the size to the nearest bracket size.
+ static size_t RoundToBracketSize(size_t size) {
+ DCHECK(size <= kLargeSizeThreshold);
+ if (LIKELY(size <= 512)) {
+ return RoundUp(size, 16);
+ } else if (512 < size && size <= 1 * KB) {
+ return 1 * KB;
+ } else {
+ DCHECK(1 * KB < size && size <= 2 * KB);
+ return 2 * KB;
+ }
+ }
+ // Returns the size bracket index from the byte size with rounding.
+ static size_t SizeToIndex(size_t size) {
+ DCHECK(size <= kLargeSizeThreshold);
+ if (LIKELY(size <= 512)) {
+ return RoundUp(size, 16) / 16 - 1;
+ } else if (512 < size && size <= 1 * KB) {
+ return kNumOfSizeBrackets - 2;
+ } else {
+ DCHECK(1 * KB < size && size <= 2 * KB);
+ return kNumOfSizeBrackets - 1;
+ }
+ }
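+  // For example: SizeToIndex(17) == 1 and RoundToBracketSize(17) == 32;
+  // SizeToIndex(512) == 31; SizeToIndex(600) == 32 (the 1 KB bracket).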
+ // A combination of SizeToIndex() and RoundToBracketSize().
+ static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
+ DCHECK(size <= kLargeSizeThreshold);
+ if (LIKELY(size <= 512)) {
+ size_t bracket_size = RoundUp(size, 16);
+ *bracket_size_out = bracket_size;
+ size_t idx = bracket_size / 16 - 1;
+ DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+ return idx;
+ } else if (512 < size && size <= 1 * KB) {
+ size_t bracket_size = 1024;
+ *bracket_size_out = bracket_size;
+ size_t idx = kNumOfSizeBrackets - 2;
+ DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+ return idx;
+ } else {
+ DCHECK(1 * KB < size && size <= 2 * KB);
+ size_t bracket_size = 2048;
+ *bracket_size_out = bracket_size;
+ size_t idx = kNumOfSizeBrackets - 1;
+ DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
+ return idx;
+ }
+ }
+ // Returns the page map index from an address. Requires that the
+ // address is page size aligned.
+ size_t ToPageMapIndex(const void* addr) const {
+ DCHECK(base_ <= addr && addr < base_ + capacity_);
+ size_t byte_offset = reinterpret_cast<const byte*>(addr) - base_;
+ DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
+ return byte_offset / kPageSize;
+ }
+  // Returns the page map index from an address, rounding down. Unlike
+  // ToPageMapIndex(), the address does not need to be page size aligned.
+ size_t RoundDownToPageMapIndex(void* addr) {
+ DCHECK(base_ <= addr && addr < reinterpret_cast<byte*>(base_) + capacity_);
+ return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
+ }
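+  // For example, if addr == base_ + 3 * kPageSize, both functions map it
+  // to index 3; RoundDownToPageMapIndex() also accepts unaligned
+  // addresses, so base_ + 3 * kPageSize + 1 maps to index 3 as well.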
+
+  // A memory allocation request larger than this size is treated as a large
+  // object and allocated at page granularity.
+ static const size_t kLargeSizeThreshold = 2048;
+
+  // We use thread-local runs for the size brackets whose indexes
+  // are less than or equal to this index. We use shared (current)
+  // runs for the rest.
+ static const size_t kMaxThreadLocalSizeBracketIdx = 10;
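+  // With the bracket sizes described above, index 10 would correspond to
+  // 176-byte slots ((10 + 1) * 16), so allocations of up to 176 bytes
+  // would be served from thread-local runs (illustrative; the actual
+  // sizes are filled in by Initialize()).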
+
+ struct hash_run {
+ size_t operator()(const RosAlloc::Run* r) const {
+ return reinterpret_cast<size_t>(r);
+ }
+ };
+
+ struct eq_run {
+ bool operator()(const RosAlloc::Run* r1, const RosAlloc::Run* r2) const {
+ return r1 == r2;
+ }
+ };
+
+ // The base address of the memory region that's managed by this allocator.
+ byte* base_;
+
+ // The footprint in bytes of the currently allocated portion of the
+ // memory region.
+ size_t footprint_;
+
+ // The maximum footprint. The address, base_ + capacity_, indicates
+ // the end of the memory region that's managed by this allocator.
+ size_t capacity_;
+
+ // The run sets that hold the runs whose slots are not all
+ // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
+ std::set<Run*> non_full_runs_[kNumOfSizeBrackets];
+ // The run sets that hold the runs whose slots are all full. This is
+ // debug only. full_runs_[i] is guarded by size_bracket_locks_[i].
+ hash_set<Run*, hash_run, eq_run> full_runs_[kNumOfSizeBrackets];
+ // The set of free pages.
+ std::set<FreePageRun*> free_page_runs_ GUARDED_BY(lock_);
+ // The free page run whose end address is the end of the memory
+ // region that's managed by this allocator, if any.
+ FreePageRun* last_free_page_run_;
+  // The current runs where the allocations are first attempted for
+  // the size brackets that do not use thread-local
+  // runs. current_runs_[i] is guarded by size_bracket_locks_[i].
+ Run* current_runs_[kNumOfSizeBrackets];
+ // The mutexes, one per size bracket.
+ Mutex* size_bracket_locks_[kNumOfSizeBrackets];
+ // The types of page map entries.
+ enum {
+ kPageMapEmpty = 0, // Not allocated.
+ kPageMapRun = 1, // The beginning of a run.
+ kPageMapRunPart = 2, // The non-beginning part of a run.
+ kPageMapLargeObject = 3, // The beginning of a large object.
+ kPageMapLargeObjectPart = 4, // The non-beginning part of a large object.
+ };
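+  // For example, a three-page run immediately followed by a two-page
+  // large object would occupy five consecutive page map entries:
+  // { kPageMapRun, kPageMapRunPart, kPageMapRunPart,
+  //   kPageMapLargeObject, kPageMapLargeObjectPart }.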
+ // The table that indicates what pages are currently used for.
+ std::vector<byte> page_map_ GUARDED_BY(lock_);
+  // The table that indicates the size of free page runs. These sizes
+  // are stored here rather than in the free page header so that the
+  // backing pages of free runs can be released (madvised away).
+ std::vector<size_t> free_page_run_size_map_ GUARDED_BY(lock_);
+ // The global lock. Used to guard the page map, the free page set,
+ // and the footprint.
+ Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+ // The reader-writer lock to allow one bulk free at a time while
+ // allowing multiple individual frees at the same time.
+ ReaderWriterMutex bulk_free_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
+
+ // The base address of the memory region that's managed by this allocator.
+ byte* Begin() { return base_; }
+ // The end address of the memory region that's managed by this allocator.
+ byte* End() { return base_ + capacity_; }
+
+  // Page-granularity alloc/free.
+ void* AllocPages(Thread* self, size_t num_pages, byte page_map_type)
+ EXCLUSIVE_LOCKS_REQUIRED(lock_);
+ void FreePages(Thread* self, void* ptr) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+ // Allocate/free a run slot.
+ void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
+ LOCKS_EXCLUDED(lock_);
+ void FreeFromRun(Thread* self, void* ptr, Run* run)
+ LOCKS_EXCLUDED(lock_);
+
+ // Used to acquire a new/reused run for a size bracket. Used when a
+ // thread-local or current run gets full.
+ Run* RefillRun(Thread* self, size_t idx) LOCKS_EXCLUDED(lock_);
+
+  // The internal implementation of the non-bulk Free().
+ void FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_);
+
+ public:
+ RosAlloc(void* base, size_t capacity);
+ void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
+ LOCKS_EXCLUDED(lock_);
+ void Free(Thread* self, void* ptr)
+ LOCKS_EXCLUDED(bulk_free_lock_);
+ void BulkFree(Thread* self, void** ptrs, size_t num_ptrs)
+ LOCKS_EXCLUDED(bulk_free_lock_);
+ // Returns the size of the allocated slot for a given allocated memory chunk.
+ size_t UsableSize(void* ptr);
+  // Returns the size of the slot that would be allocated for a given request size.
+ size_t UsableSize(size_t bytes) {
+ if (UNLIKELY(bytes > kLargeSizeThreshold)) {
+ return RoundUp(bytes, kPageSize);
+ } else {
+ return RoundToBracketSize(bytes);
+ }
+ }
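+  // For example: UsableSize(17) returns 32 (the rounded bracket size),
+  // while UsableSize(kLargeSizeThreshold + 1) rounds up to a multiple of
+  // kPageSize.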
+  // Tries to reduce the current footprint by releasing the free page
+  // run at the end of the memory region, if any.
+ bool Trim();
+  // Iterates over all the memory slots and applies the given function.
+ void InspectAll(void (*handler)(void* start, void* end, size_t used_bytes, void* callback_arg),
+ void* arg)
+ LOCKS_EXCLUDED(lock_);
+ // Returns the current footprint.
+ size_t Footprint() LOCKS_EXCLUDED(lock_);
+  // Returns the current capacity (the maximum footprint).
+ size_t FootprintLimit() LOCKS_EXCLUDED(lock_);
+  // Updates the current capacity.
+ void SetFootprintLimit(size_t bytes) LOCKS_EXCLUDED(lock_);
+ // Releases the thread-local runs assigned to the given thread back to the common set of runs.
+ void RevokeThreadLocalRuns(Thread* thread);
+ // Releases the thread-local runs assigned to all the threads back to the common set of runs.
+ void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_);
+ // Dumps the page map for debugging.
+ void DumpPageMap(Thread* self);
+
+ // Callbacks for InspectAll that will count the number of bytes
+ // allocated and objects allocated, respectively.
+ static void BytesAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+ static void ObjectsAllocatedCallback(void* start, void* end, size_t used_bytes, void* arg);
+};
+
+} // namespace allocator
+} // namespace gc
+} // namespace art
+
+#endif // ART_RUNTIME_GC_ALLOCATOR_ROSALLOC_H_