author Richard Neill <richard.neill@arm.com> 2024-01-07 15:48:41 +0000
committer Hans Boehm <hboehm@google.com> 2024-01-24 05:15:45 +0000
commit dd98d26e9bb6add7b79efb5abe01a437b33a3b96 (patch)
tree 54182f7af0e17a530f4cf218a1d6fae4f092807f
parent 3027926d518f624b0287fcab705d7e5cd93b9b40 (diff)
Optimize division by / modulo of gPageSize
When running under the page-size-agnostic configuration, the global constant gPageSize is always set to a power-of-two value at static initialization time, but the compiler may not recognise it as a guaranteed power of two for the purpose of optimization. As a result, divisions by gPageSize may not be replaced by an efficient right-shift, and modulo gPageSize may not be optimized into a bitwise-AND.

This patch introduces two functions: one that performs division by gPageSize as a logical right-shift, and one that computes modulo gPageSize via a bitwise-AND. These optimized implementations are then inlined everywhere that divides by or takes modulo of gPageSize. Because they are inlined, the compiler can reuse registers when multiple divisions or modulos are required (for example, the result of WhichPowerOf2 on the page size is kept live across subsequent divisions, each of which then becomes a single right-shift).

The tests were run for legacy 4K, page-size-agnostic 4K, and 16K.

Test: art/tools/run-gtests.sh
Test: art/test/testrunner/testrunner.py --target --64
Test: art/tools/run-libcore-tests.sh --mode=device --variant=X64
Test: art/tools/run-libjdwp-tests.sh --mode=device --variant=X64
Change-Id: I01bd5fa89b88806c84660d4a2d62ddcba061678c
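For reference, here is a minimal standalone sketch of the idea (illustrative names only, not the ART implementation; it assumes, as the patch does, that the page size is a power of two): under that assumption, division reduces to a right-shift and modulo to a bitwise-AND.

// Minimal standalone sketch, not the ART code: gIllustrativePageSize,
// PageShift(), and the *Sketch() helpers are illustrative names.
// Assumes the page size is a power of two, as gPageSize is guaranteed to be.
#include <cassert>
#include <cstddef>

namespace {

size_t gIllustrativePageSize = 16 * 1024;  // e.g. a 16K page size, set once at startup.

// Analogous to WhichPowerOf2: index of the single set bit.
inline size_t PageShift() {
  size_t shift = 0;
  while ((gIllustrativePageSize >> shift) != 1) {
    ++shift;
  }
  return shift;
}

inline size_t DivideByPageSizeSketch(size_t num) {
  return num >> PageShift();                  // division becomes a right-shift
}

inline size_t ModuloPageSizeSketch(size_t num) {
  return num & (gIllustrativePageSize - 1);   // modulo becomes a bitwise-AND
}

}  // namespace

int main() {
  for (size_t n : {size_t{0}, size_t{1}, size_t{4095}, size_t{65536}, size_t{123456789}}) {
    assert(DivideByPageSizeSketch(n) == n / gIllustrativePageSize);
    assert(ModuloPageSizeSketch(n) == n % gIllustrativePageSize);
  }
  return 0;
}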
-rw-r--r--  runtime/base/gc_visited_arena_pool.cc  |  8
-rw-r--r--  runtime/base/gc_visited_arena_pool.h   | 10
-rw-r--r--  runtime/gc/allocator/rosalloc.cc       | 70
-rw-r--r--  runtime/gc/allocator/rosalloc.h        | 12
-rw-r--r--  runtime/gc/collector/garbage_collector.cc | 4
-rw-r--r--  runtime/gc/collector/mark_compact.cc   | 28
-rw-r--r--  runtime/runtime_globals.h              | 17
7 files changed, 83 insertions, 66 deletions
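The call-site pattern applied throughout the hunks below is uniform; as an illustration (mirroring the first hunk), each division or modulo of gPageSize is rewritten to call the new inlined helper:

// Before: may compile to a runtime division, since the compiler cannot
// assume gPageSize is a power of two in the page-size-agnostic build.
size_t arr_size = size / gPageSize;
// After: the inlined helper guarantees a single right-shift.
size_t arr_size = DivideByPageSize(size);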
diff --git a/runtime/base/gc_visited_arena_pool.cc b/runtime/base/gc_visited_arena_pool.cc
index 3791f44e69..57606d11a6 100644
--- a/runtime/base/gc_visited_arena_pool.cc
+++ b/runtime/base/gc_visited_arena_pool.cc
@@ -43,7 +43,7 @@ TrackedArena::TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork, bo
} else {
DCHECK_ALIGNED_PARAM(size, gPageSize);
DCHECK_ALIGNED_PARAM(start, gPageSize);
- size_t arr_size = size / gPageSize;
+ size_t arr_size = DivideByPageSize(size);
first_obj_array_.reset(new uint8_t*[arr_size]);
std::fill_n(first_obj_array_.get(), arr_size, nullptr);
}
@@ -69,7 +69,7 @@ void TrackedArena::Release() {
if (bytes_allocated_ > 0) {
ReleasePages(Begin(), Size(), pre_zygote_fork_);
if (first_obj_array_.get() != nullptr) {
- std::fill_n(first_obj_array_.get(), Size() / gPageSize, nullptr);
+ std::fill_n(first_obj_array_.get(), DivideByPageSize(Size()), nullptr);
}
bytes_allocated_ = 0;
}
@@ -81,8 +81,8 @@ void TrackedArena::SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end) {
DCHECK_LT(static_cast<void*>(obj_begin), static_cast<void*>(obj_end));
GcVisitedArenaPool* arena_pool =
static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
- size_t idx = static_cast<size_t>(obj_begin - Begin()) / gPageSize;
- size_t last_byte_idx = static_cast<size_t>(obj_end - 1 - Begin()) / gPageSize;
+ size_t idx = DivideByPageSize(static_cast<size_t>(obj_begin - Begin()));
+ size_t last_byte_idx = DivideByPageSize(static_cast<size_t>(obj_end - 1 - Begin()));
// Do the update below with arena-pool's lock in shared-mode to serialize with
// the compaction-pause wherein we acquire it exclusively. This is to ensure
// that last-byte read there doesn't change after reading it and before
diff --git a/runtime/base/gc_visited_arena_pool.h b/runtime/base/gc_visited_arena_pool.h
index 028741f063..802303d2f8 100644
--- a/runtime/base/gc_visited_arena_pool.h
+++ b/runtime/base/gc_visited_arena_pool.h
@@ -46,7 +46,9 @@ class TrackedArena final : public Arena {
if (first_obj_array_.get() != nullptr) {
DCHECK_ALIGNED_PARAM(Size(), gPageSize);
DCHECK_ALIGNED_PARAM(Begin(), gPageSize);
- for (int i = 0, nr_pages = Size() / gPageSize; i < nr_pages; i++, page_begin += gPageSize) {
+ for (int i = 0, nr_pages = DivideByPageSize(Size());
+ i < nr_pages;
+ i++, page_begin += gPageSize) {
uint8_t* first = first_obj_array_[i];
if (first != nullptr) {
visitor(page_begin, first, gPageSize);
@@ -77,7 +79,7 @@ class TrackedArena final : public Arena {
} else {
DCHECK_EQ(last_byte, End());
}
- for (size_t i = (last_byte - Begin()) / gPageSize;
+ for (size_t i = DivideByPageSize(last_byte - Begin());
last_byte < End() && first_obj_array_[i] != nullptr;
last_byte += gPageSize, i++) {
// No body.
@@ -89,7 +91,7 @@ class TrackedArena final : public Arena {
DCHECK_LE(Begin(), addr);
DCHECK_GT(End(), addr);
if (first_obj_array_.get() != nullptr) {
- return first_obj_array_[(addr - Begin()) / gPageSize];
+ return first_obj_array_[DivideByPageSize(addr - Begin())];
} else {
// The pages of this arena contain array of GC-roots. So we don't need
// first-object of any given page of the arena.
@@ -252,7 +254,7 @@ class GcVisitedArenaPool final : public ArenaPool {
class TrackedArenaHash {
public:
size_t operator()(const TrackedArena* arena) const {
- return std::hash<size_t>{}(reinterpret_cast<uintptr_t>(arena->Begin()) / gPageSize);
+ return std::hash<size_t>{}(DivideByPageSize(reinterpret_cast<uintptr_t>(arena->Begin())));
}
};
using AllocatedArenaSet =
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index b50ffdcd3e..03f52eeaa9 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -88,8 +88,8 @@ RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity,
current_runs_[i] = dedicated_full_run_;
}
DCHECK_EQ(footprint_, capacity_);
- size_t num_of_pages = footprint_ / gPageSize;
- size_t max_num_of_pages = max_capacity_ / gPageSize;
+ size_t num_of_pages = DivideByPageSize(footprint_);
+ size_t max_num_of_pages = DivideByPageSize(max_capacity_);
std::string error_msg;
page_map_mem_map_ = MemMap::MapAnonymous("rosalloc page map",
RoundUp(max_num_of_pages, gPageSize),
@@ -106,7 +106,7 @@ RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity,
free_pages->magic_num_ = kMagicNumFree;
}
free_pages->SetByteSize(this, capacity_);
- DCHECK_EQ(capacity_ % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(capacity_), static_cast<size_t>(0));
DCHECK(free_pages->IsFree());
free_pages->ReleasePages(this);
DCHECK(free_pages->IsFree());
@@ -137,7 +137,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type
FreePageRun* fpr = *it;
DCHECK(fpr->IsFree());
size_t fpr_byte_size = fpr->ByteSize(this);
- DCHECK_EQ(fpr_byte_size % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(fpr_byte_size), static_cast<size_t>(0));
if (req_byte_size <= fpr_byte_size) {
// Found one.
it = free_page_runs_.erase(it);
@@ -154,7 +154,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type
remainder->magic_num_ = kMagicNumFree;
}
remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
- DCHECK_EQ(remainder->ByteSize(this) % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(remainder->ByteSize(this)), static_cast<size_t>(0));
// Don't need to call madvise on remainder here.
free_page_runs_.insert(remainder);
if (kTraceRosAlloc) {
@@ -163,7 +163,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type
<< " into free_page_runs_";
}
fpr->SetByteSize(this, req_byte_size);
- DCHECK_EQ(fpr->ByteSize(this) % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(fpr->ByteSize(this)), static_cast<size_t>(0));
}
res = fpr;
break;
@@ -191,9 +191,9 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type
// If we grow the heap, we can allocate it.
size_t increment = std::min(std::max(2 * MB, req_byte_size - last_free_page_run_size),
capacity_ - footprint_);
- DCHECK_EQ(increment % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(increment), static_cast<size_t>(0));
size_t new_footprint = footprint_ + increment;
- size_t new_num_of_pages = new_footprint / gPageSize;
+ size_t new_num_of_pages = DivideByPageSize(new_footprint);
DCHECK_LT(page_map_size_, new_num_of_pages);
DCHECK_LT(free_page_run_size_map_.size(), new_num_of_pages);
page_map_size_ = new_num_of_pages;
@@ -204,7 +204,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type
// There was a free page run at the end. Expand its size.
DCHECK_EQ(last_free_page_run_size, last_free_page_run->ByteSize(this));
last_free_page_run->SetByteSize(this, last_free_page_run_size + increment);
- DCHECK_EQ(last_free_page_run->ByteSize(this) % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(last_free_page_run->ByteSize(this)), static_cast<size_t>(0));
DCHECK_EQ(last_free_page_run->End(this), base_ + new_footprint);
} else {
// Otherwise, insert a new free page run at the end.
@@ -213,7 +213,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type
new_free_page_run->magic_num_ = kMagicNumFree;
}
new_free_page_run->SetByteSize(this, increment);
- DCHECK_EQ(new_free_page_run->ByteSize(this) % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(new_free_page_run->ByteSize(this)), static_cast<size_t>(0));
free_page_runs_.insert(new_free_page_run);
DCHECK_EQ(*free_page_runs_.rbegin(), new_free_page_run);
if (kTraceRosAlloc) {
@@ -238,7 +238,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type
DCHECK_EQ(last_free_page_run, fpr);
}
size_t fpr_byte_size = fpr->ByteSize(this);
- DCHECK_EQ(fpr_byte_size % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(fpr_byte_size), static_cast<size_t>(0));
DCHECK_LE(req_byte_size, fpr_byte_size);
free_page_runs_.erase(fpr);
if (kTraceRosAlloc) {
@@ -252,7 +252,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type
remainder->magic_num_ = kMagicNumFree;
}
remainder->SetByteSize(this, fpr_byte_size - req_byte_size);
- DCHECK_EQ(remainder->ByteSize(this) % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(remainder->ByteSize(this)), static_cast<size_t>(0));
free_page_runs_.insert(remainder);
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::AllocPages() : Inserted run 0x" << std::hex
@@ -260,7 +260,7 @@ void* RosAlloc::AllocPages(Thread* self, size_t num_pages, uint8_t page_map_type
<< " into free_page_runs_";
}
fpr->SetByteSize(this, req_byte_size);
- DCHECK_EQ(fpr->ByteSize(this) % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(fpr->ByteSize(this)), static_cast<size_t>(0));
}
res = fpr;
}
@@ -374,7 +374,7 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
}
for (auto it = free_page_runs_.upper_bound(fpr); it != free_page_runs_.end(); ) {
FreePageRun* h = *it;
- DCHECK_EQ(h->ByteSize(this) % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(h->ByteSize(this)), static_cast<size_t>(0));
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a higher free page run 0x"
<< std::hex << reinterpret_cast<uintptr_t>(h) << " [" << std::dec << ToPageMapIndex(h) << "] -0x"
@@ -396,7 +396,7 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
<< " from free_page_runs_";
}
fpr->SetByteSize(this, fpr->ByteSize(this) + h->ByteSize(this));
- DCHECK_EQ(fpr->ByteSize(this) % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(fpr->ByteSize(this)), static_cast<size_t>(0));
} else {
// Not adjacent. Stop.
if (kTraceRosAlloc) {
@@ -410,7 +410,7 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
--it;
FreePageRun* l = *it;
- DCHECK_EQ(l->ByteSize(this) % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(l->ByteSize(this)), static_cast<size_t>(0));
if (kTraceRosAlloc) {
LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce with a lower free page run 0x"
<< std::hex << reinterpret_cast<uintptr_t>(l) << " [" << std::dec << ToPageMapIndex(l) << "] -0x"
@@ -428,7 +428,7 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
<< " from free_page_runs_";
}
l->SetByteSize(this, l->ByteSize(this) + fpr->ByteSize(this));
- DCHECK_EQ(l->ByteSize(this) % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(l->ByteSize(this)), static_cast<size_t>(0));
// Clear magic num since this is no longer the start of a free page run.
if (kIsDebugBuild) {
fpr->magic_num_ = 0;
@@ -445,7 +445,7 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) {
}
// Insert it.
- DCHECK_EQ(fpr->ByteSize(this) % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(fpr->ByteSize(this)), static_cast<size_t>(0));
DCHECK(free_page_runs_.find(fpr) == free_page_runs_.end());
DCHECK(fpr->IsFree());
fpr->ReleasePages(this);
@@ -464,7 +464,7 @@ void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_alloca
DCHECK(bytes_allocated != nullptr);
DCHECK(usable_size != nullptr);
DCHECK_GT(size, kLargeSizeThreshold);
- size_t num_pages = RoundUp(size, gPageSize) / gPageSize;
+ size_t num_pages = DivideByPageSize(RoundUp(size, gPageSize));
void* r;
{
MutexLock mu(self, lock_);
@@ -519,7 +519,7 @@ size_t RosAlloc::FreeInternal(Thread* self, void* ptr) {
// Find the beginning of the run.
do {
--pm_idx;
- DCHECK_LT(pm_idx, capacity_ / gPageSize);
+ DCHECK_LT(pm_idx, DivideByPageSize(capacity_));
} while (page_map_[pm_idx] != kPageMapRun);
FALLTHROUGH_INTENDED;
case kPageMapRun:
@@ -1043,7 +1043,7 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
// Find the beginning of the run.
do {
--pi;
- DCHECK_LT(pi, capacity_ / gPageSize);
+ DCHECK_LT(pi, DivideByPageSize(capacity_));
} while (page_map_[pi] != kPageMapRun);
run = reinterpret_cast<Run*>(base_ + pi * gPageSize);
} else if (page_map_entry == kPageMapLargeObject) {
@@ -1070,7 +1070,7 @@ size_t RosAlloc::BulkFree(Thread* self, void** ptrs, size_t num_ptrs) {
// Find the beginning of the run.
do {
--pi;
- DCHECK_LT(pi, capacity_ / gPageSize);
+ DCHECK_LT(pi, DivideByPageSize(capacity_));
} while (page_map_[pi] != kPageMapRun);
run = reinterpret_cast<Run*>(base_ + pi * gPageSize);
} else if (page_map_entry == kPageMapLargeObject) {
@@ -1229,7 +1229,7 @@ std::string RosAlloc::DumpPageMap() {
DCHECK_EQ(curr_fpr_size, static_cast<size_t>(0));
curr_fpr = fpr;
curr_fpr_size = fpr->ByteSize(this);
- DCHECK_EQ(curr_fpr_size % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(curr_fpr_size), static_cast<size_t>(0));
remaining_curr_fpr_size = curr_fpr_size - gPageSize;
stream << "[" << i << "]=" << (pm == kPageMapReleased ? "Released" : "Empty")
<< " (FPR start) fpr_size=" << curr_fpr_size
@@ -1245,7 +1245,7 @@ std::string RosAlloc::DumpPageMap() {
// Still part of the current free page run.
DCHECK_NE(num_running_empty_pages, static_cast<size_t>(0));
DCHECK(curr_fpr != nullptr && curr_fpr_size > 0 && remaining_curr_fpr_size > 0);
- DCHECK_EQ(remaining_curr_fpr_size % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(remaining_curr_fpr_size), static_cast<size_t>(0));
DCHECK_GE(remaining_curr_fpr_size, static_cast<size_t>(gPageSize));
remaining_curr_fpr_size -= gPageSize;
stream << "[" << i << "]=Empty (FPR part)"
@@ -1327,7 +1327,7 @@ size_t RosAlloc::UsableSize(const void* ptr) {
// Find the beginning of the run.
while (page_map_[pm_idx] != kPageMapRun) {
pm_idx--;
- DCHECK_LT(pm_idx, capacity_ / gPageSize);
+ DCHECK_LT(pm_idx, DivideByPageSize(capacity_));
}
DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
Run* run = reinterpret_cast<Run*>(base_ + pm_idx * gPageSize);
@@ -1348,19 +1348,19 @@ size_t RosAlloc::UsableSize(const void* ptr) {
bool RosAlloc::Trim() {
MutexLock mu(Thread::Current(), lock_);
FreePageRun* last_free_page_run;
- DCHECK_EQ(footprint_ % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(footprint_), static_cast<size_t>(0));
auto it = free_page_runs_.rbegin();
if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
// Remove the last free page run, if any.
DCHECK(last_free_page_run->IsFree());
DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
- DCHECK_EQ(last_free_page_run->ByteSize(this) % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(last_free_page_run->ByteSize(this)), static_cast<size_t>(0));
DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
free_page_runs_.erase(last_free_page_run);
size_t decrement = last_free_page_run->ByteSize(this);
size_t new_footprint = footprint_ - decrement;
- DCHECK_EQ(new_footprint % gPageSize, static_cast<size_t>(0));
- size_t new_num_of_pages = new_footprint / gPageSize;
+ DCHECK_EQ(ModuloPageSize(new_footprint), static_cast<size_t>(0));
+ size_t new_num_of_pages = DivideByPageSize(new_footprint);
DCHECK_GE(page_map_size_, new_num_of_pages);
// Zero out the tail of the page map.
uint8_t* zero_begin = const_cast<uint8_t*>(page_map_) + new_num_of_pages;
@@ -1422,13 +1422,13 @@ void RosAlloc::InspectAll(void (*handler)(void* start, void* end, size_t used_by
}
void* end = reinterpret_cast<uint8_t*>(fpr) + fpr_size;
handler(start, end, 0, arg);
- size_t num_pages = fpr_size / gPageSize;
+ size_t num_pages = DivideByPageSize(fpr_size);
if (kIsDebugBuild) {
for (size_t j = i + 1; j < i + num_pages; ++j) {
DCHECK(IsFreePage(j));
}
}
- i += fpr_size / gPageSize;
+ i += DivideByPageSize(fpr_size);
DCHECK_LE(i, pm_end);
break;
}
@@ -1770,7 +1770,7 @@ void RosAlloc::Verify() {
size_t fpr_size = fpr->ByteSize(this);
CHECK_ALIGNED_PARAM(fpr_size, gPageSize)
<< "A free page run size isn't page-aligned : " << fpr_size;
- size_t num_pages = fpr_size / gPageSize;
+ size_t num_pages = DivideByPageSize(fpr_size);
CHECK_GT(num_pages, static_cast<uintptr_t>(0))
<< "A free page run size must be > 0 : " << fpr_size;
for (size_t j = i + 1; j < i + num_pages; ++j) {
@@ -1801,7 +1801,7 @@ void RosAlloc::Verify() {
size_t obj_size = obj->SizeOf();
CHECK_GT(obj_size + memory_tool_modifier, kLargeSizeThreshold)
<< "A rosalloc large object size must be > " << kLargeSizeThreshold;
- CHECK_EQ(num_pages, RoundUp(obj_size + memory_tool_modifier, gPageSize) / gPageSize)
+ CHECK_EQ(num_pages, DivideByPageSize(RoundUp(obj_size + memory_tool_modifier, gPageSize)))
<< "A rosalloc large object size " << obj_size + memory_tool_modifier
<< " does not match the page map table " << (num_pages * gPageSize)
<< std::endl << DumpPageMap();
@@ -2016,7 +2016,7 @@ size_t RosAlloc::ReleasePages() {
DCHECK_ALIGNED_PARAM(fpr_size, gPageSize);
uint8_t* start = reinterpret_cast<uint8_t*>(fpr);
reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
- size_t pages = fpr_size / gPageSize;
+ size_t pages = DivideByPageSize(fpr_size);
CHECK_GT(pages, 0U) << "Infinite loop probable";
i += pages;
DCHECK_LE(i, page_map_size_);
@@ -2061,7 +2061,7 @@ size_t RosAlloc::ReleasePageRange(uint8_t* start, uint8_t* end) {
size_t pm_idx = ToPageMapIndex(start);
size_t reclaimed_bytes = 0;
// Calculate reclaimed bytes and upate page map.
- const size_t max_idx = pm_idx + (end - start) / gPageSize;
+ const size_t max_idx = pm_idx + DivideByPageSize(end - start);
for (; pm_idx < max_idx; ++pm_idx) {
DCHECK(IsFreePage(pm_idx));
if (page_map_[pm_idx] == kPageMapEmpty) {
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 531f509fd8..333f80ff92 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -62,7 +62,7 @@ class RosAlloc {
}
void SetByteSize(RosAlloc* rosalloc, size_t byte_size)
REQUIRES(rosalloc->lock_) {
- DCHECK_EQ(byte_size % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(byte_size), static_cast<size_t>(0));
uint8_t* fpr_base = reinterpret_cast<uint8_t*>(this);
size_t pm_idx = rosalloc->ToPageMapIndex(fpr_base);
rosalloc->free_page_run_size_map_[pm_idx] = byte_size;
@@ -103,7 +103,7 @@ class RosAlloc {
void ReleasePages(RosAlloc* rosalloc) REQUIRES(rosalloc->lock_) {
uint8_t* start = reinterpret_cast<uint8_t*>(this);
size_t byte_size = ByteSize(rosalloc);
- DCHECK_EQ(byte_size % gPageSize, static_cast<size_t>(0));
+ DCHECK_EQ(ModuloPageSize(byte_size), static_cast<size_t>(0));
if (ShouldReleasePages(rosalloc)) {
rosalloc->ReleasePageRange(start, start + byte_size);
}
@@ -611,13 +611,13 @@ class RosAlloc {
DCHECK_LE(base_, addr);
DCHECK_LT(addr, base_ + capacity_);
size_t byte_offset = reinterpret_cast<const uint8_t*>(addr) - base_;
- DCHECK_EQ(byte_offset % static_cast<size_t>(gPageSize), static_cast<size_t>(0));
- return byte_offset / gPageSize;
+ DCHECK_EQ(ModuloPageSize(byte_offset), static_cast<size_t>(0));
+ return DivideByPageSize(byte_offset);
}
// Returns the page map index from an address with rounding.
size_t RoundDownToPageMapIndex(const void* addr) const {
DCHECK(base_ <= addr && addr < reinterpret_cast<uint8_t*>(base_) + capacity_);
- return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / gPageSize;
+ return DivideByPageSize(reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_));
}
// A memory allocation request larger than this size is treated as a large object and allocated
@@ -916,7 +916,7 @@ class RosAlloc {
return dedicated_full_run_;
}
bool IsFreePage(size_t idx) const {
- DCHECK_LT(idx, capacity_ / gPageSize);
+ DCHECK_LT(idx, DivideByPageSize(capacity_));
uint8_t pm_type = page_map_[idx];
return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
}
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 0274d987c8..d6d3ab3418 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -168,13 +168,13 @@ uint64_t GarbageCollector::ExtractRssFromMincore(
}
size_t length = static_cast<uint8_t*>(it->second) - static_cast<uint8_t*>(it->first);
// Compute max length for vector allocation later.
- vec_len = std::max(vec_len, length / gPageSize);
+ vec_len = std::max(vec_len, DivideByPageSize(length));
}
std::unique_ptr<unsigned char[]> vec(new unsigned char[vec_len]);
for (const auto it : *gc_ranges) {
size_t length = static_cast<uint8_t*>(it.second) - static_cast<uint8_t*>(it.first);
if (mincore(it.first, length, vec.get()) == 0) {
- for (size_t i = 0; i < length / gPageSize; i++) {
+ for (size_t i = 0; i < DivideByPageSize(length); i++) {
// Least significant bit represents residency of a page. Other bits are
// reserved.
rss += vec[i] & 0x1;
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 1a6feda437..08404619b3 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -396,14 +396,14 @@ static bool IsSigbusFeatureAvailable() {
}
size_t MarkCompact::InitializeInfoMap(uint8_t* p, size_t moving_space_sz) {
- size_t nr_moving_pages = moving_space_sz / gPageSize;
+ size_t nr_moving_pages = DivideByPageSize(moving_space_sz);
chunk_info_vec_ = reinterpret_cast<uint32_t*>(p);
vector_length_ = moving_space_sz / kOffsetChunkSize;
size_t total = vector_length_ * sizeof(uint32_t);
first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
- total += heap_->GetNonMovingSpace()->Capacity() / gPageSize * sizeof(ObjReference);
+ total += DivideByPageSize(heap_->GetNonMovingSpace()->Capacity()) * sizeof(ObjReference);
first_objs_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
total += nr_moving_pages * sizeof(ObjReference);
@@ -456,8 +456,8 @@ MarkCompact::MarkCompact(Heap* heap)
// Create one MemMap for all the data structures
size_t moving_space_size = bump_pointer_space_->Capacity();
size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize;
- size_t nr_moving_pages = moving_space_size / gPageSize;
- size_t nr_non_moving_pages = heap->GetNonMovingSpace()->Capacity() / gPageSize;
+ size_t nr_moving_pages = DivideByPageSize(moving_space_size);
+ size_t nr_non_moving_pages = DivideByPageSize(heap->GetNonMovingSpace()->Capacity());
std::string err_msg;
info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector",
@@ -585,7 +585,7 @@ void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) {
}
MemMap page_status_map(MemMap::MapAnonymous("linear-alloc page-status map",
- len / gPageSize,
+ DivideByPageSize(len),
PROT_READ | PROT_WRITE,
/*low_4gb=*/false,
&err_msg));
@@ -917,7 +917,7 @@ void MarkCompact::InitNonMovingSpaceFirstObjects() {
// There are no live objects in the non-moving space
return;
}
- page_idx = (reinterpret_cast<uintptr_t>(obj) - begin) / gPageSize;
+ page_idx = DivideByPageSize(reinterpret_cast<uintptr_t>(obj) - begin);
first_objs_non_moving_space_[page_idx++].Assign(obj);
prev_obj = obj;
}
@@ -2309,7 +2309,7 @@ void MarkCompact::CompactMovingSpace(uint8_t* page) {
});
// We are sliding here, so no point attempting to madvise for every
// page. Wait for enough pages to be done.
- if (idx % (kMinFromSpaceMadviseSize / gPageSize) == 0) {
+ if (idx % DivideByPageSize(kMinFromSpaceMadviseSize) == 0) {
FreeFromSpacePages(idx, kMode);
}
}
@@ -2511,15 +2511,15 @@ void MarkCompact::UpdateMovingSpaceBlackAllocations() {
if (page_remaining <= block_remaining) {
block_remaining -= page_remaining;
// current page and the subsequent empty pages in the block
- black_page_idx += 1 + block_remaining / gPageSize;
- remaining_chunk_size = block_remaining % gPageSize;
+ black_page_idx += 1 + DivideByPageSize(block_remaining);
+ remaining_chunk_size = ModuloPageSize(block_remaining);
} else {
remaining_chunk_size += block_remaining;
}
black_allocs = block_end;
}
}
- if (black_page_idx < bump_pointer_space_->Size() / gPageSize) {
+ if (black_page_idx < DivideByPageSize(bump_pointer_space_->Size())) {
// Store the leftover first-chunk, if any, and update page index.
if (black_alloc_pages_first_chunk_size_[black_page_idx] > 0) {
black_page_idx++;
@@ -2567,7 +2567,7 @@ void MarkCompact::UpdateNonMovingSpaceBlackAllocations() {
non_moving_space_bitmap_->Set(obj);
// Clear so that we don't try to set the bit again in the next GC-cycle.
it->Clear();
- size_t idx = (reinterpret_cast<uint8_t*>(obj) - space_begin) / gPageSize;
+ size_t idx = DivideByPageSize(reinterpret_cast<uint8_t*>(obj) - space_begin);
uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), gPageSize);
mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
if (first_obj == nullptr
@@ -3331,7 +3331,7 @@ void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page,
ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true);
return;
}
- size_t page_idx = (fault_page - bump_pointer_space_->Begin()) / gPageSize;
+ size_t page_idx = DivideByPageSize(fault_page - bump_pointer_space_->Begin());
DCHECK_LT(page_idx, moving_first_objs_count_ + black_page_count_);
mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr();
if (first_obj == nullptr) {
@@ -3519,7 +3519,7 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i
}
DCHECK_NE(space_data, nullptr);
ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_;
- size_t page_idx = (fault_page - space_data->begin_) / gPageSize;
+ size_t page_idx = DivideByPageSize(fault_page - space_data->begin_);
Atomic<PageState>* state_arr =
reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
PageState state = state_arr[page_idx].load(use_uffd_sigbus_ ? std::memory_order_acquire :
@@ -3654,7 +3654,7 @@ void MarkCompact::ProcessLinearAlloc() {
return;
}
LinearAllocPageUpdater updater(this);
- size_t page_idx = (page_begin - space_data->begin_) / gPageSize;
+ size_t page_idx = DivideByPageSize(page_begin - space_data->begin_);
DCHECK_LT(page_idx, space_data->page_status_map_.Size());
Atomic<PageState>* state_arr =
reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
diff --git a/runtime/runtime_globals.h b/runtime/runtime_globals.h
index c128c19c36..46d355ad0d 100644
--- a/runtime/runtime_globals.h
+++ b/runtime/runtime_globals.h
@@ -19,6 +19,7 @@
#include <android-base/logging.h>
+#include "base/bit_utils.h"
#include "base/globals.h"
namespace art {
@@ -36,7 +37,7 @@ struct PageSize {
PageSize()
: is_initialized_(true), is_access_allowed_(false) {}
- ALWAYS_INLINE operator size_t() const {
+ constexpr ALWAYS_INLINE operator size_t() const {
DCHECK(is_initialized_ && is_access_allowed_);
return value_;
}
@@ -91,6 +92,20 @@ extern PageSize gPageSize ALWAYS_HIDDEN;
static constexpr size_t gPageSize = kMinPageSize;
#endif
+// In the page-size-agnostic configuration the compiler may not recognise gPageSize as a
+// power-of-two value, and may therefore miss opportunities to optimize: divisions via a
+// right-shift, modulo via a bitwise-AND.
+// Here, define two functions which use the optimized implementations explicitly, which should be
+// used when dividing by or applying modulo of the page size. For simplicity, the same functions
+// are used under both configurations, as they optimize the page-size-agnostic configuration while
+// only replicating what the compiler already does on the non-page-size-agnostic configuration.
+static constexpr ALWAYS_INLINE size_t DivideByPageSize(size_t num) {
+ return (num >> WhichPowerOf2(static_cast<size_t>(gPageSize)));
+}
+static constexpr ALWAYS_INLINE size_t ModuloPageSize(size_t num) {
+ return (num & (gPageSize-1));
+}
+
// Returns whether the given memory offset can be used for generating
// an implicit null check.
static inline bool CanDoImplicitNullCheckOn(uintptr_t offset) {