diff options
Diffstat (limited to 'runtime')
43 files changed, 1290 insertions, 369 deletions
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 65c65e2b72..0f874a49e8 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1551,7 +1551,9 @@ DEFINE_FUNCTION art_quick_instrumentation_exit CFI_ADJUST_CFA_OFFSET(-8) POP rax // Restore integer result. - addq LITERAL(FRAME_SIZE_REFS_ONLY_CALLEE_SAVE), %rsp // Drop save frame and fake return pc. + RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME + + addq LITERAL(8), %rsp // Drop fake return pc. jmp *%rdi // Return. END_FUNCTION art_quick_instrumentation_exit diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h index 93062a7c4b..893ab11bad 100644 --- a/runtime/check_reference_map_visitor.h +++ b/runtime/check_reference_map_visitor.h @@ -66,31 +66,36 @@ class CheckReferenceMapVisitor : public StackVisitor { mirror::ArtMethod* m = GetMethod(); CodeInfo code_info = m->GetOptimizedCodeInfo(); StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset); - DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, m->GetCodeItem()->registers_size_); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, m->GetCodeItem()->registers_size_); MemoryRegion stack_mask = stack_map.GetStackMask(); uint32_t register_mask = stack_map.GetRegisterMask(); for (int i = 0; i < number_of_references; ++i) { int reg = registers[i]; CHECK(reg < m->GetCodeItem()->registers_size_); - DexRegisterMap::LocationKind location = dex_register_map.GetLocationKind(reg); - switch (location) { - case DexRegisterMap::kNone: + DexRegisterLocation location = dex_register_map.GetLocationKindAndValue(reg); + switch (location.GetKind()) { + case DexRegisterLocation::Kind::kNone: // Not set, should not be a reference. 
CHECK(false); break; - case DexRegisterMap::kInStack: - CHECK(stack_mask.LoadBit(dex_register_map.GetValue(reg) >> 2)); + case DexRegisterLocation::Kind::kInStack: + DCHECK_EQ(location.GetValue() % kFrameSlotSize, 0); + CHECK(stack_mask.LoadBit(location.GetValue() / kFrameSlotSize)); break; - case DexRegisterMap::kInRegister: - CHECK_NE(register_mask & (1 << dex_register_map.GetValue(reg)), 0u); + case DexRegisterLocation::Kind::kInRegister: + CHECK_NE(register_mask & (1 << location.GetValue()), 0u); break; - case DexRegisterMap::kInFpuRegister: + case DexRegisterLocation::Kind::kInFpuRegister: // In Fpu register, should not be a reference. CHECK(false); break; - case DexRegisterMap::kConstant: - CHECK_EQ(dex_register_map.GetValue(reg), 0); + case DexRegisterLocation::Kind::kConstant: + CHECK_EQ(location.GetValue(), 0); break; + default: + LOG(FATAL) << "Unexpected location kind" + << DexRegisterLocation::PrettyDescriptor(location.GetInternalKind()); } } } diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc index 84865973c6..e0d62d7012 100644 --- a/runtime/common_runtime_test.cc +++ b/runtime/common_runtime_test.cc @@ -263,6 +263,8 @@ void CommonRuntimeTest::SetUp() { // pool is created by the runtime. runtime_->GetHeap()->CreateThreadPool(); runtime_->GetHeap()->VerifyHeap(); // Check for heap corruption before the test + // Reduce timinig-dependent flakiness in OOME behavior (eg StubTest.AllocObject). + runtime_->GetHeap()->SetMinIntervalHomogeneousSpaceCompactionByOom(0U); // Get the boot class path from the runtime so it can be used in tests. 
boot_class_path_ = class_linker_->GetBootClassPath(); diff --git a/runtime/gc/accounting/mod_union_table_test.cc b/runtime/gc/accounting/mod_union_table_test.cc index 87ce166147..77809358e4 100644 --- a/runtime/gc/accounting/mod_union_table_test.cc +++ b/runtime/gc/accounting/mod_union_table_test.cc @@ -48,9 +48,9 @@ class ModUnionTableTest : public CommonRuntimeTest { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { auto* klass = GetObjectArrayClass(self, space); const size_t size = ComputeArraySize(self, klass, component_count, 2); - size_t bytes_allocated = 0; + size_t bytes_allocated = 0, bytes_tl_bulk_allocated; auto* obj = down_cast<mirror::ObjectArray<mirror::Object>*>( - space->Alloc(self, size, &bytes_allocated, nullptr)); + space->Alloc(self, size, &bytes_allocated, nullptr, &bytes_tl_bulk_allocated)); if (obj != nullptr) { obj->SetClass(klass); obj->SetLength(static_cast<int32_t>(component_count)); @@ -77,9 +77,10 @@ class ModUnionTableTest : public CommonRuntimeTest { // copy of the class in the same space that we are allocating in. 
DCHECK(java_lang_object_array_ != nullptr); const size_t class_size = java_lang_object_array_->GetClassSize(); - size_t bytes_allocated = 0; + size_t bytes_allocated = 0, bytes_tl_bulk_allocated; auto* klass = down_cast<mirror::Class*>(space->Alloc(self, class_size, &bytes_allocated, - nullptr)); + nullptr, + &bytes_tl_bulk_allocated)); DCHECK(klass != nullptr); memcpy(klass, java_lang_object_array_, class_size); Runtime::Current()->GetHeap()->GetCardTable()->MarkCard(klass); diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h index f6c9d3c144..bba92a1f40 100644 --- a/runtime/gc/allocator/rosalloc-inl.h +++ b/runtime/gc/allocator/rosalloc-inl.h @@ -28,15 +28,19 @@ inline ALWAYS_INLINE bool RosAlloc::ShouldCheckZeroMemory() { } template<bool kThreadSafe> -inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated) { +inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { if (UNLIKELY(size > kLargeSizeThreshold)) { - return AllocLargeObject(self, size, bytes_allocated); + return AllocLargeObject(self, size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } void* m; if (kThreadSafe) { - m = AllocFromRun(self, size, bytes_allocated); + m = AllocFromRun(self, size, bytes_allocated, usable_size, bytes_tl_bulk_allocated); } else { - m = AllocFromRunThreadUnsafe(self, size, bytes_allocated); + m = AllocFromRunThreadUnsafe(self, size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } // Check if the returned memory is really all zero. 
if (ShouldCheckZeroMemory() && m != nullptr) { @@ -48,6 +52,115 @@ inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* by return m; } +inline bool RosAlloc::Run::IsFull() { + const size_t num_vec = NumberOfBitmapVectors(); + for (size_t v = 0; v < num_vec; ++v) { + if (~alloc_bit_map_[v] != 0) { + return false; + } + } + return true; +} + +inline bool RosAlloc::CanAllocFromThreadLocalRun(Thread* self, size_t size) { + if (UNLIKELY(!IsSizeForThreadLocal(size))) { + return false; + } + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + DCHECK_EQ(idx, SizeToIndex(size)); + DCHECK_EQ(bracket_size, IndexToBracketSize(idx)); + DCHECK_EQ(bracket_size, bracketSizes[idx]); + DCHECK_LE(size, bracket_size); + DCHECK(size > 512 || bracket_size - size < 16); + DCHECK_LT(idx, kNumThreadLocalSizeBrackets); + Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx)); + if (kIsDebugBuild) { + // Need the lock to prevent race conditions. + MutexLock mu(self, *size_bracket_locks_[idx]); + CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); + CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); + } + DCHECK(thread_local_run != nullptr); + DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_); + return !thread_local_run->IsFull(); +} + +inline void* RosAlloc::AllocFromThreadLocalRun(Thread* self, size_t size, + size_t* bytes_allocated) { + DCHECK(bytes_allocated != nullptr); + if (UNLIKELY(!IsSizeForThreadLocal(size))) { + return nullptr; + } + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx)); + if (kIsDebugBuild) { + // Need the lock to prevent race conditions. 
+ MutexLock mu(self, *size_bracket_locks_[idx]); + CHECK(non_full_runs_[idx].find(thread_local_run) == non_full_runs_[idx].end()); + CHECK(full_runs_[idx].find(thread_local_run) == full_runs_[idx].end()); + } + DCHECK(thread_local_run != nullptr); + DCHECK(thread_local_run->IsThreadLocal() || thread_local_run == dedicated_full_run_); + void* slot_addr = thread_local_run->AllocSlot(); + if (LIKELY(slot_addr != nullptr)) { + *bytes_allocated = bracket_size; + } + return slot_addr; +} + +inline size_t RosAlloc::MaxBytesBulkAllocatedFor(size_t size) { + if (UNLIKELY(!IsSizeForThreadLocal(size))) { + return size; + } + size_t bracket_size; + size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); + return numOfSlots[idx] * bracket_size; +} + +inline void* RosAlloc::Run::AllocSlot() { + const size_t idx = size_bracket_idx_; + while (true) { + if (kIsDebugBuild) { + // Make sure that no slots leaked, the bitmap should be full for all previous vectors. + for (size_t i = 0; i < first_search_vec_idx_; ++i) { + CHECK_EQ(~alloc_bit_map_[i], 0U); + } + } + uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_]; + uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr); + if (LIKELY(ffz1 != 0)) { + const uint32_t ffz = ffz1 - 1; + const uint32_t slot_idx = ffz + + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte; + const uint32_t mask = 1U << ffz; + DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range"; + // Found an empty slot. Set the bit. 
+ DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U); + *alloc_bitmap_ptr |= mask; + DCHECK_NE(*alloc_bitmap_ptr & mask, 0U); + uint8_t* slot_addr = reinterpret_cast<uint8_t*>(this) + + headerSizes[idx] + slot_idx * bracketSizes[idx]; + if (kTraceRosAlloc) { + LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex + << reinterpret_cast<intptr_t>(slot_addr) + << ", bracket_size=" << std::dec << bracketSizes[idx] + << ", slot_idx=" << slot_idx; + } + return slot_addr; + } + const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32; + if (first_search_vec_idx_ + 1 >= num_words) { + DCHECK(IsFull()); + // Already at the last word, return null. + return nullptr; + } + // Increase the index to the next word and try again. + ++first_search_vec_idx_; + } +} + } // namespace allocator } // namespace gc } // namespace art diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc index f51093aa57..f64a4ff8df 100644 --- a/runtime/gc/allocator/rosalloc.cc +++ b/runtime/gc/allocator/rosalloc.cc @@ -454,7 +454,10 @@ size_t RosAlloc::FreePages(Thread* self, void* ptr, bool already_zero) { return byte_size; } -void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) { +void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, size_t* bytes_tl_bulk_allocated) { + DCHECK(bytes_allocated != nullptr); + DCHECK(usable_size != nullptr); DCHECK_GT(size, kLargeSizeThreshold); size_t num_pages = RoundUp(size, kPageSize) / kPageSize; void* r; @@ -470,6 +473,8 @@ void* RosAlloc::AllocLargeObject(Thread* self, size_t size, size_t* bytes_alloca } const size_t total_bytes = num_pages * kPageSize; *bytes_allocated = total_bytes; + *usable_size = total_bytes; + *bytes_tl_bulk_allocated = total_bytes; if (kTraceRosAlloc) { LOG(INFO) << "RosAlloc::AllocLargeObject() : 0x" << std::hex << reinterpret_cast<intptr_t>(r) << "-0x" << (reinterpret_cast<intptr_t>(r) + num_pages * kPageSize) @@ -622,7 +627,12 @@ 
inline void* RosAlloc::AllocFromCurrentRunUnlocked(Thread* self, size_t idx) { return slot_addr; } -void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) { +void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { + DCHECK(bytes_allocated != nullptr); + DCHECK(usable_size != nullptr); + DCHECK(bytes_tl_bulk_allocated != nullptr); DCHECK_LE(size, kLargeSizeThreshold); size_t bracket_size; size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); @@ -634,14 +644,19 @@ void* RosAlloc::AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* byte Locks::mutator_lock_->AssertExclusiveHeld(self); void* slot_addr = AllocFromCurrentRunUnlocked(self, idx); if (LIKELY(slot_addr != nullptr)) { - DCHECK(bytes_allocated != nullptr); *bytes_allocated = bracket_size; - // Caller verifies that it is all 0. + *usable_size = bracket_size; + *bytes_tl_bulk_allocated = bracket_size; } + // Caller verifies that it is all 0. return slot_addr; } -void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) { +void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, size_t* bytes_tl_bulk_allocated) { + DCHECK(bytes_allocated != nullptr); + DCHECK(usable_size != nullptr); + DCHECK(bytes_tl_bulk_allocated != nullptr); DCHECK_LE(size, kLargeSizeThreshold); size_t bracket_size; size_t idx = SizeToIndexAndBracketSize(size, &bracket_size); @@ -712,31 +727,43 @@ void* RosAlloc::AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) self->SetRosAllocRun(idx, thread_local_run); DCHECK(!thread_local_run->IsFull()); } - DCHECK(thread_local_run != nullptr); DCHECK(!thread_local_run->IsFull()); DCHECK(thread_local_run->IsThreadLocal()); + // Account for all the free slots in the new or refreshed thread local run. 
+ *bytes_tl_bulk_allocated = thread_local_run->NumberOfFreeSlots() * bracket_size; slot_addr = thread_local_run->AllocSlot(); // Must succeed now with a new run. DCHECK(slot_addr != nullptr); + } else { + // The slot is already counted. Leave it as is. + *bytes_tl_bulk_allocated = 0; } + DCHECK(slot_addr != nullptr); if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) + LOG(INFO) << "RosAlloc::AllocFromRun() thread-local : 0x" << std::hex + << reinterpret_cast<intptr_t>(slot_addr) << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size) << "(" << std::dec << (bracket_size) << ")"; } + *bytes_allocated = bracket_size; + *usable_size = bracket_size; } else { // Use the (shared) current run. MutexLock mu(self, *size_bracket_locks_[idx]); slot_addr = AllocFromCurrentRunUnlocked(self, idx); if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) + LOG(INFO) << "RosAlloc::AllocFromRun() : 0x" << std::hex + << reinterpret_cast<intptr_t>(slot_addr) << "-0x" << (reinterpret_cast<intptr_t>(slot_addr) + bracket_size) << "(" << std::dec << (bracket_size) << ")"; } + if (LIKELY(slot_addr != nullptr)) { + *bytes_allocated = bracket_size; + *usable_size = bracket_size; + *bytes_tl_bulk_allocated = bracket_size; + } } - DCHECK(bytes_allocated != nullptr); - *bytes_allocated = bracket_size; // Caller verifies that it is all 0. return slot_addr; } @@ -852,44 +879,6 @@ std::string RosAlloc::Run::Dump() { return stream.str(); } -inline void* RosAlloc::Run::AllocSlot() { - const size_t idx = size_bracket_idx_; - while (true) { - if (kIsDebugBuild) { - // Make sure that no slots leaked, the bitmap should be full for all previous vectors. 
- for (size_t i = 0; i < first_search_vec_idx_; ++i) { - CHECK_EQ(~alloc_bit_map_[i], 0U); - } - } - uint32_t* const alloc_bitmap_ptr = &alloc_bit_map_[first_search_vec_idx_]; - uint32_t ffz1 = __builtin_ffs(~*alloc_bitmap_ptr); - if (LIKELY(ffz1 != 0)) { - const uint32_t ffz = ffz1 - 1; - const uint32_t slot_idx = ffz + first_search_vec_idx_ * sizeof(*alloc_bitmap_ptr) * kBitsPerByte; - const uint32_t mask = 1U << ffz; - DCHECK_LT(slot_idx, numOfSlots[idx]) << "out of range"; - // Found an empty slot. Set the bit. - DCHECK_EQ(*alloc_bitmap_ptr & mask, 0U); - *alloc_bitmap_ptr |= mask; - DCHECK_NE(*alloc_bitmap_ptr & mask, 0U); - uint8_t* slot_addr = reinterpret_cast<uint8_t*>(this) + headerSizes[idx] + slot_idx * bracketSizes[idx]; - if (kTraceRosAlloc) { - LOG(INFO) << "RosAlloc::Run::AllocSlot() : 0x" << std::hex << reinterpret_cast<intptr_t>(slot_addr) - << ", bracket_size=" << std::dec << bracketSizes[idx] << ", slot_idx=" << slot_idx; - } - return slot_addr; - } - const size_t num_words = RoundUp(numOfSlots[idx], 32) / 32; - if (first_search_vec_idx_ + 1 >= num_words) { - DCHECK(IsFull()); - // Already at the last word, return null. - return nullptr; - } - // Increase the index to the next word and try again. - ++first_search_vec_idx_; - } -} - void RosAlloc::Run::FreeSlot(void* ptr) { DCHECK(!IsThreadLocal()); const uint8_t idx = size_bracket_idx_; @@ -920,6 +909,25 @@ void RosAlloc::Run::FreeSlot(void* ptr) { } } +size_t RosAlloc::Run::NumberOfFreeSlots() { + size_t num_alloc_slots = 0; + const size_t idx = size_bracket_idx_; + const size_t num_slots = numOfSlots[idx]; + const size_t num_vec = RoundUp(num_slots, 32) / 32; + DCHECK_NE(num_vec, 0U); + for (size_t v = 0; v < num_vec - 1; v++) { + num_alloc_slots += POPCOUNT(alloc_bit_map_[v]); + } + // Don't count the invalid bits in the last vector. 
+ uint32_t last_vec_masked = alloc_bit_map_[num_vec - 1] & + ~GetBitmapLastVectorMask(num_slots, num_vec); + num_alloc_slots += POPCOUNT(last_vec_masked); + size_t num_free_slots = num_slots - num_alloc_slots; + DCHECK_LE(num_alloc_slots, num_slots); + DCHECK_LE(num_free_slots, num_slots); + return num_free_slots; +} + inline bool RosAlloc::Run::MergeThreadLocalFreeBitMapToAllocBitMap(bool* is_all_free_after_out) { DCHECK(IsThreadLocal()); // Free slots in the alloc bit map based on the thread local free bit map. @@ -1055,16 +1063,6 @@ inline bool RosAlloc::Run::IsAllFree() { return alloc_bit_map_[num_vec - 1] == GetBitmapLastVectorMask(num_slots, num_vec); } -inline bool RosAlloc::Run::IsFull() { - const size_t num_vec = NumberOfBitmapVectors(); - for (size_t v = 0; v < num_vec; ++v) { - if (~alloc_bit_map_[v] != 0) { - return false; - } - } - return true; -} - inline bool RosAlloc::Run::IsBulkFreeBitmapClean() { const size_t num_vec = NumberOfBitmapVectors(); for (size_t v = 0; v < num_vec; v++) { @@ -1654,10 +1652,11 @@ void RosAlloc::SetFootprintLimit(size_t new_capacity) { } } -void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { +size_t RosAlloc::RevokeThreadLocalRuns(Thread* thread) { Thread* self = Thread::Current(); // Avoid race conditions on the bulk free bit maps with BulkFree() (GC). ReaderMutexLock wmu(self, bulk_free_lock_); + size_t free_bytes = 0U; for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; idx++) { MutexLock mu(self, *size_bracket_locks_[idx]); Run* thread_local_run = reinterpret_cast<Run*>(thread->GetRosAllocRun(idx)); @@ -1665,9 +1664,12 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { // Invalid means already revoked. DCHECK(thread_local_run->IsThreadLocal()); if (thread_local_run != dedicated_full_run_) { + // Note the thread local run may not be full here. thread->SetRosAllocRun(idx, dedicated_full_run_); DCHECK_EQ(thread_local_run->magic_num_, kMagicNum); - // Note the thread local run may not be full here. 
+ // Count the number of free slots left. + size_t num_free_slots = thread_local_run->NumberOfFreeSlots(); + free_bytes += num_free_slots * bracketSizes[idx]; bool dont_care; thread_local_run->MergeThreadLocalFreeBitMapToAllocBitMap(&dont_care); thread_local_run->SetIsThreadLocal(false); @@ -1677,6 +1679,7 @@ void RosAlloc::RevokeThreadLocalRuns(Thread* thread) { RevokeRun(self, idx, thread_local_run); } } + return free_bytes; } void RosAlloc::RevokeRun(Thread* self, size_t idx, Run* run) { @@ -1719,16 +1722,18 @@ void RosAlloc::RevokeThreadUnsafeCurrentRuns() { } } -void RosAlloc::RevokeAllThreadLocalRuns() { +size_t RosAlloc::RevokeAllThreadLocalRuns() { // This is called when a mutator thread won't allocate such as at // the Zygote creation time or during the GC pause. MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_); MutexLock mu2(Thread::Current(), *Locks::thread_list_lock_); std::list<Thread*> thread_list = Runtime::Current()->GetThreadList()->GetList(); + size_t free_bytes = 0U; for (Thread* thread : thread_list) { - RevokeThreadLocalRuns(thread); + free_bytes += RevokeThreadLocalRuns(thread); } RevokeThreadUnsafeCurrentRuns(); + return free_bytes; } void RosAlloc::AssertThreadLocalRunsAreRevoked(Thread* thread) { diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h index 3269e102bc..d1e7ad91a0 100644 --- a/runtime/gc/allocator/rosalloc.h +++ b/runtime/gc/allocator/rosalloc.h @@ -230,8 +230,10 @@ class RosAlloc { static uint32_t GetBitmapLastVectorMask(size_t num_slots, size_t num_vec); // Returns true if all the slots in the run are not in use. bool IsAllFree(); + // Returns the number of free slots. + size_t NumberOfFreeSlots(); // Returns true if all the slots in the run are in use. - bool IsFull(); + ALWAYS_INLINE bool IsFull(); // Returns true if the bulk free bit map is clean. bool IsBulkFreeBitmapClean(); // Returns true if the thread local free bit map is clean. 
@@ -309,6 +311,15 @@ class RosAlloc { DCHECK(bracketSizes[idx] == size); return idx; } + // Returns true if the given allocation size is for a thread local allocation. + static bool IsSizeForThreadLocal(size_t size) { + DCHECK_GT(kNumThreadLocalSizeBrackets, 0U); + size_t max_thread_local_bracket_idx = kNumThreadLocalSizeBrackets - 1; + bool is_size_for_thread_local = size <= bracketSizes[max_thread_local_bracket_idx]; + DCHECK(size > kLargeSizeThreshold || + (is_size_for_thread_local == (SizeToIndex(size) < kNumThreadLocalSizeBrackets))); + return is_size_for_thread_local; + } // Rounds up the size up the nearest bracket size. static size_t RoundToBracketSize(size_t size) { DCHECK(size <= kLargeSizeThreshold); @@ -504,11 +515,13 @@ class RosAlloc { size_t FreePages(Thread* self, void* ptr, bool already_zero) EXCLUSIVE_LOCKS_REQUIRED(lock_); // Allocate/free a run slot. - void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated) + void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) LOCKS_EXCLUDED(lock_); // Allocate/free a run slot without acquiring locks. // TODO: EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) - void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated) + void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, size_t* bytes_tl_bulk_allocated) LOCKS_EXCLUDED(lock_); void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx); @@ -527,7 +540,9 @@ class RosAlloc { size_t FreeInternal(Thread* self, void* ptr) LOCKS_EXCLUDED(lock_); // Allocates large objects. - void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_); + void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated, + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + LOCKS_EXCLUDED(lock_); // Revoke a run by adding it to non_full_runs_ or freeing the pages. 
void RevokeRun(Thread* self, size_t idx, Run* run); @@ -551,13 +566,26 @@ class RosAlloc { // If kThreadUnsafe is true then the allocator may avoid acquiring some locks as an optimization. // If used, this may cause race conditions if multiple threads are allocating at the same time. template<bool kThreadSafe = true> - void* Alloc(Thread* self, size_t size, size_t* bytes_allocated) + void* Alloc(Thread* self, size_t size, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) LOCKS_EXCLUDED(lock_); size_t Free(Thread* self, void* ptr) LOCKS_EXCLUDED(bulk_free_lock_); size_t BulkFree(Thread* self, void** ptrs, size_t num_ptrs) LOCKS_EXCLUDED(bulk_free_lock_); + // Returns true if the given allocation request can be allocated in + // an existing thread local run without allocating a new run. + ALWAYS_INLINE bool CanAllocFromThreadLocalRun(Thread* self, size_t size); + // Allocate the given allocation request in an existing thread local + // run without allocating a new run. + ALWAYS_INLINE void* AllocFromThreadLocalRun(Thread* self, size_t size, size_t* bytes_allocated); + + // Returns the maximum bytes that could be allocated for the given + // size in bulk, that is the maximum value for the + // bytes_allocated_bulk out param returned by RosAlloc::Alloc(). + ALWAYS_INLINE size_t MaxBytesBulkAllocatedFor(size_t size); + // Returns the size of the allocated slot for a given allocated memory chunk. size_t UsableSize(const void* ptr); // Returns the size of the allocated slot for a given size. @@ -586,9 +614,13 @@ class RosAlloc { void SetFootprintLimit(size_t bytes) LOCKS_EXCLUDED(lock_); // Releases the thread-local runs assigned to the given thread back to the common set of runs. - void RevokeThreadLocalRuns(Thread* thread); + // Returns the total bytes of free slots in the revoked thread local runs. This is to be + // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting. 
+ size_t RevokeThreadLocalRuns(Thread* thread); // Releases the thread-local runs assigned to all the threads back to the common set of runs. - void RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_); + // Returns the total bytes of free slots in the revoked thread local runs. This is to be + // subtracted from Heap::num_bytes_allocated_ to cancel out the ahead-of-time counting. + size_t RevokeAllThreadLocalRuns() LOCKS_EXCLUDED(Locks::thread_list_lock_); // Assert the thread local runs of a thread are revoked. void AssertThreadLocalRunsAreRevoked(Thread* thread); // Assert all the thread local runs are revoked. diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index dd45ecab7f..db7a4ef7e7 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -1259,8 +1259,9 @@ mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) { size_t region_space_bytes_allocated = 0U; size_t non_moving_space_bytes_allocated = 0U; size_t bytes_allocated = 0U; + size_t dummy; mirror::Object* to_ref = region_space_->AllocNonvirtual<true>( - region_space_alloc_size, &region_space_bytes_allocated, nullptr); + region_space_alloc_size, &region_space_bytes_allocated, nullptr, &dummy); bytes_allocated = region_space_bytes_allocated; if (to_ref != nullptr) { DCHECK_EQ(region_space_alloc_size, region_space_bytes_allocated); @@ -1286,7 +1287,7 @@ mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) { } fall_back_to_non_moving = true; to_ref = heap_->non_moving_space_->Alloc(Thread::Current(), obj_size, - &non_moving_space_bytes_allocated, nullptr); + &non_moving_space_bytes_allocated, nullptr, &dummy); CHECK(to_ref != nullptr) << "Fall-back non-moving space allocation failed"; bytes_allocated = non_moving_space_bytes_allocated; // Mark it in the mark bitmap. 
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc index 8be18be676..eafcc45a13 100644 --- a/runtime/gc/collector/garbage_collector.cc +++ b/runtime/gc/collector/garbage_collector.cc @@ -48,6 +48,7 @@ void Iteration::Reset(GcCause gc_cause, bool clear_soft_references) { gc_cause_ = gc_cause; freed_ = ObjectBytePair(); freed_los_ = ObjectBytePair(); + freed_bytes_revoke_ = 0; } uint64_t Iteration::GetEstimatedThroughput() const { diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index b8094694b0..ed5207a356 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -75,6 +75,12 @@ class Iteration { uint64_t GetFreedLargeObjects() const { return freed_los_.objects; } + uint64_t GetFreedRevokeBytes() const { + return freed_bytes_revoke_; + } + void SetFreedRevoke(uint64_t freed) { + freed_bytes_revoke_ = freed; + } void Reset(GcCause gc_cause, bool clear_soft_references); // Returns the estimated throughput of the iteration. uint64_t GetEstimatedThroughput() const; @@ -99,6 +105,7 @@ class Iteration { TimingLogger timings_; ObjectBytePair freed_; ObjectBytePair freed_los_; + uint64_t freed_bytes_revoke_; // see Heap::num_bytes_freed_revoke_. std::vector<uint64_t> pause_times_; friend class GarbageCollector; diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc index 8aac484f7f..ee4e752608 100644 --- a/runtime/gc/collector/mark_sweep.cc +++ b/runtime/gc/collector/mark_sweep.cc @@ -292,6 +292,7 @@ void MarkSweep::ReclaimPhase() { Runtime::Current()->AllowNewSystemWeaks(); { WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); + GetHeap()->RecordFreeRevoke(); // Reclaim unmarked objects. Sweep(false); // Swap the live and mark bitmaps for each space which we modified space. 
This is an diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc index c1ba5e3f72..b3d59f2a51 100644 --- a/runtime/gc/collector/semi_space.cc +++ b/runtime/gc/collector/semi_space.cc @@ -242,6 +242,7 @@ void SemiSpace::MarkingPhase() { // Revoke buffers before measuring how many objects were moved since the TLABs need to be revoked // before they are properly counted. RevokeAllThreadLocalBuffers(); + GetHeap()->RecordFreeRevoke(); // this is for the non-moving rosalloc space used by GSS. // Record freed memory. const int64_t from_bytes = from_space_->GetBytesAllocated(); const int64_t to_bytes = bytes_moved_; @@ -489,17 +490,18 @@ static inline size_t CopyAvoidingDirtyingPages(void* dest, const void* src, size mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) { const size_t object_size = obj->SizeOf(); - size_t bytes_allocated; + size_t bytes_allocated, dummy; mirror::Object* forward_address = nullptr; if (generational_ && reinterpret_cast<uint8_t*>(obj) < last_gc_to_space_end_) { // If it's allocated before the last GC (older), move // (pseudo-promote) it to the main free list space (as sort // of an old generation.) forward_address = promo_dest_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, - nullptr); + nullptr, &dummy); if (UNLIKELY(forward_address == nullptr)) { // If out of space, fall back to the to-space. - forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr); + forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr, + &dummy); // No logic for marking the bitmap, so it must be null. DCHECK(to_space_live_bitmap_ == nullptr); } else { @@ -544,7 +546,8 @@ mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) { } } else { // If it's allocated after the last GC (younger), copy it to the to-space. 
- forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr); + forward_address = to_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, nullptr, + &dummy); if (forward_address != nullptr && to_space_live_bitmap_ != nullptr) { to_space_live_bitmap_->Set(forward_address); } @@ -552,7 +555,7 @@ mirror::Object* SemiSpace::MarkNonForwardedObject(mirror::Object* obj) { // If it's still null, attempt to use the fallback space. if (UNLIKELY(forward_address == nullptr)) { forward_address = fallback_space_->AllocThreadUnsafe(self_, object_size, &bytes_allocated, - nullptr); + nullptr, &dummy); CHECK(forward_address != nullptr) << "Out of memory in the to-space and fallback space."; accounting::ContinuousSpaceBitmap* bitmap = fallback_space_->GetLiveBitmap(); if (bitmap != nullptr) { diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h index b8c24521a2..b770096671 100644 --- a/runtime/gc/heap-inl.h +++ b/runtime/gc/heap-inl.h @@ -64,6 +64,7 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas // fragmentation. } AllocationTimer alloc_timer(this, &obj); + // bytes allocated for the (individual) object. 
size_t bytes_allocated; size_t usable_size; size_t new_num_bytes_allocated = 0; @@ -86,13 +87,29 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas usable_size = bytes_allocated; pre_fence_visitor(obj, usable_size); QuasiAtomic::ThreadFenceForConstructor(); + } else if (!kInstrumented && allocator == kAllocatorTypeRosAlloc && + (obj = rosalloc_space_->AllocThreadLocal(self, byte_count, &bytes_allocated)) && + LIKELY(obj != nullptr)) { + DCHECK(!running_on_valgrind_); + obj->SetClass(klass); + if (kUseBakerOrBrooksReadBarrier) { + if (kUseBrooksReadBarrier) { + obj->SetReadBarrierPointer(obj); + } + obj->AssertReadBarrierPointer(); + } + usable_size = bytes_allocated; + pre_fence_visitor(obj, usable_size); + QuasiAtomic::ThreadFenceForConstructor(); } else { + // bytes allocated that takes bulk thread-local buffer allocations into account. + size_t bytes_tl_bulk_allocated = 0; obj = TryToAllocate<kInstrumented, false>(self, allocator, byte_count, &bytes_allocated, - &usable_size); + &usable_size, &bytes_tl_bulk_allocated); if (UNLIKELY(obj == nullptr)) { bool is_current_allocator = allocator == GetCurrentAllocator(); obj = AllocateInternalWithGc(self, allocator, byte_count, &bytes_allocated, &usable_size, - &klass); + &bytes_tl_bulk_allocated, &klass); if (obj == nullptr) { bool after_is_current_allocator = allocator == GetCurrentAllocator(); // If there is a pending exception, fail the allocation right away since the next one @@ -126,9 +143,9 @@ inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self, mirror::Clas WriteBarrierField(obj, mirror::Object::ClassOffset(), klass); } pre_fence_visitor(obj, usable_size); - new_num_bytes_allocated = - static_cast<size_t>(num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_allocated)) - + bytes_allocated; + new_num_bytes_allocated = static_cast<size_t>( + num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_tl_bulk_allocated)) + + bytes_tl_bulk_allocated; } if 
(kIsDebugBuild && Runtime::Current()->IsStarted()) { CHECK_LE(obj->SizeOf(), usable_size); @@ -196,8 +213,10 @@ inline mirror::Object* Heap::AllocLargeObject(Thread* self, mirror::Class** klas template <const bool kInstrumented, const bool kGrow> inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator_type, size_t alloc_size, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { if (allocator_type != kAllocatorTypeTLAB && allocator_type != kAllocatorTypeRegionTLAB && + allocator_type != kAllocatorTypeRosAlloc && UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) { return nullptr; } @@ -210,35 +229,56 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator if (LIKELY(ret != nullptr)) { *bytes_allocated = alloc_size; *usable_size = alloc_size; + *bytes_tl_bulk_allocated = alloc_size; } break; } case kAllocatorTypeRosAlloc: { if (kInstrumented && UNLIKELY(running_on_valgrind_)) { // If running on valgrind, we should be using the instrumented path. 
- ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size); + size_t max_bytes_tl_bulk_allocated = rosalloc_space_->MaxBytesBulkAllocatedFor(alloc_size); + if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, + max_bytes_tl_bulk_allocated))) { + return nullptr; + } + ret = rosalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } else { DCHECK(!running_on_valgrind_); - ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size); + size_t max_bytes_tl_bulk_allocated = + rosalloc_space_->MaxBytesBulkAllocatedForNonvirtual(alloc_size); + if (UNLIKELY(IsOutOfMemoryOnAllocation<kGrow>(allocator_type, + max_bytes_tl_bulk_allocated))) { + return nullptr; + } + if (!kInstrumented) { + DCHECK(!rosalloc_space_->CanAllocThreadLocal(self, alloc_size)); + } + ret = rosalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } break; } case kAllocatorTypeDlMalloc: { if (kInstrumented && UNLIKELY(running_on_valgrind_)) { // If running on valgrind, we should be using the instrumented path. 
- ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size); + ret = dlmalloc_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } else { DCHECK(!running_on_valgrind_); - ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size); + ret = dlmalloc_space_->AllocNonvirtual(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } break; } case kAllocatorTypeNonMoving: { - ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size); + ret = non_moving_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); break; } case kAllocatorTypeLOS: { - ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size); + ret = large_object_space_->Alloc(self, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); // Note that the bump pointer spaces aren't necessarily next to // the other continuous spaces like the non-moving alloc space or // the zygote space. @@ -257,20 +297,22 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator if (!bump_pointer_space_->AllocNewTlab(self, new_tlab_size)) { return nullptr; } - *bytes_allocated = new_tlab_size; + *bytes_tl_bulk_allocated = new_tlab_size; } else { - *bytes_allocated = 0; + *bytes_tl_bulk_allocated = 0; } // The allocation can't fail. 
ret = self->AllocTlab(alloc_size); DCHECK(ret != nullptr); + *bytes_allocated = alloc_size; *usable_size = alloc_size; break; } case kAllocatorTypeRegion: { DCHECK(region_space_ != nullptr); alloc_size = RoundUp(alloc_size, space::RegionSpace::kAlignment); - ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size); + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); break; } case kAllocatorTypeRegionTLAB: { @@ -283,15 +325,17 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator // Try to allocate a tlab. if (!region_space_->AllocNewTlab(self)) { // Failed to allocate a tlab. Try non-tlab. - ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size); + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); return ret; } - *bytes_allocated = space::RegionSpace::kRegionSize; + *bytes_tl_bulk_allocated = space::RegionSpace::kRegionSize; // Fall-through. } else { // Check OOME for a non-tlab allocation. if (!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size)) { - ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size); + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); return ret; } else { // Neither tlab or non-tlab works. Give up. @@ -301,18 +345,20 @@ inline mirror::Object* Heap::TryToAllocate(Thread* self, AllocatorType allocator } else { // Large. Check OOME. 
if (LIKELY(!IsOutOfMemoryOnAllocation<kGrow>(allocator_type, alloc_size))) { - ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size); + ret = region_space_->AllocNonvirtual<false>(alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); return ret; } else { return nullptr; } } } else { - *bytes_allocated = 0; + *bytes_tl_bulk_allocated = 0; // Allocated in an existing buffer. } // The allocation can't fail. ret = self->AllocTlab(alloc_size); DCHECK(ret != nullptr); + *bytes_allocated = alloc_size; *usable_size = alloc_size; break; } diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 7534515a8a..9421db5139 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -156,6 +156,7 @@ Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max total_objects_freed_ever_(0), num_bytes_allocated_(0), native_bytes_allocated_(0), + num_bytes_freed_revoke_(0), verify_missing_card_marks_(false), verify_system_weaks_(false), verify_pre_gc_heap_(verify_pre_gc_heap), @@ -1344,6 +1345,19 @@ void Heap::RecordFree(uint64_t freed_objects, int64_t freed_bytes) { } } +void Heap::RecordFreeRevoke() { + // Subtract num_bytes_freed_revoke_ from num_bytes_allocated_ to cancel out the + // the ahead-of-time, bulk counting of bytes allocated in rosalloc thread-local buffers. + // If there's a concurrent revoke, ok to not necessarily reset num_bytes_freed_revoke_ + // all the way to zero exactly as the remainder will be subtracted at the next GC. 
+ size_t bytes_freed = num_bytes_freed_revoke_.LoadSequentiallyConsistent(); + CHECK_GE(num_bytes_freed_revoke_.FetchAndSubSequentiallyConsistent(bytes_freed), + bytes_freed) << "num_bytes_freed_revoke_ underflow"; + CHECK_GE(num_bytes_allocated_.FetchAndSubSequentiallyConsistent(bytes_freed), + bytes_freed) << "num_bytes_allocated_ underflow"; + GetCurrentGcIteration()->SetFreedRevoke(bytes_freed); +} + space::RosAllocSpace* Heap::GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) const { for (const auto& space : continuous_spaces_) { if (space->AsContinuousSpace()->IsRosAllocSpace()) { @@ -1358,6 +1372,7 @@ space::RosAllocSpace* Heap::GetRosAllocSpace(gc::allocator::RosAlloc* rosalloc) mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocator, size_t alloc_size, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated, mirror::Class** klass) { bool was_default_allocator = allocator == GetCurrentAllocator(); // Make sure there is no pending exception since we may need to throw an OOME. @@ -1377,7 +1392,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat } // A GC was in progress and we blocked, retry allocation now that memory has been freed. mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { return ptr; } @@ -1391,7 +1406,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat } if (gc_ran) { mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { return ptr; } @@ -1411,7 +1426,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat if (plan_gc_ran) { // Did we free sufficient memory for the allocation to succeed? 
mirror::Object* ptr = TryToAllocate<true, false>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { return ptr; } @@ -1420,7 +1435,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat // Allocations have failed after GCs; this is an exceptional state. // Try harder, growing the heap if necessary. mirror::Object* ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { return ptr; } @@ -1437,7 +1452,8 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat if (was_default_allocator && allocator != GetCurrentAllocator()) { return nullptr; } - ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size); + ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); if (ptr == nullptr) { const uint64_t current_time = NanoTime(); switch (allocator) { @@ -1453,7 +1469,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat case HomogeneousSpaceCompactResult::kSuccess: // If the allocation succeeded, we delayed an oom. 
ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); if (ptr != nullptr) { count_delayed_oom_++; } @@ -1498,7 +1514,7 @@ mirror::Object* Heap::AllocateInternalWithGc(Thread* self, AllocatorType allocat } else { LOG(WARNING) << "Disabled moving GC due to the non moving space being full"; ptr = TryToAllocate<true, true>(self, allocator, alloc_size, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); } } break; @@ -1984,8 +2000,8 @@ class ZygoteCompactingCollector FINAL : public collector::SemiSpace { if (it == bins_.end()) { // No available space in the bins, place it in the target space instead (grows the zygote // space). - size_t bytes_allocated; - forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr); + size_t bytes_allocated, dummy; + forward_address = to_space_->Alloc(self_, object_size, &bytes_allocated, nullptr, &dummy); if (to_space_live_bitmap_ != nullptr) { to_space_live_bitmap_->Set(forward_address); } else { @@ -2048,8 +2064,6 @@ void Heap::PreZygoteFork() { non_moving_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE); const bool same_space = non_moving_space_ == main_space_; if (kCompactZygote) { - // Can't compact if the non moving space is the same as the main space. - DCHECK(semi_space_collector_ != nullptr); // Temporarily disable rosalloc verification because the zygote // compaction will mess up the rosalloc internal metadata. ScopedDisableRosAllocVerification disable_rosalloc_verif(this); @@ -2068,6 +2082,8 @@ void Heap::PreZygoteFork() { } } else { CHECK(main_space_ != nullptr); + CHECK_NE(main_space_, non_moving_space_) + << "Does not make sense to compact within the same space"; // Copy from the main space. 
zygote_collector.SetFromSpace(main_space_); reset_main_space = true; @@ -3084,7 +3100,8 @@ void Heap::GrowForUtilization(collector::GarbageCollector* collector_ran, SetIdealFootprint(target_size); if (IsGcConcurrent()) { const uint64_t freed_bytes = current_gc_iteration_.GetFreedBytes() + - current_gc_iteration_.GetFreedLargeObjectBytes(); + current_gc_iteration_.GetFreedLargeObjectBytes() + + current_gc_iteration_.GetFreedRevokeBytes(); // Bytes allocated will shrink by freed_bytes after the GC runs, so if we want to figure out // how many bytes were allocated during the GC we need to add freed_bytes back on. CHECK_GE(bytes_allocated + freed_bytes, bytes_allocated_before_gc); @@ -3290,31 +3307,43 @@ void Heap::RequestTrim(Thread* self) { void Heap::RevokeThreadLocalBuffers(Thread* thread) { if (rosalloc_space_ != nullptr) { - rosalloc_space_->RevokeThreadLocalBuffers(thread); + size_t freed_bytes_revoke = rosalloc_space_->RevokeThreadLocalBuffers(thread); + if (freed_bytes_revoke > 0U) { + num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke); + CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed()); + } } if (bump_pointer_space_ != nullptr) { - bump_pointer_space_->RevokeThreadLocalBuffers(thread); + CHECK_EQ(bump_pointer_space_->RevokeThreadLocalBuffers(thread), 0U); } if (region_space_ != nullptr) { - region_space_->RevokeThreadLocalBuffers(thread); + CHECK_EQ(region_space_->RevokeThreadLocalBuffers(thread), 0U); } } void Heap::RevokeRosAllocThreadLocalBuffers(Thread* thread) { if (rosalloc_space_ != nullptr) { - rosalloc_space_->RevokeThreadLocalBuffers(thread); + size_t freed_bytes_revoke = rosalloc_space_->RevokeThreadLocalBuffers(thread); + if (freed_bytes_revoke > 0U) { + num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke); + CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed()); + } } } void Heap::RevokeAllThreadLocalBuffers() { if (rosalloc_space_ 
!= nullptr) { - rosalloc_space_->RevokeAllThreadLocalBuffers(); + size_t freed_bytes_revoke = rosalloc_space_->RevokeAllThreadLocalBuffers(); + if (freed_bytes_revoke > 0U) { + num_bytes_freed_revoke_.FetchAndAddSequentiallyConsistent(freed_bytes_revoke); + CHECK_GE(num_bytes_allocated_.LoadRelaxed(), num_bytes_freed_revoke_.LoadRelaxed()); + } } if (bump_pointer_space_ != nullptr) { - bump_pointer_space_->RevokeAllThreadLocalBuffers(); + CHECK_EQ(bump_pointer_space_->RevokeAllThreadLocalBuffers(), 0U); } if (region_space_ != nullptr) { - region_space_->RevokeAllThreadLocalBuffers(); + CHECK_EQ(region_space_->RevokeAllThreadLocalBuffers(), 0U); } } @@ -3355,6 +3384,8 @@ void Heap::RegisterNativeAllocation(JNIEnv* env, size_t bytes) { // Just finished a GC, attempt to run finalizers. RunFinalization(env); CHECK(!env->ExceptionCheck()); + // Native bytes allocated may be updated by finalization, refresh it. + new_native_bytes_allocated = native_bytes_allocated_.LoadRelaxed(); } // If we still are over the watermark, attempt a GC for alloc and run finalizers. if (new_native_bytes_allocated > growth_limit_) { diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h index d41e17fb75..959ff18516 100644 --- a/runtime/gc/heap.h +++ b/runtime/gc/heap.h @@ -390,6 +390,9 @@ class Heap { // free-list backed space. void RecordFree(uint64_t freed_objects, int64_t freed_bytes); + // Record the bytes freed by thread-local buffer revoke. + void RecordFreeRevoke(); + // Must be called if a field of an Object in the heap changes, and before any GC safe-point. // The call is not needed if NULL is stored in the field. ALWAYS_INLINE void WriteBarrierField(const mirror::Object* dst, MemberOffset /*offset*/, @@ -664,6 +667,11 @@ class Heap { // Whether or not we may use a garbage collector, used so that we only create collectors we need. bool MayUseCollector(CollectorType type) const; + // Used by tests to reduce timinig-dependent flakiness in OOME behavior. 
+ void SetMinIntervalHomogeneousSpaceCompactionByOom(uint64_t interval) { + min_interval_homogeneous_space_compaction_by_oom_ = interval; + } + private: class ConcurrentGCTask; class CollectorTransitionTask; @@ -724,6 +732,7 @@ class Heap { // an initial allocation attempt failed. mirror::Object* AllocateInternalWithGc(Thread* self, AllocatorType allocator, size_t num_bytes, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated, mirror::Class** klass) LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -742,7 +751,8 @@ class Heap { template <const bool kInstrumented, const bool kGrow> ALWAYS_INLINE mirror::Object* TryToAllocate(Thread* self, AllocatorType allocator_type, size_t alloc_size, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); void ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) @@ -998,6 +1008,13 @@ class Heap { // Bytes which are allocated and managed by native code but still need to be accounted for. Atomic<size_t> native_bytes_allocated_; + // Number of bytes freed by thread local buffer revokes. This will + // cancel out the ahead-of-time bulk counting of bytes allocated in + // rosalloc thread-local buffers. It is temporarily accumulated + // here to be subtracted from num_bytes_allocated_ later at the next + // GC. + Atomic<size_t> num_bytes_freed_revoke_; + // Info related to the current or previous GC iteration. 
collector::Iteration current_gc_iteration_; diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h index 9f1f9533d0..14a93d1611 100644 --- a/runtime/gc/space/bump_pointer_space-inl.h +++ b/runtime/gc/space/bump_pointer_space-inl.h @@ -24,7 +24,8 @@ namespace gc { namespace space { inline mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { num_bytes = RoundUp(num_bytes, kAlignment); mirror::Object* ret = AllocNonvirtual(num_bytes); if (LIKELY(ret != nullptr)) { @@ -32,13 +33,15 @@ inline mirror::Object* BumpPointerSpace::Alloc(Thread*, size_t num_bytes, size_t if (usable_size != nullptr) { *usable_size = num_bytes; } + *bytes_tl_bulk_allocated = num_bytes; } return ret; } inline mirror::Object* BumpPointerSpace::AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { Locks::mutator_lock_->AssertExclusiveHeld(self); num_bytes = RoundUp(num_bytes, kAlignment); uint8_t* end = end_.LoadRelaxed(); @@ -54,6 +57,7 @@ inline mirror::Object* BumpPointerSpace::AllocThreadUnsafe(Thread* self, size_t if (UNLIKELY(usable_size != nullptr)) { *usable_size = num_bytes; } + *bytes_tl_bulk_allocated = num_bytes; return obj; } diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc index fbfc4495e0..1303d7729e 100644 --- a/runtime/gc/space/bump_pointer_space.cc +++ b/runtime/gc/space/bump_pointer_space.cc @@ -93,12 +93,13 @@ mirror::Object* BumpPointerSpace::GetNextObject(mirror::Object* obj) { return reinterpret_cast<mirror::Object*>(RoundUp(position, kAlignment)); } -void BumpPointerSpace::RevokeThreadLocalBuffers(Thread* thread) { +size_t BumpPointerSpace::RevokeThreadLocalBuffers(Thread* thread) { MutexLock mu(Thread::Current(), block_lock_); 
RevokeThreadLocalBuffersLocked(thread); + return 0U; } -void BumpPointerSpace::RevokeAllThreadLocalBuffers() { +size_t BumpPointerSpace::RevokeAllThreadLocalBuffers() { Thread* self = Thread::Current(); MutexLock mu(self, *Locks::runtime_shutdown_lock_); MutexLock mu2(self, *Locks::thread_list_lock_); @@ -107,6 +108,7 @@ void BumpPointerSpace::RevokeAllThreadLocalBuffers() { for (Thread* thread : thread_list) { RevokeThreadLocalBuffers(thread); } + return 0U; } void BumpPointerSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) { diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h index 089ede4453..c496a422e0 100644 --- a/runtime/gc/space/bump_pointer_space.h +++ b/runtime/gc/space/bump_pointer_space.h @@ -47,10 +47,10 @@ class BumpPointerSpace FINAL : public ContinuousMemMapAllocSpace { // Allocate num_bytes, returns nullptr if the space is full. mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector. 
mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::Object* AllocNonvirtual(size_t num_bytes); @@ -103,9 +103,9 @@ class BumpPointerSpace FINAL : public ContinuousMemMapAllocSpace { void Dump(std::ostream& os) const; - void RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_); - void RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, - Locks::thread_list_lock_); + size_t RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(block_lock_); + size_t RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, + Locks::thread_list_lock_); void AssertThreadLocalBuffersAreRevoked(Thread* thread) LOCKS_EXCLUDED(block_lock_); void AssertAllThreadLocalBuffersAreRevoked() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_); diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h index 4c8a35e0f7..9eace897e6 100644 --- a/runtime/gc/space/dlmalloc_space-inl.h +++ b/runtime/gc/space/dlmalloc_space-inl.h @@ -27,11 +27,13 @@ namespace space { inline mirror::Object* DlMallocSpace::AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { mirror::Object* obj; { MutexLock mu(self, lock_); - obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size); + obj = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } if (LIKELY(obj != NULL)) { // Zero freshly allocated memory, done while not holding the space's lock. 
@@ -49,9 +51,11 @@ inline size_t DlMallocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_ return size + kChunkOverhead; } -inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t num_bytes, - size_t* bytes_allocated, - size_t* usable_size) { +inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked( + Thread* /*self*/, size_t num_bytes, + size_t* bytes_allocated, + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes)); if (LIKELY(result != NULL)) { if (kDebugSpaces) { @@ -61,6 +65,7 @@ inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(Thread* /*self*/, size_t allocation_size = AllocationSizeNonvirtual(result, usable_size); DCHECK(bytes_allocated != NULL); *bytes_allocated = allocation_size; + *bytes_tl_bulk_allocated = allocation_size; } return result; } diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc index b8a9dd6639..225861db60 100644 --- a/runtime/gc/space/dlmalloc_space.cc +++ b/runtime/gc/space/dlmalloc_space.cc @@ -123,7 +123,8 @@ void* DlMallocSpace::CreateMspace(void* begin, size_t morecore_start, size_t ini } mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { mirror::Object* result; { MutexLock mu(self, lock_); @@ -131,7 +132,8 @@ mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t max_allowed = Capacity(); mspace_set_footprint_limit(mspace_, max_allowed); // Try the allocation. - result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size); + result = AllocWithoutGrowthLocked(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); // Shrink back down as small as possible. 
size_t footprint = mspace_footprint(mspace_); mspace_set_footprint_limit(mspace_, footprint); diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h index 6ce138c235..1f80f1fd6b 100644 --- a/runtime/gc/space/dlmalloc_space.h +++ b/runtime/gc/space/dlmalloc_space.h @@ -48,11 +48,15 @@ class DlMallocSpace : public MallocSpace { // Virtual to allow ValgrindMallocSpace to intercept. virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_); + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) + OVERRIDE LOCKS_EXCLUDED(lock_); // Virtual to allow ValgrindMallocSpace to intercept. virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_) { - return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE LOCKS_EXCLUDED(lock_) { + return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } // Virtual to allow ValgrindMallocSpace to intercept. virtual size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE { @@ -67,15 +71,22 @@ class DlMallocSpace : public MallocSpace { LOCKS_EXCLUDED(lock_) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); + size_t MaxBytesBulkAllocatedFor(size_t num_bytes) OVERRIDE { + return num_bytes; + } + // DlMallocSpaces don't have thread local state. - void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + size_t RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + return 0U; } - void RevokeAllThreadLocalBuffers() OVERRIDE { + size_t RevokeAllThreadLocalBuffers() OVERRIDE { + return 0U; } // Faster non-virtual allocation path. 
mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) LOCKS_EXCLUDED(lock_); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + LOCKS_EXCLUDED(lock_); // Faster non-virtual allocation size path. size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size); @@ -134,7 +145,8 @@ class DlMallocSpace : public MallocSpace { private: mirror::Object* AllocWithoutGrowthLocked(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) EXCLUSIVE_LOCKS_REQUIRED(lock_); void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size, diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc index 7523de58bf..5c8e4b9299 100644 --- a/runtime/gc/space/large_object_space.cc +++ b/runtime/gc/space/large_object_space.cc @@ -38,10 +38,11 @@ class ValgrindLargeObjectMapSpace FINAL : public LargeObjectMapSpace { } virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE { + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE { mirror::Object* obj = LargeObjectMapSpace::Alloc(self, num_bytes + kValgrindRedZoneBytes * 2, bytes_allocated, - usable_size); + usable_size, bytes_tl_bulk_allocated); mirror::Object* object_without_rdz = reinterpret_cast<mirror::Object*>( reinterpret_cast<uintptr_t>(obj) + kValgrindRedZoneBytes); VALGRIND_MAKE_MEM_NOACCESS(reinterpret_cast<void*>(obj), kValgrindRedZoneBytes); @@ -108,7 +109,8 @@ LargeObjectMapSpace* LargeObjectMapSpace::Create(const std::string& name) { } mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { std::string error_msg; MemMap* mem_map = MemMap::MapAnonymous("large object space allocation", nullptr, 
num_bytes, PROT_READ | PROT_WRITE, true, false, &error_msg); @@ -131,6 +133,8 @@ mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes, if (usable_size != nullptr) { *usable_size = allocation_size; } + DCHECK(bytes_tl_bulk_allocated != nullptr); + *bytes_tl_bulk_allocated = allocation_size; num_bytes_allocated_ += allocation_size; total_bytes_allocated_ += allocation_size; ++num_objects_allocated_; @@ -413,7 +417,7 @@ size_t FreeListSpace::AllocationSize(mirror::Object* obj, size_t* usable_size) { } mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, size_t* bytes_tl_bulk_allocated) { MutexLock mu(self, lock_); const size_t allocation_size = RoundUp(num_bytes, kAlignment); AllocationInfo temp_info; @@ -451,6 +455,8 @@ mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* byt if (usable_size != nullptr) { *usable_size = allocation_size; } + DCHECK(bytes_tl_bulk_allocated != nullptr); + *bytes_tl_bulk_allocated = allocation_size; // Need to do these inside of the lock. ++num_objects_allocated_; ++total_objects_allocated_; diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h index 847f575815..d1f9386d09 100644 --- a/runtime/gc/space/large_object_space.h +++ b/runtime/gc/space/large_object_space.h @@ -62,9 +62,11 @@ class LargeObjectSpace : public DiscontinuousSpace, public AllocSpace { } size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) OVERRIDE; // LargeObjectSpaces don't have thread local state. 
- void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + size_t RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + return 0U; } - void RevokeAllThreadLocalBuffers() OVERRIDE { + size_t RevokeAllThreadLocalBuffers() OVERRIDE { + return 0U; } bool IsAllocSpace() const OVERRIDE { return true; @@ -124,7 +126,7 @@ class LargeObjectMapSpace : public LargeObjectSpace { // Return the storage space required by obj. size_t AllocationSize(mirror::Object* obj, size_t* usable_size); mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size); + size_t* usable_size, size_t* bytes_tl_bulk_allocated); size_t Free(Thread* self, mirror::Object* ptr); void Walk(DlMallocSpace::WalkCallback, void* arg) OVERRIDE LOCKS_EXCLUDED(lock_); // TODO: disabling thread safety analysis as this may be called when we already hold lock_. @@ -153,7 +155,7 @@ class FreeListSpace FINAL : public LargeObjectSpace { size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(lock_); mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; size_t Free(Thread* self, mirror::Object* obj) OVERRIDE; void Walk(DlMallocSpace::WalkCallback callback, void* arg) OVERRIDE LOCKS_EXCLUDED(lock_); void Dump(std::ostream& os) const; diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc index e17bad8a14..a261663ec7 100644 --- a/runtime/gc/space/large_object_space_test.cc +++ b/runtime/gc/space/large_object_space_test.cc @@ -49,11 +49,13 @@ void LargeObjectSpaceTest::LargeObjectTest() { while (requests.size() < num_allocations) { size_t request_size = test_rand(&rand_seed) % max_allocation_size; size_t allocation_size = 0; + size_t bytes_tl_bulk_allocated; mirror::Object* obj = los->Alloc(Thread::Current(), request_size, &allocation_size, - nullptr); + nullptr, 
&bytes_tl_bulk_allocated); ASSERT_TRUE(obj != nullptr); ASSERT_EQ(allocation_size, los->AllocationSize(obj, nullptr)); ASSERT_GE(allocation_size, request_size); + ASSERT_EQ(allocation_size, bytes_tl_bulk_allocated); // Fill in our magic value. uint8_t magic = (request_size & 0xFF) | 1; memset(obj, magic, request_size); @@ -83,9 +85,10 @@ void LargeObjectSpaceTest::LargeObjectTest() { // Test that dump doesn't crash. los->Dump(LOG(INFO)); - size_t bytes_allocated = 0; + size_t bytes_allocated = 0, bytes_tl_bulk_allocated; // Checks that the coalescing works. - mirror::Object* obj = los->Alloc(Thread::Current(), 100 * MB, &bytes_allocated, nullptr); + mirror::Object* obj = los->Alloc(Thread::Current(), 100 * MB, &bytes_allocated, nullptr, + &bytes_tl_bulk_allocated); EXPECT_TRUE(obj != nullptr); los->Free(Thread::Current(), obj); @@ -102,8 +105,9 @@ class AllocRaceTask : public Task { void Run(Thread* self) { for (size_t i = 0; i < iterations_ ; ++i) { - size_t alloc_size; - mirror::Object* ptr = los_->Alloc(self, size_, &alloc_size, nullptr); + size_t alloc_size, bytes_tl_bulk_allocated; + mirror::Object* ptr = los_->Alloc(self, size_, &alloc_size, nullptr, + &bytes_tl_bulk_allocated); NanoSleep((id_ + 3) * 1000); // (3+id) mu s diff --git a/runtime/gc/space/malloc_space.h b/runtime/gc/space/malloc_space.h index 06239e5e73..bbf1bbbdbd 100644 --- a/runtime/gc/space/malloc_space.h +++ b/runtime/gc/space/malloc_space.h @@ -55,10 +55,11 @@ class MallocSpace : public ContinuousMemMapAllocSpace { // Allocate num_bytes allowing the underlying space to grow. virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) = 0; + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) = 0; // Allocate num_bytes without allowing the underlying space to grow. 
virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) = 0; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) = 0; // Return the storage space required by obj. If usable_size isn't nullptr then it is set to the // amount of the storage space that may be used by obj. virtual size_t AllocationSize(mirror::Object* obj, size_t* usable_size) = 0; @@ -67,6 +68,11 @@ class MallocSpace : public ContinuousMemMapAllocSpace { virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) = 0; + // Returns the maximum bytes that could be allocated for the given + // size in bulk, that is the maximum value for the + // bytes_tl_bulk_allocated out param returned by MallocSpace::Alloc(). + virtual size_t MaxBytesBulkAllocatedFor(size_t num_bytes) = 0; + #ifndef NDEBUG virtual void CheckMoreCoreForPrecondition() {} // to be overridden in the debug build. #else diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h index a4ed7187c0..1cdf69dbe5 100644 --- a/runtime/gc/space/region_space-inl.h +++ b/runtime/gc/space/region_space-inl.h @@ -24,30 +24,36 @@ namespace gc { namespace space { inline mirror::Object* RegionSpace::Alloc(Thread*, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { num_bytes = RoundUp(num_bytes, kAlignment); - return AllocNonvirtual<false>(num_bytes, bytes_allocated, usable_size); + return AllocNonvirtual<false>(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } inline mirror::Object* RegionSpace::AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { Locks::mutator_lock_->AssertExclusiveHeld(self); - return Alloc(self, num_bytes, bytes_allocated, 
usable_size, bytes_tl_bulk_allocated); } template<bool kForEvac> inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { DCHECK(IsAligned<kAlignment>(num_bytes)); mirror::Object* obj; if (LIKELY(num_bytes <= kRegionSize)) { // Non-large object. if (!kForEvac) { - obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size); + obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } else { DCHECK(evac_region_ != nullptr); - obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size); + obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } if (LIKELY(obj != nullptr)) { return obj; @@ -55,9 +61,11 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by MutexLock mu(Thread::Current(), region_lock_); // Retry with current region since another thread may have updated it. 
if (!kForEvac) { - obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size); + obj = current_region_->Alloc(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } else { - obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size); + obj = evac_region_->Alloc(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } if (LIKELY(obj != nullptr)) { return obj; @@ -73,7 +81,7 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by r->Unfree(time_); r->SetNewlyAllocated(); ++num_non_free_regions_; - obj = r->Alloc(num_bytes, bytes_allocated, usable_size); + obj = r->Alloc(num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated); CHECK(obj != nullptr); current_region_ = r; return obj; @@ -85,7 +93,7 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by if (r->IsFree()) { r->Unfree(time_); ++num_non_free_regions_; - obj = r->Alloc(num_bytes, bytes_allocated, usable_size); + obj = r->Alloc(num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated); CHECK(obj != nullptr); evac_region_ = r; return obj; @@ -94,7 +102,8 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by } } else { // Large object. 
- obj = AllocLarge<kForEvac>(num_bytes, bytes_allocated, usable_size); + obj = AllocLarge<kForEvac>(num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); if (LIKELY(obj != nullptr)) { return obj; } @@ -103,7 +112,8 @@ inline mirror::Object* RegionSpace::AllocNonvirtual(size_t num_bytes, size_t* by } inline mirror::Object* RegionSpace::Region::Alloc(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { DCHECK(IsAllocated() && IsInToSpace()); DCHECK(IsAligned<kAlignment>(num_bytes)); Atomic<uint8_t*>* atomic_top = reinterpret_cast<Atomic<uint8_t*>*>(&top_); @@ -124,6 +134,7 @@ inline mirror::Object* RegionSpace::Region::Alloc(size_t num_bytes, size_t* byte if (usable_size != nullptr) { *usable_size = num_bytes; } + *bytes_tl_bulk_allocated = num_bytes; return reinterpret_cast<mirror::Object*>(old_top); } @@ -253,7 +264,8 @@ inline mirror::Object* RegionSpace::GetNextObject(mirror::Object* obj) { template<bool kForEvac> mirror::Object* RegionSpace::AllocLarge(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { DCHECK(IsAligned<kAlignment>(num_bytes)); DCHECK_GT(num_bytes, kRegionSize); size_t num_regs = RoundUp(num_bytes, kRegionSize) / kRegionSize; @@ -300,6 +312,7 @@ mirror::Object* RegionSpace::AllocLarge(size_t num_bytes, size_t* bytes_allocate if (usable_size != nullptr) { *usable_size = num_regs * kRegionSize; } + *bytes_tl_bulk_allocated = num_bytes; return reinterpret_cast<mirror::Object*>(first_reg->Begin()); } else { // right points to the non-free region. Start with the one after it. 
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc index 8bb73d614c..814ab6ce92 100644 --- a/runtime/gc/space/region_space.cc +++ b/runtime/gc/space/region_space.cc @@ -76,7 +76,7 @@ RegionSpace::RegionSpace(const std::string& name, MemMap* mem_map) current_region_ = &full_region_; evac_region_ = nullptr; size_t ignored; - DCHECK(full_region_.Alloc(kAlignment, &ignored, nullptr) == nullptr); + DCHECK(full_region_.Alloc(kAlignment, &ignored, nullptr, &ignored) == nullptr); } size_t RegionSpace::FromSpaceSize() { @@ -356,9 +356,10 @@ bool RegionSpace::AllocNewTlab(Thread* self) { return false; } -void RegionSpace::RevokeThreadLocalBuffers(Thread* thread) { +size_t RegionSpace::RevokeThreadLocalBuffers(Thread* thread) { MutexLock mu(Thread::Current(), region_lock_); RevokeThreadLocalBuffersLocked(thread); + return 0U; } void RegionSpace::RevokeThreadLocalBuffersLocked(Thread* thread) { @@ -377,7 +378,7 @@ void RegionSpace::RevokeThreadLocalBuffersLocked(Thread* thread) { thread->SetTlab(nullptr, nullptr); } -void RegionSpace::RevokeAllThreadLocalBuffers() { +size_t RegionSpace::RevokeAllThreadLocalBuffers() { Thread* self = Thread::Current(); MutexLock mu(self, *Locks::runtime_shutdown_lock_); MutexLock mu2(self, *Locks::thread_list_lock_); @@ -385,6 +386,7 @@ void RegionSpace::RevokeAllThreadLocalBuffers() { for (Thread* thread : thread_list) { RevokeThreadLocalBuffers(thread); } + return 0U; } void RegionSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) { diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h index 416054716c..b88ce24114 100644 --- a/runtime/gc/space/region_space.h +++ b/runtime/gc/space/region_space.h @@ -42,18 +42,20 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { // Allocate num_bytes, returns nullptr if the space is full. 
mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector. mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); // The main allocation routine. template<bool kForEvac> ALWAYS_INLINE mirror::Object* AllocNonvirtual(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size); + size_t* usable_size, + size_t* bytes_tl_bulk_allocated); // Allocate/free large objects (objects that are larger than the region size.) template<bool kForEvac> - mirror::Object* AllocLarge(size_t num_bytes, size_t* bytes_allocated, size_t* usable_size); + mirror::Object* AllocLarge(size_t num_bytes, size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated); void FreeLarge(mirror::Object* large_obj, size_t bytes_allocated); // Return the storage space required by obj. 
@@ -87,10 +89,10 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { void DumpRegions(std::ostream& os); void DumpNonFreeRegions(std::ostream& os); - void RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(region_lock_); + size_t RevokeThreadLocalBuffers(Thread* thread) LOCKS_EXCLUDED(region_lock_); void RevokeThreadLocalBuffersLocked(Thread* thread) EXCLUSIVE_LOCKS_REQUIRED(region_lock_); - void RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, - Locks::thread_list_lock_); + size_t RevokeAllThreadLocalBuffers() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, + Locks::thread_list_lock_); void AssertThreadLocalBuffersAreRevoked(Thread* thread) LOCKS_EXCLUDED(region_lock_); void AssertAllThreadLocalBuffersAreRevoked() LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_, Locks::thread_list_lock_); @@ -269,7 +271,8 @@ class RegionSpace FINAL : public ContinuousMemMapAllocSpace { } ALWAYS_INLINE mirror::Object* Alloc(size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size); + size_t* usable_size, + size_t* bytes_tl_bulk_allocated); bool IsFree() const { bool is_free = state_ == RegionState::kRegionStateFree; diff --git a/runtime/gc/space/rosalloc_space-inl.h b/runtime/gc/space/rosalloc_space-inl.h index 5d6642d349..9d582a3f86 100644 --- a/runtime/gc/space/rosalloc_space-inl.h +++ b/runtime/gc/space/rosalloc_space-inl.h @@ -26,13 +26,19 @@ namespace art { namespace gc { namespace space { +template<bool kMaybeRunningOnValgrind> inline size_t RosAllocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) { // obj is a valid object. Use its class in the header to get the size. // Don't use verification since the object may be dead if we are sweeping. 
size_t size = obj->SizeOf<kVerifyNone>(); - bool running_on_valgrind = RUNNING_ON_VALGRIND != 0; - if (running_on_valgrind) { - size += 2 * kDefaultValgrindRedZoneBytes; + bool running_on_valgrind = false; + if (kMaybeRunningOnValgrind) { + running_on_valgrind = RUNNING_ON_VALGRIND != 0; + if (running_on_valgrind) { + size += 2 * kDefaultValgrindRedZoneBytes; + } + } else { + DCHECK_EQ(RUNNING_ON_VALGRIND, 0U); } size_t size_by_size = rosalloc_->UsableSize(size); if (kIsDebugBuild) { @@ -55,28 +61,50 @@ inline size_t RosAllocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_ template<bool kThreadSafe> inline mirror::Object* RosAllocSpace::AllocCommon(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { - size_t rosalloc_size = 0; + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { + size_t rosalloc_bytes_allocated = 0; + size_t rosalloc_usable_size = 0; + size_t rosalloc_bytes_tl_bulk_allocated = 0; if (!kThreadSafe) { Locks::mutator_lock_->AssertExclusiveHeld(self); } mirror::Object* result = reinterpret_cast<mirror::Object*>( - rosalloc_->Alloc<kThreadSafe>(self, num_bytes, &rosalloc_size)); + rosalloc_->Alloc<kThreadSafe>(self, num_bytes, &rosalloc_bytes_allocated, + &rosalloc_usable_size, + &rosalloc_bytes_tl_bulk_allocated)); if (LIKELY(result != NULL)) { if (kDebugSpaces) { CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result) << ") not in bounds of allocation space " << *this; } DCHECK(bytes_allocated != NULL); - *bytes_allocated = rosalloc_size; - DCHECK_EQ(rosalloc_size, rosalloc_->UsableSize(result)); + *bytes_allocated = rosalloc_bytes_allocated; + DCHECK_EQ(rosalloc_usable_size, rosalloc_->UsableSize(result)); if (usable_size != nullptr) { - *usable_size = rosalloc_size; + *usable_size = rosalloc_usable_size; } + DCHECK(bytes_tl_bulk_allocated != NULL); + *bytes_tl_bulk_allocated = rosalloc_bytes_tl_bulk_allocated; } return result; } +inline bool 
RosAllocSpace::CanAllocThreadLocal(Thread* self, size_t num_bytes) { + return rosalloc_->CanAllocFromThreadLocalRun(self, num_bytes); +} + +inline mirror::Object* RosAllocSpace::AllocThreadLocal(Thread* self, size_t num_bytes, + size_t* bytes_allocated) { + DCHECK(bytes_allocated != nullptr); + return reinterpret_cast<mirror::Object*>( + rosalloc_->AllocFromThreadLocalRun(self, num_bytes, bytes_allocated)); +} + +inline size_t RosAllocSpace::MaxBytesBulkAllocatedForNonvirtual(size_t num_bytes) { + return rosalloc_->MaxBytesBulkAllocatedFor(num_bytes); +} + } // namespace space } // namespace gc } // namespace art diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc index ced25a40bb..f140021f76 100644 --- a/runtime/gc/space/rosalloc_space.cc +++ b/runtime/gc/space/rosalloc_space.cc @@ -154,7 +154,8 @@ allocator::RosAlloc* RosAllocSpace::CreateRosAlloc(void* begin, size_t morecore_ } mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { mirror::Object* result; { MutexLock mu(self, lock_); @@ -162,7 +163,8 @@ mirror::Object* RosAllocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t max_allowed = Capacity(); rosalloc_->SetFootprintLimit(max_allowed); // Try the allocation. - result = AllocCommon(self, num_bytes, bytes_allocated, usable_size); + result = AllocCommon(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); // Shrink back down as small as possible. 
size_t footprint = rosalloc_->Footprint(); rosalloc_->SetFootprintLimit(footprint); @@ -209,7 +211,7 @@ size_t RosAllocSpace::FreeList(Thread* self, size_t num_ptrs, mirror::Object** p __builtin_prefetch(reinterpret_cast<char*>(ptrs[i + kPrefetchLookAhead])); } if (kVerifyFreedBytes) { - verify_bytes += AllocationSizeNonvirtual(ptrs[i], nullptr); + verify_bytes += AllocationSizeNonvirtual<true>(ptrs[i], nullptr); } } @@ -338,12 +340,12 @@ void RosAllocSpace::InspectAllRosAlloc(void (*callback)(void *start, void *end, } } -void RosAllocSpace::RevokeThreadLocalBuffers(Thread* thread) { - rosalloc_->RevokeThreadLocalRuns(thread); +size_t RosAllocSpace::RevokeThreadLocalBuffers(Thread* thread) { + return rosalloc_->RevokeThreadLocalRuns(thread); } -void RosAllocSpace::RevokeAllThreadLocalBuffers() { - rosalloc_->RevokeAllThreadLocalRuns(); +size_t RosAllocSpace::RevokeAllThreadLocalBuffers() { + return rosalloc_->RevokeAllThreadLocalRuns(); } void RosAllocSpace::AssertThreadLocalBuffersAreRevoked(Thread* thread) { diff --git a/runtime/gc/space/rosalloc_space.h b/runtime/gc/space/rosalloc_space.h index c856e9560a..36268f76f8 100644 --- a/runtime/gc/space/rosalloc_space.h +++ b/runtime/gc/space/rosalloc_space.h @@ -47,18 +47,21 @@ class RosAllocSpace : public MallocSpace { bool low_memory_mode, bool can_move_objects); mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE LOCKS_EXCLUDED(lock_); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE LOCKS_EXCLUDED(lock_); mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE { - return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE { + return AllocNonvirtual(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* 
bytes_allocated, - size_t* usable_size) + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { - return AllocNonvirtualThreadUnsafe(self, num_bytes, bytes_allocated, usable_size); + return AllocNonvirtualThreadUnsafe(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE { - return AllocationSizeNonvirtual(obj, usable_size); + return AllocationSizeNonvirtual<true>(obj, usable_size); } size_t Free(Thread* self, mirror::Object* ptr) OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); @@ -66,17 +69,33 @@ class RosAllocSpace : public MallocSpace { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) { + size_t* usable_size, size_t* bytes_tl_bulk_allocated) { // RosAlloc zeroes memory internally. - return AllocCommon(self, num_bytes, bytes_allocated, usable_size); + return AllocCommon(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } mirror::Object* AllocNonvirtualThreadUnsafe(Thread* self, size_t num_bytes, - size_t* bytes_allocated, size_t* usable_size) { + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) { // RosAlloc zeroes memory internally. Pass in false for thread unsafe. - return AllocCommon<false>(self, num_bytes, bytes_allocated, usable_size); + return AllocCommon<false>(self, num_bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); } + // Returns true if the given allocation request can be allocated in + // an existing thread local run without allocating a new run. + ALWAYS_INLINE bool CanAllocThreadLocal(Thread* self, size_t num_bytes); + // Allocate the given allocation request in an existing thread local + // run without allocating a new run. 
+ ALWAYS_INLINE mirror::Object* AllocThreadLocal(Thread* self, size_t num_bytes, + size_t* bytes_allocated); + size_t MaxBytesBulkAllocatedFor(size_t num_bytes) OVERRIDE { + return MaxBytesBulkAllocatedForNonvirtual(num_bytes); + } + ALWAYS_INLINE size_t MaxBytesBulkAllocatedForNonvirtual(size_t num_bytes); + // TODO: NO_THREAD_SAFETY_ANALYSIS because SizeOf() requires that mutator_lock is held. + template<bool kMaybeRunningOnValgrind> size_t AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) NO_THREAD_SAFETY_ANALYSIS; @@ -99,8 +118,8 @@ class RosAllocSpace : public MallocSpace { uint64_t GetBytesAllocated() OVERRIDE; uint64_t GetObjectsAllocated() OVERRIDE; - void RevokeThreadLocalBuffers(Thread* thread); - void RevokeAllThreadLocalBuffers(); + size_t RevokeThreadLocalBuffers(Thread* thread); + size_t RevokeAllThreadLocalBuffers(); void AssertThreadLocalBuffersAreRevoked(Thread* thread); void AssertAllThreadLocalBuffersAreRevoked(); @@ -134,7 +153,7 @@ class RosAllocSpace : public MallocSpace { private: template<bool kThreadSafe = true> mirror::Object* AllocCommon(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size); + size_t* usable_size, size_t* bytes_tl_bulk_allocated); void* CreateAllocator(void* base, size_t morecore_start, size_t initial_size, size_t maximum_size, bool low_memory_mode) OVERRIDE { diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h index d24650b60d..f2378d9ff0 100644 --- a/runtime/gc/space/space.h +++ b/runtime/gc/space/space.h @@ -203,14 +203,24 @@ class AllocSpace { // succeeds, the output parameter bytes_allocated will be set to the // actually allocated bytes which is >= num_bytes. // Alloc can be called from multiple threads at the same time and must be thread-safe. + // + // bytes_tl_bulk_allocated - bytes allocated in bulk ahead of time for a thread local allocation, + // if applicable. 
It can be + // 1) equal to bytes_allocated if it's not a thread local allocation, + // 2) greater than bytes_allocated if it's a thread local + // allocation that required a new buffer, or + // 3) zero if it's a thread local allocation in an existing + // buffer. + // This is what is to be added to Heap::num_bytes_allocated_. virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) = 0; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) = 0; // Thread-unsafe allocation for when mutators are suspended, used by the semispace collector. virtual mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) + size_t* usable_size, + size_t* bytes_tl_bulk_allocated) EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_) { - return Alloc(self, num_bytes, bytes_allocated, usable_size); + return Alloc(self, num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated); } // Return the storage space required by obj. @@ -224,11 +234,15 @@ class AllocSpace { // Revoke any sort of thread-local buffers that are used to speed up allocations for the given // thread, if the alloc space implementation uses any. - virtual void RevokeThreadLocalBuffers(Thread* thread) = 0; + // Returns the total free bytes in the revoked thread local runs that's to be subtracted + // from Heap::num_bytes_allocated_ or zero if unnecessary. + virtual size_t RevokeThreadLocalBuffers(Thread* thread) = 0; // Revoke any sort of thread-local buffers that are used to speed up allocations for all the // threads, if the alloc space implementation uses any. - virtual void RevokeAllThreadLocalBuffers() = 0; + // Returns the total free bytes in the revoked thread local runs that's to be subtracted + // from Heap::num_bytes_allocated_ or zero if unnecessary. 
+ virtual size_t RevokeAllThreadLocalBuffers() = 0; virtual void LogFragmentationAllocFailure(std::ostream& os, size_t failed_alloc_bytes) = 0; diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h index 09d10dd94b..3e9e9f7a49 100644 --- a/runtime/gc/space/space_test.h +++ b/runtime/gc/space/space_test.h @@ -61,11 +61,13 @@ class SpaceTest : public CommonRuntimeTest { } mirror::Object* Alloc(space::MallocSpace* alloc_space, Thread* self, size_t bytes, - size_t* bytes_allocated, size_t* usable_size) + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { StackHandleScope<1> hs(self); Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self))); - mirror::Object* obj = alloc_space->Alloc(self, bytes, bytes_allocated, usable_size); + mirror::Object* obj = alloc_space->Alloc(self, bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); if (obj != nullptr) { InstallClass(obj, byte_array_class.Get(), bytes); } @@ -73,11 +75,13 @@ class SpaceTest : public CommonRuntimeTest { } mirror::Object* AllocWithGrowth(space::MallocSpace* alloc_space, Thread* self, size_t bytes, - size_t* bytes_allocated, size_t* usable_size) + size_t* bytes_allocated, size_t* usable_size, + size_t* bytes_tl_bulk_allocated) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { StackHandleScope<1> hs(self); Handle<mirror::Class> byte_array_class(hs.NewHandle(GetByteArrayClass(self))); - mirror::Object* obj = alloc_space->AllocWithGrowth(self, bytes, bytes_allocated, usable_size); + mirror::Object* obj = alloc_space->AllocWithGrowth(self, bytes, bytes_allocated, usable_size, + bytes_tl_bulk_allocated); if (obj != nullptr) { InstallClass(obj, byte_array_class.Get(), bytes); } @@ -182,34 +186,38 @@ void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) { ScopedObjectAccess soa(self); // Succeeds, fits without adjusting the footprint limit. 
- size_t ptr1_bytes_allocated, ptr1_usable_size; + size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated; StackHandleScope<3> hs(soa.Self()); MutableHandle<mirror::Object> ptr1( - hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size))); + hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size, + &ptr1_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr1.Get() != nullptr); EXPECT_LE(1U * MB, ptr1_bytes_allocated); EXPECT_LE(1U * MB, ptr1_usable_size); EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated); + EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr2 == nullptr); // Succeeds, adjusts the footprint. - size_t ptr3_bytes_allocated, ptr3_usable_size; + size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated; MutableHandle<mirror::Object> ptr3( - hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size))); + hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size, + &ptr3_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr3.Get() != nullptr); EXPECT_LE(8U * MB, ptr3_bytes_allocated); EXPECT_LE(8U * MB, ptr3_usable_size); EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated); + EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr4 = space->Alloc(self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr4 == nullptr); // Also fails, requires a higher allowed footprint. 
- mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr5 == nullptr); // Release some memory. @@ -219,13 +227,15 @@ void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) { EXPECT_LE(8U * MB, free3); // Succeeds, now that memory has been freed. - size_t ptr6_bytes_allocated, ptr6_usable_size; + size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated; Handle<mirror::Object> ptr6( - hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size))); + hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size, + &ptr6_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr6.Get() != nullptr); EXPECT_LE(9U * MB, ptr6_bytes_allocated); EXPECT_LE(9U * MB, ptr6_usable_size); EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated); + EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated); // Final clean up. size_t free1 = space->AllocationSize(ptr1.Get(), nullptr); @@ -233,7 +243,7 @@ void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) { EXPECT_LE(1U * MB, free1); // Make sure that the zygote space isn't directly at the start of the space. - EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr) != nullptr); + EXPECT_TRUE(space->Alloc(self, 1U * MB, &dummy, nullptr, &dummy) != nullptr); gc::Heap* heap = Runtime::Current()->GetHeap(); space::Space* old_space = space; @@ -250,22 +260,26 @@ void SpaceTest::ZygoteSpaceTestBody(CreateSpaceFn create_space) { AddSpace(space, false); // Succeeds, fits without adjusting the footprint limit. 
- ptr1.Assign(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size)); + ptr1.Assign(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size, + &ptr1_bytes_tl_bulk_allocated)); EXPECT_TRUE(ptr1.Get() != nullptr); EXPECT_LE(1U * MB, ptr1_bytes_allocated); EXPECT_LE(1U * MB, ptr1_usable_size); EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated); + EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated); // Fails, requires a higher footprint limit. - ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr); + ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr2 == nullptr); // Succeeds, adjusts the footprint. - ptr3.Assign(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size)); + ptr3.Assign(AllocWithGrowth(space, self, 2 * MB, &ptr3_bytes_allocated, &ptr3_usable_size, + &ptr3_bytes_tl_bulk_allocated)); EXPECT_TRUE(ptr3.Get() != nullptr); EXPECT_LE(2U * MB, ptr3_bytes_allocated); EXPECT_LE(2U * MB, ptr3_usable_size); EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated); + EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated); space->Free(self, ptr3.Assign(nullptr)); // Final clean up. @@ -285,34 +299,38 @@ void SpaceTest::AllocAndFreeTestBody(CreateSpaceFn create_space) { AddSpace(space); // Succeeds, fits without adjusting the footprint limit. 
- size_t ptr1_bytes_allocated, ptr1_usable_size; + size_t ptr1_bytes_allocated, ptr1_usable_size, ptr1_bytes_tl_bulk_allocated; StackHandleScope<3> hs(soa.Self()); MutableHandle<mirror::Object> ptr1( - hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size))); + hs.NewHandle(Alloc(space, self, 1 * MB, &ptr1_bytes_allocated, &ptr1_usable_size, + &ptr1_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr1.Get() != nullptr); EXPECT_LE(1U * MB, ptr1_bytes_allocated); EXPECT_LE(1U * MB, ptr1_usable_size); EXPECT_LE(ptr1_usable_size, ptr1_bytes_allocated); + EXPECT_EQ(ptr1_bytes_tl_bulk_allocated, ptr1_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr2 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr2 == nullptr); // Succeeds, adjusts the footprint. - size_t ptr3_bytes_allocated, ptr3_usable_size; + size_t ptr3_bytes_allocated, ptr3_usable_size, ptr3_bytes_tl_bulk_allocated; MutableHandle<mirror::Object> ptr3( - hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size))); + hs.NewHandle(AllocWithGrowth(space, self, 8 * MB, &ptr3_bytes_allocated, &ptr3_usable_size, + &ptr3_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr3.Get() != nullptr); EXPECT_LE(8U * MB, ptr3_bytes_allocated); EXPECT_LE(8U * MB, ptr3_usable_size); EXPECT_LE(ptr3_usable_size, ptr3_bytes_allocated); + EXPECT_EQ(ptr3_bytes_tl_bulk_allocated, ptr3_bytes_allocated); // Fails, requires a higher footprint limit. - mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr4 = Alloc(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr4 == nullptr); // Also fails, requires a higher allowed footprint. 
- mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr); + mirror::Object* ptr5 = AllocWithGrowth(space, self, 8 * MB, &dummy, nullptr, &dummy); EXPECT_TRUE(ptr5 == nullptr); // Release some memory. @@ -322,13 +340,15 @@ void SpaceTest::AllocAndFreeTestBody(CreateSpaceFn create_space) { EXPECT_LE(8U * MB, free3); // Succeeds, now that memory has been freed. - size_t ptr6_bytes_allocated, ptr6_usable_size; + size_t ptr6_bytes_allocated, ptr6_usable_size, ptr6_bytes_tl_bulk_allocated; Handle<mirror::Object> ptr6( - hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size))); + hs.NewHandle(AllocWithGrowth(space, self, 9 * MB, &ptr6_bytes_allocated, &ptr6_usable_size, + &ptr6_bytes_tl_bulk_allocated))); EXPECT_TRUE(ptr6.Get() != nullptr); EXPECT_LE(9U * MB, ptr6_bytes_allocated); EXPECT_LE(9U * MB, ptr6_usable_size); EXPECT_LE(ptr6_usable_size, ptr6_bytes_allocated); + EXPECT_EQ(ptr6_bytes_tl_bulk_allocated, ptr6_bytes_allocated); // Final clean up. size_t free1 = space->AllocationSize(ptr1.Get(), nullptr); @@ -348,14 +368,16 @@ void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) { // Succeeds, fits without adjusting the max allowed footprint. mirror::Object* lots_of_objects[1024]; for (size_t i = 0; i < arraysize(lots_of_objects); i++) { - size_t allocation_size, usable_size; + size_t allocation_size, usable_size, bytes_tl_bulk_allocated; size_t size_of_zero_length_byte_array = SizeOfZeroLengthByteArray(); lots_of_objects[i] = Alloc(space, self, size_of_zero_length_byte_array, &allocation_size, - &usable_size); + &usable_size, &bytes_tl_bulk_allocated); EXPECT_TRUE(lots_of_objects[i] != nullptr); size_t computed_usable_size; EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size)); EXPECT_EQ(usable_size, computed_usable_size); + EXPECT_TRUE(bytes_tl_bulk_allocated == 0 || + bytes_tl_bulk_allocated >= allocation_size); } // Release memory. 
@@ -363,12 +385,15 @@ void SpaceTest::AllocAndFreeListTestBody(CreateSpaceFn create_space) { // Succeeds, fits by adjusting the max allowed footprint. for (size_t i = 0; i < arraysize(lots_of_objects); i++) { - size_t allocation_size, usable_size; - lots_of_objects[i] = AllocWithGrowth(space, self, 1024, &allocation_size, &usable_size); + size_t allocation_size, usable_size, bytes_tl_bulk_allocated; + lots_of_objects[i] = AllocWithGrowth(space, self, 1024, &allocation_size, &usable_size, + &bytes_tl_bulk_allocated); EXPECT_TRUE(lots_of_objects[i] != nullptr); size_t computed_usable_size; EXPECT_EQ(allocation_size, space->AllocationSize(lots_of_objects[i], &computed_usable_size)); EXPECT_EQ(usable_size, computed_usable_size); + EXPECT_TRUE(bytes_tl_bulk_allocated == 0 || + bytes_tl_bulk_allocated >= allocation_size); } // Release memory. @@ -425,10 +450,13 @@ void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t StackHandleScope<1> hs(soa.Self()); auto object(hs.NewHandle<mirror::Object>(nullptr)); size_t bytes_allocated = 0; + size_t bytes_tl_bulk_allocated; if (round <= 1) { - object.Assign(Alloc(space, self, alloc_size, &bytes_allocated, nullptr)); + object.Assign(Alloc(space, self, alloc_size, &bytes_allocated, nullptr, + &bytes_tl_bulk_allocated)); } else { - object.Assign(AllocWithGrowth(space, self, alloc_size, &bytes_allocated, nullptr)); + object.Assign(AllocWithGrowth(space, self, alloc_size, &bytes_allocated, nullptr, + &bytes_tl_bulk_allocated)); } footprint = space->GetFootprint(); EXPECT_GE(space->Size(), footprint); // invariant @@ -441,6 +469,8 @@ void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t } else { EXPECT_GE(allocation_size, 8u); } + EXPECT_TRUE(bytes_tl_bulk_allocated == 0 || + bytes_tl_bulk_allocated >= allocation_size); amount_allocated += allocation_size; break; } @@ -518,11 +548,13 @@ void SpaceTest::SizeFootPrintGrowthLimitAndTrimBody(MallocSpace* space, intptr_t auto 
large_object(hs.NewHandle<mirror::Object>(nullptr)); size_t three_quarters_space = (growth_limit / 2) + (growth_limit / 4); size_t bytes_allocated = 0; + size_t bytes_tl_bulk_allocated; if (round <= 1) { - large_object.Assign(Alloc(space, self, three_quarters_space, &bytes_allocated, nullptr)); + large_object.Assign(Alloc(space, self, three_quarters_space, &bytes_allocated, nullptr, + &bytes_tl_bulk_allocated)); } else { large_object.Assign(AllocWithGrowth(space, self, three_quarters_space, &bytes_allocated, - nullptr)); + nullptr, &bytes_tl_bulk_allocated)); } EXPECT_TRUE(large_object.Get() != nullptr); diff --git a/runtime/gc/space/valgrind_malloc_space-inl.h b/runtime/gc/space/valgrind_malloc_space-inl.h index ae8e892e29..bc329e129c 100644 --- a/runtime/gc/space/valgrind_malloc_space-inl.h +++ b/runtime/gc/space/valgrind_malloc_space-inl.h @@ -32,10 +32,15 @@ namespace valgrind_details { template <size_t kValgrindRedZoneBytes, bool kUseObjSizeForUsable> inline mirror::Object* AdjustForValgrind(void* obj_with_rdz, size_t num_bytes, size_t bytes_allocated, size_t usable_size, - size_t* bytes_allocated_out, size_t* usable_size_out) { + size_t bytes_tl_bulk_allocated, + size_t* bytes_allocated_out, size_t* usable_size_out, + size_t* bytes_tl_bulk_allocated_out) { if (bytes_allocated_out != nullptr) { *bytes_allocated_out = bytes_allocated; } + if (bytes_tl_bulk_allocated_out != nullptr) { + *bytes_tl_bulk_allocated_out = bytes_tl_bulk_allocated; + } // This cuts over-provision and is a trade-off between testing the over-provisioning code paths // vs checking overflows in the regular paths. 
@@ -82,20 +87,25 @@ ValgrindMallocSpace<S, kValgrindRedZoneBytes, kAdjustForRedzoneInAllocSize, kUseObjSizeForUsable>::AllocWithGrowth( - Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) { + Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out, + size_t* bytes_tl_bulk_allocated_out) { size_t bytes_allocated; size_t usable_size; + size_t bytes_tl_bulk_allocated; void* obj_with_rdz = S::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes, - &bytes_allocated, &usable_size); + &bytes_allocated, &usable_size, + &bytes_tl_bulk_allocated); if (obj_with_rdz == nullptr) { return nullptr; } - return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, - kUseObjSizeForUsable>(obj_with_rdz, num_bytes, - bytes_allocated, usable_size, - bytes_allocated_out, - usable_size_out); + return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, kUseObjSizeForUsable>( + obj_with_rdz, num_bytes, + bytes_allocated, usable_size, + bytes_tl_bulk_allocated, + bytes_allocated_out, + usable_size_out, + bytes_tl_bulk_allocated_out); } template <typename S, @@ -106,11 +116,13 @@ mirror::Object* ValgrindMallocSpace<S, kValgrindRedZoneBytes, kAdjustForRedzoneInAllocSize, kUseObjSizeForUsable>::Alloc( - Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) { + Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out, + size_t* bytes_tl_bulk_allocated_out) { size_t bytes_allocated; size_t usable_size; + size_t bytes_tl_bulk_allocated; void* obj_with_rdz = S::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes, - &bytes_allocated, &usable_size); + &bytes_allocated, &usable_size, &bytes_tl_bulk_allocated); if (obj_with_rdz == nullptr) { return nullptr; } @@ -118,8 +130,10 @@ mirror::Object* ValgrindMallocSpace<S, return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, kUseObjSizeForUsable>(obj_with_rdz, num_bytes, bytes_allocated, usable_size, + 
bytes_tl_bulk_allocated, bytes_allocated_out, - usable_size_out); + usable_size_out, + bytes_tl_bulk_allocated_out); } template <typename S, @@ -130,20 +144,25 @@ mirror::Object* ValgrindMallocSpace<S, kValgrindRedZoneBytes, kAdjustForRedzoneInAllocSize, kUseObjSizeForUsable>::AllocThreadUnsafe( - Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out) { + Thread* self, size_t num_bytes, size_t* bytes_allocated_out, size_t* usable_size_out, + size_t* bytes_tl_bulk_allocated_out) { size_t bytes_allocated; size_t usable_size; + size_t bytes_tl_bulk_allocated; void* obj_with_rdz = S::AllocThreadUnsafe(self, num_bytes + 2 * kValgrindRedZoneBytes, - &bytes_allocated, &usable_size); + &bytes_allocated, &usable_size, + &bytes_tl_bulk_allocated); if (obj_with_rdz == nullptr) { return nullptr; } - return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, - kUseObjSizeForUsable>(obj_with_rdz, num_bytes, - bytes_allocated, usable_size, - bytes_allocated_out, - usable_size_out); + return valgrind_details::AdjustForValgrind<kValgrindRedZoneBytes, kUseObjSizeForUsable>( + obj_with_rdz, num_bytes, + bytes_allocated, usable_size, + bytes_tl_bulk_allocated, + bytes_allocated_out, + usable_size_out, + bytes_tl_bulk_allocated_out); } template <typename S, @@ -226,6 +245,17 @@ ValgrindMallocSpace<S, mem_map->Size() - initial_size); } +template <typename S, + size_t kValgrindRedZoneBytes, + bool kAdjustForRedzoneInAllocSize, + bool kUseObjSizeForUsable> +size_t ValgrindMallocSpace<S, + kValgrindRedZoneBytes, + kAdjustForRedzoneInAllocSize, + kUseObjSizeForUsable>::MaxBytesBulkAllocatedFor(size_t num_bytes) { + return S::MaxBytesBulkAllocatedFor(num_bytes + 2 * kValgrindRedZoneBytes); +} + } // namespace space } // namespace gc } // namespace art diff --git a/runtime/gc/space/valgrind_malloc_space.h b/runtime/gc/space/valgrind_malloc_space.h index 707ea69a20..a6b010a2a1 100644 --- a/runtime/gc/space/valgrind_malloc_space.h +++ 
b/runtime/gc/space/valgrind_malloc_space.h @@ -34,12 +34,13 @@ template <typename BaseMallocSpaceType, class ValgrindMallocSpace FINAL : public BaseMallocSpaceType { public: mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE; mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; mirror::Object* AllocThreadUnsafe(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE - EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); + size_t* usable_size, size_t* bytes_tl_bulk_allocated) + OVERRIDE EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_); size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE; @@ -53,6 +54,8 @@ class ValgrindMallocSpace FINAL : public BaseMallocSpaceType { UNUSED(ptr); } + size_t MaxBytesBulkAllocatedFor(size_t num_bytes) OVERRIDE; + template <typename... Params> explicit ValgrindMallocSpace(MemMap* mem_map, size_t initial_size, Params... 
params); virtual ~ValgrindMallocSpace() {} diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc index a868e6831d..9e882a898e 100644 --- a/runtime/gc/space/zygote_space.cc +++ b/runtime/gc/space/zygote_space.cc @@ -77,7 +77,7 @@ void ZygoteSpace::Dump(std::ostream& os) const { << ",name=\"" << GetName() << "\"]"; } -mirror::Object* ZygoteSpace::Alloc(Thread*, size_t, size_t*, size_t*) { +mirror::Object* ZygoteSpace::Alloc(Thread*, size_t, size_t*, size_t*, size_t*) { UNIMPLEMENTED(FATAL); UNREACHABLE(); } diff --git a/runtime/gc/space/zygote_space.h b/runtime/gc/space/zygote_space.h index 0cf4bb139c..934a234345 100644 --- a/runtime/gc/space/zygote_space.h +++ b/runtime/gc/space/zygote_space.h @@ -46,7 +46,7 @@ class ZygoteSpace FINAL : public ContinuousMemMapAllocSpace { } mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated, - size_t* usable_size) OVERRIDE; + size_t* usable_size, size_t* bytes_tl_bulk_allocated) OVERRIDE; size_t AllocationSize(mirror::Object* obj, size_t* usable_size) OVERRIDE; @@ -55,9 +55,11 @@ class ZygoteSpace FINAL : public ContinuousMemMapAllocSpace { size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs) OVERRIDE; // ZygoteSpaces don't have thread local state. 
- void RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + size_t RevokeThreadLocalBuffers(art::Thread*) OVERRIDE { + return 0U; } - void RevokeAllThreadLocalBuffers() OVERRIDE { + size_t RevokeAllThreadLocalBuffers() OVERRIDE { + return 0U; } uint64_t GetBytesAllocated() { diff --git a/runtime/memory_region.h b/runtime/memory_region.h index b3820be26c..939a1a9212 100644 --- a/runtime/memory_region.h +++ b/runtime/memory_region.h @@ -23,6 +23,7 @@ #include "base/macros.h" #include "base/value_object.h" #include "globals.h" +#include "utils.h" namespace art { @@ -45,14 +46,64 @@ class MemoryRegion FINAL : public ValueObject { uint8_t* start() const { return reinterpret_cast<uint8_t*>(pointer_); } uint8_t* end() const { return start() + size_; } + // Load value of type `T` at `offset`. The memory address corresponding + // to `offset` should be word-aligned. template<typename T> T Load(uintptr_t offset) const { + // TODO: DCHECK that the address is word-aligned. return *ComputeInternalPointer<T>(offset); } + // Store `value` (of type `T`) at `offset`. The memory address + // corresponding to `offset` should be word-aligned. template<typename T> void Store(uintptr_t offset, T value) const { + // TODO: DCHECK that the address is word-aligned. *ComputeInternalPointer<T>(offset) = value; } + // TODO: Local hack to prevent name clashes between two conflicting + // implementations of bit_cast: + // - art::bit_cast<Destination, Source> runtime/base/casts.h, and + // - art::bit_cast<Source, Destination> from runtime/utils.h. + // Remove this when these routines have been merged. + template<typename Source, typename Destination> + static Destination local_bit_cast(Source in) { + static_assert(sizeof(Source) <= sizeof(Destination), + "Size of Source not <= size of Destination"); + union { + Source u; + Destination v; + } tmp; + tmp.u = in; + return tmp.v; + } + + // Load value of type `T` at `offset`. 
The memory address corresponding + // to `offset` does not need to be word-aligned. + template<typename T> T LoadUnaligned(uintptr_t offset) const { + // Equivalent unsigned integer type corresponding to T. + typedef typename UnsignedIntegerType<sizeof(T)>::type U; + U equivalent_unsigned_integer_value = 0; + // Read the value byte by byte in a little-endian fashion. + for (size_t i = 0; i < sizeof(U); ++i) { + equivalent_unsigned_integer_value += + *ComputeInternalPointer<uint8_t>(offset + i) << (i * kBitsPerByte); + } + return local_bit_cast<U, T>(equivalent_unsigned_integer_value); + } + + // Store `value` (of type `T`) at `offset`. The memory address + // corresponding to `offset` does not need to be word-aligned. + template<typename T> void StoreUnaligned(uintptr_t offset, T value) const { + // Equivalent unsigned integer type corresponding to T. + typedef typename UnsignedIntegerType<sizeof(T)>::type U; + U equivalent_unsigned_integer_value = local_bit_cast<T, U>(value); + // Write the value byte by byte in a little-endian fashion. + for (size_t i = 0; i < sizeof(U); ++i) { + *ComputeInternalPointer<uint8_t>(offset + i) = + (equivalent_unsigned_integer_value >> (i * kBitsPerByte)) & 0xFF; + } + } + template<typename T> T* PointerTo(uintptr_t offset) const { return ComputeInternalPointer<T>(offset); } diff --git a/runtime/memory_region_test.cc b/runtime/memory_region_test.cc new file mode 100644 index 0000000000..72e03a485a --- /dev/null +++ b/runtime/memory_region_test.cc @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "memory_region.h" + +#include "gtest/gtest.h" + +namespace art { + +TEST(MemoryRegion, LoadUnaligned) { + const size_t n = 8; + uint8_t data[n] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + MemoryRegion region(&data, n); + + ASSERT_EQ(0, region.LoadUnaligned<char>(0)); + ASSERT_EQ(1u + + (2u << kBitsPerByte) + + (3u << 2 * kBitsPerByte) + + (4u << 3 * kBitsPerByte), + region.LoadUnaligned<uint32_t>(1)); + ASSERT_EQ(5 + (6 << kBitsPerByte), region.LoadUnaligned<int16_t>(5)); + ASSERT_EQ(7u, region.LoadUnaligned<unsigned char>(7)); +} + +TEST(MemoryRegion, StoreUnaligned) { + const size_t n = 8; + uint8_t data[n] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + MemoryRegion region(&data, n); + + region.StoreUnaligned<unsigned char>(0u, 7); + region.StoreUnaligned<int16_t>(1, 6 + (5 << kBitsPerByte)); + region.StoreUnaligned<uint32_t>(3, + 4u + + (3u << kBitsPerByte) + + (2u << 2 * kBitsPerByte) + + (1u << 3 * kBitsPerByte)); + region.StoreUnaligned<char>(7, 0); + + uint8_t expected[n] = { 7, 6, 5, 4, 3, 2, 1, 0 }; + for (size_t i = 0; i < n; ++i) { + ASSERT_EQ(expected[i], data[i]); + } +} + +} // namespace art diff --git a/runtime/primitive.h b/runtime/primitive.h index 9dda144755..2d6b6b30c7 100644 --- a/runtime/primitive.h +++ b/runtime/primitive.h @@ -165,6 +165,10 @@ class Primitive { } } + static bool IsIntOrLongType(Type type) { + return type == kPrimInt || type == kPrimLong; + } + static bool Is64BitType(Type type) { return type == kPrimLong || type == kPrimDouble; } diff --git a/runtime/stack.cc b/runtime/stack.cc index 48becf688f..e420c57346 100644 --- 
a/runtime/stack.cc +++ b/runtime/stack.cc @@ -204,29 +204,32 @@ bool StackVisitor::GetVRegFromOptimizedCode(mirror::ArtMethod* m, uint16_t vreg, DCHECK(code_item != nullptr) << PrettyMethod(m); // Can't be NULL or how would we compile // its instructions? DCHECK_LT(vreg, code_item->registers_size_); - DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, - code_item->registers_size_); - DexRegisterMap::LocationKind location_kind = dex_register_map.GetLocationKind(vreg); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, code_item->registers_size_); + DexRegisterLocation::Kind location_kind = dex_register_map.GetLocationKind(vreg); switch (location_kind) { - case DexRegisterMap::kInStack: { + case DexRegisterLocation::Kind::kInStack: { const int32_t offset = dex_register_map.GetStackOffsetInBytes(vreg); const uint8_t* addr = reinterpret_cast<const uint8_t*>(cur_quick_frame_) + offset; *val = *reinterpret_cast<const uint32_t*>(addr); return true; } - case DexRegisterMap::kInRegister: - case DexRegisterMap::kInFpuRegister: { + case DexRegisterLocation::Kind::kInRegister: + case DexRegisterLocation::Kind::kInFpuRegister: { uint32_t reg = dex_register_map.GetMachineRegister(vreg); return GetRegisterIfAccessible(reg, kind, val); } - case DexRegisterMap::kConstant: + case DexRegisterLocation::Kind::kConstant: *val = dex_register_map.GetConstant(vreg); return true; - case DexRegisterMap::kNone: + case DexRegisterLocation::Kind::kNone: return false; + default: + LOG(FATAL) + << "Unexpected location kind" + << DexRegisterLocation::PrettyDescriptor(dex_register_map.GetLocationInternalKind(vreg)); + UNREACHABLE(); } - UNREACHABLE(); - return false; } bool StackVisitor::GetRegisterIfAccessible(uint32_t reg, VRegKind kind, uint32_t* val) const { @@ -386,29 +389,29 @@ bool StackVisitor::SetVRegFromOptimizedCode(mirror::ArtMethod* m, uint16_t vreg, DCHECK(code_item != nullptr) << PrettyMethod(m); // Can't be NULL or how would we 
compile // its instructions? DCHECK_LT(vreg, code_item->registers_size_); - DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map, - code_item->registers_size_); - DexRegisterMap::LocationKind location_kind = dex_register_map.GetLocationKind(vreg); + DexRegisterMap dex_register_map = + code_info.GetDexRegisterMapOf(stack_map, code_item->registers_size_); + DexRegisterLocation::Kind location_kind = dex_register_map.GetLocationKind(vreg); uint32_t dex_pc = m->ToDexPc(cur_quick_frame_pc_, false); switch (location_kind) { - case DexRegisterMap::kInStack: { + case DexRegisterLocation::Kind::kInStack: { const int32_t offset = dex_register_map.GetStackOffsetInBytes(vreg); uint8_t* addr = reinterpret_cast<uint8_t*>(cur_quick_frame_) + offset; *reinterpret_cast<uint32_t*>(addr) = new_value; return true; } - case DexRegisterMap::kInRegister: - case DexRegisterMap::kInFpuRegister: { + case DexRegisterLocation::Kind::kInRegister: + case DexRegisterLocation::Kind::kInFpuRegister: { uint32_t reg = dex_register_map.GetMachineRegister(vreg); return SetRegisterIfAccessible(reg, new_value, kind); } - case DexRegisterMap::kConstant: + case DexRegisterLocation::Kind::kConstant: LOG(ERROR) << StringPrintf("Cannot change value of DEX register v%u used as a constant at " "DEX pc 0x%x (native pc 0x%x) of method %s", vreg, dex_pc, native_pc_offset, PrettyMethod(cur_quick_frame_->AsMirrorPtr()).c_str()); return false; - case DexRegisterMap::kNone: + case DexRegisterLocation::Kind::kNone: LOG(ERROR) << StringPrintf("No location for DEX register v%u at DEX pc 0x%x " "(native pc 0x%x) of method %s", vreg, dex_pc, native_pc_offset, diff --git a/runtime/stack_map.h b/runtime/stack_map.h index 6d996722b4..c98162306c 100644 --- a/runtime/stack_map.h +++ b/runtime/stack_map.h @@ -23,6 +23,11 @@ namespace art { +// Size of a frame slot, in bytes. This constant is a signed value, +// to please the compiler in arithmetic operations involving int32_t +// (signed) values. 
+static ssize_t constexpr kFrameSlotSize = 4; + /** * Classes in the following file are wrapper on stack map information backed * by a MemoryRegion. As such they read and write to the region, they don't have @@ -58,6 +63,8 @@ class InlineInfo { } private: + // TODO: Instead of plain types such as "uint8_t", introduce + // typedefs (and document the memory layout of InlineInfo). static constexpr int kDepthOffset = 0; static constexpr int kFixedSize = kDepthOffset + sizeof(uint8_t); @@ -68,82 +75,327 @@ class InlineInfo { friend class StackMapStream; }; +// Dex register location container used by DexRegisterMap and StackMapStream. +class DexRegisterLocation { + public: + /* + * The location kind used to populate the Dex register information in a + * StackMapStream can either be: + * - kNone: the register has no location yet, meaning it has not been set; + * - kConstant: value holds the constant; + * - kStack: value holds the stack offset; + * - kRegister: value holds the physical register number; + * - kFpuRegister: value holds the physical register number. + * + * In addition, DexRegisterMap also uses these values: + * - kInStackLargeOffset: value holds a "large" stack offset (greater than + * 128 bytes); + * - kConstantLargeValue: value holds a "large" constant (lower than or + * equal to -16, or greater than 16). + */ + enum class Kind : uint8_t { + // Short location kinds, for entries fitting on one byte (3 bits + // for the kind, 5 bits for the value) in a DexRegisterMap. + kNone = 0, // 0b000 + kInStack = 1, // 0b001 + kInRegister = 2, // 0b010 + kInFpuRegister = 3, // 0b011 + kConstant = 4, // 0b100 + + // Large location kinds, requiring a 5-byte encoding (1 byte for the + // kind, 4 bytes for the value). + + // Stack location at a large offset, meaning that the offset value + // divided by the stack frame slot size (4 bytes) cannot fit on a + // 5-bit unsigned integer (i.e., this offset value is greater than + // or equal to 2^5 * 4 = 128 bytes). 
+ kInStackLargeOffset = 5, // 0b101 + + // Large constant, that cannot fit on a 5-bit signed integer (i.e., + // lower than -2^(5-1) = -16, or greater than or equal to + // 2^(5-1) - 1 = 15). + kConstantLargeValue = 6, // 0b110 + + kLastLocationKind = kConstantLargeValue + }; + + static_assert( + sizeof(Kind) == 1u, + "art::DexRegisterLocation::Kind has a size different from one byte."); + + static const char* PrettyDescriptor(Kind kind) { + switch (kind) { + case Kind::kNone: + return "none"; + case Kind::kInStack: + return "in stack"; + case Kind::kInRegister: + return "in register"; + case Kind::kInFpuRegister: + return "in fpu register"; + case Kind::kConstant: + return "as constant"; + case Kind::kInStackLargeOffset: + return "in stack (large offset)"; + case Kind::kConstantLargeValue: + return "as constant (large value)"; + default: + UNREACHABLE(); + } + } + + static bool IsShortLocationKind(Kind kind) { + switch (kind) { + case Kind::kNone: + case Kind::kInStack: + case Kind::kInRegister: + case Kind::kInFpuRegister: + case Kind::kConstant: + return true; + + case Kind::kInStackLargeOffset: + case Kind::kConstantLargeValue: + return false; + + default: + UNREACHABLE(); + } + } + + // Convert `kind` to a "surface" kind, i.e. one that doesn't include + // any value with a "large" qualifier. + // TODO: Introduce another enum type for the surface kind? + static Kind ConvertToSurfaceKind(Kind kind) { + switch (kind) { + case Kind::kNone: + case Kind::kInStack: + case Kind::kInRegister: + case Kind::kInFpuRegister: + case Kind::kConstant: + return kind; + + case Kind::kInStackLargeOffset: + return Kind::kInStack; + + case Kind::kConstantLargeValue: + return Kind::kConstant; + + default: + UNREACHABLE(); + } + } + + DexRegisterLocation(Kind kind, int32_t value) + : kind_(kind), value_(value) {} + + // Get the "surface" kind of the location, i.e., the one that doesn't + // include any value with a "large" qualifier. 
+ Kind GetKind() const { + return ConvertToSurfaceKind(kind_); + } + + // Get the value of the location. + int32_t GetValue() const { return value_; } + + // Get the actual kind of the location. + Kind GetInternalKind() const { return kind_; } + + private: + Kind kind_; + int32_t value_; +}; + /** * Information on dex register values for a specific PC. The information is * of the form: * [location_kind, register_value]+. - * - * The location_kind for a Dex register can either be: - * - kConstant: register_value holds the constant, - * - kStack: register_value holds the stack offset, - * - kRegister: register_value holds the physical register number. - * - kFpuRegister: register_value holds the physical register number. - * - kNone: the register has no location yet, meaning it has not been set. + * either on 1 or 5 bytes (see art::DexRegisterLocation::Kind). */ class DexRegisterMap { public: explicit DexRegisterMap(MemoryRegion region) : region_(region) {} - enum LocationKind { - kNone, - kInStack, - kInRegister, - kInFpuRegister, - kConstant - }; + // Short (compressed) location, fitting on one byte. + typedef uint8_t ShortLocation; + + void SetRegisterInfo(size_t offset, const DexRegisterLocation& dex_register_location) { + DexRegisterLocation::Kind kind = ComputeCompressedKind(dex_register_location); + int32_t value = dex_register_location.GetValue(); + if (DexRegisterLocation::IsShortLocationKind(kind)) { + // Short location. Compress the kind and the value as a single byte. + if (kind == DexRegisterLocation::Kind::kInStack) { + // Instead of storing stack offsets expressed in bytes for + // short stack locations, store slot offsets. A stack offset + // is a multiple of 4 (kFrameSlotSize). This means that by + // dividing it by 4, we can fit values from the [0, 128) + // interval in a short stack location, and not just values + // from the [0, 32) interval. 
+ DCHECK_EQ(value % kFrameSlotSize, 0); + value /= kFrameSlotSize; + } + DCHECK(IsUint<kValueBits>(value)) << value; + region_.StoreUnaligned<ShortLocation>(offset, MakeShortLocation(kind, value)); + } else { + // Large location. Write the location on one byte and the value + // on 4 bytes. + DCHECK(!IsUint<kValueBits>(value)) << value; + if (kind == DexRegisterLocation::Kind::kInStackLargeOffset) { + // Also divide large stack offsets by 4 for the sake of consistency. + DCHECK_EQ(value % kFrameSlotSize, 0); + value /= kFrameSlotSize; + } + // Data can be unaligned as the written Dex register locations can + // either be 1-byte or 5-byte wide. Use + // art::MemoryRegion::StoreUnaligned instead of + // art::MemoryRegion::Store to prevent unligned word accesses on ARM. + region_.StoreUnaligned<DexRegisterLocation::Kind>(offset, kind); + region_.StoreUnaligned<int32_t>(offset + sizeof(DexRegisterLocation::Kind), value); + } + } - static const char* PrettyDescriptor(LocationKind kind) { - switch (kind) { - case kNone: - return "none"; - case kInStack: - return "in stack"; - case kInRegister: - return "in register"; - case kInFpuRegister: - return "in fpu register"; - case kConstant: - return "as constant"; + // Find the offset of the Dex register location number `dex_register_index`. + size_t FindLocationOffset(uint16_t dex_register_index) const { + size_t offset = kFixedSize; + // Skip the first `dex_register_index - 1` entries. + for (uint16_t i = 0; i < dex_register_index; ++i) { + // Read the first next byte and inspect its first 3 bits to decide + // whether it is a short or a large location. + DexRegisterLocation::Kind kind = ExtractKindAtOffset(offset); + if (DexRegisterLocation::IsShortLocationKind(kind)) { + // Short location. Skip the current byte. + offset += SingleShortEntrySize(); + } else { + // Large location. Skip the 5 next bytes. 
+ offset += SingleLargeEntrySize(); + } } - UNREACHABLE(); - return nullptr; + return offset; } - LocationKind GetLocationKind(uint16_t register_index) const { - return region_.Load<LocationKind>( - kFixedSize + register_index * SingleEntrySize()); + // Get the surface kind. + DexRegisterLocation::Kind GetLocationKind(uint16_t dex_register_index) const { + return DexRegisterLocation::ConvertToSurfaceKind(GetLocationInternalKind(dex_register_index)); } - void SetRegisterInfo(uint16_t register_index, LocationKind kind, int32_t value) { - size_t entry = kFixedSize + register_index * SingleEntrySize(); - region_.Store<LocationKind>(entry, kind); - region_.Store<int32_t>(entry + sizeof(LocationKind), value); + // Get the internal kind. + DexRegisterLocation::Kind GetLocationInternalKind(uint16_t dex_register_index) const { + size_t offset = FindLocationOffset(dex_register_index); + return ExtractKindAtOffset(offset); } - int32_t GetValue(uint16_t register_index) const { - return region_.Load<int32_t>( - kFixedSize + sizeof(LocationKind) + register_index * SingleEntrySize()); + // TODO: Rename as GetDexRegisterLocation? + DexRegisterLocation GetLocationKindAndValue(uint16_t dex_register_index) const { + size_t offset = FindLocationOffset(dex_register_index); + // Read the first byte and inspect its first 3 bits to get the location. + ShortLocation first_byte = region_.LoadUnaligned<ShortLocation>(offset); + DexRegisterLocation::Kind kind = ExtractKindFromShortLocation(first_byte); + if (DexRegisterLocation::IsShortLocationKind(kind)) { + // Short location. Extract the value from the remaining 5 bits. + int32_t value = ExtractValueFromShortLocation(first_byte); + if (kind == DexRegisterLocation::Kind::kInStack) { + // Convert the stack slot (short) offset to a byte offset value. + value *= kFrameSlotSize; + } + return DexRegisterLocation(kind, value); + } else { + // Large location. Read the four next bytes to get the value. 
+ int32_t value = region_.LoadUnaligned<int32_t>(offset + sizeof(DexRegisterLocation::Kind)); + if (kind == DexRegisterLocation::Kind::kInStackLargeOffset) { + // Convert the stack slot (large) offset to a byte offset value. + value *= kFrameSlotSize; + } + return DexRegisterLocation(kind, value); + } } - int32_t GetStackOffsetInBytes(uint16_t register_index) const { - DCHECK(GetLocationKind(register_index) == kInStack); - // We currently encode the offset in bytes. - return GetValue(register_index); + int32_t GetStackOffsetInBytes(uint16_t dex_register_index) const { + DexRegisterLocation location = GetLocationKindAndValue(dex_register_index); + DCHECK(location.GetKind() == DexRegisterLocation::Kind::kInStack); + // GetLocationKindAndValue returns the offset in bytes. + return location.GetValue(); } - int32_t GetConstant(uint16_t register_index) const { - DCHECK(GetLocationKind(register_index) == kConstant); - return GetValue(register_index); + int32_t GetConstant(uint16_t dex_register_index) const { + DexRegisterLocation location = GetLocationKindAndValue(dex_register_index); + DCHECK(location.GetKind() == DexRegisterLocation::Kind::kConstant); + return location.GetValue(); } - int32_t GetMachineRegister(uint16_t register_index) const { - DCHECK(GetLocationKind(register_index) == kInRegister - || GetLocationKind(register_index) == kInFpuRegister); - return GetValue(register_index); + int32_t GetMachineRegister(uint16_t dex_register_index) const { + DexRegisterLocation location = GetLocationKindAndValue(dex_register_index); + DCHECK(location.GetInternalKind() == DexRegisterLocation::Kind::kInRegister + || location.GetInternalKind() == DexRegisterLocation::Kind::kInFpuRegister) + << DexRegisterLocation::PrettyDescriptor(location.GetInternalKind()); + return location.GetValue(); } - static size_t SingleEntrySize() { - return sizeof(LocationKind) + sizeof(int32_t); + // Compute the compressed kind of `location`. 
+ static DexRegisterLocation::Kind ComputeCompressedKind(const DexRegisterLocation& location) { + switch (location.GetInternalKind()) { + case DexRegisterLocation::Kind::kNone: + DCHECK_EQ(location.GetValue(), 0); + return DexRegisterLocation::Kind::kNone; + + case DexRegisterLocation::Kind::kInRegister: + DCHECK_GE(location.GetValue(), 0); + DCHECK_LT(location.GetValue(), 1 << DexRegisterMap::kValueBits); + return DexRegisterLocation::Kind::kInRegister; + + case DexRegisterLocation::Kind::kInFpuRegister: + DCHECK_GE(location.GetValue(), 0); + DCHECK_LT(location.GetValue(), 1 << DexRegisterMap::kValueBits); + return DexRegisterLocation::Kind::kInFpuRegister; + + case DexRegisterLocation::Kind::kInStack: + DCHECK_EQ(location.GetValue() % kFrameSlotSize, 0); + return IsUint<DexRegisterMap::kValueBits>(location.GetValue() / kFrameSlotSize) + ? DexRegisterLocation::Kind::kInStack + : DexRegisterLocation::Kind::kInStackLargeOffset; + + case DexRegisterLocation::Kind::kConstant: + return IsUint<DexRegisterMap::kValueBits>(location.GetValue()) + ? DexRegisterLocation::Kind::kConstant + : DexRegisterLocation::Kind::kConstantLargeValue; + + default: + LOG(FATAL) << "Unexpected location kind" + << DexRegisterLocation::PrettyDescriptor(location.GetInternalKind()); + UNREACHABLE(); + } + } + + // Can `location` be turned into a short location? 
+ static bool CanBeEncodedAsShortLocation(const DexRegisterLocation& location) { + switch (location.GetInternalKind()) { + case DexRegisterLocation::Kind::kNone: + case DexRegisterLocation::Kind::kInRegister: + case DexRegisterLocation::Kind::kInFpuRegister: + return true; + + case DexRegisterLocation::Kind::kInStack: + DCHECK_EQ(location.GetValue() % kFrameSlotSize, 0); + return IsUint<kValueBits>(location.GetValue() / kFrameSlotSize); + + case DexRegisterLocation::Kind::kConstant: + return IsUint<kValueBits>(location.GetValue()); + + default: + UNREACHABLE(); + } + } + + static size_t EntrySize(const DexRegisterLocation& location) { + return CanBeEncodedAsShortLocation(location) + ? DexRegisterMap::SingleShortEntrySize() + : DexRegisterMap::SingleLargeEntrySize(); + } + + static size_t SingleShortEntrySize() { + return sizeof(ShortLocation); + } + + static size_t SingleLargeEntrySize() { + return sizeof(DexRegisterLocation::Kind) + sizeof(int32_t); } size_t Size() const { @@ -153,7 +405,43 @@ class DexRegisterMap { static constexpr int kFixedSize = 0; private: + // Width of the kind "field" in a short location, in bits. + static constexpr size_t kKindBits = 3; + // Width of the value "field" in a short location, in bits. 
+ static constexpr size_t kValueBits = 5; + + static constexpr uint8_t kKindMask = (1 << kKindBits) - 1; + static constexpr int32_t kValueMask = (1 << kValueBits) - 1; + static constexpr size_t kKindOffset = 0; + static constexpr size_t kValueOffset = kKindBits; + + static ShortLocation MakeShortLocation(DexRegisterLocation::Kind kind, int32_t value) { + DCHECK(IsUint<kKindBits>(static_cast<uint8_t>(kind))) << static_cast<uint8_t>(kind); + DCHECK(IsUint<kValueBits>(value)) << value; + return (static_cast<uint8_t>(kind) & kKindMask) << kKindOffset + | (value & kValueMask) << kValueOffset; + } + + static DexRegisterLocation::Kind ExtractKindFromShortLocation(ShortLocation location) { + uint8_t kind = (location >> kKindOffset) & kKindMask; + DCHECK_LE(kind, static_cast<uint8_t>(DexRegisterLocation::Kind::kLastLocationKind)); + return static_cast<DexRegisterLocation::Kind>(kind); + } + + static int32_t ExtractValueFromShortLocation(ShortLocation location) { + return (location >> kValueOffset) & kValueMask; + } + + // Extract a location kind from the byte at position `offset`. 
+ DexRegisterLocation::Kind ExtractKindAtOffset(size_t offset) const { + ShortLocation first_byte = region_.LoadUnaligned<ShortLocation>(offset); + return ExtractKindFromShortLocation(first_byte); + } + MemoryRegion region_; + + friend class CodeInfo; + friend class StackMapStream; }; /** @@ -187,7 +475,7 @@ class StackMap { } void SetNativePcOffset(uint32_t native_pc_offset) { - return region_.Store<uint32_t>(kNativePcOffsetOffset, native_pc_offset); + region_.Store<uint32_t>(kNativePcOffsetOffset, native_pc_offset); } uint32_t GetDexRegisterMapOffset() const { @@ -195,7 +483,7 @@ class StackMap { } void SetDexRegisterMapOffset(uint32_t offset) { - return region_.Store<uint32_t>(kDexRegisterMapOffsetOffset, offset); + region_.Store<uint32_t>(kDexRegisterMapOffsetOffset, offset); } uint32_t GetInlineDescriptorOffset() const { @@ -203,7 +491,7 @@ class StackMap { } void SetInlineDescriptorOffset(uint32_t offset) { - return region_.Store<uint32_t>(kInlineDescriptorOffsetOffset, offset); + region_.Store<uint32_t>(kInlineDescriptorOffsetOffset, offset); } uint32_t GetRegisterMask() const { @@ -238,9 +526,9 @@ class StackMap { && region_.size() == other.region_.size(); } - static size_t ComputeAlignedStackMapSize(size_t stack_mask_size) { + static size_t ComputeAlignedStackMapSize(size_t stack_map_size) { // On ARM, the stack maps must be 4-byte aligned. - return RoundUp(StackMap::kFixedSize + stack_mask_size, 4); + return RoundUp(StackMap::kFixedSize + stack_map_size, 4); } // Special (invalid) offset for the DexRegisterMapOffset field meaning @@ -252,6 +540,8 @@ class StackMap { static constexpr uint32_t kNoInlineInfo = -1; private: + // TODO: Instead of plain types such as "uint32_t", introduce + // typedefs (and document the memory layout of StackMap). 
static constexpr int kDexPcOffset = 0; static constexpr int kNativePcOffsetOffset = kDexPcOffset + sizeof(uint32_t); static constexpr int kDexRegisterMapOffsetOffset = kNativePcOffsetOffset + sizeof(uint32_t); @@ -317,11 +607,15 @@ class CodeInfo { return StackMap::ComputeAlignedStackMapSize(GetStackMaskSize()); } + uint32_t GetStackMapsOffset() const { + return kFixedSize; + } + DexRegisterMap GetDexRegisterMapOf(StackMap stack_map, uint32_t number_of_dex_registers) const { DCHECK(stack_map.HasDexRegisterMap()); uint32_t offset = stack_map.GetDexRegisterMapOffset(); - return DexRegisterMap(region_.Subregion(offset, - DexRegisterMap::kFixedSize + number_of_dex_registers * DexRegisterMap::SingleEntrySize())); + size_t size = ComputeDexRegisterMapSize(offset, number_of_dex_registers); + return DexRegisterMap(region_.Subregion(offset, size)); } InlineInfo GetInlineInfoOf(StackMap stack_map) const { @@ -356,6 +650,8 @@ class CodeInfo { } private: + // TODO: Instead of plain types such as "uint32_t", introduce + // typedefs (and document the memory layout of CodeInfo). static constexpr int kOverallSizeOffset = 0; static constexpr int kNumberOfStackMapsOffset = kOverallSizeOffset + sizeof(uint32_t); static constexpr int kStackMaskSizeOffset = kNumberOfStackMapsOffset + sizeof(uint32_t); @@ -367,6 +663,33 @@ class CodeInfo { : region_.Subregion(kFixedSize, StackMapSize() * GetNumberOfStackMaps()); } + // Compute the size of a Dex register map starting at offset `origin` in + // `region_` and containing `number_of_dex_registers` locations. + size_t ComputeDexRegisterMapSize(uint32_t origin, uint32_t number_of_dex_registers) const { + // TODO: Ideally, we would like to use art::DexRegisterMap::Size or + // art::DexRegisterMap::FindLocationOffset, but the DexRegisterMap is not + // yet built. Try to factor common code. + size_t offset = origin + DexRegisterMap::kFixedSize; + // Walk all `number_of_dex_registers` entries to reach the end of the map. 
+ for (uint16_t i = 0; i < number_of_dex_registers; ++i) { + // Read the next byte and inspect its 3 lowest bits (the kind field) to decide + // whether it is a short or a large location. + DexRegisterMap::ShortLocation first_byte = + region_.LoadUnaligned<DexRegisterMap::ShortLocation>(offset); + DexRegisterLocation::Kind kind = + DexRegisterMap::ExtractKindFromShortLocation(first_byte); + if (DexRegisterLocation::IsShortLocationKind(kind)) { + // Short location. Skip the current byte. + offset += DexRegisterMap::SingleShortEntrySize(); + } else { + // Large location. Skip the next 5 bytes. + offset += DexRegisterMap::SingleLargeEntrySize(); + } + } + size_t size = offset - origin; + return size; + } + MemoryRegion region_; friend class StackMapStream; }; diff --git a/runtime/utils.h b/runtime/utils.h index d294f4b1a1..cd04c3ff2c 100644 --- a/runtime/utils.h +++ b/runtime/utils.h @@ -173,6 +173,24 @@ static inline uint32_t High32Bits(uint64_t value) { return static_cast<uint32_t>(value >> 32); } +// Traits class providing an unsigned integer type of (byte) size `n`. +template <size_t n> +struct UnsignedIntegerType { + // No defined `type`. +}; + +template <> +struct UnsignedIntegerType<1> { typedef uint8_t type; }; + +template <> +struct UnsignedIntegerType<2> { typedef uint16_t type; }; + +template <> +struct UnsignedIntegerType<4> { typedef uint32_t type; }; + +template <> +struct UnsignedIntegerType<8> { typedef uint64_t type; }; + // Type identity. template <typename T> struct TypeIdentity { @@ -271,6 +289,12 @@ static constexpr int CTZ(T x) { } template<typename T> +static inline int WhichPowerOf2(T x) { + DCHECK((x != 0) && IsPowerOfTwo(x)); + return CTZ(x); +} + +template<typename T> static constexpr int POPCOUNT(T x) { return (sizeof(T) == sizeof(uint32_t)) ? __builtin_popcount(x) |