author      2019-08-14 17:16:46 +0100
committer   2019-09-20 18:48:44 +0000
commit      1550a669adb7e9328879bed24d9edc22eb97c994 (patch)
tree        80f1f230decc89af3397f258d3101a6cd0287338
parent      357d4db493467e4dd74b2ba1d4b8d7c80f8409b6 (diff)
JIT mini-debug-info: Add support for zygote shared memory
Ensure that zygote's mini-debug-info is visible to apps.
Remove global seqlock synchronization.
It was replaced by entry-level synchronization.
Test: test.py -b -r --host --jit --64
Test: device boots
Bug: 119800099
Change-Id: I4885f9a4d44743d5608793a2e5d6453123d111f3
-rw-r--r--  runtime/jit/debugger_interface.cc | 270
-rw-r--r--  runtime/jit/debugger_interface.h  |   9
-rw-r--r--  runtime/jit/jit.cc                |   4
3 files changed, 166 insertions, 117 deletions
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
index 24ca0fc1e1..0397ad8d99 100644
--- a/runtime/jit/debugger_interface.cc
+++ b/runtime/jit/debugger_interface.cc
@@ -46,40 +46,29 @@
 // method, which is called after every modification of the linked list.
 // GDB does this, but it is complex to set up and it stops the process.
 //
-// 2) Asynchronously, by monitoring the action_seqlock_.
-//   * The seqlock is a monotonically increasing counter which is incremented
-//     before and after every modification of the linked list. Odd value of
-//     the counter means the linked list is being modified (it is locked).
-//   * The tool should read the value of the seqlock both before and after
-//     copying the linked list. If the seqlock values match and are even,
-//     the copy is consistent. Otherwise, the reader should try again.
-//     * Note that using the data directly while is it being modified
-//       might crash the tool. Therefore, the only safe way is to make
-//       a copy and use the copy only after the seqlock has been checked.
-//     * Note that the process might even free and munmap the data while
-//       it is being copied, therefore the reader should either handle
-//       SEGV or use OS calls to read the memory (e.g. process_vm_readv).
-//   * The seqlock can be used to determine the number of modifications of
-//     the linked list, which can be used to intelligently cache the data.
-//     Note the possible overflow of the seqlock. It is intentionally
-//     32-bit, since 64-bit atomics can be tricky on some architectures.
-//   * The timestamps on the entry record the time when the entry was
-//     created which is relevant if the unwinding is not live and is
-//     postponed until much later. All timestamps must be unique.
-//   * Memory barriers are used to make it possible to reason about
-//     the data even when it is being modified (e.g. the process crashed
-//     while that data was locked, and thus it will be never unlocked).
-//   * In particular, it should be possible to:
-//     1) read the seqlock and then the linked list head pointer.
-//     2) copy the entry and check that seqlock has not changed.
-//     3) copy the symfile and check that seqlock has not changed.
-//     4) go back to step 2 using the next pointer (if non-null).
-//     This safely creates copy of all symfiles, although other data
-//     might be inconsistent/unusable (e.g. prev_, action_timestamp_).
-//   * For full conformance with the C++ memory model, all seqlock
-//     protected accesses should be atomic. We currently do this in the
-//     more critical cases. The rest will have to be fixed before
-//     attempting to run TSAN on this code.
+// 2) Asynchronously, using the entry seqlocks.
+//   * The seqlock is a monotonically increasing counter, which
+//     is even if the entry is valid and odd if it is invalid.
+//     It is set to even value after all other fields are set,
+//     and it is set to odd value before the entry is deleted.
+//   * This makes it possible to safely read the symfile data:
+//     * The reader should read the value of the seqlock both
+//       before and after reading the symfile. If the seqlock
+//       values match and are even the copy is consistent.
+//   * Entries are recycled, but never freed, which guarantees
+//     that the seqlock is not overwritten by a random value.
+//   * The linked-list is one level higher. The next-pointer
+//     must always point to an entry with even seqlock, which
+//     ensures that entries of a crashed process can be read.
+//     This means the entry must be added after it is created
+//     and it must be removed before it is invalidated (odd).
+//   * When iterating over the linked list the reader can use
+//     the timestamps to ensure that current and next entry
+//     were not deleted using the following steps:
+//       1) Read next pointer and the next entry's seqlock.
+//       2) Read the symfile and re-read the next pointer.
+//       3) Re-read both the current and next seqlock.
+//       4) Go to step 1 with using new entry and seqlock.
 //

 namespace art {
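The comment above describes the reader side only in prose. As a minimal sketch (not part of this commit), this is roughly what one seqlock-protected copy looks like for a reader in the same address space; the mirror struct and the helper name ReadSymfileCopy are assumptions for illustration, and a remote reader would additionally have to fetch the bytes with fault-tolerant OS calls:

    #include <atomic>
    #include <cstdint>
    #include <vector>

    // Simplified mirror of the public entry layout defined further down in this file.
    struct EntryMirror {
      std::atomic<const EntryMirror*> next;
      const EntryMirror* prev;
      const uint8_t* symfile_addr;
      uint64_t symfile_size;
      std::atomic<uint32_t> seqlock;  // Even = valid, odd = invalid.
    };

    // Copies one symfile. Returns false if the entry was (or became) invalid,
    // in which case the caller should restart the iteration from the list head.
    bool ReadSymfileCopy(const EntryMirror* entry, std::vector<uint8_t>* out) {
      uint32_t seq = entry->seqlock.load(std::memory_order_acquire);
      if ((seq & 1u) != 0u) {
        return false;  // Entry is being created or has already been deleted.
      }
      out->assign(entry->symfile_addr, entry->symfile_addr + entry->symfile_size);
      // Order the data reads above before the re-check of the seqlock.
      std::atomic_thread_fence(std::memory_order_acquire);
      return entry->seqlock.load(std::memory_order_relaxed) == seq;
    }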
@@ -87,6 +76,11 @@ namespace art {
 static Mutex g_jit_debug_lock("JIT native debug entries", kNativeDebugInterfaceLock);
 static Mutex g_dex_debug_lock("DEX native debug entries", kNativeDebugInterfaceLock);

+// Most loads and stores need no synchronization since all memory is protected by the global locks.
+// Some writes are synchronized so libunwindstack can read the memory safely from another process.
+constexpr std::memory_order kNonRacingRelaxed = std::memory_order_relaxed;
+
+// Public binary interface between ART and native tools (gdb, libunwind, etc).
 extern "C" {
   enum JITAction {
     JIT_NOACTION = 0,
@@ -96,15 +90,13 @@ extern "C" {

   // Public/stable binary interface.
   struct JITCodeEntryPublic {
-    // Atomic to ensure the reader can always iterate over the linked list
-    // (e.g. the process could crash in the middle of writing this field).
-    std::atomic<const JITCodeEntry*> next_;
-    const JITCodeEntry* prev_;     // For linked list deletion. Unused in readers.
-    const uint8_t* symfile_addr_;  // Address of the in-memory ELF file.
-    uint64_t symfile_size_;        // Beware of the offset (12 on x86; but 16 on ARM32).
+    std::atomic<const JITCodeEntry*> next_;  // Atomic to guarantee consistency after crash.
+    const JITCodeEntry* prev_ = nullptr;     // For linked list deletion. Unused in readers.
+    const uint8_t* symfile_addr_ = nullptr;  // Address of the in-memory ELF file.
+    uint64_t symfile_size_ = 0;              // Note that the offset is 12 on x86, but 16 on ARM32.

     // Android-specific fields:
-    uint64_t register_timestamp_;  // CLOCK_MONOTONIC time of entry registration.
+    std::atomic_uint32_t seqlock_{1};  // Synchronization. Even value if entry is valid.
   };

   // Implementation-specific fields (which can be used only in this file).
@@ -120,26 +112,34 @@ extern "C" {
     bool is_compressed_ = false;
   };

-  struct JITDescriptor {
+  // Public/stable binary interface.
+  struct JITDescriptorPublic {
     uint32_t version_ = 1;                            // NB: GDB supports only version 1.
     uint32_t action_flag_ = JIT_NOACTION;             // One of the JITAction enum values.
     const JITCodeEntry* relevant_entry_ = nullptr;    // The entry affected by the action.
     std::atomic<const JITCodeEntry*> head_{nullptr};  // Head of link list of all entries.
+  };

-    // Android-specific fields:
-    uint8_t magic_[8] = {'A', 'n', 'd', 'r', 'o', 'i', 'd', '1'};
-    uint32_t flags_ = 0;  // Reserved for future use. Must be 0.
-    uint32_t sizeof_descriptor = sizeof(JITDescriptor);
-    uint32_t sizeof_entry = sizeof(JITCodeEntryPublic);
-    std::atomic_uint32_t action_seqlock_{0};  // Incremented before and after any modification.
-    uint64_t action_timestamp_ = 1;           // CLOCK_MONOTONIC time of last action.
+  // Implementation-specific fields (which can be used only in this file).
+  struct JITDescriptor : public JITDescriptorPublic {
+    const JITCodeEntry* free_entries_ = nullptr;  // List of deleted entries ready for reuse.
+
+    // Used for memory sharing with zygote. See NativeDebugInfoPreFork().
+    const JITCodeEntry* zygote_head_entry_ = nullptr;
+    JITCodeEntry application_tail_entry_{};
   };

+  // Public interface: Can be used by reader to check the structs have the expected size.
+  uint32_t g_art_sizeof_jit_code_entry = sizeof(JITCodeEntryPublic);
+  uint32_t g_art_sizeof_jit_descriptor = sizeof(JITDescriptorPublic);
+
   // Check that std::atomic has the expected layout.
   static_assert(alignof(std::atomic_uint32_t) == alignof(uint32_t), "Weird alignment");
   static_assert(sizeof(std::atomic_uint32_t) == sizeof(uint32_t), "Weird size");
+  static_assert(std::atomic_uint32_t::is_always_lock_free, "Expected to be lock free");
   static_assert(alignof(std::atomic<void*>) == alignof(void*), "Weird alignment");
   static_assert(sizeof(std::atomic<void*>) == sizeof(void*), "Weird size");
+  static_assert(std::atomic<void*>::is_always_lock_free, "Expected to be lock free");

   // GDB may set breakpoint here. We must ensure it is not removed or deduplicated.
   void __attribute__((noinline)) __jit_debug_register_code() {
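The two g_art_sizeof_* globals above give an out-of-process reader a cheap way to confirm that its mirrored layout matches the runtime it is inspecting before it trusts any field offsets. A hedged sketch of such a check, assuming a 64-bit target and reader-defined mirror types (none of these names come from the commit):

    #include <cstdint>

    // Reader-side mirrors of JITCodeEntryPublic / JITDescriptorPublic for a 64-bit target.
    struct EntryMirror64 {
      uint64_t next;           // std::atomic<const JITCodeEntry*> in the target process.
      uint64_t prev;
      uint64_t symfile_addr;
      uint64_t symfile_size;
      uint32_t seqlock;
      uint32_t padding;        // Tail padding of the target struct.
    };

    struct DescriptorMirror64 {
      uint32_t version;
      uint32_t action_flag;
      uint64_t relevant_entry;
      uint64_t head;
    };

    // 'sizeof_entry' and 'sizeof_descriptor' are the values read from the target's
    // g_art_sizeof_jit_code_entry and g_art_sizeof_jit_descriptor symbols.
    bool LayoutMatches(uint32_t sizeof_entry, uint32_t sizeof_descriptor) {
      return sizeof_entry == sizeof(EntryMirror64) &&
             sizeof_descriptor == sizeof(DescriptorMirror64);
    }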
@@ -176,7 +176,12 @@ struct JitNativeInfo {
   static const void* Alloc(size_t size) { return Memory()->AllocateData(size); }
   static void Free(const void* ptr) { Memory()->FreeData(reinterpret_cast<const uint8_t*>(ptr)); }
   static void Free(void* ptr) = delete;
+
   template<class T> static T* Writable(const T* v) {
+    // Special case: This entry is in static memory and not allocated in JIT memory.
+    if (v == reinterpret_cast<const void*>(&Descriptor().application_tail_entry_)) {
+      return const_cast<T*>(v);
+    }
     return const_cast<T*>(Memory()->GetWritableDataAddress(v));
   }
@@ -194,34 +199,29 @@ ArrayRef<const uint8_t> GetJITCodeEntrySymFile(const JITCodeEntry* entry) {
   return ArrayRef<const uint8_t>(entry->symfile_addr_, entry->symfile_size_);
 }

-// Mark the descriptor as "locked", so native tools know the data is being modified.
-static void ActionSeqlock(JITDescriptor& descriptor) {
-  DCHECK_EQ(descriptor.action_seqlock_.load() & 1, 0u) << "Already locked";
-  descriptor.action_seqlock_.fetch_add(1, std::memory_order_relaxed);
-  // Ensure that any writes within the locked section cannot be reordered before the increment.
-  std::atomic_thread_fence(std::memory_order_release);
-}
-
-// Mark the descriptor as "unlocked", so native tools know the data is safe to read.
-static void ActionSequnlock(JITDescriptor& descriptor) {
-  DCHECK_EQ(descriptor.action_seqlock_.load() & 1, 1u) << "Already unlocked";
-  // Ensure that any writes within the locked section cannot be reordered after the increment.
-  std::atomic_thread_fence(std::memory_order_release);
-  descriptor.action_seqlock_.fetch_add(1, std::memory_order_relaxed);
-}
-
+// This must be called with the appropriate lock taken (g_{jit,dex}_debug_lock).
 template<class NativeInfo>
 static const JITCodeEntry* CreateJITCodeEntryInternal(
-    ArrayRef<const uint8_t> symfile,
+    ArrayRef<const uint8_t> symfile = ArrayRef<const uint8_t>(),
     const void* addr = nullptr,
     bool allow_packing = false,
     bool is_compressed = false) {
   JITDescriptor& descriptor = NativeInfo::Descriptor();

+  // Allocate JITCodeEntry if needed.
+  if (descriptor.free_entries_ == nullptr) {
+    const void* memory = NativeInfo::Alloc(sizeof(JITCodeEntry));
+    if (memory == nullptr) {
+      LOG(ERROR) << "Failed to allocate memory for native debug info";
+      return nullptr;
+    }
+    new (NativeInfo::Writable(memory)) JITCodeEntry();
+    descriptor.free_entries_ = reinterpret_cast<const JITCodeEntry*>(memory);
+  }
+
   // Make a copy of the buffer to shrink it and to pass ownership to JITCodeEntry.
-  const uint8_t* copy = nullptr;
-  if (NativeInfo::kCopySymfileData) {
-    copy = reinterpret_cast<const uint8_t*>(NativeInfo::Alloc(symfile.size()));
+  if (NativeInfo::kCopySymfileData && !symfile.empty()) {
+    const uint8_t* copy = reinterpret_cast<const uint8_t*>(NativeInfo::Alloc(symfile.size()));
     if (copy == nullptr) {
       LOG(ERROR) << "Failed to allocate memory for native debug info";
       return nullptr;
@@ -230,41 +230,38 @@ static const JITCodeEntry* CreateJITCodeEntryInternal(
     symfile = ArrayRef<const uint8_t>(copy, symfile.size());
   }

-  // Ensure the timestamp is monotonically increasing even in presence of low
-  // granularity system timer. This ensures each entry has unique timestamp.
-  uint64_t timestamp = std::max(descriptor.action_timestamp_ + 1, NanoTime());
-
-  const JITCodeEntry* head = descriptor.head_.load(std::memory_order_relaxed);
-  const void* memory = NativeInfo::Alloc(sizeof(JITCodeEntry));
-  if (memory == nullptr) {
-    LOG(ERROR) << "Failed to allocate memory for native debug info";
-    if (copy != nullptr) {
-      NativeInfo::Free(copy);
-    }
-    return nullptr;
+  // Zygote must insert entries at specific place. See NativeDebugInfoPreFork().
+  std::atomic<const JITCodeEntry*>* head = &descriptor.head_;
+  const JITCodeEntry* prev = nullptr;
+  if (Runtime::Current()->IsZygote() && descriptor.zygote_head_entry_ != nullptr) {
+    head = &NativeInfo::Writable(descriptor.zygote_head_entry_)->next_;
+    prev = descriptor.zygote_head_entry_;
   }
-  const JITCodeEntry* entry = reinterpret_cast<const JITCodeEntry*>(memory);
+  const JITCodeEntry* next = head->load(kNonRacingRelaxed);
+
+  // Pop entry from the free list.
+  const JITCodeEntry* entry = descriptor.free_entries_;
+  descriptor.free_entries_ = descriptor.free_entries_->next_.load(kNonRacingRelaxed);
+  CHECK_EQ(entry->seqlock_.load(kNonRacingRelaxed) & 1, 1u) << "Expected invalid entry";
+
+  // Create the entry and set all its fields.
   JITCodeEntry* writable_entry = NativeInfo::Writable(entry);
+  writable_entry->next_.store(next, std::memory_order_relaxed);
+  writable_entry->prev_ = prev;
   writable_entry->symfile_addr_ = symfile.data();
   writable_entry->symfile_size_ = symfile.size();
-  writable_entry->prev_ = nullptr;
-  writable_entry->next_.store(head, std::memory_order_relaxed);
-  writable_entry->register_timestamp_ = timestamp;
   writable_entry->addr_ = addr;
   writable_entry->allow_packing_ = allow_packing;
   writable_entry->is_compressed_ = is_compressed;
+  writable_entry->seqlock_.fetch_add(1, std::memory_order_release);  // Mark as valid.

-  // We are going to modify the linked list, so take the seqlock.
-  ActionSeqlock(descriptor);
-  if (head != nullptr) {
-    NativeInfo::Writable(head)->prev_ = entry;
+  // Add the entry to the main link-list.
+  if (next != nullptr) {
+    NativeInfo::Writable(next)->prev_ = entry;
   }
-  descriptor.head_.store(entry, std::memory_order_relaxed);
+  head->store(entry, std::memory_order_release);
   descriptor.relevant_entry_ = entry;
   descriptor.action_flag_ = JIT_REGISTER_FN;
-  descriptor.action_timestamp_ = timestamp;
-  ActionSequnlock(descriptor);
-
   NativeInfo::NotifyNativeDebugger();

   return entry;
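The ordering in CreateJITCodeEntryInternal above is an "initialize, then publish" pattern: every field of the recycled entry is written first, the seqlock is bumped to an even value with a release increment, and only then is the entry made reachable with a release store, so any reader that can reach the entry also sees it as valid and fully initialized. A generic, stand-alone sketch of the same idea (the Node type and function names are illustrative, not from the commit):

    #include <atomic>
    #include <cstdint>

    struct Node {
      std::atomic<Node*> next{nullptr};
      const uint8_t* payload = nullptr;
      uint64_t payload_size = 0;
      std::atomic<uint32_t> seqlock{1};  // Odd: invalid; even: valid.
    };

    // Publish a node so that lock-free readers only ever follow next-pointers
    // to nodes whose payload is fully written.
    void Publish(std::atomic<Node*>* head, Node* node, const uint8_t* data, uint64_t size) {
      node->payload = data;
      node->payload_size = size;
      node->next.store(head->load(std::memory_order_relaxed), std::memory_order_relaxed);
      // Release #1: all payload writes happen-before the seqlock becomes even.
      node->seqlock.fetch_add(1, std::memory_order_release);
      // Release #2: the node becomes reachable only after it is marked valid.
      head->store(node, std::memory_order_release);
    }

The same Node type is reused by the retirement sketch further below.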
@@ -276,13 +273,8 @@ static void DeleteJITCodeEntryInternal(const JITCodeEntry* entry) {
   const uint8_t* symfile = entry->symfile_addr_;
   JITDescriptor& descriptor = NativeInfo::Descriptor();

-  // Ensure the timestamp is monotonically increasing even in presence of low
-  // granularity system timer. This ensures each entry has unique timestamp.
-  uint64_t timestamp = std::max(descriptor.action_timestamp_ + 1, NanoTime());
-
-  // We are going to modify the linked list, so take the seqlock.
-  ActionSeqlock(descriptor);
-  const JITCodeEntry* next = entry->next_.load(std::memory_order_relaxed);
+  // Remove the entry from the main linked-list.
+  const JITCodeEntry* next = entry->next_.load(kNonRacingRelaxed);
   if (entry->prev_ != nullptr) {
     NativeInfo::Writable(entry->prev_)->next_.store(next, std::memory_order_relaxed);
   } else {
@@ -293,21 +285,22 @@ static void DeleteJITCodeEntryInternal(const JITCodeEntry* entry) {
   }
   descriptor.relevant_entry_ = entry;
   descriptor.action_flag_ = JIT_UNREGISTER_FN;
-  descriptor.action_timestamp_ = timestamp;
-  ActionSequnlock(descriptor);
-
   NativeInfo::NotifyNativeDebugger();

-  // Ensure that clear below can not be reordered above the unlock above.
+  // Delete the entry.
+  JITCodeEntry* writable_entry = NativeInfo::Writable(entry);
+  CHECK_EQ(writable_entry->seqlock_.load(kNonRacingRelaxed) & 1, 0u) << "Expected valid entry";
+  // Release: Ensures that "next_" points to valid entry at any time in reader.
+  writable_entry->seqlock_.fetch_add(1, std::memory_order_release);  // Mark as invalid.
+  // Release: Ensures that the entry is seen as invalid before it's data is freed.
   std::atomic_thread_fence(std::memory_order_release);
-
-  // Aggressively clear the entry as an extra check of the synchronisation.
-  memset(NativeInfo::Writable(entry), 0, sizeof(*entry));
-
-  NativeInfo::Free(entry);
-  if (NativeInfo::kCopySymfileData) {
+  if (NativeInfo::kCopySymfileData && symfile != nullptr) {
     NativeInfo::Free(symfile);
   }
+
+  // Push the entry to the free list.
+  writable_entry->next_.store(descriptor.free_entries_, kNonRacingRelaxed);
+  descriptor.free_entries_ = entry;
 }

 void AddNativeDebugInfoForDex(Thread* self, const DexFile* dexfile) {
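DeleteJITCodeEntryInternal inverts that ordering: the entry is unlinked, marked odd with a release increment, and only after a further release fence is the symfile memory given back, so a reader that copied bytes from a reused symfile is guaranteed to notice the changed seqlock and retry. Continuing the Node sketch from above (same caveats; the free-list handling and the assumption that the node sits at the list head are simplifications):

    // Retire a node previously made reachable by Publish(). The node itself is
    // recycled, never unmapped, so its seqlock remains meaningful to readers.
    void Retire(std::atomic<Node*>* head, Node* node, Node** free_list) {
      // Unlink. Only one writer exists (callers hold a lock), so relaxed suffices.
      head->store(node->next.load(std::memory_order_relaxed), std::memory_order_relaxed);
      // Release: a reader that still reaches the node now sees it as invalid (odd).
      node->seqlock.fetch_add(1, std::memory_order_release);
      // Release fence: the odd seqlock becomes visible before the payload is reused.
      std::atomic_thread_fence(std::memory_order_release);
      node->payload = nullptr;  // Here the real code frees the symfile memory.
      node->payload_size = 0;
      // Park the node on the free list for later reuse.
      node->next.store(*free_list, std::memory_order_relaxed);
      *free_list = node;
    }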
@@ -332,6 +325,48 @@ void RemoveNativeDebugInfoForDex(Thread* self, const DexFile* dexfile) {
   }
 }

+// Splits the linked linked in to two parts:
+// The first part (including the static head pointer) is owned by the application.
+// The second part is owned by zygote and might be concurrently modified by it.
+//
+// We add two empty entries at the boundary which are never removed (app_tail, zygote_head).
+// These entries are needed to preserve the next/prev pointers in the linked list,
+// since zygote can not modify the application's data and vice versa.
+//
+//          <--- owned by the application memory --->  <--- owned by zygote memory --->
+//         |----------------------|------------------|-------------|-----------------|
+// head -> | application_entries* | application_tail | zygote_head | zygote_entries* |
+//         |----------------------|------------------|-------------|-----------------|
+//
+void NativeDebugInfoPreFork() {
+  CHECK(Runtime::Current()->IsZygote());
+  JITDescriptor& descriptor = JitNativeInfo::Descriptor();
+  if (descriptor.zygote_head_entry_ != nullptr) {
+    return;  // Already done - we need to do this only on the first fork.
+  }
+
+  // Create the zygote-owned head entry (with no ELF file).
+  // The data will be allocated from the current JIT memory (owned by zygote).
+  MutexLock mu(Thread::Current(), *Locks::jit_lock_);  // Needed to alloc entry.
+  const JITCodeEntry* zygote_head = CreateJITCodeEntryInternal<JitNativeInfo>();
+  CHECK(zygote_head != nullptr);
+  descriptor.zygote_head_entry_ = zygote_head;
+
+  // Create the child-owned tail entry (with no ELF file).
+  // The data is statically allocated since it must be owned by the forked process.
+  JITCodeEntry* app_tail = &descriptor.application_tail_entry_;
+  app_tail->next_ = zygote_head;
+  app_tail->seqlock_.store(2, kNonRacingRelaxed);  // Mark as valid.
+  descriptor.head_.store(app_tail, std::memory_order_release);
+}
+
+void NativeDebugInfoPostFork() {
+  JITDescriptor& descriptor = JitNativeInfo::Descriptor();
+  if (!Runtime::Current()->IsZygote()) {
+    descriptor.free_entries_ = nullptr;  // Don't reuse zygote's entries.
+  }
+}
+
 // Size of JIT code range covered by each packed JITCodeEntry.
 static constexpr uint32_t kJitRepackGroupSize = 64 * KB;
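To a reader the two halves still look like one chain: head_ leads through the application's entries to application_tail, whose next_ points at zygote_head and then at the zygote's entries, but only the part before zygote_head is writable by the forked app; the RepackEntries change below stops at exactly that point. A small sketch of such a walk, using the types from this diff (the function itself is illustrative, not part of the commit):

    // Counts the entries this process is allowed to modify. The two boundary
    // entries carry no ELF file, so they are skipped.
    size_t CountOwnedEntries(const JITDescriptor& descriptor, bool is_zygote) {
      size_t count = 0;
      for (const JITCodeEntry* it = descriptor.head_.load(std::memory_order_acquire);
           it != nullptr;
           it = it->next_.load(std::memory_order_acquire)) {
        if (it == descriptor.zygote_head_entry_ && !is_zygote) {
          break;  // Everything from here on lives in zygote-owned, read-only memory.
        }
        if (it->symfile_addr_ != nullptr) {
          ++count;
        }
      }
      return count;
    }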
@@ -349,11 +384,16 @@ static void RepackEntries(bool compress, ArrayRef<const void*> removed)
   if (jit == nullptr) {
     return;
   }
+  JITDescriptor& descriptor = __jit_debug_descriptor;
+  bool is_zygote = Runtime::Current()->IsZygote();

   // Collect entries that we want to pack.
   std::vector<const JITCodeEntry*> entries;
   entries.reserve(2 * kJitRepackFrequency);
-  for (const JITCodeEntry* it = __jit_debug_descriptor.head_; it != nullptr; it = it->next_) {
+  for (const JITCodeEntry* it = descriptor.head_; it != nullptr; it = it->next_) {
+    if (it == descriptor.zygote_head_entry_ && !is_zygote) {
+      break;  // Memory owned by the zygote process (read-only for an app).
+    }
     if (it->allow_packing_) {
       if (!compress && it->is_compressed_ && removed.empty()) {
         continue;  // If we are not compressing, also avoid decompressing.
@@ -420,10 +460,6 @@ void AddNativeDebugInfoForJit(const void* code_ptr,
   MutexLock mu(Thread::Current(), g_jit_debug_lock);
   DCHECK_NE(symfile.size(), 0u);

-  if (Runtime::Current()->IsZygote()) {
-    return;  // TODO: Implement memory sharing with the zygote process.
-  }
-
   CreateJITCodeEntryInternal<JitNativeInfo>(ArrayRef<const uint8_t>(symfile),
                                             /*addr=*/ code_ptr,
                                             /*allow_packing=*/ allow_packing,
@@ -437,8 +473,10 @@ void AddNativeDebugInfoForJit(const void* code_ptr,
   // Automatically repack entries on regular basis to save space.
   // Pack (but don't compress) recent entries - this is cheap and reduces memory use by ~4x.
   // We delay compression until after GC since it is more expensive (and saves further ~4x).
+  // Always compress zygote, since it does not GC and we want to keep the high-water mark low.
   if (++g_jit_num_unpacked_entries >= kJitRepackFrequency) {
-    RepackEntries(/*compress=*/ false, /*removed=*/ ArrayRef<const void*>());
+    bool is_zygote = Runtime::Current()->IsZygote();
+    RepackEntries(/*compress=*/ is_zygote, /*removed=*/ ArrayRef<const void*>());
   }
 }
diff --git a/runtime/jit/debugger_interface.h b/runtime/jit/debugger_interface.h
index 0bb3236870..477d58c24c 100644
--- a/runtime/jit/debugger_interface.h
+++ b/runtime/jit/debugger_interface.h
@@ -32,6 +32,13 @@ class Mutex;
 class Thread;
 struct JITCodeEntry;

+// Must be called before zygote forks.
+// Used to ensure that zygote's mini-debug-info can be shared with apps.
+void NativeDebugInfoPreFork();
+
+// Must be called after zygote forks.
+void NativeDebugInfoPostFork();
+
 ArrayRef<const uint8_t> GetJITCodeEntrySymFile(const JITCodeEntry*);

 // Notify native tools (e.g. libunwind) that DEX file has been opened.
@@ -54,7 +61,7 @@ void RemoveNativeDebugInfoForJit(ArrayRef<const void*> removed_code_ptrs)
     REQUIRES_SHARED(Locks::jit_lock_);  // Might need JIT code cache to allocate memory.

 // Returns approximate memory used by debug info for JIT code.
-size_t GetJitMiniDebugInfoMemUsage();
+size_t GetJitMiniDebugInfoMemUsage() REQUIRES_SHARED(Locks::jit_lock_);

 // Get the lock which protects the native debug info.
 // Used only in tests to unwind while the JIT thread is running.
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index fd714a831a..c9b458fb49 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -1222,6 +1222,8 @@ void Jit::PreZygoteFork() {
     return;
   }
   thread_pool_->DeleteThreads();
+
+  NativeDebugInfoPreFork();
 }

 void Jit::PostZygoteFork() {
@@ -1229,6 +1231,8 @@ void Jit::PostZygoteFork() {
     return;
   }
   thread_pool_->CreateThreads();
+
+  NativeDebugInfoPostFork();
 }

 void Jit::BootCompleted() {
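The kNonRacingRelaxed comment notes that some writes are ordered specifically so libunwindstack can read this memory from another process. How such a tool actually fetches the bytes is outside this change; purely as an illustration of the OS primitive the old comment mentioned (process_vm_readv), a hedged sketch of a fault-tolerant remote read that a seqlock-style reader could retry on failure:

    #include <sys/types.h>
    #include <sys/uio.h>   // process_vm_readv (Linux-specific)
    #include <cstdint>

    // Copies 'size' bytes from another process. A short or failed read (e.g. the
    // target unmapped the range in the meantime) is treated like a seqlock
    // mismatch: the caller simply retries the iteration from the list head.
    bool ReadRemote(pid_t pid, uintptr_t remote_addr, void* local, size_t size) {
      struct iovec local_iov = {local, size};
      struct iovec remote_iov = {reinterpret_cast<void*>(remote_addr), size};
      ssize_t n = process_vm_readv(pid, &local_iov, 1, &remote_iov, 1, 0);
      return n == static_cast<ssize_t>(size);
    }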