JIT mini-debug-info: Add support for zygote shared memory
Ensure that zygote's mini-debug-info is visible to apps.
Remove global seqlock synchronization.
It was replaced by entry-level synchronization.
Test: test.py -b -r --host --jit --64
Test: device boots
Bug: 119800099
Change-Id: I4885f9a4d44743d5608793a2e5d6453123d111f3
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
index 24ca0fc..0397ad8 100644
--- a/runtime/jit/debugger_interface.cc
+++ b/runtime/jit/debugger_interface.cc
@@ -46,40 +46,29 @@
// method, which is called after every modification of the linked list.
// GDB does this, but it is complex to set up and it stops the process.
//
-// 2) Asynchronously, by monitoring the action_seqlock_.
-// * The seqlock is a monotonically increasing counter which is incremented
-// before and after every modification of the linked list. Odd value of
-// the counter means the linked list is being modified (it is locked).
-// * The tool should read the value of the seqlock both before and after
-// copying the linked list. If the seqlock values match and are even,
-// the copy is consistent. Otherwise, the reader should try again.
-// * Note that using the data directly while is it being modified
-// might crash the tool. Therefore, the only safe way is to make
-// a copy and use the copy only after the seqlock has been checked.
-// * Note that the process might even free and munmap the data while
-// it is being copied, therefore the reader should either handle
-// SEGV or use OS calls to read the memory (e.g. process_vm_readv).
-// * The seqlock can be used to determine the number of modifications of
-// the linked list, which can be used to intelligently cache the data.
-// Note the possible overflow of the seqlock. It is intentionally
-// 32-bit, since 64-bit atomics can be tricky on some architectures.
-// * The timestamps on the entry record the time when the entry was
-// created which is relevant if the unwinding is not live and is
-// postponed until much later. All timestamps must be unique.
-// * Memory barriers are used to make it possible to reason about
-// the data even when it is being modified (e.g. the process crashed
-// while that data was locked, and thus it will be never unlocked).
-// * In particular, it should be possible to:
-// 1) read the seqlock and then the linked list head pointer.
-// 2) copy the entry and check that seqlock has not changed.
-// 3) copy the symfile and check that seqlock has not changed.
-// 4) go back to step 2 using the next pointer (if non-null).
-// This safely creates copy of all symfiles, although other data
-// might be inconsistent/unusable (e.g. prev_, action_timestamp_).
-// * For full conformance with the C++ memory model, all seqlock
-// protected accesses should be atomic. We currently do this in the
-// more critical cases. The rest will have to be fixed before
-// attempting to run TSAN on this code.
+// 2) Asynchronously, using the entry seqlocks.
+// * The seqlock is a monotonically increasing counter, which
+// is even if the entry is valid and odd if it is invalid.
+// It is set to an even value after all other fields are set,
+// and it is set to an odd value before the entry is deleted.
+// * This makes it possible to safely read the symfile data:
+// * The reader should read the value of the seqlock both
+// before and after reading the symfile. If the seqlock
+// values match and are even, the copy is consistent.
+// * Entries are recycled, but never freed, which guarantees
+// that the seqlock is not overwritten by a random value.
+// * The linked list is maintained one level above the seqlocks.
+// The next-pointer must always point to an entry with an even
+// seqlock, which ensures that entries of a crashed process can
+// be read. This means an entry must be added to the list after
+// it is created, and removed from it before it is invalidated
+// (i.e. before its seqlock becomes odd).
+// * When iterating over the linked list, the reader can use the
+// seqlocks to ensure that the current and the next entry were
+// not deleted, using the following steps (sketched below):
+// 1) Read next pointer and the next entry's seqlock.
+// 2) Read the symfile and re-read the next pointer.
+// 3) Re-read both the current and next seqlock.
+// 4) Go to step 1 using the new entry and seqlock.
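+// * A reader-side sketch of this iteration (illustrative pseudocode
+// only; Read() stands for a racy or out-of-process memory read,
+// e.g. via process_vm_readv, and all names are hypothetical):
+//   entry = Read(&descriptor->head_);
+//   seqlock = (entry != nullptr) ? Read(&entry->seqlock_) : 1;
+//   while (entry != nullptr && (seqlock & 1) == 0) {
+//     next = Read(&entry->next_);                     // Step 1.
+//     next_seqlock = (next != nullptr) ? Read(&next->seqlock_) : 1;
+//     copy = CopySymfile(entry);                      // Step 2.
+//     ok = Read(&entry->next_) == next;
+//     ok &= Read(&entry->seqlock_) == seqlock;        // Step 3.
+//     ok &= next == nullptr || Read(&next->seqlock_) == next_seqlock;
+//     if (!ok) { Discard(copy); break; }              // Restart from head_.
+//     entry = next;                                   // Step 4.
+//     seqlock = next_seqlock;
+//   }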
//
namespace art {
@@ -87,6 +76,11 @@
static Mutex g_jit_debug_lock("JIT native debug entries", kNativeDebugInterfaceLock);
static Mutex g_dex_debug_lock("DEX native debug entries", kNativeDebugInterfaceLock);
+// Most loads and stores need no synchronization since all memory is protected by the global locks.
+// Some writes are synchronized so libunwindstack can read the memory safely from another process.
+constexpr std::memory_order kNonRacingRelaxed = std::memory_order_relaxed;
+
+// Public binary interface between ART and native tools (gdb, libunwind, etc.).
extern "C" {
enum JITAction {
JIT_NOACTION = 0,
@@ -96,15 +90,13 @@
// Public/stable binary interface.
struct JITCodeEntryPublic {
- // Atomic to ensure the reader can always iterate over the linked list
- // (e.g. the process could crash in the middle of writing this field).
- std::atomic<const JITCodeEntry*> next_;
- const JITCodeEntry* prev_; // For linked list deletion. Unused in readers.
- const uint8_t* symfile_addr_; // Address of the in-memory ELF file.
- uint64_t symfile_size_; // Beware of the offset (12 on x86; but 16 on ARM32).
+ std::atomic<const JITCodeEntry*> next_; // Atomic to guarantee consistency after a crash.
+ const JITCodeEntry* prev_ = nullptr; // For linked list deletion. Unused in readers.
+ const uint8_t* symfile_addr_ = nullptr; // Address of the in-memory ELF file.
+ uint64_t symfile_size_ = 0; // Note that the offset is 12 on x86, but 16 on ARM32.
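+ // (On 32-bit targets the three pointers above occupy 12 bytes; x86 aligns
+ // uint64_t to 4 bytes so the offset stays 12, while ARM32 aligns it to 8,
+ // pushing the offset to 16.)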
// Android-specific fields:
- uint64_t register_timestamp_; // CLOCK_MONOTONIC time of entry registration.
+ std::atomic_uint32_t seqlock_{1}; // Synchronization. Even value if entry is valid.
};
// Implementation-specific fields (which can be used only in this file).
@@ -120,26 +112,34 @@
bool is_compressed_ = false;
};
- struct JITDescriptor {
+ // Public/stable binary interface.
+ struct JITDescriptorPublic {
uint32_t version_ = 1; // NB: GDB supports only version 1.
uint32_t action_flag_ = JIT_NOACTION; // One of the JITAction enum values.
const JITCodeEntry* relevant_entry_ = nullptr; // The entry affected by the action.
std::atomic<const JITCodeEntry*> head_{nullptr}; // Head of link list of all entries.
-
- // Android-specific fields:
- uint8_t magic_[8] = {'A', 'n', 'd', 'r', 'o', 'i', 'd', '1'};
- uint32_t flags_ = 0; // Reserved for future use. Must be 0.
- uint32_t sizeof_descriptor = sizeof(JITDescriptor);
- uint32_t sizeof_entry = sizeof(JITCodeEntryPublic);
- std::atomic_uint32_t action_seqlock_{0}; // Incremented before and after any modification.
- uint64_t action_timestamp_ = 1; // CLOCK_MONOTONIC time of last action.
};
+ // Implementation-specific fields (which can be used only in this file).
+ struct JITDescriptor : public JITDescriptorPublic {
+ const JITCodeEntry* free_entries_ = nullptr; // List of deleted entries ready for reuse.
+
+ // Used for memory sharing with zygote. See NativeDebugInfoPreFork().
+ const JITCodeEntry* zygote_head_entry_ = nullptr;
+ JITCodeEntry application_tail_entry_{};
+ };
+
+ // Public interface: Can be used by a reader to check that the structs have the expected size.
+ uint32_t g_art_sizeof_jit_code_entry = sizeof(JITCodeEntryPublic);
+ uint32_t g_art_sizeof_jit_descriptor = sizeof(JITDescriptorPublic);
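+ // For example, a reader could verify ABI compatibility like this (an
+ // illustrative sketch; ReadWord() is a hypothetical out-of-process read
+ // of the named symbol, e.g. via a symbol lookup plus process_vm_readv):
+ //   CHECK_EQ(ReadWord("g_art_sizeof_jit_code_entry"), sizeof(JITCodeEntryPublic));
+ //   CHECK_EQ(ReadWord("g_art_sizeof_jit_descriptor"), sizeof(JITDescriptorPublic));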
+
// Check that std::atomic has the expected layout.
static_assert(alignof(std::atomic_uint32_t) == alignof(uint32_t), "Weird alignment");
static_assert(sizeof(std::atomic_uint32_t) == sizeof(uint32_t), "Weird size");
+ static_assert(std::atomic_uint32_t::is_always_lock_free, "Expected to be lock free");
static_assert(alignof(std::atomic<void*>) == alignof(void*), "Weird alignment");
static_assert(sizeof(std::atomic<void*>) == sizeof(void*), "Weird size");
+ static_assert(std::atomic<void*>::is_always_lock_free, "Expected to be lock free");
// GDB may set breakpoint here. We must ensure it is not removed or deduplicated.
void __attribute__((noinline)) __jit_debug_register_code() {
@@ -176,7 +176,12 @@
static const void* Alloc(size_t size) { return Memory()->AllocateData(size); }
static void Free(const void* ptr) { Memory()->FreeData(reinterpret_cast<const uint8_t*>(ptr)); }
static void Free(void* ptr) = delete;
+
template<class T> static T* Writable(const T* v) {
+ // Special case: This entry is in static memory and not allocated in JIT memory.
+ if (v == reinterpret_cast<const void*>(&Descriptor().application_tail_entry_)) {
+ return const_cast<T*>(v);
+ }
return const_cast<T*>(Memory()->GetWritableDataAddress(v));
}
@@ -194,34 +199,29 @@
return ArrayRef<const uint8_t>(entry->symfile_addr_, entry->symfile_size_);
}
-// Mark the descriptor as "locked", so native tools know the data is being modified.
-static void ActionSeqlock(JITDescriptor& descriptor) {
- DCHECK_EQ(descriptor.action_seqlock_.load() & 1, 0u) << "Already locked";
- descriptor.action_seqlock_.fetch_add(1, std::memory_order_relaxed);
- // Ensure that any writes within the locked section cannot be reordered before the increment.
- std::atomic_thread_fence(std::memory_order_release);
-}
-
-// Mark the descriptor as "unlocked", so native tools know the data is safe to read.
-static void ActionSequnlock(JITDescriptor& descriptor) {
- DCHECK_EQ(descriptor.action_seqlock_.load() & 1, 1u) << "Already unlocked";
- // Ensure that any writes within the locked section cannot be reordered after the increment.
- std::atomic_thread_fence(std::memory_order_release);
- descriptor.action_seqlock_.fetch_add(1, std::memory_order_relaxed);
-}
-
+// This must be called with the appropriate lock taken (g_{jit,dex}_debug_lock).
template<class NativeInfo>
static const JITCodeEntry* CreateJITCodeEntryInternal(
- ArrayRef<const uint8_t> symfile,
+ ArrayRef<const uint8_t> symfile = ArrayRef<const uint8_t>(),
const void* addr = nullptr,
bool allow_packing = false,
bool is_compressed = false) {
JITDescriptor& descriptor = NativeInfo::Descriptor();
+ // Allocate JITCodeEntry if needed.
+ if (descriptor.free_entries_ == nullptr) {
+ const void* memory = NativeInfo::Alloc(sizeof(JITCodeEntry));
+ if (memory == nullptr) {
+ LOG(ERROR) << "Failed to allocate memory for native debug info";
+ return nullptr;
+ }
+ new (NativeInfo::Writable(memory)) JITCodeEntry();
+ descriptor.free_entries_ = reinterpret_cast<const JITCodeEntry*>(memory);
+ }
+
// Make a copy of the buffer to shrink it and to pass ownership to JITCodeEntry.
- const uint8_t* copy = nullptr;
- if (NativeInfo::kCopySymfileData) {
- copy = reinterpret_cast<const uint8_t*>(NativeInfo::Alloc(symfile.size()));
+ if (NativeInfo::kCopySymfileData && !symfile.empty()) {
+ const uint8_t* copy = reinterpret_cast<const uint8_t*>(NativeInfo::Alloc(symfile.size()));
if (copy == nullptr) {
LOG(ERROR) << "Failed to allocate memory for native debug info";
return nullptr;
@@ -230,41 +230,38 @@
symfile = ArrayRef<const uint8_t>(copy, symfile.size());
}
- // Ensure the timestamp is monotonically increasing even in presence of low
- // granularity system timer. This ensures each entry has unique timestamp.
- uint64_t timestamp = std::max(descriptor.action_timestamp_ + 1, NanoTime());
-
- const JITCodeEntry* head = descriptor.head_.load(std::memory_order_relaxed);
- const void* memory = NativeInfo::Alloc(sizeof(JITCodeEntry));
- if (memory == nullptr) {
- LOG(ERROR) << "Failed to allocate memory for native debug info";
- if (copy != nullptr) {
- NativeInfo::Free(copy);
- }
- return nullptr;
+ // The zygote must insert entries at a specific place. See NativeDebugInfoPreFork().
+ std::atomic<const JITCodeEntry*>* head = &descriptor.head_;
+ const JITCodeEntry* prev = nullptr;
+ if (Runtime::Current()->IsZygote() && descriptor.zygote_head_entry_ != nullptr) {
+ head = &NativeInfo::Writable(descriptor.zygote_head_entry_)->next_;
+ prev = descriptor.zygote_head_entry_;
}
- const JITCodeEntry* entry = reinterpret_cast<const JITCodeEntry*>(memory);
+ const JITCodeEntry* next = head->load(kNonRacingRelaxed);
+
+ // Pop entry from the free list.
+ const JITCodeEntry* entry = descriptor.free_entries_;
+ descriptor.free_entries_ = descriptor.free_entries_->next_.load(kNonRacingRelaxed);
+ CHECK_EQ(entry->seqlock_.load(kNonRacingRelaxed) & 1, 1u) << "Expected invalid entry";
+
+ // Create the entry and set all its fields.
JITCodeEntry* writable_entry = NativeInfo::Writable(entry);
+ writable_entry->next_.store(next, std::memory_order_relaxed);
+ writable_entry->prev_ = prev;
writable_entry->symfile_addr_ = symfile.data();
writable_entry->symfile_size_ = symfile.size();
- writable_entry->prev_ = nullptr;
- writable_entry->next_.store(head, std::memory_order_relaxed);
- writable_entry->register_timestamp_ = timestamp;
writable_entry->addr_ = addr;
writable_entry->allow_packing_ = allow_packing;
writable_entry->is_compressed_ = is_compressed;
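+ // Release: Ensures that all the fields above are visible before the seqlock
+ // turns even, so a reader seeing an even seqlock sees a fully initialized entry.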
+ writable_entry->seqlock_.fetch_add(1, std::memory_order_release); // Mark as valid.
- // We are going to modify the linked list, so take the seqlock.
- ActionSeqlock(descriptor);
- if (head != nullptr) {
- NativeInfo::Writable(head)->prev_ = entry;
+ // Add the entry to the main linked list.
+ if (next != nullptr) {
+ NativeInfo::Writable(next)->prev_ = entry;
}
- descriptor.head_.store(entry, std::memory_order_relaxed);
+ head->store(entry, std::memory_order_release);
descriptor.relevant_entry_ = entry;
descriptor.action_flag_ = JIT_REGISTER_FN;
- descriptor.action_timestamp_ = timestamp;
- ActionSequnlock(descriptor);
-
NativeInfo::NotifyNativeDebugger();
return entry;
@@ -276,13 +273,8 @@
const uint8_t* symfile = entry->symfile_addr_;
JITDescriptor& descriptor = NativeInfo::Descriptor();
- // Ensure the timestamp is monotonically increasing even in presence of low
- // granularity system timer. This ensures each entry has unique timestamp.
- uint64_t timestamp = std::max(descriptor.action_timestamp_ + 1, NanoTime());
-
- // We are going to modify the linked list, so take the seqlock.
- ActionSeqlock(descriptor);
- const JITCodeEntry* next = entry->next_.load(std::memory_order_relaxed);
+ // Remove the entry from the main linked list.
+ const JITCodeEntry* next = entry->next_.load(kNonRacingRelaxed);
if (entry->prev_ != nullptr) {
NativeInfo::Writable(entry->prev_)->next_.store(next, std::memory_order_relaxed);
} else {
@@ -293,21 +285,22 @@
}
descriptor.relevant_entry_ = entry;
descriptor.action_flag_ = JIT_UNREGISTER_FN;
- descriptor.action_timestamp_ = timestamp;
- ActionSequnlock(descriptor);
-
NativeInfo::NotifyNativeDebugger();
- // Ensure that clear below can not be reordered above the unlock above.
+ // Delete the entry.
+ JITCodeEntry* writable_entry = NativeInfo::Writable(entry);
+ CHECK_EQ(writable_entry->seqlock_.load(kNonRacingRelaxed) & 1, 0u) << "Expected valid entry";
+ // Release: Ensures that "next_" points to a valid entry at any time in the reader.
+ writable_entry->seqlock_.fetch_add(1, std::memory_order_release); // Mark as invalid.
+ // Release: Ensures that the entry is seen as invalid before its data is freed.
std::atomic_thread_fence(std::memory_order_release);
-
- // Aggressively clear the entry as an extra check of the synchronisation.
- memset(NativeInfo::Writable(entry), 0, sizeof(*entry));
-
- NativeInfo::Free(entry);
- if (NativeInfo::kCopySymfileData) {
+ if (NativeInfo::kCopySymfileData && symfile != nullptr) {
NativeInfo::Free(symfile);
}
+
+ // Push the entry to the free list.
+ writable_entry->next_.store(descriptor.free_entries_, kNonRacingRelaxed);
+ descriptor.free_entries_ = entry;
}
void AddNativeDebugInfoForDex(Thread* self, const DexFile* dexfile) {
@@ -332,6 +325,48 @@
}
}
+// Splits the linked list into two parts:
+// The first part (including the static head pointer) is owned by the application.
+// The second part is owned by zygote and might be concurrently modified by it.
+//
+// We add two empty entries at the boundary, which are never removed (app_tail, zygote_head).
+// These entries are needed to preserve the next/prev pointers in the linked list,
+// since the zygote cannot modify the application's data and vice versa.
+//
+// <--- owned by the application memory ---> <--- owned by zygote memory --->
+// |----------------------|------------------|-------------|-----------------|
+// head -> | application_entries* | application_tail | zygote_head | zygote_entries* |
+// |----------------------|------------------|-------------|-----------------|
+//
+void NativeDebugInfoPreFork() {
+ CHECK(Runtime::Current()->IsZygote());
+ JITDescriptor& descriptor = JitNativeInfo::Descriptor();
+ if (descriptor.zygote_head_entry_ != nullptr) {
+ return; // Already done - we need to do this only on the first fork.
+ }
+
+ // Create the zygote-owned head entry (with no ELF file).
+ // The data will be allocated from the current JIT memory (owned by zygote).
+ MutexLock mu(Thread::Current(), *Locks::jit_lock_); // Needed to alloc entry.
+ const JITCodeEntry* zygote_head = CreateJITCodeEntryInternal<JitNativeInfo>();
+ CHECK(zygote_head != nullptr);
+ descriptor.zygote_head_entry_ = zygote_head;
+
+ // Create the child-owned tail entry (with no ELF file).
+ // The data is statically allocated since it must be owned by the forked process.
+ JITCodeEntry* app_tail = &descriptor.application_tail_entry_;
+ app_tail->next_ = zygote_head;
+ app_tail->seqlock_.store(2, kNonRacingRelaxed); // Mark as valid.
+ descriptor.head_.store(app_tail, std::memory_order_release);
+}
+
+void NativeDebugInfoPostFork() {
+ JITDescriptor& descriptor = JitNativeInfo::Descriptor();
+ if (!Runtime::Current()->IsZygote()) {
+ descriptor.free_entries_ = nullptr; // Don't reuse zygote's entries.
+ }
+}
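+
+// A sketch of the intended call sequence around fork() (the actual call
+// sites are Jit::PreZygoteFork() and Jit::PostZygoteFork(); see jit.cc):
+//   NativeDebugInfoPreFork();   // In the zygote, before fork(): split the list.
+//   pid_t pid = fork();
+//   NativeDebugInfoPostFork();  // After fork(): an app drops the zygote free list.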
+
// Size of JIT code range covered by each packed JITCodeEntry.
static constexpr uint32_t kJitRepackGroupSize = 64 * KB;
@@ -349,11 +384,16 @@
if (jit == nullptr) {
return;
}
+ JITDescriptor& descriptor = __jit_debug_descriptor;
+ bool is_zygote = Runtime::Current()->IsZygote();
// Collect entries that we want to pack.
std::vector<const JITCodeEntry*> entries;
entries.reserve(2 * kJitRepackFrequency);
- for (const JITCodeEntry* it = __jit_debug_descriptor.head_; it != nullptr; it = it->next_) {
+ for (const JITCodeEntry* it = descriptor.head_; it != nullptr; it = it->next_) {
+ if (it == descriptor.zygote_head_entry_ && !is_zygote) {
+ break; // Memory owned by the zygote process (read-only for an app).
+ }
if (it->allow_packing_) {
if (!compress && it->is_compressed_ && removed.empty()) {
continue; // If we are not compressing, also avoid decompressing.
@@ -420,10 +460,6 @@
MutexLock mu(Thread::Current(), g_jit_debug_lock);
DCHECK_NE(symfile.size(), 0u);
- if (Runtime::Current()->IsZygote()) {
- return; // TODO: Implement memory sharing with the zygote process.
- }
-
CreateJITCodeEntryInternal<JitNativeInfo>(ArrayRef<const uint8_t>(symfile),
/*addr=*/ code_ptr,
/*allow_packing=*/ allow_packing,
@@ -437,8 +473,10 @@
// Automatically repack entries on regular basis to save space.
// Pack (but don't compress) recent entries - this is cheap and reduces memory use by ~4x.
// We delay compression until after GC since it is more expensive (and saves further ~4x).
+ // Always compress in the zygote, since it does not GC and we want to keep the high-water mark low.
if (++g_jit_num_unpacked_entries >= kJitRepackFrequency) {
- RepackEntries(/*compress=*/ false, /*removed=*/ ArrayRef<const void*>());
+ bool is_zygote = Runtime::Current()->IsZygote();
+ RepackEntries(/*compress=*/ is_zygote, /*removed=*/ ArrayRef<const void*>());
}
}
diff --git a/runtime/jit/debugger_interface.h b/runtime/jit/debugger_interface.h
index 0bb3236..477d58c 100644
--- a/runtime/jit/debugger_interface.h
+++ b/runtime/jit/debugger_interface.h
@@ -32,6 +32,13 @@
class Thread;
struct JITCodeEntry;
+// Must be called before zygote forks.
+// Used to ensure that zygote's mini-debug-info can be shared with apps.
+void NativeDebugInfoPreFork();
+
+// Must be called after zygote forks.
+void NativeDebugInfoPostFork();
+
ArrayRef<const uint8_t> GetJITCodeEntrySymFile(const JITCodeEntry*);
// Notify native tools (e.g. libunwind) that DEX file has been opened.
@@ -54,7 +61,7 @@
REQUIRES_SHARED(Locks::jit_lock_); // Might need JIT code cache to allocate memory.
// Returns approximate memory used by debug info for JIT code.
-size_t GetJitMiniDebugInfoMemUsage();
+size_t GetJitMiniDebugInfoMemUsage() REQUIRES_SHARED(Locks::jit_lock_);
// Get the lock which protects the native debug info.
// Used only in tests to unwind while the JIT thread is running.
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index fd714a8..c9b458f 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -1222,6 +1222,8 @@
return;
}
thread_pool_->DeleteThreads();
+
+ NativeDebugInfoPreFork();
}
void Jit::PostZygoteFork() {
@@ -1229,6 +1231,8 @@
return;
}
thread_pool_->CreateThreads();
+
+ NativeDebugInfoPostFork();
}
void Jit::BootCompleted() {