More JIT debug data synchronisation.

I want to be able to reason about the consistency of the data even
when it is being modified (e.g. debug-malloc backtrace running
on one thread while the JIT is running on a different thread).

Test: testrunner.py --host -t 137
Change-Id: I051bf8dcf2801d9671cf83f0e0a94e1f19b98c0f
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
index 505e626..63fb22c 100644
--- a/runtime/jit/debugger_interface.cc
+++ b/runtime/jit/debugger_interface.cc
@@ -24,7 +24,9 @@
 #include "thread-current-inl.h"
 #include "thread.h"
 
+#include <atomic>
 #include <unordered_map>
+#include <cstddef>
 
 //
 // Debug interface for native tools (gdb, lldb, libunwind, simpleperf).
@@ -37,13 +39,40 @@
 //    method, which is called after every modification of the linked list.
 //    GDB does this, but it is complex to set up and it stops the process.
 //
-// 2) Asynchronously, by monitoring the action_counter_, which is incremented
-//    on every modification of the linked list and kept at -1 during updates.
-//    Therefore, if the tool process reads the counter both before and after
-//    iterating over the linked list, and the counters match and are not -1,
-//    the tool process can be sure the list was not modified during the read.
-//    Obviously, it can also cache the data and use the counter to determine
-//    if the cache is up to date, or to intelligently update it if needed.
+// 2) Asynchronously, by monitoring the action_seqlock_.
+//   * The seqlock is a monotonically increasing counter which is incremented
+//     before and after every modification of the linked list. An odd value
+//     of the counter means the linked list is being modified (it is locked).
+//   * The tool should read the value of the seqlock both before and after
+//     copying the linked list.  If the seqlock values match and are even,
+//     the copy is consistent.  Otherwise, the reader should try again.
+//     * Note that using the data directly while it is being modified
+//       might crash the tool.  Therefore, the only safe way is to make
+//       a copy and use the copy only after the seqlock has been checked.
+//     * Note that the process might even free and munmap the data while
+//       it is being copied; therefore, the reader should either handle
+//       SEGV or use OS calls to read the memory (e.g. process_vm_readv).
+//   * The seqlock can be used to determine the number of modifications of
+//     the linked list, which can be used to intelligently cache the data.
+//     Note the possible overflow of the seqlock.  It is intentionally
+//     32-bit, since 64-bit atomics can be tricky on some architectures.
+//   * The timestamps on the entry record the time when the entry was
+//     created, which is relevant if the unwinding is not live and is
+//     postponed until much later.  All timestamps must be unique.
+//   * Memory barriers are used to make it possible to reason about
+//     the data even when it is being modified (e.g. the process crashed
+//     while that data was locked, and thus it will never be unlocked).
+//     * In particular, it should be possible to:
+//       1) read the seqlock and then the linked list head pointer.
+//       2) copy the entry and check that the seqlock has not changed.
+//       3) copy the symfile and check that the seqlock has not changed.
+//       4) go back to step 2 using the next pointer (if non-null).
+//       This safely creates a copy of all symfiles, although other data
+//       might be inconsistent/unusable (e.g. prev_, action_timestamp_).
+//   * For full conformance with the C++ memory model, all seqlock
+//     protected accesses should be atomic. We currently do this in the
+//     more critical cases. The rest will have to be fixed before
+//     attempting to run TSAN on this code.
 //
 
 namespace art {
@@ -55,7 +84,10 @@
   } JITAction;
 
   struct JITCodeEntry {
-    JITCodeEntry* next_;
+    // Atomic to ensure the reader can always iterate over the linked list
+    // (e.g. the process could crash in the middle of writing this field).
+    std::atomic<JITCodeEntry*> next_;
+    // Non-atomic. The reader should not use it. It is only used for deletion.
     JITCodeEntry* prev_;
     const uint8_t* symfile_addr_;
     uint64_t symfile_size_;  // Beware of the offset (12 on x86; but 16 on ARM32).
@@ -65,24 +97,25 @@
   };
 
   struct JITDescriptor {
-    uint32_t version_ = 1;                    // NB: GDB supports only version 1.
-    uint32_t action_flag_ = JIT_NOACTION;     // One of the JITAction enum values.
-    JITCodeEntry* relevant_entry_ = nullptr;  // The entry affected by the action.
-    JITCodeEntry* first_entry_ = nullptr;     // Head of link list of all entries.
+    uint32_t version_ = 1;                      // NB: GDB supports only version 1.
+    uint32_t action_flag_ = JIT_NOACTION;       // One of the JITAction enum values.
+    JITCodeEntry* relevant_entry_ = nullptr;    // The entry affected by the action.
+    std::atomic<JITCodeEntry*> head_{nullptr};  // Head of the linked list of all entries.
 
     // Android-specific fields:
     uint8_t magic_[8] = {'A', 'n', 'd', 'r', 'o', 'i', 'd', '1'};
-    uint32_t flags_ = 0;                   // Reserved for future use. Must be 0.
+    uint32_t flags_ = 0;  // Reserved for future use. Must be 0.
     uint32_t sizeof_descriptor = sizeof(JITDescriptor);
     uint32_t sizeof_entry = sizeof(JITCodeEntry);
-    std::atomic_int32_t action_counter_;   // Number of actions, or -1 if locked.
-                                           // It can overflow from INT32_MAX to 0.
-    uint64_t action_timestamp_ = 1;        // CLOCK_MONOTONIC time of last action.
+    std::atomic_uint32_t action_seqlock_{0};  // Incremented before and after any modification.
+    uint64_t action_timestamp_ = 1;           // CLOCK_MONOTONIC time of last action.
   };
 
-  // Check that std::atomic_int32_t has the same layout as int32_t.
-  static_assert(alignof(std::atomic_int32_t) == alignof(int32_t), "Weird alignment");
-  static_assert(sizeof(std::atomic_int32_t) == sizeof(int32_t), "Weird size");
+  // Check that std::atomic has the expected layout.
+  static_assert(alignof(std::atomic_uint32_t) == alignof(uint32_t), "Weird alignment");
+  static_assert(sizeof(std::atomic_uint32_t) == sizeof(uint32_t), "Weird size");
+  static_assert(alignof(std::atomic<void*>) == alignof(void*), "Weird alignment");
+  static_assert(sizeof(std::atomic<void*>) == sizeof(void*), "Weird size");
 
   // GDB may set breakpoint here. We must ensure it is not removed or deduplicated.
   void __attribute__((noinline)) __jit_debug_register_code() {
@@ -103,17 +136,20 @@
   JITDescriptor __dex_debug_descriptor {};
 }
 
-// Mark the descriptor as "locked", so native tools know the data is unstable.
-// Returns the old value of the counter.
-static int32_t LockActionCounter(JITDescriptor& descriptor) {
-  return descriptor.action_counter_.exchange(-1);
+// Mark the descriptor as "locked", so native tools know the data is being modified.
+static void ActionSeqlock(JITDescriptor& descriptor) {
+  DCHECK_EQ(descriptor.action_seqlock_.load() & 1, 0u) << "Already locked";
+  descriptor.action_seqlock_.fetch_add(1, std::memory_order_relaxed);
+  // Ensure that any writes within the locked section cannot be reordered before the increment.
+  std::atomic_thread_fence(std::memory_order_release);
 }
 
 // Mark the descriptor as "unlocked", so native tools know the data is safe to read.
-// It will also increment the value so that the tools know the data has changed.
-static void UnlockActionCounter(JITDescriptor& descriptor, int32_t old_value) {
-  int32_t new_value = (old_value + 1) & 0x7FFFFFFF;  // Handle overflow to avoid -1.
-  descriptor.action_counter_.store(new_value);
+static void ActionSequnlock(JITDescriptor& descriptor) {
+  DCHECK_EQ(descriptor.action_seqlock_.load() & 1, 1u) << "Already unlocked";
+  // Ensure that any writes within the locked section cannot be reordered after the increment.
+  std::atomic_thread_fence(std::memory_order_release);
+  descriptor.action_seqlock_.fetch_add(1, std::memory_order_relaxed);
 }
 
 static JITCodeEntry* CreateJITCodeEntryInternal(
@@ -121,23 +157,29 @@
     void (*register_code_ptr)(),
     const ArrayRef<const uint8_t>& symfile)
     REQUIRES(Locks::native_debug_interface_lock_) {
-  int32_t old_action_counter = LockActionCounter(descriptor);
+  // Ensure the timestamp is monotonically increasing even in the presence of a
+  // low-granularity system timer.  This ensures each entry has a unique timestamp.
+  uint64_t timestamp = std::max(descriptor.action_timestamp_ + 1, NanoTime());
 
+  JITCodeEntry* head = descriptor.head_.load(std::memory_order_relaxed);
   JITCodeEntry* entry = new JITCodeEntry;
   CHECK(entry != nullptr);
   entry->symfile_addr_ = symfile.data();
   entry->symfile_size_ = symfile.size();
   entry->prev_ = nullptr;
-  entry->next_ = descriptor.first_entry_;
-  entry->register_timestamp_ = NanoTime();
-  if (entry->next_ != nullptr) {
-    entry->next_->prev_ = entry;
+  entry->next_.store(head, std::memory_order_relaxed);
+  entry->register_timestamp_ = timestamp;
+
+  // We are going to modify the linked list, so take the seqlock.
+  ActionSeqlock(descriptor);
+  if (head != nullptr) {
+    head->prev_ = entry;
   }
-  descriptor.first_entry_ = entry;
+  descriptor.head_.store(entry, std::memory_order_relaxed);
   descriptor.relevant_entry_ = entry;
   descriptor.action_flag_ = JIT_REGISTER_FN;
-  descriptor.action_timestamp_ = entry->register_timestamp_;
-  UnlockActionCounter(descriptor, old_action_counter);
+  descriptor.action_timestamp_ = timestamp;
+  ActionSequnlock(descriptor);
 
   (*register_code_ptr)();
   return entry;
@@ -149,23 +191,35 @@
     JITCodeEntry* entry)
     REQUIRES(Locks::native_debug_interface_lock_) {
   CHECK(entry != nullptr);
-  int32_t old_action_counter = LockActionCounter(descriptor);
 
+  // Ensure the timestamp is monotonically increasing even in the presence of a
+  // low-granularity system timer.  This ensures each entry has a unique timestamp.
+  uint64_t timestamp = std::max(descriptor.action_timestamp_ + 1, NanoTime());
+
+  // We are going to modify the linked list, so take the seqlock.
+  ActionSeqlock(descriptor);
+  JITCodeEntry* next = entry->next_.load(std::memory_order_relaxed);
   if (entry->prev_ != nullptr) {
-    entry->prev_->next_ = entry->next_;
+    entry->prev_->next_.store(next, std::memory_order_relaxed);
   } else {
-    descriptor.first_entry_ = entry->next_;
+    descriptor.head_.store(next, std::memory_order_relaxed);
   }
-  if (entry->next_ != nullptr) {
-    entry->next_->prev_ = entry->prev_;
+  if (next != nullptr) {
+    next->prev_ = entry->prev_;
   }
-
   descriptor.relevant_entry_ = entry;
   descriptor.action_flag_ = JIT_UNREGISTER_FN;
-  descriptor.action_timestamp_ = NanoTime();
-  UnlockActionCounter(descriptor, old_action_counter);
+  descriptor.action_timestamp_ = timestamp;
+  ActionSequnlock(descriptor);
 
   (*register_code_ptr)();
+
+  // Ensure that the clear below cannot be reordered before the unlock above.
+  std::atomic_thread_fence(std::memory_order_release);
+
+  // Aggressively clear the entry as an extra check of the synchronisation.
+  memset(entry, 0, sizeof(*entry));
+
   delete entry;
 }