Use custom ids instead of thread_id to record threads in trace events

We only use 16-bits to identify a thread in the trace events. Thread ids
can be more than 16-bits and we just truncated it to 16-bits. This could
mean we might see a thread if of 0 which is used as a special value to
differentiate between method trace entries and method / thread
information entries.

This is especially a problem on host where thread ids usually don't fit
in 16-bits. On target, it is a less of a problem but there is no hard
limit on 64-bit devices that the thread ids should fit in 16-bits.
This lookup is only done each time we are flushing the thread local
buffer to the file. Hence the impact on performance isn't expected to be
much.

Bug: 279547877
Test: art/test.py -t 2246
Change-Id: I11d6b9a69db468960dc288b230aa322c51d87421
diff --git a/runtime/trace.cc b/runtime/trace.cc
index b50e23b..25b6f0b 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -223,6 +223,19 @@
   return idx;
 }
 
+uint16_t Trace::GetThreadEncoding(pid_t thread_id) {
+  auto it = thread_id_map_.find(thread_id);
+  if (it != thread_id_map_.end()) {
+    return it->second;
+  }
+
+  uint16_t idx = current_thread_index_;
+  thread_id_map_.emplace(thread_id, current_thread_index_);
+  DCHECK_LT(current_thread_index_, (1 << 16) - 2);
+  current_thread_index_++;
+  return idx;
+}
+
 std::vector<ArtMethod*>* Trace::AllocStackTrace() {
   return (temp_stack_trace_.get() != nullptr)  ? temp_stack_trace_.release() :
       new std::vector<ArtMethod*>();
@@ -790,6 +803,10 @@
     }
     cur_offset_.store(0, std::memory_order_relaxed);
   }
+
+  // Thread index of 0 is a special identifier used to distinguish between trace
+  // event entries and thread / method info entries.
+  current_thread_index_ = 1;
 }
 
 static uint64_t ReadBytes(uint8_t* buf, size_t bytes) {
@@ -1021,20 +1038,15 @@
     thread->GetThreadName(thread_name);
     static constexpr size_t kThreadNameHeaderSize = 7;
     uint8_t header[kThreadNameHeaderSize];
-    Append2LE(header, 0);
-    header[2] = kOpNewThread;
-    // We use only 16 bits to encode thread id. On Android, we don't expect to use more than
-    // 16-bits for a Tid. For 32-bit platforms it is always ensured we use less than 16 bits.
-    // See  __check_max_thread_id in bionic for more details. Even on 64-bit the max threads
-    // is currently less than 65536.
-    // TODO(mythria): On host, we know thread ids can be greater than 16 bits. Consider adding
-    // a map similar to method ids.
-    DCHECK(!kIsTargetBuild || thread->GetTid() < (1 << 16));
-    Append2LE(header + 3, static_cast<uint16_t>(thread->GetTid()));
-    Append2LE(header + 5, static_cast<uint16_t>(thread_name.length()));
 
     {
       MutexLock mu(Thread::Current(), tracing_lock_);
+      Append2LE(header, 0);
+      header[2] = kOpNewThread;
+      Append2LE(header + 3, GetThreadEncoding(thread->GetTid()));
+      DCHECK(thread_name.length() < (1 << 16));
+      Append2LE(header + 5, static_cast<uint16_t>(thread_name.length()));
+
       if (!trace_file_->WriteFully(header, kThreadNameHeaderSize) ||
           !trace_file_->WriteFully(reinterpret_cast<const uint8_t*>(thread_name.c_str()),
                                    thread_name.length())) {
@@ -1108,6 +1120,7 @@
   std::unique_ptr<uint8_t[]> buffer(new uint8_t[std::max(kMinBufSize, buffer_size)]);
 
   size_t num_entries = *(thread->GetMethodTraceIndexPtr());
+  uint16_t thread_id = GetThreadEncoding(thread->GetTid());
   for (size_t entry_index = 0; entry_index < num_entries;) {
     ArtMethod* method = reinterpret_cast<ArtMethod*>(method_trace_buffer[entry_index++]);
     TraceAction action = DecodeTraceAction(method_trace_buffer[entry_index++]);
@@ -1139,6 +1152,7 @@
       DCHECK_LT(kMethodNameHeaderSize, kPerThreadBufSize);
       Append2LE(method_header, 0);
       method_header[2] = kOpNewMethod;
+      DCHECK(method_line.length() < (1 << 16));
       Append2LE(method_header + 3, static_cast<uint16_t>(method_line.length()));
       WriteToBuf(method_header,
                  kMethodNameHeaderSize,
@@ -1154,7 +1168,7 @@
     DCHECK_LT(record_size, kPerThreadBufSize);
     EnsureSpace(buffer.get(), &current_index, buffer_size, record_size);
     EncodeEventEntry(
-        buffer.get() + current_index, thread, method_index, action, thread_time, wall_time);
+        buffer.get() + current_index, thread_id, method_index, action, thread_time, wall_time);
     current_index += record_size;
   }
 
@@ -1201,8 +1215,12 @@
   ptr = buf_.get() + old_offset;
   uint32_t wall_clock_diff = GetMicroTime(timestamp_counter) - start_time_;
   MutexLock mu(Thread::Current(), tracing_lock_);
-  EncodeEventEntry(
-      ptr, thread, EncodeTraceMethod(method), action, thread_clock_diff, wall_clock_diff);
+  EncodeEventEntry(ptr,
+                   GetThreadEncoding(thread->GetTid()),
+                   EncodeTraceMethod(method),
+                   action,
+                   thread_clock_diff,
+                   wall_clock_diff);
 }
 
 void Trace::LogMethodTraceEvent(Thread* thread,
@@ -1226,14 +1244,15 @@
 }
 
 void Trace::EncodeEventEntry(uint8_t* ptr,
-                             Thread* thread,
+                             uint16_t thread_id,
                              uint32_t method_index,
                              TraceAction action,
                              uint32_t thread_clock_diff,
                              uint32_t wall_clock_diff) {
   static constexpr size_t kPacketSize = 14U;  // The maximum size of data in a packet.
+  DCHECK(method_index < (1 << (32 - TraceActionBits)));
   uint32_t method_value = (method_index << TraceActionBits) | action;
-  Append2LE(ptr, thread->GetTid());
+  Append2LE(ptr, thread_id);
   Append4LE(ptr + 2, method_value);
   ptr += 6;
 
@@ -1269,15 +1288,9 @@
 }
 
 void Trace::DumpThreadList(std::ostream& os) {
+  MutexLock mu(Thread::Current(), tracing_lock_);
   for (const auto& it : threads_list_) {
-    // We use only 16 bits to encode thread id. On Android, we don't expect to use more than
-    // 16-bits for a Tid. For 32-bit platforms it is always ensured we use less than 16 bits.
-    // See  __check_max_thread_id in bionic for more details. Even on 64-bit the max threads
-    // is currently less than 65536.
-    // TODO(mythria): On host, we know thread ids can be greater than 16 bits. Consider adding
-    // a map similar to method ids.
-    DCHECK(!kIsTargetBuild || it.first < (1 << 16));
-    os << static_cast<uint16_t>(it.first) << "\t" << it.second << "\n";
+    os << GetThreadEncoding(it.first) << "\t" << it.second << "\n";
   }
 }
 
diff --git a/runtime/trace.h b/runtime/trace.h
index 6df21c6..5e74343 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -279,7 +279,7 @@
   // Encodes event in non-streaming mode. This assumes that there is enough space reserved to
   // encode the entry.
   void EncodeEventEntry(uint8_t* ptr,
-                        Thread* thread,
+                        uint16_t thread_id,
                         uint32_t method_index,
                         TraceAction action,
                         uint32_t thread_clock_diff,
@@ -330,6 +330,8 @@
 
   void UpdateThreadsList(Thread* thread);
 
+  uint16_t GetThreadEncoding(pid_t thread_id) REQUIRES(tracing_lock_);
+
   // Singleton instance of the Trace or null when no method tracing is active.
   static Trace* volatile the_trace_ GUARDED_BY(Locks::trace_lock_);
 
@@ -414,6 +416,10 @@
   std::unordered_map<ArtMethod*, uint32_t> art_method_id_map_ GUARDED_BY(tracing_lock_);
   uint32_t current_method_index_ = 0;
 
+  // Map from thread_id to a 16-bit identifier.
+  std::unordered_map<pid_t, uint16_t> thread_id_map_ GUARDED_BY(tracing_lock_);
+  uint16_t current_thread_index_;
+
   DISALLOW_COPY_AND_ASSIGN(Trace);
 };