Implement DDMS recent allocation tracking.

Change-Id: I7cec7311ba496f93e147ae17fbce94d4d17b5269
diff --git a/src/debugger.cc b/src/debugger.cc
index d8f58da..51e43c1 100644
--- a/src/debugger.cc
+++ b/src/debugger.cc
@@ -18,6 +18,9 @@
 
 #include <sys/uio.h>
 
+#include <set>
+
+#include "class_linker.h"
 #include "ScopedLocalRef.h"
 #include "ScopedPrimitiveArray.h"
 #include "stack_indirect_reference_table.h"
@@ -32,6 +35,9 @@
 
 namespace art {
 
+static const size_t kMaxAllocRecordStackDepth = 16; // Max 255.
+static const size_t kNumAllocRecords = 512; // Must be power of 2.
+
 class ObjectRegistry {
  public:
   ObjectRegistry() : lock_("ObjectRegistry lock") {
@@ -71,6 +77,34 @@
   std::map<JDWP::ObjectId, Object*> map_;
 };
 
+struct AllocRecordStackTraceElement {
+  const Method* method;
+  uintptr_t raw_pc;
+
+  int32_t LineNumber() const {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Class* c = method->GetDeclaringClass();
+    DexCache* dex_cache = c->GetDexCache();
+    const DexFile& dex_file = class_linker->FindDexFile(dex_cache);
+    return dex_file.GetLineNumFromPC(method, method->ToDexPC(raw_pc));
+  }
+};
+
+struct AllocRecord {
+  Class* type;
+  size_t byte_count;
+  uint16_t thin_lock_id;
+  AllocRecordStackTraceElement stack[kMaxAllocRecordStackDepth]; // Unused entries have NULL method.
+
+  size_t GetDepth() {
+    size_t depth = 0;
+    while (depth < kMaxAllocRecordStackDepth && stack[depth].method != NULL) {
+      ++depth;
+    }
+    return depth;
+  }
+};
+
 // JDWP is allowed unless the Zygote forbids it.
 static bool gJdwpAllowed = true;
 
@@ -96,6 +130,12 @@
 
 static ObjectRegistry* gRegistry = NULL;
 
+// Recent allocation tracking.
+static Mutex gAllocTrackerLock("AllocTracker lock");
+AllocRecord* Dbg::recent_allocation_records_ = NULL; // TODO: CircularBuffer<AllocRecord>
+static size_t gAllocRecordHead = 0;
+static size_t gAllocRecordCount = 0;
+
 /*
  * Handle one of the JDWP name/value pairs.
  *
@@ -813,15 +853,8 @@
 
     size_t byte_count = char_count*2 + sizeof(uint32_t)*2;
     std::vector<uint8_t> bytes(byte_count);
-    uint8_t* dst = &bytes[0];
-    JDWP::Write4BE(&dst, t->GetThinLockId());
-    JDWP::Write4BE(&dst, char_count);
-    if (char_count > 0) {
-      // Copy the UTF-16 string, transforming to big-endian.
-      while (char_count--) {
-        JDWP::Write2BE(&dst, *chars++);
-      }
-    }
+    JDWP::Append4BE(bytes, t->GetThinLockId());
+    JDWP::AppendUtf16BE(bytes, chars, char_count);
     Dbg::DdmSendChunk(type, bytes.size(), &bytes[0]);
   }
 }
@@ -937,15 +970,14 @@
    */
   uint8_t heap_count = 1;
   std::vector<uint8_t> bytes(4 + (heap_count * (4 + 8 + 1 + 4 + 4 + 4 + 4)));
-  uint8_t* dst = &bytes[0];
-  JDWP::Write4BE(&dst, heap_count);
-  JDWP::Write4BE(&dst, 1); // Heap id (bogus; we only have one heap).
-  JDWP::Write8BE(&dst, MilliTime());
-  JDWP::Write1BE(&dst, reason);
-  JDWP::Write4BE(&dst, Heap::GetMaxMemory()); // Max allowed heap size in bytes.
-  JDWP::Write4BE(&dst, Heap::GetTotalMemory()); // Current heap size in bytes.
-  JDWP::Write4BE(&dst, Heap::GetBytesAllocated());
-  JDWP::Write4BE(&dst, Heap::GetObjectsAllocated());
+  JDWP::Append4BE(bytes, heap_count);
+  JDWP::Append4BE(bytes, 1); // Heap id (bogus; we only have one heap).
+  JDWP::Append8BE(bytes, MilliTime());
+  JDWP::Append1BE(bytes, reason);
+  JDWP::Append4BE(bytes, Heap::GetMaxMemory()); // Max allowed heap size in bytes.
+  JDWP::Append4BE(bytes, Heap::GetTotalMemory()); // Current heap size in bytes.
+  JDWP::Append4BE(bytes, Heap::GetBytesAllocated());
+  JDWP::Append4BE(bytes, Heap::GetObjectsAllocated());
   Dbg::DdmSendChunk(CHUNK_TYPE("HPIF"), bytes.size(), &bytes[0]);
 }
 
@@ -1167,4 +1199,310 @@
   Dbg::DdmSendChunk(native ? CHUNK_TYPE("NHEN") : CHUNK_TYPE("HPEN"), sizeof(heap_id), heap_id);
 }
 
+void Dbg::SetAllocTrackingEnabled(bool enabled) {
+  MutexLock mu(gAllocTrackerLock);
+  if (enabled) {
+    if (recent_allocation_records_ == NULL) {
+      LOG(INFO) << "Enabling alloc tracker (" << kNumAllocRecords << " entries, "
+                << kMaxAllocRecordStackDepth << " frames --> "
+                << (sizeof(AllocRecord) * kNumAllocRecords) << " bytes)";
+      gAllocRecordHead = gAllocRecordCount = 0;
+      recent_allocation_records_ = new AllocRecord[kNumAllocRecords];
+      CHECK(recent_allocation_records_ != NULL);
+    }
+  } else {
+    delete[] recent_allocation_records_;
+    recent_allocation_records_ = NULL;
+  }
+}
+
+struct AllocRecordStackVisitor : public Thread::StackVisitor {
+  AllocRecordStackVisitor(AllocRecord* record) : record(record), depth(0) {
+  }
+
+  virtual void VisitFrame(const Frame& f, uintptr_t pc) {
+    if (depth >= kMaxAllocRecordStackDepth) {
+      return;
+    }
+    Method* m = f.GetMethod();
+    if (m == NULL || m->IsCalleeSaveMethod()) {
+      return;
+    }
+    record->stack[depth].method = m;
+    record->stack[depth].raw_pc = pc;
+    ++depth;
+  }
+
+  ~AllocRecordStackVisitor() {
+    // Clear out any unused stack trace elements.
+    for (; depth < kMaxAllocRecordStackDepth; ++depth) {
+      record->stack[depth].method = NULL;
+      record->stack[depth].raw_pc = 0;
+    }
+  }
+
+  AllocRecord* record;
+  size_t depth;
+};
+
+void Dbg::RecordAllocation(Class* type, size_t byte_count) {
+  Thread* self = Thread::Current();
+  CHECK(self != NULL);
+
+  MutexLock mu(gAllocTrackerLock);
+  if (recent_allocation_records_ == NULL) {
+    return;
+  }
+
+  // Advance and clip.
+  if (++gAllocRecordHead == kNumAllocRecords) {
+    gAllocRecordHead = 0;
+  }
+
+  // Fill in the basics.
+  AllocRecord* record = &recent_allocation_records_[gAllocRecordHead];
+  record->type = type;
+  record->byte_count = byte_count;
+  record->thin_lock_id = self->GetThinLockId();
+
+  // Fill in the stack trace.
+  AllocRecordStackVisitor visitor(record);
+  self->WalkStack(&visitor);
+
+  if (gAllocRecordCount < kNumAllocRecords) {
+    ++gAllocRecordCount;
+  }
+}
+
+/*
+ * Return the index of the head element.
+ *
+ * We point at the most-recently-written record, so if allocRecordCount is 1
+ * we want to use the current element.  Take "head+1" and subtract count
+ * from it.
+ *
+ * We need to handle underflow in our circular buffer, so we add
+ * kNumAllocRecords and then mask it back down.
+ */
+inline static int headIndex() {
+  return (gAllocRecordHead+1 + kNumAllocRecords - gAllocRecordCount) & (kNumAllocRecords-1);
+}
+
+void Dbg::DumpRecentAllocations() {
+  MutexLock mu(gAllocTrackerLock);
+  if (recent_allocation_records_ == NULL) {
+    LOG(INFO) << "Not recording tracked allocations";
+    return;
+  }
+
+  // "i" is the head of the list.  We want to start at the end of the
+  // list and move forward to the tail.
+  size_t i = headIndex();
+  size_t count = gAllocRecordCount;
+
+  LOG(INFO) << "Tracked allocations, (head=" << gAllocRecordHead << " count=" << count << ")";
+  while (count--) {
+    AllocRecord* record = &recent_allocation_records_[i];
+
+    LOG(INFO) << StringPrintf(" T=%-2d %6d ", record->thin_lock_id, record->byte_count)
+              << PrettyClass(record->type);
+
+    for (size_t stack_frame = 0; stack_frame < kMaxAllocRecordStackDepth; ++stack_frame) {
+      const Method* m = record->stack[stack_frame].method;
+      if (m == NULL) {
+        break;
+      }
+      LOG(INFO) << "    " << PrettyMethod(m) << " line " << record->stack[stack_frame].LineNumber();
+    }
+
+    // pause periodically to help logcat catch up
+    if ((count % 5) == 0) {
+      usleep(40000);
+    }
+
+    i = (i + 1) & (kNumAllocRecords-1);
+  }
+}
+
+class StringTable {
+ public:
+  StringTable() {
+  }
+
+  void Add(const String* s) {
+    table_.insert(s);
+  }
+
+  size_t IndexOf(const String* s) {
+    return std::distance(table_.begin(), table_.find(s));
+  }
+
+  size_t Size() {
+    return table_.size();
+  }
+
+  void WriteTo(std::vector<uint8_t>& bytes) {
+    typedef std::set<const String*>::const_iterator It; // TODO: C++0x auto
+    for (It it = table_.begin(); it != table_.end(); ++it) {
+      const String* s = *it;
+      JDWP::AppendUtf16BE(bytes, s->GetCharArray()->GetData(), s->GetLength());
+    }
+  }
+
+ private:
+  std::set<const String*> table_;
+  DISALLOW_COPY_AND_ASSIGN(StringTable);
+};
+
+/*
+ * The data we send to DDMS contains everything we have recorded.
+ *
+ * Message header (all values big-endian):
+ * (1b) message header len (to allow future expansion); includes itself
+ * (1b) entry header len
+ * (1b) stack frame len
+ * (2b) number of entries
+ * (4b) offset to string table from start of message
+ * (2b) number of class name strings
+ * (2b) number of method name strings
+ * (2b) number of source file name strings
+ * For each entry:
+ *   (4b) total allocation size
+ *   (2b) threadId
+ *   (2b) allocated object's class name index
+ *   (1b) stack depth
+ *   For each stack frame:
+ *     (2b) method's class name
+ *     (2b) method name
+ *     (2b) method source file
+ *     (2b) line number, clipped to 32767; -2 if native; -1 if no source
+ * (xb) class name strings
+ * (xb) method name strings
+ * (xb) source file strings
+ *
+ * As with other DDM traffic, strings are sent as a 4-byte length
+ * followed by UTF-16 data.
+ *
+ * We send up 16-bit unsigned indexes into string tables.  In theory there
+ * can be (kMaxAllocRecordStackDepth * kNumAllocRecords) unique strings in
+ * each table, but in practice there should be far fewer.
+ *
+ * The chief reason for using a string table here is to keep the size of
+ * the DDMS message to a minimum.  This is partly to make the protocol
+ * efficient, but also because we have to form the whole thing up all at
+ * once in a memory buffer.
+ *
+ * We use separate string tables for class names, method names, and source
+ * files to keep the indexes small.  There will generally be no overlap
+ * between the contents of these tables.
+ */
+jbyteArray Dbg::GetRecentAllocations() {
+  if (false) {
+    DumpRecentAllocations();
+  }
+
+  MutexLock mu(gAllocTrackerLock);
+
+  /*
+   * Part 1: generate string tables.
+   */
+  StringTable class_names;
+  StringTable method_names;
+  StringTable filenames;
+
+  int count = gAllocRecordCount;
+  int idx = headIndex();
+  while (count--) {
+    AllocRecord* record = &recent_allocation_records_[idx];
+
+    class_names.Add(record->type->GetDescriptor());
+
+    for (size_t i = 0; i < kMaxAllocRecordStackDepth; i++) {
+      const Method* m = record->stack[i].method;
+      if (m != NULL) {
+        class_names.Add(m->GetDeclaringClass()->GetDescriptor());
+        method_names.Add(m->GetName());
+        filenames.Add(m->GetDeclaringClass()->GetSourceFile());
+      }
+    }
+
+    idx = (idx + 1) & (kNumAllocRecords-1);
+  }
+
+  LOG(INFO) << "allocation records: " << gAllocRecordCount;
+
+  /*
+   * Part 2: allocate a buffer and generate the output.
+   */
+  std::vector<uint8_t> bytes;
+
+  // (1b) message header len (to allow future expansion); includes itself
+  // (1b) entry header len
+  // (1b) stack frame len
+  const int kMessageHeaderLen = 15;
+  const int kEntryHeaderLen = 9;
+  const int kStackFrameLen = 8;
+  JDWP::Append1BE(bytes, kMessageHeaderLen);
+  JDWP::Append1BE(bytes, kEntryHeaderLen);
+  JDWP::Append1BE(bytes, kStackFrameLen);
+
+  // (2b) number of entries
+  // (4b) offset to string table from start of message
+  // (2b) number of class name strings
+  // (2b) number of method name strings
+  // (2b) number of source file name strings
+  JDWP::Append2BE(bytes, gAllocRecordCount);
+  size_t string_table_offset = bytes.size();
+  JDWP::Append4BE(bytes, 0); // We'll patch this later...
+  JDWP::Append2BE(bytes, class_names.Size());
+  JDWP::Append2BE(bytes, method_names.Size());
+  JDWP::Append2BE(bytes, filenames.Size());
+
+  count = gAllocRecordCount;
+  idx = headIndex();
+  while (count--) {
+    // For each entry:
+    // (4b) total allocation size
+    // (2b) thread id
+    // (2b) allocated object's class name index
+    // (1b) stack depth
+    AllocRecord* record = &recent_allocation_records_[idx];
+    size_t stack_depth = record->GetDepth();
+    JDWP::Append4BE(bytes, record->byte_count);
+    JDWP::Append2BE(bytes, record->thin_lock_id);
+    JDWP::Append2BE(bytes, class_names.IndexOf(record->type->GetDescriptor()));
+    JDWP::Append1BE(bytes, stack_depth);
+
+    for (size_t stack_frame = 0; stack_frame < stack_depth; ++stack_frame) {
+      // For each stack frame:
+      // (2b) method's class name
+      // (2b) method name
+      // (2b) method source file
+      // (2b) line number, clipped to 32767; -2 if native; -1 if no source
+      const Method* m = record->stack[stack_frame].method;
+      JDWP::Append2BE(bytes, class_names.IndexOf(m->GetDeclaringClass()->GetDescriptor()));
+      JDWP::Append2BE(bytes, method_names.IndexOf(m->GetName()));
+      JDWP::Append2BE(bytes, filenames.IndexOf(m->GetDeclaringClass()->GetSourceFile()));
+      JDWP::Append2BE(bytes, record->stack[stack_frame].LineNumber());
+    }
+
+    idx = (idx + 1) & (kNumAllocRecords-1);
+  }
+
+  // (xb) class name strings
+  // (xb) method name strings
+  // (xb) source file strings
+  JDWP::Set4BE(&bytes[string_table_offset], bytes.size());
+  class_names.WriteTo(bytes);
+  method_names.WriteTo(bytes);
+  filenames.WriteTo(bytes);
+
+  JNIEnv* env = Thread::Current()->GetJniEnv();
+  jbyteArray result = env->NewByteArray(bytes.size());
+  if (result != NULL) {
+    env->SetByteArrayRegion(result, 0, bytes.size(), reinterpret_cast<const jbyte*>(&bytes[0]));
+  }
+  return result;
+}
+
 }  // namespace art
diff --git a/src/debugger.h b/src/debugger.h
index 7ae6609..0aa2c83 100644
--- a/src/debugger.h
+++ b/src/debugger.h
@@ -30,6 +30,7 @@
 
 namespace art {
 
+struct AllocRecord;
 struct Thread;
 
 /*
@@ -240,6 +241,15 @@
   static void DdmSendChunk(uint32_t type, size_t len, const uint8_t* buf);
   static void DdmSendChunkV(uint32_t type, const struct iovec* iov, int iovcnt);
 
+  /*
+   * Recent allocation tracking support.
+   */
+  static void RecordAllocation(Class* type, size_t byte_count);
+  static void SetAllocTrackingEnabled(bool enabled);
+  static inline bool IsAllocTrackingEnabled() { return recent_allocation_records_ != NULL; }
+  static jbyteArray GetRecentAllocations();
+  static void DumpRecentAllocations();
+
   enum HpifWhen {
     HPIF_WHEN_NEVER = 0,
     HPIF_WHEN_NOW = 1,
@@ -260,6 +270,9 @@
 
   static void DdmSendHeapInfo(HpifWhen reason);
   static void DdmSendHeapSegments(bool native);
+
+ private:
+  static AllocRecord* recent_allocation_records_;
 };
 
 #define CHUNK_TYPE(_name) \
diff --git a/src/heap.cc b/src/heap.cc
index b2ae47b..ac0c366 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -176,6 +176,9 @@
     Object* obj = AllocateLocked(byte_count);
     if (obj != NULL) {
       obj->SetClass(klass);
+      if (Dbg::IsAllocTrackingEnabled()) {
+        Dbg::RecordAllocation(klass, byte_count);
+      }
       return obj;
     }
   }
diff --git a/src/jdwp/jdwp_bits.h b/src/jdwp/jdwp_bits.h
index 3b951f9..5e5b120 100644
--- a/src/jdwp/jdwp_bits.h
+++ b/src/jdwp/jdwp_bits.h
@@ -21,6 +21,8 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
+#include <string>
+#include <vector>
 
 namespace art {
 
@@ -80,15 +82,52 @@
   return buf;
 }
 
+static inline void Append1BE(std::vector<uint8_t>& bytes, uint8_t value) {
+  bytes.push_back(value);
+}
+
+static inline void Append2BE(std::vector<uint8_t>& bytes, uint16_t value) {
+  bytes.push_back(static_cast<uint8_t>(value >> 8));
+  bytes.push_back(static_cast<uint8_t>(value));
+}
+
+static inline void Append4BE(std::vector<uint8_t>& bytes, uint32_t value) {
+  bytes.push_back(static_cast<uint8_t>(value >> 24));
+  bytes.push_back(static_cast<uint8_t>(value >> 16));
+  bytes.push_back(static_cast<uint8_t>(value >> 8));
+  bytes.push_back(static_cast<uint8_t>(value));
+}
+
+static inline void Append8BE(std::vector<uint8_t>& bytes, uint64_t value) {
+  bytes.push_back(static_cast<uint8_t>(value >> 56));
+  bytes.push_back(static_cast<uint8_t>(value >> 48));
+  bytes.push_back(static_cast<uint8_t>(value >> 40));
+  bytes.push_back(static_cast<uint8_t>(value >> 32));
+  bytes.push_back(static_cast<uint8_t>(value >> 24));
+  bytes.push_back(static_cast<uint8_t>(value >> 16));
+  bytes.push_back(static_cast<uint8_t>(value >> 8));
+  bytes.push_back(static_cast<uint8_t>(value));
+}
+
+static inline void AppendUtf16BE(std::vector<uint8_t>& bytes, const uint16_t* chars, size_t char_count) {
+  Append4BE(bytes, char_count);
+  for (size_t i = 0; i < char_count; ++i) {
+    Append2BE(bytes, chars[i]);
+  }
+}
+
+// @deprecated
 static inline void Set1(uint8_t* buf, uint8_t val) {
   *buf = (uint8_t)(val);
 }
 
+// @deprecated
 static inline void Set2BE(uint8_t* buf, uint16_t val) {
   *buf++ = (uint8_t)(val >> 8);
   *buf = (uint8_t)(val);
 }
 
+// @deprecated
 static inline void Set4BE(uint8_t* buf, uint32_t val) {
   *buf++ = (uint8_t)(val >> 24);
   *buf++ = (uint8_t)(val >> 16);
@@ -96,6 +135,7 @@
   *buf = (uint8_t)(val);
 }
 
+// @deprecated
 static inline void Set8BE(uint8_t* buf, uint64_t val) {
   *buf++ = (uint8_t)(val >> 56);
   *buf++ = (uint8_t)(val >> 48);
@@ -107,29 +147,19 @@
   *buf = (uint8_t)(val);
 }
 
+// @deprecated
 static inline void Write1BE(uint8_t** dst, uint8_t value) {
   Set1(*dst, value);
   *dst += sizeof(value);
 }
 
-static inline void Write2BE(uint8_t** dst, uint16_t value) {
-  Set2BE(*dst, value);
-  *dst += sizeof(value);
-}
-
+// @deprecated
 static inline void Write4BE(uint8_t** dst, uint32_t value) {
   Set4BE(*dst, value);
   *dst += sizeof(value);
 }
 
-static inline void Write8BE(uint8_t** dst, uint64_t value) {
-  Set8BE(*dst, value);
-  *dst += sizeof(value);
-}
-
-/*
- * Stuff a UTF-8 string into the buffer.
- */
+// @deprecated
 static inline void SetUtf8String(uint8_t* buf, const uint8_t* str) {
   uint32_t strLen = strlen((const char*)str);
   Set4BE(buf, strLen);
diff --git a/src/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/src/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index 1cda892..5ea3729 100644
--- a/src/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/src/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -27,26 +27,15 @@
 namespace {
 
 static void DdmVmInternal_enableRecentAllocations(JNIEnv* env, jclass, jboolean enable) {
-  UNIMPLEMENTED(WARNING);
-  if (enable) {
-    //dvmEnableAllocTracker();
-  } else {
-    //dvmDisableAllocTracker();
-  }
+  Dbg::SetAllocTrackingEnabled(enable);
 }
 
 static jbyteArray DdmVmInternal_getRecentAllocations(JNIEnv* env, jclass) {
-  UNIMPLEMENTED(WARNING);
-  return NULL;
-  //ArrayObject* data = dvmDdmGetRecentAllocations();
-  //dvmReleaseTrackedAlloc(data, NULL);
-  //return reinterpret_cast<jbyteArray>(addLocalReference(env, data));
+  return Dbg::GetRecentAllocations();
 }
 
 static jboolean DdmVmInternal_getRecentAllocationStatus(JNIEnv* env, jclass) {
-  UNIMPLEMENTED(WARNING);
-  return JNI_FALSE;
-  //return (gDvm.allocRecords != NULL);
+  return Dbg::IsAllocTrackingEnabled();
 }
 
 static Thread* FindThreadByThinLockId(uint32_t thin_lock_id) {
@@ -149,7 +138,9 @@
   }
 
   jbyteArray result = env->NewByteArray(bytes.size());
-  env->SetByteArrayRegion(result, 0, bytes.size(), reinterpret_cast<const jbyte*>(&bytes[0]));
+  if (result != NULL) {
+    env->SetByteArrayRegion(result, 0, bytes.size(), reinterpret_cast<const jbyte*>(&bytes[0]));
+  }
   return result;
 }