logd: record multiple duplicate messages as chatty

If a series of messages arrive from a single source with identical
message content payload, then suppress them and generate a chatty
report.  The checking is done on a per log id basis.

This alters the assumption that chatty messages are always at the
oldest entries, they now show up in the middle too.  To address this
change in behavior we print the first line, a chatty reference
which internally takes little space, then the last line in the series.

This does not conserve processing time in logd, and certainly has no
impact on the long path of formatting and submitting log messages from
from the source, but it may contribute to memory space and signal to
noise savings under heavy spammy loads.

Test: gTest liblog-unit-tests, logd-unit-tests & logcat-unit-tests
Bug: 33535908
Change-Id: I3160c36d4f4e2f8216f528605a1b3993173f4dec
diff --git a/logd/LogAudit.cpp b/logd/LogAudit.cpp
index c3ccd84..aa05932 100644
--- a/logd/LogAudit.cpp
+++ b/logd/LogAudit.cpp
@@ -34,6 +34,7 @@
 #include "LogBuffer.h"
 #include "LogKlog.h"
 #include "LogReader.h"
+#include "LogUtils.h"
 
 #define KMSG_PRIORITY(PRI)                          \
     '<',                                            \
@@ -117,7 +118,8 @@
             if (avcl) {
                 char *avcr = strstr(str, avc);
 
-                skip = avcr && !strcmp(avcl + strlen(avc), avcr + strlen(avc));
+                skip = avcr && !fastcmp<strcmp>(avcl + strlen(avc),
+                                                avcr + strlen(avc));
                 if (skip) {
                     ++count;
                     free(last_str);
diff --git a/logd/LogBuffer.cpp b/logd/LogBuffer.cpp
index a009433..d476596 100644
--- a/logd/LogBuffer.cpp
+++ b/logd/LogBuffer.cpp
@@ -33,6 +33,7 @@
 #include "LogBuffer.h"
 #include "LogKlog.h"
 #include "LogReader.h"
+#include "LogUtils.h"
 
 #ifndef __predict_false
 #define __predict_false(exp) __builtin_expect((exp) != 0, 0)
@@ -110,9 +111,65 @@
         mTimes(*times) {
     pthread_mutex_init(&mLogElementsLock, NULL);
 
+    log_id_for_each(i) {
+        lastLoggedElements[i] = NULL;
+        droppedElements[i] = NULL;
+    }
+
     init();
 }
 
+LogBuffer::~LogBuffer() {
+    log_id_for_each(i) {
+        delete lastLoggedElements[i];
+        delete droppedElements[i];
+    }
+}
+
+static bool identical(LogBufferElement* elem, LogBufferElement* last) {
+    // is it mostly identical?
+//  if (!elem) return false;
+    unsigned short lenl = elem->getMsgLen();
+    if (!lenl) return false;
+//  if (!last) return false;
+    unsigned short lenr = last->getMsgLen();
+    if (!lenr) return false;
+//  if (elem->getLogId() != last->getLogId()) return false;
+    if (elem->getUid() != last->getUid()) return false;
+    if (elem->getPid() != last->getPid()) return false;
+    if (elem->getTid() != last->getTid()) return false;
+
+    // last is more than a minute old, stop squashing identical messages
+    if (elem->getRealTime().nsec() >
+        (last->getRealTime().nsec() + 60 * NS_PER_SEC)) return false;
+
+    // Identical message
+    const char* msgl = elem->getMsg();
+    const char* msgr = last->getMsg();
+    if ((lenl == lenr) && !fastcmp<memcmp>(msgl, msgr, lenl)) return true;
+
+    // audit message (except sequence number) identical?
+    static const char avc[] = "): avc: ";
+
+    if (last->isBinary()) {
+        if (fastcmp<memcmp>(msgl, msgr,
+                            sizeof(android_log_event_string_t) -
+                                sizeof(int32_t))) return false;
+        msgl += sizeof(android_log_event_string_t);
+        lenl -= sizeof(android_log_event_string_t);
+        msgr += sizeof(android_log_event_string_t);
+        lenr -= sizeof(android_log_event_string_t);
+    }
+    const char *avcl = android::strnstr(msgl, lenl, avc);
+    if (!avcl) return false;
+    lenl -= avcl - msgl;
+    const char *avcr = android::strnstr(msgr, lenr, avc);
+    if (!avcr) return false;
+    lenr -= avcr - msgr;
+    if (lenl != lenr) return false;
+    return !fastcmp<memcmp>(avcl + strlen(avc), avcr + strlen(avc), lenl);
+}
+
 int LogBuffer::log(log_id_t log_id, log_time realtime,
                    uid_t uid, pid_t pid, pid_t tid,
                    const char *msg, unsigned short len) {
@@ -145,14 +202,57 @@
     }
 
     pthread_mutex_lock(&mLogElementsLock);
+    LogBufferElement* currentLast = lastLoggedElements[log_id];
+    if (currentLast) {
+        LogBufferElement *dropped = droppedElements[log_id];
+        unsigned short count = dropped ? dropped->getDropped() : 0;
+        if (identical(elem, currentLast)) {
+            if (dropped) {
+                if (count == USHRT_MAX) {
+                    log(dropped);
+                    count = 1;
+                } else {
+                    delete dropped;
+                    ++count;
+                }
+            }
+            if (count) {
+                stats.add(currentLast);
+                stats.subtract(currentLast);
+                currentLast->setDropped(count);
+            }
+            droppedElements[log_id] = currentLast;
+            lastLoggedElements[log_id] = elem;
+            pthread_mutex_unlock(&mLogElementsLock);
+            return len;
+        }
+        if (dropped) {
+            log(dropped);
+            droppedElements[log_id] = NULL;
+        }
+        if (count) {
+            log(currentLast);
+        } else {
+            delete currentLast;
+        }
+    }
+    lastLoggedElements[log_id] = new LogBufferElement(*elem);
 
+    log(elem);
+    pthread_mutex_unlock(&mLogElementsLock);
+
+    return len;
+}
+
+// assumes mLogElementsLock held, owns elem, will look after garbage collection
+void LogBuffer::log(LogBufferElement* elem) {
     // Insert elements in time sorted order if possible
     //  NB: if end is region locked, place element at end of list
     LogBufferElementCollection::iterator it = mLogElements.end();
     LogBufferElementCollection::iterator last = it;
     while (last != mLogElements.begin()) {
         --it;
-        if ((*it)->getRealTime() <= realtime) {
+        if ((*it)->getRealTime() <= elem->getRealTime()) {
             break;
         }
         last = it;
@@ -169,7 +269,7 @@
 
         LastLogTimes::iterator times = mTimes.begin();
         while(times != mTimes.end()) {
-            LogTimeEntry *entry = (*times);
+            LogTimeEntry* entry = (*times);
             if (entry->owned_Locked()) {
                 if (!entry->mNonBlock) {
                     end_always = true;
@@ -187,17 +287,14 @@
                 || (end_set && (end >= (*last)->getSequence()))) {
             mLogElements.push_back(elem);
         } else {
-            mLogElements.insert(last,elem);
+            mLogElements.insert(last, elem);
         }
 
         LogTimeEntry::unlock();
     }
 
     stats.add(elem);
-    maybePrune(log_id);
-    pthread_mutex_unlock(&mLogElementsLock);
-
-    return len;
+    maybePrune(elem->getLogId());
 }
 
 // Prune at most 10% of the log entries or maxPrune, whichever is less.
diff --git a/logd/LogBuffer.h b/logd/LogBuffer.h
index ff9692e..932d55f 100644
--- a/logd/LogBuffer.h
+++ b/logd/LogBuffer.h
@@ -99,10 +99,15 @@
 
     bool monotonic;
 
+    LogBufferElement* lastLoggedElements[LOG_ID_MAX];
+    LogBufferElement* droppedElements[LOG_ID_MAX];
+    void log(LogBufferElement* elem);
+
 public:
     LastLogTimes &mTimes;
 
     explicit LogBuffer(LastLogTimes *times);
+    ~LogBuffer();
     void init();
     bool isMonotonic() { return monotonic; }
 
diff --git a/logd/LogBufferElement.cpp b/logd/LogBufferElement.cpp
index ec91f2a..5e37a30 100644
--- a/logd/LogBufferElement.cpp
+++ b/logd/LogBufferElement.cpp
@@ -50,6 +50,19 @@
         0;
 }
 
+LogBufferElement::LogBufferElement(const LogBufferElement &elem) :
+        mTag(elem.mTag),
+        mUid(elem.mUid),
+        mPid(elem.mPid),
+        mTid(elem.mTid),
+        mSequence(elem.mSequence),
+        mRealTime(elem.mRealTime),
+        mMsgLen(elem.mMsgLen),
+        mLogId(elem.mLogId) {
+    mMsg = new char[mMsgLen];
+    memcpy(mMsg, elem.mMsg, mMsgLen);
+}
+
 LogBufferElement::~LogBufferElement() {
     delete [] mMsg;
 }
@@ -157,8 +170,6 @@
                           mDropped, (mDropped > 1) ? "s" : "");
 
     size_t hdrLen;
-    // LOG_ID_SECURITY not strictly needed since spam filter not activated,
-    // but required for accuracy.
     if (isBinary()) {
         hdrLen = sizeof(android_log_event_string_t);
     } else {
diff --git a/logd/LogBufferElement.h b/logd/LogBufferElement.h
index fb7fbed..f8ffacd 100644
--- a/logd/LogBufferElement.h
+++ b/logd/LogBufferElement.h
@@ -59,6 +59,7 @@
     LogBufferElement(log_id_t log_id, log_time realtime,
                      uid_t uid, pid_t pid, pid_t tid,
                      const char *msg, unsigned short len);
+    LogBufferElement(const LogBufferElement &elem);
     virtual ~LogBufferElement();
 
     bool isBinary(void) const {
@@ -79,6 +80,7 @@
         return mDropped = value;
     }
     unsigned short getMsgLen() const { return mMsg ? mMsgLen : 0; }
+    const char* getMsg() const { return mMsg; }
     uint64_t getSequence(void) const { return mSequence; }
     static uint64_t getCurrentSequence(void) { return sequence.load(memory_order_relaxed); }
     log_time getRealTime(void) const { return mRealTime; }
diff --git a/logd/LogKlog.cpp b/logd/LogKlog.cpp
index 0b49fc1..f224079 100644
--- a/logd/LogKlog.cpp
+++ b/logd/LogKlog.cpp
@@ -306,19 +306,15 @@
 static const char resumeStr[] = "PM: suspend exit ";
 static const char suspendedStr[] = "Suspended for ";
 
-static const char *strnstr(const char *s, size_t len, const char *needle) {
+const char* android::strnstr(const char* s, size_t len, const char* needle) {
     char c;
 
-    if (!len) {
-        return NULL;
-    }
+    if (!len) return NULL;
     if ((c = *needle++) != 0) {
         size_t needleLen = strlen(needle);
         do {
             do {
-                if (len <= needleLen) {
-                    return NULL;
-                }
+                if (len <= needleLen) return NULL;
                 --len;
             } while (*s++ != c);
         } while (fastcmp<memcmp>(s, needle, needleLen));
@@ -349,25 +345,25 @@
             return;
         }
 
-        const char *b;
-        if (((b = strnstr(cp, len, suspendStr)))
+        const char* b;
+        if (((b = android::strnstr(cp, len, suspendStr)))
                 && ((size_t)((b += sizeof(suspendStr) - 1) - cp) < len)) {
             len -= b - cp;
             calculateCorrection(now, b, len);
-        } else if (((b = strnstr(cp, len, resumeStr)))
+        } else if (((b = android::strnstr(cp, len, resumeStr)))
                 && ((size_t)((b += sizeof(resumeStr) - 1) - cp) < len)) {
             len -= b - cp;
             calculateCorrection(now, b, len);
-        } else if (((b = strnstr(cp, len, healthd)))
+        } else if (((b = android::strnstr(cp, len, healthd)))
                 && ((size_t)((b += sizeof(healthd) - 1) - cp) < len)
-                && ((b = strnstr(b, len -= b - cp, battery)))
+                && ((b = android::strnstr(b, len -= b - cp, battery)))
                 && ((size_t)((b += sizeof(battery) - 1) - cp) < len)) {
             // NB: healthd is roughly 150us late, so we use it instead to
             //     trigger a check for ntp-induced or hardware clock drift.
             log_time real(CLOCK_REALTIME);
             log_time mono(CLOCK_MONOTONIC);
             correction = (real < mono) ? log_time::EPOCH : (real - mono);
-        } else if (((b = strnstr(cp, len, suspendedStr)))
+        } else if (((b = android::strnstr(cp, len, suspendedStr)))
                 && ((size_t)((b += sizeof(suspendStr) - 1) - cp) < len)) {
             len -= b - cp;
             log_time real;
@@ -466,18 +462,14 @@
 
 // Passed the entire SYSLOG_ACTION_READ_ALL buffer and interpret a
 // compensated start time.
-void LogKlog::synchronize(const char *buf, size_t len) {
-    const char *cp = strnstr(buf, len, suspendStr);
+void LogKlog::synchronize(const char* buf, size_t len) {
+    const char* cp = android::strnstr(buf, len, suspendStr);
     if (!cp) {
-        cp = strnstr(buf, len, resumeStr);
-        if (!cp) {
-            return;
-        }
+        cp = android::strnstr(buf, len, resumeStr);
+        if (!cp) return;
     } else {
-        const char *rp = strnstr(buf, len, resumeStr);
-        if (rp && (rp < cp)) {
-            cp = rp;
-        }
+        const char* rp = android::strnstr(buf, len, resumeStr);
+        if (rp && (rp < cp)) cp = rp;
     }
 
     do {
@@ -491,7 +483,7 @@
     log_time now;
     sniffTime(now, &cp, len - (cp - buf), true);
 
-    const char *suspended = strnstr(buf, len, suspendedStr);
+    const char* suspended = android::strnstr(buf, len, suspendedStr);
     if (!suspended || (suspended > cp)) {
         return;
     }
@@ -581,12 +573,12 @@
 //  logd.klogd:
 // return -1 if message logd.klogd: <signature>
 //
-int LogKlog::log(const char *buf, size_t len) {
-    if (auditd && strnstr(buf, len, " audit(")) {
+int LogKlog::log(const char* buf, size_t len) {
+    if (auditd && android::strnstr(buf, len, " audit(")) {
         return 0;
     }
 
-    const char *p = buf;
+    const char* p = buf;
     int pri = parseKernelPrio(&p, len);
 
     log_time now;
@@ -594,7 +586,7 @@
 
     // sniff for start marker
     const char klogd_message[] = "logd.klogd: ";
-    const char *start = strnstr(p, len - (p - buf), klogd_message);
+    const char* start = android::strnstr(p, len - (p - buf), klogd_message);
     if (start) {
         uint64_t sig = strtoll(start + sizeof(klogd_message) - 1, NULL, 10);
         if (sig == signature.nsec()) {
diff --git a/logd/LogStatistics.cpp b/logd/LogStatistics.cpp
index 3c8bd75..ddbb64f 100644
--- a/logd/LogStatistics.cpp
+++ b/logd/LogStatistics.cpp
@@ -71,8 +71,17 @@
     mSizes[log_id] += size;
     ++mElements[log_id];
 
-    mSizesTotal[log_id] += size;
-    ++mElementsTotal[log_id];
+    if (element->getDropped()) {
+        ++mDroppedElements[log_id];
+    } else {
+        // When caller adding a chatty entry, they will have already
+        // called add() and subtract() for each entry as they are
+        // evaluated and trimmed, thus recording size and number of
+        // elements, but we must recognize the manufactured dropped
+        // entry as not contributing to the lifetime totals.
+        mSizesTotal[log_id] += size;
+        ++mElementsTotal[log_id];
+    }
 
     if (log_id == LOG_ID_KERNEL) {
         return;
diff --git a/logd/LogStatistics.h b/logd/LogStatistics.h
index cb7ae2b..8bf655b 100644
--- a/logd/LogStatistics.h
+++ b/logd/LogStatistics.h
@@ -265,7 +265,7 @@
         if (pid != element->getPid()) {
             pid = -1;
         }
-        EntryBase::add(element);
+        EntryBaseDropped::add(element);
     }
 
     std::string formatHeader(const std::string &name, log_id_t id) const;
@@ -419,7 +419,7 @@
         if (pid != element->getPid()) {
             pid = -1;
         }
-        EntryBase::add(element);
+        EntryBaseDropped::add(element);
     }
 
     std::string formatHeader(const std::string &name, log_id_t id) const;
diff --git a/logd/LogUtils.h b/logd/LogUtils.h
index ec68062..d300a2a 100644
--- a/logd/LogUtils.h
+++ b/logd/LogUtils.h
@@ -40,6 +40,9 @@
 // Furnished in main.cpp. Thread safe.
 const char *tagToName(size_t *len, uint32_t tag);
 
+// Furnished by LogKlog.cpp.
+const char* strnstr(const char* s, size_t len, const char* needle);
+
 }
 
 // Furnished in LogCommand.cpp
diff --git a/logd/tests/logd_test.cpp b/logd/tests/logd_test.cpp
index 254a3f8..4e621e3 100644
--- a/logd/tests/logd_test.cpp
+++ b/logd/tests/logd_test.cpp
@@ -778,3 +778,102 @@
 
     close(fd);
 }
+
+static inline int32_t get4LE(const char* src)
+{
+    return src[0] | (src[1] << 8) | (src[2] << 16) | (src[3] << 24);
+}
+
+void __android_log_btwrite_multiple__helper(int count) {
+    log_time ts(CLOCK_MONOTONIC);
+
+    struct logger_list *logger_list;
+    ASSERT_TRUE(NULL != (logger_list = android_logger_list_open(
+        LOG_ID_EVENTS, ANDROID_LOG_RDONLY | ANDROID_LOG_NONBLOCK, 1000, 0)));
+
+    log_time ts1(CLOCK_MONOTONIC);
+
+    pid_t pid = fork();
+
+    if (pid == 0) {
+        // child
+        for (int i = count; i; --i) {
+            ASSERT_LT(0, __android_log_btwrite(0, EVENT_TYPE_LONG, &ts, sizeof(ts)));
+            usleep(100);
+        }
+        ASSERT_LT(0, __android_log_btwrite(0, EVENT_TYPE_LONG, &ts1, sizeof(ts1)));
+        usleep(1000000);
+
+        _exit(0);
+    }
+
+    siginfo_t info{};
+    ASSERT_EQ(0, TEMP_FAILURE_RETRY(waitid(P_PID, pid, &info, WEXITED)));
+
+    int expected_count = (count < 2) ? count : 2;
+    int expected_chatty_count = (count <= 2) ? 0 : 1;
+    int expected_expire_count = (count < 2) ? 0 : (count - 2);
+
+    count = 0;
+    int second_count = 0;
+    int chatty_count = 0;
+    int expire_count = 0;
+
+    for (;;) {
+        log_msg log_msg;
+        if (android_logger_list_read(logger_list, &log_msg) <= 0) break;
+
+        if ((log_msg.entry.pid != pid) ||
+            (log_msg.entry.len < (4 + 1 + 8)) ||
+            (log_msg.id() != LOG_ID_EVENTS)) continue;
+
+        char *eventData = log_msg.msg();
+        if (!eventData) continue;
+
+        uint32_t tag = get4LE(eventData);
+
+        if ((eventData[4] == EVENT_TYPE_LONG) && (log_msg.entry.len == (4 + 1 + 8))) {
+            if (tag != 0) continue;
+
+            log_time tx(eventData + 4 + 1);
+            if (ts == tx) {
+                ++count;
+            } else if (ts1 == tx) {
+                ++second_count;
+            }
+        } else if (eventData[4] == EVENT_TYPE_STRING) {
+            // chatty
+            if (tag != 1004) continue;
+            ++chatty_count;
+            // int len = get4LE(eventData + 4 + 1);
+            const char *cp = strstr(eventData + 4 + 1 + 4, " expire ");
+            if (!cp) continue;
+            unsigned val = 0;
+            sscanf(cp, " expire %u lines", &val);
+            expire_count += val;
+        }
+    }
+
+    EXPECT_EQ(expected_count, count);
+    EXPECT_EQ(1, second_count);
+    EXPECT_EQ(expected_chatty_count, chatty_count);
+    EXPECT_EQ(expected_expire_count, expire_count);
+
+    android_logger_list_close(logger_list);
+}
+
+TEST(logd, multiple_test_1) {
+    __android_log_btwrite_multiple__helper(1);
+}
+
+TEST(logd, multiple_test_2) {
+    __android_log_btwrite_multiple__helper(2);
+}
+
+TEST(logd, multiple_test_3) {
+    __android_log_btwrite_multiple__helper(3);
+}
+
+TEST(logd, multiple_test_10) {
+    __android_log_btwrite_multiple__helper(10);
+}