liblog: Instrument logging of logd write drops

- If logger system is prostrated, send an event message with the
  liblog tag from the associated UID and PID with a count of
  dropped messages once logging is resumed.
- Added to the README a description of the error return values.
- Describe in the README the appropriate mitigations for dropped
  messages.
- If the caller sees this message, then
  /proc/sys/net/unix/max_dgram_qlen is likely too small

Change-Id: Iaf387b9e5e1b6aa93bebc7481f9e8353732e3229
diff --git a/liblog/Android.mk b/liblog/Android.mk
index 5756c54..9353546 100644
--- a/liblog/Android.mk
+++ b/liblog/Android.mk
@@ -16,6 +16,8 @@
 LOCAL_PATH := $(my-dir)
 include $(CLEAR_VARS)
 
+liblog_cflags := $(shell sed -n 's/^\([0-9]*\)[ \t]*liblog[ \t].*/-DLIBLOG_LOG_TAG=\1/p' $(LOCAL_PATH)/event.logtags)
+
 ifneq ($(TARGET_USES_LOGD),false)
 liblog_sources := logd_write.c
 else
@@ -45,7 +47,7 @@
         uio.c
 endif
 
-liblog_host_sources := $(liblog_sources) fake_log_device.c
+liblog_host_sources := $(liblog_sources) fake_log_device.c event.logtags
 liblog_target_sources := $(liblog_sources) log_time.cpp log_is_loggable.c
 ifneq ($(TARGET_USES_LOGD),false)
 liblog_target_sources += log_read.c
@@ -57,7 +59,7 @@
 # ========================================================
 LOCAL_MODULE := liblog
 LOCAL_SRC_FILES := $(liblog_host_sources)
-LOCAL_CFLAGS := -DFAKE_LOG_DEVICE=1 -Werror
+LOCAL_CFLAGS := -DFAKE_LOG_DEVICE=1 -Werror $(liblog_cflags)
 LOCAL_MULTILIB := both
 include $(BUILD_HOST_STATIC_LIBRARY)
 
@@ -76,13 +78,13 @@
 include $(CLEAR_VARS)
 LOCAL_MODULE := liblog
 LOCAL_SRC_FILES := $(liblog_target_sources)
-LOCAL_CFLAGS := -Werror
+LOCAL_CFLAGS := -Werror $(liblog_cflags)
 include $(BUILD_STATIC_LIBRARY)
 
 include $(CLEAR_VARS)
 LOCAL_MODULE := liblog
 LOCAL_WHOLE_STATIC_LIBRARIES := liblog
-LOCAL_CFLAGS := -Werror
+LOCAL_CFLAGS := -Werror $(liblog_cflags)
 include $(BUILD_SHARED_LIBRARY)
 
 include $(call first-makefiles-under,$(LOCAL_PATH))
diff --git a/liblog/README b/liblog/README
index 461dfbe..f29ac04 100644
--- a/liblog/README
+++ b/liblog/README
@@ -131,6 +131,33 @@
        when  opening  the  sub-log.    It  is  recommended  to  open  the  log
        ANDROID_LOG_RDONLY in these cases.
 
+ERRORS
+       If messages fail, a negative error code will be returned to the caller.
+
+       The -ENOTCONN return code indicates that the logger daemon is stopped.
+
+       The  -EBADF return code indicates that the log access point can not be
+       opened, or the log buffer id is out of range.
+
+       For the  -EAGAIN  return code,  this means that the logging message was
+       temporarily backed-up either because of Denial Of Service (DOS) logging
+       pressure from some chatty application or service in the Android system,
+       or if too small of a value is set in /proc/sys/net/unix/max_dgram_qlen.
+       To aid in diagnosing the occurence of this,  a binary event from liblog
+       will be sent to the  log  daemon  once a  new  message  can get through
+       indicating how many  messages were  dropped  as a result.   Please take
+       action to resolve the structural problems at the source.
+
+       It is generally not advised for the caller to retry the  -EAGAIN return
+       code as  this  will  only  make the  problem(s)  worse  and  cause your
+       application to temporarily drop to the  logger daemon  priority,  BATCH
+       scheduling policy and background task cgroup. If you require a group of
+       messages to be passed atomically,  merge  them  into  one  message with
+       embedded newlines to the maximum length LOGGER_ENTRY_MAX_PAYLOAD.
+
+       Other return codes  from  writing operation can be returned.  Since the
+       library retries on EINTR, -EINTR should never be returned.
+
 SEE ALSO
        syslogd(8)
 
diff --git a/liblog/event.logtags b/liblog/event.logtags
new file mode 100644
index 0000000..72ecab1
--- /dev/null
+++ b/liblog/event.logtags
@@ -0,0 +1,36 @@
+# The entries in this file map a sparse set of log tag numbers to tag names.
+# This is installed on the device, in /system/etc, and parsed by logcat.
+#
+# Tag numbers are decimal integers, from 0 to 2^31.  (Let's leave the
+# negative values alone for now.)
+#
+# Tag names are one or more ASCII letters and numbers or underscores, i.e.
+# "[A-Z][a-z][0-9]_".  Do not include spaces or punctuation (the former
+# impacts log readability, the latter makes regex searches more annoying).
+#
+# Tag numbers and names are separated by whitespace.  Blank lines and lines
+# starting with '#' are ignored.
+#
+# Optionally, after the tag names can be put a description for the value(s)
+# of the tag. Description are in the format
+#    (<name>|data type[|data unit])
+# Multiple values are separated by commas.
+#
+# The data type is a number from the following values:
+# 1: int
+# 2: long
+# 3: string
+# 4: list
+#
+# The data unit is a number taken from the following list:
+# 1: Number of objects
+# 2: Number of bytes
+# 3: Number of milliseconds
+# 4: Number of allocations
+# 5: Id
+# 6: Percent
+# Default value for data of type int/long is 2 (bytes).
+#
+# TODO: generate ".java" and ".h" files with integer constants from this file.
+
+1005  liblog (dropped|1)
diff --git a/liblog/logd_write.c b/liblog/logd_write.c
index 0208c73..c8a6162 100644
--- a/liblog/logd_write.c
+++ b/liblog/logd_write.c
@@ -13,12 +13,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <endian.h>
 #include <errno.h>
 #include <fcntl.h>
 #if !defined(_WIN32)
 #include <pthread.h>
 #endif
 #include <stdarg.h>
+#include <stdatomic.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -172,6 +174,7 @@
     size_t i, payload_size;
     static uid_t last_uid = AID_ROOT; /* logd *always* starts up as AID_ROOT */
     static pid_t last_pid = (pid_t) -1;
+    static atomic_int_fast32_t dropped;
 
     if (last_uid == AID_ROOT) { /* have we called to get the UID yet? */
         last_uid = getuid();
@@ -206,7 +209,6 @@
     pmsg_header.uid = last_uid;
     pmsg_header.pid = last_pid;
 
-    header.id = log_id;
     header.tid = gettid();
     header.realtime.tv_sec = ts.tv_sec;
     header.realtime.tv_nsec = ts.tv_nsec;
@@ -216,6 +218,28 @@
     newVec[1].iov_base   = (unsigned char *) &header;
     newVec[1].iov_len    = sizeof(header);
 
+    if (logd_fd > 0) {
+        int32_t snapshot = atomic_exchange_explicit(&dropped, 0, memory_order_relaxed);
+        if (snapshot) {
+            android_log_event_int_t buffer;
+
+            header.id = LOG_ID_EVENTS;
+            buffer.header.tag = htole32(LIBLOG_LOG_TAG);
+            buffer.payload.type = EVENT_TYPE_INT;
+            buffer.payload.data = htole32(snapshot);
+
+            newVec[2].iov_base = &buffer;
+            newVec[2].iov_len  = sizeof(buffer);
+
+            ret = TEMP_FAILURE_RETRY(writev(logd_fd, newVec + 1, 2));
+            if (ret != (ssize_t)(sizeof(header) + sizeof(buffer))) {
+                atomic_fetch_add_explicit(&dropped, snapshot, memory_order_relaxed);
+            }
+        }
+    }
+
+    header.id = log_id;
+
     for (payload_size = 0, i = header_length; i < nr + header_length; i++) {
         newVec[i].iov_base = vec[i - header_length].iov_base;
         payload_size += newVec[i].iov_len = vec[i - header_length].iov_len;
@@ -281,6 +305,8 @@
 
     if (ret > (ssize_t)sizeof(header)) {
         ret -= sizeof(header);
+    } else if (ret == -EAGAIN) {
+        atomic_fetch_add_explicit(&dropped, 1, memory_order_relaxed);
     }
 #endif