liblog: logcat: Add printable format modifier

(cherry pick from commit b932b2f8bac124b088edc93966281cca4567dc54)

- added printable format modifier:
  logcat -v printable
- opencoded borrowed individual utf8 validity checking algorithm
  from utf8_length() in libutils/Unicode.cpp
- if considered basic one-character ASCII, use popular \x escape
  sequences for non-printable
- logprint convert to C comments to drop mixed-mode

Bug: 19000361
Change-Id: I122a5b8fb41216fc0bc816178c0b768f3df56586
diff --git a/include/log/logprint.h b/include/log/logprint.h
index 96249e9..4b812cc 100644
--- a/include/log/logprint.h
+++ b/include/log/logprint.h
@@ -36,9 +36,10 @@
     FORMAT_TIME,
     FORMAT_THREADTIME,
     FORMAT_LONG,
-    /* The following two are modifiers to above formats */
+    /* The following three are modifiers to above formats */
     FORMAT_MODIFIER_COLOR,     /* converts priority to color */
     FORMAT_MODIFIER_TIME_USEC, /* switches from msec to usec time precision */
+    FORMAT_MODIFIER_PRINTABLE, /* converts non-printable to printable escapes */
 } AndroidLogPrintFormat;
 
 typedef struct AndroidLogFormat_t AndroidLogFormat;
diff --git a/liblog/logprint.c b/liblog/logprint.c
index a3f1d7e..c2f1545 100644
--- a/liblog/logprint.c
+++ b/liblog/logprint.c
@@ -32,6 +32,9 @@
 #include <log/logd.h>
 #include <log/logprint.h>
 
+/* open coded fragment, prevent circular dependencies */
+#define WEAK static
+
 typedef struct FilterInfo_t {
     char *mTag;
     android_LogPriority mPri;
@@ -44,6 +47,7 @@
     AndroidLogPrintFormat format;
     bool colored_output;
     bool usec_time_output;
+    bool printable_output;
 };
 
 /*
@@ -187,6 +191,7 @@
     p_ret->format = FORMAT_BRIEF;
     p_ret->colored_output = false;
     p_ret->usec_time_output = false;
+    p_ret->printable_output = false;
 
     return p_ret;
 }
@@ -212,13 +217,18 @@
 int android_log_setPrintFormat(AndroidLogFormat *p_format,
         AndroidLogPrintFormat format)
 {
-    if (format == FORMAT_MODIFIER_COLOR) {
+    switch (format) {
+    case FORMAT_MODIFIER_COLOR:
         p_format->colored_output = true;
         return 0;
-    }
-    if (format == FORMAT_MODIFIER_TIME_USEC) {
+    case FORMAT_MODIFIER_TIME_USEC:
         p_format->usec_time_output = true;
         return 0;
+    case FORMAT_MODIFIER_PRINTABLE:
+        p_format->printable_output = true;
+        return 0;
+    default:
+        break;
     }
     p_format->format = format;
     return 1;
@@ -241,6 +251,7 @@
     else if (strcmp(formatString, "long") == 0) format = FORMAT_LONG;
     else if (strcmp(formatString, "color") == 0) format = FORMAT_MODIFIER_COLOR;
     else if (strcmp(formatString, "usec") == 0) format = FORMAT_MODIFIER_TIME_USEC;
+    else if (strcmp(formatString, "printable") == 0) format = FORMAT_MODIFIER_PRINTABLE;
     else format = FORMAT_OFF;
 
     return format;
@@ -276,29 +287,35 @@
     }
 
     if(0 == strncmp("*", filterExpression, tagNameLength)) {
-        // This filter expression refers to the global filter
-        // The default level for this is DEBUG if the priority
-        // is unspecified
+        /*
+         * This filter expression refers to the global filter
+         * The default level for this is DEBUG if the priority
+         * is unspecified
+         */
         if (pri == ANDROID_LOG_DEFAULT) {
             pri = ANDROID_LOG_DEBUG;
         }
 
         p_format->global_pri = pri;
     } else {
-        // for filter expressions that don't refer to the global
-        // filter, the default is verbose if the priority is unspecified
+        /*
+         * for filter expressions that don't refer to the global
+         * filter, the default is verbose if the priority is unspecified
+         */
         if (pri == ANDROID_LOG_DEFAULT) {
             pri = ANDROID_LOG_VERBOSE;
         }
 
         char *tagName;
 
-// Presently HAVE_STRNDUP is never defined, so the second case is always taken
-// Darwin doesn't have strnup, everything else does
+/*
+ * Presently HAVE_STRNDUP is never defined, so the second case is always taken
+ * Darwin doesn't have strnup, everything else does
+ */
 #ifdef HAVE_STRNDUP
         tagName = strndup(filterExpression, tagNameLength);
 #else
-        //a few extra bytes copied...
+        /* a few extra bytes copied... */
         tagName = strdup(filterExpression);
         tagName[tagNameLength] = '\0';
 #endif /*HAVE_STRNDUP*/
@@ -335,9 +352,9 @@
     char *p_ret;
     int err;
 
-    // Yes, I'm using strsep
+    /* Yes, I'm using strsep */
     while (NULL != (p_ret = strsep(&p_cur, " \t,"))) {
-        // ignore whitespace-only entries
+        /* ignore whitespace-only entries */
         if(p_ret[0] != '\0') {
             err = android_log_addFilterRule(p_format, p_ret);
 
@@ -381,8 +398,10 @@
      * When that happens, we must null-terminate the message ourselves.
      */
     if (buf->len < 3) {
-        // An well-formed entry must consist of at least a priority
-        // and two null characters
+        /*
+         * An well-formed entry must consist of at least a priority
+         * and two null characters
+         */
         fprintf(stderr, "+++ LOG: entry too small\n");
         return -1;
     }
@@ -412,7 +431,7 @@
         return -1;
     }
     if (msgEnd == -1) {
-        // incoming message not null-terminated; force it
+        /* incoming message not null-terminated; force it */
         msgEnd = buf->len - 1;
         msg[msgEnd] = '\0';
     }
@@ -473,8 +492,6 @@
     type = *eventData++;
     eventDataLen--;
 
-    //fprintf(stderr, "--- type=%d (rem len=%d)\n", type, eventDataLen);
-
     switch (type) {
     case EVENT_TYPE_INT:
         /* 32-bit signed int */
@@ -735,6 +752,122 @@
     return 0;
 }
 
+/*
+ * One utf8 character at a time
+ *
+ * Returns the length of the utf8 character in the buffer,
+ * or -1 if illegal or truncated
+ *
+ * Open coded from libutils/Unicode.cpp, borrowed from utf8_length(),
+ * can not remove from here because of library circular dependencies.
+ * Expect one-day utf8_character_length with the same signature could
+ * _also_ be part of libutils/Unicode.cpp if its usefullness needs to
+ * propagate globally.
+ */
+WEAK ssize_t utf8_character_length(const char *src, size_t len)
+{
+    const char *cur = src;
+    const char first_char = *cur++;
+    static const uint32_t kUnicodeMaxCodepoint = 0x0010FFFF;
+    int32_t mask, to_ignore_mask;
+    size_t num_to_read;
+    uint32_t utf32;
+
+    if ((first_char & 0x80) == 0) { /* ASCII */
+        return 1;
+    }
+
+    /*
+     * (UTF-8's character must not be like 10xxxxxx,
+     *  but 110xxxxx, 1110xxxx, ... or 1111110x)
+     */
+    if ((first_char & 0x40) == 0) {
+        return -1;
+    }
+
+    for (utf32 = 1, num_to_read = 1, mask = 0x40, to_ignore_mask = 0x80;
+         num_to_read < 5 && (first_char & mask);
+         num_to_read++, to_ignore_mask |= mask, mask >>= 1) {
+        if (num_to_read > len) {
+            return -1;
+        }
+        if ((*cur & 0xC0) != 0x80) { /* can not be 10xxxxxx? */
+            return -1;
+        }
+        utf32 = (utf32 << 6) + (*cur++ & 0b00111111);
+    }
+    /* "first_char" must be (110xxxxx - 11110xxx) */
+    if (num_to_read >= 5) {
+        return -1;
+    }
+    to_ignore_mask |= mask;
+    utf32 |= ((~to_ignore_mask) & first_char) << (6 * (num_to_read - 1));
+    if (utf32 > kUnicodeMaxCodepoint) {
+        return -1;
+    }
+    return num_to_read;
+}
+
+/*
+ * Convert to printable from message to p buffer, return string length. If p is
+ * NULL, do not copy, but still return the expected string length.
+ */
+static size_t convertPrintable(char *p, const char *message, size_t messageLen)
+{
+    char *begin = p;
+    bool print = p != NULL;
+
+    while (messageLen) {
+        char buf[6];
+        ssize_t len = sizeof(buf) - 1;
+        if ((size_t)len > messageLen) {
+            len = messageLen;
+        }
+        len = utf8_character_length(message, len);
+
+        if (len < 0) {
+            snprintf(buf, sizeof(buf),
+                     ((messageLen > 1) && isdigit(message[1]))
+                         ? "\\%03o"
+                         : "\\%o",
+                     *message & 0377);
+            len = 1;
+        } else {
+            buf[0] = '\0';
+            if (len == 1) {
+                if (*message == '\a') {
+                    strcpy(buf, "\\a");
+                } else if (*message == '\b') {
+                    strcpy(buf, "\\b");
+                } else if (*message == '\t') {
+                    strcpy(buf, "\\t");
+                } else if (*message == '\v') {
+                    strcpy(buf, "\\v");
+                } else if (*message == '\f') {
+                    strcpy(buf, "\\f");
+                } else if (*message == '\r') {
+                    strcpy(buf, "\\r");
+                } else if (*message == '\\') {
+                    strcpy(buf, "\\\\");
+                } else if ((*message < ' ') || (*message & 0x80)) {
+                    snprintf(buf, sizeof(buf), "\\%o", *message & 0377);
+                }
+            }
+            if (!buf[0]) {
+                strncpy(buf, message, len);
+                buf[len] = '\0';
+            }
+        }
+        if (print) {
+            strcpy(p, buf);
+        }
+        p += strlen(buf);
+        message += len;
+        messageLen -= len;
+    }
+    return p - begin;
+}
+
 /**
  * Formats a log message into a buffer
  *
@@ -778,7 +911,7 @@
 #else
     ptm = localtime(&(entry->tv_sec));
 #endif
-    //strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%d %H:%M:%S", ptm);
+    /* strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%d %H:%M:%S", ptm); */
     strftime(timeBuf, sizeof(timeBuf), "%m-%d %H:%M:%S", ptm);
     len = strlen(timeBuf);
     if (p_format->usec_time_output) {
@@ -873,24 +1006,34 @@
     const char *pm;
 
     if (prefixSuffixIsHeaderFooter) {
-        // we're just wrapping message with a header/footer
+        /* we're just wrapping message with a header/footer */
         numLines = 1;
     } else {
         pm = entry->message;
         numLines = 0;
 
-        // The line-end finding here must match the line-end finding
-        // in for ( ... numLines...) loop below
+        /*
+         * The line-end finding here must match the line-end finding
+         * in for ( ... numLines...) loop below
+         */
         while (pm < (entry->message + entry->messageLen)) {
             if (*pm++ == '\n') numLines++;
         }
-        // plus one line for anything not newline-terminated at the end
+        /* plus one line for anything not newline-terminated at the end */
         if (pm > entry->message && *(pm-1) != '\n') numLines++;
     }
 
-    // this is an upper bound--newlines in message may be counted
-    // extraneously
-    bufferSize = (numLines * (prefixLen + suffixLen)) + entry->messageLen + 1;
+    /*
+     * this is an upper bound--newlines in message may be counted
+     * extraneously
+     */
+    bufferSize = (numLines * (prefixLen + suffixLen)) + 1;
+    if (p_format->printable_output) {
+        /* Calculate extra length to convert non-printable to printable */
+        bufferSize += convertPrintable(NULL, entry->message, entry->messageLen);
+    } else {
+        bufferSize += entry->messageLen;
+    }
 
     if (defaultBufferSize >= bufferSize) {
         ret = defaultBuffer;
@@ -910,8 +1053,12 @@
     if (prefixSuffixIsHeaderFooter) {
         strcat(p, prefixBuf);
         p += prefixLen;
-        strncat(p, entry->message, entry->messageLen);
-        p += entry->messageLen;
+        if (p_format->printable_output) {
+            p += convertPrintable(p, entry->message, entry->messageLen);
+        } else {
+            strncat(p, entry->message, entry->messageLen);
+            p += entry->messageLen;
+        }
         strcat(p, suffixBuf);
         p += suffixLen;
     } else {
@@ -920,15 +1067,19 @@
             size_t lineLen;
             lineStart = pm;
 
-            // Find the next end-of-line in message
+            /* Find the next end-of-line in message */
             while (pm < (entry->message + entry->messageLen)
                     && *pm != '\n') pm++;
             lineLen = pm - lineStart;
 
             strcat(p, prefixBuf);
             p += prefixLen;
-            strncat(p, lineStart, lineLen);
-            p += lineLen;
+            if (p_format->printable_output) {
+                p += convertPrintable(p, lineStart, lineLen);
+            } else {
+                strncat(p, lineStart, lineLen);
+                p += lineLen;
+            }
             strcat(p, suffixBuf);
             p += suffixLen;
 
diff --git a/logcat/logcat.cpp b/logcat/logcat.cpp
index 6d7740e..736e02e 100644
--- a/logcat/logcat.cpp
+++ b/logcat/logcat.cpp
@@ -253,7 +253,8 @@
                     "  -r <kbytes>     Rotate log every kbytes. Requires -f\n"
                     "  -n <count>      Sets max number of rotated logs to <count>, default 4\n"
                     "  -v <format>     Sets the log print format, where <format> is:\n\n"
-                    "                  brief color long process raw tag thread threadtime time usec\n\n"
+                    "                      brief color long printable process raw tag thread\n"
+                    "                      threadtime time usec\n\n"
                     "  -D              print dividers between each log buffer\n"
                     "  -c              clear (flush) the entire log and exit\n"
                     "  -d              dump the log and then exit (don't block)\n"