diff options
author | 2024-05-31 13:13:34 -0700 | |
---|---|---|
committer | 2024-06-04 16:55:51 -0700 | |
commit | 83e0e84dc15dafdd42d5336e3484a9f8ca95ee01 (patch) | |
tree | f5bca855f618a82438c66f06bcfa4c2d74179f5f | |
parent | 77cd3a2bf400c13b71d4ed31265a44bfd4b4c93b (diff) |
Make stack dumping errors display better.
Currently, the code first checks if a process is cached
and then checks if the process should be backtraced. For
example, the code should not backtrace the zygote process.
This CL moves the cached process check until after the
check to see if the process should be backtraced. Before
this change, the zygote was always in the list of processes,
and is always cached so it shows up as this anonymous
process that can't be backtraced. After this change, since
the zygote is not supposed to be backtraced, it is not present.
For any pids that would have been dumped but are bypassed or have
an error, make the header include the timestamp and the command line.
Improve the message about why a cached process is skipped.
For an unwind that fails:
- If it's a java process, try a native unwind.
- Display the time it took for the failed unwind.
- Check if the process is now cached and display that error message.
Bug: 339473422
Test: Trigger bugreport and verify everything continues to work.
Test: Fake an unwind failure and verify the output.
Test: Fake an unwind failure and fake a cached process and verify the output.
Test: Fake an unwind failure for a java thread, and do a native unwind.
Test: Ran the bugreports from above through the bugreport parser.
Change-Id: I3082f2ac619e179621ad3ff39f2e839d000b2576
-rw-r--r-- | cmds/dumpstate/dumpstate.cpp | 109 |
1 files changed, 91 insertions, 18 deletions
diff --git a/cmds/dumpstate/dumpstate.cpp b/cmds/dumpstate/dumpstate.cpp index 9b453826c4..5b817c1877 100644 --- a/cmds/dumpstate/dumpstate.cpp +++ b/cmds/dumpstate/dumpstate.cpp @@ -2186,6 +2186,74 @@ static void DumpstateOnboardingOnly() { ds.AddDir(LOGPERSIST_DATA_DIR, false); } +static std::string GetTimestamp(const timespec& ts) { + tm tm; + localtime_r(&ts.tv_sec, &tm); + + // Reserve enough space for the entire time string, includes the space + // for the '\0' to make the calculations below easier by using size for + // the total string size. + std::string str(sizeof("1970-01-01 00:00:00.123456789+0830"), '\0'); + size_t n = strftime(str.data(), str.size(), "%F %H:%M", &tm); + if (n == 0) { + return "TIMESTAMP FAILURE"; + } + int num_chars = snprintf(&str[n], str.size() - n, ":%02d.%09ld", tm.tm_sec, ts.tv_nsec); + if (num_chars > str.size() - n) { + return "TIMESTAMP FAILURE"; + } + n += static_cast<size_t>(num_chars); + if (strftime(&str[n], str.size() - n, "%z", &tm) == 0) { + return "TIMESTAMP FAILURE"; + } + return str; +} + +static std::string GetCmdline(pid_t pid) { + std::string cmdline; + if (!android::base::ReadFileToString(android::base::StringPrintf("/proc/%d/cmdline", pid), + &cmdline)) { + return "UNKNOWN"; + } + // There are '\0' terminators between arguments, convert them to spaces. + // But start by skipping all trailing '\0' values. + size_t cur = cmdline.size() - 1; + while (cur != 0 && cmdline[cur] == '\0') { + cur--; + } + if (cur == 0) { + return "UNKNOWN"; + } + while ((cur = cmdline.rfind('\0', cur)) != std::string::npos) { + cmdline[cur] = ' '; + } + return cmdline; +} + +static void DumpPidHeader(int fd, pid_t pid, const timespec& ts) { + // For consistency, the header to this message matches the one + // dumped by debuggerd. + dprintf(fd, "\n----- pid %d at %s -----\n", pid, GetTimestamp(ts).c_str()); + dprintf(fd, "Cmd line: %s\n", GetCmdline(pid).c_str()); +} + +static void DumpPidFooter(int fd, pid_t pid) { + // For consistency, the footer to this message matches the one + // dumped by debuggerd. + dprintf(fd, "----- end %d -----\n", pid); +} + +static bool DumpBacktrace(int fd, pid_t pid, bool is_java_process) { + int ret = dump_backtrace_to_file_timeout( + pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace, 3, fd); + if (ret == -1 && is_java_process) { + // Tried to unwind as a java process, try a native unwind. + dprintf(fd, "Java unwind failed for pid %d, trying a native unwind.\n", pid); + ret = dump_backtrace_to_file_timeout(pid, kDebuggerdNativeBacktrace, 3, fd); + } + return ret != -1; +} + Dumpstate::RunStatus Dumpstate::DumpTraces(const char** path) { const std::string temp_file_pattern = ds.bugreport_internal_dir_ + "/dumptrace_XXXXXX"; const size_t buf_size = temp_file_pattern.length() + 1; @@ -2234,16 +2302,6 @@ Dumpstate::RunStatus Dumpstate::DumpTraces(const char** path) { continue; } - // Skip cached processes. - if (IsCached(pid)) { - // For consistency, the header and footer to this message match those - // dumped by debuggerd in the success case. - dprintf(fd, "\n---- pid %d at [unknown] ----\n", pid); - dprintf(fd, "Dump skipped for cached process.\n"); - dprintf(fd, "---- end %d ----", pid); - continue; - } - const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid); std::string exe; if (!android::base::Readlink(link_name, &exe)) { @@ -2272,16 +2330,31 @@ Dumpstate::RunStatus Dumpstate::DumpTraces(const char** path) { break; } + timespec start_timespec; + clock_gettime(CLOCK_REALTIME, &start_timespec); + if (IsCached(pid)) { + DumpPidHeader(fd, pid, start_timespec); + dprintf(fd, "Process is cached, skipping backtrace due to high chance of timeout.\n"); + DumpPidFooter(fd, pid); + continue; + } + const uint64_t start = Nanotime(); - const int ret = dump_backtrace_to_file_timeout( - pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace, 3, fd); + if (!DumpBacktrace(fd, pid, is_java_process)) { + if (IsCached(pid)) { + DumpPidHeader(fd, pid, start_timespec); + dprintf(fd, "Backtrace failed, but process has become cached.\n"); + DumpPidFooter(fd, pid); + continue; + } - if (ret == -1) { - // For consistency, the header and footer to this message match those - // dumped by debuggerd in the success case. - dprintf(fd, "\n---- pid %d at [unknown] ----\n", pid); - dprintf(fd, "Dump failed, likely due to a timeout.\n"); - dprintf(fd, "---- end %d ----", pid); + DumpPidHeader(fd, pid, start_timespec); + dprintf(fd, "Backtrace gathering failed, likely due to a timeout.\n"); + DumpPidFooter(fd, pid); + + dprintf(fd, "\n[dump %s stack %d: %.3fs elapsed]\n", + is_java_process ? "dalvik" : "native", pid, + (float)(Nanotime() - start) / NANOS_PER_SEC); timeout_failures++; continue; } |