summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Christopher Ferris <cferris@google.com> 2024-05-31 13:13:34 -0700
committer Christopher Ferris <cferris@google.com> 2024-06-04 16:55:51 -0700
commit83e0e84dc15dafdd42d5336e3484a9f8ca95ee01 (patch)
treef5bca855f618a82438c66f06bcfa4c2d74179f5f
parent77cd3a2bf400c13b71d4ed31265a44bfd4b4c93b (diff)
Make stack dumping errors display better.
Currently, the code first checks if a process is cached and then checks if the process should be backtraced. For example, the code should not backtrace the zygote process. This CL moves the cached process check until after the check to see if the process should be backtraced. Before this change, the zygote was always in the list of processes, and is always cached so it shows up as this anonymous process that can't be backtraced. After this change, since the zygote is not supposed to be backtraced, it is not present. For any pids that would have been dumped but are bypassed or have an error, make the header include the timestamp and the command line. Improve the message about why a cached process is skipped. For an unwind that fails: - If it's a java process, try a native unwind. - Display the time it took for the failed unwind. - Check if the process is now cached and display that error message. Bug: 339473422 Test: Trigger bugreport and verify everything continues to work. Test: Fake an unwind failure and verify the output. Test: Fake an unwind failure and fake a cached process and verify the output. Test: Fake an unwind failure for a java thread, and do a native unwind. Test: Ran the bugreports from above through the bugreport parser. Change-Id: I3082f2ac619e179621ad3ff39f2e839d000b2576
-rw-r--r--cmds/dumpstate/dumpstate.cpp109
1 files changed, 91 insertions, 18 deletions
diff --git a/cmds/dumpstate/dumpstate.cpp b/cmds/dumpstate/dumpstate.cpp
index 9b453826c4..5b817c1877 100644
--- a/cmds/dumpstate/dumpstate.cpp
+++ b/cmds/dumpstate/dumpstate.cpp
@@ -2186,6 +2186,74 @@ static void DumpstateOnboardingOnly() {
ds.AddDir(LOGPERSIST_DATA_DIR, false);
}
+static std::string GetTimestamp(const timespec& ts) {
+ tm tm;
+ localtime_r(&ts.tv_sec, &tm);
+
+ // Reserve enough space for the entire time string, includes the space
+ // for the '\0' to make the calculations below easier by using size for
+ // the total string size.
+ std::string str(sizeof("1970-01-01 00:00:00.123456789+0830"), '\0');
+ size_t n = strftime(str.data(), str.size(), "%F %H:%M", &tm);
+ if (n == 0) {
+ return "TIMESTAMP FAILURE";
+ }
+ int num_chars = snprintf(&str[n], str.size() - n, ":%02d.%09ld", tm.tm_sec, ts.tv_nsec);
+ if (num_chars > str.size() - n) {
+ return "TIMESTAMP FAILURE";
+ }
+ n += static_cast<size_t>(num_chars);
+ if (strftime(&str[n], str.size() - n, "%z", &tm) == 0) {
+ return "TIMESTAMP FAILURE";
+ }
+ return str;
+}
+
+static std::string GetCmdline(pid_t pid) {
+ std::string cmdline;
+ if (!android::base::ReadFileToString(android::base::StringPrintf("/proc/%d/cmdline", pid),
+ &cmdline)) {
+ return "UNKNOWN";
+ }
+ // There are '\0' terminators between arguments, convert them to spaces.
+ // But start by skipping all trailing '\0' values.
+ size_t cur = cmdline.size() - 1;
+ while (cur != 0 && cmdline[cur] == '\0') {
+ cur--;
+ }
+ if (cur == 0) {
+ return "UNKNOWN";
+ }
+ while ((cur = cmdline.rfind('\0', cur)) != std::string::npos) {
+ cmdline[cur] = ' ';
+ }
+ return cmdline;
+}
+
+static void DumpPidHeader(int fd, pid_t pid, const timespec& ts) {
+ // For consistency, the header to this message matches the one
+ // dumped by debuggerd.
+ dprintf(fd, "\n----- pid %d at %s -----\n", pid, GetTimestamp(ts).c_str());
+ dprintf(fd, "Cmd line: %s\n", GetCmdline(pid).c_str());
+}
+
+static void DumpPidFooter(int fd, pid_t pid) {
+ // For consistency, the footer to this message matches the one
+ // dumped by debuggerd.
+ dprintf(fd, "----- end %d -----\n", pid);
+}
+
+static bool DumpBacktrace(int fd, pid_t pid, bool is_java_process) {
+ int ret = dump_backtrace_to_file_timeout(
+ pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace, 3, fd);
+ if (ret == -1 && is_java_process) {
+ // Tried to unwind as a java process, try a native unwind.
+ dprintf(fd, "Java unwind failed for pid %d, trying a native unwind.\n", pid);
+ ret = dump_backtrace_to_file_timeout(pid, kDebuggerdNativeBacktrace, 3, fd);
+ }
+ return ret != -1;
+}
+
Dumpstate::RunStatus Dumpstate::DumpTraces(const char** path) {
const std::string temp_file_pattern = ds.bugreport_internal_dir_ + "/dumptrace_XXXXXX";
const size_t buf_size = temp_file_pattern.length() + 1;
@@ -2234,16 +2302,6 @@ Dumpstate::RunStatus Dumpstate::DumpTraces(const char** path) {
continue;
}
- // Skip cached processes.
- if (IsCached(pid)) {
- // For consistency, the header and footer to this message match those
- // dumped by debuggerd in the success case.
- dprintf(fd, "\n---- pid %d at [unknown] ----\n", pid);
- dprintf(fd, "Dump skipped for cached process.\n");
- dprintf(fd, "---- end %d ----", pid);
- continue;
- }
-
const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid);
std::string exe;
if (!android::base::Readlink(link_name, &exe)) {
@@ -2272,16 +2330,31 @@ Dumpstate::RunStatus Dumpstate::DumpTraces(const char** path) {
break;
}
+ timespec start_timespec;
+ clock_gettime(CLOCK_REALTIME, &start_timespec);
+ if (IsCached(pid)) {
+ DumpPidHeader(fd, pid, start_timespec);
+ dprintf(fd, "Process is cached, skipping backtrace due to high chance of timeout.\n");
+ DumpPidFooter(fd, pid);
+ continue;
+ }
+
const uint64_t start = Nanotime();
- const int ret = dump_backtrace_to_file_timeout(
- pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace, 3, fd);
+ if (!DumpBacktrace(fd, pid, is_java_process)) {
+ if (IsCached(pid)) {
+ DumpPidHeader(fd, pid, start_timespec);
+ dprintf(fd, "Backtrace failed, but process has become cached.\n");
+ DumpPidFooter(fd, pid);
+ continue;
+ }
- if (ret == -1) {
- // For consistency, the header and footer to this message match those
- // dumped by debuggerd in the success case.
- dprintf(fd, "\n---- pid %d at [unknown] ----\n", pid);
- dprintf(fd, "Dump failed, likely due to a timeout.\n");
- dprintf(fd, "---- end %d ----", pid);
+ DumpPidHeader(fd, pid, start_timespec);
+ dprintf(fd, "Backtrace gathering failed, likely due to a timeout.\n");
+ DumpPidFooter(fd, pid);
+
+ dprintf(fd, "\n[dump %s stack %d: %.3fs elapsed]\n",
+ is_java_process ? "dalvik" : "native", pid,
+ (float)(Nanotime() - start) / NANOS_PER_SEC);
timeout_failures++;
continue;
}