summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andreas Gampe <agampe@google.com> 2018-11-02 15:20:53 -0700
committer Andreas Gampe <agampe@google.com> 2018-11-05 08:28:21 -0800
commita5bd61e08d759adbb68781d96a763d4a15499c7b (patch)
tree4a503220215d2b549ca25f50787f8e7566a17680
parenta5175541c197e7bf9b03651ea5da4e64a2ac2d27 (diff)
ART: Add dump-on-timeout helper
Add a remote-backtrace util that activates on a timeout signal. Approach mostly derived from old debuggerd code, and tested by manually lowering the default timeout. Test: mmma art Test: m test-art-host Change-Id: I2c8c3426e5b1ed646d93cc8babeacaf5283a360d
-rw-r--r--test/Android.run-test.mk1
-rwxr-xr-xtest/etc/run-test-jar2
-rw-r--r--tools/timeout_dumper/Android.bp39
-rw-r--r--tools/timeout_dumper/timeout_dumper.cc590
4 files changed, 631 insertions, 1 deletions
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 64c1d4f1b8..148aea48ae 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -59,6 +59,7 @@ ART_TEST_TARGET_RUN_TEST_DEPENDENCIES += $(TARGET_OUT_JAVA_LIBRARIES)/conscrypt-
ART_TEST_HOST_RUN_TEST_DEPENDENCIES := \
$(ART_HOST_EXECUTABLES) \
$(HOST_OUT_EXECUTABLES)/hprof-conv \
+ $(HOST_OUT_EXECUTABLES)/timeout_dumper \
$(OUT_DIR)/$(ART_TEST_LIST_host_$(ART_HOST_ARCH)_libtiagent) \
$(OUT_DIR)/$(ART_TEST_LIST_host_$(ART_HOST_ARCH)_libtiagentd) \
$(OUT_DIR)/$(ART_TEST_LIST_host_$(ART_HOST_ARCH)_libtistress) \
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index 900b1d7759..adb838d965 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -942,7 +942,7 @@ else
# Note: We first send SIGRTMIN+2 (usually 36) to ART, which will induce a full thread dump
# before abort. However, dumping threads might deadlock, so we also use the "-k"
# option to definitely kill the child.
- cmdline="timeout -k 120s -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline"
+ cmdline="timeout -k 120s -s SIGRTMIN+2 ${TIME_OUT_VALUE}s timeout_dumper $cmdline"
fi
if [ "$DEV_MODE" = "y" ]; then
diff --git a/tools/timeout_dumper/Android.bp b/tools/timeout_dumper/Android.bp
new file mode 100644
index 0000000000..dfd54421fd
--- /dev/null
+++ b/tools/timeout_dumper/Android.bp
@@ -0,0 +1,39 @@
+//
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+art_cc_binary {
+ name: "timeout_dumper",
+
+ host_supported: true,
+ target: {
+ darwin: {
+ enabled: false,
+ },
+ },
+ device_supported: false,
+
+ defaults: ["art_defaults"],
+
+ srcs: ["timeout_dumper.cc"],
+
+ shared_libs: [
+ "libbacktrace",
+ "libbase",
+ ],
+ sanitize: {
+ address: true,
+ },
+}
diff --git a/tools/timeout_dumper/timeout_dumper.cc b/tools/timeout_dumper/timeout_dumper.cc
new file mode 100644
index 0000000000..cb3eaa594e
--- /dev/null
+++ b/tools/timeout_dumper/timeout_dumper.cc
@@ -0,0 +1,590 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <dirent.h>
+#include <poll.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <csignal>
+#include <cstdlib>
+#include <cstring>
+#include <thread>
+#include <memory>
+#include <set>
+
+#include <android-base/file.h>
+#include <android-base/logging.h>
+#include <android-base/macros.h>
+#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
+#include <android-base/unique_fd.h>
+#include <backtrace/Backtrace.h>
+#include <backtrace/BacktraceMap.h>
+
+namespace art {
+namespace {
+
+using android::base::StringPrintf;
+using android::base::unique_fd;
+
+constexpr bool kUseAddr2line = true;
+
+namespace timeout_signal {
+
+class SignalSet {
+ public:
+ SignalSet() {
+ if (sigemptyset(&set_) == -1) {
+ PLOG(FATAL) << "sigemptyset failed";
+ }
+ }
+
+ void Add(int signal) {
+ if (sigaddset(&set_, signal) == -1) {
+ PLOG(FATAL) << "sigaddset " << signal << " failed";
+ }
+ }
+
+ void Block() {
+ if (pthread_sigmask(SIG_BLOCK, &set_, nullptr) != 0) {
+ PLOG(FATAL) << "pthread_sigmask failed";
+ }
+ }
+
+ int Wait() {
+ // Sleep in sigwait() until a signal arrives. gdb causes EINTR failures.
+ int signal_number;
+ int rc = TEMP_FAILURE_RETRY(sigwait(&set_, &signal_number));
+ if (rc != 0) {
+ PLOG(FATAL) << "sigwait failed";
+ }
+ return signal_number;
+ }
+
+ private:
+ sigset_t set_;
+};
+
+int GetTimeoutSignal() {
+ return SIGRTMIN + 2;
+}
+
+} // namespace timeout_signal
+
+namespace addr2line {
+
+constexpr const char* kAddr2linePath =
+ "/prebuilts/gcc/linux-x86/host/x86_64-linux-glibc2.15-4.8/bin/x86_64-linux-addr2line";
+
+std::unique_ptr<std::string> FindAddr2line() {
+ const char* env_value = getenv("ANDROID_BUILD_TOP");
+ if (env_value != nullptr) {
+ std::string path = std::string(env_value) + kAddr2linePath;
+ if (access(path.c_str(), X_OK) == 0) {
+ return std::make_unique<std::string>(path);
+ }
+ }
+
+ std::string path = std::string(".") + kAddr2linePath;
+ if (access(path.c_str(), X_OK) == 0) {
+ return std::make_unique<std::string>(path);
+ }
+
+ constexpr const char* kHostAddr2line = "/usr/bin/addr2line";
+ if (access(kHostAddr2line, F_OK) == 0) {
+ return std::make_unique<std::string>(kHostAddr2line);
+ }
+
+ return nullptr;
+}
+
+// The state of an open pipe to addr2line. In "server" mode, addr2line takes input on stdin
+// and prints the result to stdout. This struct keeps the state of the open connection.
+struct Addr2linePipe {
+ Addr2linePipe(int in_fd, int out_fd, const std::string& file_name, pid_t pid)
+ : in(in_fd), out(out_fd), file(file_name), child_pid(pid), odd(true) {}
+
+ ~Addr2linePipe() {
+ kill(child_pid, SIGKILL);
+ }
+
+ unique_fd in; // The file descriptor that is connected to the output of addr2line.
+ unique_fd out; // The file descriptor that is connected to the input of addr2line.
+
+ const std::string file; // The file addr2line is working on, so that we know when to close
+ // and restart.
+ const pid_t child_pid; // The pid of the child, which we should kill when we're done.
+ bool odd; // Print state for indentation of lines.
+};
+
+std::unique_ptr<Addr2linePipe> Connect(const std::string& name, const char* args[]) {
+ int caller_to_addr2line[2];
+ int addr2line_to_caller[2];
+
+ if (pipe(caller_to_addr2line) == -1) {
+ return nullptr;
+ }
+ if (pipe(addr2line_to_caller) == -1) {
+ close(caller_to_addr2line[0]);
+ close(caller_to_addr2line[1]);
+ return nullptr;
+ }
+
+ pid_t pid = fork();
+ if (pid == -1) {
+ close(caller_to_addr2line[0]);
+ close(caller_to_addr2line[1]);
+ close(addr2line_to_caller[0]);
+ close(addr2line_to_caller[1]);
+ return nullptr;
+ }
+
+ if (pid == 0) {
+ dup2(caller_to_addr2line[0], STDIN_FILENO);
+ dup2(addr2line_to_caller[1], STDOUT_FILENO);
+
+ close(caller_to_addr2line[0]);
+ close(caller_to_addr2line[1]);
+ close(addr2line_to_caller[0]);
+ close(addr2line_to_caller[1]);
+
+ execv(args[0], const_cast<char* const*>(args));
+ exit(1);
+ } else {
+ close(caller_to_addr2line[0]);
+ close(addr2line_to_caller[1]);
+ return std::make_unique<Addr2linePipe>(addr2line_to_caller[0],
+ caller_to_addr2line[1],
+ name,
+ pid);
+ }
+}
+
+void WritePrefix(std::ostream& os, const char* prefix, bool odd) {
+ if (prefix != nullptr) {
+ os << prefix;
+ }
+ os << " ";
+ if (!odd) {
+ os << " ";
+ }
+}
+
+void Drain(size_t expected,
+ const char* prefix,
+ std::unique_ptr<Addr2linePipe>* pipe /* inout */,
+ std::ostream& os) {
+ DCHECK(pipe != nullptr);
+ DCHECK(pipe->get() != nullptr);
+ int in = pipe->get()->in.get();
+ DCHECK_GE(in, 0);
+
+ bool prefix_written = false;
+
+ for (;;) {
+ constexpr uint32_t kWaitTimeExpectedMilli = 500;
+ constexpr uint32_t kWaitTimeUnexpectedMilli = 50;
+
+ int timeout = expected > 0 ? kWaitTimeExpectedMilli : kWaitTimeUnexpectedMilli;
+ struct pollfd read_fd{in, POLLIN, 0};
+ int retval = TEMP_FAILURE_RETRY(poll(&read_fd, 1, timeout));
+ if (retval == -1) {
+ // An error occurred.
+ pipe->reset();
+ return;
+ }
+
+ if (retval == 0) {
+ // Timeout.
+ return;
+ }
+
+ if (!(read_fd.revents & POLLIN)) {
+ // addr2line call exited.
+ pipe->reset();
+ return;
+ }
+
+ constexpr size_t kMaxBuffer = 128; // Relatively small buffer. Should be OK as we're on an
+ // alt stack, but just to be sure...
+ char buffer[kMaxBuffer];
+ memset(buffer, 0, kMaxBuffer);
+ int bytes_read = TEMP_FAILURE_RETRY(read(in, buffer, kMaxBuffer - 1));
+ if (bytes_read <= 0) {
+ // This should not really happen...
+ pipe->reset();
+ return;
+ }
+ buffer[bytes_read] = '\0';
+
+ char* tmp = buffer;
+ while (*tmp != 0) {
+ if (!prefix_written) {
+ WritePrefix(os, prefix, (*pipe)->odd);
+ prefix_written = true;
+ }
+ char* new_line = strchr(tmp, '\n');
+ if (new_line == nullptr) {
+ os << tmp;
+
+ break;
+ } else {
+ char saved = *(new_line + 1);
+ *(new_line + 1) = 0;
+ os << tmp;
+ *(new_line + 1) = saved;
+
+ tmp = new_line + 1;
+ prefix_written = false;
+ (*pipe)->odd = !(*pipe)->odd;
+
+ if (expected > 0) {
+ expected--;
+ }
+ }
+ }
+ }
+}
+
+void Addr2line(const std::string& addr2line,
+ const std::string& map_src,
+ uintptr_t offset,
+ std::ostream& os,
+ const char* prefix,
+ std::unique_ptr<Addr2linePipe>* pipe /* inout */) {
+ DCHECK(pipe != nullptr);
+
+ if (map_src == "[vdso]" || android::base::EndsWith(map_src, ".vdex")) {
+ // addr2line will not work on the vdso.
+ // vdex files are special frames injected for the interpreter
+ // so they don't have any line number information available.
+ return;
+ }
+
+ if (*pipe == nullptr || (*pipe)->file != map_src) {
+ if (*pipe != nullptr) {
+ Drain(0, prefix, pipe, os);
+ }
+ pipe->reset(); // Close early.
+
+ const char* args[7] = {
+ addr2line.c_str(),
+ "--functions",
+ "--inlines",
+ "--demangle",
+ "-e",
+ map_src.c_str(),
+ nullptr
+ };
+ *pipe = Connect(map_src, args);
+ }
+
+ Addr2linePipe* pipe_ptr = pipe->get();
+ if (pipe_ptr == nullptr) {
+ // Failed...
+ return;
+ }
+
+ // Send the offset.
+ const std::string hex_offset = StringPrintf("%zx\n", offset);
+
+ if (!android::base::WriteFully(pipe_ptr->out.get(), hex_offset.data(), hex_offset.length())) {
+ // Error. :-(
+ pipe->reset();
+ return;
+ }
+
+ // Now drain (expecting two lines).
+ Drain(2U, prefix, pipe, os);
+}
+
+} // namespace addr2line
+
+namespace ptrace {
+
+std::set<pid_t> PtraceSiblings(pid_t pid) {
+ std::set<pid_t> ret;
+ std::string task_path = android::base::StringPrintf("/proc/%d/task", pid);
+
+ std::unique_ptr<DIR, int (*)(DIR*)> d(opendir(task_path.c_str()), closedir);
+
+ // Bail early if the task directory cannot be opened.
+ if (d == nullptr) {
+ PLOG(ERROR) << "Failed to scan task folder";
+ return ret;
+ }
+
+ struct dirent* de;
+ while ((de = readdir(d.get())) != nullptr) {
+ // Ignore "." and "..".
+ if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, "..")) {
+ continue;
+ }
+
+ char* end;
+ pid_t tid = strtoul(de->d_name, &end, 10);
+ if (*end) {
+ continue;
+ }
+
+ if (tid == pid) {
+ continue;
+ }
+
+ if (::ptrace(PTRACE_ATTACH, tid, 0, 0) != 0) {
+ PLOG(ERROR) << "Failed to attach to tid " << tid;
+ continue;
+ }
+
+ ret.insert(tid);
+ }
+ return ret;
+}
+
+} // namespace ptrace
+
+bool WaitForSigStoppedOrError(pid_t pid) {
+ int status;
+ pid_t res = TEMP_FAILURE_RETRY(waitpid(pid, &status, 0));
+ if (res == -1) {
+ PLOG(ERROR) << "Failed to waitpid for " << pid;
+ return false;
+ }
+ if (!(WIFSTOPPED(status))) {
+ LOG(ERROR) << "Did not get expected stopped signal for " << pid;
+ return false;
+ }
+ return true;
+}
+
+#ifdef __LP64__
+constexpr bool kIs64Bit = true;
+#else
+constexpr bool kIs64Bit = false;
+#endif
+
+void DumpThread(pid_t pid,
+ pid_t tid,
+ const std::string* addr2line_path,
+ const char* prefix,
+ BacktraceMap* map) {
+ if (pid != tid && !WaitForSigStoppedOrError(tid)) {
+ return;
+ }
+
+ std::unique_ptr<Backtrace> backtrace(Backtrace::Create(pid, tid, map));
+ if (backtrace == nullptr) {
+ LOG(ERROR) << prefix << "(failed to create Backtrace for thread " << tid << ")";
+ return;
+ }
+ backtrace->SetSkipFrames(0);
+ if (!backtrace->Unwind(0, nullptr)) {
+ LOG(ERROR) << prefix << "(backtrace::Unwind failed for thread " << tid
+ << ": " << backtrace->GetErrorString(backtrace->GetError()) << ")";
+ return;
+ }
+ if (backtrace->NumFrames() == 0) {
+ LOG(ERROR) << prefix << "(no native stack frames for thread " << tid << ")";
+ return;
+ }
+
+ std::unique_ptr<addr2line::Addr2linePipe> addr2line_state;
+
+ for (Backtrace::const_iterator it = backtrace->begin();
+ it != backtrace->end(); ++it) {
+ std::ostringstream oss;
+ oss << prefix << StringPrintf("#%02zu pc ", it->num);
+ bool try_addr2line = false;
+ if (!BacktraceMap::IsValid(it->map)) {
+ oss << StringPrintf(kIs64Bit ? "%016" PRIx64 " ???" : "%08" PRIx64 " ???", it->pc);
+ } else {
+ oss << StringPrintf(kIs64Bit ? "%016" PRIx64 " " : "%08" PRIx64 " ", it->rel_pc);
+ if (it->map.name.empty()) {
+ oss << StringPrintf("<anonymous:%" PRIx64 ">", it->map.start);
+ } else {
+ oss << it->map.name;
+ }
+ if (it->map.offset != 0) {
+ oss << StringPrintf(" (offset %" PRIx64 ")", it->map.offset);
+ }
+ oss << " (";
+ if (!it->func_name.empty()) {
+ oss << it->func_name;
+ if (it->func_offset != 0) {
+ oss << "+" << it->func_offset;
+ }
+ // Functions found using the gdb jit interface will be in an empty
+ // map that cannot be found using addr2line.
+ if (!it->map.name.empty()) {
+ try_addr2line = true;
+ }
+ } else {
+ oss << "???";
+ }
+ oss << ")";
+ }
+ LOG(ERROR) << oss.str();
+ if (try_addr2line && addr2line_path != nullptr) {
+ addr2line::Addr2line(*addr2line_path,
+ it->map.name,
+ it->pc - it->map.start,
+ LOG_STREAM(ERROR),
+ prefix,
+ &addr2line_state);
+ }
+ }
+
+ if (addr2line_state != nullptr) {
+ addr2line::Drain(0, prefix, &addr2line_state, LOG_STREAM(ERROR));
+ }
+}
+
+bool WaitForMainSigStop(const std::atomic<bool>& saw_wif_stopped_for_main) {
+ constexpr uint32_t kWaitMicros = 10;
+ constexpr size_t kMaxLoopCount = 10 * 1000 * 1000 / kWaitMicros; // 10s wait.
+
+ for (size_t loop_count = 0; !saw_wif_stopped_for_main; ++loop_count) {
+ if (loop_count > kMaxLoopCount) {
+ LOG(ERROR) << "Waited too long for main pid to stop";
+ return false;
+ }
+
+ timespec tm;
+ tm.tv_sec = 0;
+ tm.tv_nsec = kWaitMicros * 1000;
+ nanosleep(&tm, nullptr);
+ }
+ return true;
+}
+
+void DumpProcess(pid_t forked_pid, const std::atomic<bool>& saw_wif_stopped_for_main) {
+ CHECK_EQ(0, ::ptrace(PTRACE_ATTACH, forked_pid, 0, 0));
+ std::set<pid_t> tids = ptrace::PtraceSiblings(forked_pid);
+ tids.insert(forked_pid);
+
+ // Check whether we have and should use addr2line.
+ std::unique_ptr<std::string> addr2line_path = addr2line::FindAddr2line();
+ if (addr2line_path != nullptr) {
+ LOG(ERROR) << "Found addr2line at " << *addr2line_path;
+ } else {
+ LOG(ERROR) << "Did not find usable addr2line";
+ }
+ bool use_addr2line = kUseAddr2line && addr2line_path != nullptr;
+ LOG(ERROR) << (use_addr2line ? "U" : "Not u") << "sing addr2line";
+
+ if (!WaitForMainSigStop(saw_wif_stopped_for_main)) {
+ return;
+ }
+
+ std::unique_ptr<BacktraceMap> backtrace_map(BacktraceMap::Create(forked_pid));
+ if (backtrace_map == nullptr) {
+ LOG(ERROR) << "Could not create BacktraceMap";
+ return;
+ }
+
+ for (pid_t tid : tids) {
+ LOG(ERROR) << "pid: " << forked_pid << " tid: " << tid;
+ DumpThread(forked_pid,
+ tid,
+ use_addr2line ? addr2line_path.get() : nullptr,
+ " ",
+ backtrace_map.get());
+ }
+}
+
+[[noreturn]]
+void WaitMainLoop(pid_t forked_pid, std::atomic<bool>* saw_wif_stopped_for_main) {
+ for (;;) {
+ // Consider switching to waitid to not get woken up for WIFSTOPPED.
+ int status;
+ pid_t res = TEMP_FAILURE_RETRY(waitpid(forked_pid, &status, 0));
+ if (res == -1) {
+ PLOG(FATAL) << "Failure during waitpid";
+ __builtin_unreachable();
+ }
+
+ if (WIFEXITED(status)) {
+ _exit(WEXITSTATUS(status));
+ __builtin_unreachable();
+ }
+ if (WIFSIGNALED(status)) {
+ _exit(1);
+ __builtin_unreachable();
+ }
+ if (WIFSTOPPED(status)) {
+ *saw_wif_stopped_for_main = true;
+ continue;
+ }
+ if (WIFCONTINUED(status)) {
+ continue;
+ }
+
+ LOG(FATAL) << "Unknown status " << std::hex << status;
+ }
+}
+
+[[noreturn]]
+void SetupAndWait(pid_t forked_pid) {
+ timeout_signal::SignalSet signals;
+ signals.Add(timeout_signal::GetTimeoutSignal());
+ signals.Block();
+
+ std::atomic<bool> saw_wif_stopped_for_main(false);
+
+ std::thread signal_catcher([&]() {
+ signals.Block();
+ int sig = signals.Wait();
+ CHECK_EQ(sig, timeout_signal::GetTimeoutSignal());
+
+ DumpProcess(forked_pid, saw_wif_stopped_for_main);
+
+ // Don't clean up. Just kill the child and exit.
+ kill(forked_pid, SIGKILL);
+ _exit(1);
+ });
+
+ WaitMainLoop(forked_pid, &saw_wif_stopped_for_main);
+}
+
+} // namespace
+} // namespace art
+
+int main(int argc ATTRIBUTE_UNUSED, char** argv) {
+ pid_t orig_ppid = getpid();
+
+ pid_t pid = fork();
+ if (pid == 0) {
+ if (prctl(PR_SET_PDEATHSIG, SIGTERM) == -1) {
+ _exit(1);
+ }
+
+ if (getppid() != orig_ppid) {
+ _exit(2);
+ }
+
+ execvp(argv[1], &argv[1]);
+
+ _exit(3);
+ __builtin_unreachable();
+ }
+
+ art::SetupAndWait(pid);
+ __builtin_unreachable();
+}