diff options
| author | 2018-11-02 15:20:53 -0700 | |
|---|---|---|
| committer | 2018-11-05 08:28:21 -0800 | |
| commit | a5bd61e08d759adbb68781d96a763d4a15499c7b (patch) | |
| tree | 4a503220215d2b549ca25f50787f8e7566a17680 | |
| parent | a5175541c197e7bf9b03651ea5da4e64a2ac2d27 (diff) | |
ART: Add dump-on-timeout helper
Add a remote-backtrace util that activates on a timeout signal.
Approach mostly derived from old debuggerd code, and tested by
manually lowering the default timeout.
Test: mmma art
Test: m test-art-host
Change-Id: I2c8c3426e5b1ed646d93cc8babeacaf5283a360d
| -rw-r--r-- | test/Android.run-test.mk | 1 | ||||
| -rwxr-xr-x | test/etc/run-test-jar | 2 | ||||
| -rw-r--r-- | tools/timeout_dumper/Android.bp | 39 | ||||
| -rw-r--r-- | tools/timeout_dumper/timeout_dumper.cc | 590 |
4 files changed, 631 insertions, 1 deletions
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index 64c1d4f1b8..148aea48ae 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -59,6 +59,7 @@ ART_TEST_TARGET_RUN_TEST_DEPENDENCIES += $(TARGET_OUT_JAVA_LIBRARIES)/conscrypt- ART_TEST_HOST_RUN_TEST_DEPENDENCIES := \ $(ART_HOST_EXECUTABLES) \ $(HOST_OUT_EXECUTABLES)/hprof-conv \ + $(HOST_OUT_EXECUTABLES)/timeout_dumper \ $(OUT_DIR)/$(ART_TEST_LIST_host_$(ART_HOST_ARCH)_libtiagent) \ $(OUT_DIR)/$(ART_TEST_LIST_host_$(ART_HOST_ARCH)_libtiagentd) \ $(OUT_DIR)/$(ART_TEST_LIST_host_$(ART_HOST_ARCH)_libtistress) \ diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar index 900b1d7759..adb838d965 100755 --- a/test/etc/run-test-jar +++ b/test/etc/run-test-jar @@ -942,7 +942,7 @@ else # Note: We first send SIGRTMIN+2 (usually 36) to ART, which will induce a full thread dump # before abort. However, dumping threads might deadlock, so we also use the "-k" # option to definitely kill the child. - cmdline="timeout -k 120s -s SIGRTMIN+2 ${TIME_OUT_VALUE}s $cmdline" + cmdline="timeout -k 120s -s SIGRTMIN+2 ${TIME_OUT_VALUE}s timeout_dumper $cmdline" fi if [ "$DEV_MODE" = "y" ]; then diff --git a/tools/timeout_dumper/Android.bp b/tools/timeout_dumper/Android.bp new file mode 100644 index 0000000000..dfd54421fd --- /dev/null +++ b/tools/timeout_dumper/Android.bp @@ -0,0 +1,39 @@ +// +// Copyright (C) 2018 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +art_cc_binary { + name: "timeout_dumper", + + host_supported: true, + target: { + darwin: { + enabled: false, + }, + }, + device_supported: false, + + defaults: ["art_defaults"], + + srcs: ["timeout_dumper.cc"], + + shared_libs: [ + "libbacktrace", + "libbase", + ], + sanitize: { + address: true, + }, +} diff --git a/tools/timeout_dumper/timeout_dumper.cc b/tools/timeout_dumper/timeout_dumper.cc new file mode 100644 index 0000000000..cb3eaa594e --- /dev/null +++ b/tools/timeout_dumper/timeout_dumper.cc @@ -0,0 +1,590 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <dirent.h> +#include <poll.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include <csignal> +#include <cstdlib> +#include <cstring> +#include <thread> +#include <memory> +#include <set> + +#include <android-base/file.h> +#include <android-base/logging.h> +#include <android-base/macros.h> +#include <android-base/stringprintf.h> +#include <android-base/strings.h> +#include <android-base/unique_fd.h> +#include <backtrace/Backtrace.h> +#include <backtrace/BacktraceMap.h> + +namespace art { +namespace { + +using android::base::StringPrintf; +using android::base::unique_fd; + +constexpr bool kUseAddr2line = true; + +namespace timeout_signal { + +class SignalSet { + public: + SignalSet() { + if (sigemptyset(&set_) == -1) { + PLOG(FATAL) << "sigemptyset failed"; + } + } + + void Add(int signal) { + if (sigaddset(&set_, signal) == -1) { + PLOG(FATAL) << "sigaddset " << signal << " failed"; + } + } + + void Block() { + if (pthread_sigmask(SIG_BLOCK, &set_, nullptr) != 0) { + PLOG(FATAL) << "pthread_sigmask failed"; + } + } + + int Wait() { + // Sleep in sigwait() until a signal arrives. gdb causes EINTR failures. + int signal_number; + int rc = TEMP_FAILURE_RETRY(sigwait(&set_, &signal_number)); + if (rc != 0) { + PLOG(FATAL) << "sigwait failed"; + } + return signal_number; + } + + private: + sigset_t set_; +}; + +int GetTimeoutSignal() { + return SIGRTMIN + 2; +} + +} // namespace timeout_signal + +namespace addr2line { + +constexpr const char* kAddr2linePath = + "/prebuilts/gcc/linux-x86/host/x86_64-linux-glibc2.15-4.8/bin/x86_64-linux-addr2line"; + +std::unique_ptr<std::string> FindAddr2line() { + const char* env_value = getenv("ANDROID_BUILD_TOP"); + if (env_value != nullptr) { + std::string path = std::string(env_value) + kAddr2linePath; + if (access(path.c_str(), X_OK) == 0) { + return std::make_unique<std::string>(path); + } + } + + std::string path = std::string(".") + kAddr2linePath; + if (access(path.c_str(), X_OK) == 0) { + return std::make_unique<std::string>(path); + } + + constexpr const char* kHostAddr2line = "/usr/bin/addr2line"; + if (access(kHostAddr2line, F_OK) == 0) { + return std::make_unique<std::string>(kHostAddr2line); + } + + return nullptr; +} + +// The state of an open pipe to addr2line. In "server" mode, addr2line takes input on stdin +// and prints the result to stdout. This struct keeps the state of the open connection. +struct Addr2linePipe { + Addr2linePipe(int in_fd, int out_fd, const std::string& file_name, pid_t pid) + : in(in_fd), out(out_fd), file(file_name), child_pid(pid), odd(true) {} + + ~Addr2linePipe() { + kill(child_pid, SIGKILL); + } + + unique_fd in; // The file descriptor that is connected to the output of addr2line. + unique_fd out; // The file descriptor that is connected to the input of addr2line. + + const std::string file; // The file addr2line is working on, so that we know when to close + // and restart. + const pid_t child_pid; // The pid of the child, which we should kill when we're done. + bool odd; // Print state for indentation of lines. +}; + +std::unique_ptr<Addr2linePipe> Connect(const std::string& name, const char* args[]) { + int caller_to_addr2line[2]; + int addr2line_to_caller[2]; + + if (pipe(caller_to_addr2line) == -1) { + return nullptr; + } + if (pipe(addr2line_to_caller) == -1) { + close(caller_to_addr2line[0]); + close(caller_to_addr2line[1]); + return nullptr; + } + + pid_t pid = fork(); + if (pid == -1) { + close(caller_to_addr2line[0]); + close(caller_to_addr2line[1]); + close(addr2line_to_caller[0]); + close(addr2line_to_caller[1]); + return nullptr; + } + + if (pid == 0) { + dup2(caller_to_addr2line[0], STDIN_FILENO); + dup2(addr2line_to_caller[1], STDOUT_FILENO); + + close(caller_to_addr2line[0]); + close(caller_to_addr2line[1]); + close(addr2line_to_caller[0]); + close(addr2line_to_caller[1]); + + execv(args[0], const_cast<char* const*>(args)); + exit(1); + } else { + close(caller_to_addr2line[0]); + close(addr2line_to_caller[1]); + return std::make_unique<Addr2linePipe>(addr2line_to_caller[0], + caller_to_addr2line[1], + name, + pid); + } +} + +void WritePrefix(std::ostream& os, const char* prefix, bool odd) { + if (prefix != nullptr) { + os << prefix; + } + os << " "; + if (!odd) { + os << " "; + } +} + +void Drain(size_t expected, + const char* prefix, + std::unique_ptr<Addr2linePipe>* pipe /* inout */, + std::ostream& os) { + DCHECK(pipe != nullptr); + DCHECK(pipe->get() != nullptr); + int in = pipe->get()->in.get(); + DCHECK_GE(in, 0); + + bool prefix_written = false; + + for (;;) { + constexpr uint32_t kWaitTimeExpectedMilli = 500; + constexpr uint32_t kWaitTimeUnexpectedMilli = 50; + + int timeout = expected > 0 ? kWaitTimeExpectedMilli : kWaitTimeUnexpectedMilli; + struct pollfd read_fd{in, POLLIN, 0}; + int retval = TEMP_FAILURE_RETRY(poll(&read_fd, 1, timeout)); + if (retval == -1) { + // An error occurred. + pipe->reset(); + return; + } + + if (retval == 0) { + // Timeout. + return; + } + + if (!(read_fd.revents & POLLIN)) { + // addr2line call exited. + pipe->reset(); + return; + } + + constexpr size_t kMaxBuffer = 128; // Relatively small buffer. Should be OK as we're on an + // alt stack, but just to be sure... + char buffer[kMaxBuffer]; + memset(buffer, 0, kMaxBuffer); + int bytes_read = TEMP_FAILURE_RETRY(read(in, buffer, kMaxBuffer - 1)); + if (bytes_read <= 0) { + // This should not really happen... + pipe->reset(); + return; + } + buffer[bytes_read] = '\0'; + + char* tmp = buffer; + while (*tmp != 0) { + if (!prefix_written) { + WritePrefix(os, prefix, (*pipe)->odd); + prefix_written = true; + } + char* new_line = strchr(tmp, '\n'); + if (new_line == nullptr) { + os << tmp; + + break; + } else { + char saved = *(new_line + 1); + *(new_line + 1) = 0; + os << tmp; + *(new_line + 1) = saved; + + tmp = new_line + 1; + prefix_written = false; + (*pipe)->odd = !(*pipe)->odd; + + if (expected > 0) { + expected--; + } + } + } + } +} + +void Addr2line(const std::string& addr2line, + const std::string& map_src, + uintptr_t offset, + std::ostream& os, + const char* prefix, + std::unique_ptr<Addr2linePipe>* pipe /* inout */) { + DCHECK(pipe != nullptr); + + if (map_src == "[vdso]" || android::base::EndsWith(map_src, ".vdex")) { + // addr2line will not work on the vdso. + // vdex files are special frames injected for the interpreter + // so they don't have any line number information available. + return; + } + + if (*pipe == nullptr || (*pipe)->file != map_src) { + if (*pipe != nullptr) { + Drain(0, prefix, pipe, os); + } + pipe->reset(); // Close early. + + const char* args[7] = { + addr2line.c_str(), + "--functions", + "--inlines", + "--demangle", + "-e", + map_src.c_str(), + nullptr + }; + *pipe = Connect(map_src, args); + } + + Addr2linePipe* pipe_ptr = pipe->get(); + if (pipe_ptr == nullptr) { + // Failed... + return; + } + + // Send the offset. + const std::string hex_offset = StringPrintf("%zx\n", offset); + + if (!android::base::WriteFully(pipe_ptr->out.get(), hex_offset.data(), hex_offset.length())) { + // Error. :-( + pipe->reset(); + return; + } + + // Now drain (expecting two lines). + Drain(2U, prefix, pipe, os); +} + +} // namespace addr2line + +namespace ptrace { + +std::set<pid_t> PtraceSiblings(pid_t pid) { + std::set<pid_t> ret; + std::string task_path = android::base::StringPrintf("/proc/%d/task", pid); + + std::unique_ptr<DIR, int (*)(DIR*)> d(opendir(task_path.c_str()), closedir); + + // Bail early if the task directory cannot be opened. + if (d == nullptr) { + PLOG(ERROR) << "Failed to scan task folder"; + return ret; + } + + struct dirent* de; + while ((de = readdir(d.get())) != nullptr) { + // Ignore "." and "..". + if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, "..")) { + continue; + } + + char* end; + pid_t tid = strtoul(de->d_name, &end, 10); + if (*end) { + continue; + } + + if (tid == pid) { + continue; + } + + if (::ptrace(PTRACE_ATTACH, tid, 0, 0) != 0) { + PLOG(ERROR) << "Failed to attach to tid " << tid; + continue; + } + + ret.insert(tid); + } + return ret; +} + +} // namespace ptrace + +bool WaitForSigStoppedOrError(pid_t pid) { + int status; + pid_t res = TEMP_FAILURE_RETRY(waitpid(pid, &status, 0)); + if (res == -1) { + PLOG(ERROR) << "Failed to waitpid for " << pid; + return false; + } + if (!(WIFSTOPPED(status))) { + LOG(ERROR) << "Did not get expected stopped signal for " << pid; + return false; + } + return true; +} + +#ifdef __LP64__ +constexpr bool kIs64Bit = true; +#else +constexpr bool kIs64Bit = false; +#endif + +void DumpThread(pid_t pid, + pid_t tid, + const std::string* addr2line_path, + const char* prefix, + BacktraceMap* map) { + if (pid != tid && !WaitForSigStoppedOrError(tid)) { + return; + } + + std::unique_ptr<Backtrace> backtrace(Backtrace::Create(pid, tid, map)); + if (backtrace == nullptr) { + LOG(ERROR) << prefix << "(failed to create Backtrace for thread " << tid << ")"; + return; + } + backtrace->SetSkipFrames(0); + if (!backtrace->Unwind(0, nullptr)) { + LOG(ERROR) << prefix << "(backtrace::Unwind failed for thread " << tid + << ": " << backtrace->GetErrorString(backtrace->GetError()) << ")"; + return; + } + if (backtrace->NumFrames() == 0) { + LOG(ERROR) << prefix << "(no native stack frames for thread " << tid << ")"; + return; + } + + std::unique_ptr<addr2line::Addr2linePipe> addr2line_state; + + for (Backtrace::const_iterator it = backtrace->begin(); + it != backtrace->end(); ++it) { + std::ostringstream oss; + oss << prefix << StringPrintf("#%02zu pc ", it->num); + bool try_addr2line = false; + if (!BacktraceMap::IsValid(it->map)) { + oss << StringPrintf(kIs64Bit ? "%016" PRIx64 " ???" : "%08" PRIx64 " ???", it->pc); + } else { + oss << StringPrintf(kIs64Bit ? "%016" PRIx64 " " : "%08" PRIx64 " ", it->rel_pc); + if (it->map.name.empty()) { + oss << StringPrintf("<anonymous:%" PRIx64 ">", it->map.start); + } else { + oss << it->map.name; + } + if (it->map.offset != 0) { + oss << StringPrintf(" (offset %" PRIx64 ")", it->map.offset); + } + oss << " ("; + if (!it->func_name.empty()) { + oss << it->func_name; + if (it->func_offset != 0) { + oss << "+" << it->func_offset; + } + // Functions found using the gdb jit interface will be in an empty + // map that cannot be found using addr2line. + if (!it->map.name.empty()) { + try_addr2line = true; + } + } else { + oss << "???"; + } + oss << ")"; + } + LOG(ERROR) << oss.str(); + if (try_addr2line && addr2line_path != nullptr) { + addr2line::Addr2line(*addr2line_path, + it->map.name, + it->pc - it->map.start, + LOG_STREAM(ERROR), + prefix, + &addr2line_state); + } + } + + if (addr2line_state != nullptr) { + addr2line::Drain(0, prefix, &addr2line_state, LOG_STREAM(ERROR)); + } +} + +bool WaitForMainSigStop(const std::atomic<bool>& saw_wif_stopped_for_main) { + constexpr uint32_t kWaitMicros = 10; + constexpr size_t kMaxLoopCount = 10 * 1000 * 1000 / kWaitMicros; // 10s wait. + + for (size_t loop_count = 0; !saw_wif_stopped_for_main; ++loop_count) { + if (loop_count > kMaxLoopCount) { + LOG(ERROR) << "Waited too long for main pid to stop"; + return false; + } + + timespec tm; + tm.tv_sec = 0; + tm.tv_nsec = kWaitMicros * 1000; + nanosleep(&tm, nullptr); + } + return true; +} + +void DumpProcess(pid_t forked_pid, const std::atomic<bool>& saw_wif_stopped_for_main) { + CHECK_EQ(0, ::ptrace(PTRACE_ATTACH, forked_pid, 0, 0)); + std::set<pid_t> tids = ptrace::PtraceSiblings(forked_pid); + tids.insert(forked_pid); + + // Check whether we have and should use addr2line. + std::unique_ptr<std::string> addr2line_path = addr2line::FindAddr2line(); + if (addr2line_path != nullptr) { + LOG(ERROR) << "Found addr2line at " << *addr2line_path; + } else { + LOG(ERROR) << "Did not find usable addr2line"; + } + bool use_addr2line = kUseAddr2line && addr2line_path != nullptr; + LOG(ERROR) << (use_addr2line ? "U" : "Not u") << "sing addr2line"; + + if (!WaitForMainSigStop(saw_wif_stopped_for_main)) { + return; + } + + std::unique_ptr<BacktraceMap> backtrace_map(BacktraceMap::Create(forked_pid)); + if (backtrace_map == nullptr) { + LOG(ERROR) << "Could not create BacktraceMap"; + return; + } + + for (pid_t tid : tids) { + LOG(ERROR) << "pid: " << forked_pid << " tid: " << tid; + DumpThread(forked_pid, + tid, + use_addr2line ? addr2line_path.get() : nullptr, + " ", + backtrace_map.get()); + } +} + +[[noreturn]] +void WaitMainLoop(pid_t forked_pid, std::atomic<bool>* saw_wif_stopped_for_main) { + for (;;) { + // Consider switching to waitid to not get woken up for WIFSTOPPED. + int status; + pid_t res = TEMP_FAILURE_RETRY(waitpid(forked_pid, &status, 0)); + if (res == -1) { + PLOG(FATAL) << "Failure during waitpid"; + __builtin_unreachable(); + } + + if (WIFEXITED(status)) { + _exit(WEXITSTATUS(status)); + __builtin_unreachable(); + } + if (WIFSIGNALED(status)) { + _exit(1); + __builtin_unreachable(); + } + if (WIFSTOPPED(status)) { + *saw_wif_stopped_for_main = true; + continue; + } + if (WIFCONTINUED(status)) { + continue; + } + + LOG(FATAL) << "Unknown status " << std::hex << status; + } +} + +[[noreturn]] +void SetupAndWait(pid_t forked_pid) { + timeout_signal::SignalSet signals; + signals.Add(timeout_signal::GetTimeoutSignal()); + signals.Block(); + + std::atomic<bool> saw_wif_stopped_for_main(false); + + std::thread signal_catcher([&]() { + signals.Block(); + int sig = signals.Wait(); + CHECK_EQ(sig, timeout_signal::GetTimeoutSignal()); + + DumpProcess(forked_pid, saw_wif_stopped_for_main); + + // Don't clean up. Just kill the child and exit. + kill(forked_pid, SIGKILL); + _exit(1); + }); + + WaitMainLoop(forked_pid, &saw_wif_stopped_for_main); +} + +} // namespace +} // namespace art + +int main(int argc ATTRIBUTE_UNUSED, char** argv) { + pid_t orig_ppid = getpid(); + + pid_t pid = fork(); + if (pid == 0) { + if (prctl(PR_SET_PDEATHSIG, SIGTERM) == -1) { + _exit(1); + } + + if (getppid() != orig_ppid) { + _exit(2); + } + + execvp(argv[1], &argv[1]); + + _exit(3); + __builtin_unreachable(); + } + + art::SetupAndWait(pid); + __builtin_unreachable(); +} |