diff options
author | 2018-06-08 15:11:57 -0700 | |
---|---|---|
committer | 2018-06-08 15:11:57 -0700 | |
commit | f11e2bd016d886a333345dea853ebda23a408d5c (patch) | |
tree | 905bd4d25d6ab2b046620ff459777d8dcfa5d7de /cmds/dumpstate/dumpstate.cpp | |
parent | aabd6b7fa343654cd85b3b2da392e424d037d15a (diff) | |
parent | 5c947cdf72270fd1f766b2248d526ebc8c7227f6 (diff) |
Merge pi-dev-plus-aosp-without-vendor into stage-aosp-master
Bug: 79597307
Change-Id: I6d6bee71b9424eb478780bbfc06b830eb8ded342
Diffstat (limited to 'cmds/dumpstate/dumpstate.cpp')
-rw-r--r-- | cmds/dumpstate/dumpstate.cpp | 640 |
1 files changed, 488 insertions, 152 deletions
diff --git a/cmds/dumpstate/dumpstate.cpp b/cmds/dumpstate/dumpstate.cpp index d94c649b11..2b6241566c 100644 --- a/cmds/dumpstate/dumpstate.cpp +++ b/cmds/dumpstate/dumpstate.cpp @@ -25,41 +25,68 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/poll.h> #include <sys/prctl.h> #include <sys/resource.h> #include <sys/stat.h> #include <sys/time.h> #include <sys/wait.h> #include <unistd.h> + +#include <chrono> +#include <functional> +#include <future> #include <memory> #include <regex> #include <set> #include <string> +#include <utility> #include <vector> #include <android-base/file.h> #include <android-base/properties.h> +#include <android-base/scopeguard.h> #include <android-base/stringprintf.h> #include <android-base/strings.h> #include <android-base/unique_fd.h> #include <android/hardware/dumpstate/1.0/IDumpstateDevice.h> +#include <android/hidl/manager/1.0/IServiceManager.h> #include <cutils/native_handle.h> #include <cutils/properties.h> +#include <dumpsys.h> +#include <hidl/ServiceManagement.h> #include <openssl/sha.h> #include <private/android_filesystem_config.h> #include <private/android_logger.h> - +#include <serviceutils/PriorityDumper.h> +#include <utils/StrongPointer.h> #include "DumpstateInternal.h" +#include "DumpstateSectionReporter.h" #include "DumpstateService.h" #include "dumpstate.h" using ::android::hardware::dumpstate::V1_0::IDumpstateDevice; +using ::std::literals::chrono_literals::operator""ms; +using ::std::literals::chrono_literals::operator""s; // TODO: remove once moved to namespace +using android::defaultServiceManager; +using android::Dumpsys; +using android::INVALID_OPERATION; +using android::IServiceManager; +using android::OK; +using android::sp; +using android::status_t; +using android::String16; +using android::String8; +using android::TIMED_OUT; +using android::UNKNOWN_ERROR; +using android::Vector; using android::os::dumpstate::CommandOptions; using android::os::dumpstate::DumpFileToFd; -using android::os::dumpstate::PropertiesHelper; +using android::os::dumpstate::DumpstateSectionReporter; using android::os::dumpstate::GetPidByName; +using android::os::dumpstate::PropertiesHelper; /* read before root is shed */ static char cmdline_buf[16384] = "(unknown)"; @@ -82,6 +109,7 @@ void add_mountinfo(); #define PROFILE_DATA_DIR_CUR "/data/misc/profiles/cur" #define PROFILE_DATA_DIR_REF "/data/misc/profiles/ref" #define WLUTIL "/vendor/xbin/wlutil" +#define WMTRACE_DATA_DIR "/data/misc/wmtrace" // TODO(narayan): Since this information has to be kept in sync // with tombstoned, we should just put it in a common header. @@ -100,8 +128,8 @@ static int RunCommand(const std::string& title, const std::vector<std::string>& } static void RunDumpsys(const std::string& title, const std::vector<std::string>& dumpsysArgs, const CommandOptions& options = Dumpstate::DEFAULT_DUMPSYS, - long dumpsysTimeout = 0) { - return ds.RunDumpsys(title, dumpsysArgs, options, dumpsysTimeout); + long dumpsysTimeoutMs = 0) { + return ds.RunDumpsys(title, dumpsysArgs, options, dumpsysTimeoutMs); } static int DumpFile(const std::string& title, const std::string& path) { return ds.DumpFile(title, path); @@ -112,14 +140,14 @@ static const std::string ZIP_ROOT_DIR = "FS"; // Must be hardcoded because dumpstate HAL implementation need SELinux access to it static const std::string kDumpstateBoardPath = "/bugreports/"; +static const std::string kProtoPath = "proto/"; +static const std::string kProtoExt = ".proto"; static const std::string kDumpstateBoardFiles[] = { "dumpstate_board.txt", "dumpstate_board.bin" }; static const int NUM_OF_DUMPS = arraysize(kDumpstateBoardFiles); -static const std::string kLsHalDebugPath = "/bugreports/dumpstate_lshal.txt"; - static constexpr char PROPERTY_EXTRA_OPTIONS[] = "dumpstate.options"; static constexpr char PROPERTY_LAST_ID[] = "dumpstate.last_id"; static constexpr char PROPERTY_VERSION[] = "dumpstate.version"; @@ -214,7 +242,7 @@ static bool AddDumps(const std::vector<DumpData>::const_iterator start, } if (ds.IsZipping() && add_to_zip) { - if (!ds.AddZipEntryFromFd(ZIP_ROOT_DIR + name, fd)) { + if (ds.AddZipEntryFromFd(ZIP_ROOT_DIR + name, fd, /* timeout = */ 0ms) != OK) { MYLOGE("Unable to add %s to zip file, addZipEntryFromFd failed\n", name.c_str()); } } else { @@ -644,12 +672,18 @@ static int dump_stat_from_fd(const char *title __unused, const char *path, int f return 0; } -/* timeout in ms */ -static unsigned long logcat_timeout(const char *name) { - log_id_t id = android_name_to_log_id(name); - unsigned long property_size = __android_logger_get_buffer_size(id); - /* Engineering margin is ten-fold our guess */ - return 10 * (property_size + worst_write_perf) / worst_write_perf; +static const long MINIMUM_LOGCAT_TIMEOUT_MS = 50000; + +/* timeout in ms to read a list of buffers */ +static unsigned long logcat_timeout(const std::vector<std::string>& buffers) { + unsigned long timeout_ms = 0; + for (const auto& buffer : buffers) { + log_id_t id = android_name_to_log_id(buffer.c_str()); + unsigned long property_size = __android_logger_get_buffer_size(id); + /* Engineering margin is ten-fold our guess */ + timeout_ms += 10 * (property_size + worst_write_perf) / worst_write_perf; + } + return timeout_ms > MINIMUM_LOGCAT_TIMEOUT_MS ? timeout_ms : MINIMUM_LOGCAT_TIMEOUT_MS; } void Dumpstate::PrintHeader() const { @@ -678,7 +712,9 @@ void Dumpstate::PrintHeader() const { printf("Kernel: "); DumpFileToFd(STDOUT_FILENO, "", "/proc/version"); printf("Command line: %s\n", strtok(cmdline_buf, "\n")); - ds.RunCommand("UPTIME", {"uptime"}, CommandOptions::DEFAULT); + printf("Uptime: "); + RunCommandToFd(STDOUT_FILENO, "", {"uptime", "-p"}, + CommandOptions::WithTimeout(1).Always().Build()); printf("Bugreport format version: %s\n", version_.c_str()); printf("Dumpstate info: id=%d pid=%d dry_run=%d args=%s extra_options=%s\n", id_, pid_, PropertiesHelper::IsDryRun(), args_.c_str(), extra_options_.c_str()); @@ -693,11 +729,12 @@ static const std::set<std::string> PROBLEMATIC_FILE_EXTENSIONS = { ".shb", ".sys", ".vb", ".vbe", ".vbs", ".vxd", ".wsc", ".wsf", ".wsh" }; -bool Dumpstate::AddZipEntryFromFd(const std::string& entry_name, int fd) { +status_t Dumpstate::AddZipEntryFromFd(const std::string& entry_name, int fd, + std::chrono::milliseconds timeout = 0ms) { if (!IsZipping()) { MYLOGD("Not adding zip entry %s from fd because it's not a zipped bugreport\n", entry_name.c_str()); - return false; + return INVALID_OPERATION; } std::string valid_name = entry_name; @@ -719,32 +756,55 @@ bool Dumpstate::AddZipEntryFromFd(const std::string& entry_name, int fd) { if (err != 0) { MYLOGE("zip_writer_->StartEntryWithTime(%s): %s\n", valid_name.c_str(), ZipWriter::ErrorCodeString(err)); - return false; + return UNKNOWN_ERROR; } + auto start = std::chrono::steady_clock::now(); + auto end = start + timeout; + struct pollfd pfd = {fd, POLLIN}; std::vector<uint8_t> buffer(65536); while (1) { + if (timeout.count() > 0) { + // lambda to recalculate the timeout. + auto time_left_ms = [end]() { + auto now = std::chrono::steady_clock::now(); + auto diff = std::chrono::duration_cast<std::chrono::milliseconds>(end - now); + return std::max(diff.count(), 0LL); + }; + + int rc = TEMP_FAILURE_RETRY(poll(&pfd, 1, time_left_ms())); + if (rc < 0) { + MYLOGE("Error in poll while adding from fd to zip entry %s:%s", entry_name.c_str(), + strerror(errno)); + return -errno; + } else if (rc == 0) { + MYLOGE("Timed out adding from fd to zip entry %s:%s Timeout:%lldms", + entry_name.c_str(), strerror(errno), timeout.count()); + return TIMED_OUT; + } + } + ssize_t bytes_read = TEMP_FAILURE_RETRY(read(fd, buffer.data(), buffer.size())); if (bytes_read == 0) { break; } else if (bytes_read == -1) { MYLOGE("read(%s): %s\n", entry_name.c_str(), strerror(errno)); - return false; + return -errno; } err = zip_writer_->WriteBytes(buffer.data(), bytes_read); if (err) { MYLOGE("zip_writer_->WriteBytes(): %s\n", ZipWriter::ErrorCodeString(err)); - return false; + return UNKNOWN_ERROR; } } err = zip_writer_->FinishEntry(); if (err != 0) { MYLOGE("zip_writer_->FinishEntry(): %s\n", ZipWriter::ErrorCodeString(err)); - return false; + return UNKNOWN_ERROR; } - return true; + return OK; } bool Dumpstate::AddZipEntry(const std::string& entry_name, const std::string& entry_path) { @@ -755,12 +815,12 @@ bool Dumpstate::AddZipEntry(const std::string& entry_name, const std::string& en return false; } - return AddZipEntryFromFd(entry_name, fd.get()); + return (AddZipEntryFromFd(entry_name, fd.get()) == OK); } /* adds a file to the existing zipped bugreport */ static int _add_file_from_fd(const char* title __attribute__((unused)), const char* path, int fd) { - return ds.AddZipEntryFromFd(ZIP_ROOT_DIR + path, fd) ? 0 : 1; + return (ds.AddZipEntryFromFd(ZIP_ROOT_DIR + path, fd) == OK) ? 0 : 1; } void Dumpstate::AddDir(const std::string& dir, bool recursive) { @@ -816,41 +876,43 @@ static void DoKmsg() { } } +static void DoKernelLogcat() { + unsigned long timeout_ms = logcat_timeout({"kernel"}); + RunCommand( + "KERNEL LOG", + {"logcat", "-b", "kernel", "-v", "threadtime", "-v", "printable", "-v", "uid", "-d", "*:v"}, + CommandOptions::WithTimeoutInMs(timeout_ms).Build()); +} + static void DoLogcat() { - unsigned long timeout; + unsigned long timeout_ms; // DumpFile("EVENT LOG TAGS", "/etc/event-log-tags"); // calculate timeout - timeout = logcat_timeout("main") + logcat_timeout("system") + logcat_timeout("crash"); - if (timeout < 20000) { - timeout = 20000; - } + timeout_ms = logcat_timeout({"main", "system", "crash"}); RunCommand("SYSTEM LOG", - {"logcat", "-v", "threadtime", "-v", "printable", "-v", "uid", - "-d", "*:v"}, - CommandOptions::WithTimeout(timeout / 1000).Build()); - timeout = logcat_timeout("events"); - if (timeout < 20000) { - timeout = 20000; - } - RunCommand("EVENT LOG", - {"logcat", "-b", "events", "-v", "threadtime", "-v", "printable", "-v", "uid", - "-d", "*:v"}, - CommandOptions::WithTimeout(timeout / 1000).Build()); - timeout = logcat_timeout("radio"); - if (timeout < 20000) { - timeout = 20000; - } - RunCommand("RADIO LOG", - {"logcat", "-b", "radio", "-v", "threadtime", "-v", "printable", "-v", "uid", - "-d", "*:v"}, - CommandOptions::WithTimeout(timeout / 1000).Build()); + {"logcat", "-v", "threadtime", "-v", "printable", "-v", "uid", "-d", "*:v"}, + CommandOptions::WithTimeoutInMs(timeout_ms).Build()); + timeout_ms = logcat_timeout({"events"}); + RunCommand( + "EVENT LOG", + {"logcat", "-b", "events", "-v", "threadtime", "-v", "printable", "-v", "uid", "-d", "*:v"}, + CommandOptions::WithTimeoutInMs(timeout_ms).Build()); + timeout_ms = logcat_timeout({"stats"}); + RunCommand( + "STATS LOG", + {"logcat", "-b", "stats", "-v", "threadtime", "-v", "printable", "-v", "uid", "-d", "*:v"}, + CommandOptions::WithTimeoutInMs(timeout_ms).Build()); + timeout_ms = logcat_timeout({"radio"}); + RunCommand( + "RADIO LOG", + {"logcat", "-b", "radio", "-v", "threadtime", "-v", "printable", "-v", "uid", "-d", "*:v"}, + CommandOptions::WithTimeoutInMs(timeout_ms).Build()); RunCommand("LOG STATISTICS", {"logcat", "-b", "all", "-S"}); /* kernels must set CONFIG_PSTORE_PMSG, slice up pstore with device tree */ - RunCommand("LAST LOGCAT", - {"logcat", "-L", "-b", "all", "-v", "threadtime", "-v", "printable", "-v", "uid", - "-d", "*:v"}); + RunCommand("LAST LOGCAT", {"logcat", "-L", "-b", "all", "-v", "threadtime", "-v", "printable", + "-v", "uid", "-d", "*:v"}); } static void DumpIpTablesAsRoot() { @@ -968,11 +1030,191 @@ static void DumpIpAddrAndRules() { RunCommand("IP RULES v6", {"ip", "-6", "rule", "show"}); } +static void RunDumpsysTextByPriority(const std::string& title, int priority, + std::chrono::milliseconds timeout, + std::chrono::milliseconds service_timeout) { + auto start = std::chrono::steady_clock::now(); + sp<android::IServiceManager> sm = defaultServiceManager(); + Dumpsys dumpsys(sm.get()); + Vector<String16> args; + Dumpsys::setServiceArgs(args, /* asProto = */ false, priority); + Vector<String16> services = dumpsys.listServices(priority, /* supports_proto = */ false); + for (const String16& service : services) { + std::string path(title); + path.append(" - ").append(String8(service).c_str()); + DumpstateSectionReporter section_reporter(path, ds.listener_, ds.report_section_); + size_t bytes_written = 0; + status_t status = dumpsys.startDumpThread(service, args); + if (status == OK) { + dumpsys.writeDumpHeader(STDOUT_FILENO, service, priority); + std::chrono::duration<double> elapsed_seconds; + status = dumpsys.writeDump(STDOUT_FILENO, service, service_timeout, + /* as_proto = */ false, elapsed_seconds, bytes_written); + section_reporter.setSize(bytes_written); + dumpsys.writeDumpFooter(STDOUT_FILENO, service, elapsed_seconds); + bool dump_complete = (status == OK); + dumpsys.stopDumpThread(dump_complete); + } + section_reporter.setStatus(status); + + auto elapsed_duration = std::chrono::duration_cast<std::chrono::milliseconds>( + std::chrono::steady_clock::now() - start); + if (elapsed_duration > timeout) { + MYLOGE("*** command '%s' timed out after %llums\n", title.c_str(), + elapsed_duration.count()); + break; + } + } +} + +static void RunDumpsysText(const std::string& title, int priority, + std::chrono::milliseconds timeout, + std::chrono::milliseconds service_timeout) { + DurationReporter duration_reporter(title); + dprintf(STDOUT_FILENO, "------ %s (/system/bin/dumpsys) ------\n", title.c_str()); + fsync(STDOUT_FILENO); + RunDumpsysTextByPriority(title, priority, timeout, service_timeout); +} + +/* Dump all services registered with Normal or Default priority. */ +static void RunDumpsysTextNormalPriority(const std::string& title, + std::chrono::milliseconds timeout, + std::chrono::milliseconds service_timeout) { + DurationReporter duration_reporter(title); + dprintf(STDOUT_FILENO, "------ %s (/system/bin/dumpsys) ------\n", title.c_str()); + fsync(STDOUT_FILENO); + RunDumpsysTextByPriority(title, IServiceManager::DUMP_FLAG_PRIORITY_NORMAL, timeout, + service_timeout); + RunDumpsysTextByPriority(title, IServiceManager::DUMP_FLAG_PRIORITY_DEFAULT, timeout, + service_timeout); +} + +static void RunDumpsysProto(const std::string& title, int priority, + std::chrono::milliseconds timeout, + std::chrono::milliseconds service_timeout) { + if (!ds.IsZipping()) { + MYLOGD("Not dumping %s because it's not a zipped bugreport\n", title.c_str()); + return; + } + sp<android::IServiceManager> sm = defaultServiceManager(); + Dumpsys dumpsys(sm.get()); + Vector<String16> args; + Dumpsys::setServiceArgs(args, /* asProto = */ true, priority); + DurationReporter duration_reporter(title); + + auto start = std::chrono::steady_clock::now(); + Vector<String16> services = dumpsys.listServices(priority, /* supports_proto = */ true); + for (const String16& service : services) { + std::string path(kProtoPath); + path.append(String8(service).c_str()); + if (priority == IServiceManager::DUMP_FLAG_PRIORITY_CRITICAL) { + path.append("_CRITICAL"); + } else if (priority == IServiceManager::DUMP_FLAG_PRIORITY_HIGH) { + path.append("_HIGH"); + } + path.append(kProtoExt); + DumpstateSectionReporter section_reporter(path, ds.listener_, ds.report_section_); + status_t status = dumpsys.startDumpThread(service, args); + if (status == OK) { + status = ds.AddZipEntryFromFd(path, dumpsys.getDumpFd(), service_timeout); + bool dumpTerminated = (status == OK); + dumpsys.stopDumpThread(dumpTerminated); + } + ZipWriter::FileEntry file_entry; + ds.zip_writer_->GetLastEntry(&file_entry); + section_reporter.setSize(file_entry.compressed_size); + section_reporter.setStatus(status); + + auto elapsed_duration = std::chrono::duration_cast<std::chrono::milliseconds>( + std::chrono::steady_clock::now() - start); + if (elapsed_duration > timeout) { + MYLOGE("*** command '%s' timed out after %llums\n", title.c_str(), + elapsed_duration.count()); + break; + } + } +} + +// Runs dumpsys on services that must dump first and and will take less than 100ms to dump. +static void RunDumpsysCritical() { + RunDumpsysText("DUMPSYS CRITICAL", IServiceManager::DUMP_FLAG_PRIORITY_CRITICAL, + /* timeout= */ 5s, /* service_timeout= */ 500ms); + RunDumpsysProto("DUMPSYS CRITICAL PROTO", IServiceManager::DUMP_FLAG_PRIORITY_CRITICAL, + /* timeout= */ 5s, /* service_timeout= */ 500ms); +} + +// Runs dumpsys on services that must dump first but can take up to 250ms to dump. +static void RunDumpsysHigh() { + // TODO meminfo takes ~10s, connectivity takes ~5sec to dump. They are both + // high priority. Reduce timeout once they are able to dump in a shorter time or + // moved to a parallel task. + RunDumpsysText("DUMPSYS HIGH", IServiceManager::DUMP_FLAG_PRIORITY_HIGH, + /* timeout= */ 90s, /* service_timeout= */ 30s); + RunDumpsysProto("DUMPSYS HIGH PROTO", IServiceManager::DUMP_FLAG_PRIORITY_HIGH, + /* timeout= */ 5s, /* service_timeout= */ 1s); +} + +// Runs dumpsys on services that must dump but can take up to 10s to dump. +static void RunDumpsysNormal() { + RunDumpsysTextNormalPriority("DUMPSYS", /* timeout= */ 90s, /* service_timeout= */ 10s); + RunDumpsysProto("DUMPSYS PROTO", IServiceManager::DUMP_FLAG_PRIORITY_NORMAL, + /* timeout= */ 90s, /* service_timeout= */ 10s); +} + +static void DumpHals() { + using android::hidl::manager::V1_0::IServiceManager; + using android::hardware::defaultServiceManager; + + sp<IServiceManager> sm = defaultServiceManager(); + if (sm == nullptr) { + MYLOGE("Could not retrieve hwservicemanager to dump hals.\n"); + return; + } + + auto ret = sm->list([&](const auto& interfaces) { + for (const std::string& interface : interfaces) { + std::string cleanName = interface; + std::replace_if(cleanName.begin(), + cleanName.end(), + [](char c) { + return !isalnum(c) && + std::string("@-_:.").find(c) == std::string::npos; + }, '_'); + const std::string path = kDumpstateBoardPath + "lshal_debug_" + cleanName; + + { + auto fd = android::base::unique_fd( + TEMP_FAILURE_RETRY(open(path.c_str(), + O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC | O_NOFOLLOW, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))); + if (fd < 0) { + MYLOGE("Could not open %s to dump additional hal information.\n", path.c_str()); + continue; + } + RunCommandToFd(fd, + "", + {"lshal", "debug", "-E", interface}, + CommandOptions::WithTimeout(2).AsRootIfAvailable().Build()); + + bool empty = 0 == lseek(fd, 0, SEEK_END); + if (!empty) { + ds.AddZipEntry("lshal-debug/" + cleanName + ".txt", path); + } + } + + unlink(path.c_str()); + } + }); + + if (!ret.isOk()) { + MYLOGE("Could not list hals from hwservicemanager.\n"); + } +} + static void dumpstate() { DurationReporter duration_reporter("DUMPSTATE"); dump_dev_files("TRUSTY VERSION", "/sys/bus/platform/drivers/trusty", "trusty_version"); - /* TODO: Remove duplicate uptime call when tools use it from header */ RunCommand("UPTIME", {"uptime"}); DumpBlockStatFiles(); dump_emmc_ecsd("/d/mmc0/mmc0:0001/ext_csd"); @@ -993,22 +1235,14 @@ static void dumpstate() { DumpFile("KERNEL SYNC", "/d/sync"); RunCommand("PROCESSES AND THREADS", - {"ps", "-A", "-T", "-Z", "-O", "pri,nice,rtprio,sched,pcy"}); + {"ps", "-A", "-T", "-Z", "-O", "pri,nice,rtprio,sched,pcy,time"}); RunCommand("LIBRANK", {"librank"}, CommandOptions::AS_ROOT); if (ds.IsZipping()) { - RunCommand( - "HARDWARE HALS", - {"lshal", std::string("--debug=") + kLsHalDebugPath}, - CommandOptions::WithTimeout(10).AsRootIfAvailable().Build()); - - ds.AddZipEntry("lshal-debug.txt", kLsHalDebugPath); - - unlink(kLsHalDebugPath.c_str()); + RunCommand("HARDWARE HALS", {"lshal"}, CommandOptions::WithTimeout(2).AsRootIfAvailable().Build()); + DumpHals(); } else { - RunCommand( - "HARDWARE HALS", {"lshal", "--debug"}, - CommandOptions::WithTimeout(10).AsRootIfAvailable().Build()); + RunCommand("HARDWARE HALS", {"lshal", "--debug"}, CommandOptions::WithTimeout(10).AsRootIfAvailable().Build()); } RunCommand("PRINTENV", {"printenv"}); @@ -1020,7 +1254,12 @@ static void dumpstate() { RunCommand("LSMOD", {"lsmod"}); } - do_dmesg(); + if (__android_logger_property_get_bool( + "ro.logd.kernel", BOOL_DEFAULT_TRUE | BOOL_DEFAULT_FLAG_ENG | BOOL_DEFAULT_FLAG_SVELTE)) { + DoKernelLogcat(); + } else { + do_dmesg(); + } RunCommand("LIST OF OPEN FILES", {"lsof"}, CommandOptions::AS_ROOT); for_each_pid(do_showmap, "SMAPS OF ALL PROCESSES"); @@ -1059,15 +1298,11 @@ static void dumpstate() { RunCommand("IPv6 ND CACHE", {"ip", "-6", "neigh", "show"}); RunCommand("MULTICAST ADDRESSES", {"ip", "maddr"}); - RunDumpsys("NETWORK DIAGNOSTICS", {"connectivity", "--diag"}, - CommandOptions::WithTimeout(10).Build()); + RunDumpsysHigh(); RunCommand("SYSTEM PROPERTIES", {"getprop"}); - RunCommand("VOLD DUMP", {"vdc", "dump"}); - RunCommand("SECURE CONTAINERS", {"vdc", "asec", "list"}); - - RunCommand("STORAGED TASKIOINFO", {"storaged", "-u"}, CommandOptions::WithTimeout(10).Build()); + RunCommand("STORAGED IO INFO", {"storaged", "-u", "-p"}); RunCommand("FILESYSTEMS & FREE SPACE", {"df"}); @@ -1080,6 +1315,11 @@ static void dumpstate() { DumpFile("BINDER STATS", "/sys/kernel/debug/binder/stats"); DumpFile("BINDER STATE", "/sys/kernel/debug/binder/state"); + /* Add window and surface trace files. */ + if (!PropertiesHelper::IsUserBuild()) { + ds.AddDir(WMTRACE_DATA_DIR, false); + } + ds.DumpstateBoard(); /* Migrate the ril_dumpstate to a device specific dumpstate? */ @@ -1100,8 +1340,7 @@ static void dumpstate() { printf("== Android Framework Services\n"); printf("========================================================\n"); - RunDumpsys("DUMPSYS", {"--skip", "meminfo", "cpuinfo"}, CommandOptions::WithTimeout(90).Build(), - 10); + RunDumpsysNormal(); printf("========================================================\n"); printf("== Checkins\n"); @@ -1118,19 +1357,40 @@ static void dumpstate() { printf("== Running Application Activities\n"); printf("========================================================\n"); - RunDumpsys("APP ACTIVITIES", {"activity", "-v", "all"}); + // The following dumpsys internally collects output from running apps, so it can take a long + // time. So let's extend the timeout. + + const CommandOptions DUMPSYS_COMPONENTS_OPTIONS = CommandOptions::WithTimeout(60).Build(); + + RunDumpsys("APP ACTIVITIES", {"activity", "-v", "all"}, DUMPSYS_COMPONENTS_OPTIONS); + + printf("========================================================\n"); + printf("== Running Application Services (platform)\n"); + printf("========================================================\n"); + + RunDumpsys("APP SERVICES PLATFORM", {"activity", "service", "all-platform"}, + DUMPSYS_COMPONENTS_OPTIONS); printf("========================================================\n"); - printf("== Running Application Services\n"); + printf("== Running Application Services (non-platform)\n"); printf("========================================================\n"); - RunDumpsys("APP SERVICES", {"activity", "service", "all"}); + RunDumpsys("APP SERVICES NON-PLATFORM", {"activity", "service", "all-non-platform"}, + DUMPSYS_COMPONENTS_OPTIONS); printf("========================================================\n"); - printf("== Running Application Providers\n"); + printf("== Running Application Providers (platform)\n"); printf("========================================================\n"); - RunDumpsys("APP PROVIDERS", {"activity", "provider", "all"}); + RunDumpsys("APP PROVIDERS PLATFORM", {"activity", "provider", "all-platform"}, + DUMPSYS_COMPONENTS_OPTIONS); + + printf("========================================================\n"); + printf("== Running Application Providers (non-platform)\n"); + printf("========================================================\n"); + + RunDumpsys("APP PROVIDERS NON-PLATFORM", {"activity", "provider", "all-non-platform"}, + DUMPSYS_COMPONENTS_OPTIONS); printf("========================================================\n"); printf("== Dropbox crashes\n"); @@ -1147,10 +1407,8 @@ static void dumpstate() { printf("========================================================\n"); } -// This method collects dumpsys for telephony debugging only -static void DumpstateTelephonyOnly() { - DurationReporter duration_reporter("DUMPSTATE"); - +// This method collects common dumpsys for telephony and wifi +static void DumpstateRadioCommon() { DumpIpTablesAsRoot(); if (!DropRootUser()) { @@ -1166,6 +1424,14 @@ static void DumpstateTelephonyOnly() { RunDumpsys("NETWORK DIAGNOSTICS", {"connectivity", "--diag"}, CommandOptions::WithTimeout(10).Build()); +} + +// This method collects dumpsys for telephony debugging only +static void DumpstateTelephonyOnly() { + DurationReporter duration_reporter("DUMPSTATE"); + const CommandOptions DUMPSYS_COMPONENTS_OPTIONS = CommandOptions::WithTimeout(60).Build(); + + DumpstateRadioCommon(); RunCommand("SYSTEM PROPERTIES", {"getprop"}); @@ -1173,8 +1439,14 @@ static void DumpstateTelephonyOnly() { printf("== Android Framework Services\n"); printf("========================================================\n"); - RunDumpsys("DUMPSYS", {"connectivity"}, CommandOptions::WithTimeout(90).Build(), 10); - RunDumpsys("DUMPSYS", {"carrier_config"}, CommandOptions::WithTimeout(90).Build(), 10); + RunDumpsys("DUMPSYS", {"connectivity"}, CommandOptions::WithTimeout(90).Build(), + SEC_TO_MSEC(10)); + RunDumpsys("DUMPSYS", {"carrier_config"}, CommandOptions::WithTimeout(90).Build(), + SEC_TO_MSEC(10)); + RunDumpsys("DUMPSYS", {"wifi"}, CommandOptions::WithTimeout(90).Build(), + SEC_TO_MSEC(10)); + RunDumpsys("BATTERYSTATS", {"batterystats"}, CommandOptions::WithTimeout(90).Build(), + SEC_TO_MSEC(10)); printf("========================================================\n"); printf("== Running Application Services\n"); @@ -1183,6 +1455,33 @@ static void DumpstateTelephonyOnly() { RunDumpsys("TELEPHONY SERVICES", {"activity", "service", "TelephonyDebugService"}); printf("========================================================\n"); + printf("== Running Application Services (non-platform)\n"); + printf("========================================================\n"); + + RunDumpsys("APP SERVICES NON-PLATFORM", {"activity", "service", "all-non-platform"}, + DUMPSYS_COMPONENTS_OPTIONS); + + printf("========================================================\n"); + printf("== dumpstate: done (id %d)\n", ds.id_); + printf("========================================================\n"); +} + +// This method collects dumpsys for wifi debugging only +static void DumpstateWifiOnly() { + DurationReporter duration_reporter("DUMPSTATE"); + + DumpstateRadioCommon(); + + printf("========================================================\n"); + printf("== Android Framework Services\n"); + printf("========================================================\n"); + + RunDumpsys("DUMPSYS", {"connectivity"}, CommandOptions::WithTimeout(90).Build(), + SEC_TO_MSEC(10)); + RunDumpsys("DUMPSYS", {"wifi"}, CommandOptions::WithTimeout(90).Build(), + SEC_TO_MSEC(10)); + + printf("========================================================\n"); printf("== dumpstate: done (id %d)\n", ds.id_); printf("========================================================\n"); } @@ -1193,77 +1492,112 @@ void Dumpstate::DumpstateBoard() { printf("== Board\n"); printf("========================================================\n"); - ::android::sp<IDumpstateDevice> dumpstate_device(IDumpstateDevice::getService()); - if (dumpstate_device == nullptr) { - MYLOGE("No IDumpstateDevice implementation\n"); - return; - } - if (!IsZipping()) { MYLOGD("Not dumping board info because it's not a zipped bugreport\n"); return; } - std::string path[NUM_OF_DUMPS]; - android::base::unique_fd fd[NUM_OF_DUMPS]; - int numFds = 0; - + std::vector<std::string> paths; + std::vector<android::base::ScopeGuard<std::function<void()>>> remover; for (int i = 0; i < NUM_OF_DUMPS; i++) { - path[i] = kDumpstateBoardPath + kDumpstateBoardFiles[i]; - MYLOGI("Calling IDumpstateDevice implementation using path %s\n", path[i].c_str()); - - fd[i] = android::base::unique_fd( - TEMP_FAILURE_RETRY(open(path[i].c_str(), - O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC | O_NOFOLLOW, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))); - if (fd[i] < 0) { - MYLOGE("Could not open file %s: %s\n", path[i].c_str(), strerror(errno)); - return; - } else { - numFds++; - } + paths.emplace_back(kDumpstateBoardPath + kDumpstateBoardFiles[i]); + remover.emplace_back(android::base::make_scope_guard(std::bind( + [](std::string path) { + if (remove(path.c_str()) != 0 && errno != ENOENT) { + MYLOGE("Could not remove(%s): %s\n", path.c_str(), strerror(errno)); + } + }, + paths[i]))); } - native_handle_t *handle = native_handle_create(numFds, 0); + sp<IDumpstateDevice> dumpstate_device(IDumpstateDevice::getService()); + if (dumpstate_device == nullptr) { + MYLOGE("No IDumpstateDevice implementation\n"); + return; + } + + using ScopedNativeHandle = + std::unique_ptr<native_handle_t, std::function<void(native_handle_t*)>>; + ScopedNativeHandle handle(native_handle_create(static_cast<int>(paths.size()), 0), + [](native_handle_t* handle) { + native_handle_close(handle); + native_handle_delete(handle); + }); if (handle == nullptr) { MYLOGE("Could not create native_handle\n"); return; } - for (int i = 0; i < numFds; i++) { - handle->data[i] = fd[i].release(); - } + for (size_t i = 0; i < paths.size(); i++) { + MYLOGI("Calling IDumpstateDevice implementation using path %s\n", paths[i].c_str()); - // TODO: need a timeout mechanism so dumpstate does not hang on device implementation call. - android::hardware::Return<void> status = dumpstate_device->dumpstateBoard(handle); - if (!status.isOk()) { - MYLOGE("dumpstateBoard failed: %s\n", status.description().c_str()); - native_handle_close(handle); - native_handle_delete(handle); - return; + android::base::unique_fd fd(TEMP_FAILURE_RETRY( + open(paths[i].c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC | O_NOFOLLOW, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))); + if (fd < 0) { + MYLOGE("Could not open file %s: %s\n", paths[i].c_str(), strerror(errno)); + return; + } + handle.get()->data[i] = fd.release(); + } + + // Given that bugreport is required to diagnose failures, it's better to + // set an arbitrary amount of timeout for IDumpstateDevice than to block the + // rest of bugreport. In the timeout case, we will kill dumpstate board HAL + // and grab whatever dumped + std::packaged_task<bool()> + dumpstate_task([paths, dumpstate_device, &handle]() -> bool { + android::hardware::Return<void> status = dumpstate_device->dumpstateBoard(handle.get()); + if (!status.isOk()) { + MYLOGE("dumpstateBoard failed: %s\n", status.description().c_str()); + return false; + } + return true; + }); + + auto result = dumpstate_task.get_future(); + std::thread(std::move(dumpstate_task)).detach(); + + constexpr size_t timeout_sec = 30; + if (result.wait_for(std::chrono::seconds(timeout_sec)) != std::future_status::ready) { + MYLOGE("dumpstateBoard timed out after %zus, killing dumpstate vendor HAL\n", timeout_sec); + if (!android::base::SetProperty("ctl.interface_restart", + android::base::StringPrintf("%s/default", + IDumpstateDevice::descriptor))) { + MYLOGE("Couldn't restart dumpstate HAL\n"); + } + } + // Wait some time for init to kill dumpstate vendor HAL + constexpr size_t killing_timeout_sec = 10; + if (result.wait_for(std::chrono::seconds(killing_timeout_sec)) != std::future_status::ready) { + MYLOGE("killing dumpstateBoard timed out after %zus, continue and " + "there might be racing in content\n", killing_timeout_sec); } - for (int i = 0; i < numFds; i++) { + auto file_sizes = std::make_unique<ssize_t[]>(paths.size()); + for (size_t i = 0; i < paths.size(); i++) { struct stat s; - if (fstat(handle->data[i], &s) == -1) { - MYLOGE("Failed to fstat %s: %d\n", kDumpstateBoardFiles[i].c_str(), errno); - } else if (s.st_size > 0) { - AddZipEntry(kDumpstateBoardFiles[i], path[i]); - } else { - MYLOGE("Ignoring empty %s\n", kDumpstateBoardFiles[i].c_str()); + if (fstat(handle.get()->data[i], &s) == -1) { + MYLOGE("Failed to fstat %s: %s\n", kDumpstateBoardFiles[i].c_str(), + strerror(errno)); + file_sizes[i] = -1; + continue; } + file_sizes[i] = s.st_size; } - printf("*** See dumpstate-board.txt entry ***\n"); - - native_handle_close(handle); - native_handle_delete(handle); - - for (int i = 0; i < numFds; i++) { - if (remove(path[i].c_str()) != 0) { - MYLOGE("Could not remove(%s): %s\n", path[i].c_str(), strerror(errno)); + for (size_t i = 0; i < paths.size(); i++) { + if (file_sizes[i] == -1) { + continue; } + if (file_sizes[i] == 0) { + MYLOGE("Ignoring empty %s\n", kDumpstateBoardFiles[i].c_str()); + continue; + } + AddZipEntry(kDumpstateBoardFiles[i], paths[i]); } + + printf("*** See dumpstate-board.txt entry ***\n"); } static void ShowUsageAndExit(int exitCode = 1) { @@ -1294,20 +1628,8 @@ static void ExitOnInvalidArgs() { ShowUsageAndExit(); } -static void sig_handler(int) { - _exit(EXIT_FAILURE); -} - static void register_sig_handler() { - struct sigaction sa; - sigemptyset(&sa.sa_mask); - sa.sa_flags = 0; - sa.sa_handler = sig_handler; - sigaction(SIGPIPE, &sa, NULL); // broken pipe - sigaction(SIGSEGV, &sa, NULL); // segment fault - sigaction(SIGINT, &sa, NULL); // ctrl-c - sigaction(SIGTERM, &sa, NULL); // killed - sigaction(SIGQUIT, &sa, NULL); // quit + signal(SIGPIPE, SIG_IGN); } bool Dumpstate::FinishZipFile() { @@ -1419,7 +1741,8 @@ static void Vibrate(int duration_ms) { // clang-format on } -int main(int argc, char *argv[]) { +/** Main entry point for dumpstate. */ +int run_main(int argc, char* argv[]) { int do_add_date = 0; int do_zip_file = 0; int do_vibrate = 1; @@ -1432,6 +1755,9 @@ int main(int argc, char *argv[]) { bool show_header_only = false; bool do_start_service = false; bool telephony_only = false; + bool wifi_only = false; + int dup_stdout_fd; + int dup_stderr_fd; /* set as high priority, and protect from OOM killer */ setpriority(PRIO_PROCESS, 0, -20); @@ -1500,8 +1826,12 @@ int main(int argc, char *argv[]) { } else if (ds.extra_options_ == "bugreportwear") { do_start_service = true; ds.update_progress_ = true; + do_zip_file = 1; } else if (ds.extra_options_ == "bugreporttelephony") { telephony_only = true; + } else if (ds.extra_options_ == "bugreportwifi") { + wifi_only = true; + do_zip_file = 1; } else { MYLOGE("Unknown extra option: %s\n", ds.extra_options_.c_str()); } @@ -1620,6 +1950,8 @@ int main(int argc, char *argv[]) { if (telephony_only) { ds.base_name_ += "-telephony"; + } else if (wifi_only) { + ds.base_name_ += "-wifi"; } if (do_fb) { @@ -1701,11 +2033,13 @@ int main(int argc, char *argv[]) { } if (is_redirecting) { + TEMP_FAILURE_RETRY(dup_stderr_fd = dup(fileno(stderr))); redirect_to_file(stderr, const_cast<char*>(ds.log_path_.c_str())); if (chown(ds.log_path_.c_str(), AID_SHELL, AID_SHELL)) { MYLOGE("Unable to change ownership of dumpstate log file %s: %s\n", ds.log_path_.c_str(), strerror(errno)); } + TEMP_FAILURE_RETRY(dup_stdout_fd = dup(fileno(stdout))); /* TODO: rather than generating a text file now and zipping it later, it would be more efficient to redirect stdout to the zip entry directly, but the libziparchive doesn't support that option yet. */ @@ -1727,6 +2061,8 @@ int main(int argc, char *argv[]) { if (telephony_only) { DumpstateTelephonyOnly(); ds.DumpstateBoard(); + } else if (wifi_only) { + DumpstateWifiOnly(); } else { // Dumps systrace right away, otherwise it will be filled with unnecessary events. // First try to dump anrd trace if the daemon is running. Otherwise, dump @@ -1737,10 +2073,7 @@ int main(int argc, char *argv[]) { // Invoking the following dumpsys calls before dump_traces() to try and // keep the system stats as close to its initial state as possible. - RunDumpsys("DUMPSYS MEMINFO", {"meminfo", "-a"}, - CommandOptions::WithTimeout(90).DropRoot().Build()); - RunDumpsys("DUMPSYS CPUINFO", {"cpuinfo", "-a"}, - CommandOptions::WithTimeout(10).DropRoot().Build()); + RunDumpsysCritical(); // TODO: Drop root user and move into dumpstate() once b/28633932 is fixed. dump_raft(); @@ -1771,6 +2104,9 @@ int main(int argc, char *argv[]) { RunCommand("DETAILED SOCKET STATE", {"ss", "-eionptu"}, CommandOptions::WithTimeout(10).Build()); + // Run iotop as root to show top 100 IO threads + RunCommand("IOTOP", {"iotop", "-n", "1", "-m", "100"}); + if (!DropRootUser()) { return -1; } @@ -1780,7 +2116,7 @@ int main(int argc, char *argv[]) { /* close output if needed */ if (is_redirecting) { - fclose(stdout); + TEMP_FAILURE_RETRY(dup2(dup_stdout_fd, fileno(stdout))); } /* rename or zip the (now complete) .tmp file to its final location */ @@ -1913,7 +2249,7 @@ int main(int argc, char *argv[]) { MYLOGI("done (id %d)\n", ds.id_); if (is_redirecting) { - fclose(stderr); + TEMP_FAILURE_RETRY(dup2(dup_stderr_fd, fileno(stderr))); } if (use_control_socket && ds.control_socket_fd_ != -1) { |