diff options
| author | 2018-04-05 12:17:20 -0700 | |
|---|---|---|
| committer | 2018-04-05 15:43:59 -0700 | |
| commit | 587eac9815d8994f0d9f0b7584402aae353bcb2c (patch) | |
| tree | 914a9f1a4a180258d0e1f32a66002594f90f01c7 | |
| parent | 92792400df94977b06b631aac87892cc65a1882c (diff) | |
dumpstate: Kill vendor dumpstate HAL and grab partial dump in timeout
dumpstate_board should not block forever but it is also a valuable
source for debugging system problems. In timeout case, we should kill
dumpstate HAL in case it hangs (e.g. cat from a pipe or so), so the
following calls can be served by HAL service.
Bug: 77489941
Test: simulate delay in dumpstate HAL and get BR, see partial content
from dumpstate_board.tx and below log from dumpstate_log.txt
dumpstateBoard timed out after 10s, killing dumpstate vendor HAL
dumpstateBoard failed: Status(EX_TRANSACTION_FAILED): 'DEAD_OBJECT: '
Change-Id: Iadfd0c234c26714b6a2f1aa2941a85d4e01d2cbc
| -rw-r--r-- | cmds/dumpstate/dumpstate.cpp | 114 |
1 files changed, 62 insertions, 52 deletions
diff --git a/cmds/dumpstate/dumpstate.cpp b/cmds/dumpstate/dumpstate.cpp index 19bf216a57..5ee543cb6f 100644 --- a/cmds/dumpstate/dumpstate.cpp +++ b/cmds/dumpstate/dumpstate.cpp @@ -1236,7 +1236,6 @@ static void RunDumpsysNormal() { } static void DumpHals() { - using android::sp; using android::hidl::manager::V1_0::IServiceManager; using android::hardware::defaultServiceManager; @@ -1560,69 +1559,80 @@ void Dumpstate::DumpstateBoard() { paths[i]))); } - // Given that bugreport is required to diagnose failures, it's better to - // drop the result of IDumpstateDevice than to block the rest of bugreport - // for an arbitrary amount of time. - std::packaged_task<std::unique_ptr<ssize_t[]>()> - dumpstate_task([paths]() -> std::unique_ptr<ssize_t[]> { - ::android::sp<IDumpstateDevice> dumpstate_device(IDumpstateDevice::getService()); - if (dumpstate_device == nullptr) { - MYLOGE("No IDumpstateDevice implementation\n"); - return nullptr; - } + sp<IDumpstateDevice> dumpstate_device(IDumpstateDevice::getService()); + if (dumpstate_device == nullptr) { + MYLOGE("No IDumpstateDevice implementation\n"); + return; + } - using ScopedNativeHandle = - std::unique_ptr<native_handle_t, std::function<void(native_handle_t*)>>; - ScopedNativeHandle handle(native_handle_create(static_cast<int>(paths.size()), 0), - [](native_handle_t* handle) { - native_handle_close(handle); - native_handle_delete(handle); - }); - if (handle == nullptr) { - MYLOGE("Could not create native_handle\n"); - return nullptr; - } + using ScopedNativeHandle = + std::unique_ptr<native_handle_t, std::function<void(native_handle_t*)>>; + ScopedNativeHandle handle(native_handle_create(static_cast<int>(paths.size()), 0), + [](native_handle_t* handle) { + native_handle_close(handle); + native_handle_delete(handle); + }); + if (handle == nullptr) { + MYLOGE("Could not create native_handle\n"); + return; + } - for (size_t i = 0; i < paths.size(); i++) { - MYLOGI("Calling IDumpstateDevice implementation using path %s\n", paths[i].c_str()); + for (size_t i = 0; i < paths.size(); i++) { + MYLOGI("Calling IDumpstateDevice implementation using path %s\n", paths[i].c_str()); - android::base::unique_fd fd(TEMP_FAILURE_RETRY( - open(paths[i].c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC | O_NOFOLLOW, - S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))); - if (fd < 0) { - MYLOGE("Could not open file %s: %s\n", paths[i].c_str(), strerror(errno)); - return nullptr; - } - handle.get()->data[i] = fd.release(); - } + android::base::unique_fd fd(TEMP_FAILURE_RETRY( + open(paths[i].c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC | O_NOFOLLOW, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))); + if (fd < 0) { + MYLOGE("Could not open file %s: %s\n", paths[i].c_str(), strerror(errno)); + return; + } + handle.get()->data[i] = fd.release(); + } + // Given that bugreport is required to diagnose failures, it's better to + // set an arbitrary amount of timeout for IDumpstateDevice than to block the + // rest of bugreport. In the timeout case, we will kill dumpstate board HAL + // and grab whatever dumped + std::packaged_task<bool()> + dumpstate_task([paths, dumpstate_device, &handle]() -> bool { android::hardware::Return<void> status = dumpstate_device->dumpstateBoard(handle.get()); if (!status.isOk()) { MYLOGE("dumpstateBoard failed: %s\n", status.description().c_str()); - return nullptr; + return false; } - auto file_sizes = std::make_unique<ssize_t[]>(paths.size()); - for (size_t i = 0; i < paths.size(); i++) { - struct stat s; - if (fstat(handle.get()->data[i], &s) == -1) { - MYLOGE("Failed to fstat %s: %s\n", kDumpstateBoardFiles[i].c_str(), - strerror(errno)); - file_sizes[i] = -1; - continue; - } - file_sizes[i] = s.st_size; - } - return file_sizes; + return true; }); + auto result = dumpstate_task.get_future(); std::thread(std::move(dumpstate_task)).detach(); - if (result.wait_for(30s) != std::future_status::ready) { - MYLOGE("dumpstateBoard timed out after 30s\n"); - return; + + constexpr size_t timeout_sec = 30; + if (result.wait_for(std::chrono::seconds(timeout_sec)) != std::future_status::ready) { + MYLOGE("dumpstateBoard timed out after %zus, killing dumpstate vendor HAL\n", timeout_sec); + if (!android::base::SetProperty("ctl.interface_restart", + android::base::StringPrintf("%s/default", + IDumpstateDevice::descriptor))) { + MYLOGE("Couldn't restart dumpstate HAL\n"); + } } - std::unique_ptr<ssize_t[]> file_sizes = result.get(); - if (file_sizes == nullptr) { - return; + // Wait some time for init to kill dumpstate vendor HAL + constexpr size_t killing_timeout_sec = 10; + if (result.wait_for(std::chrono::seconds(killing_timeout_sec)) != std::future_status::ready) { + MYLOGE("killing dumpstateBoard timed out after %zus, continue and " + "there might be racing in content\n", killing_timeout_sec); + } + + auto file_sizes = std::make_unique<ssize_t[]>(paths.size()); + for (size_t i = 0; i < paths.size(); i++) { + struct stat s; + if (fstat(handle.get()->data[i], &s) == -1) { + MYLOGE("Failed to fstat %s: %s\n", kDumpstateBoardFiles[i].c_str(), + strerror(errno)); + file_sizes[i] = -1; + continue; + } + file_sizes[i] = s.st_size; } for (size_t i = 0; i < paths.size(); i++) { |