| /* |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include <csignal> |
| #include "mediautils/TimerThread.h" |
| #define LOG_TAG "TimeCheck" |
| |
| #include <optional> |
| |
| #include <android-base/logging.h> |
| #include <android-base/strings.h> |
| #include <audio_utils/clock.h> |
| #include <mediautils/EventLog.h> |
| #include <mediautils/FixedString.h> |
| #include <mediautils/MethodStatistics.h> |
| #include <mediautils/TimeCheck.h> |
| #include <mediautils/TidWrapper.h> |
| #include <utils/Log.h> |
| |
| #if defined(__ANDROID__) |
| #include "debuggerd/handler.h" |
| #endif |
| |
| |
| namespace android::mediautils { |
| // This function appropriately signals a pid to dump a backtrace if we are |
| // running on device (and the HAL exists). If we are not running on an Android |
| // device, there is no HAL to signal (so we do nothing). |
| static inline void signalAudioHAL([[maybe_unused]] pid_t pid) { |
| #if defined(__ANDROID__) |
| sigqueue(pid, DEBUGGER_SIGNAL, {.sival_int = 0}); |
| #endif |
| } |
| |
| /** |
| * Returns the std::string "HH:MM:SS.MSc" from a system_clock time_point. |
| */ |
| std::string formatTime(std::chrono::system_clock::time_point t) { |
| auto time_string = audio_utils_time_string_from_ns( |
| std::chrono::nanoseconds(t.time_since_epoch()).count()); |
| |
| // The time string is 19 characters (including null termination). |
| // Example: "03-27 16:47:06.187" |
| // MM DD HH MM SS MS |
| // We offset by 6 to get HH:MM:SS.MSc |
| // |
| return time_string.time + 6; // offset to remove month/day. |
| } |
| |
| /** |
| * Finds the end of the common time prefix. |
| * |
| * This is as an option to remove the common time prefix to avoid |
| * unnecessary duplicated strings. |
| * |
| * \param time1 a time string |
| * \param time2 a time string |
| * \return the position where the common time prefix ends. For abbreviated |
| * printing of time2, offset the character pointer by this position. |
| */ |
| static size_t commonTimePrefixPosition(std::string_view time1, std::string_view time2) { |
| const size_t endPos = std::min(time1.size(), time2.size()); |
| size_t i; |
| |
| // Find location of the first mismatch between strings |
| for (i = 0; ; ++i) { |
| if (i == endPos) { |
| return i; // strings match completely to the length of one of the strings. |
| } |
| if (time1[i] != time2[i]) { |
| break; |
| } |
| if (time1[i] == '\0') { |
| return i; // "printed" strings match completely. No need to check further. |
| } |
| } |
| |
| // Go backwards until we find a delimeter or space. |
| for (; i > 0 |
| && isdigit(time1[i]) // still a number |
| && time1[i - 1] != ' ' |
| ; --i) { |
| } |
| return i; |
| } |
| |
| /** |
| * Returns the unique suffix of time2 that isn't present in time1. |
| * |
| * If time2 is identical to time1, then an empty string_view is returned. |
| * This method is used to elide the common prefix when printing times. |
| */ |
| std::string_view timeSuffix(std::string_view time1, std::string_view time2) { |
| const size_t pos = commonTimePrefixPosition(time1, time2); |
| return time2.substr(pos); |
| } |
| |
| // Audio HAL server pids vector used to generate audio HAL processes tombstone |
| // when audioserver watchdog triggers. |
| // We use a lockless storage to avoid potential deadlocks in the context of watchdog |
| // trigger. |
| // Protection again simultaneous writes is not needed given one update takes place |
| // during AudioFlinger construction and other comes necessarily later once the IAudioFlinger |
| // interface is available. |
| // The use of an atomic index just guaranties that current vector is fully initialized |
| // when read. |
| /* static */ |
| void TimeCheck::accessAudioHalPids(std::vector<pid_t>* pids, bool update) { |
| static constexpr int kNumAudioHalPidsVectors = 3; |
| static std::vector<pid_t> audioHalPids[kNumAudioHalPidsVectors]; |
| static std::atomic<unsigned> curAudioHalPids = 0; |
| |
| if (update) { |
| audioHalPids[(curAudioHalPids++ + 1) % kNumAudioHalPidsVectors] = *pids; |
| } else { |
| *pids = audioHalPids[curAudioHalPids % kNumAudioHalPidsVectors]; |
| } |
| } |
| |
| /* static */ |
| void TimeCheck::setAudioHalPids(const std::vector<pid_t>& pids) { |
| accessAudioHalPids(&(const_cast<std::vector<pid_t>&>(pids)), true); |
| } |
| |
| /* static */ |
| std::vector<pid_t> TimeCheck::getAudioHalPids() { |
| std::vector<pid_t> pids; |
| accessAudioHalPids(&pids, false); |
| return pids; |
| } |
| |
| /* static */ |
| TimerThread& TimeCheck::getTimeCheckThread() { |
| static TimerThread sTimeCheckThread{}; |
| return sTimeCheckThread; |
| } |
| |
| /* static */ |
| std::string TimeCheck::toString() { |
| // note pending and retired are individually locked for maximum concurrency, |
| // snapshot is not instantaneous at a single time. |
| return getTimeCheckThread().getSnapshotAnalysis().toString(); |
| } |
| |
| TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, Duration requestedTimeoutDuration, |
| Duration secondChanceDuration, bool crashOnTimeout) |
| : mTimeCheckHandler{ std::make_shared<TimeCheckHandler>( |
| tag, std::move(onTimer), crashOnTimeout, requestedTimeoutDuration, |
| secondChanceDuration, std::chrono::system_clock::now(), getThreadIdWrapper()) } |
| , mTimerHandle(requestedTimeoutDuration.count() == 0 |
| /* for TimeCheck we don't consider a non-zero secondChanceDuration here */ |
| ? getTimeCheckThread().trackTask(mTimeCheckHandler->tag) |
| : getTimeCheckThread().scheduleTask( |
| mTimeCheckHandler->tag, |
| // Pass in all the arguments by value to this task for safety. |
| // The thread could call the callback before the constructor is finished. |
| // The destructor is not blocked on callback. |
| [ timeCheckHandler = mTimeCheckHandler ](TimerThread::Handle timerHandle) { |
| timeCheckHandler->onTimeout(timerHandle); |
| }, |
| requestedTimeoutDuration, |
| secondChanceDuration)) {} |
| |
| TimeCheck::~TimeCheck() { |
| if (mTimeCheckHandler) { |
| mTimeCheckHandler->onCancel(mTimerHandle); |
| } |
| } |
| |
| /* static */ |
| std::string TimeCheck::analyzeTimeouts( |
| float requestedTimeoutMs, float elapsedSteadyMs, float elapsedSystemMs) { |
| // Track any OS clock issues with suspend. |
| // It is possible that the elapsedSystemMs is much greater than elapsedSteadyMs if |
| // a suspend occurs; however, we always expect the timeout ms should always be slightly |
| // less than the elapsed steady ms regardless of whether a suspend occurs or not. |
| |
| std::string s("Timeout ms "); |
| s.append(std::to_string(requestedTimeoutMs)) |
| .append(" elapsed steady ms ").append(std::to_string(elapsedSteadyMs)) |
| .append(" elapsed system ms ").append(std::to_string(elapsedSystemMs)); |
| |
| // Is there something unusual? |
| static constexpr float TOLERANCE_CONTEXT_SWITCH_MS = 200.f; |
| |
| if (requestedTimeoutMs > elapsedSteadyMs || requestedTimeoutMs > elapsedSystemMs) { |
| s.append("\nError: early expiration - " |
| "requestedTimeoutMs should be less than elapsed time"); |
| } |
| |
| if (elapsedSteadyMs > elapsedSystemMs + TOLERANCE_CONTEXT_SWITCH_MS) { |
| s.append("\nWarning: steady time should not advance faster than system time"); |
| } |
| |
| // This has been found in suspend stress testing. |
| if (elapsedSteadyMs > requestedTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) { |
| s.append("\nWarning: steady time significantly exceeds timeout " |
| "- possible thread stall or aborted suspend"); |
| } |
| |
| // This has been found in suspend stress testing. |
| if (elapsedSystemMs > requestedTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) { |
| s.append("\nInformation: system time significantly exceeds timeout " |
| "- possible suspend"); |
| } |
| return s; |
| } |
| |
| // To avoid any potential race conditions, the timer handle |
| // (expiration = clock steady start + timeout) is passed into the callback. |
| void TimeCheck::TimeCheckHandler::onCancel(TimerThread::Handle timerHandle) const |
| { |
| if (TimeCheck::getTimeCheckThread().cancelTask(timerHandle) && onTimer) { |
| const std::chrono::steady_clock::time_point endSteadyTime = |
| std::chrono::steady_clock::now(); |
| const float elapsedSteadyMs = std::chrono::duration_cast<FloatMs>( |
| endSteadyTime - timerHandle + timeoutDuration).count(); |
| // send the elapsed steady time for statistics. |
| onTimer(false /* timeout */, elapsedSteadyMs); |
| } |
| } |
| |
| // To avoid any potential race conditions, the timer handle |
| // (expiration = clock steady start + timeout) is passed into the callback. |
| void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) const |
| { |
| const std::chrono::steady_clock::time_point endSteadyTime = std::chrono::steady_clock::now(); |
| const std::chrono::system_clock::time_point endSystemTime = std::chrono::system_clock::now(); |
| // timerHandle incorporates the timeout |
| const float elapsedSteadyMs = std::chrono::duration_cast<FloatMs>( |
| endSteadyTime - (timerHandle - timeoutDuration)).count(); |
| const float elapsedSystemMs = std::chrono::duration_cast<FloatMs>( |
| endSystemTime - startSystemTime).count(); |
| const float requestedTimeoutMs = std::chrono::duration_cast<FloatMs>( |
| timeoutDuration).count(); |
| const float secondChanceMs = std::chrono::duration_cast<FloatMs>( |
| secondChanceDuration).count(); |
| |
| if (onTimer) { |
| onTimer(true /* timeout */, elapsedSteadyMs); |
| } |
| |
| if (!crashOnTimeout) return; |
| |
| // Generate the TimerThread summary string early before sending signals to the |
| // HAL processes which can affect thread behavior. |
| const auto snapshotAnalysis = getTimeCheckThread().getSnapshotAnalysis(4 /* retiredCount */); |
| |
| // Generate audio HAL processes tombstones and allow time to complete |
| // before forcing restart |
| std::vector<pid_t> pids = TimeCheck::getAudioHalPids(); |
| std::string halPids = "HAL pids [ "; |
| if (pids.size() != 0) { |
| for (const auto& pid : pids) { |
| ALOGI("requesting tombstone for pid: %d", pid); |
| halPids.append(std::to_string(pid)).append(" "); |
| signalAudioHAL(pid); |
| } |
| sleep(1); |
| } else { |
| ALOGI("No HAL process pid available, skipping tombstones"); |
| } |
| halPids.append("]"); |
| |
| LOG_EVENT_STRING(LOGTAG_AUDIO_BINDER_TIMEOUT, tag.c_str()); |
| |
| // Create abort message string - caution: this can be very large. |
| const std::string abortMessage = std::string("TimeCheck timeout for ") |
| .append(tag) |
| .append(" scheduled ").append(formatTime(startSystemTime)) |
| .append(" on thread ").append(std::to_string(tid)).append("\n") |
| .append(analyzeTimeouts(requestedTimeoutMs + secondChanceMs, |
| elapsedSteadyMs, elapsedSystemMs)).append("\n") |
| .append(halPids).append("\n") |
| .append(snapshotAnalysis.toString()); |
| |
| // In many cases, the initial timeout stack differs from the abort backtrace because |
| // (1) the time difference between initial timeout and the final abort signal |
| // and (2) signalling the HAL audio service may cause |
| // the thread to unblock and continue. |
| |
| // Note: LOG_ALWAYS_FATAL limits the size of the string - per log/log.h: |
| // Log message text may be truncated to less than an |
| // implementation-specific limit (1023 bytes). |
| // |
| // Here, we send the string through android-base/logging.h LOG() |
| // to avoid the size limitation. LOG(FATAL) does an abort whereas |
| // LOG(FATAL_WITHOUT_ABORT) does not abort. |
| |
| static constexpr pid_t invalidPid = TimerThread::SnapshotAnalysis::INVALID_PID; |
| pid_t tidToAbort = invalidPid; |
| if (snapshotAnalysis.suspectTid != invalidPid) { |
| tidToAbort = snapshotAnalysis.suspectTid; |
| } else if (snapshotAnalysis.timeoutTid != invalidPid) { |
| tidToAbort = snapshotAnalysis.timeoutTid; |
| } |
| |
| LOG(FATAL_WITHOUT_ABORT) << abortMessage; |
| const auto ret = abortTid(tidToAbort); |
| if (ret < 0) { |
| LOG(FATAL) << "TimeCheck thread signal failed, aborting process. " |
| "errno: " << errno << base::ErrnoNumberAsString(errno); |
| } |
| } |
| |
| // Automatically create a TimeCheck class for a class and method. |
| // This is used for Audio HAL support. |
| mediautils::TimeCheck makeTimeCheckStatsForClassMethod( |
| std::string_view className, std::string_view methodName) { |
| std::shared_ptr<MethodStatistics<std::string>> statistics = |
| mediautils::getStatisticsForClass(className); |
| if (!statistics) return {}; // empty TimeCheck. |
| return mediautils::TimeCheck( |
| FixedString62(className).append("::").append(methodName), |
| [ safeMethodName = FixedString30(methodName), |
| stats = std::move(statistics) ] |
| (bool timeout, float elapsedMs) { |
| if (timeout) { |
| ; // ignored, there is no timeout value. |
| } else { |
| stats->event(safeMethodName.asStringView(), elapsedMs); |
| } |
| }, {} /* timeoutDuration */, {} /* secondChanceDuration */, false /* crashOnTimeout */); |
| } |
| |
| } // namespace android::mediautils |