diff options
| author | 2025-02-13 10:59:59 -0800 | |
|---|---|---|
| committer | 2025-02-13 10:59:59 -0800 | |
| commit | 35a8b07c110dcf75a0f74554d0d0ee343655abf8 (patch) | |
| tree | cd0d59d26c1103bb210444d5207a947c31422dcd | |
| parent | 155f88b8911f293d0611fc9f9c76b1b67dd046f2 (diff) | |
| parent | b63a35a0495ea4beaa36aad7d81728daacee3be4 (diff) | |
Extend suspend timeout for debug activities am: 4bcbbe50ab am: b63a35a049
Original change: https://android-review.googlesource.com/c/platform/art/+/3463910
Change-Id: I0ce959085b814157e74b0fb00d74468639d89aa4
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
| -rw-r--r-- | libartbase/base/utils.cc | 12 | ||||
| -rw-r--r-- | libartbase/base/utils.h | 12 | ||||
| -rw-r--r-- | libartbase/base/utils_test.cc | 7 | ||||
| -rw-r--r-- | runtime/runtime.cc | 4 | ||||
| -rw-r--r-- | runtime/runtime.h | 2 | ||||
| -rw-r--r-- | runtime/signal_catcher.h | 2 | ||||
| -rw-r--r-- | runtime/thread.cc | 2 | ||||
| -rw-r--r-- | runtime/thread_list.cc | 26 |
8 files changed, 56 insertions, 11 deletions
diff --git a/libartbase/base/utils.cc b/libartbase/base/utils.cc index e77d37d29b..efb727f950 100644 --- a/libartbase/base/utils.cc +++ b/libartbase/base/utils.cc @@ -424,6 +424,18 @@ std::string GetOsThreadStatQuick(pid_t tid) { #endif } +char GetStateFromStatString(const std::string& stat_output) { + size_t rparen_pos = stat_output.find(")"); + if (rparen_pos == std::string::npos || rparen_pos >= stat_output.length() - 3) { + return '?'; + } + size_t state_pos = stat_output.find_first_not_of(" ", rparen_pos + 1); + if (rparen_pos == std::string::npos) { + return '?'; + } + return stat_output[state_pos]; +} + std::string GetOtherThreadOsStats() { #if defined(__linux__) DIR* dir = opendir("/proc/self/task"); diff --git a/libartbase/base/utils.h b/libartbase/base/utils.h index 4b86651f95..fdb7f6650a 100644 --- a/libartbase/base/utils.h +++ b/libartbase/base/utils.h @@ -152,14 +152,18 @@ inline void ForceRead(const T* pointer) { // there is an I/O error. std::string GetProcessStatus(const char* key); -// Copy a prefix of /proc/tid/stat of the given length into buf. Return the number of bytes -// actually read, 0 on error. +// Copy a prefix of /proc/pid/task/tid/stat of the given length into buf. Return the number of +// bytes actually read, 0 on error. size_t GetOsThreadStat(pid_t tid, char* buf, size_t len); -// Return a short prefix of /proc/tid/stat as quickly and robustly as possible. Used for debugging -// timing issues and possibly issues with /proc itself. Always atomic. +// Return a short prefix of /proc/pid/task/tid/stat as quickly and robustly as possible. Used for +// debugging timing issues and possibly issues with /proc itself. Always atomic. std::string GetOsThreadStatQuick(pid_t tid); +// Given a /proc/.../stat string or prefix, such as those returned by the above, return the single +// character representation of the thread state from that string, or '?' if it can't be found. +char GetStateFromStatString(const std::string& stat_output); + // Return a concatenation of the output of GetOsThreadStatQuick(tid) for all other tids. // Less robust against concurrent change, but individual stat strings should still always // be consistent. Called only when we are nearly certain to crash anyway. diff --git a/libartbase/base/utils_test.cc b/libartbase/base/utils_test.cc index ab8627fa7d..a422fc5369 100644 --- a/libartbase/base/utils_test.cc +++ b/libartbase/base/utils_test.cc @@ -124,6 +124,13 @@ TEST_F(UtilsTest, GetProcessStatus) { EXPECT_EQ("<unknown>", GetProcessStatus("InvalidFieldName")); } +TEST_F(UtilsTest, GetOsThreadStatQuick) { + std::string my_stat = GetOsThreadStatQuick(GetTid()); + EXPECT_GT(my_stat.length(), 20); + EXPECT_LT(my_stat.length(), 1000); + EXPECT_EQ('R', GetStateFromStatString(my_stat)); +} + TEST_F(UtilsTest, StringSplit) { auto range = SplitString("[ab[c[[d[e[", '['); auto it = range.begin(); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index a16bde3469..e2c30d619c 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -2450,8 +2450,8 @@ void Runtime::DumpDeoptimizations(std::ostream& os) { } } -std::optional<uint64_t> Runtime::SiqQuitNanoTime() const { - return signal_catcher_ != nullptr ? signal_catcher_->SiqQuitNanoTime() : std::nullopt; +std::optional<uint64_t> Runtime::SigQuitNanoTime() const { + return signal_catcher_ != nullptr ? signal_catcher_->SigQuitNanoTime() : std::nullopt; } void Runtime::DumpForSigQuit(std::ostream& os) { diff --git a/runtime/runtime.h b/runtime/runtime.h index e43d1d3432..63a0707424 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -314,7 +314,7 @@ class Runtime { void DetachCurrentThread(bool should_run_callbacks = true) REQUIRES(!Locks::mutator_lock_); // If we are handling SIQQUIT return the time when we received it. - std::optional<uint64_t> SiqQuitNanoTime() const; + std::optional<uint64_t> SigQuitNanoTime() const; void DumpDeoptimizations(std::ostream& os); void DumpForSigQuit(std::ostream& os); diff --git a/runtime/signal_catcher.h b/runtime/signal_catcher.h index 79014ea022..4d5d71ede8 100644 --- a/runtime/signal_catcher.h +++ b/runtime/signal_catcher.h @@ -42,7 +42,7 @@ class SignalCatcher { void HandleSigQuit() REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_); - std::optional<uint64_t> SiqQuitNanoTime() const { return sigquit_nanotime_; } + std::optional<uint64_t> SigQuitNanoTime() const { return sigquit_nanotime_; } private: // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock. diff --git a/runtime/thread.cc b/runtime/thread.cc index 89465979da..5fc0ce6790 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -2439,7 +2439,7 @@ Thread::DumpOrder Thread::DumpStack(std::ostream& os, /*check_suspended=*/ !force_dump_stack, /*dump_locks=*/ !force_dump_stack); Runtime* runtime = Runtime::Current(); - std::optional<uint64_t> start = runtime != nullptr ? runtime->SiqQuitNanoTime() : std::nullopt; + std::optional<uint64_t> start = runtime != nullptr ? runtime->SigQuitNanoTime() : std::nullopt; if (start.has_value()) { os << "DumpLatencyMs: " << static_cast<float>(nanotime - start.value()) / 1000000.0 << "\n"; } diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc index 2acf58bfce..b4f79ee5a8 100644 --- a/runtime/thread_list.cc +++ b/runtime/thread_list.cc @@ -720,7 +720,7 @@ static constexpr unsigned kSuspendBarrierIters = kShortSuspendTimeouts ? 5 : 20; #if ART_USE_FUTEXES -// Returns true if it timed out. +// Returns true if it timed out. Times out after timeout_ns/kSuspendBarrierIters nsecs static bool WaitOnceForSuspendBarrier(AtomicInteger* barrier, int32_t cur_val, uint64_t timeout_ns) { @@ -804,10 +804,23 @@ std::optional<std::string> ThreadList::WaitForSuspendBarrier(AtomicInteger* barr return std::nullopt; } + // Extra timeout to compensate for concurrent thread dumps, so that we are less likely to time + // out during ANR dumps. + uint64_t dump_adjustment_ns = 0; + // Total timeout increment if we see a concurrent thread dump. Distributed evenly across + // remaining iterations. + static constexpr uint64_t kDumpWaitNSecs = 30'000'000'000ull; // 30 seconds + // Replacement timeout if thread is stopped for tracing, probably by a debugger. + static constexpr uint64_t kTracingWaitNSecs = 7'200'000'000'000ull; // wait a bit < 2 hours; + // Long wait; gather information in case of timeout. std::string sampled_state = collect_state ? GetOsThreadStatQuick(t) : ""; + if (collect_state && GetStateFromStatString(sampled_state) == 't') { + LOG(WARNING) << "Thread suspension nearly timed out due to Tracing stop (debugger attached?)"; + timeout_ns = kTracingWaitNSecs; + } while (i < kSuspendBarrierIters) { - if (WaitOnceForSuspendBarrier(barrier, cur_val, timeout_ns)) { + if (WaitOnceForSuspendBarrier(barrier, cur_val, timeout_ns + dump_adjustment_ns)) { ++i; #if ART_USE_FUTEXES if (!kShortSuspendTimeouts) { @@ -820,6 +833,15 @@ std::optional<std::string> ThreadList::WaitForSuspendBarrier(AtomicInteger* barr DCHECK_EQ(cur_val, 0); return std::nullopt; } + std::optional<uint64_t> last_sigquit_nanotime = Runtime::Current()->SigQuitNanoTime(); + if (last_sigquit_nanotime.has_value() && i < kSuspendBarrierIters) { + // Adjust dump_adjustment_ns to reflect the number of iterations we have left and how long + // ago we started dumping threads. + uint64_t new_unscaled_adj = kDumpWaitNSecs + last_sigquit_nanotime.value() - NanoTime(); + // Scale by the fraction of iterations still remaining. + dump_adjustment_ns = new_unscaled_adj * kSuspendBarrierIters / kSuspendBarrierIters - i; + } + // Keep the old dump_adjustment_ns if SigQuitNanoTime() was cleared. } uint64_t final_wait_time = NanoTime() - start_time; uint64_t total_wait_time = attempt_of_4 == 0 ? |