summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Hans Boehm <hboehm@google.com> 2025-02-13 10:59:59 -0800
committer Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2025-02-13 10:59:59 -0800
commit35a8b07c110dcf75a0f74554d0d0ee343655abf8 (patch)
treecd0d59d26c1103bb210444d5207a947c31422dcd
parent155f88b8911f293d0611fc9f9c76b1b67dd046f2 (diff)
parentb63a35a0495ea4beaa36aad7d81728daacee3be4 (diff)
Extend suspend timeout for debug activities am: 4bcbbe50ab am: b63a35a049
Original change: https://android-review.googlesource.com/c/platform/art/+/3463910 Change-Id: I0ce959085b814157e74b0fb00d74468639d89aa4 Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r--libartbase/base/utils.cc12
-rw-r--r--libartbase/base/utils.h12
-rw-r--r--libartbase/base/utils_test.cc7
-rw-r--r--runtime/runtime.cc4
-rw-r--r--runtime/runtime.h2
-rw-r--r--runtime/signal_catcher.h2
-rw-r--r--runtime/thread.cc2
-rw-r--r--runtime/thread_list.cc26
8 files changed, 56 insertions, 11 deletions
diff --git a/libartbase/base/utils.cc b/libartbase/base/utils.cc
index e77d37d29b..efb727f950 100644
--- a/libartbase/base/utils.cc
+++ b/libartbase/base/utils.cc
@@ -424,6 +424,18 @@ std::string GetOsThreadStatQuick(pid_t tid) {
#endif
}
+char GetStateFromStatString(const std::string& stat_output) {
+ size_t rparen_pos = stat_output.find(")");
+ if (rparen_pos == std::string::npos || rparen_pos >= stat_output.length() - 3) {
+ return '?';
+ }
+ size_t state_pos = stat_output.find_first_not_of(" ", rparen_pos + 1);
+ if (rparen_pos == std::string::npos) {
+ return '?';
+ }
+ return stat_output[state_pos];
+}
+
std::string GetOtherThreadOsStats() {
#if defined(__linux__)
DIR* dir = opendir("/proc/self/task");
diff --git a/libartbase/base/utils.h b/libartbase/base/utils.h
index 4b86651f95..fdb7f6650a 100644
--- a/libartbase/base/utils.h
+++ b/libartbase/base/utils.h
@@ -152,14 +152,18 @@ inline void ForceRead(const T* pointer) {
// there is an I/O error.
std::string GetProcessStatus(const char* key);
-// Copy a prefix of /proc/tid/stat of the given length into buf. Return the number of bytes
-// actually read, 0 on error.
+// Copy a prefix of /proc/pid/task/tid/stat of the given length into buf. Return the number of
+// bytes actually read, 0 on error.
size_t GetOsThreadStat(pid_t tid, char* buf, size_t len);
-// Return a short prefix of /proc/tid/stat as quickly and robustly as possible. Used for debugging
-// timing issues and possibly issues with /proc itself. Always atomic.
+// Return a short prefix of /proc/pid/task/tid/stat as quickly and robustly as possible. Used for
+// debugging timing issues and possibly issues with /proc itself. Always atomic.
std::string GetOsThreadStatQuick(pid_t tid);
+// Given a /proc/.../stat string or prefix, such as those returned by the above, return the single
+// character representation of the thread state from that string, or '?' if it can't be found.
+char GetStateFromStatString(const std::string& stat_output);
+
// Return a concatenation of the output of GetOsThreadStatQuick(tid) for all other tids.
// Less robust against concurrent change, but individual stat strings should still always
// be consistent. Called only when we are nearly certain to crash anyway.
diff --git a/libartbase/base/utils_test.cc b/libartbase/base/utils_test.cc
index ab8627fa7d..a422fc5369 100644
--- a/libartbase/base/utils_test.cc
+++ b/libartbase/base/utils_test.cc
@@ -124,6 +124,13 @@ TEST_F(UtilsTest, GetProcessStatus) {
EXPECT_EQ("<unknown>", GetProcessStatus("InvalidFieldName"));
}
+TEST_F(UtilsTest, GetOsThreadStatQuick) {
+ std::string my_stat = GetOsThreadStatQuick(GetTid());
+ EXPECT_GT(my_stat.length(), 20);
+ EXPECT_LT(my_stat.length(), 1000);
+ EXPECT_EQ('R', GetStateFromStatString(my_stat));
+}
+
TEST_F(UtilsTest, StringSplit) {
auto range = SplitString("[ab[c[[d[e[", '[');
auto it = range.begin();
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index a16bde3469..e2c30d619c 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -2450,8 +2450,8 @@ void Runtime::DumpDeoptimizations(std::ostream& os) {
}
}
-std::optional<uint64_t> Runtime::SiqQuitNanoTime() const {
- return signal_catcher_ != nullptr ? signal_catcher_->SiqQuitNanoTime() : std::nullopt;
+std::optional<uint64_t> Runtime::SigQuitNanoTime() const {
+ return signal_catcher_ != nullptr ? signal_catcher_->SigQuitNanoTime() : std::nullopt;
}
void Runtime::DumpForSigQuit(std::ostream& os) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index e43d1d3432..63a0707424 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -314,7 +314,7 @@ class Runtime {
void DetachCurrentThread(bool should_run_callbacks = true) REQUIRES(!Locks::mutator_lock_);
// If we are handling SIQQUIT return the time when we received it.
- std::optional<uint64_t> SiqQuitNanoTime() const;
+ std::optional<uint64_t> SigQuitNanoTime() const;
void DumpDeoptimizations(std::ostream& os);
void DumpForSigQuit(std::ostream& os);
diff --git a/runtime/signal_catcher.h b/runtime/signal_catcher.h
index 79014ea022..4d5d71ede8 100644
--- a/runtime/signal_catcher.h
+++ b/runtime/signal_catcher.h
@@ -42,7 +42,7 @@ class SignalCatcher {
void HandleSigQuit() REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
!Locks::thread_suspend_count_lock_);
- std::optional<uint64_t> SiqQuitNanoTime() const { return sigquit_nanotime_; }
+ std::optional<uint64_t> SigQuitNanoTime() const { return sigquit_nanotime_; }
private:
// NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 89465979da..5fc0ce6790 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2439,7 +2439,7 @@ Thread::DumpOrder Thread::DumpStack(std::ostream& os,
/*check_suspended=*/ !force_dump_stack,
/*dump_locks=*/ !force_dump_stack);
Runtime* runtime = Runtime::Current();
- std::optional<uint64_t> start = runtime != nullptr ? runtime->SiqQuitNanoTime() : std::nullopt;
+ std::optional<uint64_t> start = runtime != nullptr ? runtime->SigQuitNanoTime() : std::nullopt;
if (start.has_value()) {
os << "DumpLatencyMs: " << static_cast<float>(nanotime - start.value()) / 1000000.0 << "\n";
}
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 2acf58bfce..b4f79ee5a8 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -720,7 +720,7 @@ static constexpr unsigned kSuspendBarrierIters = kShortSuspendTimeouts ? 5 : 20;
#if ART_USE_FUTEXES
-// Returns true if it timed out.
+// Returns true if it timed out. Times out after timeout_ns/kSuspendBarrierIters nsecs
static bool WaitOnceForSuspendBarrier(AtomicInteger* barrier,
int32_t cur_val,
uint64_t timeout_ns) {
@@ -804,10 +804,23 @@ std::optional<std::string> ThreadList::WaitForSuspendBarrier(AtomicInteger* barr
return std::nullopt;
}
+ // Extra timeout to compensate for concurrent thread dumps, so that we are less likely to time
+ // out during ANR dumps.
+ uint64_t dump_adjustment_ns = 0;
+ // Total timeout increment if we see a concurrent thread dump. Distributed evenly across
+ // remaining iterations.
+ static constexpr uint64_t kDumpWaitNSecs = 30'000'000'000ull; // 30 seconds
+ // Replacement timeout if thread is stopped for tracing, probably by a debugger.
+ static constexpr uint64_t kTracingWaitNSecs = 7'200'000'000'000ull; // wait a bit < 2 hours;
+
// Long wait; gather information in case of timeout.
std::string sampled_state = collect_state ? GetOsThreadStatQuick(t) : "";
+ if (collect_state && GetStateFromStatString(sampled_state) == 't') {
+ LOG(WARNING) << "Thread suspension nearly timed out due to Tracing stop (debugger attached?)";
+ timeout_ns = kTracingWaitNSecs;
+ }
while (i < kSuspendBarrierIters) {
- if (WaitOnceForSuspendBarrier(barrier, cur_val, timeout_ns)) {
+ if (WaitOnceForSuspendBarrier(barrier, cur_val, timeout_ns + dump_adjustment_ns)) {
++i;
#if ART_USE_FUTEXES
if (!kShortSuspendTimeouts) {
@@ -820,6 +833,15 @@ std::optional<std::string> ThreadList::WaitForSuspendBarrier(AtomicInteger* barr
DCHECK_EQ(cur_val, 0);
return std::nullopt;
}
+ std::optional<uint64_t> last_sigquit_nanotime = Runtime::Current()->SigQuitNanoTime();
+ if (last_sigquit_nanotime.has_value() && i < kSuspendBarrierIters) {
+ // Adjust dump_adjustment_ns to reflect the number of iterations we have left and how long
+ // ago we started dumping threads.
+ uint64_t new_unscaled_adj = kDumpWaitNSecs + last_sigquit_nanotime.value() - NanoTime();
+ // Scale by the fraction of iterations still remaining.
+ dump_adjustment_ns = new_unscaled_adj * kSuspendBarrierIters / kSuspendBarrierIters - i;
+ }
+ // Keep the old dump_adjustment_ns if SigQuitNanoTime() was cleared.
}
uint64_t final_wait_time = NanoTime() - start_time;
uint64_t total_wait_time = attempt_of_4 == 0 ?