diff options
author | 2025-02-07 14:15:41 +0000 | |
---|---|---|
committer | 2025-02-18 12:15:33 -0800 | |
commit | ff7c1a9d5b07f1a761fba6fe89b0d446b0eea468 (patch) | |
tree | 5587e97e5a82438baa83958c24f942317a759e1a | |
parent | 73067c5af49735755b6da2cc27c0dd082c248c9b (diff) |
Fix lock level violations when dumping low overhead traces
There were two lock-level violations in the code that dumps low-overhead
traces:
1. We should not wait for checkpoints to finish while holding the
trace_lock_. This CL fixes it by waiting for the checkpoints after
releasing the trace_lock_. Since we release trace_lock_ while waiting,
we need additional synchronization to make sure trace_data_ isn't
deleted while checkpoints are still running. This is done with a
ConditionVariable: before deleting trace_data_, we wait on the
condition if a trace dump is in progress.
2. We should not enter a ScopedObjectAccess (which acquires the mutator
lock) while holding the bottom-level trace_data_lock_. We now take a
snapshot of the traced_methods and traced_threads while holding the
trace_data_lock_ and then dump thread and method information outside
the lock. Dumping methods requires the mutator lock to get the
information about each method.
Bug: 352518093
Test: Manual testing with feature enabled and kDebugLocking enabled.
Change-Id: I42a61f4d43eb985a93bc7884c09f771a5fbcd8f2
-rw-r--r-- | runtime/trace_profile.cc | 77 | ||||
-rw-r--r-- | runtime/trace_profile.h | 27 |
2 files changed, 87 insertions, 17 deletions
diff --git a/runtime/trace_profile.cc b/runtime/trace_profile.cc index 2fcf475b80..4c0abbf1da 100644 --- a/runtime/trace_profile.cc +++ b/runtime/trace_profile.cc @@ -76,6 +76,18 @@ void TraceData::AddTracedThread(Thread* thread) { traced_threads_.emplace(thread_id, thread_name); } +void TraceData::MaybeWaitForTraceDumpToFinish() { + if (!trace_dump_in_progress_) { + return; + } + trace_dump_condition_.Wait(Thread::Current()); +} + +void TraceData::SignalTraceDumpComplete() { + trace_dump_in_progress_ = false; + trace_dump_condition_.Broadcast(Thread::Current()); +} + void TraceProfiler::AllocateBuffer(Thread* thread) { if (!art_flags::always_enable_profile_code()) { return; @@ -299,6 +311,10 @@ void TraceProfiler::StopLocked() { return; } + // We should not delete trace_data_ when there is an ongoing trace dump. So + // wait for any in progress trace dump to finish. + trace_data_->MaybeWaitForTraceDumpToFinish(); + static FunctionClosure reset_buffer([](Thread* thread) { auto buffer = thread->GetMethodTraceBuffer(); if (buffer != nullptr) { @@ -393,25 +409,33 @@ void TraceProfiler::Dump(const char* filename) { } void TraceProfiler::Dump(std::unique_ptr<File>&& trace_file, std::ostringstream& os) { - MutexLock mu(Thread::Current(), *Locks::trace_lock_); - if (!profile_in_progress_) { - if (trace_file != nullptr && !trace_file->Close()) { - PLOG(WARNING) << "Failed to close file."; - } - return; - } - Thread* self = Thread::Current(); - // Collect long running methods from all the threads; Runtime* runtime = Runtime::Current(); - TraceDumpCheckpoint checkpoint(trace_data_, trace_file); - size_t threads_running_checkpoint = runtime->GetThreadList()->RunCheckpoint(&checkpoint); + size_t threads_running_checkpoint = 0; + std::unique_ptr<TraceDumpCheckpoint> checkpoint; + { + MutexLock mu(self, *Locks::trace_lock_); + if (!profile_in_progress_ || trace_data_->IsTraceDumpInProgress()) { + if (trace_file != nullptr && !trace_file->Close()) { + PLOG(WARNING) << "Failed to 
close file."; + } + return; + } + + trace_data_->SetTraceDumpInProgress(); + + // Collect long running methods from all the threads; + checkpoint.reset(new TraceDumpCheckpoint(trace_data_, trace_file)); + threads_running_checkpoint = runtime->GetThreadList()->RunCheckpoint(checkpoint.get()); + } + + // Wait for all threads to dump their data. if (threads_running_checkpoint != 0) { - checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint); + checkpoint->WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint); } + checkpoint->FinishTraceDump(os); - trace_data_->DumpData(os); if (trace_file != nullptr) { std::string info = os.str(); if (!trace_file->WriteFully(info.c_str(), info.length())) { @@ -591,16 +615,35 @@ void TraceDumpCheckpoint::WaitForThreadsToRunThroughCheckpoint(size_t threads_ru barrier_.Increment(self, threads_running_checkpoint); } +void TraceDumpCheckpoint::FinishTraceDump(std::ostringstream& os) { + // Dump all the data. + trace_data_->DumpData(os); + + // Any trace stop requests will be blocked while a dump is in progress. So + // broadcast the completion condition for any waiting requests. + MutexLock mu(Thread::Current(), *Locks::trace_lock_); + trace_data_->SignalTraceDumpComplete(); +} + void TraceData::DumpData(std::ostringstream& os) { - MutexLock mu(Thread::Current(), trace_data_lock_); - if (long_running_methods_.length() > 0) { - os << long_running_methods_; + std::unordered_set<ArtMethod*> methods; + std::unordered_map<size_t, std::string> threads; + { + // We cannot dump method information while holding trace_lock_, since we have to also + // acquire a mutator lock. Take a snapshot of thread and method information. 
+ MutexLock mu(Thread::Current(), trace_data_lock_); + if (long_running_methods_.length() > 0) { + os << long_running_methods_; + } + + methods = traced_methods_; + threads = traced_threads_; } // Dump the information about traced_methods and threads { ScopedObjectAccess soa(Thread::Current()); - DumpThreadMethodInfo(traced_threads_, traced_methods_, os); + DumpThreadMethodInfo(threads, methods, os); } } diff --git a/runtime/trace_profile.h b/runtime/trace_profile.h index 24883b217e..df8ddec387 100644 --- a/runtime/trace_profile.h +++ b/runtime/trace_profile.h @@ -48,6 +48,8 @@ class TraceData { explicit TraceData(LowOverheadTraceType trace_type) : trace_type_(trace_type), trace_end_time_(0), + trace_dump_in_progress_(false), + trace_dump_condition_("trace dump condition", *Locks::trace_lock_), trace_data_lock_("Trace Data lock", LockLevel::kGenericBottomLock) {} LowOverheadTraceType GetTraceType() const { @@ -83,6 +85,22 @@ class TraceData { void AddTracedThread(Thread* thread); + // If there is no trace dump in progress this returns immediately. Otherwise + // it waits on a condition variable waiting for the trace dump to finish. + void MaybeWaitForTraceDumpToFinish() REQUIRES(Locks::trace_lock_); + + // Called when a trace dump is finished to notify any waiting requests. This + // also resets the trace_dump_in_progress_ to false. + void SignalTraceDumpComplete() REQUIRES(Locks::trace_lock_); + + void SetTraceDumpInProgress() REQUIRES(Locks::trace_lock_) { + trace_dump_in_progress_ = true; + } + + bool IsTraceDumpInProgress() const REQUIRES(Locks::trace_lock_) { + return trace_dump_in_progress_; + } + private: // This is used to hold the initial methods on stack and also long running methods when there is a // buffer overflow. @@ -100,6 +118,14 @@ class TraceData { // thread. std::unordered_map<size_t, std::string> traced_threads_ GUARDED_BY(trace_data_lock_); + // This specifies if a trace dump is in progress. 
We release the trace_lock_ + // when waiting for the checkpoints to finish. We shouldn't delete trace data + // when a dump is in progress. trace_dump_in_progress_ and + // trace_dump_condition_ are used to make sure we wait for any in progress + // trace dumps to finish before deleting the trace data. + bool trace_dump_in_progress_ GUARDED_BY(Locks::trace_lock_); + ConditionVariable trace_dump_condition_ GUARDED_BY(Locks::trace_lock_); + // Lock to synchronize access to traced_methods_, traced_threads_ and long_running_methods_ which // can be accessed simultaneously by multiple threads when running TraceDumpCheckpoint. Mutex trace_data_lock_; @@ -115,6 +141,7 @@ class TraceDumpCheckpoint final : public Closure { void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_); void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint); + void FinishTraceDump(std::ostringstream& os); private: // The barrier to be passed through and for the requestor to wait upon. |