diff options
Diffstat (limited to 'perfetto_hprof')
| -rw-r--r-- | perfetto_hprof/Android.bp | 8 | ||||
| -rw-r--r-- | perfetto_hprof/perfetto_hprof.cc | 371 |
2 files changed, 284 insertions, 95 deletions
diff --git a/perfetto_hprof/Android.bp b/perfetto_hprof/Android.bp index a81a4fa5e0..8cfc7d4bea 100644 --- a/perfetto_hprof/Android.bp +++ b/perfetto_hprof/Android.bp @@ -50,6 +50,7 @@ cc_defaults { compile_multilib: "both", shared_libs: [ + "libartpalette", "libbase", "liblog", ], @@ -68,6 +69,12 @@ cc_defaults { header_libs: [ "libnativehelper_header_only", ], + // FIXME: Workaround LTO build breakage + // http://b/241700157 + lto: { + never: true, + }, + } art_cc_library { @@ -81,6 +88,7 @@ art_cc_library { apex_available: [ "com.android.art", "com.android.art.debug", + "test_broken_com.android.art", ], } diff --git a/perfetto_hprof/perfetto_hprof.cc b/perfetto_hprof/perfetto_hprof.cc index 669fb0cac8..906362ab29 100644 --- a/perfetto_hprof/perfetto_hprof.cc +++ b/perfetto_hprof/perfetto_hprof.cc @@ -18,9 +18,8 @@ #include "perfetto_hprof.h" -#include <android-base/logging.h> -#include <base/fast_exit.h> #include <fcntl.h> +#include <fnmatch.h> #include <inttypes.h> #include <sched.h> #include <signal.h> @@ -36,6 +35,11 @@ #include <optional> #include <type_traits> +#include "android-base/file.h" +#include "android-base/logging.h" +#include "android-base/properties.h" +#include "base/fast_exit.h" +#include "base/systrace.h" #include "gc/heap-visit-objects-inl.h" #include "gc/heap.h" #include "gc/scoped_gc_critical_section.h" @@ -86,6 +90,8 @@ static art::ConditionVariable& GetStateCV() { static int requested_tracing_session_id = 0; static State g_state = State::kUninitialized; +static bool g_oome_triggered = false; +static uint32_t g_oome_sessions_pending = 0; // Pipe to signal from the signal handler into a worker thread that handles the // dump requests. @@ -151,19 +157,52 @@ bool ShouldSampleSmapsEntry(const perfetto::profiling::SmapsEntry& e) { return false; } +uint64_t GetCurrentBootClockNs() { + struct timespec ts = {}; + if (clock_gettime(CLOCK_BOOTTIME, &ts) != 0) { + LOG(FATAL) << "Failed to get boottime."; + } + return ts.tv_sec * 1000000000LL + ts.tv_nsec; +} + +bool IsDebugBuild() { + std::string build_type = android::base::GetProperty("ro.build.type", ""); + return !build_type.empty() && build_type != "user"; +} + +// Verifies the manifest restrictions are respected. +// For regular heap dumps this is already handled by heapprofd. +bool IsOomeHeapDumpAllowed(const perfetto::DataSourceConfig& ds_config) { + if (art::Runtime::Current()->IsJavaDebuggable() || IsDebugBuild()) { + return true; + } + + if (ds_config.session_initiator() == + perfetto::DataSourceConfig::SESSION_INITIATOR_TRUSTED_SYSTEM) { + return art::Runtime::Current()->IsProfileable() || art::Runtime::Current()->IsSystemServer(); + } else { + return art::Runtime::Current()->IsProfileableFromShell(); + } +} + class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> { public: constexpr static perfetto::BufferExhaustedPolicy kBufferExhaustedPolicy = perfetto::BufferExhaustedPolicy::kStall; + + explicit JavaHprofDataSource(bool is_oome_heap) : is_oome_heap_(is_oome_heap) {} + void OnSetup(const SetupArgs& args) override { - uint64_t normalized_cfg_tracing_session_id = - args.config->tracing_session_id() % std::numeric_limits<int32_t>::max(); - if (requested_tracing_session_id < 0) { - LOG(ERROR) << "invalid requested tracing session id " << requested_tracing_session_id; - return; - } - if (static_cast<uint64_t>(requested_tracing_session_id) != normalized_cfg_tracing_session_id) { - return; + if (!is_oome_heap_) { + uint64_t normalized_tracing_session_id = + args.config->tracing_session_id() % std::numeric_limits<int32_t>::max(); + if (requested_tracing_session_id < 0) { + LOG(ERROR) << "invalid requested tracing session id " << requested_tracing_session_id; + return; + } + if (static_cast<uint64_t>(requested_tracing_session_id) != normalized_tracing_session_id) { + return; + } } // This is on the heap as it triggers -Wframe-larger-than. @@ -174,24 +213,35 @@ class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> { dump_smaps_ = cfg->dump_smaps(); for (auto it = cfg->ignored_types(); it; ++it) { std::string name = (*it).ToStdString(); - ignored_types_.emplace_back(std::move(name)); + ignored_types_.emplace_back(art::InversePrettyDescriptor(name)); } // This tracing session ID matches the requesting tracing session ID, so we know heapprofd // has verified it targets this process. - enabled_ = true; + enabled_ = + !is_oome_heap_ || (IsOomeHeapDumpAllowed(*args.config) && IsOomeDumpEnabled(*cfg.get())); } bool dump_smaps() { return dump_smaps_; } + + // Per-DataSource enable bit. Invoked by the ::Trace method. bool enabled() { return enabled_; } void OnStart(const StartArgs&) override { - if (!enabled()) { - return; - } art::MutexLock lk(art_thread(), GetStateMutex()); + // In case there are multiple tracing sessions waiting for an OOME error, + // there will be a data source instance for each of them. Before the + // transition to kStart and signaling the dumping thread, we need to make + // sure all the data sources are ready. + if (is_oome_heap_ && g_oome_sessions_pending > 0) { + --g_oome_sessions_pending; + } if (g_state == State::kWaitForStart) { - g_state = State::kStart; - GetStateCV().Broadcast(art_thread()); + // WriteHeapPackets is responsible for checking whether the DataSource is\ + // actually enabled. + if (!is_oome_heap_ || g_oome_sessions_pending == 0) { + g_state = State::kStart; + GetStateCV().Broadcast(art_thread()); + } } } @@ -232,10 +282,26 @@ class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> { } private: + static bool IsOomeDumpEnabled(const perfetto::protos::pbzero::JavaHprofConfig::Decoder& cfg) { + std::string cmdline; + if (!android::base::ReadFileToString("/proc/self/cmdline", &cmdline)) { + return false; + } + const char* argv0 = cmdline.c_str(); + + for (auto it = cfg.process_cmdline(); it; ++it) { + std::string pattern = (*it).ToStdString(); + if (fnmatch(pattern.c_str(), argv0, FNM_NOESCAPE) == 0) { + return true; + } + } + return false; + } + + bool is_oome_heap_ = false; bool enabled_ = false; bool dump_smaps_ = false; std::vector<std::string> ignored_types_; - static art::Thread* self_; art::Mutex finish_mutex_{"perfetto_hprof_ds_mutex", art::LockLevel::kGenericBottomLock}; bool is_finished_ = false; @@ -243,27 +309,40 @@ class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> { std::function<void()> async_stop_; }; -art::Thread* JavaHprofDataSource::self_ = nullptr; - - -void WaitForDataSource(art::Thread* self) { +void SetupDataSource(const std::string& ds_name, bool is_oome_heap) { perfetto::TracingInitArgs args; args.backends = perfetto::BackendType::kSystemBackend; perfetto::Tracing::Initialize(args); perfetto::DataSourceDescriptor dsd; - dsd.set_name("android.java_hprof"); + dsd.set_name(ds_name); dsd.set_will_notify_on_stop(true); - JavaHprofDataSource::Register(dsd); - - LOG(INFO) << "waiting for data source"; + JavaHprofDataSource::Register(dsd, is_oome_heap); + LOG(INFO) << "registered data source " << ds_name; +} +// Waits for the data source OnStart +void WaitForDataSource(art::Thread* self) { art::MutexLock lk(self, GetStateMutex()); while (g_state != State::kStart) { GetStateCV().Wait(self); } } +// Waits for the data source OnStart with a timeout. Returns false on timeout. +bool TimedWaitForDataSource(art::Thread* self, int64_t timeout_ms) { + const uint64_t cutoff_ns = GetCurrentBootClockNs() + timeout_ms * 1000000; + art::MutexLock lk(self, GetStateMutex()); + while (g_state != State::kStart) { + const uint64_t current_ns = GetCurrentBootClockNs(); + if (current_ns >= cutoff_ns) { + return false; + } + GetStateCV().TimedWait(self, (cutoff_ns - current_ns) / 1000000, 0); + } + return true; +} + // Helper class to write Java heap dumps to `ctx`. The whole heap dump can be // split into more perfetto.protos.HeapGraph messages, to avoid making each // message too big. @@ -333,8 +412,9 @@ class Writer { class ReferredObjectsFinder { public: explicit ReferredObjectsFinder( - std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects) - : referred_objects_(referred_objects) {} + std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects, + bool emit_field_ids) + : referred_objects_(referred_objects), emit_field_ids_(emit_field_ids) {} // For art::mirror::Object::VisitReferences. void operator()(art::ObjPtr<art::mirror::Object> obj, art::MemberOffset offset, @@ -352,7 +432,7 @@ class ReferredObjectsFinder { field = art::ArtField::FindInstanceFieldWithOffset(obj->GetClass(), offset.Uint32Value()); } std::string field_name = ""; - if (field != nullptr) { + if (field != nullptr && emit_field_ids_) { field_name = field->PrettyField(/*with_type=*/true); } referred_objects_->emplace_back(std::move(field_name), ref); @@ -367,6 +447,8 @@ class ReferredObjectsFinder { // We can use a raw Object* pointer here, because there are no concurrent GC threads after the // fork. std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects_; + // Prettifying field names is expensive; avoid if field name will not be used. + bool emit_field_ids_; }; class RootFinder : public art::SingleRootVisitor { @@ -461,7 +543,7 @@ std::string PrettyType(art::mirror::Class* klass) NO_THREAD_SAFETY_ANALYSIS { } void DumpSmaps(JavaHprofDataSource::TraceContext* ctx) { - FILE* smaps = fopen("/proc/self/smaps", "r"); + FILE* smaps = fopen("/proc/self/smaps", "re"); if (smaps != nullptr) { auto trace_packet = ctx->NewTracePacket(); auto* smaps_packet = trace_packet->set_smaps_packet(); @@ -504,10 +586,11 @@ size_t EncodedSize(uint64_t n) { // Returns all the references that `*obj` (an object of type `*klass`) is holding. std::vector<std::pair<std::string, art::mirror::Object*>> GetReferences(art::mirror::Object* obj, - art::mirror::Class* klass) + art::mirror::Class* klass, + bool emit_field_ids) REQUIRES_SHARED(art::Locks::mutator_lock_) { std::vector<std::pair<std::string, art::mirror::Object*>> referred_objects; - ReferredObjectsFinder objf(&referred_objects); + ReferredObjectsFinder objf(&referred_objects, emit_field_ids); if (klass->GetClassFlags() != art::mirror::kClassFlagNormal && klass->GetClassFlags() != art::mirror::kClassFlagPhantomReference) { @@ -718,17 +801,16 @@ class HeapGraphDumper { art::mirror::Class* klass, perfetto::protos::pbzero::HeapGraphObject* object_proto) REQUIRES_SHARED(art::Locks::mutator_lock_) { + const bool emit_field_ids = klass->GetClassFlags() != art::mirror::kClassFlagObjectArray && + klass->GetClassFlags() != art::mirror::kClassFlagNormal && + klass->GetClassFlags() != art::mirror::kClassFlagPhantomReference; std::vector<std::pair<std::string, art::mirror::Object*>> referred_objects = - GetReferences(obj, klass); + GetReferences(obj, klass, emit_field_ids); art::mirror::Object* min_nonnull_ptr = FilterIgnoredReferencesAndFindMin(referred_objects); uint64_t base_obj_id = EncodeBaseObjId(referred_objects, min_nonnull_ptr); - const bool emit_field_ids = klass->GetClassFlags() != art::mirror::kClassFlagObjectArray && - klass->GetClassFlags() != art::mirror::kClassFlagNormal && - klass->GetClassFlags() != art::mirror::kClassFlagPhantomReference; - for (const auto& p : referred_objects) { const std::string& field_name = p.first; art::mirror::Object* referred_obj = p.second; @@ -806,8 +888,9 @@ class HeapGraphDumper { return false; } art::mirror::Class* klass = obj->GetClass(); - return std::find(ignored_types_.begin(), ignored_types_.end(), PrettyType(klass)) != - ignored_types_.end(); + std::string temp; + std::string_view name(klass->GetDescriptor(&temp)); + return std::find(ignored_types_.begin(), ignored_types_.end(), name) != ignored_types_.end(); } // Name of classes whose instances should be ignored. @@ -831,10 +914,45 @@ class HeapGraphDumper { uint64_t prev_object_id_ = 0; }; -void DumpPerfetto(art::Thread* self) { - pid_t parent_pid = getpid(); - LOG(INFO) << "preparing to dump heap for " << parent_pid; +// waitpid with a timeout implemented by ~busy-waiting +// See b/181031512 for rationale. +void BusyWaitpid(pid_t pid, uint32_t timeout_ms) { + for (size_t i = 0;; ++i) { + if (i == timeout_ms) { + // The child hasn't exited. + // Give up and SIGKILL it. The next waitpid should succeed. + LOG(ERROR) << "perfetto_hprof child timed out. Sending SIGKILL."; + kill(pid, SIGKILL); + } + int stat_loc; + pid_t wait_result = waitpid(pid, &stat_loc, WNOHANG); + if (wait_result == -1 && errno != EINTR) { + if (errno != ECHILD) { + // This hopefully never happens (should only be EINVAL). + PLOG(FATAL_WITHOUT_ABORT) << "waitpid"; + } + // If we get ECHILD, the parent process was handling SIGCHLD, or did a wildcard wait. + // The child is no longer here either way, so that's good enough for us. + break; + } else if (wait_result > 0) { + break; + } else { // wait_result == 0 || errno == EINTR. + usleep(1000); + } + } +} + +enum class ResumeParentPolicy { + IMMEDIATELY, + DEFERRED +}; +void ForkAndRun(art::Thread* self, + ResumeParentPolicy resume_parent_policy, + const std::function<void(pid_t child)>& parent_runnable, + const std::function<void(pid_t parent, uint64_t timestamp)>& child_runnable) { + pid_t parent_pid = getpid(); + LOG(INFO) << "forking for " << parent_pid; // Need to take a heap dump while GC isn't running. See the comment in // Heap::VisitObjects(). Also we need the critical section to avoid visiting // the same object twice. See b/34967844. @@ -859,41 +977,20 @@ void DumpPerfetto(art::Thread* self) { } if (pid != 0) { // Parent - // Stop the thread suspension as soon as possible to allow the rest of the application to - // continue while we waitpid here. - ssa.reset(); - gcs.reset(); - for (size_t i = 0;; ++i) { - if (i == 1000) { - // The child hasn't exited for 1 second (and all it was supposed to do was fork itself). - // Give up and SIGKILL it. The next waitpid should succeed. - LOG(ERROR) << "perfetto_hprof child timed out. Sending SIGKILL."; - kill(pid, SIGKILL); - } - // Busy waiting here will introduce some extra latency, but that is okay because we have - // already unsuspended all other threads. This runs on the perfetto_hprof_listener, which - // is not needed for progress of the app itself. - int stat_loc; - pid_t wait_result = waitpid(pid, &stat_loc, WNOHANG); - if (wait_result == -1 && errno != EINTR) { - if (errno != ECHILD) { - // This hopefully never happens (should only be EINVAL). - PLOG(FATAL_WITHOUT_ABORT) << "waitpid"; - } - // If we get ECHILD, the parent process was handling SIGCHLD, or did a wildcard wait. - // The child is no longer here either way, so that's good enough for us. - break; - } else if (wait_result > 0) { - break; - } else { // wait_result == 0 || errno == EINTR. - usleep(1000); - } + if (resume_parent_policy == ResumeParentPolicy::IMMEDIATELY) { + // Stop the thread suspension as soon as possible to allow the rest of the application to + // continue while we waitpid here. + ssa.reset(); + gcs.reset(); + } + parent_runnable(pid); + if (resume_parent_policy != ResumeParentPolicy::IMMEDIATELY) { + ssa.reset(); + gcs.reset(); } return; } - // The following code is only executed by the child of the original process. - // Uninstall signal handler, so we don't trigger a profile on it. if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) { close(g_signal_pipe_fds[0]); @@ -902,25 +999,14 @@ void DumpPerfetto(art::Thread* self) { return; } - // Daemon creates a new process that is the grand-child of the original process, and exits. - if (daemon(0, 0) == -1) { - PLOG(FATAL) << "daemon"; - } - - // The following code is only executed by the grand-child of the original process. - - // Make sure that this is the first thing we do after forking, so if anything - // below hangs, the fork will go away from the watchdog. - ArmWatchdogOrDie(); - - struct timespec ts = {}; - if (clock_gettime(CLOCK_BOOTTIME, &ts) != 0) { - LOG(FATAL) << "Failed to get boottime."; - } - uint64_t timestamp = ts.tv_sec * 1000000000LL + ts.tv_nsec; - - WaitForDataSource(self); + uint64_t ts = GetCurrentBootClockNs(); + child_runnable(parent_pid, ts); + // Prevent the `atexit` handlers from running. We do not want to call cleanup + // functions the parent process has registered. + art::FastExit(0); +} +void WriteHeapPackets(pid_t parent_pid, uint64_t timestamp) { JavaHprofDataSource::Trace( [parent_pid, timestamp](JavaHprofDataSource::TraceContext ctx) NO_THREAD_SAFETY_ANALYSIS { @@ -968,11 +1054,101 @@ void DumpPerfetto(art::Thread* self) { } } }); +} - LOG(INFO) << "finished dumping heap for " << parent_pid; - // Prevent the `atexit` handlers from running. We do not want to call cleanup - // functions the parent process has registered. - art::FastExit(0); +void DumpPerfetto(art::Thread* self) { + ForkAndRun( + self, + ResumeParentPolicy::IMMEDIATELY, + // parent thread + [](pid_t child) { + // Busy waiting here will introduce some extra latency, but that is okay because we have + // already unsuspended all other threads. This runs on the perfetto_hprof_listener, which + // is not needed for progress of the app itself. + // We daemonize the child process, so effectively we only need to wait + // for it to fork and exit. + BusyWaitpid(child, 1000); + }, + // child thread + [self](pid_t dumped_pid, uint64_t timestamp) { + // Daemon creates a new process that is the grand-child of the original process, and exits. + if (daemon(0, 0) == -1) { + PLOG(FATAL) << "daemon"; + } + // The following code is only executed by the grand-child of the original process. + + // Make sure that this is the first thing we do after forking, so if anything + // below hangs, the fork will go away from the watchdog. + ArmWatchdogOrDie(); + SetupDataSource("android.java_hprof", false); + WaitForDataSource(self); + WriteHeapPackets(dumped_pid, timestamp); + LOG(INFO) << "finished dumping heap for " << dumped_pid; + }); +} + +void DumpPerfettoOutOfMemory() REQUIRES_SHARED(art::Locks::mutator_lock_) { + art::Thread* self = art::Thread::Current(); + if (!self) { + LOG(FATAL_WITHOUT_ABORT) << "no thread in DumpPerfettoOutOfMemory"; + return; + } + + // Ensure that there is an active, armed tracing session + uint32_t session_cnt = + android::base::GetUintProperty<uint32_t>("traced.oome_heap_session.count", 0); + if (session_cnt == 0) { + return; + } + { + // OutOfMemoryErrors are reentrant, make sure we do not fork and process + // more than once. + art::MutexLock lk(self, GetStateMutex()); + if (g_oome_triggered) { + return; + } + g_oome_triggered = true; + g_oome_sessions_pending = session_cnt; + } + + art::ScopedThreadSuspension sts(self, art::ThreadState::kSuspended); + // If we fork & resume the original process execution it will most likely exit + // ~immediately due to the OOME error thrown. When the system detects that + // that, it will cleanup by killing all processes in the cgroup (including + // the process we just forked). + // We need to avoid the race between the heap dump and the process group + // cleanup, and the only way to do this is to avoid resuming the original + // process until the heap dump is complete. + // Given we are already about to crash anyway, the diagnostic data we get + // outweighs the cost of introducing some latency. + ForkAndRun( + self, + ResumeParentPolicy::DEFERRED, + // parent process + [](pid_t child) { + // waitpid to reap the zombie + // we are explicitly waiting for the child to exit + // The reason for the timeout on top of the watchdog is that it is + // possible (albeit unlikely) that even the watchdog will fail to be + // activated in the case of an atfork handler. + BusyWaitpid(child, kWatchdogTimeoutSec * 1000); + }, + // child process + [self](pid_t dumped_pid, uint64_t timestamp) { + ArmWatchdogOrDie(); + art::ScopedTrace trace("perfetto_hprof oome"); + SetupDataSource("android.java_hprof.oom", true); + perfetto::Tracing::ActivateTriggers({"com.android.telemetry.art-outofmemory"}, 500); + + // A pre-armed tracing session might not exist, so we should wait for a + // limited amount of time before we decide to let the execution continue. + if (!TimedWaitForDataSource(self, 1000)) { + LOG(INFO) << "OOME hprof timeout (state " << g_state << ")"; + return; + } + WriteHeapPackets(dumped_pid, timestamp); + LOG(INFO) << "OOME hprof complete for " << dumped_pid; + }); } // The plugin initialization function. @@ -1062,10 +1238,15 @@ extern "C" bool ArtPlugin_Initialize() { }); th.detach(); + // Register the OOM error handler. + art::Runtime::Current()->SetOutOfMemoryErrorHook(perfetto_hprof::DumpPerfettoOutOfMemory); + return true; } extern "C" bool ArtPlugin_Deinitialize() { + art::Runtime::Current()->SetOutOfMemoryErrorHook(nullptr); + if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) { PLOG(ERROR) << "failed to reset signal handler"; // We cannot close the pipe if the signal handler wasn't unregistered, |