summary refs log tree commit diff
path: root/perfetto_hprof
diff options
context:
space:
mode:
Diffstat (limited to 'perfetto_hprof')
-rw-r--r-- perfetto_hprof/Android.bp 8
-rw-r--r-- perfetto_hprof/perfetto_hprof.cc 371
2 files changed, 284 insertions, 95 deletions
diff --git a/perfetto_hprof/Android.bp b/perfetto_hprof/Android.bp
index a81a4fa5e0..8cfc7d4bea 100644
--- a/perfetto_hprof/Android.bp
+++ b/perfetto_hprof/Android.bp
@@ -50,6 +50,7 @@ cc_defaults {
compile_multilib: "both",
shared_libs: [
+ "libartpalette",
"libbase",
"liblog",
],
@@ -68,6 +69,12 @@ cc_defaults {
header_libs: [
"libnativehelper_header_only",
],
+ // FIXME: Workaround LTO build breakage
+ // http://b/241700157
+ lto: {
+ never: true,
+ },
+
}
art_cc_library {
@@ -81,6 +88,7 @@ art_cc_library {
apex_available: [
"com.android.art",
"com.android.art.debug",
+ "test_broken_com.android.art",
],
}
diff --git a/perfetto_hprof/perfetto_hprof.cc b/perfetto_hprof/perfetto_hprof.cc
index 669fb0cac8..906362ab29 100644
--- a/perfetto_hprof/perfetto_hprof.cc
+++ b/perfetto_hprof/perfetto_hprof.cc
@@ -18,9 +18,8 @@
#include "perfetto_hprof.h"
-#include <android-base/logging.h>
-#include <base/fast_exit.h>
#include <fcntl.h>
+#include <fnmatch.h>
#include <inttypes.h>
#include <sched.h>
#include <signal.h>
@@ -36,6 +35,11 @@
#include <optional>
#include <type_traits>
+#include "android-base/file.h"
+#include "android-base/logging.h"
+#include "android-base/properties.h"
+#include "base/fast_exit.h"
+#include "base/systrace.h"
#include "gc/heap-visit-objects-inl.h"
#include "gc/heap.h"
#include "gc/scoped_gc_critical_section.h"
@@ -86,6 +90,8 @@ static art::ConditionVariable& GetStateCV() {
static int requested_tracing_session_id = 0;
static State g_state = State::kUninitialized;
+static bool g_oome_triggered = false;
+static uint32_t g_oome_sessions_pending = 0;
// Pipe to signal from the signal handler into a worker thread that handles the
// dump requests.
@@ -151,19 +157,52 @@ bool ShouldSampleSmapsEntry(const perfetto::profiling::SmapsEntry& e) {
return false;
}
+uint64_t GetCurrentBootClockNs() {
+ struct timespec ts = {};
+ if (clock_gettime(CLOCK_BOOTTIME, &ts) != 0) {
+ LOG(FATAL) << "Failed to get boottime.";
+ }
+ return ts.tv_sec * 1000000000LL + ts.tv_nsec;
+}
+
+bool IsDebugBuild() {
+ std::string build_type = android::base::GetProperty("ro.build.type", "");
+ return !build_type.empty() && build_type != "user";
+}
+
+// Verifies the manifest restrictions are respected.
+// For regular heap dumps this is already handled by heapprofd.
+bool IsOomeHeapDumpAllowed(const perfetto::DataSourceConfig& ds_config) {
+ if (art::Runtime::Current()->IsJavaDebuggable() || IsDebugBuild()) {
+ return true;
+ }
+
+ if (ds_config.session_initiator() ==
+ perfetto::DataSourceConfig::SESSION_INITIATOR_TRUSTED_SYSTEM) {
+ return art::Runtime::Current()->IsProfileable() || art::Runtime::Current()->IsSystemServer();
+ } else {
+ return art::Runtime::Current()->IsProfileableFromShell();
+ }
+}
+
class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> {
public:
constexpr static perfetto::BufferExhaustedPolicy kBufferExhaustedPolicy =
perfetto::BufferExhaustedPolicy::kStall;
+
+ explicit JavaHprofDataSource(bool is_oome_heap) : is_oome_heap_(is_oome_heap) {}
+
void OnSetup(const SetupArgs& args) override {
- uint64_t normalized_cfg_tracing_session_id =
- args.config->tracing_session_id() % std::numeric_limits<int32_t>::max();
- if (requested_tracing_session_id < 0) {
- LOG(ERROR) << "invalid requested tracing session id " << requested_tracing_session_id;
- return;
- }
- if (static_cast<uint64_t>(requested_tracing_session_id) != normalized_cfg_tracing_session_id) {
- return;
+ if (!is_oome_heap_) {
+ uint64_t normalized_tracing_session_id =
+ args.config->tracing_session_id() % std::numeric_limits<int32_t>::max();
+ if (requested_tracing_session_id < 0) {
+ LOG(ERROR) << "invalid requested tracing session id " << requested_tracing_session_id;
+ return;
+ }
+ if (static_cast<uint64_t>(requested_tracing_session_id) != normalized_tracing_session_id) {
+ return;
+ }
}
// This is on the heap as it triggers -Wframe-larger-than.
@@ -174,24 +213,35 @@ class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> {
dump_smaps_ = cfg->dump_smaps();
for (auto it = cfg->ignored_types(); it; ++it) {
std::string name = (*it).ToStdString();
- ignored_types_.emplace_back(std::move(name));
+ ignored_types_.emplace_back(art::InversePrettyDescriptor(name));
}
// This tracing session ID matches the requesting tracing session ID, so we know heapprofd
// has verified it targets this process.
- enabled_ = true;
+ enabled_ =
+ !is_oome_heap_ || (IsOomeHeapDumpAllowed(*args.config) && IsOomeDumpEnabled(*cfg.get()));
}
bool dump_smaps() { return dump_smaps_; }
+
+ // Per-DataSource enable bit. Invoked by the ::Trace method.
bool enabled() { return enabled_; }
void OnStart(const StartArgs&) override {
- if (!enabled()) {
- return;
- }
art::MutexLock lk(art_thread(), GetStateMutex());
+ // In case there are multiple tracing sessions waiting for an OOME error,
+ // there will be a data source instance for each of them. Before the
+ // transition to kStart and signaling the dumping thread, we need to make
+ // sure all the data sources are ready.
+ if (is_oome_heap_ && g_oome_sessions_pending > 0) {
+ --g_oome_sessions_pending;
+ }
if (g_state == State::kWaitForStart) {
- g_state = State::kStart;
- GetStateCV().Broadcast(art_thread());
+ // WriteHeapPackets is responsible for checking whether the DataSource is
+ // actually enabled.
+ if (!is_oome_heap_ || g_oome_sessions_pending == 0) {
+ g_state = State::kStart;
+ GetStateCV().Broadcast(art_thread());
+ }
}
}
@@ -232,10 +282,26 @@ class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> {
}
private:
+ static bool IsOomeDumpEnabled(const perfetto::protos::pbzero::JavaHprofConfig::Decoder& cfg) {
+ std::string cmdline;
+ if (!android::base::ReadFileToString("/proc/self/cmdline", &cmdline)) {
+ return false;
+ }
+ const char* argv0 = cmdline.c_str();
+
+ for (auto it = cfg.process_cmdline(); it; ++it) {
+ std::string pattern = (*it).ToStdString();
+ if (fnmatch(pattern.c_str(), argv0, FNM_NOESCAPE) == 0) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ bool is_oome_heap_ = false;
bool enabled_ = false;
bool dump_smaps_ = false;
std::vector<std::string> ignored_types_;
- static art::Thread* self_;
art::Mutex finish_mutex_{"perfetto_hprof_ds_mutex", art::LockLevel::kGenericBottomLock};
bool is_finished_ = false;
@@ -243,27 +309,40 @@ class JavaHprofDataSource : public perfetto::DataSource<JavaHprofDataSource> {
std::function<void()> async_stop_;
};
-art::Thread* JavaHprofDataSource::self_ = nullptr;
-
-
-void WaitForDataSource(art::Thread* self) {
+void SetupDataSource(const std::string& ds_name, bool is_oome_heap) {
perfetto::TracingInitArgs args;
args.backends = perfetto::BackendType::kSystemBackend;
perfetto::Tracing::Initialize(args);
perfetto::DataSourceDescriptor dsd;
- dsd.set_name("android.java_hprof");
+ dsd.set_name(ds_name);
dsd.set_will_notify_on_stop(true);
- JavaHprofDataSource::Register(dsd);
-
- LOG(INFO) << "waiting for data source";
+ JavaHprofDataSource::Register(dsd, is_oome_heap);
+ LOG(INFO) << "registered data source " << ds_name;
+}
+// Waits for the data source OnStart
+void WaitForDataSource(art::Thread* self) {
art::MutexLock lk(self, GetStateMutex());
while (g_state != State::kStart) {
GetStateCV().Wait(self);
}
}
+// Waits for the data source OnStart with a timeout. Returns false on timeout.
+bool TimedWaitForDataSource(art::Thread* self, int64_t timeout_ms) {
+ const uint64_t cutoff_ns = GetCurrentBootClockNs() + timeout_ms * 1000000;
+ art::MutexLock lk(self, GetStateMutex());
+ while (g_state != State::kStart) {
+ const uint64_t current_ns = GetCurrentBootClockNs();
+ if (current_ns >= cutoff_ns) {
+ return false;
+ }
+ GetStateCV().TimedWait(self, (cutoff_ns - current_ns) / 1000000, 0);
+ }
+ return true;
+}
+
// Helper class to write Java heap dumps to `ctx`. The whole heap dump can be
// split into more perfetto.protos.HeapGraph messages, to avoid making each
// message too big.
@@ -333,8 +412,9 @@ class Writer {
class ReferredObjectsFinder {
public:
explicit ReferredObjectsFinder(
- std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects)
- : referred_objects_(referred_objects) {}
+ std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects,
+ bool emit_field_ids)
+ : referred_objects_(referred_objects), emit_field_ids_(emit_field_ids) {}
// For art::mirror::Object::VisitReferences.
void operator()(art::ObjPtr<art::mirror::Object> obj, art::MemberOffset offset,
@@ -352,7 +432,7 @@ class ReferredObjectsFinder {
field = art::ArtField::FindInstanceFieldWithOffset(obj->GetClass(), offset.Uint32Value());
}
std::string field_name = "";
- if (field != nullptr) {
+ if (field != nullptr && emit_field_ids_) {
field_name = field->PrettyField(/*with_type=*/true);
}
referred_objects_->emplace_back(std::move(field_name), ref);
@@ -367,6 +447,8 @@ class ReferredObjectsFinder {
// We can use a raw Object* pointer here, because there are no concurrent GC threads after the
// fork.
std::vector<std::pair<std::string, art::mirror::Object*>>* referred_objects_;
+ // Prettifying field names is expensive; avoid if field name will not be used.
+ bool emit_field_ids_;
};
class RootFinder : public art::SingleRootVisitor {
@@ -461,7 +543,7 @@ std::string PrettyType(art::mirror::Class* klass) NO_THREAD_SAFETY_ANALYSIS {
}
void DumpSmaps(JavaHprofDataSource::TraceContext* ctx) {
- FILE* smaps = fopen("/proc/self/smaps", "r");
+ FILE* smaps = fopen("/proc/self/smaps", "re");
if (smaps != nullptr) {
auto trace_packet = ctx->NewTracePacket();
auto* smaps_packet = trace_packet->set_smaps_packet();
@@ -504,10 +586,11 @@ size_t EncodedSize(uint64_t n) {
// Returns all the references that `*obj` (an object of type `*klass`) is holding.
std::vector<std::pair<std::string, art::mirror::Object*>> GetReferences(art::mirror::Object* obj,
- art::mirror::Class* klass)
+ art::mirror::Class* klass,
+ bool emit_field_ids)
REQUIRES_SHARED(art::Locks::mutator_lock_) {
std::vector<std::pair<std::string, art::mirror::Object*>> referred_objects;
- ReferredObjectsFinder objf(&referred_objects);
+ ReferredObjectsFinder objf(&referred_objects, emit_field_ids);
if (klass->GetClassFlags() != art::mirror::kClassFlagNormal &&
klass->GetClassFlags() != art::mirror::kClassFlagPhantomReference) {
@@ -718,17 +801,16 @@ class HeapGraphDumper {
art::mirror::Class* klass,
perfetto::protos::pbzero::HeapGraphObject* object_proto)
REQUIRES_SHARED(art::Locks::mutator_lock_) {
+ const bool emit_field_ids = klass->GetClassFlags() != art::mirror::kClassFlagObjectArray &&
+ klass->GetClassFlags() != art::mirror::kClassFlagNormal &&
+ klass->GetClassFlags() != art::mirror::kClassFlagPhantomReference;
std::vector<std::pair<std::string, art::mirror::Object*>> referred_objects =
- GetReferences(obj, klass);
+ GetReferences(obj, klass, emit_field_ids);
art::mirror::Object* min_nonnull_ptr = FilterIgnoredReferencesAndFindMin(referred_objects);
uint64_t base_obj_id = EncodeBaseObjId(referred_objects, min_nonnull_ptr);
- const bool emit_field_ids = klass->GetClassFlags() != art::mirror::kClassFlagObjectArray &&
- klass->GetClassFlags() != art::mirror::kClassFlagNormal &&
- klass->GetClassFlags() != art::mirror::kClassFlagPhantomReference;
-
for (const auto& p : referred_objects) {
const std::string& field_name = p.first;
art::mirror::Object* referred_obj = p.second;
@@ -806,8 +888,9 @@ class HeapGraphDumper {
return false;
}
art::mirror::Class* klass = obj->GetClass();
- return std::find(ignored_types_.begin(), ignored_types_.end(), PrettyType(klass)) !=
- ignored_types_.end();
+ std::string temp;
+ std::string_view name(klass->GetDescriptor(&temp));
+ return std::find(ignored_types_.begin(), ignored_types_.end(), name) != ignored_types_.end();
}
// Name of classes whose instances should be ignored.
@@ -831,10 +914,45 @@ class HeapGraphDumper {
uint64_t prev_object_id_ = 0;
};
-void DumpPerfetto(art::Thread* self) {
- pid_t parent_pid = getpid();
- LOG(INFO) << "preparing to dump heap for " << parent_pid;
+// waitpid with a timeout implemented by ~busy-waiting
+// See b/181031512 for rationale.
+void BusyWaitpid(pid_t pid, uint32_t timeout_ms) {
+ for (size_t i = 0;; ++i) {
+ if (i == timeout_ms) {
+ // The child hasn't exited.
+ // Give up and SIGKILL it. The next waitpid should succeed.
+ LOG(ERROR) << "perfetto_hprof child timed out. Sending SIGKILL.";
+ kill(pid, SIGKILL);
+ }
+ int stat_loc;
+ pid_t wait_result = waitpid(pid, &stat_loc, WNOHANG);
+ if (wait_result == -1 && errno != EINTR) {
+ if (errno != ECHILD) {
+ // This hopefully never happens (should only be EINVAL).
+ PLOG(FATAL_WITHOUT_ABORT) << "waitpid";
+ }
+ // If we get ECHILD, the parent process was handling SIGCHLD, or did a wildcard wait.
+ // The child is no longer here either way, so that's good enough for us.
+ break;
+ } else if (wait_result > 0) {
+ break;
+ } else { // wait_result == 0 || errno == EINTR.
+ usleep(1000);
+ }
+ }
+}
+
+enum class ResumeParentPolicy {
+ IMMEDIATELY,
+ DEFERRED
+};
+void ForkAndRun(art::Thread* self,
+ ResumeParentPolicy resume_parent_policy,
+ const std::function<void(pid_t child)>& parent_runnable,
+ const std::function<void(pid_t parent, uint64_t timestamp)>& child_runnable) {
+ pid_t parent_pid = getpid();
+ LOG(INFO) << "forking for " << parent_pid;
// Need to take a heap dump while GC isn't running. See the comment in
// Heap::VisitObjects(). Also we need the critical section to avoid visiting
// the same object twice. See b/34967844.
@@ -859,41 +977,20 @@ void DumpPerfetto(art::Thread* self) {
}
if (pid != 0) {
// Parent
- // Stop the thread suspension as soon as possible to allow the rest of the application to
- // continue while we waitpid here.
- ssa.reset();
- gcs.reset();
- for (size_t i = 0;; ++i) {
- if (i == 1000) {
- // The child hasn't exited for 1 second (and all it was supposed to do was fork itself).
- // Give up and SIGKILL it. The next waitpid should succeed.
- LOG(ERROR) << "perfetto_hprof child timed out. Sending SIGKILL.";
- kill(pid, SIGKILL);
- }
- // Busy waiting here will introduce some extra latency, but that is okay because we have
- // already unsuspended all other threads. This runs on the perfetto_hprof_listener, which
- // is not needed for progress of the app itself.
- int stat_loc;
- pid_t wait_result = waitpid(pid, &stat_loc, WNOHANG);
- if (wait_result == -1 && errno != EINTR) {
- if (errno != ECHILD) {
- // This hopefully never happens (should only be EINVAL).
- PLOG(FATAL_WITHOUT_ABORT) << "waitpid";
- }
- // If we get ECHILD, the parent process was handling SIGCHLD, or did a wildcard wait.
- // The child is no longer here either way, so that's good enough for us.
- break;
- } else if (wait_result > 0) {
- break;
- } else { // wait_result == 0 || errno == EINTR.
- usleep(1000);
- }
+ if (resume_parent_policy == ResumeParentPolicy::IMMEDIATELY) {
+ // Stop the thread suspension as soon as possible to allow the rest of the application to
+ // continue while we waitpid here.
+ ssa.reset();
+ gcs.reset();
+ }
+ parent_runnable(pid);
+ if (resume_parent_policy != ResumeParentPolicy::IMMEDIATELY) {
+ ssa.reset();
+ gcs.reset();
}
return;
}
-
// The following code is only executed by the child of the original process.
-
// Uninstall signal handler, so we don't trigger a profile on it.
if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) {
close(g_signal_pipe_fds[0]);
@@ -902,25 +999,14 @@ void DumpPerfetto(art::Thread* self) {
return;
}
- // Daemon creates a new process that is the grand-child of the original process, and exits.
- if (daemon(0, 0) == -1) {
- PLOG(FATAL) << "daemon";
- }
-
- // The following code is only executed by the grand-child of the original process.
-
- // Make sure that this is the first thing we do after forking, so if anything
- // below hangs, the fork will go away from the watchdog.
- ArmWatchdogOrDie();
-
- struct timespec ts = {};
- if (clock_gettime(CLOCK_BOOTTIME, &ts) != 0) {
- LOG(FATAL) << "Failed to get boottime.";
- }
- uint64_t timestamp = ts.tv_sec * 1000000000LL + ts.tv_nsec;
-
- WaitForDataSource(self);
+ uint64_t ts = GetCurrentBootClockNs();
+ child_runnable(parent_pid, ts);
+ // Prevent the `atexit` handlers from running. We do not want to call cleanup
+ // functions the parent process has registered.
+ art::FastExit(0);
+}
+void WriteHeapPackets(pid_t parent_pid, uint64_t timestamp) {
JavaHprofDataSource::Trace(
[parent_pid, timestamp](JavaHprofDataSource::TraceContext ctx)
NO_THREAD_SAFETY_ANALYSIS {
@@ -968,11 +1054,101 @@ void DumpPerfetto(art::Thread* self) {
}
}
});
+}
- LOG(INFO) << "finished dumping heap for " << parent_pid;
- // Prevent the `atexit` handlers from running. We do not want to call cleanup
- // functions the parent process has registered.
- art::FastExit(0);
+void DumpPerfetto(art::Thread* self) {
+ ForkAndRun(
+ self,
+ ResumeParentPolicy::IMMEDIATELY,
+ // parent thread
+ [](pid_t child) {
+ // Busy waiting here will introduce some extra latency, but that is okay because we have
+ // already unsuspended all other threads. This runs on the perfetto_hprof_listener, which
+ // is not needed for progress of the app itself.
+ // We daemonize the child process, so effectively we only need to wait
+ // for it to fork and exit.
+ BusyWaitpid(child, 1000);
+ },
+ // child thread
+ [self](pid_t dumped_pid, uint64_t timestamp) {
+ // Daemon creates a new process that is the grand-child of the original process, and exits.
+ if (daemon(0, 0) == -1) {
+ PLOG(FATAL) << "daemon";
+ }
+ // The following code is only executed by the grand-child of the original process.
+
+ // Make sure that this is the first thing we do after forking, so if anything
+ // below hangs, the fork will go away from the watchdog.
+ ArmWatchdogOrDie();
+ SetupDataSource("android.java_hprof", false);
+ WaitForDataSource(self);
+ WriteHeapPackets(dumped_pid, timestamp);
+ LOG(INFO) << "finished dumping heap for " << dumped_pid;
+ });
+}
+
+void DumpPerfettoOutOfMemory() REQUIRES_SHARED(art::Locks::mutator_lock_) {
+ art::Thread* self = art::Thread::Current();
+ if (!self) {
+ LOG(FATAL_WITHOUT_ABORT) << "no thread in DumpPerfettoOutOfMemory";
+ return;
+ }
+
+ // Ensure that there is an active, armed tracing session
+ uint32_t session_cnt =
+ android::base::GetUintProperty<uint32_t>("traced.oome_heap_session.count", 0);
+ if (session_cnt == 0) {
+ return;
+ }
+ {
+ // OutOfMemoryErrors are reentrant, make sure we do not fork and process
+ // more than once.
+ art::MutexLock lk(self, GetStateMutex());
+ if (g_oome_triggered) {
+ return;
+ }
+ g_oome_triggered = true;
+ g_oome_sessions_pending = session_cnt;
+ }
+
+ art::ScopedThreadSuspension sts(self, art::ThreadState::kSuspended);
+ // If we fork & resume the original process execution it will most likely exit
+ // ~immediately due to the OOME error thrown. When the system detects
+ // that, it will clean up by killing all processes in the cgroup (including
+ // the process we just forked).
+ // We need to avoid the race between the heap dump and the process group
+ // cleanup, and the only way to do this is to avoid resuming the original
+ // process until the heap dump is complete.
+ // Given we are already about to crash anyway, the diagnostic data we get
+ // outweighs the cost of introducing some latency.
+ ForkAndRun(
+ self,
+ ResumeParentPolicy::DEFERRED,
+ // parent process
+ [](pid_t child) {
+ // waitpid to reap the zombie
+ // we are explicitly waiting for the child to exit
+ // The reason for the timeout on top of the watchdog is that it is
+ // possible (albeit unlikely) that even the watchdog will fail to be
+ // activated in the case of an atfork handler.
+ BusyWaitpid(child, kWatchdogTimeoutSec * 1000);
+ },
+ // child process
+ [self](pid_t dumped_pid, uint64_t timestamp) {
+ ArmWatchdogOrDie();
+ art::ScopedTrace trace("perfetto_hprof oome");
+ SetupDataSource("android.java_hprof.oom", true);
+ perfetto::Tracing::ActivateTriggers({"com.android.telemetry.art-outofmemory"}, 500);
+
+ // A pre-armed tracing session might not exist, so we should wait for a
+ // limited amount of time before we decide to let the execution continue.
+ if (!TimedWaitForDataSource(self, 1000)) {
+ LOG(INFO) << "OOME hprof timeout (state " << g_state << ")";
+ return;
+ }
+ WriteHeapPackets(dumped_pid, timestamp);
+ LOG(INFO) << "OOME hprof complete for " << dumped_pid;
+ });
}
// The plugin initialization function.
@@ -1062,10 +1238,15 @@ extern "C" bool ArtPlugin_Initialize() {
});
th.detach();
+ // Register the OOM error handler.
+ art::Runtime::Current()->SetOutOfMemoryErrorHook(perfetto_hprof::DumpPerfettoOutOfMemory);
+
return true;
}
extern "C" bool ArtPlugin_Deinitialize() {
+ art::Runtime::Current()->SetOutOfMemoryErrorHook(nullptr);
+
if (sigaction(kJavaHeapprofdSignal, &g_orig_act, nullptr) != 0) {
PLOG(ERROR) << "failed to reset signal handler";
// We cannot close the pipe if the signal handler wasn't unregistered,