ART/Perfetto Java Heap Profiler
Add a sampling Java Heap Profiler to ART, along with its interface to Perfetto.
This CL is the first (CL1) in the series of CLs described below that implement the full ART/Perfetto Java Heap Profiler.
CL1: ART Java Heap Profiler. This is the main ART sampling profiler code, tested with the ART testrunner (commands below) and via VLOG output.
CL2: Uncomment the APEX code on the ART side. CL3: Add the APEX code on the Perfetto side.
CL2 and CL3 will be submitted simultaneously to avoid build failures and to add APEX dependencies.
CL4: Uncomment the Perfetto API code. To be reviewed by fmayer@ (Perfetto team). Further testing, potentially full feature testing including Perfetto, can be done at this point.
CL5: Further tests and/or optimizations can be added as needed.
Test: The following tests pass:
test/testrunner/testrunner.py --host --debug -b
test/testrunner/testrunner.py --host --debug -b --64 -t 004-ThreadStress
test/testrunner/testrunner.py --host --runtime-option=-XX:PerfettoJavaHeapStackProf=true --debug -b
test/testrunner/testrunner.py --host --runtime-option=-XX:PerfettoJavaHeapStackProf=true --debug -b --64 -t 004-ThreadStress
Also tested individually and by inspecting VLOG output.
Bug: 160214819
Change-Id: I2be4c4e715ce8c3c8ac545e3e14332198b9c2295
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 8ad00ad..ca82dcf 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -142,6 +142,7 @@
"interpreter/shadow_frame.cc",
"interpreter/unstarted_runtime.cc",
"java_frame_root_info.cc",
+ "javaheapprof/javaheapsampler.cc",
"jit/debugger_interface.cc",
"jit/jit.cc",
"jit/jit_code_cache.cc",
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 4a03e61..4ca6bf7 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -94,6 +94,7 @@
#include "obj_ptr-inl.h"
#include "reflection.h"
#include "runtime.h"
+#include "javaheapprof/javaheapsampler.h"
#include "scoped_thread_state_change-inl.h"
#include "thread_list.h"
#include "verify_object-inl.h"
@@ -348,6 +349,7 @@
kGcCountRateMaxBucketCount),
alloc_tracking_enabled_(false),
alloc_record_depth_(AllocRecordObjectMap::kDefaultAllocStackDepth),
+ perfetto_javaheapprof_heapid_(0),
backtrace_lock_(nullptr),
seen_backtrace_count_(0u),
unique_backtrace_count_(0u),
@@ -742,6 +744,15 @@
LOG(FATAL) << "There's a gap between the image space and the non-moving space";
}
}
+ // Perfetto Java Heap Profiler Support.
+ if (runtime->IsPerfettoJavaHeapStackProfEnabled()) {
+ // Perfetto Plugin is loaded and enabled, initialize the Java Heap Profiler.
+ InitPerfettoJavaHeapProf();
+ } else {
+ // Disable the Java Heap Profiler.
+ GetHeapSampler().DisableHeapSampler(/*disable_ptr=*/nullptr, /*disable_info_ptr=*/nullptr);
+ }
+
instrumentation::Instrumentation* const instrumentation = runtime->GetInstrumentation();
if (gc_stress_mode_) {
backtrace_lock_ = new Mutex("GC complete lock");
@@ -4031,6 +4042,71 @@
}
}
+// Perfetto Java Heap Profiler Support.
+
+// Perfetto initialization.
+void Heap::InitPerfettoJavaHeapProf() {
+  // Register the heap and create the heap id.
+  // The heap name used is "HeapSampler".
+  // Initialize the Perfetto heap info and heap id.
+ static uint32_t heap_id = 1; // Initialize to 1, to be overwritten by Perfetto heap id.
+ SetPerfettoJavaHeapProfHeapID(heap_id);
+ // Enable the Java Heap Profiler.
+ GetHeapSampler().EnableHeapSampler(/*enable_ptr=*/nullptr, /*enable_info_ptr=*/nullptr);
+  // Set the enable callback; there is no callback data (nullptr).
+  // Set the disable callback.
+ VLOG(heap) << "Java Heap Profiler Initialized";
+}
+
+// Check if the Java Heap Profiler is enabled and initialized.
+int Heap::CheckPerfettoJHPEnabled() {
+ return GetHeapSampler().IsEnabled();
+}
+
+void Heap::JHPCheckNonTlabSampleAllocation(Thread* self, mirror::Object* ret, size_t alloc_size) {
+ bool take_sample = false;
+ size_t bytes_until_sample = 0;
+ HeapSampler& prof_heap_sampler = GetHeapSampler();
+ if (ret != nullptr && prof_heap_sampler.IsEnabled()) {
+    // An allocation occurred; sample it even though it is non-TLAB.
+    // take_sample may already be set from a previous GetSampleOffset call
+    // (if the TLAB allocation was tried first), but that value is not used;
+    // a new value is generated below and bytes_until_sample is updated.
+    // Note that the return value of GetSampleOffset is not used in the
+    // non-TLAB case here.
+ prof_heap_sampler.GetSampleOffset(alloc_size,
+ self->GetTlabPosOffset(),
+ &take_sample,
+ &bytes_until_sample);
+ prof_heap_sampler.SetBytesUntilSample(bytes_until_sample);
+ if (take_sample) {
+ prof_heap_sampler.ReportSample(ret, alloc_size);
+ }
+ VLOG(heap) << "JHP:NonTlab:AllocNonvirtual";
+ }
+}
+
+size_t Heap::JHPCalculateNextTlabSize(Thread* self,
+ size_t jhp_def_tlab_size,
+ size_t alloc_size,
+ bool* take_sample,
+ size_t* bytes_until_sample) {
+ size_t next_tlab_size = jhp_def_tlab_size;
+ if (CheckPerfettoJHPEnabled()) {
+ size_t next_sample_point =
+ GetHeapSampler().GetSampleOffset(alloc_size,
+ self->GetTlabPosOffset(),
+ take_sample,
+ bytes_until_sample);
+ next_tlab_size = std::min(next_sample_point, jhp_def_tlab_size);
+ }
+ return next_tlab_size;
+}
+
+void Heap::AdjustSampleOffset(size_t adjustment) {
+ GetHeapSampler().AdjustSampleOffset(adjustment);
+}
+
void Heap::CheckGcStressMode(Thread* self, ObjPtr<mirror::Object>* obj) {
DCHECK(gc_stress_mode_);
auto* const runtime = Runtime::Current();
@@ -4117,14 +4193,23 @@
size_t* bytes_allocated,
size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
+ mirror::Object* ret = nullptr;
+ bool take_sample = false;
+ size_t bytes_until_sample = 0;
+
if (kUsePartialTlabs && alloc_size <= self->TlabRemainingCapacity()) {
DCHECK_GT(alloc_size, self->TlabSize());
// There is enough space if we grow the TLAB. Lets do that. This increases the
// TLAB bytes.
const size_t min_expand_size = alloc_size - self->TlabSize();
+ size_t next_tlab_size = JHPCalculateNextTlabSize(self,
+ kPartialTlabSize,
+ alloc_size,
+ &take_sample,
+ &bytes_until_sample);
const size_t expand_bytes = std::max(
min_expand_size,
- std::min(self->TlabRemainingCapacity() - self->TlabSize(), kPartialTlabSize));
+ std::min(self->TlabRemainingCapacity() - self->TlabSize(), next_tlab_size));
if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, expand_bytes, grow))) {
return nullptr;
}
@@ -4133,7 +4218,12 @@
DCHECK_LE(alloc_size, self->TlabSize());
} else if (allocator_type == kAllocatorTypeTLAB) {
DCHECK(bump_pointer_space_ != nullptr);
- const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
+ size_t next_tlab_size = JHPCalculateNextTlabSize(self,
+ kDefaultTLABSize,
+ alloc_size,
+ &take_sample,
+ &bytes_until_sample);
+ const size_t new_tlab_size = alloc_size + next_tlab_size;
if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, new_tlab_size, grow))) {
return nullptr;
}
@@ -4143,6 +4233,9 @@
return nullptr;
}
*bytes_tl_bulk_allocated = new_tlab_size;
+ if (CheckPerfettoJHPEnabled()) {
+ VLOG(heap) << "JHP:kAllocatorTypeTLAB, New Tlab bytes allocated= " << new_tlab_size;
+ }
} else {
DCHECK(allocator_type == kAllocatorTypeRegionTLAB);
DCHECK(region_space_ != nullptr);
@@ -4151,25 +4244,37 @@
if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type,
space::RegionSpace::kRegionSize,
grow))) {
+ size_t def_pr_tlab_size = kUsePartialTlabs
+ ? kPartialTlabSize
+ : gc::space::RegionSpace::kRegionSize;
+ size_t next_pr_tlab_size = JHPCalculateNextTlabSize(self,
+ def_pr_tlab_size,
+ alloc_size,
+ &take_sample,
+ &bytes_until_sample);
const size_t new_tlab_size = kUsePartialTlabs
- ? std::max(alloc_size, kPartialTlabSize)
- : gc::space::RegionSpace::kRegionSize;
+ ? std::max(alloc_size, next_pr_tlab_size)
+ : next_pr_tlab_size;
// Try to allocate a tlab.
if (!region_space_->AllocNewTlab(self, new_tlab_size, bytes_tl_bulk_allocated)) {
// Failed to allocate a tlab. Try non-tlab.
- return region_space_->AllocNonvirtual<false>(alloc_size,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
+ ret = region_space_->AllocNonvirtual<false>(alloc_size,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
+ JHPCheckNonTlabSampleAllocation(self, ret, alloc_size);
+ return ret;
}
// Fall-through to using the TLAB below.
} else {
// Check OOME for a non-tlab allocation.
if (!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow)) {
- return region_space_->AllocNonvirtual<false>(alloc_size,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
+ ret = region_space_->AllocNonvirtual<false>(alloc_size,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
+ JHPCheckNonTlabSampleAllocation(self, ret, alloc_size);
+ return ret;
}
// Neither tlab or non-tlab works. Give up.
return nullptr;
@@ -4177,19 +4282,34 @@
} else {
// Large. Check OOME.
if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow))) {
- return region_space_->AllocNonvirtual<false>(alloc_size,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
+ ret = region_space_->AllocNonvirtual<false>(alloc_size,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
+ JHPCheckNonTlabSampleAllocation(self, ret, alloc_size);
+ return ret;
}
return nullptr;
}
}
// Refilled TLAB, return.
- mirror::Object* ret = self->AllocTlab(alloc_size);
+ ret = self->AllocTlab(alloc_size);
DCHECK(ret != nullptr);
*bytes_allocated = alloc_size;
*usable_size = alloc_size;
+
+  // JavaHeapProfiler: Send the thread information about this allocation in
+  // case a sample was requested.
+  // This is the fall-through from both the if and else-if cases above, i.e. the cases that use a TLAB.
+ if (CheckPerfettoJHPEnabled()) {
+ if (take_sample) {
+ GetHeapSampler().ReportSample(ret, alloc_size);
+ // Update the bytes_until_sample now that the allocation is already done.
+ GetHeapSampler().SetBytesUntilSample(bytes_until_sample);
+ }
+ VLOG(heap) << "JHP:Fallthrough Tlab allocation";
+ }
+
return ret;
}
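
The TLAB sizing above caps each new TLAB at the next sample point, so the sample is taken when that TLAB is exhausted. Below is a minimal standalone sketch of the offset arithmetic, illustrative only and not part of this change; the values and the def_tlab_size stand-in (for kDefaultTLABSize) are made up:

#include <algorithm>
#include <cstddef>
#include <iostream>

int main() {
  // bytes_until_sample is tracked as an offset from the TLAB start.
  std::size_t bytes_until_sample = 4096;  // next sample point
  std::size_t tlab_used = 3000;           // pos - start
  std::size_t alloc_size = 512;           // requested allocation size

  // Mirrors GetSampleOffset: does this allocation reach the sample point?
  std::size_t exhausted_size = alloc_size + tlab_used;  // 3512
  std::ptrdiff_t diff = static_cast<std::ptrdiff_t>(bytes_until_sample) -
                        static_cast<std::ptrdiff_t>(exhausted_size);  // 584

  if (diff <= 0) {
    std::cout << "take a sample at this allocation\n";
  } else {
    // Mirrors JHPCalculateNextTlabSize: cap the next TLAB at the sample
    // point so the sample is taken when that TLAB is exhausted.
    std::size_t def_tlab_size = 32 * 1024;  // stand-in for kDefaultTLABSize
    std::size_t next_tlab_size =
        std::min(static_cast<std::size_t>(diff), def_tlab_size);
    std::cout << "no sample yet; next_tlab_size = " << next_tlab_size << "\n";
  }
  return 0;
}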
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 99da008..7cc06a7 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -834,6 +834,38 @@
void DumpGcCountRateHistogram(std::ostream& os) const REQUIRES(!*gc_complete_lock_);
void DumpBlockingGcCountRateHistogram(std::ostream& os) const REQUIRES(!*gc_complete_lock_);
+  // Perfetto ART Heap Profiler support.
+  // The heap id is a heap identifier used by the Perfetto API when reporting
+  // allocations to it.
+ void SetPerfettoJavaHeapProfHeapID(uint32_t heapid) {
+ perfetto_javaheapprof_heapid_ = heapid;
+ }
+
+ uint32_t GetPerfettoJavaHeapProfHeapID() const {
+ return perfetto_javaheapprof_heapid_;
+ }
+
+ HeapSampler& GetHeapSampler() {
+ return heap_sampler_;
+ }
+
+ void InitPerfettoJavaHeapProf();
+ int CheckPerfettoJHPEnabled();
+ // In NonTlab case: Check whether we should report a sample allocation and if so report it.
+ // Also update state (bytes_until_sample).
+ void JHPCheckNonTlabSampleAllocation(Thread* self,
+ mirror::Object* ret,
+ size_t alloc_size);
+ // In Tlab case: Calculate the next tlab size (location of next sample point) and whether
+ // a sample should be taken.
+ size_t JHPCalculateNextTlabSize(Thread* self,
+ size_t jhp_def_tlab_size,
+ size_t alloc_size,
+ bool* take_sample,
+ size_t* bytes_until_sample);
+ // Reduce the number of bytes to the next sample position by this adjustment.
+ void AdjustSampleOffset(size_t adjustment);
+
// Allocation tracking support
// Callers to this function use double-checked locking to ensure safety on allocation_records_
bool IsAllocTrackingEnabled() const {
@@ -1571,6 +1603,10 @@
std::unique_ptr<AllocRecordObjectMap> allocation_records_;
size_t alloc_record_depth_;
+ // Perfetto Java Heap Profiler support.
+ uint32_t perfetto_javaheapprof_heapid_;
+ HeapSampler heap_sampler_;
+
// GC stress related data structures.
Mutex* backtrace_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
// Debugging variables, seen backtraces vs unique backtraces.
diff --git a/runtime/javaheapprof/javaheapsampler.cc b/runtime/javaheapprof/javaheapsampler.cc
new file mode 100644
index 0000000..a1c58d8
--- /dev/null
+++ b/runtime/javaheapprof/javaheapsampler.cc
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/atomic.h"
+#include "base/locks.h"
+#include "gc/heap.h"
+#include "javaheapprof/javaheapsampler.h"
+#include "runtime.h"
+
+namespace art {
+
+size_t HeapSampler::NextGeoDistRandSample() {
+  // Acquire the lock so that accesses to rng_ and geo_dist_ are thread-safe.
+ art::MutexLock mu(art::Thread::Current(), geo_dist_rng_lock_);
+ size_t nsample = geo_dist_(rng_);
+ if (nsample == 0) {
+    // The geometric distribution returns non-negative values and can yield zero.
+    // In the zero case, return 1 so the sample point always advances.
+ nsample = 1;
+ }
+ return nsample;
+}
+
+size_t HeapSampler::PickAndAdjustNextSample(size_t sample_adjust_bytes) {
+ size_t bytes_until_sample;
+ if (GetSamplingInterval() == 1) {
+ bytes_until_sample = 1;
+ return bytes_until_sample;
+ }
+ bytes_until_sample = NextGeoDistRandSample();
+ VLOG(heap) << "JHP:PickAndAdjustNextSample, sample_adjust_bytes: "
+ << sample_adjust_bytes
+ << " bytes_until_sample: " << bytes_until_sample;
+  // Adjust the sample point by the bytes that overran the previous sample point.
+ if (sample_adjust_bytes > 0 && bytes_until_sample > sample_adjust_bytes) {
+ bytes_until_sample -= sample_adjust_bytes;
+ VLOG(heap) << "JHP:PickAndAdjustNextSample, final bytes_until_sample: "
+ << bytes_until_sample;
+ }
+ return bytes_until_sample;
+}
+
+// Report an allocation sample to Perfetto.
+// Samples can only be reported after the allocation is done.
+// Likewise, bytes_until_sample can only be updated after the allocation and the
+// reporting are done. The next bytes_until_sample is therefore calculated earlier,
+// before the allocation (to size the next TLAB), but only saved/updated afterwards.
+void HeapSampler::ReportSample(art::mirror::Object* obj ATTRIBUTE_UNUSED, size_t allocation_size) {
+ VLOG(heap) << "JHP:***Report Perfetto Allocation: alloc_size: " << allocation_size;
+}
+
+// Check whether we should take a sample at this allocation and calculate the
+// sample offset to use in the TLAB expansion calculation, i.e. the offset from
+// the current position to the next sample.
+// tlab_used = pos - start
+size_t HeapSampler::GetSampleOffset(size_t alloc_size,
+ size_t tlab_used,
+ bool* take_sample,
+ size_t* temp_bytes_until_sample) {
+ size_t exhausted_size = alloc_size + tlab_used;
+ VLOG(heap) << "JHP:GetSampleOffset: exhausted_size = " << exhausted_size;
+  // Note: bytes_until_sample is used as an offset from the TLAB start point.
+ size_t bytes_until_sample = *GetBytesUntilSample();
+ ssize_t diff = bytes_until_sample - exhausted_size;
+ VLOG(heap) << "JHP:GetSampleOffset: diff = " << diff << " bytes_until_sample = "
+ << bytes_until_sample;
+ if (diff <= 0) {
+ *take_sample = true;
+ // Compute a new bytes_until_sample
+ size_t sample_adj_bytes = -diff;
+ size_t next_bytes_until_sample = PickAndAdjustNextSample(sample_adj_bytes);
+ VLOG(heap) << "JHP:GetSampleOffset: Take sample, next_bytes_until_sample = "
+ << next_bytes_until_sample;
+ next_bytes_until_sample += tlab_used;
+ VLOG(heap) << "JHP:GetSampleOffset:Next sample offset = "
+ << (next_bytes_until_sample - tlab_used);
+    // This function is called before the actual allocation happens, so
+    // bytes_until_sample cannot be updated until the allocation is done; save the
+    // value to a temporary, which the calling function stores after allocating.
+    *temp_bytes_until_sample = next_bytes_until_sample;
+    // Return the offset to the next sample, not the raw bytes_until_sample.
+    return next_bytes_until_sample - tlab_used;
+ } else {
+ *take_sample = false;
+    // The following two lines are needed in the non-TLAB case but have no effect
+    // in the TLAB case, because after returning from this function the TLAB path
+    // only consumes temp_bytes_until_sample (via SetBytesUntilSample) when
+    // take_sample is true.
+ size_t next_bytes_until_sample = bytes_until_sample - alloc_size;
+ *temp_bytes_until_sample = next_bytes_until_sample;
+ VLOG(heap) << "JHP:GetSampleOffset: No sample, next_bytes_until_sample= "
+ << next_bytes_until_sample << " alloc= " << alloc_size;
+ return diff;
+ }
+}
+
+// Sample locations are tracked as offsets from the start of the TLAB, so the
+// sample position calculation must be adjusted whenever the TLAB is reset.
+// The adjustment is the new reference position, usually the outgoing TLAB's pos - start.
+void HeapSampler::AdjustSampleOffset(size_t adjustment) {
+ size_t* bytes_until_sample = GetBytesUntilSample();
+ size_t cur_bytes_until_sample = *bytes_until_sample;
+ if (cur_bytes_until_sample < adjustment) {
+ VLOG(heap) << "JHP:AdjustSampleOffset:No Adjustment";
+ return;
+ }
+ size_t next_bytes_until_sample = cur_bytes_until_sample - adjustment;
+ *bytes_until_sample = next_bytes_until_sample;
+ VLOG(heap) << "JHP:AdjustSampleOffset: adjustment = " << adjustment
+ << " next_bytes_until_sample = " << next_bytes_until_sample;
+}
+
+// Enable the heap sampler and initialize/set the sampling interval.
+void HeapSampler::EnableHeapSampler(void* enable_ptr ATTRIBUTE_UNUSED,
+ const void* enable_info_ptr ATTRIBUTE_UNUSED) {
+ uint64_t interval = 4 * 1024;
+  // TODO: Set the interval to the sampling interval requested through
+  // AHeapProfileSessionInfo once the Perfetto API code is uncommented.
+ if (interval > 0) {
+    // Acquire the lock so that accesses to rng_ and geo_dist_ are thread-safe.
+ art::MutexLock mu(art::Thread::Current(), geo_dist_rng_lock_);
+ SetSamplingInterval(interval);
+ }
+  // Otherwise the default 4K sampling interval is used. The default case shouldn't
+  // happen with the Perfetto API: AHeapProfileEnableCallbackInfo_getSamplingInterval
+  // always returns the requested sampling interval as a uint64_t, checked for != 0.
+  // Do not call heap->GetPerfettoJavaHeapProfHeapID() as a temporary here: it builds,
+  // but test runs silently fail because the Heap is not fully constructed yet.
+ // heap_id will be set through the Perfetto API.
+ perfetto_heap_id_ = 1; // To be set by Perfetto API
+ enabled_.store(true, std::memory_order_release);
+}
+
+bool HeapSampler::IsEnabled() {
+ return enabled_.load(std::memory_order_acquire);
+}
+
+void HeapSampler::DisableHeapSampler(void* disable_ptr ATTRIBUTE_UNUSED,
+ const void* disable_info_ptr ATTRIBUTE_UNUSED) {
+ enabled_.store(false, std::memory_order_release);
+}
+
+int HeapSampler::GetSamplingInterval() {
+ return p_sampling_interval_.load(std::memory_order_acquire);
+}
+
+void HeapSampler::SetSamplingInterval(int sampling_interval) {
+ p_sampling_interval_.store(sampling_interval, std::memory_order_release);
+ geo_dist_.param(std::geometric_distribution<size_t>::param_type(1.0/p_sampling_interval_));
+}
+
+void HeapSampler::SetSessionInfo(void* info) {
+ perfetto_session_info_ = info;
+}
+
+void* HeapSampler::GetSessionInfo() {
+ return perfetto_session_info_;
+}
+
+} // namespace art
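
The sampling interval is the mean of the geometric distribution the gaps are drawn from: with p = 1/interval, the distribution's mean is (1 - p)/p, approximately interval - 1. A quick standalone check of that property (illustrative only, not part of this change):

#include <cstddef>
#include <iostream>
#include <random>

int main() {
  constexpr double kInterval = 4096.0;  // mean bytes between samples
  std::minstd_rand rng(std::minstd_rand::default_seed);
  std::geometric_distribution<std::size_t> geo(1.0 / kInterval);

  constexpr int kDraws = 1000000;
  double sum = 0.0;
  for (int i = 0; i < kDraws; ++i) {
    std::size_t gap = geo(rng);
    if (gap == 0) {
      gap = 1;  // same zero guard as NextGeoDistRandSample
    }
    sum += static_cast<double>(gap);
  }
  // Prints a value close to 4095, i.e. (1 - p) / p for p = 1/4096.
  std::cout << "mean gap = " << sum / kDraws << " bytes\n";
  return 0;
}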
diff --git a/runtime/javaheapprof/javaheapsampler.h b/runtime/javaheapprof/javaheapsampler.h
new file mode 100644
index 0000000..02cb7b7
--- /dev/null
+++ b/runtime/javaheapprof/javaheapsampler.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JAVAHEAPPROF_JAVAHEAPSAMPLER_H_
+#define ART_RUNTIME_JAVAHEAPPROF_JAVAHEAPSAMPLER_H_
+
+#include <random>
+#include "base/locks.h"
+#include "base/mutex.h"
+#include "mirror/object.h"
+
+namespace art {
+
+class HeapSampler {
+ public:
+ HeapSampler() : rng_(/*seed=*/std::minstd_rand::default_seed),
+ geo_dist_(1.0 / /*expected value=4KB*/ 4096),
+ geo_dist_rng_lock_("Heap Sampler RNG Geometric Dist lock",
+ art::LockLevel::kGenericBottomLock) {}
+
+ // Set the bytes until sample.
+ void SetBytesUntilSample(size_t bytes) {
+ *GetBytesUntilSample() = bytes;
+ }
+ // Get the bytes until sample.
+ size_t* GetBytesUntilSample() {
+    // Initialization happens only once, the first time the function is called on
+    // a given thread, but a slot for it is always allocated at thread creation.
+ thread_local size_t bytes_until_sample = 0;
+ return &bytes_until_sample;
+ }
+ // Report a sample to Perfetto.
+ void ReportSample(art::mirror::Object* obj, size_t allocation_size);
+  // Check whether we should take a sample at this allocation, and return the
+  // number of bytes from the current position to the next sample, for use in
+  // the TLAB expansion calculation.
+ // Update state of both take_sample and temp_bytes_until_sample.
+ // tlab_used = pos - start
+  // Note: bytes_until_sample is not updated here; it is saved after the
+  // allocation happens, since this function is called before the actual allocation.
+ size_t GetSampleOffset(size_t alloc_size,
+ size_t tlab_used,
+ bool* take_sample,
+ size_t* temp_bytes_until_sample) REQUIRES(!geo_dist_rng_lock_);
+  // Adjust the sample offset value by the given adjustment, usually the
+  // (pos - start) of the TLAB being reset.
+ void AdjustSampleOffset(size_t adjustment);
+ // Is heap sampler enabled?
+ bool IsEnabled();
+ void EnableHeapSampler(void* enable_ptr, const void* enable_info_ptr);
+ void DisableHeapSampler(void* disable_ptr, const void* disable_info_ptr);
+ // Set the sampling interval.
+ void SetSamplingInterval(int sampling_interval) REQUIRES(geo_dist_rng_lock_);
+ // Return the sampling interval.
+ int GetSamplingInterval();
+ // Set the Perfetto Session Info.
+ void SetSessionInfo(void* info);
+ // Get the Perfetto Session Info.
+ void* GetSessionInfo();
+
+ private:
+ size_t NextGeoDistRandSample() REQUIRES(!geo_dist_rng_lock_);
+ // Choose, save, and return the number of bytes until the next sample,
+ // possibly decreasing sample intervals by sample_adj_bytes.
+ size_t PickAndAdjustNextSample(size_t sample_adj_bytes = 0) REQUIRES(!geo_dist_rng_lock_);
+
+ std::atomic<bool> enabled_;
+  // Default sampling interval is 4 KiB.
+ // Writes guarded by geo_dist_rng_lock_.
+ std::atomic<int> p_sampling_interval_{4 * 1024};
+ void* perfetto_session_info_;
+ uint32_t perfetto_heap_id_ = 0;
+ // std random number generator.
+ std::minstd_rand rng_ GUARDED_BY(geo_dist_rng_lock_); // Holds the state
+ // std geometric distribution
+ std::geometric_distribution</*result_type=*/size_t> geo_dist_ GUARDED_BY(geo_dist_rng_lock_);
+ // Multiple threads can access the geometric distribution and the random number
+ // generator concurrently and thus geo_dist_rng_lock_ is used for thread safety.
+ art::Mutex geo_dist_rng_lock_;
+};
+
+} // namespace art
+
+#endif // ART_RUNTIME_JAVAHEAPPROF_JAVAHEAPSAMPLER_H_
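
GetBytesUntilSample above keeps the per-thread distance to the next sample in a function-local thread_local rather than in a Thread field. A minimal demonstration of that pattern (illustrative only, not ART code):

#include <cstddef>
#include <iostream>
#include <thread>

// Same pattern as HeapSampler::GetBytesUntilSample: a function-local
// thread_local gives each thread its own counter, zero-initialized on
// first use by that thread.
std::size_t* GetBytesUntilSample() {
  thread_local std::size_t bytes_until_sample = 0;
  return &bytes_until_sample;
}

int main() {
  *GetBytesUntilSample() = 111;  // the main thread's copy
  std::thread worker([] {
    std::cout << "worker sees " << *GetBytesUntilSample() << "\n";  // 0
    *GetBytesUntilSample() = 222;  // only changes the worker's copy
  });
  worker.join();
  std::cout << "main still sees " << *GetBytesUntilSample() << "\n";  // 111
  return 0;
}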
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 6dd121c..3aa85cd 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -439,6 +439,10 @@
.WithType<bool>()
.WithValueMap({{"false", false}, {"true", true}})
.IntoKey(M::PerfettoHprof)
+ .Define("-XX:PerfettoJavaHeapStackProf=_")
+ .WithType<bool>()
+ .WithValueMap({{"false", false}, {"true", true}})
+ .IntoKey(M::PerfettoJavaHeapStackProf)
.Ignore({
"-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
"-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index ae54453..aeaa175 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -296,7 +296,8 @@
zygote_no_threads_(false),
verifier_logging_threshold_ms_(100),
verifier_missing_kthrow_fatal_(false),
- perfetto_hprof_enabled_(false) {
+ perfetto_hprof_enabled_(false),
+ perfetto_javaheapprof_enabled_(false) {
static_assert(Runtime::kCalleeSaveSize ==
static_cast<uint32_t>(CalleeSaveType::kLastCalleeSaveType), "Unexpected size");
CheckConstants();
@@ -1085,6 +1086,16 @@
LOG(WARNING) << "Failed to load perfetto_hprof: " << err;
}
}
+ if (IsPerfettoJavaHeapStackProfEnabled() &&
+ (Dbg::IsJdwpAllowed() || IsProfileableFromShell() || IsJavaDebuggable() ||
+ Runtime::Current()->IsSystemServer())) {
+ std::string err;
+ ScopedTrace tr("perfetto_javaheapprof init.");
+ ScopedThreadSuspension sts(Thread::Current(), ThreadState::kNative);
+ if (!EnsurePerfettoJavaHeapProfPlugin(&err)) {
+ LOG(WARNING) << "Failed to load perfetto_javaheapprof: " << err;
+ }
+ }
if (LIKELY(automatically_set_jni_ids_indirection_) && CanSetJniIdType()) {
if (IsJavaDebuggable()) {
SetJniIdType(JniIdType::kIndices);
@@ -1217,6 +1228,7 @@
verifier_missing_kthrow_fatal_ = runtime_options.GetOrDefault(Opt::VerifierMissingKThrowFatal);
perfetto_hprof_enabled_ = runtime_options.GetOrDefault(Opt::PerfettoHprof);
+ perfetto_javaheapprof_enabled_ = runtime_options.GetOrDefault(Opt::PerfettoJavaHeapStackProf);
// Try to reserve a dedicated fault page. This is allocated for clobbered registers and sentinels.
// If we cannot reserve it, log a warning.
@@ -1807,6 +1819,14 @@
// subsequent dlopens for the library no-ops.
dlopen(plugin_name, RTLD_NOW | RTLD_LOCAL);
}
+ if (IsZygote() && IsPerfettoJavaHeapStackProfEnabled()) {
+ // There is no debug build of heapprofd_client_api.so currently.
+ // Add debug build .so when available.
+ constexpr const char* jhp_plugin_name = "heapprofd_client_api.so";
+ // Load eagerly in Zygote to improve app startup times. This will make
+ // subsequent dlopens for the library no-ops.
+ dlopen(jhp_plugin_name, RTLD_NOW | RTLD_LOCAL);
+ }
VLOG(startup) << "Runtime::Init exiting";
@@ -1847,6 +1867,13 @@
return EnsurePluginLoaded(plugin_name, error_msg);
}
+bool Runtime::EnsurePerfettoJavaHeapProfPlugin(std::string* error_msg) {
+ // There is no debug build of heapprofd_client_api.so currently.
+ // Add debug build .so when available.
+ constexpr const char* jhp_plugin_name = "heapprofd_client_api.so";
+ return EnsurePluginLoaded(jhp_plugin_name, error_msg);
+}
+
static bool EnsureJvmtiPlugin(Runtime* runtime,
std::string* error_msg) {
// TODO Rename Dbg::IsJdwpAllowed is IsDebuggingAllowed.
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 662238e..0054403 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -132,6 +132,7 @@
bool EnsurePluginLoaded(const char* plugin_name, std::string* error_msg);
bool EnsurePerfettoPlugin(std::string* error_msg);
+ bool EnsurePerfettoJavaHeapProfPlugin(std::string* error_msg);
// IsAotCompiler for compilers that don't have a running runtime. Only dex2oat currently.
bool IsAotCompiler() const {
@@ -957,6 +958,10 @@
return perfetto_hprof_enabled_;
}
+ bool IsPerfettoJavaHeapStackProfEnabled() const {
+ return perfetto_javaheapprof_enabled_;
+ }
+
// Return true if we should load oat files as executable or not.
bool GetOatFilesExecutable() const;
@@ -1311,6 +1316,7 @@
bool verifier_missing_kthrow_fatal_;
bool perfetto_hprof_enabled_;
+ bool perfetto_javaheapprof_enabled_;
metrics::ArtMetrics metrics_;
std::unique_ptr<metrics::MetricsReporter> metrics_reporter_;
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 6aec33c..1961113 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -183,6 +183,9 @@
// This is set to true in frameworks/base/core/jni/AndroidRuntime.cpp.
RUNTIME_OPTIONS_KEY (bool, PerfettoHprof, false)
+// Whether to enable Perfetto Java Heap Stack Profiling.
+RUNTIME_OPTIONS_KEY (bool, PerfettoJavaHeapStackProf, false)
+
// Whether to dump ART metrics to logcat
RUNTIME_OPTIONS_KEY (Unit, WriteMetricsToLog)
RUNTIME_OPTIONS_KEY (std::string, WriteMetricsToFile)
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 7f80691..8f999f6 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -4154,6 +4154,15 @@
}
void Thread::ResetTlab() {
+ gc::Heap* const heap = Runtime::Current()->GetHeap();
+ if (heap->GetHeapSampler().IsEnabled()) {
+ // Note: We always ResetTlab before SetTlab, therefore we can do the sample
+ // offset adjustment here.
+ heap->AdjustSampleOffset(GetTlabPosOffset());
+ VLOG(heap) << "JHP: ResetTlab, Tid: " << GetTid()
+ << " adjustment = "
+ << (tlsPtr_.thread_local_pos - tlsPtr_.thread_local_start);
+ }
SetTlab(nullptr, nullptr, nullptr);
}
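
Since the sample point is tracked as an offset from the TLAB start, it must be rebased when the TLAB is reset; ResetTlab passes the outgoing TLAB's pos - start as the adjustment. A worked example of the arithmetic in AdjustSampleOffset (the values are made up):

#include <cstddef>
#include <iostream>

int main() {
  // Sample point, tracked as an offset from the current TLAB's start.
  std::size_t bytes_until_sample = 10000;
  // The outgoing TLAB had consumed pos - start = 6000 bytes when reset.
  std::size_t adjustment = 6000;

  // Mirrors HeapSampler::AdjustSampleOffset: rebase the offset so it is
  // measured from the new TLAB's start; skip the adjustment if the sample
  // point already falls inside the consumed region.
  if (bytes_until_sample >= adjustment) {
    bytes_until_sample -= adjustment;
  }
  std::cout << bytes_until_sample << "\n";  // 4000 bytes into the new TLAB
  return 0;
}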
diff --git a/runtime/thread.h b/runtime/thread.h
index b23f647..7475681 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -36,6 +36,7 @@
#include "handle.h"
#include "handle_scope.h"
#include "interpreter/interpreter_cache.h"
+#include "javaheapprof/javaheapsampler.h"
#include "jvalue.h"
#include "managed_stack.h"
#include "offsets.h"
@@ -1157,6 +1158,11 @@
return tlsPtr_.thread_local_end - tlsPtr_.thread_local_pos;
}
+  // Returns the offset of the current TLAB position from the TLAB start.
+ size_t GetTlabPosOffset() const {
+ return tlsPtr_.thread_local_pos - tlsPtr_.thread_local_start;
+ }
+
// Returns the remaining space in the TLAB if we were to expand it to maximum capacity.
size_t TlabRemainingCapacity() const {
return tlsPtr_.thread_local_limit - tlsPtr_.thread_local_pos;