ART/Perfetto Java Heap Profiler
Add a sampling Java Heap Profiler to ART and its interface to Perfetto.
This CL is the first (CL1) in the series below, which together implement the full ART/Perfetto Java Heap Profiler.
CL1: ART Java Heap Profiler. The main ART sampling profiler code; tested with the ART testrunner (see Test: below) as well as via VLOG.
CL2: Uncomment the APEX code on the ART side.
CL3: Add the APEX code on the Perfetto side.
CL2 and CL3 will be submitted simultaneously, since they add the APEX dependencies, to avoid build failures.
CL4: Uncomment the Perfetto API code. To be reviewed by fmayer@ (Perfetto team). Full feature testing, including Perfetto, becomes possible at this point.
CL5: Further tests and/or optimizations, added as needed.
Test: Passing Tests
test/testrunner/testrunner.py --host --debug -b
test/testrunner/testrunner.py --host --debug -b --64 -t 004-ThreadStress
test/testrunner/testrunner.py --host --runtime-option=-XX:PerfettoJavaHeapStackProf=true --debug -b
test/testrunner/testrunner.py --host --runtime-option=-XX:PerfettoJavaHeapStackProf=true --debug -b --64 -t 004-ThreadStress
Individual test runs and VLOG-based verification.
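The VLOG(heap) statements added in this CL can be observed on host runs by also enabling the heap verbosity category. A hypothetical invocation (assuming -verbose:heap is accepted as a runtime option, as for ART's other verbose categories):
test/testrunner/testrunner.py --host --runtime-option=-XX:PerfettoJavaHeapStackProf=true --runtime-option=-verbose:heap --debug -b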
Bug: 160214819
Change-Id: I2be4c4e715ce8c3c8ac545e3e14332198b9c2295
(cherry picked from commit 7b149d585b4627ebb389e987c14fe808f2fe698b)
Merged-In: I2be4c4e715ce8c3c8ac545e3e14332198b9c2295
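For context while reviewing the diff below: the heap calls GetSampleOffset, SetBytesUntilSample, and ReportSample on a HeapSampler, and sizes each new TLAB so that it ends at the next sample point. The following is a simplified, hypothetical sketch of that countdown scheme; the real implementation lives in javaheapprof/javaheapsampler.h (not part of this diff), and the interval value and internals here are illustrative only.

#include <cstddef>

// Hypothetical, simplified model of the sampler countdown. The method names
// match the calls in this diff; the internals and the 4 KiB interval are
// illustrative, not ART's actual implementation.
class HeapSamplerSketch {
 public:
  // Returns the offset of the next sample point, and reports via the out
  // parameters whether this allocation itself should be sampled and how many
  // bytes remain until the next sample afterwards.
  std::size_t GetSampleOffset(std::size_t alloc_size,
                              std::size_t tlab_pos_offset,
                              bool* take_sample,
                              std::size_t* bytes_until_sample) {
    // The real sampler uses tlab_pos_offset to account for bytes allocated in
    // the current TLAB since the last call; this sketch ignores it.
    (void)tlab_pos_offset;
    std::size_t remaining = bytes_until_sample_;
    if (alloc_size >= remaining) {
      *take_sample = true;             // This allocation crosses the sample point.
      remaining = sampling_interval_;  // Restart the countdown.
    } else {
      *take_sample = false;
      remaining -= alloc_size;
    }
    *bytes_until_sample = remaining;
    // Callers cap the next TLAB at this offset (see JHPCalculateNextTlabSize)
    // so that the sampled allocation lands inside that TLAB.
    return remaining;
  }

  void SetBytesUntilSample(std::size_t bytes) { bytes_until_sample_ = bytes; }
  bool IsEnabled() const { return enabled_; }

 private:
  bool enabled_ = true;
  std::size_t sampling_interval_ = 4 * 1024;
  std::size_t bytes_until_sample_ = 4 * 1024;
};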
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 4a03e61..4ca6bf7 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -94,6 +94,7 @@
#include "obj_ptr-inl.h"
#include "reflection.h"
#include "runtime.h"
+#include "javaheapprof/javaheapsampler.h"
#include "scoped_thread_state_change-inl.h"
#include "thread_list.h"
#include "verify_object-inl.h"
@@ -348,6 +349,7 @@
kGcCountRateMaxBucketCount),
alloc_tracking_enabled_(false),
alloc_record_depth_(AllocRecordObjectMap::kDefaultAllocStackDepth),
+ perfetto_javaheapprof_heapid_(0),
backtrace_lock_(nullptr),
seen_backtrace_count_(0u),
unique_backtrace_count_(0u),
@@ -742,6 +744,15 @@
LOG(FATAL) << "There's a gap between the image space and the non-moving space";
}
}
+ // Perfetto Java Heap Profiler Support.
+ if (runtime->IsPerfettoJavaHeapStackProfEnabled()) {
+ // Perfetto Plugin is loaded and enabled, initialize the Java Heap Profiler.
+ InitPerfettoJavaHeapProf();
+ } else {
+ // Disable the Java Heap Profiler.
+ GetHeapSampler().DisableHeapSampler(/*disable_ptr=*/nullptr, /*disable_info_ptr=*/nullptr);
+ }
+
instrumentation::Instrumentation* const instrumentation = runtime->GetInstrumentation();
if (gc_stress_mode_) {
backtrace_lock_ = new Mutex("GC complete lock");
@@ -4031,6 +4042,71 @@
}
}
+// Perfetto Java Heap Profiler Support.
+
+// Perfetto initialization.
+void Heap::InitPerfettoJavaHeapProf() {
+ // Register the heap with Perfetto under the heap name "HeapSampler" and
+ // initialize the Perfetto heap info and heap id. Until the Perfetto API
+ // code is uncommented (CL4), a placeholder id is used.
+ static uint32_t heap_id = 1;  // Placeholder, to be overwritten by the Perfetto heap id.
+ SetPerfettoJavaHeapProfHeapID(heap_id);
+ // Enable the Java Heap Profiler. The enable/disable callbacks carry no
+ // callback data yet, hence the nullptr arguments.
+ GetHeapSampler().EnableHeapSampler(/*enable_ptr=*/nullptr, /*enable_info_ptr=*/nullptr);
+ VLOG(heap) << "Java Heap Profiler Initialized";
+}
+
+// Check if the Java Heap Profiler is enabled and initialized.
+int Heap::CheckPerfettoJHPEnabled() {
+ return GetHeapSampler().IsEnabled();
+}
+
+void Heap::JHPCheckNonTlabSampleAllocation(Thread* self, mirror::Object* ret, size_t alloc_size) {
+ bool take_sample = false;
+ size_t bytes_until_sample = 0;
+ HeapSampler& prof_heap_sampler = GetHeapSampler();
+ if (ret != nullptr && prof_heap_sampler.IsEnabled()) {
+ // A non-TLAB allocation occurred; sample it. If take_sample was already
+ // set by an earlier GetSampleOffset call (made when the TLAB allocation
+ // was tried first), that value is not used: GetSampleOffset is called
+ // again here and bytes_until_sample is refreshed. Its return value (the
+ // next sample offset, relevant only for TLAB sizing) is ignored on this
+ // non-TLAB path.
+ prof_heap_sampler.GetSampleOffset(alloc_size,
+ self->GetTlabPosOffset(),
+ &take_sample,
+ &bytes_until_sample);
+ prof_heap_sampler.SetBytesUntilSample(bytes_until_sample);
+ if (take_sample) {
+ prof_heap_sampler.ReportSample(ret, alloc_size);
+ }
+ VLOG(heap) << "JHP:NonTlab:AllocNonvirtual";
+ }
+}
+
+size_t Heap::JHPCalculateNextTlabSize(Thread* self,
+ size_t jhp_def_tlab_size,
+ size_t alloc_size,
+ bool* take_sample,
+ size_t* bytes_until_sample) {
+ size_t next_tlab_size = jhp_def_tlab_size;
+ if (CheckPerfettoJHPEnabled()) {
+ size_t next_sample_point =
+ GetHeapSampler().GetSampleOffset(alloc_size,
+ self->GetTlabPosOffset(),
+ take_sample,
+ bytes_until_sample);
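+ // Never size the new TLAB past the next sample point, so the sampled
+ // allocation falls within this TLAB.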
+ next_tlab_size = std::min(next_sample_point, jhp_def_tlab_size);
+ }
+ return next_tlab_size;
+}
+
+void Heap::AdjustSampleOffset(size_t adjustment) {
+ GetHeapSampler().AdjustSampleOffset(adjustment);
+}
+
void Heap::CheckGcStressMode(Thread* self, ObjPtr<mirror::Object>* obj) {
DCHECK(gc_stress_mode_);
auto* const runtime = Runtime::Current();
@@ -4117,14 +4193,23 @@
size_t* bytes_allocated,
size_t* usable_size,
size_t* bytes_tl_bulk_allocated) {
+ mirror::Object* ret = nullptr;
+ bool take_sample = false;
+ size_t bytes_until_sample = 0;
+
if (kUsePartialTlabs && alloc_size <= self->TlabRemainingCapacity()) {
DCHECK_GT(alloc_size, self->TlabSize());
// There is enough space if we grow the TLAB. Let's do that. This increases the
// TLAB bytes.
const size_t min_expand_size = alloc_size - self->TlabSize();
+ size_t next_tlab_size = JHPCalculateNextTlabSize(self,
+ kPartialTlabSize,
+ alloc_size,
+ &take_sample,
+ &bytes_until_sample);
const size_t expand_bytes = std::max(
min_expand_size,
- std::min(self->TlabRemainingCapacity() - self->TlabSize(), kPartialTlabSize));
+ std::min(self->TlabRemainingCapacity() - self->TlabSize(), next_tlab_size));
if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, expand_bytes, grow))) {
return nullptr;
}
@@ -4133,7 +4218,12 @@
DCHECK_LE(alloc_size, self->TlabSize());
} else if (allocator_type == kAllocatorTypeTLAB) {
DCHECK(bump_pointer_space_ != nullptr);
- const size_t new_tlab_size = alloc_size + kDefaultTLABSize;
+ size_t next_tlab_size = JHPCalculateNextTlabSize(self,
+ kDefaultTLABSize,
+ alloc_size,
+ &take_sample,
+ &bytes_until_sample);
+ const size_t new_tlab_size = alloc_size + next_tlab_size;
if (UNLIKELY(IsOutOfMemoryOnAllocation(allocator_type, new_tlab_size, grow))) {
return nullptr;
}
@@ -4143,6 +4233,9 @@
return nullptr;
}
*bytes_tl_bulk_allocated = new_tlab_size;
+ if (CheckPerfettoJHPEnabled()) {
+ VLOG(heap) << "JHP:kAllocatorTypeTLAB, new TLAB bytes allocated = " << new_tlab_size;
+ }
} else {
DCHECK(allocator_type == kAllocatorTypeRegionTLAB);
DCHECK(region_space_ != nullptr);
@@ -4151,25 +4244,37 @@
if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type,
space::RegionSpace::kRegionSize,
grow))) {
+ size_t def_pr_tlab_size = kUsePartialTlabs
+ ? kPartialTlabSize
+ : gc::space::RegionSpace::kRegionSize;
+ size_t next_pr_tlab_size = JHPCalculateNextTlabSize(self,
+ def_pr_tlab_size,
+ alloc_size,
+ &take_sample,
+ &bytes_until_sample);
const size_t new_tlab_size = kUsePartialTlabs
- ? std::max(alloc_size, kPartialTlabSize)
- : gc::space::RegionSpace::kRegionSize;
+ ? std::max(alloc_size, next_pr_tlab_size)
+ : next_pr_tlab_size;
// Try to allocate a tlab.
if (!region_space_->AllocNewTlab(self, new_tlab_size, bytes_tl_bulk_allocated)) {
// Failed to allocate a tlab. Try non-tlab.
- return region_space_->AllocNonvirtual<false>(alloc_size,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
+ ret = region_space_->AllocNonvirtual<false>(alloc_size,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
+ JHPCheckNonTlabSampleAllocation(self, ret, alloc_size);
+ return ret;
}
// Fall-through to using the TLAB below.
} else {
// Check OOME for a non-tlab allocation.
if (!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow)) {
- return region_space_->AllocNonvirtual<false>(alloc_size,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
+ ret = region_space_->AllocNonvirtual<false>(alloc_size,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
+ JHPCheckNonTlabSampleAllocation(self, ret, alloc_size);
+ return ret;
}
// Neither tlab nor non-tlab works. Give up.
return nullptr;
@@ -4177,19 +4282,34 @@
} else {
// Large. Check OOME.
if (LIKELY(!IsOutOfMemoryOnAllocation(allocator_type, alloc_size, grow))) {
- return region_space_->AllocNonvirtual<false>(alloc_size,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
+ ret = region_space_->AllocNonvirtual<false>(alloc_size,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
+ JHPCheckNonTlabSampleAllocation(self, ret, alloc_size);
+ return ret;
}
return nullptr;
}
}
// Refilled TLAB, return.
- mirror::Object* ret = self->AllocTlab(alloc_size);
+ ret = self->AllocTlab(alloc_size);
DCHECK(ret != nullptr);
*bytes_allocated = alloc_size;
*usable_size = alloc_size;
+
+ // JavaHeapProfiler: If a sample was requested, report this allocation and
+ // update the countdown now that the allocation has completed. This is the
+ // fallthrough from both the if and else-if cases above, i.e. the paths
+ // that allocate from a TLAB.
+ if (CheckPerfettoJHPEnabled()) {
+ if (take_sample) {
+ GetHeapSampler().ReportSample(ret, alloc_size);
+ // Update the bytes_until_sample now that the allocation is already done.
+ GetHeapSampler().SetBytesUntilSample(bytes_until_sample);
+ }
+ VLOG(heap) << "JHP:Fallthrough Tlab allocation";
+ }
+
return ret;
}
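A worked example of the TLAB sizing above, with made-up numbers: if kDefaultTLABSize were 32 KiB and GetSampleOffset reported the next sample point 8 KiB away, JHPCalculateNextTlabSize returns min(8 KiB, 32 KiB) = 8 KiB, so the new TLAB is alloc_size + 8 KiB and fills up exactly at the sample point; the fallthrough path then calls ReportSample. A standalone sketch of that arithmetic (constants are illustrative, not ART's actual values):

#include <algorithm>
#include <cstddef>
#include <iostream>

int main() {
  const std::size_t kDefaultTlabSizeSketch = 32 * 1024;  // Illustrative only.
  const std::size_t alloc_size = 128;
  const std::size_t next_sample_point = 8 * 1024;  // Pretend GetSampleOffset returned this.

  // Mirrors JHPCalculateNextTlabSize: never size the new TLAB past the next
  // sample point, so the sampled allocation falls within this TLAB.
  const std::size_t next_tlab_size = std::min(next_sample_point, kDefaultTlabSizeSketch);
  // As in the kAllocatorTypeTLAB branch above.
  const std::size_t new_tlab_size = alloc_size + next_tlab_size;
  std::cout << "new_tlab_size = " << new_tlab_size << " bytes\n";  // Prints 8320.
  return 0;
}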