Faster stack trace creation
Cache stack frames from the counting visitor to avoid needing to
walk the stack twice in the common case.
Speeds up pmd benchmark by 20%.
Test: test-art-host
Change-Id: I81e4e55280d9c1ccf1937a7ea12abff75e9abb94
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6843e31..d843de5 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2190,12 +2190,18 @@
tlsPtr_.class_loader_override = GetJniEnv()->NewGlobalRef(class_loader_override);
}
-class CountStackDepthVisitor : public StackVisitor {
+using ArtMethodDexPcPair = std::pair<ArtMethod*, uint32_t>;
+
+// Counts the stack trace depth and also fetches the first max_saved_frames frames.
+class FetchStackTraceVisitor : public StackVisitor {
public:
- explicit CountStackDepthVisitor(Thread* thread)
+ explicit FetchStackTraceVisitor(Thread* thread,
+ ArtMethodDexPcPair* saved_frames = nullptr,
+ size_t max_saved_frames = 0)
REQUIRES_SHARED(Locks::mutator_lock_)
: StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
- depth_(0), skip_depth_(0), skipping_(true) {}
+ saved_frames_(saved_frames),
+ max_saved_frames_(max_saved_frames) {}
bool VisitFrame() REQUIRES_SHARED(Locks::mutator_lock_) {
// We want to skip frames up to and including the exception's constructor.
@@ -2208,6 +2214,10 @@
}
if (!skipping_) {
if (!m->IsRuntimeMethod()) { // Ignore runtime frames (in particular callee save).
+ if (depth_ < max_saved_frames_) {
+ saved_frames_[depth_].first = m;
+ saved_frames_[depth_].second = m->IsProxyMethod() ? DexFile::kDexNoIndex : GetDexPc();
+ }
++depth_;
}
} else {
@@ -2216,20 +2226,22 @@
return true;
}
- int GetDepth() const {
+ uint32_t GetDepth() const {
return depth_;
}
- int GetSkipDepth() const {
+ uint32_t GetSkipDepth() const {
return skip_depth_;
}
private:
- uint32_t depth_;
- uint32_t skip_depth_;
- bool skipping_;
+ uint32_t depth_ = 0;
+ uint32_t skip_depth_ = 0;
+ bool skipping_ = true;
+ ArtMethodDexPcPair* saved_frames_;
+ const size_t max_saved_frames_;
- DISALLOW_COPY_AND_ASSIGN(CountStackDepthVisitor);
+ DISALLOW_COPY_AND_ASSIGN(FetchStackTraceVisitor);
};
template<bool kTransactionActive>
@@ -2239,8 +2251,6 @@
: StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
self_(self),
skip_depth_(skip_depth),
- count_(0),
- trace_(nullptr),
pointer_size_(Runtime::Current()->GetClassLinker()->GetImagePointerSize()) {}
bool Init(int depth) REQUIRES_SHARED(Locks::mutator_lock_) ACQUIRE(Roles::uninterruptible_) {
@@ -2292,17 +2302,21 @@
if (m->IsRuntimeMethod()) {
return true; // Ignore runtime frames (in particular callee save).
}
+ AddFrame(m, m->IsProxyMethod() ? DexFile::kDexNoIndex : GetDexPc());
+ return true;
+ }
+
+ void AddFrame(ArtMethod* method, uint32_t dex_pc) REQUIRES_SHARED(Locks::mutator_lock_) {
ObjPtr<mirror::PointerArray> trace_methods_and_pcs = GetTraceMethodsAndPCs();
- trace_methods_and_pcs->SetElementPtrSize<kTransactionActive>(count_, m, pointer_size_);
+ trace_methods_and_pcs->SetElementPtrSize<kTransactionActive>(count_, method, pointer_size_);
trace_methods_and_pcs->SetElementPtrSize<kTransactionActive>(
trace_methods_and_pcs->GetLength() / 2 + count_,
- m->IsProxyMethod() ? DexFile::kDexNoIndex : GetDexPc(),
+ dex_pc,
pointer_size_);
// Save the declaring class of the method to ensure that the declaring classes of the methods
// do not get unloaded while the stack trace is live.
- trace_->Set(count_ + 1, m->GetDeclaringClass());
+ trace_->Set(count_ + 1, method->GetDeclaringClass());
++count_;
- return true;
}
ObjPtr<mirror::PointerArray> GetTraceMethodsAndPCs() const REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -2318,12 +2332,12 @@
// How many more frames to skip.
int32_t skip_depth_;
// Current position down stack trace.
- uint32_t count_;
+ uint32_t count_ = 0;
// An object array where the first element is a pointer array that contains the ArtMethod
// pointers on the stack and dex PCs. The rest of the elements are the declaring
// class of the ArtMethod pointers. trace_[i+1] contains the declaring class of the ArtMethod of
// the i'th frame.
- mirror::ObjectArray<mirror::Object>* trace_;
+ mirror::ObjectArray<mirror::Object>* trace_ = nullptr;
// For cross compilation.
const PointerSize pointer_size_;
@@ -2332,11 +2346,15 @@
template<bool kTransactionActive>
jobject Thread::CreateInternalStackTrace(const ScopedObjectAccessAlreadyRunnable& soa) const {
- // Compute depth of stack
- CountStackDepthVisitor count_visitor(const_cast<Thread*>(this));
+ // Compute depth of stack, save frames if possible to avoid needing to recompute many.
+ constexpr size_t kMaxSavedFrames = 256;
+ std::unique_ptr<ArtMethodDexPcPair[]> saved_frames(new ArtMethodDexPcPair[kMaxSavedFrames]);
+ FetchStackTraceVisitor count_visitor(const_cast<Thread*>(this),
+ &saved_frames[0],
+ kMaxSavedFrames);
count_visitor.WalkStack();
- int32_t depth = count_visitor.GetDepth();
- int32_t skip_depth = count_visitor.GetSkipDepth();
+ const uint32_t depth = count_visitor.GetDepth();
+ const uint32_t skip_depth = count_visitor.GetSkipDepth();
// Build internal stack trace.
BuildInternalStackTraceVisitor<kTransactionActive> build_trace_visitor(soa.Self(),
@@ -2345,7 +2363,16 @@
if (!build_trace_visitor.Init(depth)) {
return nullptr; // Allocation failed.
}
- build_trace_visitor.WalkStack();
+ // If we saved all of the frames we don't even need to do the actual stack walk. This is faster
+ // than doing the stack walk twice.
+ if (depth < kMaxSavedFrames) {
+ for (size_t i = 0; i < depth; ++i) {
+ build_trace_visitor.AddFrame(saved_frames[i].first, saved_frames[i].second);
+ }
+ } else {
+ build_trace_visitor.WalkStack();
+ }
+
mirror::ObjectArray<mirror::Object>* trace = build_trace_visitor.GetInternalStackTrace();
if (kIsDebugBuild) {
ObjPtr<mirror::PointerArray> trace_methods = build_trace_visitor.GetTraceMethodsAndPCs();
@@ -2364,9 +2391,10 @@
const ScopedObjectAccessAlreadyRunnable& soa) const;
bool Thread::IsExceptionThrownByCurrentMethod(ObjPtr<mirror::Throwable> exception) const {
- CountStackDepthVisitor count_visitor(const_cast<Thread*>(this));
+ // Only count the depth since we do not pass a stack frame array as an argument.
+ FetchStackTraceVisitor count_visitor(const_cast<Thread*>(this));
count_visitor.WalkStack();
- return count_visitor.GetDepth() == exception->GetStackDepth();
+ return count_visitor.GetDepth() == static_cast<uint32_t>(exception->GetStackDepth());
}
jobjectArray Thread::InternalStackTraceToStackTraceElementArray(