Diffstat (limited to 'runtime/trace.cc')
| -rw-r--r-- | runtime/trace.cc | 826 |
1 file changed, 548 insertions, 278 deletions
diff --git a/runtime/trace.cc b/runtime/trace.cc
index ec61726ff2..429a5ae591 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -82,33 +82,147 @@ static TraceAction DecodeTraceAction(uint32_t tmid) {
   return static_cast<TraceAction>(tmid & kTraceMethodActionMask);
 }
 
+namespace {
+// Scaling factor to convert timestamp counter values into wall clock time reported in
+// microseconds. This is initialized at the start of tracing using the timestamp counter update
+// frequency. See InitializeTimestampCounters for more details.
+double tsc_to_microsec_scaling_factor = -1.0;
+
+uint64_t GetTimestamp() {
+  uint64_t t = 0;
+#if defined(__arm__)
+  // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34.
+  // Q and R specify that the value should be written to the lower and upper halves of the 64-bit
+  // operand. See: https://llvm.org/docs/LangRef.html#asm-template-argument-modifiers
+  asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r"(t));
+#elif defined(__aarch64__)
+  // See Arm Architecture Registers Armv8 section System Registers.
+  asm volatile("mrs %0, cntvct_el0" : "=r"(t));
+#elif defined(__i386__) || defined(__x86_64__)
+  // rdtsc returns two 32-bit values in rax and rdx even on 64-bit architectures.
+  unsigned int lo, hi;
+  asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
+  t = (static_cast<uint64_t>(hi) << 32) | lo;
+#else
+  t = MicroTime();
+#endif
+  return t;
+}
+
+#if defined(__i386__) || defined(__x86_64__)
+// Here we compute the scaling factor by sleeping for a millisecond. Alternatively, we could
+// record a raw timestamp counter reading together with clock_gettime at both the start and the
+// end of the trace, and compute the frequency of timestamp counter updates in a post-processing
+// step from those two samples. However, that would require a change in Android Studio, which is
+// the main consumer of these profiles. For now, just compute the frequency of tsc updates here.
+double computeScalingFactor() {
+  uint64_t start = MicroTime();
+  uint64_t start_tsc = GetTimestamp();
+  // Sleep for one millisecond.
+  usleep(1000);
+  uint64_t diff_tsc = GetTimestamp() - start_tsc;
+  uint64_t diff_time = MicroTime() - start;
+  double scaling_factor = static_cast<double>(diff_time) / diff_tsc;
+  DCHECK(scaling_factor > 0.0) << scaling_factor;
+  return scaling_factor;
+}
+
+double GetScalingFactorForX86() {
+  uint32_t eax, ebx, ecx;
+  asm volatile("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx) : "a"(0x0), "c"(0));
+  if (eax < 0x15) {
+    // There is no 15H - Timestamp counter and core crystal clock information
+    // leaf. Just compute the frequency.
+    return computeScalingFactor();
+  }
+
+  // From the Intel architecture-instruction-set-extensions-programming-reference:
+  // EBX[31:0]/EAX[31:0] indicates the ratio of the TSC frequency and the
+  // core crystal clock frequency.
+  // If EBX[31:0] is 0, the TSC and "core crystal clock" ratio is not enumerated.
+  // If ECX is 0, the nominal core crystal clock frequency is not enumerated.
+  // "TSC frequency" = "core crystal clock frequency" * EBX/EAX.
+  // The core crystal clock may differ from the reference clock, bus clock, or core clock
+  // frequencies.
+  // EAX Bits 31-00: An unsigned integer which is the denominator of the
+  //                 TSC/"core crystal clock" ratio.
+  // EBX Bits 31-00: An unsigned integer which is the numerator of the
+  //                 TSC/"core crystal clock" ratio.
+  // ECX Bits 31-00: An unsigned integer which is the nominal frequency of the core
+  //                 crystal clock in Hz.
+  // EDX Bits 31-00: Reserved = 0.
+ asm volatile("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx) : "a"(0x15), "c"(0)); + if (ebx == 0 || ecx == 0) { + return computeScalingFactor(); + } + double coreCrystalFreq = ecx; + // frequency = coreCrystalFreq * (ebx / eax) + // scaling_factor = seconds_to_microseconds / frequency + // = seconds_to_microseconds * eax / (coreCrystalFreq * ebx) + double seconds_to_microseconds = 1000 * 1000; + double scaling_factor = (seconds_to_microseconds * eax) / (coreCrystalFreq * ebx); + return scaling_factor; +} +#endif + +void InitializeTimestampCounters() { + // It is sufficient to initialize this once for the entire execution. Just return if it is + // already initialized. + if (tsc_to_microsec_scaling_factor > 0.0) { + return; + } + +#if defined(__arm__) + double seconds_to_microseconds = 1000 * 1000; + uint64_t freq = 0; + // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.21 + asm volatile("mrc p15, 0, %0, c14, c0, 0" : "=r"(freq)); + tsc_to_microsec_scaling_factor = seconds_to_microseconds / static_cast<double>(freq); +#elif defined(__aarch64__) + double seconds_to_microseconds = 1000 * 1000; + uint64_t freq = 0; + // See Arm Architecture Registers Armv8 section System Registers + asm volatile("mrs %0, cntfrq_el0" : "=r"(freq)); + tsc_to_microsec_scaling_factor = seconds_to_microseconds / static_cast<double>(freq); +#elif defined(__i386__) || defined(__x86_64__) + tsc_to_microsec_scaling_factor = GetScalingFactorForX86(); +#else + tsc_to_microsec_scaling_factor = 1.0; +#endif +} + +ALWAYS_INLINE uint64_t GetMicroTime(uint64_t counter) { + DCHECK(tsc_to_microsec_scaling_factor > 0.0) << tsc_to_microsec_scaling_factor; + return tsc_to_microsec_scaling_factor * counter; +} + +} // namespace + ArtMethod* Trace::DecodeTraceMethod(uint32_t tmid) { - MutexLock mu(Thread::Current(), *unique_methods_lock_); - return unique_methods_[tmid >> TraceActionBits]; + uint32_t method_index = tmid >> TraceActionBits; + // This is used only for logging which is usually needed only for debugging ART. So it's not + // performance critical. + for (auto const& entry : art_method_id_map_) { + if (method_index == entry.second) { + return entry.first; + } + } + return nullptr; } uint32_t Trace::EncodeTraceMethod(ArtMethod* method) { - MutexLock mu(Thread::Current(), *unique_methods_lock_); - uint32_t idx; + uint32_t idx = 0; auto it = art_method_id_map_.find(method); if (it != art_method_id_map_.end()) { idx = it->second; } else { - unique_methods_.push_back(method); - idx = unique_methods_.size() - 1; + idx = current_method_index_; art_method_id_map_.emplace(method, idx); + current_method_index_++; } - DCHECK_LT(idx, unique_methods_.size()); - DCHECK_EQ(unique_methods_[idx], method); return idx; } -uint32_t Trace::EncodeTraceMethodAndAction(ArtMethod* method, TraceAction action) { - uint32_t tmid = (EncodeTraceMethod(method) << TraceActionBits) | action; - DCHECK_EQ(method, DecodeTraceMethod(tmid)); - return tmid; -} - std::vector<ArtMethod*>* Trace::AllocStackTrace() { return (temp_stack_trace_.get() != nullptr) ? temp_stack_trace_.release() : new std::vector<ArtMethod*>(); @@ -154,7 +268,7 @@ void Trace::MeasureClockOverhead() { Thread::Current()->GetCpuMicroTime(); } if (UseWallClock()) { - MicroTime(); + GetTimestamp(); } } @@ -237,14 +351,13 @@ void Trace::CompareAndUpdateStackTrace(Thread* thread, thread->SetStackTraceSample(stack_trace); // Read timer clocks to use for all events in this trace. 
@@ -237,14 +351,13 @@ void Trace::CompareAndUpdateStackTrace(Thread* thread,
   thread->SetStackTraceSample(stack_trace);
   // Read timer clocks to use for all events in this trace.
   uint32_t thread_clock_diff = 0;
-  uint32_t wall_clock_diff = 0;
-  ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
+  uint64_t timestamp_counter = 0;
+  ReadClocks(thread, &thread_clock_diff, &timestamp_counter);
   if (old_stack_trace == nullptr) {
     // If there's no previous stack trace sample for this thread, log an entry event for all
     // methods in the trace.
     for (auto rit = stack_trace->rbegin(); rit != stack_trace->rend(); ++rit) {
-      LogMethodTraceEvent(thread, *rit, instrumentation::Instrumentation::kMethodEntered,
-                          thread_clock_diff, wall_clock_diff);
+      LogMethodTraceEvent(thread, *rit, kTraceMethodEnter, thread_clock_diff, timestamp_counter);
     }
   } else {
     // If there's a previous stack trace for this thread, diff the traces and emit entry and exit
@@ -258,13 +371,11 @@ void Trace::CompareAndUpdateStackTrace(Thread* thread,
     }
     // Iterate top-down over the old trace until the point where they differ, emitting exit events.
     for (auto old_it = old_stack_trace->begin(); old_it != old_rit.base(); ++old_it) {
-      LogMethodTraceEvent(thread, *old_it, instrumentation::Instrumentation::kMethodExited,
-                          thread_clock_diff, wall_clock_diff);
+      LogMethodTraceEvent(thread, *old_it, kTraceMethodExit, thread_clock_diff, timestamp_counter);
     }
     // Iterate bottom-up over the new trace from the point where they differ, emitting entry events.
     for (; rit != stack_trace->rend(); ++rit) {
-      LogMethodTraceEvent(thread, *rit, instrumentation::Instrumentation::kMethodEntered,
-                          thread_clock_diff, wall_clock_diff);
+      LogMethodTraceEvent(thread, *rit, kTraceMethodEnter, thread_clock_diff, timestamp_counter);
     }
     FreeStackTrace(old_stack_trace);
   }
@@ -285,7 +396,7 @@ void* Trace::RunSamplingThread(void* arg) {
     {
       MutexLock mu(self, *Locks::trace_lock_);
       the_trace = the_trace_;
-      if (the_trace == nullptr) {
+      if (the_trace_->stop_tracing_) {
         break;
       }
     }
@@ -388,22 +499,21 @@ void Trace::Start(std::unique_ptr<File>&& trace_file_in,
     return;
   }
 
+  // Initialize the frequency of timestamp counter updates here. This is needed
+  // to get wall clock time from timestamp counter values.
+  InitializeTimestampCounters();
+
   Runtime* runtime = Runtime::Current();
 
   // Enable count of allocs if specified in the flags.
   bool enable_stats = false;
 
-  if (runtime->GetJit() != nullptr) {
-    // TODO b/110263880 It would be better if we didn't need to do this.
-    // Since we need to hold the method entrypoint across a suspend to ensure instrumentation
-    // hooks are called correctly we have to disable jit-gc to ensure that the entrypoint doesn't
-    // go away. Furthermore we need to leave this off permanently since one could get the same
-    // effect by causing this to be toggled on and off.
-    runtime->GetJit()->GetCodeCache()->SetGarbageCollectCode(false);
-  }
-
   // Create Trace object.
   {
+    // Suspend the JIT here since we are switching the runtime to debuggable. Debuggable runtimes
+    // cannot use JITed code compiled before the switch, so we need to invalidate all JITed code
+    // here. Entering a suspend-JIT scope prevents any races with ongoing JIT compilations.
+    jit::ScopedJitSuspend suspend_jit;
     // Required since EnableMethodTracing calls ConfigureStubs which visits class linker classes.
     gc::ScopedGCCriticalSection gcs(self,
                                     gc::kGcCauseInstrumentation,
@@ -421,6 +531,17 @@ void Trace::Start(std::unique_ptr<File>&& trace_file_in,
                                           "Sampling profiler thread");
       the_trace_->interval_us_ = interval_us;
     } else {
+      if (!runtime->IsJavaDebuggable()) {
+        art::jit::Jit* jit = runtime->GetJit();
+        if (jit != nullptr) {
+          jit->GetCodeCache()->InvalidateAllCompiledCode();
+          jit->GetCodeCache()->TransitionToDebuggable();
+          jit->GetJitCompiler()->SetDebuggableCompilerOption(true);
+        }
+        runtime->SetRuntimeDebugState(art::Runtime::RuntimeDebugState::kJavaDebuggable);
+        runtime->GetInstrumentation()->UpdateEntrypointsForDebuggable();
+        runtime->DeoptimizeBootImage();
+      }
       runtime->GetInstrumentation()->AddListener(
           the_trace_,
           instrumentation::Instrumentation::kMethodEntered |
@@ -431,8 +552,9 @@ void Trace::Start(std::unique_ptr<File>&& trace_file_in,
       // we know that inlining and other problematic optimizations are disabled. We might just
       // want to use the trampolines anyway since it is faster. It makes the story with disabling
      // jit-gc more complex though.
-      runtime->GetInstrumentation()->EnableMethodTracing(
-          kTracerInstrumentationKey, /*needs_interpreter=*/!runtime->IsJavaDebuggable());
+      runtime->GetInstrumentation()->EnableMethodTracing(kTracerInstrumentationKey,
+                                                         the_trace_,
+                                                         /*needs_interpreter=*/false);
     }
   }
 }
@@ -443,78 +565,129 @@ void Trace::Start(std::unique_ptr<File>&& trace_file_in,
   }
 }
 
+void Trace::UpdateThreadsList(Thread* thread) {
+  // TODO(mythria): Clean this up and update threads_list_ when recording the trace event, similar
+  // to what we do in the streaming case.
+  std::string name;
+  thread->GetThreadName(name);
+  // In tests, we destroy the VM after already detaching the current thread. When a thread is
+  // detached we record its information in threads_list_. The current thread is then re-attached
+  // as a "Shutdown thread" while the runtime shuts down, so don't record information about
+  // shutdown threads.
+  if (name.compare("Shutdown thread") == 0) {
+    return;
+  }
+
+  // There can be races between unregistering a thread and stopping the trace, so it is possible
+  // to update the list twice. For example, this information is updated both here when stopping
+  // tracing and when a thread is detaching. In thread detach, we first update this information
+  // and then remove the thread from the list of active threads. If tracing was stopped in between
+  // these two events, we can see two updates for the same thread. Since we need trace_lock_, it
+  // isn't easy to prevent this race (for example, by updating this information while holding
+  // thread_list_lock_). It is harmless to do two updates, so just use Overwrite here.
+  threads_list_.Overwrite(thread->GetTid(), name);
+}
+
 void Trace::StopTracing(bool finish_tracing, bool flush_file) {
-  bool stop_alloc_counting = false;
   Runtime* const runtime = Runtime::Current();
-  Trace* the_trace = nullptr;
   Thread* const self = Thread::Current();
   pthread_t sampling_pthread = 0U;
   {
     MutexLock mu(self, *Locks::trace_lock_);
     if (the_trace_ == nullptr) {
       LOG(ERROR) << "Trace stop requested, but no trace currently running";
-    } else {
-      the_trace = the_trace_;
-      the_trace_ = nullptr;
-      sampling_pthread = sampling_pthread_;
+      return;
     }
+    // Tell sampling_pthread_ to stop tracing.
+    the_trace_->stop_tracing_ = true;
+    sampling_pthread = sampling_pthread_;
   }
+
   // Make sure that we join before we delete the trace since we don't want to have
   // the sampling thread access a stale pointer. This finishes since the sampling thread exits when
   // the_trace_ is null.
   if (sampling_pthread != 0U) {
     CHECK_PTHREAD_CALL(pthread_join, (sampling_pthread, nullptr), "sampling thread shutdown");
-    sampling_pthread_ = 0U;
   }
 
-  if (the_trace != nullptr) {
-    stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
-    // Stop the trace sources adding more entries to the trace buffer and synchronise stores.
-    {
-      gc::ScopedGCCriticalSection gcs(self,
-                                      gc::kGcCauseInstrumentation,
-                                      gc::kCollectorTypeInstrumentation);
-      ScopedSuspendAll ssa(__FUNCTION__);
+  // Make a copy of the_trace_ so it can be flushed later. We want to reset
+  // the_trace_ to nullptr in suspend-all scope to prevent any races.
+  Trace* the_trace = the_trace_;
+  bool stop_alloc_counting = (the_trace->flags_ & Trace::kTraceCountAllocs) != 0;
+  // Stop the trace sources adding more entries to the trace buffer and synchronise stores.
+  {
+    gc::ScopedGCCriticalSection gcs(
+        self, gc::kGcCauseInstrumentation, gc::kCollectorTypeInstrumentation);
+    jit::ScopedJitSuspend suspend_jit;
+    ScopedSuspendAll ssa(__FUNCTION__);
 
-      if (the_trace->trace_mode_ == TraceMode::kSampling) {
-        MutexLock mu(self, *Locks::thread_list_lock_);
-        runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
-      } else {
-        runtime->GetInstrumentation()->RemoveListener(
-            the_trace,
-            instrumentation::Instrumentation::kMethodEntered |
-            instrumentation::Instrumentation::kMethodExited |
-            instrumentation::Instrumentation::kMethodUnwind);
-        runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
-      }
-    }
-    // At this point, code may read buf_ as it's writers are shutdown
-    // and the ScopedSuspendAll above has ensured all stores to buf_
-    // are now visible.
-    if (finish_tracing) {
-      the_trace->FinishTracing();
+    if (the_trace->trace_mode_ == TraceMode::kSampling) {
+      MutexLock mu(self, *Locks::thread_list_lock_);
+      runtime->GetThreadList()->ForEach(ClearThreadStackTraceAndClockBase, nullptr);
+    } else {
+      runtime->GetInstrumentation()->RemoveListener(
+          the_trace,
+          instrumentation::Instrumentation::kMethodEntered |
+          instrumentation::Instrumentation::kMethodExited |
+          instrumentation::Instrumentation::kMethodUnwind);
+      runtime->GetInstrumentation()->DisableMethodTracing(kTracerInstrumentationKey);
+      runtime->GetInstrumentation()->MaybeSwitchRuntimeDebugState(self);
     }
-    if (the_trace->trace_file_.get() != nullptr) {
-      // Do not try to erase, so flush and close explicitly.
-      if (flush_file) {
-        if (the_trace->trace_file_->Flush() != 0) {
-          PLOG(WARNING) << "Could not flush trace file.";
+
+    // Flush the thread-specific buffers from all threads before resetting the_trace_ to nullptr.
+    // We also flush the buffer when destroying a thread, which expects the_trace_ to be valid, so
+    // make sure the per-thread buffers are reset before resetting the_trace_.
+    {
+      MutexLock tl_lock(Thread::Current(), *Locks::thread_list_lock_);
+      for (Thread* thread : Runtime::Current()->GetThreadList()->GetList()) {
+        if (thread->GetMethodTraceBuffer() != nullptr) {
+          the_trace_->FlushStreamingBuffer(thread);
+          thread->ResetMethodTraceBuffer();
         }
-      } else {
-        the_trace->trace_file_->MarkUnchecked();  // Do not trigger guard.
+        // Record threads here before resetting the_trace_ to prevent any races between
+        // unregistering the thread and resetting the_trace_.
+        the_trace->UpdateThreadsList(thread);
       }
-      if (the_trace->trace_file_->Close() != 0) {
-        PLOG(ERROR) << "Could not close trace file.";
+    }
+
+    // Reset the_trace_ while holding trace_lock_.
+    MutexLock mu(self, *Locks::trace_lock_);
+    the_trace_ = nullptr;
+    sampling_pthread_ = 0U;
+  }
+
+  // At this point, code may read buf_ as its writers are shutdown
+  // and the ScopedSuspendAll above has ensured all stores to buf_
+  // are now visible.
+  if (finish_tracing) {
+    the_trace->FinishTracing();
+  }
+  if (the_trace->trace_file_.get() != nullptr) {
+    // Do not try to erase, so flush and close explicitly.
+    if (flush_file) {
+      if (the_trace->trace_file_->Flush() != 0) {
+        PLOG(WARNING) << "Could not flush trace file.";
       }
+    } else {
+      the_trace->trace_file_->MarkUnchecked();  // Do not trigger guard.
+    }
+    if (the_trace->trace_file_->Close() != 0) {
+      PLOG(ERROR) << "Could not close trace file.";
     }
-    delete the_trace;
   }
+  delete the_trace;
+
   if (stop_alloc_counting) {
     // Can be racy since SetStatsEnabled is not guarded by any locks.
     runtime->SetStatsEnabled(false);
   }
 }
 
+void Trace::FlushThreadBuffer(Thread* self) {
+  MutexLock mu(self, *Locks::trace_lock_);
+  the_trace_->FlushStreamingBuffer(self);
+}
+
 void Trace::Abort() {
   // Do not write anything anymore.
   StopTracing(false, false);
@@ -548,6 +721,28 @@ TracingMode Trace::GetMethodTracingMode() {
 }
 
 static constexpr size_t kMinBufSize = 18U;  // Trace header is up to 18B.
+// Size of the per-thread buffer. The value is chosen arbitrarily. This value
+// should be greater than kMinBufSize.
+static constexpr size_t kPerThreadBufSize = 512 * 1024;
+static_assert(kPerThreadBufSize > kMinBufSize);
+
+namespace {
+
+TraceClockSource GetClockSourceFromFlags(int flags) {
+  bool need_wall = flags & Trace::TraceFlag::kTraceClockSourceWallClock;
+  bool need_thread_cpu = flags & Trace::TraceFlag::kTraceClockSourceThreadCpu;
+  if (need_wall && need_thread_cpu) {
+    return TraceClockSource::kDual;
+  } else if (need_wall) {
+    return TraceClockSource::kWall;
+  } else if (need_thread_cpu) {
+    return TraceClockSource::kThreadCpu;
+  } else {
+    return kDefaultTraceClockSource;
+  }
+}
+
+}  // namespace
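GetClockSourceFromFlags above reduces to a two-bit truth table. A standalone sketch of the same decision, under the assumption that the two flags are independent bits (the enum and flag values below are stand-ins for illustration, not ART's actual definitions):

    #include <cassert>

    enum class ClockSource { kThreadCpu, kWall, kDual, kDefault };
    constexpr int kWallFlag = 0x010;       // stand-in for kTraceClockSourceWallClock
    constexpr int kThreadCpuFlag = 0x100;  // stand-in for kTraceClockSourceThreadCpu

    ClockSource FromFlags(int flags) {
      bool wall = (flags & kWallFlag) != 0;
      bool thread_cpu = (flags & kThreadCpuFlag) != 0;
      if (wall && thread_cpu) return ClockSource::kDual;
      if (wall) return ClockSource::kWall;
      if (thread_cpu) return ClockSource::kThreadCpu;
      return ClockSource::kDefault;  // neither bit set: fall back to the default source
    }

    int main() {
      assert(FromFlags(kWallFlag | kThreadCpuFlag) == ClockSource::kDual);
      assert(FromFlags(kWallFlag) == ClockSource::kWall);
      assert(FromFlags(kThreadCpuFlag) == ClockSource::kThreadCpu);
      assert(FromFlags(0) == ClockSource::kDefault);
      return 0;
    }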
 
 Trace::Trace(File* trace_file,
              size_t buffer_size,
@@ -556,12 +751,17 @@ Trace::Trace(File* trace_file,
              TraceMode trace_mode)
     : trace_file_(trace_file),
       buf_(new uint8_t[std::max(kMinBufSize, buffer_size)]()),
-      flags_(flags), trace_output_mode_(output_mode), trace_mode_(trace_mode),
-      clock_source_(default_clock_source_),
+      flags_(flags),
+      trace_output_mode_(output_mode),
+      trace_mode_(trace_mode),
+      clock_source_(GetClockSourceFromFlags(flags)),
       buffer_size_(std::max(kMinBufSize, buffer_size)),
-      start_time_(MicroTime()), clock_overhead_ns_(GetClockOverheadNanoSeconds()),
-      overflow_(false), interval_us_(0), streaming_lock_(nullptr),
-      unique_methods_lock_(new Mutex("unique methods lock", kTracingUniqueMethodsLock)) {
+      start_time_(GetMicroTime(GetTimestamp())),
+      clock_overhead_ns_(GetClockOverheadNanoSeconds()),
+      overflow_(false),
+      interval_us_(0),
+      stop_tracing_(false),
+      tracing_lock_("tracing lock", LockLevel::kTracingStreamingLock) {
   CHECK_IMPLIES(trace_file == nullptr, output_mode == TraceOutputMode::kDDMS);
 
   uint16_t trace_version = GetTraceVersion(clock_source_);
@@ -583,16 +783,15 @@ Trace::Trace(File* trace_file,
   cur_offset_.store(kTraceHeaderLength, std::memory_order_relaxed);
 
   if (output_mode == TraceOutputMode::kStreaming) {
-    streaming_lock_ = new Mutex("tracing lock", LockLevel::kTracingStreamingLock);
-    seen_threads_.reset(new ThreadIDBitSet());
+    // Flush the header information to the file. We use a per-thread buffer, so
+    // it is easier to just write the header information directly to the file.
+    if (!trace_file_->WriteFully(buf_.get(), kTraceHeaderLength)) {
+      PLOG(WARNING) << "Failed streaming a tracing event.";
+    }
+    cur_offset_.store(0, std::memory_order_relaxed);
   }
 }
 
-Trace::~Trace() {
-  delete streaming_lock_;
-  delete unique_methods_lock_;
-}
-
 static uint64_t ReadBytes(uint8_t* buf, size_t bytes) {
   uint64_t ret = 0;
   for (size_t i = 0; i < bytes; ++i) {
@@ -605,6 +804,7 @@ void Trace::DumpBuf(uint8_t* buf, size_t buf_size, TraceClockSource clock_source
   uint8_t* ptr = buf + kTraceHeaderLength;
   uint8_t* end = buf + buf_size;
 
+  MutexLock mu(Thread::Current(), tracing_lock_);
   while (ptr < end) {
     uint32_t tmid = ReadBytes(ptr + 2, sizeof(tmid));
     ArtMethod* method = DecodeTraceMethod(tmid);
@@ -616,18 +816,12 @@ void Trace::DumpBuf(uint8_t* buf, size_t buf_size, TraceClockSource clock_source
 
 void Trace::FinishTracing() {
   size_t final_offset = 0;
-  std::set<ArtMethod*> visited_methods;
-  if (trace_output_mode_ == TraceOutputMode::kStreaming) {
-    // Clean up.
-    MutexLock mu(Thread::Current(), *streaming_lock_);
-    STLDeleteValues(&seen_methods_);
-  } else {
+  if (trace_output_mode_ != TraceOutputMode::kStreaming) {
     final_offset = cur_offset_.load(std::memory_order_relaxed);
-    GetVisitedMethods(final_offset, &visited_methods);
   }
 
   // Compute elapsed time.
-  uint64_t elapsed = MicroTime() - start_time_;
+  uint64_t elapsed = GetMicroTime(GetTimestamp()) - start_time_;
 
   std::ostringstream os;
 
@@ -659,25 +853,25 @@ void Trace::FinishTracing() {
   os << StringPrintf("%cthreads\n", kTraceTokenChar);
   DumpThreadList(os);
   os << StringPrintf("%cmethods\n", kTraceTokenChar);
-  DumpMethodList(os, visited_methods);
+  DumpMethodList(os);
   os << StringPrintf("%cend\n", kTraceTokenChar);
   std::string header(os.str());
 
   if (trace_output_mode_ == TraceOutputMode::kStreaming) {
-    // Protect access to buf_ and satisfy sanitizer for calls to WriteBuf / FlushBuf.
-    MutexLock mu(Thread::Current(), *streaming_lock_);
+    // It is expected that this method is called when all other threads are suspended, so there
+    // cannot be any writes to trace_file_ after finishing tracing.
     // Write a special token to mark the end of trace records and the start of
     // trace summary.
     uint8_t buf[7];
     Append2LE(buf, 0);
     buf[2] = kOpTraceSummary;
     Append4LE(buf + 3, static_cast<uint32_t>(header.length()));
-    WriteToBuf(buf, sizeof(buf));
     // Write the trace summary. The summary is identical to the file header when
     // the output mode is not streaming (except for methods).
-    WriteToBuf(reinterpret_cast<const uint8_t*>(header.c_str()), header.length());
-    // Flush the buffer, which may include some trace records before the summary.
-    FlushBuf();
+    if (!trace_file_->WriteFully(buf, sizeof(buf)) ||
+        !trace_file_->WriteFully(header.c_str(), header.length())) {
+      PLOG(WARNING) << "Failed streaming a tracing event.";
+    }
   } else {
     if (trace_file_.get() == nullptr) {
       std::vector<uint8_t> data;
@@ -736,10 +930,9 @@ void Trace::FieldWritten(Thread* thread ATTRIBUTE_UNUSED,
 
 void Trace::MethodEntered(Thread* thread, ArtMethod* method) {
   uint32_t thread_clock_diff = 0;
-  uint32_t wall_clock_diff = 0;
-  ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
-  LogMethodTraceEvent(thread, method, instrumentation::Instrumentation::kMethodEntered,
-                      thread_clock_diff, wall_clock_diff);
+  uint64_t timestamp_counter = 0;
+  ReadClocks(thread, &thread_clock_diff, &timestamp_counter);
+  LogMethodTraceEvent(thread, method, kTraceMethodEnter, thread_clock_diff, timestamp_counter);
 }
 
 void Trace::MethodExited(Thread* thread,
@@ -747,24 +940,18 @@ void Trace::MethodExited(Thread* thread,
                          instrumentation::OptionalFrame frame ATTRIBUTE_UNUSED,
                          JValue& return_value ATTRIBUTE_UNUSED) {
   uint32_t thread_clock_diff = 0;
-  uint32_t wall_clock_diff = 0;
-  ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
-  LogMethodTraceEvent(thread,
-                      method,
-                      instrumentation::Instrumentation::kMethodExited,
-                      thread_clock_diff,
-                      wall_clock_diff);
+  uint64_t timestamp_counter = 0;
+  ReadClocks(thread, &thread_clock_diff, &timestamp_counter);
+  LogMethodTraceEvent(thread, method, kTraceMethodExit, thread_clock_diff, timestamp_counter);
 }
 
 void Trace::MethodUnwind(Thread* thread,
-                         Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                          ArtMethod* method,
                          uint32_t dex_pc ATTRIBUTE_UNUSED) {
   uint32_t thread_clock_diff = 0;
-  uint32_t wall_clock_diff = 0;
-  ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
-  LogMethodTraceEvent(thread, method, instrumentation::Instrumentation::kMethodUnwind,
-                      thread_clock_diff, wall_clock_diff);
+  uint64_t timestamp_counter = 0;
+  ReadClocks(thread, &thread_clock_diff, &timestamp_counter);
+  LogMethodTraceEvent(thread, method, kTraceUnroll, thread_clock_diff, timestamp_counter);
 }
 
 void Trace::ExceptionThrown(Thread* thread ATTRIBUTE_UNUSED,
@@ -790,7 +977,7 @@ void Trace::WatchedFramePop(Thread* self ATTRIBUTE_UNUSED,
   LOG(ERROR) << "Unexpected WatchedFramePop event in tracing";
 }
 
-void Trace::ReadClocks(Thread* thread, uint32_t* thread_clock_diff, uint32_t* wall_clock_diff) {
+void Trace::ReadClocks(Thread* thread, uint32_t* thread_clock_diff, uint64_t* timestamp_counter) {
   if (UseThreadCpuClock()) {
     uint64_t clock_base = thread->GetTraceClockBase();
     if (UNLIKELY(clock_base == 0)) {
@@ -802,90 +989,186 @@ void Trace::ReadClocks(Thread* thread, uint32_t* thread_clock_diff, uint32_t* wa
     }
   }
   if (UseWallClock()) {
-    *wall_clock_diff = MicroTime() - start_time_;
+    *timestamp_counter = GetTimestamp();
   }
 }
 
-bool Trace::RegisterMethod(ArtMethod* method) {
-  const DexFile* dex_file = method->GetDexFile();
-  if (seen_methods_.find(dex_file) == seen_methods_.end()) {
-    seen_methods_.insert(std::make_pair(dex_file, new DexIndexBitSet()));
-  }
-  DexIndexBitSet* bit_set = seen_methods_.find(dex_file)->second;
-  if (!(*bit_set)[method->GetDexMethodIndex()]) {
-    bit_set->set(method->GetDexMethodIndex());
-    return true;
-  }
-  return false;
+std::string Trace::GetMethodLine(ArtMethod* method, uint32_t method_index) {
+  method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+  return StringPrintf("%#x\t%s\t%s\t%s\t%s\n",
+                      (method_index << TraceActionBits),
+                      PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(),
+                      method->GetName(),
+                      method->GetSignature().ToString().c_str(),
+                      method->GetDeclaringClassSourceFile());
 }
 
-bool Trace::RegisterThread(Thread* thread) {
-  pid_t tid = thread->GetTid();
-  CHECK_LT(0U, static_cast<uint32_t>(tid));
-  CHECK_LT(static_cast<uint32_t>(tid), kMaxThreadIdNumber);
+void Trace::RecordStreamingMethodEvent(Thread* thread,
+                                       ArtMethod* method,
+                                       TraceAction action,
+                                       uint32_t thread_clock_diff,
+                                       uint64_t timestamp_counter) {
+  uintptr_t* method_trace_buffer = thread->GetMethodTraceBuffer();
+  size_t* current_offset = thread->GetMethodTraceIndexPtr();
+  // Initialize the buffer lazily. It's just simpler to keep the creation in one place.
+  if (method_trace_buffer == nullptr) {
+    method_trace_buffer = new uintptr_t[std::max(kMinBufSize, kPerThreadBufSize)]();
+    thread->SetMethodTraceBuffer(method_trace_buffer);
+    *current_offset = 0;
+
+    // This is the first event from this thread, so first record information about the thread.
+    std::string thread_name;
+    thread->GetThreadName(thread_name);
+    static constexpr size_t kThreadNameHeaderSize = 7;
+    uint8_t header[kThreadNameHeaderSize];
+    Append2LE(header, 0);
+    header[2] = kOpNewThread;
+    // We use only 16 bits to encode the thread id. On Android, we don't expect to need more than
+    // 16 bits for a tid. For 32-bit platforms it is always ensured we use fewer than 16 bits
+    // (see __check_max_thread_id in bionic for more details), and even on 64-bit the maximum
+    // number of threads is currently less than 65536.
+    // TODO(mythria): On host, we know thread ids can be greater than 16 bits. Consider adding
+    // a map similar to method ids.
+    DCHECK(!kIsTargetBuild || thread->GetTid() < (1 << 16));
+    Append2LE(header + 3, static_cast<uint16_t>(thread->GetTid()));
+    Append2LE(header + 5, static_cast<uint16_t>(thread_name.length()));
+
+    {
+      MutexLock mu(Thread::Current(), tracing_lock_);
+      if (!trace_file_->WriteFully(header, kThreadNameHeaderSize) ||
+          !trace_file_->WriteFully(reinterpret_cast<const uint8_t*>(thread_name.c_str()),
+                                   thread_name.length())) {
+        PLOG(WARNING) << "Failed streaming a tracing event.";
+      }
+    }
+  }
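The thread record written above is a fixed 7-byte header followed by the name bytes. A sketch of that layout, with a local little-endian helper standing in for ART's Append2LE (the kOpNewThread value here is a placeholder, not the real constant):

    #include <cstdint>
    #include <cstring>
    #include <string>
    #include <vector>

    // Little-endian 16-bit append, mirroring what the Append2LE helper above does.
    void PutU16LE(uint8_t* buf, uint16_t v) {
      buf[0] = static_cast<uint8_t>(v);
      buf[1] = static_cast<uint8_t>(v >> 8);
    }

    std::vector<uint8_t> MakeThreadRecord(uint16_t tid, const std::string& name) {
      const uint8_t kOpNewThread = 2;  // placeholder opcode, for illustration only
      std::vector<uint8_t> rec(7 + name.length());
      PutU16LE(rec.data(), 0);         // method id 0 marks a metadata record
      rec[2] = kOpNewThread;           // opcode byte
      PutU16LE(rec.data() + 3, tid);   // 16-bit thread id
      PutU16LE(rec.data() + 5, static_cast<uint16_t>(name.length()));
      memcpy(rec.data() + 7, name.data(), name.length());  // name bytes follow the header
      return rec;
    }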
+
+  size_t required_entries = (clock_source_ == TraceClockSource::kDual) ? 4 : 3;
+  if (*current_offset + required_entries >= kPerThreadBufSize) {
+    // We don't have space for further entries. Flush the contents of the buffer, reuse the buffer
+    // to store new contents, and reset the index to the start of the buffer.
+    FlushStreamingBuffer(thread);
+    *current_offset = 0;
+  }
 
-  if (!(*seen_threads_)[tid]) {
-    seen_threads_->set(tid);
-    return true;
+  // Record the entry in the per-thread trace buffer.
+  int current_index = *current_offset;
+  method_trace_buffer[current_index++] = reinterpret_cast<uintptr_t>(method);
+  // TODO(mythria): We only need two bits to record the action. Consider merging
+  // it with the method entry to save space.
+  method_trace_buffer[current_index++] = action;
+  if (UseThreadCpuClock()) {
+    method_trace_buffer[current_index++] = thread_clock_diff;
+  }
+  if (UseWallClock()) {
+    if (art::kRuntimePointerSize == PointerSize::k32) {
+      // On 32-bit architectures, store the timestamp counter as two 32-bit values.
+      method_trace_buffer[current_index++] = timestamp_counter >> 32;
+      method_trace_buffer[current_index++] = static_cast<uint32_t>(timestamp_counter);
+    } else {
+      method_trace_buffer[current_index++] = timestamp_counter;
+    }
   }
-  return false;
+  *current_offset = current_index;
 }
 
-std::string Trace::GetMethodLine(ArtMethod* method) {
-  method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
-  return StringPrintf("%#x\t%s\t%s\t%s\t%s\n", (EncodeTraceMethod(method) << TraceActionBits),
-      PrettyDescriptor(method->GetDeclaringClassDescriptor()).c_str(), method->GetName(),
-      method->GetSignature().ToString().c_str(), method->GetDeclaringClassSourceFile());
-}
-
-void Trace::WriteToBuf(const uint8_t* src, size_t src_size) {
-  // Updates to cur_offset_ are done under the streaming_lock_ here as in streaming mode.
-  int32_t old_offset = cur_offset_.load(std::memory_order_relaxed);
-  int32_t new_offset = old_offset + static_cast<int32_t>(src_size);
-  if (dchecked_integral_cast<size_t>(new_offset) > buffer_size_) {
-    // Flush buffer.
-    if (!trace_file_->WriteFully(buf_.get(), old_offset)) {
+void Trace::WriteToBuf(uint8_t* header,
+                       size_t header_size,
+                       const std::string& data,
+                       size_t* current_index,
+                       uint8_t* buffer,
+                       size_t buffer_size) {
+  EnsureSpace(buffer, current_index, buffer_size, header_size);
+  memcpy(buffer + *current_index, header, header_size);
+  *current_index += header_size;
+
+  EnsureSpace(buffer, current_index, buffer_size, data.length());
+  if (data.length() < buffer_size) {
+    memcpy(buffer + *current_index, reinterpret_cast<const uint8_t*>(data.c_str()), data.length());
+    *current_index += data.length();
+  } else {
+    // The data is larger than the buffer, so write directly to the file. EnsureSpace should have
+    // flushed any data remaining in the buffer.
+    DCHECK_EQ(*current_index, 0U);
+    if (!trace_file_->WriteFully(reinterpret_cast<const uint8_t*>(data.c_str()), data.length())) {
       PLOG(WARNING) << "Failed streaming a tracing event.";
     }
+  }
+}
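On 32-bit targets a single uintptr_t slot cannot hold the 64-bit timestamp, so the writer above stores it high word first and the reader in FlushStreamingBuffer below joins the two slots again. A minimal round-trip sketch of that convention (slot layout assumed as in this change; not part of the diff itself):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t timestamp = 0x123456789abcdef0ULL;
      // Writer side: high word first, then low word, as in RecordStreamingMethodEvent.
      uint32_t slots[2];
      slots[0] = static_cast<uint32_t>(timestamp >> 32);
      slots[1] = static_cast<uint32_t>(timestamp);
      // Reader side: widen the first slot, shift it up, then OR in the second slot.
      uint64_t joined = (static_cast<uint64_t>(slots[0]) << 32) | slots[1];
      assert(joined == timestamp);
      return 0;
    }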
 
-  // Check whether the data is too large for the buffer, then write immediately.
-  if (src_size >= buffer_size_) {
-    if (!trace_file_->WriteFully(src, src_size)) {
+void Trace::FlushStreamingBuffer(Thread* thread) {
+  // Take tracing_lock_ to serialize writes across threads. We also need to allocate a unique
+  // method id for each method, which we do by maintaining a map from method to id for each newly
+  // seen method; tracing_lock_ is required to serialize these as well.
+  MutexLock mu(Thread::Current(), tracing_lock_);
+  uintptr_t* method_trace_buffer = thread->GetMethodTraceBuffer();
+  // Create a temporary buffer to encode the trace events from the specified thread.
+  size_t buffer_size = kPerThreadBufSize;
+  size_t current_index = 0;
+  std::unique_ptr<uint8_t[]> buffer(new uint8_t[std::max(kMinBufSize, buffer_size)]);
+
+  size_t num_entries = *(thread->GetMethodTraceIndexPtr());
+  for (size_t entry_index = 0; entry_index < num_entries;) {
+    ArtMethod* method = reinterpret_cast<ArtMethod*>(method_trace_buffer[entry_index++]);
+    TraceAction action = DecodeTraceAction(method_trace_buffer[entry_index++]);
+    uint32_t thread_time = 0;
+    uint32_t wall_time = 0;
+    if (UseThreadCpuClock()) {
+      thread_time = method_trace_buffer[entry_index++];
+    }
+    if (UseWallClock()) {
+      uint64_t timestamp = method_trace_buffer[entry_index++];
+      if (art::kRuntimePointerSize == PointerSize::k32) {
+        // On 32-bit architectures, the timestamp is stored as two 32-bit values.
+        timestamp = (timestamp << 32 | method_trace_buffer[entry_index++]);
+      }
+      wall_time = GetMicroTime(timestamp) - start_time_;
+    }
+
+    auto it = art_method_id_map_.find(method);
+    uint32_t method_index = 0;
+    // If we haven't seen this method before, record information about the method.
+    if (it == art_method_id_map_.end()) {
+      art_method_id_map_.emplace(method, current_method_index_);
+      method_index = current_method_index_;
+      current_method_index_++;
+      // Write a special block with the name.
+      std::string method_line(GetMethodLine(method, method_index));
+      static constexpr size_t kMethodNameHeaderSize = 5;
+      uint8_t method_header[kMethodNameHeaderSize];
+      DCHECK_LT(kMethodNameHeaderSize, kPerThreadBufSize);
+      Append2LE(method_header, 0);
+      method_header[2] = kOpNewMethod;
+      Append2LE(method_header + 3, static_cast<uint16_t>(method_line.length()));
+      WriteToBuf(method_header,
+                 kMethodNameHeaderSize,
+                 method_line,
+                 &current_index,
+                 buffer.get(),
+                 buffer_size);
+    } else {
+      method_index = it->second;
     }
-    old_offset = 0;
-    new_offset = static_cast<int32_t>(src_size);
+    const size_t record_size = GetRecordSize(clock_source_);
+    DCHECK_LT(record_size, kPerThreadBufSize);
+    EnsureSpace(buffer.get(), &current_index, buffer_size, record_size);
+    EncodeEventEntry(
+        buffer.get() + current_index, thread, method_index, action, thread_time, wall_time);
+    current_index += record_size;
   }
-  cur_offset_.store(new_offset, std::memory_order_relaxed);
-  // Fill in data.
-  memcpy(buf_.get() + old_offset, src, src_size);
-}
 
-void Trace::FlushBuf() {
-  // Updates to cur_offset_ are done under the streaming_lock_ here as in streaming mode.
-  int32_t offset = cur_offset_.load(std::memory_order_relaxed);
-  if (!trace_file_->WriteFully(buf_.get(), offset)) {
-    PLOG(WARNING) << "Failed flush the remaining data in streaming.";
+  // Flush the contents of the buffer to the file.
+  if (!trace_file_->WriteFully(buffer.get(), current_index)) {
+    PLOG(WARNING) << "Failed streaming a tracing event.";
   }
-  cur_offset_.store(0, std::memory_order_relaxed);
 }
 
-void Trace::LogMethodTraceEvent(Thread* thread, ArtMethod* method,
-                                instrumentation::Instrumentation::InstrumentationEvent event,
-                                uint32_t thread_clock_diff, uint32_t wall_clock_diff) {
-  // This method is called in both tracing modes (method and
-  // sampling). In sampling mode, this method is only called by the
-  // sampling thread. In method tracing mode, it can be called
-  // concurrently.
-
-  // Ensure we always use the non-obsolete version of the method so that entry/exit events have the
-  // same pointer value.
-  method = method->GetNonObsoleteMethod();
-
+void Trace::RecordMethodEvent(Thread* thread,
+                              ArtMethod* method,
+                              TraceAction action,
+                              uint32_t thread_clock_diff,
+                              uint64_t timestamp_counter) {
   // Advance cur_offset_ atomically.
   int32_t new_offset;
   int32_t old_offset = 0;
@@ -893,37 +1176,18 @@ void Trace::LogMethodTraceEvent(Thread* thread, ArtMethod* method,
   // In the non-streaming case, we do a busy loop here trying to get
   // an offset to write our record and advance cur_offset_ for the
   // next use.
-  if (trace_output_mode_ != TraceOutputMode::kStreaming) {
-    // Although multiple threads can call this method concurrently,
-    // the compare_exchange_weak here is still atomic (by definition).
-    // A succeeding update is visible to other cores when they pass
-    // through this point.
-    old_offset = cur_offset_.load(std::memory_order_relaxed);  // Speculative read
-    do {
-      new_offset = old_offset + GetRecordSize(clock_source_);
-      if (static_cast<size_t>(new_offset) > buffer_size_) {
-        overflow_ = true;
-        return;
-      }
-    } while (!cur_offset_.compare_exchange_weak(old_offset, new_offset, std::memory_order_relaxed));
-  }
-
-  TraceAction action = kTraceMethodEnter;
-  switch (event) {
-    case instrumentation::Instrumentation::kMethodEntered:
-      action = kTraceMethodEnter;
-      break;
-    case instrumentation::Instrumentation::kMethodExited:
-      action = kTraceMethodExit;
-      break;
-    case instrumentation::Instrumentation::kMethodUnwind:
-      action = kTraceUnroll;
-      break;
-    default:
-      UNIMPLEMENTED(FATAL) << "Unexpected event: " << event;
-  }
-
-  uint32_t method_value = EncodeTraceMethodAndAction(method, action);
+  // Although multiple threads can call this method concurrently,
+  // the compare_exchange_weak here is still atomic (by definition).
+  // A succeeding update is visible to other cores when they pass
+  // through this point.
+  old_offset = cur_offset_.load(std::memory_order_relaxed);  // Speculative read
+  do {
+    new_offset = old_offset + GetRecordSize(clock_source_);
+    if (static_cast<size_t>(new_offset) > buffer_size_) {
+      overflow_ = true;
+      return;
+    }
+  } while (!cur_offset_.compare_exchange_weak(old_offset, new_offset, std::memory_order_relaxed));
 
   // Write data into the tracing buffer (if not streaming) or into a
   // small buffer on the stack (if streaming) which we'll put into the
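The reservation loop above is a lock-free bump allocation: each writer claims the half-open range [old_offset, old_offset + record size) and retries if another thread won the race, since compare_exchange_weak reloads old_offset on failure. A standalone sketch of the pattern (the buffer and record sizes are arbitrary values for illustration):

    #include <atomic>
    #include <cstdint>

    std::atomic<int32_t> cur_offset{0};
    constexpr int32_t kBufferSize = 1024;
    constexpr int32_t kRecordSize = 14;

    // Returns the claimed offset, or -1 when the buffer is full (the tracer's overflow case).
    int32_t ReserveRecord() {
      int32_t old_offset = cur_offset.load(std::memory_order_relaxed);  // speculative read
      int32_t new_offset;
      do {
        new_offset = old_offset + kRecordSize;
        if (new_offset > kBufferSize) {
          return -1;  // no space left; the caller drops the event
        }
        // On failure, compare_exchange_weak stores the current value into old_offset; retry.
      } while (!cur_offset.compare_exchange_weak(old_offset, new_offset,
                                                 std::memory_order_relaxed));
      return old_offset;  // bytes [old_offset, new_offset) now belong to this thread
    }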
@@ -934,14 +1198,41 @@ void Trace::LogMethodTraceEvent(Thread* thread, ArtMethod* method,
   // of FinishTracing() acquire locks and (implicitly) synchronise
   // the buffer memory.
   uint8_t* ptr;
-  static constexpr size_t kPacketSize = 14U;  // The maximum size of data in a packet.
-  uint8_t stack_buf[kPacketSize];  // Space to store a packet when in streaming mode.
+  ptr = buf_.get() + old_offset;
+  uint32_t wall_clock_diff = GetMicroTime(timestamp_counter) - start_time_;
+  MutexLock mu(Thread::Current(), tracing_lock_);
+  EncodeEventEntry(
+      ptr, thread, EncodeTraceMethod(method), action, thread_clock_diff, wall_clock_diff);
+}
+
+void Trace::LogMethodTraceEvent(Thread* thread,
+                                ArtMethod* method,
+                                TraceAction action,
+                                uint32_t thread_clock_diff,
+                                uint64_t timestamp_counter) {
+  // This method is called in both tracing modes (method and sampling). In sampling mode, this
+  // method is only called by the sampling thread. In method tracing mode, it can be called
+  // concurrently.
+
+  // Ensure we always use the non-obsolete version of the method so that entry/exit events have the
+  // same pointer value.
+  method = method->GetNonObsoleteMethod();
+
   if (trace_output_mode_ == TraceOutputMode::kStreaming) {
-    ptr = stack_buf;
+    RecordStreamingMethodEvent(thread, method, action, thread_clock_diff, timestamp_counter);
   } else {
-    ptr = buf_.get() + old_offset;
+    RecordMethodEvent(thread, method, action, thread_clock_diff, timestamp_counter);
   }
+}
 
+void Trace::EncodeEventEntry(uint8_t* ptr,
+                             Thread* thread,
+                             uint32_t method_index,
+                             TraceAction action,
+                             uint32_t thread_clock_diff,
+                             uint32_t wall_clock_diff) {
+  static constexpr size_t kPacketSize = 14U;  // The maximum size of data in a packet.
+  uint32_t method_value = (method_index << TraceActionBits) | action;
   Append2LE(ptr, thread->GetTid());
   Append4LE(ptr + 2, method_value);
   ptr += 6;
@@ -954,79 +1245,46 @@ void Trace::LogMethodTraceEvent(Thread* thread, ArtMethod* method,
     Append4LE(ptr, wall_clock_diff);
   }
   static_assert(kPacketSize == 2 + 4 + 4 + 4, "Packet size incorrect.");
-
-  if (trace_output_mode_ == TraceOutputMode::kStreaming) {
-    MutexLock mu(Thread::Current(), *streaming_lock_);  // To serialize writing.
-    if (RegisterMethod(method)) {
-      // Write a special block with the name.
-      std::string method_line(GetMethodLine(method));
-      uint8_t buf2[5];
-      Append2LE(buf2, 0);
-      buf2[2] = kOpNewMethod;
-      Append2LE(buf2 + 3, static_cast<uint16_t>(method_line.length()));
-      WriteToBuf(buf2, sizeof(buf2));
-      WriteToBuf(reinterpret_cast<const uint8_t*>(method_line.c_str()), method_line.length());
-    }
-    if (RegisterThread(thread)) {
-      // It might be better to postpone this. Threads might not have received names...
-      std::string thread_name;
-      thread->GetThreadName(thread_name);
-      uint8_t buf2[7];
-      Append2LE(buf2, 0);
-      buf2[2] = kOpNewThread;
-      Append2LE(buf2 + 3, static_cast<uint16_t>(thread->GetTid()));
-      Append2LE(buf2 + 5, static_cast<uint16_t>(thread_name.length()));
-      WriteToBuf(buf2, sizeof(buf2));
-      WriteToBuf(reinterpret_cast<const uint8_t*>(thread_name.c_str()), thread_name.length());
-    }
-    WriteToBuf(stack_buf, sizeof(stack_buf));
-  }
 }
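EncodeEventEntry above fixes the event packet at up to 14 bytes: a 2-byte thread id, a 4-byte word holding the method index shifted left by TraceActionBits with the action in the low bits, then one or two optional 4-byte clock values depending on the clock source. A sketch of the dual-clock layout with local little-endian helpers (stand-ins for ART's Append2LE/Append4LE; TraceActionBits = 2 is assumed here):

    #include <cstdint>

    void PutU16LE(uint8_t* b, uint16_t v) {
      b[0] = static_cast<uint8_t>(v);
      b[1] = static_cast<uint8_t>(v >> 8);
    }

    void PutU32LE(uint8_t* b, uint32_t v) {
      PutU16LE(b, static_cast<uint16_t>(v));
      PutU16LE(b + 2, static_cast<uint16_t>(v >> 16));
    }

    // Dual-clock record: tid(2) | method|action(4) | thread clock(4) | wall clock(4) = 14 bytes.
    void EncodeDualClockEvent(uint8_t* ptr, uint16_t tid, uint32_t method_index,
                              uint32_t action, uint32_t thread_clock_diff,
                              uint32_t wall_clock_diff) {
      constexpr uint32_t kTraceActionBits = 2;  // assumed: two low bits hold the action
      PutU16LE(ptr, tid);
      PutU32LE(ptr + 2, (method_index << kTraceActionBits) | action);
      PutU32LE(ptr + 6, thread_clock_diff);
      PutU32LE(ptr + 10, wall_clock_diff);
    }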
 
-void Trace::GetVisitedMethods(size_t buf_size,
-                              std::set<ArtMethod*>* visited_methods) {
-  uint8_t* ptr = buf_.get() + kTraceHeaderLength;
-  uint8_t* end = buf_.get() + buf_size;
-
-  while (ptr < end) {
-    uint32_t tmid = ReadBytes(ptr + 2, sizeof(tmid));
-    ArtMethod* method = DecodeTraceMethod(tmid);
-    visited_methods->insert(method);
-    ptr += GetRecordSize(clock_source_);
+void Trace::EnsureSpace(uint8_t* buffer,
+                        size_t* current_index,
+                        size_t buffer_size,
+                        size_t required_size) {
+  if (*current_index + required_size < buffer_size) {
+    return;
   }
-}
 
-void Trace::DumpMethodList(std::ostream& os, const std::set<ArtMethod*>& visited_methods) {
-  for (const auto& method : visited_methods) {
-    os << GetMethodLine(method);
+  if (!trace_file_->WriteFully(buffer, *current_index)) {
+    PLOG(WARNING) << "Failed streaming a tracing event.";
   }
+  *current_index = 0;
 }
 
-static void DumpThread(Thread* t, void* arg) {
-  std::ostream& os = *reinterpret_cast<std::ostream*>(arg);
-  std::string name;
-  t->GetThreadName(name);
-  os << t->GetTid() << "\t" << name << "\n";
+void Trace::DumpMethodList(std::ostream& os) {
+  MutexLock mu(Thread::Current(), tracing_lock_);
+  for (auto const& entry : art_method_id_map_) {
+    os << GetMethodLine(entry.first, entry.second);
+  }
 }
 
 void Trace::DumpThreadList(std::ostream& os) {
-  Thread* self = Thread::Current();
-  for (const auto& it : exited_threads_) {
-    os << it.first << "\t" << it.second << "\n";
+  for (const auto& it : threads_list_) {
+    // We use only 16 bits to encode the thread id. On Android, we don't expect to need more than
+    // 16 bits for a tid. For 32-bit platforms it is always ensured we use fewer than 16 bits
+    // (see __check_max_thread_id in bionic for more details), and even on 64-bit the maximum
+    // number of threads is currently less than 65536.
+    // TODO(mythria): On host, we know thread ids can be greater than 16 bits. Consider adding
+    // a map similar to method ids.
+    DCHECK(!kIsTargetBuild || it.first < (1 << 16));
+    os << static_cast<uint16_t>(it.first) << "\t" << it.second << "\n";
   }
-  Locks::thread_list_lock_->AssertNotHeld(self);
-  MutexLock mu(self, *Locks::thread_list_lock_);
-  Runtime::Current()->GetThreadList()->ForEach(DumpThread, &os);
 }
 
 void Trace::StoreExitingThreadInfo(Thread* thread) {
   MutexLock mu(thread, *Locks::trace_lock_);
   if (the_trace_ != nullptr) {
-    std::string name;
-    thread->GetThreadName(name);
-    // The same thread/tid may be used multiple times. As SafeMap::Put does not allow to override
-    // a previous mapping, use SafeMap::Overwrite.
-    the_trace_->exited_threads_.Overwrite(thread->GetTid(), name);
+    the_trace_->UpdateThreadsList(thread);
   }
 }
 
@@ -1042,9 +1300,21 @@ Trace::TraceMode Trace::GetMode() {
   return the_trace_->trace_mode_;
 }
 
+int Trace::GetFlags() {
+  MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+  CHECK(the_trace_ != nullptr) << "Trace flags requested, but no trace currently running";
+  return the_trace_->flags_;
+}
+
+int Trace::GetIntervalInMillis() {
+  MutexLock mu(Thread::Current(), *Locks::trace_lock_);
+  CHECK(the_trace_ != nullptr) << "Trace interval requested, but no trace currently running";
+  return the_trace_->interval_us_;
+}
+
 size_t Trace::GetBufferSize() {
   MutexLock mu(Thread::Current(), *Locks::trace_lock_);
-  CHECK(the_trace_ != nullptr) << "Trace mode requested, but no trace currently running";
+  CHECK(the_trace_ != nullptr) << "Trace buffer size requested, but no trace currently running";
   return the_trace_->buffer_size_;
 }