author     2024-07-09 10:31:04 +0000
committer  2024-07-31 10:15:19 +0000
commit     1793c0985d065dd00c0c6ab16af5244f25c4080b (patch)
tree       21442b207cffda30dfdfd1aad28ac329f5ac52ba
parent     2339531e3a5865f91686a1dc6e0fc24b23936936 (diff)
Add support for experimental on-demand tracing
This adds support for on-demand tracing, behind a flag that is disabled
by default. This initial CL adds the runtime support to ART; a follow-up
CL will add an API that the frameworks can use to request a trace of the
dex methods that were executed. This differs from method tracing in two
ways:
1. Method tracing is precise, whereas this traces on a best-effort basis.
2. Unlike method tracing, this uses a circular buffer, so it can only
   capture a limited window into the past.
Bug: 352518093
Test: art/test.py
Change-Id: I8d958dd2ccefe8205a6c05b4daf339ea71b5dbc4
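
The circular-buffer behaviour described in the commit message can be pictured
with a minimal sketch. This is not code from this CL; the TraceRingBuffer name
and layout below are purely illustrative:

    #include <array>
    #include <cstddef>
    #include <cstdint>

    // Illustrative only: a fixed-size ring of slots. Recording is cheap and
    // best-effort; once the ring is full, each new event overwrites the oldest
    // one, so only a bounded window into the past can ever be reconstructed.
    class TraceRingBuffer {
     public:
      void Record(uintptr_t event) {
        slots_[next_] = event;
        next_ = (next_ + 1) % slots_.size();  // wrap around, dropping the oldest
      }

     private:
      static constexpr size_t kNumSlots = 2048;  // mirrors kAlwaysOnTraceBufSize below
      std::array<uintptr_t, kNumSlots> slots_{};
      size_t next_ = 0;
    };

The buffer added by this CL is per-thread and is filled by hand-written arm64
entrypoints (see the diff below), but the wrap-around behaviour is the same.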
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc                  | 50
-rw-r--r--  compiler/optimizing/code_generator_arm64.h                   |  1
-rw-r--r--  dex2oat/linker/oat_writer_test.cc                            |  2
-rw-r--r--  runtime/arch/arm64/entrypoints_init_arm64.cc                 | 10
-rw-r--r--  runtime/arch/arm64/quick_entrypoints_arm64.S                 | 31
-rw-r--r--  runtime/entrypoints/quick/quick_default_init_entrypoints.h  |  4
-rw-r--r--  runtime/entrypoints/quick/quick_entrypoints_list.h          |  4
-rw-r--r--  runtime/entrypoints_order_test.cc                            |  4
-rw-r--r--  runtime/oat/oat.h                                            |  4
-rw-r--r--  runtime/runtime_globals.h                                    |  3
-rw-r--r--  runtime/trace.h                                              |  4
-rw-r--r--  tools/cpp-define-generator/thread.def                        |  6
12 files changed, 118 insertions, 5 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 3ec67afce9..8d5d1a791c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -818,6 +818,33 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
 };
 
+class TracingMethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  explicit TracingMethodEntryExitHooksSlowPathARM64(bool is_method_entry)
+      : SlowPathCodeARM64(/* instruction= */ nullptr), is_method_entry_(is_method_entry) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) override {
+    QuickEntrypointEnum entry_point =
+        (is_method_entry_) ? kQuickRecordEntryTraceEvent : kQuickRecordExitTraceEvent;
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    vixl::aarch64::Label call;
+    __ Bind(GetEntryLabel());
+    uint32_t entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entry_point).Int32Value();
+    __ Ldr(lr, MemOperand(tr, entrypoint_offset));
+    __ Blr(lr);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const override {
+    return "TracingMethodEntryExitHooksSlowPath";
+  }
+
+ private:
+  const bool is_method_entry_;
+
+  DISALLOW_COPY_AND_ASSIGN(TracingMethodEntryExitHooksSlowPathARM64);
+};
+
 class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
  public:
   explicit MethodEntryExitHooksSlowPathARM64(HInstruction* instruction)
@@ -1289,6 +1316,25 @@ void InstructionCodeGeneratorARM64::VisitMethodEntryHook(HMethodEntryHook* instr
   GenerateMethodEntryExitHook(instruction);
 }
 
+void CodeGeneratorARM64::MaybeRecordTraceEvent(bool is_method_entry) {
+  if (!kAlwaysEnableProfileCode) {
+    return;
+  }
+
+  MacroAssembler* masm = GetVIXLAssembler();
+  UseScratchRegisterScope temps(masm);
+  Register addr = temps.AcquireX();
+  CHECK(addr.Is(vixl::aarch64::x16));
+
+  SlowPathCodeARM64* slow_path =
+      new (GetScopedAllocator()) TracingMethodEntryExitHooksSlowPathARM64(is_method_entry);
+  AddSlowPath(slow_path);
+
+  __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArm64PointerSize>().SizeValue()));
+  __ Cbnz(addr, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void CodeGeneratorARM64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
   MacroAssembler* masm = GetVIXLAssembler();
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
@@ -1443,6 +1489,8 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
       Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
     }
+
+    MaybeRecordTraceEvent(/* is_method_entry= */ true);
   }
   MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
@@ -1451,6 +1499,8 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
 void CodeGeneratorARM64::GenerateFrameExit() {
   GetAssembler()->cfi().RememberState();
   if (!HasEmptyFrame()) {
+    MaybeRecordTraceEvent(/* is_method_entry= */ false);
+
     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 04694ece3e..53ec32c427 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -1038,6 +1038,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
   void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl::aarch64::Register klass);
   void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);
+  void MaybeRecordTraceEvent(bool is_method_entry);
 
   bool CanUseImplicitSuspendCheck() const;
 
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index 4bcf29804c..5683e8b2c4 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -502,7 +502,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) {
   EXPECT_EQ(68U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(4U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(170 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+  EXPECT_EQ(172 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
             sizeof(QuickEntryPoints));
 }
 
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 41876ec0f1..b4e9c65746 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -76,6 +76,9 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Obj
 extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*);
 
+extern "C" void art_quick_record_entry_trace_event();
+extern "C" void art_quick_record_exit_trace_event();
+
 void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) {
   // ARM64 is the architecture with the largest number of core
   // registers (32) that supports the read barrier configuration.
@@ -193,6 +196,13 @@ void InitEntryPoints(JniEntryPoints* jpoints,
   UpdateReadBarrierEntrypoints(qpoints, /*is_active=*/ false);
   qpoints->SetReadBarrierSlow(artReadBarrierSlow);
   qpoints->SetReadBarrierForRootSlow(artReadBarrierForRootSlow);
+
+  if (kAlwaysEnableProfileCode) {
+    // These are used for always-on-tracing, currently only supported on arm64
+    // devices.
+    qpoints->SetRecordEntryTraceEvent(art_quick_record_entry_trace_event);
+    qpoints->SetRecordExitTraceEvent(art_quick_record_exit_trace_event);
+  }
 }
 
 }  // namespace art
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 3c2445ce2c..8b1aef8494 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2491,6 +2491,37 @@ ENTRY art_quick_compile_optimized
     ret
 END art_quick_compile_optimized
 
+ENTRY art_quick_record_entry_trace_event
+    ldr xIP1, [xSELF, #TRACE_BUFFER_CURRENT_OFFSET]
+    // xIP0 has the trace buffer pointer. This is loaded on the fast path before
+    // checking if we need to call this method. This will be still valid here.
+    cmp xIP1, xIP0
+    bhs .Lupdate_entry
+    mov xIP1, #TRACE_BUFFER_SIZE
+    add xIP1, xIP0, xIP1
+.Lupdate_entry:
+    str x0, [xIP1]
+    sub xIP1, xIP1, 8
+    str xIP1, [xSELF, #TRACE_BUFFER_CURRENT_OFFSET]
+    ret
+END art_quick_record_entry_trace_event
+
+ENTRY art_quick_record_exit_trace_event
+    ldr xIP1, [xSELF, #TRACE_BUFFER_CURRENT_OFFSET]
+    // xIP0 has the trace buffer pointer. This is loaded on the fast path before
+    // checking if we need to call this method. This will be still valid here.
+    cmp xIP1, xIP0
+    bhs .Lupdate_entry_exit
+    mov xIP1, #TRACE_BUFFER_SIZE
+    add xIP1, xIP0, xIP1
+.Lupdate_entry_exit:
+    mov xIP0, #1
+    str xIP0, [xIP1]
+    sub xIP1, xIP1, 8
+    str xIP1, [xSELF, #TRACE_BUFFER_CURRENT_OFFSET]
+    ret
+END art_quick_record_exit_trace_event
+
 .extern artMethodEntryHook
 ENTRY art_quick_method_entry_hook
     SETUP_SAVE_EVERYTHING_FRAME
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 46840e60ed..05b4bd7c2f 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -136,6 +136,10 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints,
   qpoints->SetMethodEntryHook(art_quick_method_entry_hook);
   qpoints->SetMethodExitHook(art_quick_method_exit_hook);
 
+  // These are used for on-demand-tracing, currently only supported on arm64 devices.
+  qpoints->SetRecordEntryTraceEvent(nullptr);
+  qpoints->SetRecordExitTraceEvent(nullptr);
+
   if (monitor_jni_entry_exit) {
     qpoints->SetJniMethodStart(art_jni_monitored_method_start);
     qpoints->SetJniMethodEnd(art_jni_monitored_method_end);
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index aa3360e1a4..be417964a8 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -208,7 +208,9 @@
   V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*) \
 \
   V(MethodEntryHook, void, ArtMethod*, Thread*) \
-  V(MethodExitHook, int32_t, Thread*, ArtMethod*, uint64_t*, uint64_t*)
+  V(MethodExitHook, int32_t, Thread*, ArtMethod*, uint64_t*, uint64_t*) \
+  V(RecordEntryTraceEvent, void) \
+  V(RecordExitTraceEvent, void)
 
 #endif  // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_
 #undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_   // #define is only for lint.
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 090484eabb..d4014162c4 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -414,8 +414,10 @@ class EntrypointsOrderTest : public CommonArtTest {
     EXPECT_OFFSET_DIFFNP(
         QuickEntryPoints, pReadBarrierForRootSlow, pMethodEntryHook, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMethodEntryHook, pMethodExitHook, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMethodExitHook, pRecordEntryTraceEvent, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pRecordEntryTraceEvent, pRecordExitTraceEvent, sizeof(void*));
 
-    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pMethodExitHook) + sizeof(void*) ==
+    CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pRecordExitTraceEvent) + sizeof(void*) ==
                 sizeof(QuickEntryPoints), QuickEntryPoints_all);
   }
 
diff --git a/runtime/oat/oat.h b/runtime/oat/oat.h
index b850fe8dd5..cd7df60ca5 100644
--- a/runtime/oat/oat.h
+++ b/runtime/oat/oat.h
@@ -44,8 +44,8 @@ std::ostream& operator<<(std::ostream& stream, StubType stub_type);
 class EXPORT PACKED(4) OatHeader {
  public:
   static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
-  // Last oat version changed reason: Implement `HLoadClass::LoadKind::kAppImageRelRo`.
-  static constexpr std::array<uint8_t, 4> kOatVersion{{'2', '4', '4', '\0'}};
+  // Last oat version changed reason: Adding new entrypoints for on demand tracing.
+  static constexpr std::array<uint8_t, 4> kOatVersion{{'2', '4', '5', '\0'}};
 
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/runtime_globals.h b/runtime/runtime_globals.h
index dc69063b97..9968897afc 100644
--- a/runtime/runtime_globals.h
+++ b/runtime/runtime_globals.h
@@ -28,6 +28,9 @@ namespace art HIDDEN {
 // Size of Dex virtual registers.
 static constexpr size_t kVRegSize = 4;
 
+// Should we always generate code to trace executed dex methods.
+static constexpr bool kAlwaysEnableProfileCode = false;
+
 #ifdef ART_PAGE_SIZE_AGNOSTIC
 // Accessor for the page size constant local to the libart.
 //
diff --git a/runtime/trace.h b/runtime/trace.h
index 57f79f6761..b02bdc36e4 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -94,6 +94,10 @@ std::ostream& operator<<(std::ostream& os, TracingMode rhs);
 //
 // All values are stored in little-endian order.
 
+// TODO(mythria): A randomly chosen value. Tune it later based on the number of
+// entries required in the buffer.
+static constexpr size_t kAlwaysOnTraceBufSize = 2048;
+
 enum TraceAction {
   kTraceMethodEnter = 0x00,       // method entry
   kTraceMethodExit = 0x01,        // method exit
diff --git a/tools/cpp-define-generator/thread.def b/tools/cpp-define-generator/thread.def
index 5cc5f7106e..0d1860d549 100644
--- a/tools/cpp-define-generator/thread.def
+++ b/tools/cpp-define-generator/thread.def
@@ -17,6 +17,7 @@
 #if ASM_DEFINE_INCLUDE_DEPENDENCIES
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "thread.h"
+#include "trace.h"
 #endif
 
 ASM_DEFINE(THREAD_CARD_TABLE_OFFSET,
@@ -71,3 +72,8 @@ ASM_DEFINE(THREAD_SHARED_METHOD_HOTNESS_OFFSET,
            art::Thread::SharedMethodHotnessOffset<art::kRuntimePointerSize>().Int32Value())
 ASM_DEFINE(THREAD_TID_OFFSET,
            art::Thread::TidOffset<art::kRuntimePointerSize>().Int32Value())
+ASM_DEFINE(TRACE_BUFFER_INIT_OFFSET,
+           art::Thread::TraceBufferPtrOffset<art::kRuntimePointerSize>().Int32Value())
+ASM_DEFINE(TRACE_BUFFER_CURRENT_OFFSET,
+           art::Thread::TraceBufferCurrPtrOffset<art::kRuntimePointerSize>().Int32Value())
+ASM_DEFINE(TRACE_BUFFER_SIZE, (art::kAlwaysOnTraceBufSize - 1) * sizeof(uintptr_t))
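
For readability, the two arm64 entrypoints above (art_quick_record_entry_trace_event
and art_quick_record_exit_trace_event) can be restated in C++. This is a hedged
sketch, not code from the CL: buffer_base stands for the per-thread pointer the
compiled fast path loads from Thread::TraceBufferPtrOffset (left in xIP0),
curr_slot for the cursor kept at TRACE_BUFFER_CURRENT_OFFSET, and value is the
ArtMethod* passed in x0 for an entry event or the constant 1 for an exit event:

    #include <cstddef>
    #include <cstdint>

    // Sketch of the entrypoint logic above; function and parameter names are illustrative.
    constexpr size_t kAlwaysOnTraceBufSize = 2048;
    constexpr size_t kTraceBufferSizeBytes = (kAlwaysOnTraceBufSize - 1) * sizeof(uintptr_t);

    inline void RecordTraceEvent(uintptr_t* buffer_base,  // Thread::TraceBufferPtrOffset (xIP0)
                                 uintptr_t** curr_slot,   // TRACE_BUFFER_CURRENT_OFFSET cursor
                                 uintptr_t value) {       // ArtMethod* on entry, 1 on exit
      uintptr_t* curr = *curr_slot;
      if (curr < buffer_base) {
        // The cursor has run past the front of the buffer: wrap back to the last slot.
        curr = reinterpret_cast<uintptr_t*>(
            reinterpret_cast<uintptr_t>(buffer_base) + kTraceBufferSizeBytes);
      }
      *curr = value;      // record the event: the method pointer on entry, 1 on exit
      --curr;             // the buffer is filled downward, one 8-byte slot per event
      *curr_slot = curr;  // publish the new cursor for the next event
    }

Note that the wrap check runs before the store: once the cursor walks below the
buffer base it is reset to the last slot on the next event, so the buffer is
filled downward and reused indefinitely, giving the bounded, best-effort window
described in the commit message.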