author Mythri Alle <mythria@google.com> 2024-07-09 10:31:04 +0000
committer Mythri Alle <mythria@google.com> 2024-07-31 10:15:19 +0000
commit 1793c0985d065dd00c0c6ab16af5244f25c4080b (patch)
tree   21442b207cffda30dfdfd1aad28ac329f5ac52ba
parent 2339531e3a5865f91686a1dc6e0fc24b23936936 (diff)
Add support for experimental on-demand tracing

This adds support for on-demand tracing, behind a flag that is disabled by default. This is the initial CL that adds the support to ART; a followup CL will add an API that the frameworks can use to request a trace of the dex methods that were executed.

This is different from method tracing in two ways:
1. Method tracing is precise, whereas this traces on a best-effort basis.
2. Unlike method tracing, this uses a circular buffer, so it can only trace a limited window into the past.

Bug: 352518093
Test: art/test.py
Change-Id: I8d958dd2ccefe8205a6c05b4daf339ea71b5dbc4
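The per-thread circular buffer this describes can be pictured with a short sketch (illustrative only, not code from this CL; the type and constant names are assumptions): method entry records the ArtMethod pointer, method exit records a sentinel value of 1, and new events overwrite the oldest ones so only a recent window survives.

// Sketch of the circular recording model described above.
#include <cstddef>
#include <cstdint>

constexpr size_t kBufSlots = 2048;  // matches kAlwaysOnTraceBufSize in this CL

struct TraceBufferSketch {
  uintptr_t slots[kBufSlots] = {};
  size_t curr = kBufSlots - 1;  // filled from the top downwards

  void Record(uintptr_t value) {
    slots[curr] = value;                            // overwrite the oldest entry
    curr = (curr == 0) ? kBufSlots - 1 : curr - 1;  // wrap around the fixed-size buffer
  }
  void RecordEntry(uintptr_t art_method) { Record(art_method); }  // method entry
  void RecordExit() { Record(1); }                                // method exit sentinel
};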
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 50
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 1
-rw-r--r--  dex2oat/linker/oat_writer_test.cc | 2
-rw-r--r--  runtime/arch/arm64/entrypoints_init_arm64.cc | 10
-rw-r--r--  runtime/arch/arm64/quick_entrypoints_arm64.S | 31
-rw-r--r--  runtime/entrypoints/quick/quick_default_init_entrypoints.h | 4
-rw-r--r--  runtime/entrypoints/quick/quick_entrypoints_list.h | 4
-rw-r--r--  runtime/entrypoints_order_test.cc | 4
-rw-r--r--  runtime/oat/oat.h | 4
-rw-r--r--  runtime/runtime_globals.h | 3
-rw-r--r--  runtime/trace.h | 4
-rw-r--r--  tools/cpp-define-generator/thread.def | 6
12 files changed, 118 insertions(+), 5 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 3ec67afce9..8d5d1a791c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -818,6 +818,33 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
};
+class TracingMethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ explicit TracingMethodEntryExitHooksSlowPathARM64(bool is_method_entry)
+ : SlowPathCodeARM64(/* instruction= */ nullptr), is_method_entry_(is_method_entry) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ QuickEntrypointEnum entry_point =
+ (is_method_entry_) ? kQuickRecordEntryTraceEvent : kQuickRecordExitTraceEvent;
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ vixl::aarch64::Label call;
+ __ Bind(GetEntryLabel());
+ uint32_t entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entry_point).Int32Value();
+ __ Ldr(lr, MemOperand(tr, entrypoint_offset));
+ __ Blr(lr);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const override {
+ return "TracingMethodEntryExitHooksSlowPath";
+ }
+
+ private:
+ const bool is_method_entry_;
+
+ DISALLOW_COPY_AND_ASSIGN(TracingMethodEntryExitHooksSlowPathARM64);
+};
+
class MethodEntryExitHooksSlowPathARM64 : public SlowPathCodeARM64 {
public:
explicit MethodEntryExitHooksSlowPathARM64(HInstruction* instruction)
@@ -1289,6 +1316,25 @@ void InstructionCodeGeneratorARM64::VisitMethodEntryHook(HMethodEntryHook* instr
GenerateMethodEntryExitHook(instruction);
}
+void CodeGeneratorARM64::MaybeRecordTraceEvent(bool is_method_entry) {
+ if (!kAlwaysEnableProfileCode) {
+ return;
+ }
+
+ MacroAssembler* masm = GetVIXLAssembler();
+ UseScratchRegisterScope temps(masm);
+ Register addr = temps.AcquireX();
+ CHECK(addr.Is(vixl::aarch64::x16));
+
+ SlowPathCodeARM64* slow_path =
+ new (GetScopedAllocator()) TracingMethodEntryExitHooksSlowPathARM64(is_method_entry);
+ AddSlowPath(slow_path);
+
+ __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArm64PointerSize>().SizeValue()));
+ __ Cbnz(addr, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
void CodeGeneratorARM64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
MacroAssembler* masm = GetVIXLAssembler();
if (GetCompilerOptions().CountHotnessInCompiledCode()) {
@@ -1443,6 +1489,8 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
__ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
}
+
+ MaybeRecordTraceEvent(/* is_method_entry= */ true);
}
MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
@@ -1451,6 +1499,8 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
void CodeGeneratorARM64::GenerateFrameExit() {
GetAssembler()->cfi().RememberState();
if (!HasEmptyFrame()) {
+ MaybeRecordTraceEvent(/* is_method_entry= */ false);
+
int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
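In rough pseudo-C terms, the check that MaybeRecordTraceEvent emits at frame entry and exit reduces to the sketch below (not generated source; the parameter names are illustrative): the fast path loads the thread-local trace-buffer pointer and skips the slow path while it is null, so the per-call cost when tracing is inactive is one load plus one compare-and-branch.

// Sketch of the emitted fast-path/slow-path logic (names are assumptions).
#include <cstdint>

void EmittedTraceCheckSketch(uintptr_t trace_buffer_base,  // Ldr x16, [tr, #TraceBufferPtrOffset]
                             void (*record_event)()) {     // pRecordEntry/ExitTraceEvent entrypoint
  if (trace_buffer_base != 0) {  // Cbnz x16, slow_path
    record_event();              // slow path: Ldr lr, [tr, #entrypoint_offset]; Blr lr
  }
}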
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 04694ece3e..53ec32c427 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -1038,6 +1038,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl::aarch64::Register klass);
void MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry);
+ void MaybeRecordTraceEvent(bool is_method_entry);
bool CanUseImplicitSuspendCheck() const;
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index 4bcf29804c..5683e8b2c4 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -502,7 +502,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) {
EXPECT_EQ(68U, sizeof(OatHeader));
EXPECT_EQ(4U, sizeof(OatMethodOffsets));
EXPECT_EQ(4U, sizeof(OatQuickMethodHeader));
- EXPECT_EQ(170 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+ EXPECT_EQ(172 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
sizeof(QuickEntryPoints));
}
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 41876ec0f1..b4e9c65746 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -76,6 +76,9 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Obj
extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*);
extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*);
+extern "C" void art_quick_record_entry_trace_event();
+extern "C" void art_quick_record_exit_trace_event();
+
void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) {
// ARM64 is the architecture with the largest number of core
// registers (32) that supports the read barrier configuration.
@@ -193,6 +196,13 @@ void InitEntryPoints(JniEntryPoints* jpoints,
UpdateReadBarrierEntrypoints(qpoints, /*is_active=*/ false);
qpoints->SetReadBarrierSlow(artReadBarrierSlow);
qpoints->SetReadBarrierForRootSlow(artReadBarrierForRootSlow);
+
+ if (kAlwaysEnableProfileCode) {
+ // These are used for always-on-tracing, currently only supported on arm64
+ // devices.
+ qpoints->SetRecordEntryTraceEvent(art_quick_record_entry_trace_event);
+ qpoints->SetRecordExitTraceEvent(art_quick_record_exit_trace_event);
+ }
}
} // namespace art
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 3c2445ce2c..8b1aef8494 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2491,6 +2491,37 @@ ENTRY art_quick_compile_optimized
ret
END art_quick_compile_optimized
+ENTRY art_quick_record_entry_trace_event
+ ldr xIP1, [xSELF, #TRACE_BUFFER_CURRENT_OFFSET]
+ // xIP0 has the trace buffer pointer. This is loaded on the fast path before
+ // checking if we need to call this method. This will be still valid here.
+ cmp xIP1, xIP0
+ bhs .Lupdate_entry
+ mov xIP1, #TRACE_BUFFER_SIZE
+ add xIP1, xIP0, xIP1
+.Lupdate_entry:
+ str x0, [xIP1]
+ sub xIP1, xIP1, 8
+ str xIP1, [xSELF, #TRACE_BUFFER_CURRENT_OFFSET]
+ ret
+END art_quick_record_entry_trace_event
+
+ENTRY art_quick_record_exit_trace_event
+ ldr xIP1, [xSELF, #TRACE_BUFFER_CURRENT_OFFSET]
+ // xIP0 has the trace buffer pointer. This is loaded on the fast path before
+ // checking if we need to call this method. This will be still valid here.
+ cmp xIP1, xIP0
+ bhs .Lupdate_entry_exit
+ mov xIP1, #TRACE_BUFFER_SIZE
+ add xIP1, xIP0, xIP1
+.Lupdate_entry_exit:
+ mov xIP0, #1
+ str xIP0, [xIP1]
+ sub xIP1, xIP1, 8
+ str xIP1, [xSELF, #TRACE_BUFFER_CURRENT_OFFSET]
+ ret
+END art_quick_record_exit_trace_event
+
.extern artMethodEntryHook
ENTRY art_quick_method_entry_hook
SETUP_SAVE_EVERYTHING_FRAME
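The two new entrypoints implement the same downward-filling buffer in hand-written assembly: xIP0 still holds the buffer base loaded by the inline fast path, the current-slot pointer lives at TRACE_BUFFER_CURRENT_OFFSET on the Thread, and once that pointer drops below the base it wraps back to the last slot. A C++ rendering of that logic (a sketch under those assumptions, not ART source):

// Sketch of art_quick_record_entry/exit_trace_event.
#include <cstdint>

constexpr uintptr_t kTraceBufferSizeBytes = (2048 - 1) * sizeof(uintptr_t);  // TRACE_BUFFER_SIZE

void RecordTraceEventSketch(uintptr_t buffer_base,    // xIP0, loaded by the inline fast path
                            uintptr_t* current_slot,  // field at TRACE_BUFFER_CURRENT_OFFSET
                            uintptr_t value) {        // ArtMethod* on entry, 1 on exit
  uintptr_t curr = *current_slot;                // ldr xIP1, [xSELF, #TRACE_BUFFER_CURRENT_OFFSET]
  if (curr < buffer_base) {                      // cmp xIP1, xIP0; bhs .Lupdate_entry
    curr = buffer_base + kTraceBufferSizeBytes;  // wrap back to the last slot
  }
  *reinterpret_cast<uintptr_t*>(curr) = value;   // str x0 (or #1), [xIP1]
  *current_slot = curr - sizeof(uintptr_t);      // sub xIP1, xIP1, 8; str xIP1, [xSELF, ...]
}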
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 46840e60ed..05b4bd7c2f 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -136,6 +136,10 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints,
qpoints->SetMethodEntryHook(art_quick_method_entry_hook);
qpoints->SetMethodExitHook(art_quick_method_exit_hook);
+ // These are used for on-demand-tracing, currently only supported on arm64 devices.
+ qpoints->SetRecordEntryTraceEvent(nullptr);
+ qpoints->SetRecordExitTraceEvent(nullptr);
+
if (monitor_jni_entry_exit) {
qpoints->SetJniMethodStart(art_jni_monitored_method_start);
qpoints->SetJniMethodEnd(art_jni_monitored_method_end);
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index aa3360e1a4..be417964a8 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -208,7 +208,9 @@
V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*) \
\
V(MethodEntryHook, void, ArtMethod*, Thread*) \
- V(MethodExitHook, int32_t, Thread*, ArtMethod*, uint64_t*, uint64_t*)
+ V(MethodExitHook, int32_t, Thread*, ArtMethod*, uint64_t*, uint64_t*) \
+ V(RecordEntryTraceEvent, void) \
+ V(RecordExitTraceEvent, void)
#endif // ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_
#undef ART_RUNTIME_ENTRYPOINTS_QUICK_QUICK_ENTRYPOINTS_LIST_H_ // #define is only for lint.
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 090484eabb..d4014162c4 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -414,8 +414,10 @@ class EntrypointsOrderTest : public CommonArtTest {
EXPECT_OFFSET_DIFFNP(
QuickEntryPoints, pReadBarrierForRootSlow, pMethodEntryHook, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMethodEntryHook, pMethodExitHook, sizeof(void*));
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMethodExitHook, pRecordEntryTraceEvent, sizeof(void*));
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pRecordEntryTraceEvent, pRecordExitTraceEvent, sizeof(void*));
- CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pMethodExitHook) + sizeof(void*) ==
+ CHECKED(OFFSETOF_MEMBER(QuickEntryPoints, pRecordExitTraceEvent) + sizeof(void*) ==
sizeof(QuickEntryPoints),
QuickEntryPoints_all);
}
diff --git a/runtime/oat/oat.h b/runtime/oat/oat.h
index b850fe8dd5..cd7df60ca5 100644
--- a/runtime/oat/oat.h
+++ b/runtime/oat/oat.h
@@ -44,8 +44,8 @@ std::ostream& operator<<(std::ostream& stream, StubType stub_type);
class EXPORT PACKED(4) OatHeader {
public:
static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
- // Last oat version changed reason: Implement `HLoadClass::LoadKind::kAppImageRelRo`.
- static constexpr std::array<uint8_t, 4> kOatVersion{{'2', '4', '4', '\0'}};
+ // Last oat version changed reason: Adding new entrypoints for on demand tracing.
+ static constexpr std::array<uint8_t, 4> kOatVersion{{'2', '4', '5', '\0'}};
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/runtime_globals.h b/runtime/runtime_globals.h
index dc69063b97..9968897afc 100644
--- a/runtime/runtime_globals.h
+++ b/runtime/runtime_globals.h
@@ -28,6 +28,9 @@ namespace art HIDDEN {
// Size of Dex virtual registers.
static constexpr size_t kVRegSize = 4;
+// Should we always generate code to trace executed dex methods.
+static constexpr bool kAlwaysEnableProfileCode = false;
+
#ifdef ART_PAGE_SIZE_AGNOSTIC
// Accessor for the page size constant local to the libart.
//
diff --git a/runtime/trace.h b/runtime/trace.h
index 57f79f6761..b02bdc36e4 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -94,6 +94,10 @@ std::ostream& operator<<(std::ostream& os, TracingMode rhs);
//
// All values are stored in little-endian order.
+// TODO(mythria): A randomly chosen value. Tune it later based on the number of
+// entries required in the buffer.
+static constexpr size_t kAlwaysOnTraceBufSize = 2048;
+
enum TraceAction {
kTraceMethodEnter = 0x00, // method entry
kTraceMethodExit = 0x01, // method exit
diff --git a/tools/cpp-define-generator/thread.def b/tools/cpp-define-generator/thread.def
index 5cc5f7106e..0d1860d549 100644
--- a/tools/cpp-define-generator/thread.def
+++ b/tools/cpp-define-generator/thread.def
@@ -17,6 +17,7 @@
#if ASM_DEFINE_INCLUDE_DEPENDENCIES
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "thread.h"
+#include "trace.h"
#endif
ASM_DEFINE(THREAD_CARD_TABLE_OFFSET,
@@ -71,3 +72,8 @@ ASM_DEFINE(THREAD_SHARED_METHOD_HOTNESS_OFFSET,
art::Thread::SharedMethodHotnessOffset<art::kRuntimePointerSize>().Int32Value())
ASM_DEFINE(THREAD_TID_OFFSET,
art::Thread::TidOffset<art::kRuntimePointerSize>().Int32Value())
+ASM_DEFINE(TRACE_BUFFER_INIT_OFFSET,
+ art::Thread::TraceBufferPtrOffset<art::kRuntimePointerSize>().Int32Value())
+ASM_DEFINE(TRACE_BUFFER_CURRENT_OFFSET,
+ art::Thread::TraceBufferCurrPtrOffset<art::kRuntimePointerSize>().Int32Value())
+ASM_DEFINE(TRACE_BUFFER_SIZE, (art::kAlwaysOnTraceBufSize - 1) * sizeof(uintptr_t))
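Note that TRACE_BUFFER_SIZE is defined as (kAlwaysOnTraceBufSize - 1) * sizeof(uintptr_t): it is the byte offset of the last slot from the buffer base, which is exactly where the assembly resets the current pointer on wrap, rather than the buffer's total size. A quick arithmetic check (a sketch assuming the arm64 pointer size of 8 bytes):

// 2048 slots of 8 bytes each => the last slot starts 2047 * 8 = 16376 bytes past the base.
static_assert((2048 - 1) * 8 == 16376,
              "TRACE_BUFFER_SIZE is the last slot's byte offset on arm64, not the total size");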