Update method exit hooks from JITed code to not use Stack visitor
Using NthCallerStackVisitor is expensive since it involves decoding the
method header and other tasks that are reasonably expensive, especially
when called on every method exit. When calling method exit hooks from
JITed code, much of this information, such as the frame size and the
calling method, is already known and can be passed directly to the
method exit hook instead of being recomputed.
Locally, this change improves performance by 70% on the debuggable-cc
config of the golem benchmarks.
Bug: 253232638
Test: art/test.py
Change-Id: I3a1d80748c6d85e5fa1d3bd4aec0b29962ba0156
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index c1afdb8..3dfb741 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -199,6 +199,10 @@
return ArmManagedRegister::FromCoreRegister(R0);
}
+ManagedRegister ArmManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+ return ArmManagedRegister::FromCoreRegister(R2);  // R2 carries the frame size, as in the optimizing slow path.
+}
+
void ArmManagedRuntimeCallingConvention::ResetIterator(FrameOffset displacement) {
ManagedRuntimeCallingConvention::ResetIterator(displacement);
gpr_index_ = 1u; // Skip r0 for ArtMethod*
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 4526d9e..353e3cf 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -39,6 +39,7 @@
void ResetIterator(FrameOffset displacement) override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
+ ManagedRegister ArgumentRegisterForMethodExitHook() override;
void Next() override;
bool IsCurrentParamInRegister() override;
bool IsCurrentParamOnStack() override;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index ec77db3..3f9ed50 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -174,6 +174,10 @@
return Arm64ManagedRegister::FromXRegister(X0);
}
+ManagedRegister Arm64ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+ return Arm64ManagedRegister::FromXRegister(X4);  // X4 carries the frame size, as in the optimizing slow path.
+}
+
bool Arm64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
if (IsCurrentParamAFloatOrDouble()) {
return itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 176271e..b948bbe 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -35,6 +35,7 @@
ManagedRegister ReturnRegister() const override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
+ ManagedRegister ArgumentRegisterForMethodExitHook() override;
bool IsCurrentParamInRegister() override;
bool IsCurrentParamOnStack() override;
ManagedRegister CurrentParamRegister() override;
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index e2f3bfb..8fe8e00 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -244,6 +244,11 @@
// Register that holds the incoming method argument
virtual ManagedRegister MethodRegister() = 0;
+ // Register that is used to pass frame size for method exit hook call. This
+ // shouldn't be the same as the return register since method exit hook also expects
+ // return values in the return register.
+ virtual ManagedRegister ArgumentRegisterForMethodExitHook() = 0;
+
// Iterator interface
bool HasNext();
virtual void Next();
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index d520daa..a1ccabf 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -689,6 +689,7 @@
// Method exit hooks is called just before tearing down the frame. So there are no live
// registers and we can directly call the method exit hook and don't need a Jni specific
// entrypoint.
+ __ Move(mr_conv->ArgumentRegisterForMethodExitHook(), managed_frame_size);
__ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pMethodExitHook));
__ Jump(method_exit_hook_return.get());
}
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 65be92c..b56d0a1 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -143,6 +143,10 @@
return X86ManagedRegister::FromCpuRegister(EAX);
}
+ManagedRegister X86ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+ return X86ManagedRegister::FromCpuRegister(EBX);  // EBX carries the frame size, as in the optimizing slow path.
+}
+
void X86ManagedRuntimeCallingConvention::ResetIterator(FrameOffset displacement) {
ManagedRuntimeCallingConvention::ResetIterator(displacement);
gpr_arg_count_ = 1u; // Skip EAX for ArtMethod*
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index cd7ef5b..ebcd266 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -37,6 +37,7 @@
void ResetIterator(FrameOffset displacement) override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
+ ManagedRegister ArgumentRegisterForMethodExitHook() override;
void Next() override;
bool IsCurrentParamInRegister() override;
bool IsCurrentParamOnStack() override;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 862ee5e..8a472b3 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -147,6 +147,10 @@
return X86_64ManagedRegister::FromCpuRegister(RDI);
}
+ManagedRegister X86_64ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() {
+ return X86_64ManagedRegister::FromCpuRegister(R8);  // R8 carries the frame size, as in the optimizing slow path.
+}
+
bool X86_64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
if (IsCurrentParamAFloatOrDouble()) {
return itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index 483f1f5..67d63b8 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -35,6 +35,7 @@
ManagedRegister ReturnRegister() const override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
+ ManagedRegister ArgumentRegisterForMethodExitHook() override;
bool IsCurrentParamInRegister() override;
bool IsCurrentParamOnStack() override;
ManagedRegister CurrentParamRegister() override;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 2cc367f..72ca927 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -824,6 +824,9 @@
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+ __ Mov(vixl::aarch64::x4, arm64_codegen->GetFrameSize());
+ }
arm64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 2a9bc39..03a9977 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -973,6 +973,10 @@
(instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+ // Load frame size to pass to the exit hooks
+ __ Mov(vixl::aarch32::Register(R2), arm_codegen->GetFrameSize());
+ }
arm_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ B(GetExitLabel());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index a14ea8b..5cc7270 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -966,6 +966,9 @@
(instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+ __ movl(EBX, Immediate(codegen->GetFrameSize()));
+ }
x86_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 3d0f35d..33c9ae4 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -985,6 +985,10 @@
(instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
+ if (instruction_->IsMethodExitHook()) {
+ // Load FrameSize to pass to the exit hook.
+ __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize()));
+ }
x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 2939c54..2fde783 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -878,6 +878,11 @@
}
}
+void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst, size_t value) {
+ ArmManagedRegister dst = mdst.AsArm();
+ ___ Mov(AsVIXLRegister(dst), static_cast<uint32_t>(value));  // Moves `value`, narrowed to uint32_t, into `dst`.
+}
+
void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, size_t size) {
DCHECK(size == 4 || size == 8) << size;
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index 5965552..2b331af 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -100,6 +100,8 @@
void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
+ void Move(ManagedRegister dest, size_t value) override;
+
void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override;
void CopyRawPtrToThread(ThreadOffset32 thr_offs,
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index f0ade42..88a1b37 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -643,6 +643,12 @@
}
}
+void Arm64JNIMacroAssembler::Move(ManagedRegister m_dst, size_t value) {
+ Arm64ManagedRegister dst = m_dst.AsArm64();
+ DCHECK(dst.IsXRegister());  // Only an X register is accepted as the destination.
+ ___ Mov(reg_x(dst.AsXRegister()), value);  // Moves the immediate `value` into `dst`.
+}
+
void Arm64JNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 tr_offs) {
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
Register scratch = temps.AcquireX();
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index 9d3e821..762fe68 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -92,6 +92,7 @@
ArrayRef<ArgumentLocation> srcs,
ArrayRef<FrameOffset> refs) override;
void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
+ void Move(ManagedRegister dest, size_t value) override;
void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override;
void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
override;
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index 36de012..a91176c 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -215,6 +215,8 @@
ManagedRegister scratch,
size_t size) = 0;
+ virtual void Move(ManagedRegister dst, size_t value) = 0;  // Load constant `value` into `dst`.
+
virtual void MemoryBarrier(ManagedRegister scratch) = 0;
// Sign extension
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index e292c5b..a1c874e 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -412,6 +412,11 @@
}
}
+void X86JNIMacroAssembler::Move(ManagedRegister mdest, size_t value) {
+ X86ManagedRegister dest = mdest.AsX86();
+ __ movl(dest.AsCpuRegister(), Immediate(value));  // Moves the immediate `value` into `dest`.
+}
+
void X86JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
Register scratch = GetScratchRegister();
__ movl(scratch, Address(ESP, src));
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index 571b213..1a1bc13 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -92,6 +92,8 @@
void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
+ void Move(ManagedRegister dest, size_t value) override;
+
void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs) override;
void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index 8115911..7710dfb 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -484,6 +484,12 @@
}
}
+
+void X86_64JNIMacroAssembler::Move(ManagedRegister mdest, size_t value) {
+ X86_64ManagedRegister dest = mdest.AsX86_64();
+ __ movq(dest.AsCpuRegister(), Immediate(value));  // Moves immediate `value` into `dest`. NOTE(review): confirm accepted immediate range.
+}
+
void X86_64JNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src) {
CpuRegister scratch = GetScratchRegister();
__ movl(scratch, Address(CpuRegister(RSP), src));
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index 04c6bfc..827e1cc 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -95,6 +95,8 @@
void Move(ManagedRegister dest, ManagedRegister src, size_t size) override;
+ void Move(ManagedRegister dest, size_t value) override;
+
void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs) override;
void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch)