Direct calls to @CriticalNative methods.
Emit direct calls from compiled managed code to the native
code registered with the method, avoiding the JNI stub.
Golem results:
art-opt-cc x86 x86-64 arm arm64
NativeDowncallStaticCritical +12.5% +62.5% +75.9% +41.7%
NativeDowncallStaticCritical6 +55.6% +87.5% +72.1% +35.3%
art-opt x86 x86-64 arm arm64
NativeDowncallStaticCritical +28.6% +85.6% +76.4% +38.4%
NativeDowncallStaticCritical6 +44.6% +44.6% +74.6% +32.2%
Test: Covered by 178-app-image-native-method.
Test: m test-art-host-gtest
Test: testrunner.py --host --debuggable --ndebuggable \
--optimizing --jit --jit-on-first-use
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Test: testrunner.py --target --debuggable --ndebuggable \
--optimizing --jit --jit-on-first-use -t 178
Test: aosp_cf_x86_phone-userdebug boots.
Test: aosp_cf_x86_phone-userdebug/jitzygote boots.
Bug: 112189621
Change-Id: I8b37da51e8fe0b7bc513bb81b127fe0416068866
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 2db1390..685e1e2 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -314,6 +314,12 @@
}
ASSERT_TRUE(jmethod_ != nullptr) << method_name << " " << method_sig;
+ // Make sure the test class is visibly initialized so that the RegisterNatives() below
+ // sets the JNI entrypoint rather than leaving it as null (this test pretends to be an
+ // AOT compiler and therefore the ClassLinker skips entrypoint initialization). Even
+ // if the ClassLinker initialized it with a stub, we would not want to test that here.
+ class_linker_->MakeInitializedClassesVisiblyInitialized(Thread::Current(), /*wait=*/ true);
+
if (native_fnptr != nullptr) {
JNINativeMethod methods[] = { { method_name, method_sig, native_fnptr } };
ASSERT_EQ(JNI_OK, env_->RegisterNatives(jklass_, methods, 1))
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index d07ab98..7afa8b1 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -420,7 +420,7 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t ArmJniCallingConvention::OutArgSize() const {
+size_t ArmJniCallingConvention::OutFrameSize() const {
// Count param args, including JNIEnv* and jclass*; count 8-byte args twice.
size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs() + NumLongOrDoubleArgs();
// Account for arguments passed through r0-r3. (No FP args, AAPCS32 is soft-float.)
@@ -440,7 +440,7 @@
}
size_t out_args_size = RoundUp(size, kAapcsStackAlignment);
if (UNLIKELY(IsCriticalNative())) {
- DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
}
return out_args_size;
}
@@ -512,9 +512,9 @@
CHECK_GE(itr_slots_, kJniArgumentRegisterCount);
size_t offset =
displacement_.Int32Value()
- - OutArgSize()
+ - OutFrameSize()
+ ((itr_slots_ - kJniArgumentRegisterCount) * kFramePointerSize);
- CHECK_LT(offset, OutArgSize());
+ CHECK_LT(offset, OutFrameSize());
return FrameOffset(offset);
}
@@ -537,7 +537,7 @@
// Whether to use tail call (used only for @CriticalNative).
bool ArmJniCallingConvention::UseTailCall() const {
CHECK(IsCriticalNative());
- return OutArgSize() == 0u;
+ return OutFrameSize() == 0u;
}
} // namespace arm
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 7896d64..38f7184 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -65,7 +65,7 @@
// JNI calling convention
void Next() override; // Override default behavior for AAPCS
size_t FrameSize() const override;
- size_t OutArgSize() const override;
+ size_t OutFrameSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 32da141..06796c1 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -265,20 +265,14 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t Arm64JniCallingConvention::OutArgSize() const {
+size_t Arm64JniCallingConvention::OutFrameSize() const {
// Count param args, including JNIEnv* and jclass*.
size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs();
size_t num_fp_args = NumFloatOrDoubleArgs();
DCHECK_GE(all_args, num_fp_args);
size_t num_non_fp_args = all_args - num_fp_args;
- // Account for FP arguments passed through v0-v7.
- size_t num_stack_fp_args =
- num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
- // Account for other (integer and pointer) arguments passed through GPR (x0-x7).
- size_t num_stack_non_fp_args =
- num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
// The size of outgoing arguments.
- size_t size = (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize;
+ size_t size = GetNativeOutArgsSize(num_fp_args, num_non_fp_args);
// @CriticalNative can use tail call as all managed callee saves are preserved by AAPCS64.
static_assert((kCoreCalleeSpillMask & ~kAapcs64CoreCalleeSpillMask) == 0u);
@@ -291,7 +285,7 @@
}
size_t out_args_size = RoundUp(size, kAapcs64StackAlignment);
if (UNLIKELY(IsCriticalNative())) {
- DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
}
return out_args_size;
}
@@ -355,8 +349,8 @@
static_cast<size_t>(itr_float_and_doubles_))
- std::min(kMaxIntLikeRegisterArguments,
static_cast<size_t>(itr_args_ - itr_float_and_doubles_));
- size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
- CHECK_LT(offset, OutArgSize());
+ size_t offset = displacement_.Int32Value() - OutFrameSize() + (args_on_stack * kFramePointerSize);
+ CHECK_LT(offset, OutFrameSize());
return FrameOffset(offset);
}
@@ -378,7 +372,7 @@
// Whether to use tail call (used only for @CriticalNative).
bool Arm64JniCallingConvention::UseTailCall() const {
CHECK(IsCriticalNative());
- return OutArgSize() == 0u;
+ return OutFrameSize() == 0u;
}
} // namespace arm64
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 7beca08..d381d9d 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -56,7 +56,7 @@
ManagedRegister IntReturnRegister() override;
// JNI calling convention
size_t FrameSize() const override;
- size_t OutArgSize() const override;
+ size_t OutFrameSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index b4396f0..005ae91 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -303,9 +303,9 @@
// always at the bottom of a frame, but this doesn't work for outgoing
// native args). Includes alignment.
virtual size_t FrameSize() const = 0;
- // Size of outgoing arguments (stack portion), including alignment.
+ // Size of outgoing frame, i.e. stack arguments, @CriticalNative return PC if needed, alignment.
// -- Arguments that are passed via registers are excluded from this size.
- virtual size_t OutArgSize() const = 0;
+ virtual size_t OutFrameSize() const = 0;
// Number of references in stack indirect reference table
size_t ReferenceCount() const;
// Location where the segment state of the local indirect reference table is saved
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 036cdbb..913a3ba 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -220,7 +220,7 @@
// 1. Build the frame saving all callee saves, Method*, and PC return address.
// For @CriticalNative, this includes space for out args, otherwise just the managed frame.
const size_t managed_frame_size = main_jni_conv->FrameSize();
- const size_t main_out_arg_size = main_jni_conv->OutArgSize();
+ const size_t main_out_arg_size = main_jni_conv->OutFrameSize();
size_t current_frame_size = is_critical_native ? main_out_arg_size : managed_frame_size;
ManagedRegister method_register =
is_critical_native ? ManagedRegister::NoRegister() : mr_conv->MethodRegister();
@@ -582,7 +582,7 @@
if (LIKELY(!is_critical_native)) {
// Increase frame size for out args if needed by the end_jni_conv.
- const size_t end_out_arg_size = end_jni_conv->OutArgSize();
+ const size_t end_out_arg_size = end_jni_conv->OutFrameSize();
if (end_out_arg_size > current_out_arg_size) {
size_t out_arg_size_diff = end_out_arg_size - current_out_arg_size;
current_out_arg_size = end_out_arg_size;
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 6776f12..df45627 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -220,11 +220,10 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t X86JniCallingConvention::OutArgSize() const {
- // Count param args, including JNIEnv* and jclass*; count 8-byte args twice.
- size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs() + NumLongOrDoubleArgs();
- // The size of outgoiong arguments.
- size_t size = all_args * kFramePointerSize;
+size_t X86JniCallingConvention::OutFrameSize() const {
+ // The size of outgoing arguments.
+ size_t size = GetNativeOutArgsSize(/*num_args=*/ NumberOfExtraArgumentsForJni() + NumArgs(),
+ NumLongOrDoubleArgs());
// @CriticalNative can use tail call as all managed callee saves are preserved by AAPCS.
static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u);
@@ -244,14 +243,16 @@
if (return_type_ok && size == kFramePointerSize) {
// Note: This is not aligned to kNativeStackAlignment but that's OK for tail call.
static_assert(kFramePointerSize < kNativeStackAlignment);
- DCHECK_EQ(kFramePointerSize, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ // The stub frame size is considered 0 in the callee, where the return PC is a part of
+ // the callee frame, but it is kFramePointerSize in the compiled stub before the tail call.
+ DCHECK_EQ(0u, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
return kFramePointerSize;
}
}
size_t out_args_size = RoundUp(size, kNativeStackAlignment);
if (UNLIKELY(IsCriticalNative())) {
- DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
}
return out_args_size;
}
@@ -279,7 +280,8 @@
}
FrameOffset X86JniCallingConvention::CurrentParamStackOffset() {
- return FrameOffset(displacement_.Int32Value() - OutArgSize() + (itr_slots_ * kFramePointerSize));
+ return
+ FrameOffset(displacement_.Int32Value() - OutFrameSize() + (itr_slots_ * kFramePointerSize));
}
ManagedRegister X86JniCallingConvention::HiddenArgumentRegister() const {
@@ -295,7 +297,7 @@
bool X86JniCallingConvention::UseTailCall() const {
CHECK(IsCriticalNative());
- return OutArgSize() == kFramePointerSize;
+ return OutFrameSize() == kFramePointerSize;
}
} // namespace x86
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index 6f22c2b..81f617d 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -61,7 +61,7 @@
ManagedRegister IntReturnRegister() override;
// JNI calling convention
size_t FrameSize() const override;
- size_t OutArgSize() const override;
+ size_t OutFrameSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index e97cab8..44ae8be 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -208,21 +208,14 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t X86_64JniCallingConvention::OutArgSize() const {
+size_t X86_64JniCallingConvention::OutFrameSize() const {
// Count param args, including JNIEnv* and jclass*.
size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs();
size_t num_fp_args = NumFloatOrDoubleArgs();
DCHECK_GE(all_args, num_fp_args);
size_t num_non_fp_args = all_args - num_fp_args;
- // Account for FP arguments passed through Xmm0..Xmm7.
- size_t num_stack_fp_args =
- num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
- // Account for other (integer) arguments passed through GPR (RDI, RSI, RDX, RCX, R8, R9).
- size_t num_stack_non_fp_args =
- num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
// The size of outgoing arguments.
- static_assert(kFramePointerSize == kMmxSpillSize);
- size_t size = (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize;
+ size_t size = GetNativeOutArgsSize(num_fp_args, num_non_fp_args);
if (UNLIKELY(IsCriticalNative())) {
// We always need to spill xmm12-xmm15 as they are managed callee-saves
@@ -239,7 +232,7 @@
size_t out_args_size = RoundUp(size, kNativeStackAlignment);
if (UNLIKELY(IsCriticalNative())) {
- DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+ DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
}
return out_args_size;
}
@@ -297,8 +290,8 @@
- std::min(kMaxIntLikeRegisterArguments,
static_cast<size_t>(itr_args_ - itr_float_and_doubles_));
// Integer arguments passed through GPR
- size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
- CHECK_LT(offset, OutArgSize());
+ size_t offset = displacement_.Int32Value() - OutFrameSize() + (args_on_stack * kFramePointerSize);
+ CHECK_LT(offset, OutFrameSize());
return FrameOffset(offset);
}
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index d043a3e..5bde766 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -56,7 +56,7 @@
ManagedRegister IntReturnRegister() override;
// JNI calling convention
size_t FrameSize() const override;
- size_t OutArgSize() const override;
+ size_t OutFrameSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;