JNI: Inline fast-path for `JniMethodStart()`.
Golem results for art-opt-cc (higher is better):
linux-ia32 before after
NativeDowncallStaticNormal 35.306 47.382 (+34.20%)
NativeDowncallStaticNormal6 32.951 42.247 (+28.21%)
NativeDowncallStaticNormalRefs6 17.866 41.355 (+131.5%)
NativeDowncallVirtualNormal 35.341 46.836 (+32.53%)
NativeDowncallVirtualNormal6 32.403 41.791 (+28.97%)
NativeDowncallVirtualNormalRefs6 32.131 40.500 (+26.05%)
linux-x64 before after
NativeDowncallStaticNormal 33.350 43.716 (+31.08%)
NativeDowncallStaticNormal6 31.096 43.176 (+38.85%)
NativeDowncallStaticNormalRefs6 30.617 38.500 (+25.75%)
NativeDowncallVirtualNormal 33.234 43.672 (+32.41%)
NativeDowncallVirtualNormal6 30.617 42.247 (+37.98%)
NativeDowncallVirtualNormalRefs6 32.131 42.701 (+32.90%)
linux-armv7 before after
NativeDowncallStaticNormal 7.8701 9.9651 (+26.62%)
NativeDowncallStaticNormal6 7.4147 8.9463 (+20.66%)
NativeDowncallStaticNormalRefs6 6.8830 8.3868 (+21.85%)
NativeDowncallVirtualNormal 7.8316 9.8377 (+25.61%)
NativeDowncallVirtualNormal6 7.4147 9.3596 (+26.23%)
NativeDowncallVirtualNormalRefs6 6.6794 8.4325 (+26.25%)
linux-armv8 before after
NativeDowncallStaticNormal 7.6372 9.8571 (+29.07%)
NativeDowncallStaticNormal6 7.4147 9.4905 (+28.00%)
NativeDowncallStaticNormalRefs6 6.8527 8.6705 (+26.53%)
NativeDowncallVirtualNormal 7.4147 9.3183 (+25.67%)
NativeDowncallVirtualNormal6 7.0755 9.2593 (+30.86%)
NativeDowncallVirtualNormalRefs6 6.5604 8.2967 (+26.47%)
Note that NativeDowncallStaticNormalRefs6 on x86 has been
jumping like crazy since
https://android-review.googlesource.com/1905055
between ~17.6 and ~32.4 for completely unrelated changes,
so if we take ~32.4 as the baseline, the improvement is
only ~27.6%, in line with the other x86 benchmarks.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: I771a4765bd3a7c4e58b94be4155515241ea6fa3c
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 4ba3aa1..fc92c30 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -589,6 +589,35 @@
__ movl(Address(ESP, offset), scratch);
}
+void X86JNIMacroAssembler::TryToTransitionFromRunnableToNative(
+ JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs) {
+ constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
+ constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
+ constexpr ThreadOffset32 thread_flags_offset = Thread::ThreadFlagsOffset<kX86PointerSize>();
+ constexpr ThreadOffset32 thread_held_mutex_mutator_lock_offset =
+ Thread::HeldMutexOffset<kX86PointerSize>(kMutatorLock);
+
+ // We need to preserve managed argument EAX: CMPXCHG uses EAX as its implicit compare operand.
+ DCHECK_GE(scratch_regs.size(), 2u);
+ Register saved_eax = scratch_regs[0].AsX86().AsCpuRegister();
+ Register scratch = scratch_regs[1].AsX86().AsCpuRegister();
+
+ // CAS acquire, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
+ __ movl(saved_eax, EAX); // Save EAX.
+ static_assert(kRunnableStateValue == 0u);
+ __ xorl(EAX, EAX); // EAX = kRunnableStateValue (0), the expected old value for CMPXCHG.
+ __ movl(scratch, Immediate(kNativeStateValue)); // Desired new state word.
+ __ fs()->LockCmpxchgl(Address::Absolute(thread_flags_offset.Uint32Value()), scratch);
+ // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
+ __ movl(EAX, saved_eax); // Restore EAX; MOV does not change flags.
+ // ZF is still from LOCK CMPXCHG: clear iff the CAS failed, i.e. any thread flags were
+ // set (or the state was not kRunnable); in that case go to the slow path.
+ __ j(kNotZero, X86JNIMacroLabel::Cast(label)->AsX86());
+
+ // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`.
+ __ fs()->movl(Address::Absolute(thread_held_mutex_mutator_lock_offset.Uint32Value()),
+ Immediate(0));
+}
+
void X86JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
__ fs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>()),
Immediate(Thread::SuspendOrCheckpointRequestFlags()));