JNI: Inline fast-path for `JniMethodStart()`.

Golem results for art-opt-cc (higher is better):

linux-ia32                       before  after
NativeDowncallStaticNormal       35.306  47.382 (+34.20%)
NativeDowncallStaticNormal6      32.951  42.247 (+28.21%)
NativeDowncallStaticNormalRefs6  17.866  41.355 (+131.5%)
NativeDowncallVirtualNormal      35.341  46.836 (+32.53%)
NativeDowncallVirtualNormal6     32.403  41.791 (+28.97%)
NativeDowncallVirtualNormalRefs6 32.131  40.500 (+26.05%)

linux-x64                        before  after
NativeDowncallStaticNormal       33.350  43.716 (+31.08%)
NativeDowncallStaticNormal6      31.096  43.176 (+38.85%)
NativeDowncallStaticNormalRefs6  30.617  38.500 (+25.75%)
NativeDowncallVirtualNormal      33.234  43.672 (+32.41%)
NativeDowncallVirtualNormal6     30.617  42.247 (+37.98%)
NativeDowncallVirtualNormalRefs6 32.131  42.701 (+32.90%)

linux-armv7                      before  after
NativeDowncallStaticNormal       7.8701  9.9651 (+26.62%)
NativeDowncallStaticNormal6      7.4147  8.9463 (+20.66%)
NativeDowncallStaticNormalRefs6  6.8830  8.3868 (+21.85%)
NativeDowncallVirtualNormal      7.8316  9.8377 (+25.61%)
NativeDowncallVirtualNormal6     7.4147  9.3596 (+26.23%)
NativeDowncallVirtualNormalRefs6 6.6794  8.4325 (+26.25%)

linux-armv8                      before  after
NativeDowncallStaticNormal       7.6372  9.8571 (+29.07%)
NativeDowncallStaticNormal6      7.4147  9.4905 (+28.00%)
NativeDowncallStaticNormalRefs6  6.8527  8.6705 (+26.53%)
NativeDowncallVirtualNormal      7.4147  9.3183 (+25.67%)
NativeDowncallVirtualNormal6     7.0755  9.2593 (+30.86%)
NativeDowncallVirtualNormalRefs6 6.5604  8.2967 (+26.47%)

Note that NativeDowncallStaticNormalRefs6 on x86 has been jumping like crazy since
https://android-review.googlesource.com/1905055
between ~17.6 and ~32.4 for completely unrelated changes, so if we take the 32.4 as a baseline, the improvement is only ~27.6%, in line with the other x86 benchmarks.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: I771a4765bd3a7c4e58b94be4155515241ea6fa3c

commit: ce2a3445a4d777acea889056abe149650b332058 [log] [tgz]
author: Vladimir Marko <vmarko@google.com> Wed Nov 24 15:10:26 2021 +0000
committer: Vladimir Marko <vmarko@google.com> Tue Dec 07 12:01:50 2021 +0000
tree: 4f329335dd42e9039ac22021e4c664676d93ca51
parent: 76ec6f6562229033b2fecc08c01bef58a9488c92 [diff] [blame]
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 4ba3aa1..fc92c30 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc

@@ -589,6 +589,35 @@
   __ movl(Address(ESP, offset), scratch);
 }
 
+void X86JNIMacroAssembler::TryToTransitionFromRunnableToNative(
+    JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs) {
+  constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
+  constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
+  constexpr ThreadOffset32 thread_flags_offset = Thread::ThreadFlagsOffset<kX86PointerSize>();
+  constexpr ThreadOffset32 thread_held_mutex_mutator_lock_offset =
+      Thread::HeldMutexOffset<kX86PointerSize>(kMutatorLock);

+  // EAX is the implicit CMPXCHG comparand, so the managed argument it holds must be preserved.
+  DCHECK_GE(scratch_regs.size(), 2u);  // One scratch to save EAX, one for the new state value.
+  Register saved_eax = scratch_regs[0].AsX86().AsCpuRegister();
+  Register scratch = scratch_regs[1].AsX86().AsCpuRegister();

+  // CAS acquire, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
+  __ movl(saved_eax, EAX);  // Save EAX.
+  static_assert(kRunnableStateValue == 0u);  // Lets us materialize the old value with XOR below.
+  __ xorl(EAX, EAX);  // EAX = expected old value (kRunnableStateValue == 0).
+  __ movl(scratch, Immediate(kNativeStateValue));  // New value to store if the compare succeeds.
+  __ fs()->LockCmpxchgl(Address::Absolute(thread_flags_offset.Uint32Value()), scratch);
+  // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
+  __ movl(EAX, saved_eax);  // Restore EAX; MOV does not change flags.
+  // If the CAS failed (ZF clear), e.g. because some flags are set, go to the slow path at `label`.
+  __ j(kNotZero, X86JNIMacroLabel::Cast(label)->AsX86());

+  // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`.
+  __ fs()->movl(Address::Absolute(thread_held_mutex_mutator_lock_offset.Uint32Value()),
+                Immediate(0));
+}
+
 void X86JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
   __ fs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>()),
                  Immediate(Thread::SuspendOrCheckpointRequestFlags()));
commit	ce2a3445a4d777acea889056abe149650b332058	[log] [tgz]
author	Vladimir Marko <vmarko@google.com>	Wed Nov 24 15:10:26 2021 +0000
committer	Vladimir Marko <vmarko@google.com>	Tue Dec 07 12:01:50 2021 +0000
tree	4f329335dd42e9039ac22021e4c664676d93ca51
parent	76ec6f6562229033b2fecc08c01bef58a9488c92 [diff] [blame]