JNI: Inline fast-path for `JniMethodStart()`.
Golem results for art-opt-cc (higher is better):
linux-ia32                        before   after
NativeDowncallStaticNormal        35.306   47.382 (+34.20%)
NativeDowncallStaticNormal6       32.951   42.247 (+28.21%)
NativeDowncallStaticNormalRefs6   17.866   41.355 (+131.5%)
NativeDowncallVirtualNormal       35.341   46.836 (+32.53%)
NativeDowncallVirtualNormal6      32.403   41.791 (+28.97%)
NativeDowncallVirtualNormalRefs6  32.131   40.500 (+26.05%)
linux-x64                         before   after
NativeDowncallStaticNormal        33.350   43.716 (+31.08%)
NativeDowncallStaticNormal6       31.096   43.176 (+38.85%)
NativeDowncallStaticNormalRefs6   30.617   38.500 (+25.75%)
NativeDowncallVirtualNormal       33.234   43.672 (+32.41%)
NativeDowncallVirtualNormal6      30.617   42.247 (+37.98%)
NativeDowncallVirtualNormalRefs6  32.131   42.701 (+32.90%)
linux-armv7                       before   after
NativeDowncallStaticNormal        7.8701   9.9651 (+26.62%)
NativeDowncallStaticNormal6       7.4147   8.9463 (+20.66%)
NativeDowncallStaticNormalRefs6   6.8830   8.3868 (+21.85%)
NativeDowncallVirtualNormal       7.8316   9.8377 (+25.61%)
NativeDowncallVirtualNormal6      7.4147   9.3596 (+26.23%)
NativeDowncallVirtualNormalRefs6  6.6794   8.4325 (+26.25%)
linux-armv8                       before   after
NativeDowncallStaticNormal        7.6372   9.8571 (+29.07%)
NativeDowncallStaticNormal6       7.4147   9.4905 (+28.00%)
NativeDowncallStaticNormalRefs6   6.8527   8.6705 (+26.53%)
NativeDowncallVirtualNormal       7.4147   9.3183 (+25.67%)
NativeDowncallVirtualNormal6      7.0755   9.2593 (+30.86%)
NativeDowncallVirtualNormalRefs6  6.5604   8.2967 (+26.47%)
Note that NativeDowncallStaticNormalRefs6 on x86 has been jumping
around wildly since
https://android-review.googlesource.com/1905055
(between ~17.6 and ~32.4 for completely unrelated changes). If we
take 32.4 as the baseline, the improvement is only ~27.6%, in line
with the other x86 benchmarks.
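(For reference, that ~27.6% figure is 41.355 / 32.4 ≈ 1.276.)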
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: I771a4765bd3a7c4e58b94be4155515241ea6fa3c
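For orientation before the diff: the new per-architecture
TryToTransitionFromRunnableToNative() assembler hook emits, inline, roughly
the logic sketched below. This is a C++ model only, not the generated
assembly; the ThreadModel type, its field names, and the stored kNative value
are simplified placeholders for the real Thread internals, and the slow path
corresponds to the out-of-line pJniMethodStart call kept in section 8.2 of the
rewritten jni_compiler.cc.

#include <atomic>
#include <cstdint>

// Illustrative model only: names are simplified stand-ins for ART's
// Thread state-and-flags word and the held_mutexes[kMutatorLock] slot;
// the real stored values come from Thread::StoredThreadStateValue().
constexpr uint32_t kRunnableStateValue = 0u;  // kRunnable is stored as 0.
constexpr uint32_t kNativeStateValue = 1u;    // placeholder for the stored kNative value.

struct ThreadModel {
  // Low bits hold the thread state; the remaining bits hold flags
  // (suspend request, checkpoint request, ...).
  std::atomic<uint32_t> state_and_flags{kRunnableStateValue};
  bool holds_mutator_lock = true;  // models held_mutexes[kMutatorLock] != nullptr.
};

// Returns true when the inline fast path succeeds; false means "branch to the
// slow-path label", which calls the pJniMethodStart entrypoint.
inline bool TryToTransitionFromRunnableToNative(ThreadModel* self) {
  uint32_t expected = kRunnableStateValue;
  // CAS acquire: succeeds only if the state is Runnable and no flags are set,
  // since any set flag makes the whole word differ from kRunnableStateValue.
  if (!self->state_and_flags.compare_exchange_strong(
          expected, kNativeStateValue, std::memory_order_acquire)) {
    return false;
  }
  // Clear the mutator-lock bookkeeping with a plain store, as the generated
  // code does after a successful CAS.
  self->holds_mutator_lock = false;
  return true;
}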
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index a222ff3..8de5c9c 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -27,6 +27,7 @@
#include "common_compiler_test.h"
#include "compiler.h"
#include "dex/dex_file.h"
+#include "entrypoints/entrypoint_utils-inl.h"
#include "gtest/gtest.h"
#include "indirect_reference_table.h"
#include "java_frame_root_info.h"
@@ -337,6 +338,8 @@
static jobject jobj_;
static jobject class_loader_;
+ static void AssertCallerObjectLocked(JNIEnv* env);
+
static LockWord GetLockWord(jobject obj);
protected:
@@ -391,53 +394,17 @@
jmethodID jmethod_;
private:
- // Helper class that overrides original entrypoints with alternative versions
- // that check that the object (`this` or class) is locked.
- class ScopedSynchronizedEntryPointOverrides {
- public:
- ScopedSynchronizedEntryPointOverrides() {
- QuickEntryPoints* qpoints = &Thread::Current()->tlsPtr_.quick_entrypoints;
- jni_method_start_original_ = qpoints->pJniMethodStart;
- qpoints->pJniMethodStart = JniMethodStartSynchronizedOverride;
- jni_method_end_original_ = qpoints->pJniMethodEnd;
- qpoints->pJniMethodEnd = JniMethodEndSynchronizedOverride;
- jni_method_end_with_reference_original_ = qpoints->pJniMethodEndWithReference;
- qpoints->pJniMethodEndWithReference = JniMethodEndWithReferenceSynchronizedOverride;
- }
-
- ~ScopedSynchronizedEntryPointOverrides() {
- QuickEntryPoints* qpoints = &Thread::Current()->tlsPtr_.quick_entrypoints;
- qpoints->pJniMethodStart = jni_method_start_original_;
- qpoints->pJniMethodEnd = jni_method_end_original_;
- qpoints->pJniMethodEndWithReference = jni_method_end_with_reference_original_;
- }
- };
-
- static void AssertCallerObjectLocked(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
- static void JniMethodStartSynchronizedOverride(Thread* self);
- static void JniMethodEndSynchronizedOverride(Thread* self);
- static mirror::Object* JniMethodEndWithReferenceSynchronizedOverride(
- jobject result, Thread* self);
-
- using JniStartType = void (*)(Thread*);
- using JniEndType = void (*)(Thread*);
- using JniEndWithReferenceType = mirror::Object* (*)(jobject, Thread*);
-
- static JniStartType jni_method_start_original_;
- static JniEndType jni_method_end_original_;
- static JniEndWithReferenceType jni_method_end_with_reference_original_;
-
bool check_generic_jni_;
};
jclass JniCompilerTest::jklass_;
jobject JniCompilerTest::jobj_;
jobject JniCompilerTest::class_loader_;
-JniCompilerTest::JniStartType JniCompilerTest::jni_method_start_original_;
-JniCompilerTest::JniEndType JniCompilerTest::jni_method_end_original_;
-JniCompilerTest::JniEndWithReferenceType JniCompilerTest::jni_method_end_with_reference_original_;
-void JniCompilerTest::AssertCallerObjectLocked(Thread* self) {
+void JniCompilerTest::AssertCallerObjectLocked(JNIEnv* env) {
+ Thread* self = down_cast<JNIEnvExt*>(env)->GetSelf();
+ CHECK_EQ(self, Thread::Current());
+ ScopedObjectAccess soa(self);
ArtMethod** caller_frame = self->GetManagedStack()->GetTopQuickFrame();
CHECK(caller_frame != nullptr);
ArtMethod* caller = *caller_frame;
@@ -447,7 +414,10 @@
CHECK(!caller->IsCriticalNative());
CHECK(caller->IsSynchronized());
ObjPtr<mirror::Object> lock;
- if (caller->IsStatic()) {
+ if (self->GetManagedStack()->GetTopQuickFrameTag()) {
+ // Generic JNI.
+ lock = GetGenericJniSynchronizationObject(self, caller);
+ } else if (caller->IsStatic()) {
lock = caller->GetDeclaringClass();
} else {
uint8_t* sp = reinterpret_cast<uint8_t*>(caller_frame);
@@ -461,23 +431,6 @@
CHECK_EQ(Monitor::GetLockOwnerThreadId(lock), self->GetThreadId());
}
-void JniCompilerTest::JniMethodStartSynchronizedOverride(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
- AssertCallerObjectLocked(self);
- jni_method_start_original_(self);
-}
-
-void JniCompilerTest::JniMethodEndSynchronizedOverride(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
- jni_method_end_original_(self);
- AssertCallerObjectLocked(self);
-}
-
-mirror::Object* JniCompilerTest::JniMethodEndWithReferenceSynchronizedOverride(
- jobject result, Thread* self) NO_THREAD_SAFETY_ANALYSIS {
- mirror::Object* raw_result = jni_method_end_with_reference_original_(result, self);
- AssertCallerObjectLocked(self);
- return raw_result;
-}
-
LockWord JniCompilerTest::GetLockWord(jobject obj) {
ScopedObjectAccess soa(Thread::Current());
return soa.Decode<mirror::Object>(obj)->GetLockWord(/*as_volatile=*/ false);
@@ -886,7 +839,8 @@
}
int gJava_MyClassNatives_fooJJ_synchronized_calls[kJniKindCount] = {};
-jlong Java_MyClassNatives_fooJJ_synchronized(JNIEnv*, jobject, jlong x, jlong y) {
+jlong Java_MyClassNatives_fooJJ_synchronized(JNIEnv* env, jobject, jlong x, jlong y) {
+ JniCompilerTest::AssertCallerObjectLocked(env);
gJava_MyClassNatives_fooJJ_synchronized_calls[gCurrentJni]++;
return x | y;
}
@@ -894,7 +848,6 @@
void JniCompilerTest::CompileAndRun_fooJJ_synchronizedImpl() {
SetUpForTest(false, "fooJJ_synchronized", "(JJ)J",
CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooJJ_synchronized));
- ScopedSynchronizedEntryPointOverrides ssepo;
EXPECT_EQ(0, gJava_MyClassNatives_fooJJ_synchronized_calls[gCurrentJni]);
jlong a = 0x1000000020000000ULL;
@@ -1220,7 +1173,8 @@
JNI_TEST(CompileAndRunStaticIntObjectObjectMethod)
int gJava_MyClassNatives_fooSSIOO_calls[kJniKindCount] = {};
-jobject Java_MyClassNatives_fooSSIOO(JNIEnv*, jclass klass, jint x, jobject y, jobject z) {
+jobject Java_MyClassNatives_fooSSIOO(JNIEnv* env, jclass klass, jint x, jobject y, jobject z) {
+ JniCompilerTest::AssertCallerObjectLocked(env);
gJava_MyClassNatives_fooSSIOO_calls[gCurrentJni]++;
switch (x) {
case 1:
@@ -1236,7 +1190,6 @@
SetUpForTest(true, "fooSSIOO",
"(ILjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooSSIOO));
- ScopedSynchronizedEntryPointOverrides ssepo;
EXPECT_EQ(0, gJava_MyClassNatives_fooSSIOO_calls[gCurrentJni]);
jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, nullptr);
@@ -1505,7 +1458,8 @@
JNI_TEST(GetText)
int gJava_MyClassNatives_GetSinkProperties_calls[kJniKindCount] = {};
-jarray Java_MyClassNatives_GetSinkProperties(JNIEnv*, jobject thisObj, jstring s) {
+jarray Java_MyClassNatives_GetSinkProperties(JNIEnv* env, jobject thisObj, jstring s) {
+ JniCompilerTest::AssertCallerObjectLocked(env);
EXPECT_EQ(s, nullptr);
gJava_MyClassNatives_GetSinkProperties_calls[gCurrentJni]++;
@@ -1518,7 +1472,6 @@
void JniCompilerTest::GetSinkPropertiesNativeImpl() {
SetUpForTest(false, "getSinkPropertiesNative", "(Ljava/lang/String;)[Ljava/lang/Object;",
CURRENT_JNI_WRAPPER(Java_MyClassNatives_GetSinkProperties));
- ScopedSynchronizedEntryPointOverrides ssepo;
EXPECT_EQ(0, gJava_MyClassNatives_GetSinkProperties_calls[gCurrentJni]);
jarray result = down_cast<jarray>(
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 40110d7..bc1c842 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -74,28 +74,17 @@
return JNIMacroAssembler<kPointerSize>::Create(allocator, isa, features);
}
-enum class JniEntrypoint {
- kStart,
- kEnd
-};
-
template <PointerSize kPointerSize>
-static ThreadOffset<kPointerSize> GetJniEntrypointThreadOffset(JniEntrypoint which,
- bool reference_return) {
- if (which == JniEntrypoint::kStart) { // JniMethodStart
- ThreadOffset<kPointerSize> jni_start = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart);
- return jni_start;
- } else { // JniMethodEnd
- ThreadOffset<kPointerSize> jni_end(-1);
- if (reference_return) {
- // Pass result.
- jni_end = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
- } else {
- jni_end = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd);
- }
-
- return jni_end;
+static ThreadOffset<kPointerSize> GetJniMethodEndThreadOffset(bool reference_return) {
+ ThreadOffset<kPointerSize> jni_end(-1);
+ if (reference_return) {
+ // Pass result.
+ jni_end = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
+ } else {
+ jni_end = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd);
}
+
+ return jni_end;
}
@@ -249,7 +238,7 @@
__ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
}
- // 2. Lock the object (if synchronized) and transition out of runnable (if normal native).
+ // 2. Lock the object (if synchronized) and transition out of Runnable (if normal native).
// 2.1. Lock the synchronization object (`this` or class) for synchronized methods.
if (UNLIKELY(is_synchronized)) {
@@ -273,92 +262,19 @@
__ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniLockObject));
}
- // 2.2. Move frame down to allow space for out going args.
- // This prepares for both the `JniMethodStart()` call as well as the main native call.
- size_t current_out_arg_size = main_out_arg_size;
- if (UNLIKELY(is_critical_native)) {
- DCHECK_EQ(main_out_arg_size, current_frame_size);
- } else {
- __ IncreaseFrameSize(main_out_arg_size);
- current_frame_size += main_out_arg_size;
- }
-
- // 2.3. Spill all register arguments to preserve them across the `JniLockObject()`
- // call (if synchronized) and `JniMethodStart()` call (if normal native).
- // Native stack arguments are spilled directly to their argument stack slots and
- // references are converted to `jobject`. Native register arguments are spilled to
- // the reserved slots in the caller frame, references are not converted to `jobject`;
- // references from registers are actually skipped as they were already spilled above.
- // TODO: Implement fast-path for transition to Native and avoid this spilling.
- src_args.clear();
- dest_args.clear();
- refs.clear();
+ // 2.2. Transition from Runnable to Suspended.
+ // Managed callee-saves were already saved, so these registers are now available.
+ ArrayRef<const ManagedRegister> callee_save_scratch_regs = UNLIKELY(is_critical_native)
+ ? ArrayRef<const ManagedRegister>()
+ : main_jni_conv->CalleeSaveScratchRegisters();
+ std::unique_ptr<JNIMacroLabel> transition_to_native_slow_path;
+ std::unique_ptr<JNIMacroLabel> transition_to_native_resume;
if (LIKELY(!is_critical_native && !is_fast_native)) {
- mr_conv->ResetIterator(FrameOffset(current_frame_size));
- main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
- main_jni_conv->Next(); // Skip JNIEnv*.
- // Add a no-op move for the `jclass` / `this` argument to avoid the
- // next argument being treated as non-null if it's a reference.
- // Note: We have already spilled `this` as raw reference above. Since `this`
- // cannot be null, the argument move before the native call does not need
- // to reload the reference, and that argument move also needs to see the
- // `this` argument to avoid treating another reference as non-null.
- // Note: Using the method register for the no-op move even for `this`.
- src_args.emplace_back(method_register, kRawPointerSize);
- dest_args.emplace_back(method_register, kRawPointerSize);
- refs.push_back(kInvalidReferenceOffset);
- if (is_static) {
- main_jni_conv->Next(); // Skip `jclass`.
- } else {
- // Skip `this`
- DCHECK(mr_conv->HasNext());
- DCHECK(main_jni_conv->HasNext());
- DCHECK(mr_conv->IsCurrentParamAReference());
- mr_conv->Next();
- main_jni_conv->Next();
- }
- for (; mr_conv->HasNext(); mr_conv->Next(), main_jni_conv->Next()) {
- DCHECK(main_jni_conv->HasNext());
- static_assert(kObjectReferenceSize == 4u);
- bool is_reference = mr_conv->IsCurrentParamAReference();
- bool src_in_reg = mr_conv->IsCurrentParamInRegister();
- bool dest_in_reg = main_jni_conv->IsCurrentParamInRegister();
- if (is_reference && src_in_reg && dest_in_reg) {
- // We have already spilled the raw reference above.
- continue;
- }
- bool spill_jobject = is_reference && !dest_in_reg;
- size_t src_size = (!is_reference && mr_conv->IsCurrentParamALongOrDouble()) ? 8u : 4u;
- size_t dest_size = spill_jobject ? kRawPointerSize : src_size;
- src_args.push_back(src_in_reg
- ? ArgumentLocation(mr_conv->CurrentParamRegister(), src_size)
- : ArgumentLocation(mr_conv->CurrentParamStackOffset(), src_size));
- dest_args.push_back(dest_in_reg
- ? ArgumentLocation(mr_conv->CurrentParamStackOffset(), dest_size)
- : ArgumentLocation(main_jni_conv->CurrentParamStackOffset(), dest_size));
- refs.push_back(spill_jobject ? mr_conv->CurrentParamStackOffset() : kInvalidReferenceOffset);
- }
- __ MoveArguments(ArrayRef<ArgumentLocation>(dest_args),
- ArrayRef<ArgumentLocation>(src_args),
- ArrayRef<FrameOffset>(refs));
- } // if (!is_critical_native)
-
- // 2.4. Call into `JniMethodStart()` passing Thread* so that transition out of Runnable
- // can occur. We abuse the JNI calling convention here, that is guaranteed to support
- // passing two pointer arguments, `JNIEnv*` and `jclass`/`jobject`, and we use just one.
- if (LIKELY(!is_critical_native && !is_fast_native)) {
- // Skip this for @CriticalNative and @FastNative methods. They do not call JniMethodStart.
- ThreadOffset<kPointerSize> jni_start =
- GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kStart, reference_return);
- main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
- if (main_jni_conv->IsCurrentParamInRegister()) {
- __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
- __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start));
- } else {
- __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset());
- __ CallFromThread(jni_start);
- }
- method_register = ManagedRegister::NoRegister(); // Method register is clobbered by the call.
+ transition_to_native_slow_path = __ CreateLabel();
+ transition_to_native_resume = __ CreateLabel();
+ __ TryToTransitionFromRunnableToNative(transition_to_native_slow_path.get(),
+ callee_save_scratch_regs);
+ __ Bind(transition_to_native_resume.get());
}
// 3. Push local reference frame.
@@ -369,9 +285,6 @@
if (LIKELY(!is_critical_native)) {
// To pop the local reference frame later, we shall need the JNI environment pointer
// as well as the cookie, so we preserve them across calls in callee-save registers.
- // Managed callee-saves were already saved, so these registers are now available.
- ArrayRef<const ManagedRegister> callee_save_scratch_regs =
- main_jni_conv->CalleeSaveScratchRegisters();
CHECK_GE(callee_save_scratch_regs.size(), 3u); // At least 3 for each supported architecture.
jni_env_reg = callee_save_scratch_regs[0];
saved_cookie_reg = __ CoreRegisterWithSize(callee_save_scratch_regs[1], kIRTCookieSize);
@@ -387,7 +300,19 @@
// 4. Make the main native call.
- // 4.1. Fill arguments except the `JNIEnv*`.
+ // 4.1. Move frame down to allow space for out going args.
+ size_t current_out_arg_size = main_out_arg_size;
+ if (UNLIKELY(is_critical_native)) {
+ DCHECK_EQ(main_out_arg_size, current_frame_size);
+ } else {
+ __ IncreaseFrameSize(main_out_arg_size);
+ current_frame_size += main_out_arg_size;
+ }
+
+ // 4.2. Fill arguments except the `JNIEnv*`.
+ // Note: Non-null reference arguments in registers may point to the from-space if we
+ // took the slow-path for locking or transition to Native. However, we only need to
+ // compare them with null to construct `jobject`s, so we can still use them.
src_args.clear();
dest_args.clear();
refs.clear();
@@ -406,16 +331,10 @@
if (!is_static || main_jni_conv->IsCurrentParamOnStack()) {
// The method shall not be available in the `jclass` argument register.
// Make sure it is available in `callee_save_temp` for the call below.
- // (For @FastNative, the old method register can be clobbered by argument moves.
- // For normal native, it was already clobbered by the `JniMethodStart*()` call.)
+ // (The old method register can be clobbered by argument moves.)
ManagedRegister new_method_reg = __ CoreRegisterWithSize(callee_save_temp, kRawPointerSize);
- if (UNLIKELY(is_fast_native)) {
- DCHECK(!method_register.IsNoRegister());
- __ Move(new_method_reg, method_register, kRawPointerSize);
- } else {
- DCHECK(method_register.IsNoRegister());
- __ Load(new_method_reg, method_offset, kRawPointerSize);
- }
+ DCHECK(!method_register.IsNoRegister());
+ __ Move(new_method_reg, method_register, kRawPointerSize);
method_register = new_method_reg;
}
if (is_static) {
@@ -436,41 +355,19 @@
}
refs.push_back(kInvalidReferenceOffset);
main_jni_conv->Next();
- } else {
- // The `this` argument for instance methods is passed first, so that `MoveArguments()`
- // treats it as non-null. It has not been converted to `jobject` yet, not even for normal
- // native methods on architectures where this argument is passed on the stack (x86).
- DCHECK(mr_conv->HasNext());
- DCHECK(main_jni_conv->HasNext());
- DCHECK(mr_conv->IsCurrentParamAReference());
- src_args.push_back(UNLIKELY(is_fast_native) && mr_conv->IsCurrentParamInRegister()
- ? ArgumentLocation(mr_conv->CurrentParamRegister(), kObjectReferenceSize)
- : ArgumentLocation(mr_conv->CurrentParamStackOffset(), kObjectReferenceSize));
- dest_args.push_back(main_jni_conv->IsCurrentParamInRegister()
- ? ArgumentLocation(main_jni_conv->CurrentParamRegister(), kRawPointerSize)
- : ArgumentLocation(main_jni_conv->CurrentParamStackOffset(), kRawPointerSize));
- refs.push_back(mr_conv->CurrentParamStackOffset());
- mr_conv->Next();
- main_jni_conv->Next();
}
}
// Move normal arguments to their locations.
for (; mr_conv->HasNext(); mr_conv->Next(), main_jni_conv->Next()) {
DCHECK(main_jni_conv->HasNext());
- bool dest_in_reg = main_jni_conv->IsCurrentParamInRegister();
- if (LIKELY(!is_critical_native && !is_fast_native) && !dest_in_reg) {
- // Stack arguments for normal native have already been filled.
- continue;
- }
static_assert(kObjectReferenceSize == 4u);
bool is_reference = mr_conv->IsCurrentParamAReference();
size_t src_size = (!is_reference && mr_conv->IsCurrentParamALongOrDouble()) ? 8u : 4u;
size_t dest_size = is_reference ? kRawPointerSize : src_size;
- src_args.push_back(
- UNLIKELY(is_critical_native || is_fast_native) && mr_conv->IsCurrentParamInRegister()
- ? ArgumentLocation(mr_conv->CurrentParamRegister(), src_size)
- : ArgumentLocation(mr_conv->CurrentParamStackOffset(), src_size));
- dest_args.push_back(dest_in_reg
+ src_args.push_back(mr_conv->IsCurrentParamInRegister()
+ ? ArgumentLocation(mr_conv->CurrentParamRegister(), src_size)
+ : ArgumentLocation(mr_conv->CurrentParamStackOffset(), src_size));
+ dest_args.push_back(main_jni_conv->IsCurrentParamInRegister()
? ArgumentLocation(main_jni_conv->CurrentParamRegister(), dest_size)
: ArgumentLocation(main_jni_conv->CurrentParamStackOffset(), dest_size));
refs.push_back(is_reference ? mr_conv->CurrentParamStackOffset() : kInvalidReferenceOffset);
@@ -480,7 +377,7 @@
ArrayRef<ArgumentLocation>(src_args),
ArrayRef<FrameOffset>(refs));
- // 4.2. Create 1st argument, the JNI environment ptr.
+ // 4.3. Create 1st argument, the JNI environment ptr.
if (LIKELY(!is_critical_native)) {
main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
if (main_jni_conv->IsCurrentParamInRegister()) {
@@ -492,7 +389,7 @@
}
}
- // 4.3. Plant call to native code associated with method.
+ // 4.4. Plant call to native code associated with method.
MemberOffset jni_entrypoint_offset =
ArtMethod::EntryPointFromJniOffset(InstructionSetPointerSize(instruction_set));
if (UNLIKELY(is_critical_native)) {
@@ -509,7 +406,7 @@
method_register = ManagedRegister::NoRegister();
}
- // 4.4. Fix differences in result widths.
+ // 4.5. Fix differences in result widths.
if (main_jni_conv->RequiresSmallResultTypeExtension()) {
DCHECK(main_jni_conv->HasSmallReturnType());
CHECK(!is_critical_native || !main_jni_conv->UseTailCall());
@@ -591,7 +488,7 @@
if (LIKELY(!is_fast_native) || reference_return) {
ThreadOffset<kPointerSize> jni_end = is_fast_native
? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniDecodeReferenceResult)
- : GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd, reference_return);
+ : GetJniMethodEndThreadOffset<kPointerSize>(reference_return);
if (reference_return) {
// Pass result.
SetNativeParameter(jni_asm.get(), main_jni_conv.get(), main_jni_conv->ReturnRegister());
@@ -709,7 +606,14 @@
__ Jump(jclass_read_barrier_return.get());
}
- // 8.2. Suspend check slow path.
+ // 8.2. Slow path for transition to Native.
+ if (LIKELY(!is_critical_native && !is_fast_native)) {
+ __ Bind(transition_to_native_slow_path.get());
+ __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart));
+ __ Jump(transition_to_native_resume.get());
+ }
+
+ // 8.3. Suspend check slow path.
if (UNLIKELY(is_fast_native)) {
__ Bind(suspend_check_slow_path.get());
if (reference_return && main_out_arg_size != 0) {
@@ -729,7 +633,7 @@
__ Jump(suspend_check_resume.get());
}
- // 8.3. Exception poll slow path(s).
+ // 8.4. Exception poll slow path(s).
if (LIKELY(!is_critical_native)) {
__ Bind(exception_slow_path.get());
if (UNLIKELY(is_fast_native) && reference_return) {
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 3d45abd..b06f428 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -1050,6 +1050,35 @@
asm_.StoreToOffset(kStoreWord, tr, sp, dest_offset.Int32Value());
}
+void ArmVIXLJNIMacroAssembler::TryToTransitionFromRunnableToNative(
+ JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs) {
+ constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
+ constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
+ constexpr ThreadOffset32 thread_flags_offset = Thread::ThreadFlagsOffset<kArmPointerSize>();
+ constexpr ThreadOffset32 thread_held_mutex_mutator_lock_offset =
+ Thread::HeldMutexOffset<kArmPointerSize>(kMutatorLock);
+
+ DCHECK_GE(scratch_regs.size(), 2u);
+ vixl32::Register scratch = AsVIXLRegister(scratch_regs[0].AsArm());
+ vixl32::Register scratch2 = AsVIXLRegister(scratch_regs[1].AsArm());
+
+ // CAS acquire, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
+ vixl32::Label retry;
+ ___ Bind(&retry);
+ ___ Ldrex(scratch, MemOperand(tr, thread_flags_offset.Int32Value()));
+ ___ Mov(scratch2, kNativeStateValue);
+ // If any flags are set, go to the slow path.
+ ___ Cmp(scratch, kRunnableStateValue);
+ ___ B(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+ ___ Strex(scratch, scratch2, MemOperand(tr, thread_flags_offset.Int32Value()));
+ ___ Cmp(scratch, 0);
+ ___ B(ne, &retry);
+ ___ Dmb(DmbOptions::ISH); // Memory barrier "load-any" for the "acquire" operation.
+
+ // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`; `scratch` holds 0 at this point.
+ ___ Str(scratch, MemOperand(tr, thread_held_mutex_mutator_lock_offset.Int32Value()));
+}
+
void ArmVIXLJNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
vixl32::Register scratch = temps.Acquire();
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index 49f5e7c..7b9d7de 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -184,6 +184,10 @@
void Call(FrameOffset base, Offset offset) override;
void CallFromThread(ThreadOffset32 offset) override;
+ // Generate fast-path for transition to Native. Go to `label` if any thread flag is set.
+ void TryToTransitionFromRunnableToNative(
+ JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs) override;
+
// Generate suspend check and branch to `label` if there is a pending suspend request.
void SuspendCheck(JNIMacroLabel* label) override;
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index a505db0..8ae1d04 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -889,6 +889,34 @@
___ Str(scratch, MEM_OP(reg_x(SP), out_off.Int32Value()));
}
+void Arm64JNIMacroAssembler::TryToTransitionFromRunnableToNative(
+ JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED) {
+ constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
+ constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
+ constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kArm64PointerSize>();
+ constexpr ThreadOffset64 thread_held_mutex_mutator_lock_offset =
+ Thread::HeldMutexOffset<kArm64PointerSize>(kMutatorLock);
+
+ UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+ Register scratch = temps.AcquireW();
+ Register scratch2 = temps.AcquireW();
+
+ // CAS acquire, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
+ vixl::aarch64::Label retry;
+ ___ Bind(&retry);
+ static_assert(thread_flags_offset.Int32Value() == 0); // LDAXR/STXR require exact address.
+ ___ Ldaxr(scratch, MEM_OP(reg_x(TR)));
+ ___ Mov(scratch2, kNativeStateValue);
+ // If any flags are set, go to the slow path.
+ static_assert(kRunnableStateValue == 0u);
+ ___ Cbnz(scratch, Arm64JNIMacroLabel::Cast(label)->AsArm64());
+ ___ Stxr(scratch, scratch2, MEM_OP(reg_x(TR)));
+ ___ Cbnz(scratch, &retry);
+
+ // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`.
+ ___ Str(xzr, MEM_OP(reg_x(TR), thread_held_mutex_mutator_lock_offset.Int32Value()));
+}
+
void Arm64JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
Register scratch = temps.AcquireW();
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index b6e31c2..1c61d96 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -168,6 +168,10 @@
void Call(FrameOffset base, Offset offset) override;
void CallFromThread(ThreadOffset64 offset) override;
+ // Generate fast-path for transition to Native. Go to `label` if any thread flag is set.
+ void TryToTransitionFromRunnableToNative(
+ JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs) override;
+
// Generate suspend check and branch to `label` if there is a pending suspend request.
void SuspendCheck(JNIMacroLabel* label) override;
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index abb53b7..659ff4c 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -251,6 +251,10 @@
virtual void Call(FrameOffset base, Offset offset) = 0;
virtual void CallFromThread(ThreadOffset<kPointerSize> offset) = 0;
+ // Generate fast-path for transition to Native. Go to `label` if any thread flag is set.
+ virtual void TryToTransitionFromRunnableToNative(
+ JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs) = 0;
+
// Generate suspend check and branch to `label` if there is a pending suspend request.
virtual void SuspendCheck(JNIMacroLabel* label) = 0;
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 4ba3aa1..fc92c30 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -589,6 +589,35 @@
__ movl(Address(ESP, offset), scratch);
}
+void X86JNIMacroAssembler::TryToTransitionFromRunnableToNative(
+ JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs) {
+ constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
+ constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
+ constexpr ThreadOffset32 thread_flags_offset = Thread::ThreadFlagsOffset<kX86PointerSize>();
+ constexpr ThreadOffset32 thread_held_mutex_mutator_lock_offset =
+ Thread::HeldMutexOffset<kX86PointerSize>(kMutatorLock);
+
+ // We need to preserve managed argument EAX.
+ DCHECK_GE(scratch_regs.size(), 2u);
+ Register saved_eax = scratch_regs[0].AsX86().AsCpuRegister();
+ Register scratch = scratch_regs[1].AsX86().AsCpuRegister();
+
+ // CAS acquire, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
+ __ movl(saved_eax, EAX); // Save EAX.
+ static_assert(kRunnableStateValue == 0u);
+ __ xorl(EAX, EAX);
+ __ movl(scratch, Immediate(kNativeStateValue));
+ __ fs()->LockCmpxchgl(Address::Absolute(thread_flags_offset.Uint32Value()), scratch);
+ // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
+ __ movl(EAX, saved_eax); // Restore EAX; MOV does not change flags.
+ // If any flags are set, go to the slow path.
+ __ j(kNotZero, X86JNIMacroLabel::Cast(label)->AsX86());
+
+ // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`.
+ __ fs()->movl(Address::Absolute(thread_held_mutex_mutator_lock_offset.Uint32Value()),
+ Immediate(0));
+}
+
void X86JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
__ fs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>()),
Immediate(Thread::SuspendOrCheckpointRequestFlags()));
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index 058e040..0af6371 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -160,6 +160,10 @@
void Call(FrameOffset base, Offset offset) override;
void CallFromThread(ThreadOffset32 offset) override;
+ // Generate fast-path for transition to Native. Go to `label` if any thread flag is set.
+ void TryToTransitionFromRunnableToNative(
+ JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs) override;
+
// Generate suspend check and branch to `label` if there is a pending suspend request.
void SuspendCheck(JNIMacroLabel* label) override;
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index de99e74..3ddb689 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -671,6 +671,33 @@
__ movq(Address(CpuRegister(RSP), offset), scratch);
}
+void X86_64JNIMacroAssembler::TryToTransitionFromRunnableToNative(
+ JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED) {
+ constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative);
+ constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable);
+ constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kX86_64PointerSize>();
+ constexpr ThreadOffset64 thread_held_mutex_mutator_lock_offset =
+ Thread::HeldMutexOffset<kX86_64PointerSize>(kMutatorLock);
+
+ CpuRegister rax(RAX); // RAX can be freely clobbered. It does not hold any argument.
+ CpuRegister scratch = GetScratchRegister();
+
+ // CAS acquire, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
+ static_assert(kRunnableStateValue == 0u);
+ __ xorl(rax, rax);
+ __ movl(scratch, Immediate(kNativeStateValue));
+ __ gs()->LockCmpxchgl(Address::Absolute(thread_flags_offset.Uint32Value(), /*no_rip=*/ true),
+ scratch);
+ // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
+ // If any flags are set, go to the slow path.
+ __ j(kNotZero, X86_64JNIMacroLabel::Cast(label)->AsX86_64());
+
+ // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`.
+ __ gs()->movq(
+ Address::Absolute(thread_held_mutex_mutator_lock_offset.Uint32Value(), /*no_rip=*/ true),
+ Immediate(0));
+}
+
void X86_64JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
__ gs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>(), true),
Immediate(Thread::SuspendOrCheckpointRequestFlags()));
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index 3e5dfb7..6eb7873 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -180,6 +180,10 @@
void Call(FrameOffset base, Offset offset) override;
void CallFromThread(ThreadOffset64 offset) override;
+ // Generate fast-path for transition to Native. Go to `label` if any thread flag is set.
+ void TryToTransitionFromRunnableToNative(
+ JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs) override;
+
// Generate suspend check and branch to `label` if there is a pending suspend request.
void SuspendCheck(JNIMacroLabel* label) override;