diff options
author | 2021-12-08 14:16:21 +0000 | |
---|---|---|
committer | 2021-12-14 09:57:41 +0000 | |
commit | e74e0ce0b5dfb4dae697d55bd64fb4819cc7ef61 (patch) | |
tree | 3ff14b821138b8dce7cd27c60d42fb7b3b35e09a /runtime | |
parent | b087bb226d65a6b1a62102694168b9cdc5b3f665 (diff) |
JNI: Inline fast-path for `JniMethodEnd()`.
Golem results for art-opt-cc (higher is better):
linux-ia32 before after
NativeDowncallStaticNormal 46.766 51.016 (+9.086%)
NativeDowncallStaticNormal6 42.268 45.748 (+8.235%)
NativeDowncallStaticNormalRefs6 41.355 44.776 (+8.272%)
NativeDowncallVirtualNormal 46.361 52.527 (+13.30%)
NativeDowncallVirtualNormal6 41.812 45.206 (+8.118%)
NativeDowncallVirtualNormalRefs6 40.500 44.169 (+9.059%)
(The NativeDowncallVirtualNormal result for x86 is skewed
by one extra good run as Golem reports the best result in
the summary. Using the second best and most frequent
result 50.5, the improvement is only around 8.9%.)
linux-x64 before after
NativeDowncallStaticNormal 44.169 47.976 (+8.620%)
NativeDowncallStaticNormal6 43.198 46.836 (+8.423%)
NativeDowncallStaticNormalRefs6 38.481 44.687 (+16.13%)
NativeDowncallVirtualNormal 43.672 47.405 (+8.547%)
NativeDowncallVirtualNormal6 42.268 45.726 (+8.182%)
NativeDowncallVirtualNormalRefs6 41.355 44.687 (+8.057%)
(The NativeDowncallStaticNormalRefs6 result for x86-64 is
a bit inflated because recent results jump between ~38.5
and ~40.5. If we take the latter as the baseline, the
improvement is only around 10.3%.)
linux-armv7 before after
NativeDowncallStaticNormal 10.659 14.620 (+37.16%)
NativeDowncallStaticNormal6 9.8377 13.120 (+33.36%)
NativeDowncallStaticNormalRefs6 8.8714 11.454 (+29.11%)
NativeDowncallVirtualNormal 10.511 14.349 (+36.51%)
NativeDowncallVirtualNormal6 9.9701 13.347 (+33.87%)
NativeDowncallVirtualNormalRefs6 8.9241 11.454 (+28.35%)
linux-armv8 before after
NativeDowncallStaticNormal 10.608 16.329 (+53.93%)
NativeDowncallStaticNormal6 10.179 15.347 (+50.76%)
NativeDowncallStaticNormalRefs6 9.2457 13.705 (+48.23%)
NativeDowncallVirtualNormal 9.9850 14.903 (+49.25%)
NativeDowncallVirtualNormal6 9.9206 14.757 (+48.75%)
NativeDowncallVirtualNormalRefs6 8.8235 12.789 (+44.94%)
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: Ie144bc4f7f82be95790ea7d3123b81a3b6bfa603
Diffstat (limited to 'runtime')
-rw-r--r-- | runtime/arch/arm/jni_entrypoints_arm.S | 66 | ||||
-rw-r--r-- | runtime/arch/arm64/jni_entrypoints_arm64.S | 60 | ||||
-rw-r--r-- | runtime/arch/x86/jni_entrypoints_x86.S | 57 | ||||
-rw-r--r-- | runtime/arch/x86_64/jni_entrypoints_x86_64.S | 48 | ||||
-rw-r--r-- | runtime/entrypoints/quick/quick_default_externs.h | 2 | ||||
-rw-r--r-- | runtime/entrypoints/quick/quick_default_init_entrypoints.h | 6 | ||||
-rw-r--r-- | runtime/entrypoints/quick/quick_entrypoints.h | 28 | ||||
-rw-r--r-- | runtime/entrypoints/quick/quick_entrypoints_list.h | 3 | ||||
-rw-r--r-- | runtime/entrypoints/quick/quick_jni_entrypoints.cc | 72 | ||||
-rw-r--r-- | runtime/entrypoints_order_test.cc | 5 | ||||
-rw-r--r-- | runtime/oat.h | 4 | ||||
-rw-r--r-- | runtime/thread-inl.h | 6 | ||||
-rw-r--r-- | runtime/thread.cc | 1 | ||||
-rw-r--r-- | runtime/thread.h | 6 |
14 files changed, 203 insertions, 161 deletions
diff --git a/runtime/arch/arm/jni_entrypoints_arm.S b/runtime/arch/arm/jni_entrypoints_arm.S index d1431cfed2..b3d89f9f4f 100644 --- a/runtime/arch/arm/jni_entrypoints_arm.S +++ b/runtime/arch/arm/jni_entrypoints_arm.S @@ -50,12 +50,39 @@ ENTRY \name .ifnc \arg1, none mov r0, \arg1 @ Pass arg1. .endif - bl \cxx_name @ Call cxx_name(...). + bl \cxx_name @ Call cxx_name(...). // Restore args and R4 and return. RESTORE_MANAGED_ARGS_R4_AND_RETURN /*restore_cfa*/ 0 END \name .endm +.macro JNI_SAVE_RETURN_VALUE_TRAMPOLINE name, cxx_name, arg1, arg2 = "none", label = "none" + .extern \cxx_name +ENTRY \name + .ifnc \label, none + \label: + .endif + // Save GPR return registers and return address. Also save r4 for stack alignment. + push {r0-r1, r4, lr} + .cfi_adjust_cfa_offset 16 + .cfi_rel_offset lr, 12 + // Save FPR return registers. + vpush {s0-s1} + .cfi_adjust_cfa_offset 8 + // Call `cxx_name()`. + mov r0, \arg1 @ Pass arg1. + .ifnc \arg2, none + mov r1, \arg2 @ Pass arg2. + .endif + bl \cxx_name @ Call cxx_name(...). + // Restore FPR return registers. + vpop {s0-s1} + .cfi_adjust_cfa_offset -8 + // Restore GPR return registers and r4 and return. + pop {r0-r1, r4, pc} +END \name +.endm + /* * Jni dlsym lookup stub. */ @@ -298,13 +325,22 @@ JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_read_barrier, artJniReadBarrier */ JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_method_start, artJniMethodStart, rSELF - /* * Trampoline to `artJniMonitoredMethodStart()` that preserves all managed arguments. */ JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_monitored_method_start, artJniMonitoredMethodStart, rSELF /* + * Trampoline to `artJniMethodEnd()` that preserves all return registers. + */ +JNI_SAVE_RETURN_VALUE_TRAMPOLINE art_jni_method_end, artJniMethodEnd, rSELF + + /* + * Trampoline to `artJniMonitoredMethodEnd()` that preserves all return registers. 
+ */ +JNI_SAVE_RETURN_VALUE_TRAMPOLINE art_jni_monitored_method_end, artJniMonitoredMethodEnd, rSELF + + /* * Entry from JNI stub that tries to lock the object in a fast path and * calls `artLockObjectFromCode()` (the same as for managed code) for the * difficult cases, may block for GC. @@ -376,26 +412,8 @@ END art_jni_unlock_object * Callee-save registers have been saved and can be used as temporaries. * Return registers r0-r1 and s0-s1 need to be preserved. */ - .extern artJniUnlockObject -ENTRY art_jni_unlock_object_no_inline // This is also the slow path for art_jni_unlock_object. - // Note that we need a local label as the assembler emits bad instructions - // for CBZ/CBNZ if we try to jump to `art_jni_unlock_object_no_inline`. -.Lunlock_object_jni_slow: - // Save GPR return registers and return address. Also save r4 for stack alignment. - push {r0-r1, r4, lr} - .cfi_adjust_cfa_offset 16 - .cfi_rel_offset lr, 12 - // Save FPR return registers. - vpush {s0-s1} - .cfi_adjust_cfa_offset 8 - // Call `artJniUnlockObject()`. - mov r0, r4 @ Pass the object to unlock. - mov r1, rSELF @ Pass Thread::Current(). - bl artJniUnlockObject @ (Object* obj, Thread*) - // Restore FPR return registers. - vpop {s0-s1} - .cfi_adjust_cfa_offset -8 - // Restore GPR return registers and r4 and return. - pop {r0-r1, r4, pc} -END art_jni_unlock_object_no_inline +JNI_SAVE_RETURN_VALUE_TRAMPOLINE art_jni_unlock_object_no_inline, artJniUnlockObject, r4, rSELF, \ + /* Note that we need a local label as the assembler emits bad instructions */ \ + /* for CBZ/CBNZ if we try to jump to `art_jni_unlock_object_no_inline`. 
*/ \ + .Lunlock_object_jni_slow diff --git a/runtime/arch/arm64/jni_entrypoints_arm64.S b/runtime/arch/arm64/jni_entrypoints_arm64.S index fa071fdfd5..8872362294 100644 --- a/runtime/arch/arm64/jni_entrypoints_arm64.S +++ b/runtime/arch/arm64/jni_entrypoints_arm64.S @@ -44,25 +44,48 @@ .endm .macro JNI_SAVE_MANAGED_ARGS_TRAMPOLINE name, cxx_name, arg1 = "none" - .extern cxx_name + .extern \cxx_name ENTRY \name // Save args and LR. SAVE_ALL_ARGS_INCREASE_FRAME /*padding*/ 8 + /*LR*/ 8 - str lr, [sp, #(ALL_ARGS_SIZE + /*padding*/ 8)] + str lr, [sp, #(ALL_ARGS_SIZE + /*padding*/ 8)] .cfi_rel_offset lr, ALL_ARGS_SIZE + /*padding*/ 8 // Call `cxx_name()`. .ifnc \arg1, none mov x0, \arg1 // Pass arg1. .endif - bl \cxx_name // Call cxx_name(...). + bl \cxx_name // Call cxx_name(...). // Restore LR and args and return. - ldr lr, [sp, #(ALL_ARGS_SIZE + /*padding*/ 8)] + ldr lr, [sp, #(ALL_ARGS_SIZE + /*padding*/ 8)] .cfi_restore lr RESTORE_ALL_ARGS_DECREASE_FRAME /*padding*/ 8 + /*LR*/ 8 ret END \name .endm +.macro JNI_SAVE_RETURN_VALUE_TRAMPOLINE name, cxx_name, arg1, arg2 = "none" + .extern \cxx_name +ENTRY \name + // Save return registers and return address. + stp x0, lr, [sp, #-32]! + .cfi_adjust_cfa_offset 32 + .cfi_rel_offset lr, 8 + str d0, [sp, #16] + // Call `cxx_name()`. + mov x0, \arg1 // Pass arg1. + .ifnc \arg2, none + mov x1, \arg2 // Pass arg2. + .endif + bl \cxx_name // Call cxx_name(...). + // Restore return registers and return. + ldr d0, [sp, #16] + ldp x0, lr, [sp], #32 + .cfi_adjust_cfa_offset -32 + .cfi_restore lr + ret +END \name +.endm + /* * Jni dlsym lookup stub. */ @@ -349,6 +372,16 @@ JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_method_start, artJniMethodStart, xSELF JNI_SAVE_MANAGED_ARGS_TRAMPOLINE art_jni_monitored_method_start, artJniMonitoredMethodStart, xSELF /* + * Trampoline to `artJniMethodEnd()` that preserves all return registers. 
+ */ +JNI_SAVE_RETURN_VALUE_TRAMPOLINE art_jni_method_end, artJniMethodEnd, xSELF + + /* + * Trampoline to `artJniMonitoredMethodEnd()` that preserves all return registers. + */ +JNI_SAVE_RETURN_VALUE_TRAMPOLINE art_jni_monitored_method_end, artJniMonitoredMethodEnd, xSELF + + /* * Entry from JNI stub that tries to lock the object in a fast path and * calls `artLockObjectFromCode()` (the same as for managed code) for the * difficult cases, may block for GC. @@ -419,22 +452,5 @@ END art_jni_unlock_object * Callee-save registers have been saved and can be used as temporaries. * Return registers r0 and d0 need to be preserved. */ - .extern artJniUnlockObject -ENTRY art_jni_unlock_object_no_inline // This is also the slow path for art_jni_unlock_object. - // Save return registers and return address. - stp x0, lr, [sp, #-32]! - .cfi_adjust_cfa_offset 32 - .cfi_rel_offset lr, 8 - str d0, [sp, #16] - // Call `artJniUnlockObject()`. - mov x0, x15 // Pass the object to unlock. - mov x1, xSELF // Pass Thread::Current(). - bl artJniUnlockObject // (Object* obj, Thread*) - // Restore return registers and return. - ldr d0, [sp, #16] - ldp x0, lr, [sp], #32 - .cfi_adjust_cfa_offset -32 - .cfi_restore lr - ret -END art_jni_unlock_object_no_inline +JNI_SAVE_RETURN_VALUE_TRAMPOLINE art_jni_unlock_object_no_inline, artJniUnlockObject, x15, xSELF diff --git a/runtime/arch/x86/jni_entrypoints_x86.S b/runtime/arch/x86/jni_entrypoints_x86.S index d1d0f41216..d82750973d 100644 --- a/runtime/arch/x86/jni_entrypoints_x86.S +++ b/runtime/arch/x86/jni_entrypoints_x86.S @@ -62,6 +62,31 @@ DEFINE_FUNCTION \name END_FUNCTION \name END_MACRO +MACRO4(JNI_SAVE_RETURN_VALUE_TRAMPOLINE, name, cxx_name, arg1, arg2) +DEFINE_FUNCTION \name + // Save return registers. + PUSH_ARG edx + PUSH_ARG eax + .ifnc \arg2, none + INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 4 + movsd %xmm0, 0(%esp) + PUSH_ARG RAW_VAR(arg2) // Pass arg2. 
+ .else + INCREASE_FRAME /*padding*/ 4 + /*mmx0*/ 8 + /*padding*/ 4 + movsd %xmm0, 4(%esp) + .endif + // Call `cxx_name()`. + PUSH_ARG RAW_VAR(arg1) // Pass arg1. + call CALLVAR(cxx_name) // Call cxx_name(...). + // Restore return registers and return. + movsd 8(%esp), %xmm0 + DECREASE_FRAME /*call args*/ 8 + /*xmm0*/ 8 + /*padding*/ 4 + POP_ARG eax + POP_ARG edx + ret +END_FUNCTION \name +END_MACRO + /* * Jni dlsym lookup stub. */ @@ -267,6 +292,17 @@ JNI_SAVE_MANAGED_ARGS_TRAMPOLINE \ art_jni_monitored_method_start, artJniMonitoredMethodStart, fs:THREAD_SELF_OFFSET /* + * Trampoline to `artJniMethodEnd()` that preserves all return registers. + */ +JNI_SAVE_RETURN_VALUE_TRAMPOLINE art_jni_method_end, artJniMethodEnd, fs:THREAD_SELF_OFFSET, none + + /* + * Trampoline to `artJniMonitoredMethodEnd()` that preserves all return registers. + */ +JNI_SAVE_RETURN_VALUE_TRAMPOLINE \ + art_jni_monitored_method_end, artJniMonitoredMethodEnd, fs:THREAD_SELF_OFFSET, none + + /* * Entry from JNI stub that tries to lock the object in a fast path and * calls `artLockObjectFromCode()` (the same as for managed code) for the * difficult cases, may block for GC. @@ -346,23 +382,6 @@ END_FUNCTION art_jni_unlock_object * Callee-save registers have been saved and can be used as temporaries (except EBP). * Return registers EAX, EDX and mmx0 need to be preserved. */ -DEFINE_FUNCTION art_jni_unlock_object_no_inline // This is also the slow path for art_jni_unlock_object. - // Save return registers. - PUSH_ARG edx - PUSH_ARG eax - INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 4 - movsd %xmm0, 0(%esp) - // Note: The stack is not 16-byte aligned here but it shall be after pushing args for the call. - // Call `artJniUnlockObject()`. - pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). - CFI_ADJUST_CFA_OFFSET(4) - PUSH_ARG ebp // Pass the object to unlock. - call SYMBOL(artJniUnlockObject) // (object, Thread*) - // Restore return registers and return. 
- movsd 8(%esp), %xmm0 - DECREASE_FRAME /*call args*/ 8 + /*xmm0*/ 8 + /*padding*/ 4 - POP_ARG eax - POP_ARG edx - ret -END_FUNCTION art_jni_unlock_object_no_inline +JNI_SAVE_RETURN_VALUE_TRAMPOLINE \ + art_jni_unlock_object_no_inline, artJniUnlockObject, ebp, fs:THREAD_SELF_OFFSET diff --git a/runtime/arch/x86_64/jni_entrypoints_x86_64.S b/runtime/arch/x86_64/jni_entrypoints_x86_64.S index b5d3bd13f4..0d5fa3f3e0 100644 --- a/runtime/arch/x86_64/jni_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/jni_entrypoints_x86_64.S @@ -71,6 +71,26 @@ DEFINE_FUNCTION \name END_FUNCTION \name END_MACRO +MACRO4(JNI_SAVE_RETURN_VALUE_TRAMPOLINE, name, cxx_name, arg1, arg2) +DEFINE_FUNCTION \name + // Save return registers and return address. + PUSH_ARG rax + INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8 + movsd %xmm0, 0(%rsp) + // Call `cxx_name()`. + mov REG_VAR(arg1), %rdi // Pass arg1. + .ifnc \arg2, none + mov REG_VAR(arg2), %rsi // Pass arg2. + .endif + call CALLVAR(cxx_name) // Call cxx_name(...). + // Restore return registers and return. + movsd 0(%rsp), %xmm0 + DECREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8 + POP_ARG rax + ret +END_FUNCTION \name +END_MACRO + /* * Jni dlsym lookup stub. */ @@ -386,6 +406,17 @@ JNI_SAVE_MANAGED_ARGS_TRAMPOLINE \ art_jni_monitored_method_start, artJniMonitoredMethodStart, gs:THREAD_SELF_OFFSET /* + * Trampoline to `artJniMethodEnd()` that preserves all return registers. + */ +JNI_SAVE_RETURN_VALUE_TRAMPOLINE art_jni_method_end, artJniMethodEnd, gs:THREAD_SELF_OFFSET, none + + /* + * Trampoline to `artJniMonitoredMethodEnd()` that preserves all return registers. + */ +JNI_SAVE_RETURN_VALUE_TRAMPOLINE \ + art_jni_monitored_method_end, artJniMonitoredMethodEnd, gs:THREAD_SELF_OFFSET, none + + /* * Entry from JNI stub that tries to lock the object in a fast path and * calls `artLockObjectFromCode()` (the same as for managed code) for the * difficult cases, may block for GC. 
@@ -455,19 +486,6 @@ END_FUNCTION art_jni_unlock_object * Callee-save registers have been saved and can be used as temporaries (except RBX). * Return registers RAX and mmx0 need to be preserved. */ -DEFINE_FUNCTION art_jni_unlock_object_no_inline // This is also the slow path for art_jni_unlock_object. - // Save return registers and return address. - PUSH_ARG rax - INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8 - movsd %xmm0, 0(%rsp) - // Call `artJniUnlockObject()`. - movq %rbx, %rdi // Pass the object to unlock. - movq %gs:THREAD_SELF_OFFSET, %rsi // Pass Thread::Current(). - call SYMBOL(artJniUnlockObject) // (object, Thread*) - // Restore return registers and return. - movsd 0(%rsp), %xmm0 - DECREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8 - POP_ARG rax - ret -END_FUNCTION art_jni_unlock_object_no_inline +JNI_SAVE_RETURN_VALUE_TRAMPOLINE \ + art_jni_unlock_object_no_inline, artJniUnlockObject, rbx, gs:THREAD_SELF_OFFSET diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h index b3c6c021d3..f8856d82b9 100644 --- a/runtime/entrypoints/quick/quick_default_externs.h +++ b/runtime/entrypoints/quick/quick_default_externs.h @@ -120,6 +120,8 @@ extern "C" void art_jni_read_barrier(art::ArtMethod* method); // JNI method start entrypoint. Note: Custom calling convention. extern "C" void art_jni_method_start(); extern "C" void art_jni_monitored_method_start(); +extern "C" void art_jni_method_end(); +extern "C" void art_jni_monitored_method_end(); // JNI lock/unlock entrypoints. Note: Custom calling convention. 
extern "C" void art_jni_lock_object(art::mirror::Object*); diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h index 6f3c8d0ae5..eec7ca9b0c 100644 --- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h +++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h @@ -75,8 +75,7 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, // JNI qpoints->pJniMethodStart = art_jni_method_start; - qpoints->pJniMethodEnd = JniMethodEnd; - qpoints->pJniMethodEndWithReference = JniMethodEndWithReference; + qpoints->pJniMethodEnd = art_jni_method_end; qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline; qpoints->pJniDecodeReferenceResult = JniDecodeReferenceResult; qpoints->pJniReadBarrier = art_jni_read_barrier; @@ -138,8 +137,7 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, if (monitor_jni_entry_exit) { qpoints->pJniMethodStart = art_jni_monitored_method_start; - qpoints->pJniMethodEnd = JniMonitoredMethodEnd; - qpoints->pJniMethodEndWithReference = JniMonitoredMethodEndWithReference; + qpoints->pJniMethodEnd = art_jni_monitored_method_end; } } diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h index 4580cfb293..2b9f2f327e 100644 --- a/runtime/entrypoints/quick/quick_entrypoints.h +++ b/runtime/entrypoints/quick/quick_entrypoints.h @@ -53,33 +53,25 @@ struct PACKED(4) QuickEntryPoints { // JNI entrypoints. -// TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI. 
-extern "C" void artJniMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; -extern void JniMethodEnd(Thread* self) - NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; -extern mirror::Object* JniMethodEndWithReference(jobject result, Thread* self) - NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; +extern "C" void artJniMethodStart(Thread* self) UNLOCK_FUNCTION(Locks::mutator_lock_) HOT_ATTR; +extern "C" void artJniMethodEnd(Thread* self) SHARED_LOCK_FUNCTION(Locks::mutator_lock_) HOT_ATTR; extern mirror::Object* JniDecodeReferenceResult(jobject result, Thread* self) - NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; + REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR; +extern "C" void artJniReadBarrier(ArtMethod* method) + REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR; +extern "C" void artJniUnlockObject(mirror::Object* locked, Thread* self) + REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR; // JNI entrypoints when monitoring entry/exit. -extern "C" void artJniMonitoredMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; -extern void JniMonitoredMethodEnd(Thread* self) - NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; -extern mirror::Object* JniMonitoredMethodEndWithReference(jobject result, Thread* self) - NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; - +extern "C" void artJniMonitoredMethodStart(Thread* self) UNLOCK_FUNCTION(Locks::mutator_lock_); +extern "C" void artJniMonitoredMethodEnd(Thread* self) SHARED_LOCK_FUNCTION(Locks::mutator_lock_); +// StringAppend pattern entrypoint. extern "C" mirror::String* artStringBuilderAppend(uint32_t format, const uint32_t* args, Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR; -extern "C" void artJniReadBarrier(ArtMethod* method) - REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR; -extern "C" void artJniUnlockObject(mirror::Object* locked, Thread* self) - REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR; - // Read barrier entrypoints. 
// // Compilers for ARM, ARM64 can insert a call to these diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index b89ff2c698..dffaa4bb25 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -73,8 +73,7 @@ V(AputObject, void, mirror::Array*, int32_t, mirror::Object*) \ \ V(JniMethodStart, void) \ - V(JniMethodEnd, void, Thread*) \ - V(JniMethodEndWithReference, mirror::Object*, jobject, Thread*) \ + V(JniMethodEnd, void) \ V(JniDecodeReferenceResult, mirror::Object*, jobject, Thread*) \ V(JniLockObject, void, mirror::Object*) \ V(JniUnlockObject, void, mirror::Object*) \ diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc index 205dd22b27..ab13bd95b1 100644 --- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc @@ -63,22 +63,13 @@ extern "C" void artJniMethodStart(Thread* self) { if (kIsDebugBuild) { ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame(); CHECK(!native_method->IsFastNative()) << native_method->PrettyMethod(); + CHECK(!native_method->IsCriticalNative()) << native_method->PrettyMethod(); } // Transition out of runnable. self->TransitionFromRunnableToSuspended(ThreadState::kNative); } -// TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI. 
-static void GoToRunnable(Thread* self) NO_THREAD_SAFETY_ANALYSIS { - if (kIsDebugBuild) { - ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame(); - CHECK(!native_method->IsFastNative()) << native_method->PrettyMethod(); - } - - self->TransitionFromSuspendedToRunnable(); -} - static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { JNIEnvExt* env = self->GetJniEnv(); @@ -123,8 +114,14 @@ extern "C" void artJniUnlockObject(mirror::Object* locked, Thread* self) // TODO: These should probably be templatized or macro-ized. // Otherwise there's just too much repetitive boilerplate. -extern void JniMethodEnd(Thread* self) { - GoToRunnable(self); +extern "C" void artJniMethodEnd(Thread* self) { + self->TransitionFromSuspendedToRunnable(); + + if (kIsDebugBuild) { + ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame(); + CHECK(!native_method->IsFastNative()) << native_method->PrettyMethod(); + CHECK(!native_method->IsCriticalNative()) << native_method->PrettyMethod(); + } } extern mirror::Object* JniDecodeReferenceResult(jobject result, Thread* self) @@ -142,36 +139,13 @@ extern mirror::Object* JniDecodeReferenceResult(jobject result, Thread* self) return o.Ptr(); } -// Common result handling for EndWithReference. -static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result, Thread* self) - NO_THREAD_SAFETY_ANALYSIS { - // Must decode before pop. The 'result' may not be valid in case of an exception, though. - ObjPtr<mirror::Object> o; - if (!self->IsExceptionPending()) { - o = self->DecodeJObject(result); - } - // Process result. - if (UNLIKELY(self->GetJniEnv()->IsCheckJniEnabled())) { - // CheckReferenceResult can resolve types. 
- StackHandleScope<1> hs(self); - HandleWrapperObjPtr<mirror::Object> h_obj(hs.NewHandleWrapper(&o)); - CheckReferenceResult(h_obj, self); - } - VerifyObject(o); - return o.Ptr(); -} - -extern mirror::Object* JniMethodEndWithReference(jobject result, Thread* self) { - GoToRunnable(self); - return JniMethodEndWithReferenceHandleResult(result, self); -} - extern uint64_t GenericJniMethodEnd(Thread* self, uint32_t saved_local_ref_cookie, jvalue result, uint64_t result_f, ArtMethod* called) - // TODO: NO_THREAD_SAFETY_ANALYSIS as GoToRunnable() is NO_THREAD_SAFETY_ANALYSIS + // NO_THREAD_SAFETY_ANALYSIS because we can enter this function with the mutator lock + // unlocked for normal JNI, or locked for @FastNative and @CriticalNative. NO_THREAD_SAFETY_ANALYSIS { bool critical_native = called->IsCriticalNative(); bool fast_native = called->IsFastNative(); @@ -180,16 +154,19 @@ extern uint64_t GenericJniMethodEnd(Thread* self, // @CriticalNative does not do a state transition. @FastNative usually does not do a state // transition either but it performs a suspend check that may do state transitions. if (LIKELY(normal_native)) { - MONITOR_JNI(PaletteNotifyEndJniInvocation); - GoToRunnable(self); + if (UNLIKELY(self->ReadFlag(ThreadFlag::kMonitorJniEntryExit))) { + artJniMonitoredMethodEnd(self); + } else { + artJniMethodEnd(self); + } } else if (fast_native) { // When we are in @FastNative, we are already Runnable. DCHECK(Locks::mutator_lock_->IsSharedHeld(self)); // Only do a suspend check on the way out of JNI just like compiled stubs. self->CheckSuspend(); } - // We need the mutator lock (i.e., calling GoToRunnable()) before accessing the shorty or the - // locked object. + // We need the mutator lock (i.e., calling `artJniMethodEnd()`) before accessing + // the shorty or the locked object. 
if (called->IsSynchronized()) { DCHECK(normal_native) << "@FastNative/@CriticalNative and synchronize is not supported"; ObjPtr<mirror::Object> lock = GetGenericJniSynchronizationObject(self, called); @@ -198,8 +175,8 @@ extern uint64_t GenericJniMethodEnd(Thread* self, } char return_shorty_char = called->GetShorty()[0]; if (return_shorty_char == 'L') { - uint64_t ret = - reinterpret_cast<uint64_t>(JniMethodEndWithReferenceHandleResult(result.l, self)); + uint64_t ret = reinterpret_cast<uint64_t>( + UNLIKELY(self->IsExceptionPending()) ? nullptr : JniDecodeReferenceResult(result.l, self)); PopLocalReferences(saved_local_ref_cookie, self); return ret; } else { @@ -244,14 +221,9 @@ extern "C" void artJniMonitoredMethodStart(Thread* self) { MONITOR_JNI(PaletteNotifyBeginJniInvocation); } -extern void JniMonitoredMethodEnd(Thread* self) { - MONITOR_JNI(PaletteNotifyEndJniInvocation); - JniMethodEnd(self); -} - -extern mirror::Object* JniMonitoredMethodEndWithReference(jobject result, Thread* self) { +extern "C" void artJniMonitoredMethodEnd(Thread* self) { MONITOR_JNI(PaletteNotifyEndJniInvocation); - return JniMethodEndWithReference(result, self); + artJniMethodEnd(self); } } // namespace art diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index 00d5523179..240ecbd216 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -218,10 +218,7 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pGetObjStatic, pAputObject, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAputObject, pJniMethodStart, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodEnd, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodEndWithReference, - sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReference, - pJniDecodeReferenceResult, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, 
pJniMethodEnd, pJniDecodeReferenceResult, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniDecodeReferenceResult, pJniLockObject, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniLockObject, diff --git a/runtime/oat.h b/runtime/oat.h index 04972eb402..c2ad8c0025 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,8 +32,8 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } }; - // Last oat version changed reason: JNI: Inline fast-path for `JniMethodStart()`. - static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '1', '6', '\0' } }; + // Last oat version changed reason: JNI: Inline fast-path for `JniMethodEnd()`. + static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '1', '7', '\0' } }; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; static constexpr const char* kDebuggableKey = "debuggable"; diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h index 960a870100..fc8e6cb0c1 100644 --- a/runtime/thread-inl.h +++ b/runtime/thread-inl.h @@ -251,6 +251,12 @@ inline void Thread::TransitionFromRunnableToSuspended(ThreadState new_state) { } inline ThreadState Thread::TransitionFromSuspendedToRunnable() { + // Note: JNI stubs inline a fast path of this method that transitions to Runnable if + // there are no flags set and then stores the mutator lock to `held_mutexes[kMutatorLock]` + // (this comes from a specialized `BaseMutex::RegisterAsUnlockedImpl(., kMutatorLock)` + // inlined from the `GetMutatorLock()->TransitionFromSuspendedToRunnable(this)` below). + // Therefore any code added here (other than debug build assertions) should be gated + // on some flag being set, so that the JNI stub can take the slow path to get here. 
StateAndFlags old_state_and_flags = GetStateAndFlags(std::memory_order_relaxed); ThreadState old_state = old_state_and_flags.GetState(); DCHECK_NE(old_state, ThreadState::kRunnable); diff --git a/runtime/thread.cc b/runtime/thread.cc index 184d2c1b5b..7988f884e6 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -3531,7 +3531,6 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) { QUICK_ENTRY_POINT_INFO(pAputObject) QUICK_ENTRY_POINT_INFO(pJniMethodStart) QUICK_ENTRY_POINT_INFO(pJniMethodEnd) - QUICK_ENTRY_POINT_INFO(pJniMethodEndWithReference) QUICK_ENTRY_POINT_INFO(pJniDecodeReferenceResult) QUICK_ENTRY_POINT_INFO(pJniLockObject) QUICK_ENTRY_POINT_INFO(pJniUnlockObject) diff --git a/runtime/thread.h b/runtime/thread.h index 7d76956848..3c358d8e39 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -958,6 +958,12 @@ class Thread { } template<PointerSize pointer_size> + static constexpr ThreadOffset<pointer_size> MutatorLockOffset() { + return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, + mutator_lock)); + } + + template<PointerSize pointer_size> static constexpr ThreadOffset<pointer_size> HeldMutexOffset(LockLevel level) { DCHECK_LT(enum_cast<size_t>(level), arraysize(tlsPtr_.held_mutexes)); return ThreadOffsetFromTlsPtr<pointer_size>(OFFSETOF_MEMBER(tls_ptr_sized_values, |