32 files changed, 651 insertions, 1008 deletions
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 2f96d44977..e3d0abb7d3 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -39,7 +39,6 @@ #include "mirror/stack_trace_element-inl.h" #include "nativehelper/ScopedLocalRef.h" #include "nativeloader/native_loader.h" -#include "oat_quick_method_header.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" @@ -389,41 +388,44 @@ class JniCompilerTest : public CommonCompilerTest { jmethodID jmethod_; private: - // Helper class that overrides original entrypoints with alternative versions - // that check that the object (`this` or class) is locked. class ScopedSynchronizedEntryPointOverrides { public: ScopedSynchronizedEntryPointOverrides() { QuickEntryPoints* qpoints = &Thread::Current()->tlsPtr_.quick_entrypoints; - jni_method_start_original_ = qpoints->pJniMethodStart; - qpoints->pJniMethodStart = JniMethodStartSynchronizedOverride; - jni_method_end_original_ = qpoints->pJniMethodEnd; - qpoints->pJniMethodEnd = JniMethodEndSynchronizedOverride; - jni_method_end_with_reference_original_ = qpoints->pJniMethodEndWithReference; - qpoints->pJniMethodEndWithReference = JniMethodEndWithReferenceSynchronizedOverride; + jni_method_start_synchronized_original_ = qpoints->pJniMethodStartSynchronized; + qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronizedOverride; + jni_method_end_synchronized_original_ = qpoints->pJniMethodEndSynchronized; + qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronizedOverride; + jni_method_end_with_reference_synchronized_original_ = + qpoints->pJniMethodEndWithReferenceSynchronized; + qpoints->pJniMethodEndWithReferenceSynchronized = + JniMethodEndWithReferenceSynchronizedOverride; } ~ScopedSynchronizedEntryPointOverrides() { QuickEntryPoints* qpoints = &Thread::Current()->tlsPtr_.quick_entrypoints; - qpoints->pJniMethodStart = jni_method_start_original_; - qpoints->pJniMethodEnd = jni_method_end_original_; - qpoints->pJniMethodEndWithReference = jni_method_end_with_reference_original_; + qpoints->pJniMethodStartSynchronized = jni_method_start_synchronized_original_; + qpoints->pJniMethodEndSynchronized = jni_method_end_synchronized_original_; + qpoints->pJniMethodEndWithReferenceSynchronized = + jni_method_end_with_reference_synchronized_original_; } }; - static void AssertCallerObjectLocked(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_); - static void JniMethodStartSynchronizedOverride(Thread* self); - static void JniMethodEndSynchronizedOverride(Thread* self); + static void JniMethodStartSynchronizedOverride(jobject to_lock, Thread* self); + static void JniMethodEndSynchronizedOverride(jobject locked, Thread* self); static mirror::Object* JniMethodEndWithReferenceSynchronizedOverride( - jobject result, Thread* self); + jobject result, + jobject locked, + Thread* self); - using JniStartType = void (*)(Thread*); - using JniEndType = void (*)(Thread*); - using JniEndWithReferenceType = mirror::Object* (*)(jobject, Thread*); + using StartSynchronizedType = void (*)(jobject, Thread*); + using EndSynchronizedType = void (*)(jobject, Thread*); + using EndWithReferenceSynchronizedType = mirror::Object* (*)(jobject, jobject, Thread*); - static JniStartType jni_method_start_original_; - static JniEndType jni_method_end_original_; - static JniEndWithReferenceType jni_method_end_with_reference_original_; + static StartSynchronizedType jni_method_start_synchronized_original_; + static EndSynchronizedType 
jni_method_end_synchronized_original_; + static EndWithReferenceSynchronizedType jni_method_end_with_reference_synchronized_original_; + static jobject locked_object_; bool check_generic_jni_; }; @@ -431,49 +433,28 @@ class JniCompilerTest : public CommonCompilerTest { jclass JniCompilerTest::jklass_; jobject JniCompilerTest::jobj_; jobject JniCompilerTest::class_loader_; -JniCompilerTest::JniStartType JniCompilerTest::jni_method_start_original_; -JniCompilerTest::JniEndType JniCompilerTest::jni_method_end_original_; -JniCompilerTest::JniEndWithReferenceType JniCompilerTest::jni_method_end_with_reference_original_; - -void JniCompilerTest::AssertCallerObjectLocked(Thread* self) { - ArtMethod** caller_frame = self->GetManagedStack()->GetTopQuickFrame(); - CHECK(caller_frame != nullptr); - ArtMethod* caller = *caller_frame; - CHECK(caller != nullptr); - CHECK(caller->IsNative()); - CHECK(!caller->IsFastNative()); - CHECK(!caller->IsCriticalNative()); - CHECK(caller->IsSynchronized()); - ObjPtr<mirror::Object> lock; - if (caller->IsStatic()) { - lock = caller->GetDeclaringClass(); - } else { - uint8_t* sp = reinterpret_cast<uint8_t*>(caller_frame); - const void* code_ptr = EntryPointToCodePointer(caller->GetEntryPointFromQuickCompiledCode()); - OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr); - size_t frame_size = method_header->GetFrameSizeInBytes(); - StackReference<mirror::Object>* this_ref = reinterpret_cast<StackReference<mirror::Object>*>( - sp + frame_size + static_cast<size_t>(kRuntimePointerSize)); - lock = this_ref->AsMirrorPtr(); - } - CHECK_EQ(Monitor::GetLockOwnerThreadId(lock), self->GetThreadId()); -} +JniCompilerTest::StartSynchronizedType JniCompilerTest::jni_method_start_synchronized_original_; +JniCompilerTest::EndSynchronizedType JniCompilerTest::jni_method_end_synchronized_original_; +JniCompilerTest::EndWithReferenceSynchronizedType + JniCompilerTest::jni_method_end_with_reference_synchronized_original_; +jobject JniCompilerTest::locked_object_; -void JniCompilerTest::JniMethodStartSynchronizedOverride(Thread* self) NO_THREAD_SAFETY_ANALYSIS { - AssertCallerObjectLocked(self); - jni_method_start_original_(self); +void JniCompilerTest::JniMethodStartSynchronizedOverride(jobject to_lock, Thread* self) { + locked_object_ = to_lock; + jni_method_start_synchronized_original_(to_lock, self); } -void JniCompilerTest::JniMethodEndSynchronizedOverride(Thread* self) NO_THREAD_SAFETY_ANALYSIS { - jni_method_end_original_(self); - AssertCallerObjectLocked(self); +void JniCompilerTest::JniMethodEndSynchronizedOverride(jobject locked, Thread* self) { + EXPECT_EQ(locked_object_, locked); + jni_method_end_synchronized_original_(locked, self); } mirror::Object* JniCompilerTest::JniMethodEndWithReferenceSynchronizedOverride( - jobject result, Thread* self) NO_THREAD_SAFETY_ANALYSIS { - mirror::Object* raw_result = jni_method_end_with_reference_original_(result, self); - AssertCallerObjectLocked(self); - return raw_result; + jobject result, + jobject locked, + Thread* self) { + EXPECT_EQ(locked_object_, locked); + return jni_method_end_with_reference_synchronized_original_(result, locked, self); } // Test the normal compiler and normal generic JNI only. 
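The test change above restores the ScopedSynchronizedEntryPointOverrides pattern: swap the synchronized JNI entrypoints in the thread-local QuickEntryPoints table for checking versions on construction, then put the originals back on destruction. A minimal standalone sketch of that RAII function-pointer override, using hypothetical stand-in types rather than ART's real Thread and QuickEntryPoints, might look like this:

#include <cassert>
#include <iostream>

// Hypothetical stand-ins for ART's QuickEntryPoints table and thread-local state;
// only the override pattern is real, the types and names here are illustrative.
struct FakeEntryPoints {
  void (*pMethodStartSynchronized)(void* to_lock);
  void (*pMethodEndSynchronized)(void* locked);
};

FakeEntryPoints g_entrypoints;    // Stands in for Thread::Current()->tlsPtr_.quick_entrypoints.
void* g_locked_object = nullptr;  // Mirrors the test's static locked_object_.

void RealStartSynchronized(void* to_lock) { std::cout << "lock " << to_lock << "\n"; }
void RealEndSynchronized(void* locked) { std::cout << "unlock " << locked << "\n"; }

// RAII override: install checking versions in the constructor, restore the originals
// in the destructor, as ScopedSynchronizedEntryPointOverrides does in the test.
class ScopedOverrides {
 public:
  ScopedOverrides() {
    start_original_ = g_entrypoints.pMethodStartSynchronized;
    end_original_ = g_entrypoints.pMethodEndSynchronized;
    g_entrypoints.pMethodStartSynchronized = StartOverride;
    g_entrypoints.pMethodEndSynchronized = EndOverride;
  }
  ~ScopedOverrides() {
    g_entrypoints.pMethodStartSynchronized = start_original_;
    g_entrypoints.pMethodEndSynchronized = end_original_;
  }

 private:
  static void StartOverride(void* to_lock) {
    g_locked_object = to_lock;          // Remember which object was passed for locking...
    start_original_(to_lock);           // ...then forward to the original entrypoint.
  }
  static void EndOverride(void* locked) {
    assert(g_locked_object == locked);  // ...and check the same object comes back for unlocking.
    end_original_(locked);
  }

  static void (*start_original_)(void*);
  static void (*end_original_)(void*);
};

void (*ScopedOverrides::start_original_)(void*) = nullptr;
void (*ScopedOverrides::end_original_)(void*) = nullptr;

int main() {
  g_entrypoints = {RealStartSynchronized, RealEndSynchronized};
  int dummy = 0;
  {
    ScopedOverrides overrides;
    g_entrypoints.pMethodStartSynchronized(&dummy);  // Routed through StartOverride.
    g_entrypoints.pMethodEndSynchronized(&dummy);    // Routed through EndOverride.
  }
  g_entrypoints.pMethodStartSynchronized(&dummy);    // Originals restored after the scope.
  return 0;
}

As in the test, the start override records the object passed for locking and the end override verifies that the same object is passed back before forwarding to the original entrypoint.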
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc index da438bdba6..68c7a94540 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.cc +++ b/compiler/jni/quick/arm/calling_convention_arm.cc @@ -531,10 +531,10 @@ FrameOffset ArmJniCallingConvention::CurrentParamStackOffset() { return FrameOffset(offset); } -// R4 is neither managed callee-save, nor argument register. It is suitable for use as the -// locking argument for synchronized methods and hidden argument for @CriticalNative methods. -// (It is native callee-save but the value coming from managed code can be clobbered.) -static void AssertR4IsNeitherCalleeSaveNorArgumentRegister() { +ManagedRegister ArmJniCallingConvention::HiddenArgumentRegister() const { + CHECK(IsCriticalNative()); + // R4 is neither managed callee-save, nor argument register, nor scratch register. + // (It is native callee-save but the value coming from managed code can be clobbered.) // TODO: Change to static_assert; std::none_of should be constexpr since C++20. DCHECK(std::none_of(kCalleeSaveRegisters, kCalleeSaveRegisters + std::size(kCalleeSaveRegisters), @@ -543,20 +543,7 @@ static void AssertR4IsNeitherCalleeSaveNorArgumentRegister() { })); DCHECK(std::none_of(kJniArgumentRegisters, kJniArgumentRegisters + std::size(kJniArgumentRegisters), - [](Register arg) { return arg == R4; })); -} - -ManagedRegister ArmJniCallingConvention::LockingArgumentRegister() const { - DCHECK(!IsFastNative()); - DCHECK(!IsCriticalNative()); - DCHECK(IsSynchronized()); - AssertR4IsNeitherCalleeSaveNorArgumentRegister(); - return ArmManagedRegister::FromCoreRegister(R4); -} - -ManagedRegister ArmJniCallingConvention::HiddenArgumentRegister() const { - CHECK(IsCriticalNative()); - AssertR4IsNeitherCalleeSaveNorArgumentRegister(); + [](Register reg) { return reg == R4; })); return ArmManagedRegister::FromCoreRegister(R4); } diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h index 94dacc46e5..149ba39eb4 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.h +++ b/compiler/jni/quick/arm/calling_convention_arm.h @@ -81,10 +81,6 @@ class ArmJniCallingConvention final : public JniCallingConvention { return false; } - // Locking argument register, used to pass the synchronization object for calls - // to `JniLockObject()` and `JniUnlockObject()`. - ManagedRegister LockingArgumentRegister() const override; - // Hidden argument register, used to pass the method pointer for @CriticalNative call. ManagedRegister HiddenArgumentRegister() const override; diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index d8b0373096..7b9a597805 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -363,9 +363,9 @@ FrameOffset Arm64JniCallingConvention::CurrentParamStackOffset() { return FrameOffset(offset); } -// X15 is neither managed callee-save, nor argument register. It is suitable for use as the -// locking argument for synchronized methods and hidden argument for @CriticalNative methods. -static void AssertX15IsNeitherCalleeSaveNorArgumentRegister() { +ManagedRegister Arm64JniCallingConvention::HiddenArgumentRegister() const { + CHECK(IsCriticalNative()); + // X15 is neither managed callee-save, nor argument register, nor scratch register. // TODO: Change to static_assert; std::none_of should be constexpr since C++20. 
DCHECK(std::none_of(kCalleeSaveRegisters, kCalleeSaveRegisters + std::size(kCalleeSaveRegisters), @@ -374,20 +374,7 @@ static void AssertX15IsNeitherCalleeSaveNorArgumentRegister() { })); DCHECK(std::none_of(kXArgumentRegisters, kXArgumentRegisters + std::size(kXArgumentRegisters), - [](XRegister arg) { return arg == X15; })); -} - -ManagedRegister Arm64JniCallingConvention::LockingArgumentRegister() const { - DCHECK(!IsFastNative()); - DCHECK(!IsCriticalNative()); - DCHECK(IsSynchronized()); - AssertX15IsNeitherCalleeSaveNorArgumentRegister(); - return Arm64ManagedRegister::FromWRegister(W15); -} - -ManagedRegister Arm64JniCallingConvention::HiddenArgumentRegister() const { - DCHECK(IsCriticalNative()); - AssertX15IsNeitherCalleeSaveNorArgumentRegister(); + [](XRegister reg) { return reg == X15; })); return Arm64ManagedRegister::FromXRegister(X15); } diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h index 003b0c3f15..ade88e4e97 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.h +++ b/compiler/jni/quick/arm64/calling_convention_arm64.h @@ -72,10 +72,6 @@ class Arm64JniCallingConvention final : public JniCallingConvention { return HasSmallReturnType(); } - // Locking argument register, used to pass the synchronization object for calls - // to `JniLockObject()` and `JniUnlockObject()`. - ManagedRegister LockingArgumentRegister() const override; - // Hidden argument register, used to pass the method pointer for @CriticalNative call. ManagedRegister HiddenArgumentRegister() const override; diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h index 0be523362f..faa83daf7c 100644 --- a/compiler/jni/quick/calling_convention.h +++ b/compiler/jni/quick/calling_convention.h @@ -363,10 +363,6 @@ class JniCallingConvention : public CallingConvention { return !IsCriticalNative(); } - // Locking argument register, used to pass the synchronization object for calls - // to `JniLockObject()` and `JniUnlockObject()`. - virtual ManagedRegister LockingArgumentRegister() const = 0; - // Hidden argument register, used to pass the method pointer for @CriticalNative call. virtual ManagedRegister HiddenArgumentRegister() const = 0; diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 863f47b819..4c1b2f792d 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -81,17 +81,26 @@ enum class JniEntrypoint { template <PointerSize kPointerSize> static ThreadOffset<kPointerSize> GetJniEntrypointThreadOffset(JniEntrypoint which, - bool reference_return) { + bool reference_return, + bool is_synchronized) { if (which == JniEntrypoint::kStart) { // JniMethodStart - ThreadOffset<kPointerSize> jni_start = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart); + ThreadOffset<kPointerSize> jni_start = + is_synchronized + ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized) + : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart); + return jni_start; } else { // JniMethodEnd ThreadOffset<kPointerSize> jni_end(-1); if (reference_return) { // Pass result. - jni_end = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference); + jni_end = is_synchronized + ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized) + : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference); } else { - jni_end = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd); + jni_end = is_synchronized + ? 
QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized) + : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd); } return jni_end; @@ -185,6 +194,26 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp ManagedRuntimeCallingConvention::Create( &allocator, is_static, is_synchronized, shorty, instruction_set)); + // Calling conventions to call into JNI method "end" possibly passing a returned reference, the + // method and the current thread. + const char* jni_end_shorty; + if (reference_return && is_synchronized) { + jni_end_shorty = "IL"; + } else if (reference_return) { + jni_end_shorty = "I"; + } else { + jni_end_shorty = "V"; + } + + std::unique_ptr<JniCallingConvention> end_jni_conv( + JniCallingConvention::Create(&allocator, + is_static, + is_synchronized, + is_fast_native, + is_critical_native, + jni_end_shorty, + instruction_set)); + // Assembler that holds generated instructions std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm = GetMacroAssembler<kPointerSize>(&allocator, instruction_set, instruction_set_features); @@ -220,28 +249,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp __ Bind(jclass_read_barrier_return.get()); } - // 1.3 Spill reference register arguments. - constexpr FrameOffset kInvalidReferenceOffset = - JNIMacroAssembler<kPointerSize>::kInvalidReferenceOffset; - ArenaVector<ArgumentLocation> src_args(allocator.Adapter()); - ArenaVector<ArgumentLocation> dest_args(allocator.Adapter()); - ArenaVector<FrameOffset> refs(allocator.Adapter()); - if (LIKELY(!is_critical_native)) { - mr_conv->ResetIterator(FrameOffset(current_frame_size)); - for (; mr_conv->HasNext(); mr_conv->Next()) { - if (mr_conv->IsCurrentParamInRegister() && mr_conv->IsCurrentParamAReference()) { - // Spill the reference as raw data. - src_args.emplace_back(mr_conv->CurrentParamRegister(), kObjectReferenceSize); - dest_args.emplace_back(mr_conv->CurrentParamStackOffset(), kObjectReferenceSize); - refs.push_back(kInvalidReferenceOffset); - } - } - __ MoveArguments(ArrayRef<ArgumentLocation>(dest_args), - ArrayRef<ArgumentLocation>(src_args), - ArrayRef<FrameOffset>(refs)); - } - - // 1.4. Write out the end of the quick frames. After this, we can walk the stack. + // 1.3. Write out the end of the quick frames. // NOTE: @CriticalNative does not need to store the stack pointer to the thread // because garbage collections are disabled within the execution of a // @CriticalNative method. @@ -249,32 +257,10 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>()); } - // 2. Lock the object (if synchronized) and transition out of runnable (if normal native). + // 2. Call into appropriate `JniMethodStart*()` to transition out of Runnable for normal native. - // 2.1. Lock the synchronization object (`this` or class) for synchronized methods. - if (UNLIKELY(is_synchronized)) { - // We are using a custom calling convention for locking where the assembly thunk gets - // the object to lock in a register (even on x86), it can use callee-save registers - // as temporaries (they were saved above) and must preserve argument registers. - ManagedRegister to_lock = main_jni_conv->LockingArgumentRegister(); - if (is_static) { - // Pass the declaring class. It was already marked if needed. 
- DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u); - __ Load(to_lock, method_register, MemberOffset(0u), kObjectReferenceSize); - } else { - // Pass the `this` argument. - mr_conv->ResetIterator(FrameOffset(current_frame_size)); - if (mr_conv->IsCurrentParamInRegister()) { - __ Move(to_lock, mr_conv->CurrentParamRegister(), kObjectReferenceSize); - } else { - __ Load(to_lock, mr_conv->CurrentParamStackOffset(), kObjectReferenceSize); - } - } - __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniLockObject)); - } - - // 2.2. Move frame down to allow space for out going args. - // This prepares for both the `JniMethodStart()` call as well as the main native call. + // 2.1. Move frame down to allow space for out going args. + // This prepares for both the `JniMethodStart*()` call as well as the main native call. size_t current_out_arg_size = main_out_arg_size; if (UNLIKELY(is_critical_native)) { DCHECK_EQ(main_out_arg_size, current_frame_size); @@ -283,37 +269,41 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp current_frame_size += main_out_arg_size; } - // 2.3. Spill all register arguments to preserve them across the `JniLockObject()` - // call (if synchronized) and `JniMethodStart()` call (if normal native). + // 2.2. Spill all register arguments to preserve them across the `JniMethodStart*()` call. // Native stack arguments are spilled directly to their argument stack slots and // references are converted to `jobject`. Native register arguments are spilled to - // the reserved slots in the caller frame, references are not converted to `jobject`; - // references from registers are actually skipped as they were already spilled above. - // TODO: Implement fast-path for transition to Native and avoid this spilling. - src_args.clear(); - dest_args.clear(); - refs.clear(); + // the reserved slots in the caller frame, references are not converted to `jobject`. + constexpr FrameOffset kInvalidReferenceOffset = + JNIMacroAssembler<kPointerSize>::kInvalidReferenceOffset; + ArenaVector<ArgumentLocation> src_args(allocator.Adapter()); + ArenaVector<ArgumentLocation> dest_args(allocator.Adapter()); + ArenaVector<FrameOffset> refs(allocator.Adapter()); if (LIKELY(!is_critical_native && !is_fast_native)) { mr_conv->ResetIterator(FrameOffset(current_frame_size)); main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); main_jni_conv->Next(); // Skip JNIEnv*. - // Add a no-op move for the `jclass` / `this` argument to avoid the - // next argument being treated as non-null if it's a reference. - // Note: We have already spilled `this` as raw reference above. Since `this` - // cannot be null, the argument move before the native call does not need - // to reload the reference, and that argument move also needs to see the - // `this` argument to avoid treating another reference as non-null. - // Note: Using the method register for the no-op move even for `this`. - src_args.emplace_back(method_register, kRawPointerSize); - dest_args.emplace_back(method_register, kRawPointerSize); - refs.push_back(kInvalidReferenceOffset); if (is_static) { main_jni_conv->Next(); // Skip `jclass`. + // Add a no-op move for the `jclass` argument to avoid the next + // argument being treated as non-null if it's a reference. 
+ src_args.emplace_back(method_register, kRawPointerSize); + dest_args.emplace_back(method_register, kRawPointerSize); + refs.push_back(kInvalidReferenceOffset); } else { - // Skip `this` + // Spill `this` as raw reference without conversion to `jobject` even if the `jobject` + // argument is passed on stack. Since `this` cannot be null, the argument move before + // the native call does not need to reload the reference, and that argument move also + // needs to see the `this` argument to avoid treating another reference as non-null. + // This also leaves enough space on stack for `JniMethodStartSynchronized()` + // for architectures that pass the second argument on the stack (x86). DCHECK(mr_conv->HasNext()); DCHECK(main_jni_conv->HasNext()); DCHECK(mr_conv->IsCurrentParamAReference()); + src_args.push_back(mr_conv->IsCurrentParamInRegister() + ? ArgumentLocation(mr_conv->CurrentParamRegister(), kObjectReferenceSize) + : ArgumentLocation(mr_conv->CurrentParamStackOffset(), kObjectReferenceSize)); + dest_args.emplace_back(mr_conv->CurrentParamStackOffset(), kObjectReferenceSize); + refs.push_back(kInvalidReferenceOffset); mr_conv->Next(); main_jni_conv->Next(); } @@ -321,19 +311,13 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp DCHECK(main_jni_conv->HasNext()); static_assert(kObjectReferenceSize == 4u); bool is_reference = mr_conv->IsCurrentParamAReference(); - bool src_in_reg = mr_conv->IsCurrentParamInRegister(); - bool dest_in_reg = main_jni_conv->IsCurrentParamInRegister(); - if (is_reference && src_in_reg && dest_in_reg) { - // We have already spilled the raw reference above. - continue; - } - bool spill_jobject = is_reference && !dest_in_reg; + bool spill_jobject = is_reference && !main_jni_conv->IsCurrentParamInRegister(); size_t src_size = (!is_reference && mr_conv->IsCurrentParamALongOrDouble()) ? 8u : 4u; size_t dest_size = spill_jobject ? kRawPointerSize : src_size; - src_args.push_back(src_in_reg + src_args.push_back(mr_conv->IsCurrentParamInRegister() ? ArgumentLocation(mr_conv->CurrentParamRegister(), src_size) : ArgumentLocation(mr_conv->CurrentParamStackOffset(), src_size)); - dest_args.push_back(dest_in_reg + dest_args.push_back(main_jni_conv->IsCurrentParamInRegister() ? ArgumentLocation(mr_conv->CurrentParamStackOffset(), dest_size) : ArgumentLocation(main_jni_conv->CurrentParamStackOffset(), dest_size)); refs.push_back(spill_jobject ? mr_conv->CurrentParamStackOffset() : kInvalidReferenceOffset); @@ -343,14 +327,41 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp ArrayRef<FrameOffset>(refs)); } // if (!is_critical_native) - // 2.4. Call into `JniMethodStart()` passing Thread* so that transition out of Runnable + // 2.3. Call into appropriate JniMethodStart passing Thread* so that transition out of Runnable // can occur. We abuse the JNI calling convention here, that is guaranteed to support - // passing two pointer arguments, `JNIEnv*` and `jclass`/`jobject`, and we use just one. + // passing two pointer arguments, `JNIEnv*` and `jclass`/`jobject`. + std::unique_ptr<JNIMacroLabel> monitor_enter_exception_slow_path = + UNLIKELY(is_synchronized) ? __ CreateLabel() : nullptr; if (LIKELY(!is_critical_native && !is_fast_native)) { // Skip this for @CriticalNative and @FastNative methods. They do not call JniMethodStart. 
ThreadOffset<kPointerSize> jni_start = - GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kStart, reference_return); + GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kStart, + reference_return, + is_synchronized); main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); + if (is_synchronized) { + // Pass object for locking. + if (is_static) { + // Pass the pointer to the method's declaring class as the first argument. + DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u); + SetNativeParameter(jni_asm.get(), main_jni_conv.get(), method_register); + } else { + // TODO: Use the register that still holds the `this` reference. + mr_conv->ResetIterator(FrameOffset(current_frame_size)); + FrameOffset this_offset = mr_conv->CurrentParamStackOffset(); + if (main_jni_conv->IsCurrentParamOnStack()) { + FrameOffset out_off = main_jni_conv->CurrentParamStackOffset(); + __ CreateJObject(out_off, this_offset, /*null_allowed=*/ false); + } else { + ManagedRegister out_reg = main_jni_conv->CurrentParamRegister(); + __ CreateJObject(out_reg, + this_offset, + ManagedRegister::NoRegister(), + /*null_allowed=*/ false); + } + } + main_jni_conv->Next(); + } if (main_jni_conv->IsCurrentParamInRegister()) { __ GetCurrentThread(main_jni_conv->CurrentParamRegister()); __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start)); @@ -358,7 +369,10 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset()); __ CallFromThread(jni_start); } - method_register = ManagedRegister::NoRegister(); // Method register is clobbered by the call. + method_register = ManagedRegister::NoRegister(); // Method register is clobbered. + if (is_synchronized) { // Check for exceptions from monitor enter. + __ ExceptionPoll(monitor_enter_exception_slow_path.get()); + } } // 3. Push local reference frame. @@ -525,7 +539,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp } } - // 5. Transition to Runnable (if normal native). + // 5. Call into appropriate JniMethodEnd to transition out of Runnable for normal native. // 5.1. Spill or move the return value if needed. // TODO: Use `callee_save_temp` instead of stack slot when possible. @@ -583,30 +597,72 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp } if (LIKELY(!is_critical_native)) { - // 5.4. Call JniMethodEnd for normal native. + // 5.4. Increase frame size for out args if needed by the end_jni_conv. + const size_t end_out_arg_size = end_jni_conv->OutFrameSize(); + if (end_out_arg_size > current_out_arg_size) { + DCHECK(!is_fast_native); + size_t out_arg_size_diff = end_out_arg_size - current_out_arg_size; + current_out_arg_size = end_out_arg_size; + __ IncreaseFrameSize(out_arg_size_diff); + current_frame_size += out_arg_size_diff; + return_save_location = FrameOffset(return_save_location.SizeValue() + out_arg_size_diff); + } + end_jni_conv->ResetIterator(FrameOffset(end_out_arg_size)); + + // 5.5. Call JniMethodEnd for normal native. // For @FastNative with reference return, decode the `jobject`. - // We abuse the JNI calling convention here, that is guaranteed to support passing - // two pointer arguments, `JNIEnv*` and `jclass`/`jobject`, enough for all cases. - main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); if (LIKELY(!is_fast_native) || reference_return) { ThreadOffset<kPointerSize> jni_end = is_fast_native ? 
QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniDecodeReferenceResult) - : GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd, reference_return); + : GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd, + reference_return, + is_synchronized); if (reference_return) { // Pass result. - SetNativeParameter(jni_asm.get(), main_jni_conv.get(), main_jni_conv->ReturnRegister()); - main_jni_conv->Next(); + SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister()); + end_jni_conv->Next(); } - if (main_jni_conv->IsCurrentParamInRegister()) { - __ GetCurrentThread(main_jni_conv->CurrentParamRegister()); - __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_end)); + if (is_synchronized) { + // Pass object for unlocking. + if (is_static) { + // Load reference to the method's declaring class. The method register has been + // clobbered by the above call, so we need to load the method from the stack. + FrameOffset method_offset = + FrameOffset(current_out_arg_size + mr_conv->MethodStackOffset().SizeValue()); + DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u); + if (end_jni_conv->IsCurrentParamOnStack()) { + FrameOffset out_off = end_jni_conv->CurrentParamStackOffset(); + __ Copy(out_off, method_offset, kRawPointerSize); + } else { + ManagedRegister out_reg = end_jni_conv->CurrentParamRegister(); + __ Load(out_reg, method_offset, kRawPointerSize); + } + } else { + mr_conv->ResetIterator(FrameOffset(current_frame_size)); + FrameOffset this_offset = mr_conv->CurrentParamStackOffset(); + if (end_jni_conv->IsCurrentParamOnStack()) { + FrameOffset out_off = end_jni_conv->CurrentParamStackOffset(); + __ CreateJObject(out_off, this_offset, /*null_allowed=*/ false); + } else { + ManagedRegister out_reg = end_jni_conv->CurrentParamRegister(); + __ CreateJObject(out_reg, + this_offset, + ManagedRegister::NoRegister(), + /*null_allowed=*/ false); + } + } + end_jni_conv->Next(); + } + if (end_jni_conv->IsCurrentParamInRegister()) { + __ GetCurrentThread(end_jni_conv->CurrentParamRegister()); + __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end)); } else { - __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset()); + __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset()); __ CallFromThread(jni_end); } } - // 5.5. Reload return value if it was spilled. + // 5.6. Reload return value if it was spilled. if (spill_return_value) { __ Load(mr_conv->ReturnRegister(), return_save_location, mr_conv->SizeOfReturnValue()); } @@ -642,26 +698,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp __ Bind(suspend_check_resume.get()); } - // 7.4 Unlock the synchronization object for synchronized methods. - if (UNLIKELY(is_synchronized)) { - ManagedRegister to_lock = main_jni_conv->LockingArgumentRegister(); - mr_conv->ResetIterator(FrameOffset(current_frame_size)); - if (is_static) { - // Pass the declaring class. - DCHECK(method_register.IsNoRegister()); // TODO: Preserve the method in `callee_save_temp`. - ManagedRegister temp = __ CoreRegisterWithSize(callee_save_temp, kRawPointerSize); - FrameOffset method_offset = mr_conv->MethodStackOffset(); - __ Load(temp, method_offset, kRawPointerSize); - DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u); - __ Load(to_lock, temp, MemberOffset(0u), kObjectReferenceSize); - } else { - // Pass the `this` argument from its spill slot. 
- __ Load(to_lock, mr_conv->CurrentParamStackOffset(), kObjectReferenceSize); - } - __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniUnlockObject)); - } - - // 7.5. Remove activation - need to restore callee save registers since the GC + // 7.4. Remove activation - need to restore callee save registers since the GC // may have changed them. DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size)); if (LIKELY(!is_critical_native) || !main_jni_conv->UseTailCall()) { @@ -731,6 +768,14 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp // 8.3. Exception poll slow path(s). if (LIKELY(!is_critical_native)) { + if (UNLIKELY(is_synchronized)) { + DCHECK(!is_fast_native); + __ Bind(monitor_enter_exception_slow_path.get()); + if (main_out_arg_size != 0) { + jni_asm->cfi().AdjustCFAOffset(main_out_arg_size); + __ DecreaseFrameSize(main_out_arg_size); + } + } __ Bind(exception_slow_path.get()); if (UNLIKELY(is_fast_native) && reference_return) { // We performed the exception check early, so we need to adjust SP and pop IRT frame. diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc index 2fb063f3fd..947320237c 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.cc +++ b/compiler/jni/quick/x86/calling_convention_x86.cc @@ -294,15 +294,6 @@ FrameOffset X86JniCallingConvention::CurrentParamStackOffset() { FrameOffset(displacement_.Int32Value() - OutFrameSize() + (itr_slots_ * kFramePointerSize)); } -ManagedRegister X86JniCallingConvention::LockingArgumentRegister() const { - DCHECK(!IsFastNative()); - DCHECK(!IsCriticalNative()); - DCHECK(IsSynchronized()); - // The callee-save register is EBP is suitable as a locking argument. - static_assert(kCalleeSaveRegisters[0].Equals(X86ManagedRegister::FromCpuRegister(EBP))); - return X86ManagedRegister::FromCpuRegister(EBP); -} - ManagedRegister X86JniCallingConvention::HiddenArgumentRegister() const { CHECK(IsCriticalNative()); // EAX is neither managed callee-save, nor argument register, nor scratch register. diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h index f028090c75..7b62161907 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.h +++ b/compiler/jni/quick/x86/calling_convention_x86.h @@ -77,10 +77,6 @@ class X86JniCallingConvention final : public JniCallingConvention { return HasSmallReturnType(); } - // Locking argument register, used to pass the synchronization object for calls - // to `JniLockObject()` and `JniUnlockObject()`. - ManagedRegister LockingArgumentRegister() const override; - // Hidden argument register, used to pass the method pointer for @CriticalNative call. ManagedRegister HiddenArgumentRegister() const override; diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc index 469de42eff..ddf3d74adc 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc @@ -299,15 +299,6 @@ FrameOffset X86_64JniCallingConvention::CurrentParamStackOffset() { return FrameOffset(offset); } -ManagedRegister X86_64JniCallingConvention::LockingArgumentRegister() const { - DCHECK(!IsFastNative()); - DCHECK(!IsCriticalNative()); - DCHECK(IsSynchronized()); - // The callee-save register is RBX is suitable as a locking argument. 
- static_assert(kCalleeSaveRegisters[0].Equals(X86_64ManagedRegister::FromCpuRegister(RBX))); - return X86_64ManagedRegister::FromCpuRegister(RBX); -} - ManagedRegister X86_64JniCallingConvention::HiddenArgumentRegister() const { CHECK(IsCriticalNative()); // RAX is neither managed callee-save, nor argument register, nor scratch register. diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h index fda5c0e354..ee8603d9ce 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h @@ -72,10 +72,6 @@ class X86_64JniCallingConvention final : public JniCallingConvention { return HasSmallReturnType(); } - // Locking argument register, used to pass the synchronization object for calls - // to `JniLockObject()` and `JniUnlockObject()`. - ManagedRegister LockingArgumentRegister() const override; - // Hidden argument register, used to pass the method pointer for @CriticalNative call. ManagedRegister HiddenArgumentRegister() const override; diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc index bd8aa083eb..9ea6f04cb6 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc @@ -546,6 +546,32 @@ void ArmVIXLJNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, DCHECK_EQ(arg_count, srcs.size()); DCHECK_EQ(arg_count, refs.size()); + // Spill reference registers. Spill two references together with STRD where possible. + for (size_t i = 0; i != arg_count; ++i) { + if (refs[i] != kInvalidReferenceOffset) { + DCHECK_EQ(srcs[i].GetSize(), kObjectReferenceSize); + if (srcs[i].IsRegister()) { + DCHECK_EQ(srcs[i].GetSize(), kObjectReferenceSize); + // Use STRD if we're storing 2 consecutive references within the available STRD range. + if (i + 1u != arg_count && + refs[i + 1u] != kInvalidReferenceOffset && + srcs[i + 1u].IsRegister() && + refs[i].SizeValue() < kStrdOffsetCutoff) { + DCHECK_EQ(srcs[i + 1u].GetSize(), kObjectReferenceSize); + DCHECK_EQ(refs[i + 1u].SizeValue(), refs[i].SizeValue() + kObjectReferenceSize); + ___ Strd(AsVIXLRegister(srcs[i].GetRegister().AsArm()), + AsVIXLRegister(srcs[i + 1u].GetRegister().AsArm()), + MemOperand(sp, refs[i].SizeValue())); + ++i; + } else { + Store(refs[i], srcs[i].GetRegister(), kObjectReferenceSize); + } + } else { + DCHECK_EQ(srcs[i].GetFrameOffset(), refs[i]); + } + } + } + // Convert reference registers to `jobject` values. // TODO: Delay this for references that are copied to another register. for (size_t i = 0; i != arg_count; ++i) { diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc index 561cbbd54b..0f1203e232 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc @@ -382,6 +382,30 @@ void Arm64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, DCHECK_EQ(arg_count, srcs.size()); DCHECK_EQ(arg_count, refs.size()); + // Spill reference registers. Spill two references together with STP where possible. + for (size_t i = 0; i != arg_count; ++i) { + if (refs[i] != kInvalidReferenceOffset) { + DCHECK_EQ(srcs[i].GetSize(), kObjectReferenceSize); + if (srcs[i].IsRegister()) { + // Use STP if we're storing 2 consecutive references within the available STP range. 
+ if (i + 1u != arg_count && + refs[i + 1u].SizeValue() == refs[i].SizeValue() + kObjectReferenceSize && + srcs[i + 1u].IsRegister() && + refs[i].SizeValue() < kStpWOffsetCutoff) { + DCHECK_EQ(srcs[i + 1u].GetSize(), kObjectReferenceSize); + ___ Stp(reg_w(srcs[i].GetRegister().AsArm64().AsWRegister()), + reg_w(srcs[i + 1u].GetRegister().AsArm64().AsWRegister()), + MEM_OP(sp, refs[i].SizeValue())); + ++i; + } else { + Store(refs[i], srcs[i].GetRegister(), kObjectReferenceSize); + } + } else { + DCHECK_EQ(srcs[i].GetFrameOffset(), refs[i]); + } + } + } + auto get_mask = [](ManagedRegister reg) -> uint64_t { Arm64ManagedRegister arm64_reg = reg.AsArm64(); if (arm64_reg.IsXRegister()) { @@ -405,12 +429,12 @@ void Arm64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, }; // More than 8 core or FP reg args are very rare, so we do not optimize for - // that case by using LDP/STP, except for situations that arise even with low - // number of arguments. We use STP for the non-reference spilling which also - // covers the initial spill for native reference register args as they are - // spilled as raw 32-bit values. We also optimize loading args to registers - // with LDP, whether references or not, except for the initial non-null - // reference which we do not need to load at all. + // that case by using LDP/STP, except for situations that arise for normal + // native even with low number of arguments. We use STP for the non-reference + // spilling which also covers the initial spill for native reference register + // args as they are spilled as raw 32-bit values. We also optimize loading + // args to registers with LDP, whether references or not, except for the + // initial non-null reference which we do not need to load at all. // Collect registers to move while storing/copying args to stack slots. // Convert processed references to `jobject`. diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index b35066f434..541458b236 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -153,7 +153,7 @@ const char* const VixlJniHelpersResults = { " 21c: d9 f8 24 80 ldr.w r8, [r9, #36]\n" " 220: 70 47 bx lr\n" " 222: d9 f8 8c 00 ldr.w r0, [r9, #140]\n" - " 226: d9 f8 c4 e2 ldr.w lr, [r9, #708]\n" + " 226: d9 f8 c8 e2 ldr.w lr, [r9, #712]\n" " 22a: f0 47 blx lr\n" }; diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc index 7dff279944..d0afa72155 100644 --- a/compiler/utils/x86/jni_macro_assembler_x86.cc +++ b/compiler/utils/x86/jni_macro_assembler_x86.cc @@ -332,10 +332,6 @@ void X86JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, DCHECK_EQ(src.GetSize(), dest.GetSize()); // Even for references. if (src.IsRegister()) { if (UNLIKELY(dest.IsRegister())) { - if (dest.GetRegister().Equals(src.GetRegister())) { - // JNI compiler sometimes adds a no-op move. - continue; - } // Native ABI has only stack arguments but we may pass one "hidden arg" in register. CHECK(!found_hidden_arg); found_hidden_arg = true; @@ -345,6 +341,7 @@ void X86JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, Move(dest.GetRegister(), src.GetRegister(), dest.GetSize()); } else { if (ref != kInvalidReferenceOffset) { + Store(ref, srcs[i].GetRegister(), kObjectReferenceSize); // Note: We can clobber `src` here as the register cannot hold more than one argument. 
// This overload of `CreateJObject()` currently does not use the scratch // register ECX, so this shall not clobber another argument. diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc index 2da1b470ac..1425a4cc41 100644 --- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc @@ -388,6 +388,7 @@ void X86_64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, DCHECK_EQ(src.GetSize(), dest.GetSize()); } if (src.IsRegister() && ref != kInvalidReferenceOffset) { + Store(ref, src.GetRegister(), kObjectReferenceSize); // Note: We can clobber `src` here as the register cannot hold more than one argument. // This overload of `CreateJObject()` is currently implemented as "test and branch"; // if it was using a conditional move, it would be better to do this at move time. diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc index cca5bc2fc3..7bcff2bafc 100644 --- a/dex2oat/linker/oat_writer_test.cc +++ b/dex2oat/linker/oat_writer_test.cc @@ -505,7 +505,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(64U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(4U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(168 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), + EXPECT_EQ(169 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), sizeof(QuickEntryPoints)); } diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index ca63914759..5ef1d3e17a 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -493,66 +493,52 @@ END art_quick_do_long_jump */ TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER -.macro LOCK_OBJECT_FAST_PATH obj, tmp1, tmp2, tmp3, slow_lock, can_be_null - ldr \tmp1, [rSELF, #THREAD_ID_OFFSET] - .if \can_be_null - cbz \obj, \slow_lock - .endif -1: - ldrex \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - eor \tmp3, \tmp2, \tmp1 @ Prepare the value to store if unlocked + /* + * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the + * possibly null object to lock. + */ + .extern artLockObjectFromCode +ENTRY art_quick_lock_object + ldr r1, [rSELF, #THREAD_ID_OFFSET] + cbz r0, .Lslow_lock +.Lretry_lock: + ldrex r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + eor r3, r2, r1 @ Prepare the value to store if unlocked @ (thread id, count of 0 and preserved read barrier bits), @ or prepare to compare thread id for recursive lock check @ (lock_word.ThreadId() ^ self->ThreadId()). - ands ip, \tmp2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits. - bne 2f @ Check if unlocked. - @ unlocked case - store tmp3: original lock word plus thread id, preserved read barrier bits. - strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - cbnz \tmp2, 3f @ If store failed, retry. + ands ip, r2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits. + bne .Lnot_unlocked @ Check if unlocked. + @ unlocked case - store r3: original lock word plus thread id, preserved read barrier bits. + strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + cbnz r2, .Llock_strex_fail @ If store failed, retry. dmb ish @ Full (LoadLoad|LoadStore) memory barrier. 
bx lr -2: @ tmp2: original lock word, tmp1: thread_id, tmp3: tmp2 ^ tmp1 +.Lnot_unlocked: @ r2: original lock word, r1: thread_id, r3: r2 ^ r1 #if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT #error "Expecting thin lock count and gc state in consecutive bits." #endif - @ Check lock word state and thread id together. - bfc \tmp3, \ - #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, \ - #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE) - cbnz \tmp3, \slow_lock @ if either of the top two bits are set, or the lock word's + @ Check lock word state and thread id together, + bfc r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE) + cbnz r3, .Lslow_lock @ if either of the top two bits are set, or the lock word's @ thread id did not match, go slow path. - add \tmp3, \tmp2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Increment the recursive lock count. + add r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Increment the recursive lock count. @ Extract the new thin lock count for overflow check. - ubfx \tmp2, \tmp3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE - cbz \tmp2, \slow_lock @ Zero as the new count indicates overflow, go slow path. - @ strex necessary for read barrier bits. - strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - cbnz \tmp2, 3f @ If strex failed, retry. + ubfx r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE + cbz r2, .Lslow_lock @ Zero as the new count indicates overflow, go slow path. + strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits. + cbnz r2, .Llock_strex_fail @ If strex failed, retry. bx lr -3: - b 1b @ retry -.endm - - /* - * Entry from managed code that tries to lock the object in a fast path and - * calls `artLockObjectFromCode()` for the difficult cases, may block for GC. - * r0 holds the possibly null object to lock. - */ -ENTRY art_quick_lock_object - // Note: the slow path is actually the art_quick_lock_object_no_inline (tail call). - LOCK_OBJECT_FAST_PATH r0, r1, r2, r3, .Llock_object_slow, /*can_be_null*/ 1 +.Llock_strex_fail: + b .Lretry_lock @ retry +// Note: the slow path is actually the art_quick_lock_object_no_inline (tail call). END art_quick_lock_object - /* - * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC. - * r0 holds the possibly null object to lock. - */ - .extern artLockObjectFromCode ENTRY art_quick_lock_object_no_inline // This is also the slow path for art_quick_lock_object. Note that we // need a local label, the assembler complains about target being out of // range if we try to jump to `art_quick_lock_object_no_inline`. -.Llock_object_slow: +.Lslow_lock: SETUP_SAVE_REFS_ONLY_FRAME r1 @ save callee saves in case we block mov r1, rSELF @ pass Thread::Current bl artLockObjectFromCode @ (Object* obj, Thread*) @@ -562,78 +548,62 @@ ENTRY art_quick_lock_object_no_inline DELIVER_PENDING_EXCEPTION END art_quick_lock_object_no_inline -.macro UNLOCK_OBJECT_FAST_PATH obj, tmp1, tmp2, tmp3, slow_unlock, can_be_null - ldr \tmp1, [rSELF, #THREAD_ID_OFFSET] - .if \can_be_null - cbz \obj, \slow_unlock - .endif -1: + /* + * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure. + * r0 holds the possibly null object to lock. 
+ */ + .extern artUnlockObjectFromCode +ENTRY art_quick_unlock_object + ldr r1, [rSELF, #THREAD_ID_OFFSET] + cbz r0, .Lslow_unlock +.Lretry_unlock: #ifndef USE_READ_BARRIER - ldr \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + ldr r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] #else @ Need to use atomic instructions for read barrier. - ldrex \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + ldrex r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] #endif - eor \tmp3, \tmp2, \tmp1 @ Prepare the value to store if simply locked + eor r3, r2, r1 @ Prepare the value to store if simply locked @ (mostly 0s, and preserved read barrier bits), @ or prepare to compare thread id for recursive lock check @ (lock_word.ThreadId() ^ self->ThreadId()). - ands ip, \tmp3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits. - bne 2f @ Locked recursively or by other thread? + ands ip, r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits. + bne .Lnot_simply_locked @ Locked recursively or by other thread? @ Transition to unlocked. dmb ish @ Full (LoadStore|StoreStore) memory barrier. #ifndef USE_READ_BARRIER - str \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] #else - @ strex necessary for read barrier bits - strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - cbnz \tmp2, 3f @ If the store failed, retry. + strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits + cbnz r2, .Lunlock_strex_fail @ If the store failed, retry. #endif bx lr -2: @ tmp2: original lock word, tmp1: thread_id, tmp3: tmp2 ^ tmp1 +.Lnot_simply_locked: @ r2: original lock word, r1: thread_id, r3: r2 ^ r1 #if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT #error "Expecting thin lock count and gc state in consecutive bits." #endif @ Check lock word state and thread id together, - bfc \tmp3, \ - #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, \ - #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE) - cbnz \tmp3, \slow_unlock @ if either of the top two bits are set, or the lock word's + bfc r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE) + cbnz r3, .Lslow_unlock @ if either of the top two bits are set, or the lock word's @ thread id did not match, go slow path. - sub \tmp3, \tmp2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Decrement recursive lock count. + sub r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Decrement recursive lock count. #ifndef USE_READ_BARRIER - str \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] #else - @ strex necessary for read barrier bits. - strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - cbnz \tmp2, 3f @ If the store failed, retry. + strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits. + cbnz r2, .Lunlock_strex_fail @ If the store failed, retry. #endif bx lr -3: - b 1b @ retry -.endm - - /* - * Entry from managed code that tries to unlock the object in a fast path and calls - * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure. - * r0 holds the possibly null object to unlock. - */ -ENTRY art_quick_unlock_object - // Note: the slow path is actually the art_quick_unlock_object_no_inline (tail call). - UNLOCK_OBJECT_FAST_PATH r0, r1, r2, r3, .Lunlock_object_slow, /*can_be_null*/ 1 +.Lunlock_strex_fail: + b .Lretry_unlock @ retry +// Note: the slow path is actually the art_quick_unlock_object_no_inline (tail call). 
END art_quick_unlock_object - /* - * Entry from managed code that calls `artUnlockObjectFromCode()` - * and delivers exception on failure. - * r0 holds the possibly null object to unlock. - */ - .extern artUnlockObjectFromCode ENTRY art_quick_unlock_object_no_inline // This is also the slow path for art_quick_unlock_object. Note that we // need a local label, the assembler complains about target being out of // range if we try to jump to `art_quick_unlock_object_no_inline`. -.Lunlock_object_slow: +.Lslow_unlock: @ save callee saves in case exception allocation triggers GC SETUP_SAVE_REFS_ONLY_FRAME r1 mov r1, rSELF @ pass Thread::Current @@ -645,80 +615,6 @@ ENTRY art_quick_unlock_object_no_inline END art_quick_unlock_object_no_inline /* - * Entry from JNI stub that tries to lock the object in a fast path and - * calls `artLockObjectFromCode()` (the same as for managed code) for the - * difficult cases, may block for GC. - * Custom calling convention: - * r4 holds the non-null object to lock. - * Callee-save registers have been saved and can be used as temporaries. - * All argument registers need to be preserved. - */ -ENTRY art_quick_lock_object_jni - LOCK_OBJECT_FAST_PATH r4, r5, r6, r7, .Llock_object_jni_slow, /*can_be_null*/ 0 - -.Llock_object_jni_slow: - // Save GPR args r0-r3 and return address. Also save r4 for stack alignment. - push {r0-r4, lr} - .cfi_adjust_cfa_offset 24 - .cfi_rel_offset lr, 20 - // Save FPR args. - vpush {s0-s15} - .cfi_adjust_cfa_offset 64 - // Call `artLockObjectFromCode()` - mov r0, r4 @ Pass the object to lock. - mov r1, rSELF @ Pass Thread::Current(). - bl artLockObjectFromCode @ (Object* obj, Thread*) - // Restore FPR args. - vpop {s0-s15} - .cfi_adjust_cfa_offset -64 - // Check result. - cbnz r0, 1f - // Restore GPR args and r4 and return. - pop {r0-r4, pc} -1: - // GPR args are irrelevant when throwing an exception but pop them anyway with the LR we need. - pop {r0-r4, lr} - .cfi_adjust_cfa_offset -24 - .cfi_restore lr - // Make a tail call to `artDeliverPendingExceptionFromCode()`. - // Rely on the JNI transition frame constructed in the JNI stub. - mov r0, rSELF @ Pass Thread::Current(). - b artDeliverPendingExceptionFromCode @ (Thread*) -END art_quick_lock_object_jni - - /* - * Entry from JNI stub that tries to unlock the object in a fast path and calls - * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock - * is fatal, so we do not need to check for exceptions in the slow path. - * Custom calling convention: - * r4 holds the non-null object to unlock. - * Callee-save registers have been saved and can be used as temporaries. - * Return registers r0-r1 and s0-s1 need to be preserved. - */ - .extern artLockObjectFromJni -ENTRY art_quick_unlock_object_jni - UNLOCK_OBJECT_FAST_PATH r4, r5, r6, r7, .Lunlock_object_jni_slow, /*can_be_null*/ 0 - - .Lunlock_object_jni_slow: - // Save GPR return registers and return address. Also save r4 for stack alignment. - push {r0-r1, r4, lr} - .cfi_adjust_cfa_offset 16 - .cfi_rel_offset lr, 12 - // Save FPR return registers. - vpush {s0-s1} - .cfi_adjust_cfa_offset 8 - // Call `artUnlockObjectFromJni()` - mov r0, r4 @ Pass the object to unlock. - mov r1, rSELF @ Pass Thread::Current(). - bl artUnlockObjectFromJni @ (Object* obj, Thread*) - // Restore FPR return registers. - vpop {s0-s1} - .cfi_adjust_cfa_offset -8 - // Restore GPR return registers and r4 and return. 
- pop {r0-r1, r4, pc} -END art_quick_unlock_object_jni - - /* * Entry from managed code that calls artInstanceOfFromCode and on failure calls * artThrowClassCastExceptionForObject. */ diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 657ff7831f..e5dbeda42d 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -881,52 +881,42 @@ ENTRY art_quick_do_long_jump br xIP1 END art_quick_do_long_jump -.macro LOCK_OBJECT_FAST_PATH obj, slow_lock, can_be_null - // Use scratch registers x8-x11 as temporaries. - ldr w9, [xSELF, #THREAD_ID_OFFSET] - .if \can_be_null - cbz \obj, \slow_lock - .endif + /* + * Entry from managed code that calls artLockObjectFromCode, may block for GC. x0 holds the + * possibly null object to lock. + * + * Derived from arm32 code. + */ + .extern artLockObjectFromCode +ENTRY art_quick_lock_object + ldr w1, [xSELF, #THREAD_ID_OFFSET] + cbz w0, art_quick_lock_object_no_inline // Exclusive load/store has no immediate anymore. - add x8, \obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET -1: - ldaxr w10, [x8] // Acquire needed only in most common case. - eor w11, w10, w9 // Prepare the value to store if unlocked + add x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET +.Lretry_lock: + ldaxr w2, [x4] // Acquire needed only in most common case. + eor w3, w2, w1 // Prepare the value to store if unlocked // (thread id, count of 0 and preserved read barrier bits), // or prepare to compare thread id for recursive lock check // (lock_word.ThreadId() ^ self->ThreadId()). - tst w10, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // Test the non-gc bits. - b.ne 2f // Check if unlocked. - // Unlocked case - store w11: original lock word plus thread id, preserved read barrier bits. - stxr w10, w11, [x8] - cbnz w10, 1b // If the store failed, retry. + tst w2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // Test the non-gc bits. + b.ne .Lnot_unlocked // Check if unlocked. + // unlocked case - store w3: original lock word plus thread id, preserved read barrier bits. + stxr w2, w3, [x4] + cbnz w2, .Lretry_lock // If the store failed, retry. ret -2: // w10: original lock word, w9: thread id, w11: w10 ^ w11 +.Lnot_unlocked: // w2: original lock word, w1: thread id, w3: w2 ^ w1 // Check lock word state and thread id together, - tst w11, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED) - b.ne \slow_lock - add w11, w10, #LOCK_WORD_THIN_LOCK_COUNT_ONE // Increment the recursive lock count. - tst w11, #LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED // Test the new thin lock count. - b.eq \slow_lock // Zero as the new count indicates overflow, go slow path. - stxr w10, w11, [x8] - cbnz w10, 1b // If the store failed, retry. + tst w3, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED) + b.ne art_quick_lock_object_no_inline + add w3, w2, #LOCK_WORD_THIN_LOCK_COUNT_ONE // Increment the recursive lock count. + tst w3, #LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED // Test the new thin lock count. + b.eq art_quick_lock_object_no_inline // Zero as the new count indicates overflow, go slow path. + stxr w2, w3, [x4] + cbnz w2, .Lretry_lock // If the store failed, retry. ret -.endm - - /* - * Entry from managed code that tries to lock the object in a fast path and - * calls `artLockObjectFromCode()` for the difficult cases, may block for GC. - * x0 holds the possibly null object to lock. 
- */ -ENTRY art_quick_lock_object - LOCK_OBJECT_FAST_PATH x0, art_quick_lock_object_no_inline, /*can_be_null*/ 1 END art_quick_lock_object - /* - * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC. - * x0 holds the possibly null object to lock. - */ - .extern artLockObjectFromCode ENTRY art_quick_lock_object_no_inline // This is also the slow path for art_quick_lock_object. SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case we block @@ -937,63 +927,52 @@ ENTRY art_quick_lock_object_no_inline RETURN_IF_W0_IS_ZERO_OR_DELIVER END art_quick_lock_object_no_inline -.macro UNLOCK_OBJECT_FAST_PATH obj, slow_unlock, can_be_null - // Use scratch registers x8-x11 as temporaries. - ldr w9, [xSELF, #THREAD_ID_OFFSET] - .if \can_be_null - cbz \obj, \slow_unlock - .endif + /* + * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure. + * x0 holds the possibly null object to lock. + * + * Derived from arm32 code. + */ + .extern artUnlockObjectFromCode +ENTRY art_quick_unlock_object + ldr w1, [xSELF, #THREAD_ID_OFFSET] + cbz x0, art_quick_unlock_object_no_inline // Exclusive load/store has no immediate anymore. - add x8, \obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET -1: + add x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET +.Lretry_unlock: #ifndef USE_READ_BARRIER - ldr w10, [x8] + ldr w2, [x4] #else - ldxr w10, [x8] // Need to use atomic instructions for read barrier. + ldxr w2, [x4] // Need to use atomic instructions for read barrier. #endif - eor w11, w10, w9 // Prepare the value to store if simply locked + eor w3, w2, w1 // Prepare the value to store if simply locked // (mostly 0s, and preserved read barrier bits), // or prepare to compare thread id for recursive lock check // (lock_word.ThreadId() ^ self->ThreadId()). - tst w11, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // Test the non-gc bits. - b.ne 2f // Locked recursively or by other thread? + tst w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // Test the non-gc bits. + b.ne .Lnot_simply_locked // Locked recursively or by other thread? // Transition to unlocked. #ifndef USE_READ_BARRIER - stlr w11, [x8] + stlr w3, [x4] #else - stlxr w10, w11, [x8] // Need to use atomic instructions for read barrier. - cbnz w10, 1b // If the store failed, retry. + stlxr w2, w3, [x4] // Need to use atomic instructions for read barrier. + cbnz w2, .Lretry_unlock // If the store failed, retry. #endif ret -2: - // Check lock word state and thread id together. - tst w11, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED) +.Lnot_simply_locked: + // Check lock word state and thread id together, + tst w3, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED) b.ne art_quick_unlock_object_no_inline - sub w11, w10, #LOCK_WORD_THIN_LOCK_COUNT_ONE // decrement count + sub w3, w2, #LOCK_WORD_THIN_LOCK_COUNT_ONE // decrement count #ifndef USE_READ_BARRIER - str w11, [x8] + str w3, [x4] #else - stxr w10, w11, [x8] // Need to use atomic instructions for read barrier. - cbnz w10, 1b // If the store failed, retry. + stxr w2, w3, [x4] // Need to use atomic instructions for read barrier. + cbnz w2, .Lretry_unlock // If the store failed, retry. #endif ret -.endm - - /* - * Entry from managed code that tries to unlock the object in a fast path and calls - * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure. - * x0 holds the possibly null object to unlock. 
- */ -ENTRY art_quick_unlock_object - UNLOCK_OBJECT_FAST_PATH x0, art_quick_unlock_object_no_inline, /*can_be_null*/ 1 END art_quick_unlock_object - /* - * Entry from managed code that calls `artUnlockObjectFromCode()` - * and delivers exception on failure. - * x0 holds the possibly null object to unlock. - */ - .extern artUnlockObjectFromCode ENTRY art_quick_unlock_object_no_inline // This is also the slow path for art_quick_unlock_object. SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case exception allocation triggers GC @@ -1005,91 +984,6 @@ ENTRY art_quick_unlock_object_no_inline END art_quick_unlock_object_no_inline /* - * Entry from JNI stub that tries to lock the object in a fast path and - * calls `artLockObjectFromCode()` (the same as for managed code) for the - * difficult cases, may block for GC. - * Custom calling convention: - * x15 holds the non-null object to lock. - * Callee-save registers have been saved and can be used as temporaries. - * All argument registers need to be preserved. - */ -ENTRY art_quick_lock_object_jni - LOCK_OBJECT_FAST_PATH x15, .Llock_object_jni_slow, /*can_be_null*/ 0 - -.Llock_object_jni_slow: - // Save register args x0-x7, d0-d7 and return address. - stp x0, x1, [sp, #-(8 * 8 + 8 * 8 + /*padding*/ 8 + 8)]! - .cfi_adjust_cfa_offset (8 * 8 + 8 * 8 + /*padding*/ 8 + 8) - stp x2, x3, [sp, #16] - stp x4, x5, [sp, #32] - stp x6, x7, [sp, #48] - stp d0, d1, [sp, #64] - stp d2, d3, [sp, #80] - stp d4, d5, [sp, #96] - stp d6, d7, [sp, #112] - str lr, [sp, #136] - .cfi_rel_offset lr, 136 - // Call `artLockObjectFromCode()` - mov x0, x15 // Pass the object to lock. - mov x1, xSELF // Pass Thread::Current(). - bl artLockObjectFromCode // (Object* obj, Thread*) - // Restore return address. - ldr lr, [sp, #136] - .cfi_restore lr - // Check result. - cbnz x0, 1f - // Restore register args x0-x7, d0-d7 and return. - ldp x2, x3, [sp, #16] - ldp x4, x5, [sp, #32] - ldp x6, x7, [sp, #48] - ldp d0, d1, [sp, #64] - ldp d2, d3, [sp, #80] - ldp d4, d5, [sp, #96] - ldp d6, d7, [sp, #112] - ldp x0, x1, [sp], #(8 * 8 + 8 * 8 + /*padding*/ 8 + 8) - .cfi_adjust_cfa_offset -(8 * 8 + 8 * 8 + /*padding*/ 8 + 8) - ret - .cfi_adjust_cfa_offset (8 * 8 + 8 * 8 + /*padding*/ 8 + 8) -1: - // All args are irrelevant when throwing an exception. Remove the spill area. - DECREASE_FRAME (8 * 8 + 8 * 8 + /*padding*/ 8 + 8) - // Make a tail call to `artDeliverPendingExceptionFromCode()`. - // Rely on the JNI transition frame constructed in the JNI stub. - mov x0, xSELF // Pass Thread::Current(). - b artDeliverPendingExceptionFromCode // (Thread*) -END art_quick_lock_object_jni - - /* - * Entry from JNI stub that tries to unlock the object in a fast path and calls - * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock - * is fatal, so we do not need to check for exceptions in the slow path. - * Custom calling convention: - * x15 holds the non-null object to unlock. - * Callee-save registers have been saved and can be used as temporaries. - * Return registers r0 and d0 need to be preserved. - */ -ENTRY art_quick_unlock_object_jni - UNLOCK_OBJECT_FAST_PATH x15, .Lunlock_object_jni_slow, /*can_be_null*/ 0 - - .Lunlock_object_jni_slow: - // Save return registers and return address. - stp x0, lr, [sp, #-32]! - .cfi_adjust_cfa_offset 32 - .cfi_rel_offset lr, 8 - str d0, [sp, #16] - // Call `artUnlockObjectFromJni()` - mov x0, x15 // Pass the object to unlock. - mov x1, xSELF // Pass Thread::Current(). 
- bl artUnlockObjectFromJni // (Object* obj, Thread*) - // Restore return registers and return. - ldr d0, [sp, #16] - ldp x0, lr, [sp], #32 - .cfi_adjust_cfa_offset -32 - .cfi_restore lr - ret -END art_quick_unlock_object_jni - - /* * Entry from managed code that calls artInstanceOfFromCode and on failure calls * artThrowClassCastExceptionForObject. */ diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index d16f15ca21..2f6af4f5de 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1133,236 +1133,145 @@ ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromC TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO -MACRO4(LOCK_OBJECT_FAST_PATH, obj, tmp, saved_eax, slow_lock) -1: - movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax // EAX := lock word - movl %fs:THREAD_ID_OFFSET, REG_VAR(tmp) // tmp: thread id. - xorl %eax, REG_VAR(tmp) // tmp: thread id with count 0 + read barrier bits. - testl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %eax // Test the non-gc bits. - jnz 2f // Check if unlocked. - // Unlocked case - store tmp: original lock word plus thread id, preserved read barrier bits. - // EAX: old val, tmp: new val. - lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) - jnz 1b // cmpxchg failed retry - .ifnc \saved_eax, none - movl REG_VAR(saved_eax), %eax // Restore EAX. - .endif +DEFINE_FUNCTION art_quick_lock_object + testl %eax, %eax // null check object/eax + jz .Lslow_lock +.Lretry_lock: + movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx // ecx := lock word + test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx // test the 2 high bits. + jne .Lslow_lock // slow path if either of the two high bits are set. + movl %ecx, %edx // save lock word (edx) to keep read barrier bits. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits. + test %ecx, %ecx + jnz .Lalready_thin // lock word contains a thin lock + // unlocked case - edx: original lock word, eax: obj. + movl %eax, %ecx // remember object in case of retry + movl %edx, %eax // eax: lock word zero except for read barrier bits. + movl %fs:THREAD_ID_OFFSET, %edx // load thread id. + or %eax, %edx // edx: thread id with count of 0 + read barrier bits. + lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) // eax: old val, edx: new val. + jnz .Llock_cmpxchg_fail // cmpxchg failed retry ret -2: // EAX: original lock word, tmp: thread id ^ EAX - // Check lock word state and thread id together, - testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \ - REG_VAR(tmp) - jne \slow_lock // Slow path if either of the two high bits are set. - // Increment the recursive lock count. - leal LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp) - testl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED), REG_VAR(tmp) - jz \slow_lock // If count overflowed, go to slow lock. - // Update lockword for recursive lock, cmpxchg necessary for read barrier bits. - // EAX: old val, tmp: new val. - lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) - jnz 1b // cmpxchg failed retry - .ifnc \saved_eax, none - movl REG_VAR(saved_eax), %eax // Restore EAX. - .endif +.Lalready_thin: // edx: lock word (with high 2 bits zero and original rb bits), eax: obj. + movl %fs:THREAD_ID_OFFSET, %ecx // ecx := thread id + cmpw %cx, %dx // do we hold the lock already? 
+ jne .Lslow_lock + movl %edx, %ecx // copy the lock word to check count overflow. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the read barrier bits. + addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count for overflow check. + test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // overflowed if the first gc state bit is set. + jne .Lslow_lock // count overflowed so go slow + movl %eax, %ecx // save obj to use eax for cmpxchg. + movl %edx, %eax // copy the lock word as the old val for cmpxchg. + addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real. + // update lockword, cmpxchg necessary for read barrier bits. + lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) // eax: old val, edx: new val. + jnz .Llock_cmpxchg_fail // cmpxchg failed retry ret -END_MACRO - - /* - * Entry from managed code that tries to lock the object in a fast path and - * calls `artLockObjectFromCode()` for the difficult cases, may block for GC. - * EAX holds the possibly null object to lock. - */ -DEFINE_FUNCTION art_quick_lock_object - testl %eax, %eax - jz SYMBOL(art_quick_lock_object_no_inline) - movl %eax, %ecx // Move obj to a different register. - LOCK_OBJECT_FAST_PATH ecx, edx, /*saved_eax*/ none, .Llock_object_slow -.Llock_object_slow: - movl %ecx, %eax // Move obj back to EAX. - jmp SYMBOL(art_quick_lock_object_no_inline) +.Llock_cmpxchg_fail: + movl %ecx, %eax // restore eax + jmp .Lretry_lock +.Lslow_lock: + SETUP_SAVE_REFS_ONLY_FRAME ebx // save ref containing registers for GC + // Outgoing argument set up + subl LITERAL(8), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(8) + pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() + CFI_ADJUST_CFA_OFFSET(4) + PUSH eax // pass object + call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*) + addl LITERAL(16), %esp // pop arguments + CFI_ADJUST_CFA_OFFSET(-16) + RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address + RETURN_IF_EAX_ZERO END_FUNCTION art_quick_lock_object - /* - * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC. - * EAX holds the possibly null object to lock. - */ DEFINE_FUNCTION art_quick_lock_object_no_inline - // This is also the slow path for art_quick_lock_object. SETUP_SAVE_REFS_ONLY_FRAME ebx // save ref containing registers for GC // Outgoing argument set up - INCREASE_FRAME 8 // alignment padding + subl LITERAL(8), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(8) pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) - PUSH_ARG eax // pass object + PUSH eax // pass object call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*) - DECREASE_FRAME 16 // pop arguments + addl LITERAL(16), %esp // pop arguments + CFI_ADJUST_CFA_OFFSET(-16) RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address RETURN_IF_EAX_ZERO END_FUNCTION art_quick_lock_object_no_inline -MACRO4(UNLOCK_OBJECT_FAST_PATH, obj, tmp, saved_eax, slow_unlock) -1: - movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax // EAX := lock word - movl %fs:THREAD_ID_OFFSET, REG_VAR(tmp) // tmp := thread id - xorl %eax, REG_VAR(tmp) // tmp := thread id ^ lock word - test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), REG_VAR(tmp) - jnz 2f // Check if simply locked. - // Transition to unlocked. 
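The arm64 and x86 sequences restored above re-inline the thin-lock acquire that the deleted LOCK_OBJECT_FAST_PATH macros had factored out: install the current thread id into an unlocked lock word, bump the recursion count for a lock already held by this thread, and bail to artLockObjectFromCode for anything else. A rough C++ sketch of that logic follows; the bit layout and constant names are illustrative stand-ins, not ART's real LOCK_WORD_* values, and the thread id is assumed to be non-zero and to fit in the owner field.

#include <atomic>
#include <cstdint>

// Illustrative lock word layout (not ART's exact constants):
//   bits  0-15: owner thread id      bits 16-27: recursion count
//   bits 28-29: GC / read barrier    bits 30-31: lock state (0 = thin or unlocked)
constexpr uint32_t kOwnerMask   = 0x0000ffffu;
constexpr uint32_t kCountOne    = 0x00010000u;
constexpr uint32_t kCountMask   = 0x0fff0000u;
constexpr uint32_t kGcStateMask = 0x30000000u;
constexpr uint32_t kStateMask   = 0xc0000000u;

// Returns false when the caller must take the slow path (artLockObjectFromCode):
// inflated monitor, lock held by another thread, or recursion count overflow.
inline bool ThinLockFastPath(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
  while (true) {
    uint32_t old_word = lock_word.load(std::memory_order_relaxed);
    if ((old_word & ~kGcStateMask) == 0) {
      // Unlocked: install our thread id with a count of 0, preserving the GC bits.
      uint32_t new_word = old_word | thread_id;
      if (lock_word.compare_exchange_weak(old_word, new_word, std::memory_order_acquire)) {
        return true;
      }
      continue;  // The CAS failed, reload and retry.
    }
    if ((old_word & (kStateMask | kOwnerMask)) != thread_id) {
      return false;  // Inflated, forwarding address, or owned by another thread.
    }
    uint32_t new_word = old_word + kCountOne;  // Recursive acquisition.
    if ((new_word & kCountMask) == 0) {
      return false;  // Count overflow: let the slow path inflate the lock.
    }
    if (lock_word.compare_exchange_weak(old_word, new_word, std::memory_order_relaxed)) {
      return true;
    }
    // The CAS failed (e.g. a concurrent read barrier state change), retry.
  }
}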
+ +DEFINE_FUNCTION art_quick_unlock_object + testl %eax, %eax // null check object/eax + jz .Lslow_unlock +.Lretry_unlock: + movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx // ecx := lock word + movl %fs:THREAD_ID_OFFSET, %edx // edx := thread id + test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx + jnz .Lslow_unlock // lock word contains a monitor + cmpw %cx, %dx // does the thread id match? + jne .Lslow_unlock + movl %ecx, %edx // copy the lock word to detect new count of 0. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx // zero the gc bits. + cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx + jae .Lrecursive_thin_unlock + // update lockword, cmpxchg necessary for read barrier bits. + movl %eax, %edx // edx: obj + movl %ecx, %eax // eax: old lock word. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // ecx: new lock word zero except original rb bits. #ifndef USE_READ_BARRIER - movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) + movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) #else - lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) - jnz 1b // cmpxchg failed retry + lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) // eax: old val, ecx: new val. + jnz .Lunlock_cmpxchg_fail // cmpxchg failed retry #endif - .ifnc \saved_eax, none - movl REG_VAR(saved_eax), %eax // Restore EAX. - .endif ret -2: // EAX: original lock word, tmp: lock_word ^ thread id - // Check lock word state and thread id together. - testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \ - REG_VAR(tmp) - jnz \slow_unlock - // Update lockword for recursive unlock, cmpxchg necessary for read barrier bits. - // tmp: new lock word with decremented count. - leal -LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp) +.Lrecursive_thin_unlock: // ecx: original lock word, eax: obj + // update lockword, cmpxchg necessary for read barrier bits. + movl %eax, %edx // edx: obj + movl %ecx, %eax // eax: old lock word. + subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // ecx: new lock word with decremented count. #ifndef USE_READ_BARRIER - movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) + mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) #else - lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) - jnz 1b // cmpxchg failed retry + lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) // eax: old val, ecx: new val. + jnz .Lunlock_cmpxchg_fail // cmpxchg failed retry #endif - .ifnc \saved_eax, none - movl REG_VAR(saved_eax), %eax // Restore EAX. - .endif ret -END_MACRO - - /* - * Entry from managed code that tries to unlock the object in a fast path and calls - * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure. - * EAX holds the possibly null object to unlock. - */ -DEFINE_FUNCTION art_quick_unlock_object - testl %eax, %eax - jz SYMBOL(art_quick_unlock_object_no_inline) - movl %eax, %ecx // Move obj to a different register. - UNLOCK_OBJECT_FAST_PATH ecx, edx, /*saved_eax*/ none, .Lunlock_object_slow -.Lunlock_object_slow: - movl %ecx, %eax // Move obj back to EAX. 
- jmp SYMBOL(art_quick_unlock_object_no_inline) +.Lunlock_cmpxchg_fail: // edx: obj + movl %edx, %eax // restore eax + jmp .Lretry_unlock +.Lslow_unlock: + SETUP_SAVE_REFS_ONLY_FRAME ebx // save ref containing registers for GC + // Outgoing argument set up + subl LITERAL(8), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(8) + pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() + CFI_ADJUST_CFA_OFFSET(4) + PUSH eax // pass object + call SYMBOL(artUnlockObjectFromCode) // artUnlockObjectFromCode(object, Thread*) + addl LITERAL(16), %esp // pop arguments + CFI_ADJUST_CFA_OFFSET(-16) + RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address + RETURN_IF_EAX_ZERO END_FUNCTION art_quick_unlock_object - /* - * Entry from managed code that calls `artUnlockObjectFromCode()` - * and delivers exception on failure. - * EAX holds the possibly null object to unlock. - */ DEFINE_FUNCTION art_quick_unlock_object_no_inline - // This is also the slow path for art_quick_unlock_object. SETUP_SAVE_REFS_ONLY_FRAME ebx // save ref containing registers for GC // Outgoing argument set up - INCREASE_FRAME 8 // alignment padding + subl LITERAL(8), %esp // alignment padding + CFI_ADJUST_CFA_OFFSET(8) pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) - PUSH_ARG eax // pass object + PUSH eax // pass object call SYMBOL(artUnlockObjectFromCode) // artUnlockObjectFromCode(object, Thread*) - DECREASE_FRAME 16 // pop arguments + addl LITERAL(16), %esp // pop arguments + CFI_ADJUST_CFA_OFFSET(-16) RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address RETURN_IF_EAX_ZERO END_FUNCTION art_quick_unlock_object_no_inline - /* - * Entry from JNI stub that tries to lock the object in a fast path and - * calls `artLockObjectFromCode()` (the same as for managed code) for the - * difficult cases, may block for GC. - * Custom calling convention: - * EBP holds the non-null object to lock. - * Callee-save registers have been saved and can be used as temporaries (except EBP). - * All argument registers need to be preserved. - */ -DEFINE_FUNCTION art_quick_lock_object_jni - movl %eax, %edi // Preserve EAX in a callee-save register. - LOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi .Llock_object_jni_slow - -.Llock_object_jni_slow: - // Save register args EAX, ECX, EDX, EBX, mmx0-mmx3 and align stack. - PUSH_ARG ebx - PUSH_ARG edx - PUSH_ARG ecx - PUSH_ARG edi // Original contents of EAX. - INCREASE_FRAME (/*FPRs*/ 4 * 8 + /*padding*/ 4) // Make xmm<n> spill slots 8-byte aligned. - movsd %xmm0, 0(%esp) - movsd %xmm1, 8(%esp) - movsd %xmm2, 16(%esp) - movsd %xmm3, 24(%esp) - // Note: The stack is not 16-byte aligned here but it shall be after pushing args for the call. - // Call `artLockObjectFromCode()` - pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). - CFI_ADJUST_CFA_OFFSET(4) - PUSH_ARG ebp // Pass the object to lock. - call SYMBOL(artLockObjectFromCode) // (object, Thread*) - // Check result. - testl %eax, %eax - jnz 1f - // Restore register args EAX, ECX, EDX, EBX, mmx0-mmx3 and return. - movsd 8(%esp), %xmm0 - movsd 16(%esp), %xmm1 - movsd 24(%esp), %xmm2 - movsd 32(%esp), %xmm3 - DECREASE_FRAME /*call args*/ 8 + /*FPR args*/ 4 * 8 + /*padding*/ 4 - POP_ARG eax - POP_ARG ecx - POP_ARG edx - POP_ARG ebx - ret - .cfi_adjust_cfa_offset (/*call args*/ 8 + /*FPRs*/ 4 * 8 + /*padding*/ 4 + /*GPRs*/ 4 * 4) -1: - // All args are irrelevant when throwing an exception. - // Remove the spill area except for new padding to align stack. 
- DECREASE_FRAME \ - (/*call args*/ 8 + /*FPRs*/ 4 * 8 + /*padding*/ 4 + /*GPRs*/ 4 * 4 - /*new padding*/ 8) - // Rely on the JNI transition frame constructed in the JNI stub. - pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() - CFI_ADJUST_CFA_OFFSET(4) - call SYMBOL(artDeliverPendingExceptionFromCode) // (Thread*) - UNREACHABLE -END_FUNCTION art_quick_lock_object_jni - - /* - * Entry from JNI stub that tries to unlock the object in a fast path and calls - * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock - * is fatal, so we do not need to check for exceptions in the slow path. - * Custom calling convention: - * EBP holds the non-null object to unlock. - * Callee-save registers have been saved and can be used as temporaries (except EBP). - * Return registers EAX, EDX and mmx0 need to be preserved. - */ - .extern artLockObjectFromJni -DEFINE_FUNCTION art_quick_unlock_object_jni - movl %eax, %edi // Preserve EAX in a different register. - UNLOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi, .Lunlock_object_jni_slow - - .Lunlock_object_jni_slow: - // Save return registers. - PUSH_ARG edx - PUSH_ARG edi // Original contents of EAX. - INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 4 - movsd %xmm0, 0(%esp) - // Note: The stack is not 16-byte aligned here but it shall be after pushing args for the call. - // Call `artUnlockObjectFromJni()` - pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). - CFI_ADJUST_CFA_OFFSET(4) - PUSH_ARG ebp // Pass the object to unlock. - call SYMBOL(artUnlockObjectFromJni) // (object, Thread*) - // Restore return registers and return. - movsd 8(%esp), %xmm0 - DECREASE_FRAME /*call args*/ 8 + /*xmm0*/ 8 + /*padding*/ 4 - POP_ARG eax - POP_ARG edx - ret -END_FUNCTION art_quick_unlock_object_jni - DEFINE_FUNCTION art_quick_instance_of PUSH eax // alignment padding PUSH ecx // pass arg2 - obj->klass diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 06715858a1..136198fe55 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1068,50 +1068,48 @@ ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromC TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO -MACRO3(LOCK_OBJECT_FAST_PATH, obj, tmp, slow_lock) -1: - movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax // EAX := lock word - movl %gs:THREAD_ID_OFFSET, REG_VAR(tmp) // tmp: thread id. - xorl %eax, REG_VAR(tmp) // tmp: thread id with count 0 + read barrier bits. - testl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %eax // Test the non-gc bits. - jnz 2f // Check if unlocked. - // Unlocked case - store tmp: original lock word plus thread id, preserved read barrier bits. - lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) - jnz 1b // cmpxchg failed retry +DEFINE_FUNCTION art_quick_lock_object + testl %edi, %edi // Null check object/rdi. + jz .Lslow_lock +.Lretry_lock: + movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx // ecx := lock word. + test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx // Test the 2 high bits. + jne .Lslow_lock // Slow path if either of the two high bits are set. + movl %ecx, %edx // save lock word (edx) to keep read barrier bits. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits. + test %ecx, %ecx + jnz .Lalready_thin // Lock word contains a thin lock. + // unlocked case - edx: original lock word, edi: obj. 
+ movl %edx, %eax // eax: lock word zero except for read barrier bits. + movl %gs:THREAD_ID_OFFSET, %edx // edx := thread id + or %eax, %edx // edx: thread id with count of 0 + read barrier bits. + lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) + jnz .Lretry_lock // cmpxchg failed retry ret -2: // EAX: original lock word, tmp: thread id ^ EAX - // Check lock word state and thread id together, - testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \ - REG_VAR(tmp) - jne \slow_lock // Slow path if either of the two high bits are set. - // Increment the recursive lock count. - leal LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp) - testl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED), REG_VAR(tmp) - je \slow_lock // If count overflowed, go to slow lock. - // Update lockword for recursive lock, cmpxchg necessary for read barrier bits. - // EAX: old val, tmp: new val. - lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) - jnz 1b // cmpxchg failed retry +.Lalready_thin: // edx: lock word (with high 2 bits zero and original rb bits), edi: obj. + movl %gs:THREAD_ID_OFFSET, %ecx // ecx := thread id + cmpw %cx, %dx // do we hold the lock already? + jne .Lslow_lock + movl %edx, %ecx // copy the lock word to check count overflow. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits. + addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count + test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if the upper bit (28) is set + jne .Lslow_lock // count overflowed so go slow + movl %edx, %eax // copy the lock word as the old val for cmpxchg. + addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real. + // update lockword, cmpxchg necessary for read barrier bits. + lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, edx: new val. + jnz .Lretry_lock // cmpxchg failed retry ret -END_MACRO - - /* - * Entry from managed code that tries to lock the object in a fast path and - * calls `artLockObjectFromCode()` for the difficult cases, may block for GC. - * RDI holds the possibly null object to lock. - */ -DEFINE_FUNCTION art_quick_lock_object - testq %rdi, %rdi // Null check object. - jz art_quick_lock_object_no_inline - LOCK_OBJECT_FAST_PATH rdi, ecx, art_quick_lock_object_no_inline +.Lslow_lock: + SETUP_SAVE_REFS_ONLY_FRAME + movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() + call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*) + RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address + RETURN_IF_EAX_ZERO END_FUNCTION art_quick_lock_object - /* - * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC. - * RDI holds the possibly null object to lock. - */ DEFINE_FUNCTION art_quick_lock_object_no_inline - // This is also the slow path for art_quick_lock_object. 
SETUP_SAVE_REFS_ONLY_FRAME movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*) @@ -1119,63 +1117,50 @@ DEFINE_FUNCTION art_quick_lock_object_no_inline RETURN_IF_EAX_ZERO END_FUNCTION art_quick_lock_object_no_inline -MACRO4(UNLOCK_OBJECT_FAST_PATH, obj, tmp, saved_rax, slow_unlock) -1: - movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax // EAX := lock word - movl %gs:THREAD_ID_OFFSET, REG_VAR(tmp) // tmp := thread id - xorl %eax, REG_VAR(tmp) // tmp := thread id ^ lock word - test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), REG_VAR(tmp) - jnz 2f // Check if simply locked. - // Transition to unlocked. +DEFINE_FUNCTION art_quick_unlock_object + testl %edi, %edi // null check object/edi + jz .Lslow_unlock +.Lretry_unlock: + movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx // ecx := lock word + movl %gs:THREAD_ID_OFFSET, %edx // edx := thread id + test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx + jnz .Lslow_unlock // lock word contains a monitor + cmpw %cx, %dx // does the thread id match? + jne .Lslow_unlock + movl %ecx, %edx // copy the lock word to detect new count of 0. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx // zero the gc bits. + cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx + jae .Lrecursive_thin_unlock + // update lockword, cmpxchg necessary for read barrier bits. + movl %ecx, %eax // eax: old lock word. + andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // ecx: new lock word zero except original gc bits. #ifndef USE_READ_BARRIER - movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) + movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) #else - lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) - jnz 1b // cmpxchg failed retry + lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, ecx: new val. + jnz .Lretry_unlock // cmpxchg failed retry #endif - .ifnc \saved_rax, none - movq REG_VAR(saved_rax), %rax // Restore RAX. - .endif ret -2: // EAX: original lock word, tmp: lock_word ^ thread id - // Check lock word state and thread id together. - testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \ - REG_VAR(tmp) - jnz \slow_unlock - // Update lockword for recursive unlock, cmpxchg necessary for read barrier bits. - // tmp: new lock word with decremented count. - leal -LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp) +.Lrecursive_thin_unlock: // ecx: original lock word, edi: obj + // update lockword, cmpxchg necessary for read barrier bits. + movl %ecx, %eax // eax: old lock word. + subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx #ifndef USE_READ_BARRIER - // EAX: new lock word with decremented count. - movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) + mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) #else - lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) - jnz 1b // cmpxchg failed retry + lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, ecx: new val. + jnz .Lretry_unlock // cmpxchg failed retry #endif - .ifnc \saved_rax, none - movq REG_VAR(saved_rax), %rax // Restore RAX. - .endif ret -END_MACRO - - /* - * Entry from managed code that tries to unlock the object in a fast path and calls - * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure. - * RDI holds the possibly null object to unlock. - */ -DEFINE_FUNCTION art_quick_unlock_object - testq %rdi, %rdi // Null check object. 
- jz art_quick_lock_object_no_inline - UNLOCK_OBJECT_FAST_PATH rdi, ecx, /*saved_rax*/ none, art_quick_unlock_object_no_inline +.Lslow_unlock: + SETUP_SAVE_REFS_ONLY_FRAME + movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() + call SYMBOL(artUnlockObjectFromCode) // artUnlockObjectFromCode(object, Thread*) + RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address + RETURN_IF_EAX_ZERO END_FUNCTION art_quick_unlock_object - /* - * Entry from managed code that calls `artUnlockObjectFromCode()` - * and delivers exception on failure. - * RDI holds the possibly null object to unlock. - */ DEFINE_FUNCTION art_quick_unlock_object_no_inline - // This is also the slow path for art_quick_unlock_object. SETUP_SAVE_REFS_ONLY_FRAME movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() call SYMBOL(artUnlockObjectFromCode) // artUnlockObjectFromCode(object, Thread*) @@ -1183,97 +1168,6 @@ DEFINE_FUNCTION art_quick_unlock_object_no_inline RETURN_IF_EAX_ZERO END_FUNCTION art_quick_unlock_object_no_inline - /* - * Entry from JNI stub that tries to lock the object in a fast path and - * calls `artLockObjectFromCode()` (the same as for managed code) for the - * difficult cases, may block for GC. - * Custom calling convention: - * RBX holds the non-null object to lock. - * Callee-save registers have been saved and can be used as temporaries (except RBX). - * All argument registers need to be preserved. - */ -DEFINE_FUNCTION art_quick_lock_object_jni - LOCK_OBJECT_FAST_PATH rbx, ebp, .Llock_object_jni_slow - -.Llock_object_jni_slow: - // Save register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and align stack. - PUSH_ARG r9 - PUSH_ARG r8 - PUSH_ARG rcx - PUSH_ARG rdx - PUSH_ARG rsi - PUSH_ARG rdi - INCREASE_FRAME (/*FPRs*/ 8 * 8 + /*padding*/ 8) - movsd %xmm0, 0(%rsp) - movsd %xmm1, 8(%rsp) - movsd %xmm2, 16(%rsp) - movsd %xmm3, 24(%rsp) - movsd %xmm4, 32(%rsp) - movsd %xmm5, 40(%rsp) - movsd %xmm6, 48(%rsp) - movsd %xmm7, 56(%rsp) - // Call `artLockObjectFromCode()` - movq %rbx, %rdi // Pass the object to lock. - movq %gs:THREAD_SELF_OFFSET, %rsi // Pass Thread::Current(). - call SYMBOL(artLockObjectFromCode) // (object, Thread*) - // Check result. - testl %eax, %eax - jnz 1f - // Restore register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and return. - movsd 0(%esp), %xmm0 - movsd 8(%esp), %xmm1 - movsd 16(%esp), %xmm2 - movsd 24(%esp), %xmm3 - movsd 32(%esp), %xmm4 - movsd 40(%esp), %xmm5 - movsd 48(%esp), %xmm6 - movsd 56(%esp), %xmm7 - DECREASE_FRAME /*FPR args*/ 8 * 8 + /*padding*/ 8 - POP_ARG rdi - POP_ARG rsi - POP_ARG rdx - POP_ARG rcx - POP_ARG r8 - POP_ARG r9 - ret - .cfi_adjust_cfa_offset (/*FPRs*/ 8 * 8 + /*padding*/ 8 + /*GPRs*/ 6 * 8) -1: - // All args are irrelevant when throwing an exception. Remove the spill area. - DECREASE_FRAME (/*FPRs*/ 8 * 8 + /*padding*/ 8 + /*GPRs*/ 6 * 8) - // Rely on the JNI transition frame constructed in the JNI stub. - movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread::Current(). - jmp SYMBOL(artDeliverPendingExceptionFromCode) // (Thread*); tail call. -END_FUNCTION art_quick_lock_object_jni - - /* - * Entry from JNI stub that tries to unlock the object in a fast path and calls - * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock - * is fatal, so we do not need to check for exceptions in the slow path. - * Custom calling convention: - * RBX holds the non-null object to unlock. - * Callee-save registers have been saved and can be used as temporaries (except RBX). 
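The unlock stubs are the mirror image: drop one recursion level when the count is non-zero, otherwise clear the owner while keeping only the read barrier bits, and fall back to artUnlockObjectFromCode for an inflated monitor or a non-owning thread. Continuing the sketch above, with the same headers and illustrative constants:

// Returns false when the slow path (artUnlockObjectFromCode) must handle the unlock.
inline bool ThinUnlockFastPath(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
  while (true) {
    uint32_t old_word = lock_word.load(std::memory_order_relaxed);
    if ((old_word & (kStateMask | kOwnerMask)) != thread_id) {
      return false;  // Inflated monitor or not owned by this thread.
    }
    const bool recursive = (old_word & kCountMask) != 0;
    const uint32_t new_word = recursive
        ? old_word - kCountOne         // Drop one recursion level.
        : old_word & kGcStateMask;     // Full release: keep only the GC bits.
    const std::memory_order order =
        recursive ? std::memory_order_relaxed : std::memory_order_release;
    if (lock_word.compare_exchange_weak(old_word, new_word, order)) {
      return true;
    }
    // Without read barriers ART uses a plain store instead of this CAS; the CAS here
    // also covers the read-barrier build, where the GC bits may change concurrently.
  }
}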
- * Return registers RAX and mmx0 need to be preserved. - */ -DEFINE_FUNCTION art_quick_unlock_object_jni - movq %rax, %r12 // Preserve RAX in a different register. - UNLOCK_OBJECT_FAST_PATH rbx, ebp, /*saved_rax*/ r12, .Lunlock_object_jni_slow - - .Lunlock_object_jni_slow: - // Save return registers and return address. - PUSH_ARG r12 // Original contents of RAX. - INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8 - movsd %xmm0, 0(%rsp) - // Call `artUnlockObjectFromJni()` - movq %rbx, %rdi // Pass the object to unlock. - movq %gs:THREAD_SELF_OFFSET, %rsi // Pass Thread::Current(). - call SYMBOL(artUnlockObjectFromJni) // (object, Thread*) - // Restore return registers and return. - movsd 0(%rsp), %xmm0 - DECREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8 - POP_ARG rax - ret -END_FUNCTION art_quick_unlock_object_jni - DEFINE_FUNCTION art_quick_check_instance_of // Type check using the bit string passes null as the target class. In that case just throw. testl %esi, %esi diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index a160a7baf0..6e78b53ff8 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -805,27 +805,23 @@ inline bool NeedsClinitCheckBeforeCall(ArtMethod* method) { return method->IsStatic() && !method->IsConstructor(); } -inline ObjPtr<mirror::Object> GetGenericJniSynchronizationObject(Thread* self, ArtMethod* called) +inline jobject GetGenericJniSynchronizationObject(Thread* self, ArtMethod* called) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(!called->IsCriticalNative()); DCHECK(!called->IsFastNative()); DCHECK(self->GetManagedStack()->GetTopQuickFrame() != nullptr); DCHECK_EQ(*self->GetManagedStack()->GetTopQuickFrame(), called); - // We do not need read barriers here. - // On method entry, all reference arguments are to-space references and we mark the - // declaring class of a static native method if needed. When visiting thread roots at - // the start of a GC, we visit all these references to ensure they point to the to-space. if (called->IsStatic()) { // Static methods synchronize on the declaring class object. - return called->GetDeclaringClass<kWithoutReadBarrier>(); + // The `jclass` is a pointer to the method's declaring class. + return reinterpret_cast<jobject>(called->GetDeclaringClassAddressWithoutBarrier()); } else { // Instance methods synchronize on the `this` object. // The `this` reference is stored in the first out vreg in the caller's frame. + // The `jobject` is a pointer to the spill slot. uint8_t* sp = reinterpret_cast<uint8_t*>(self->GetManagedStack()->GetTopQuickFrame()); size_t frame_size = RuntimeCalleeSaveFrame::GetFrameSize(CalleeSaveType::kSaveRefsAndArgs); - StackReference<mirror::Object>* this_ref = reinterpret_cast<StackReference<mirror::Object>*>( - sp + frame_size + static_cast<size_t>(kRuntimePointerSize)); - return this_ref->AsMirrorPtr(); + return reinterpret_cast<jobject>(sp + frame_size + static_cast<size_t>(kRuntimePointerSize)); } } diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h index 4731a867d2..72b4c030f8 100644 --- a/runtime/entrypoints/entrypoint_utils.h +++ b/runtime/entrypoints/entrypoint_utils.h @@ -217,7 +217,7 @@ bool NeedsClinitCheckBeforeCall(ArtMethod* method) REQUIRES_SHARED(Locks::mutato // Returns the synchronization object for a native method for a GenericJni frame // we have just created or are about to exit. 
The synchronization object is // the class object for static methods and the `this` object otherwise. -ObjPtr<mirror::Object> GetGenericJniSynchronizationObject(Thread* self, ArtMethod* called) +jobject GetGenericJniSynchronizationObject(Thread* self, ArtMethod* called) REQUIRES_SHARED(Locks::mutator_lock_); // Update .bss method entrypoint if the `callee_reference` has an associated oat file diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h index f43e25fec1..6ecf3fd59c 100644 --- a/runtime/entrypoints/quick/quick_default_externs.h +++ b/runtime/entrypoints/quick/quick_default_externs.h @@ -114,13 +114,9 @@ extern "C" void art_quick_invoke_super_trampoline_with_access_check(uint32_t, vo extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*); -// JNI read barrier entrypoint. Note: Preserves all registers. +// JNI read barrier entrypoint. extern "C" void art_read_barrier_jni(art::ArtMethod* method); -// JNI lock/unlock entrypoints. Note: Custom calling convention. -extern "C" void art_quick_lock_object_jni(art::mirror::Object*); -extern "C" void art_quick_unlock_object_jni(art::mirror::Object*); - // Polymorphic invoke entrypoints. extern "C" void art_quick_invoke_polymorphic(uint32_t, void*); extern "C" void art_quick_invoke_custom(uint32_t, void*); diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h index df52e2344d..9f1766d3f2 100644 --- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h +++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h @@ -74,12 +74,13 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qp // JNI qpoints->pJniMethodStart = JniMethodStart; + qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized; qpoints->pJniMethodEnd = JniMethodEnd; + qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized; qpoints->pJniMethodEndWithReference = JniMethodEndWithReference; + qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized; qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline; qpoints->pJniDecodeReferenceResult = JniDecodeReferenceResult; - qpoints->pJniLockObject = art_quick_lock_object_jni; - qpoints->pJniUnlockObject = art_quick_unlock_object_jni; // Locks if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) { @@ -136,8 +137,12 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qp PaletteShouldReportJniInvocations(&should_report); if (should_report) { qpoints->pJniMethodStart = JniMonitoredMethodStart; + qpoints->pJniMethodStartSynchronized = JniMonitoredMethodStartSynchronized; qpoints->pJniMethodEnd = JniMonitoredMethodEnd; + qpoints->pJniMethodEndSynchronized = JniMonitoredMethodEndSynchronized; qpoints->pJniMethodEndWithReference = JniMonitoredMethodEndWithReference; + qpoints->pJniMethodEndWithReferenceSynchronized = + JniMonitoredMethodEndWithReferenceSynchronized; } } diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h index cf5c697b76..377a63ee41 100644 --- a/runtime/entrypoints/quick/quick_entrypoints.h +++ b/runtime/entrypoints/quick/quick_entrypoints.h @@ -55,19 +55,35 @@ struct PACKED(4) QuickEntryPoints { // JNI entrypoints. // TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI. 
extern void JniMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; +extern void JniMethodStartSynchronized(jobject to_lock, Thread* self) + NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; extern void JniMethodEnd(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; +extern void JniMethodEndSynchronized(jobject locked, Thread* self) + NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; extern mirror::Object* JniMethodEndWithReference(jobject result, Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; +extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result, + jobject locked, + Thread* self) + NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; extern mirror::Object* JniDecodeReferenceResult(jobject result, Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; // JNI entrypoints when monitoring entry/exit. extern void JniMonitoredMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; +extern void JniMonitoredMethodStartSynchronized(jobject to_lock, Thread* self) + NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; extern void JniMonitoredMethodEnd(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; +extern void JniMonitoredMethodEndSynchronized(jobject locked, Thread* self) + NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; extern mirror::Object* JniMonitoredMethodEndWithReference(jobject result, Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; +extern mirror::Object* JniMonitoredMethodEndWithReferenceSynchronized(jobject result, + jobject locked, + Thread* self) + NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; extern "C" mirror::String* artStringBuilderAppend(uint32_t format, @@ -77,8 +93,6 @@ extern "C" mirror::String* artStringBuilderAppend(uint32_t format, extern "C" void artReadBarrierJni(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR; -extern "C" void artUnlockObjectFromJni(mirror::Object* locked, Thread* self) - REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR; // Read barrier entrypoints. // diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index 09ce9438ea..a77e849d32 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -73,11 +73,12 @@ V(AputObject, void, mirror::Array*, int32_t, mirror::Object*) \ \ V(JniMethodStart, void, Thread*) \ + V(JniMethodStartSynchronized, void, jobject, Thread*) \ V(JniMethodEnd, void, Thread*) \ + V(JniMethodEndSynchronized, void, jobject, Thread*) \ V(JniMethodEndWithReference, mirror::Object*, jobject, Thread*) \ + V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject, jobject, Thread*) \ V(JniDecodeReferenceResult, mirror::Object*, jobject, Thread*) \ - V(JniLockObject, void, mirror::Object*) \ - V(JniUnlockObject, void, mirror::Object*) \ V(QuickGenericJniTrampoline, void, ArtMethod*) \ \ V(LockObject, void, mirror::Object*) \ diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc index 95072130a9..2ea3c2aca9 100644 --- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc @@ -69,6 +69,11 @@ extern void JniMethodStart(Thread* self) { self->TransitionFromRunnableToSuspended(kNative); } +extern void JniMethodStartSynchronized(jobject to_lock, Thread* self) { + self->DecodeJObject(to_lock)->MonitorEnter(self); + JniMethodStart(self); +} + // TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI. 
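At the C++ level, the restored *Synchronized entrypoints bracket a synchronized native call as sketched below, where obj is the `this` reference for instance methods or the declaring class for static methods. This is only an illustrative rendering that presumes ART's internal headers; CallSynchronizedNative is a hypothetical helper, since the real stubs are generated assembly.

// Hypothetical helper showing the call order for a reference-returning method.
static mirror::Object* CallSynchronizedNative(jobject (*native_fn)(JNIEnv*, jobject),
                                              JNIEnv* env,
                                              jobject obj,
                                              Thread* self) {
  JniMethodStartSynchronized(obj, self);     // MonitorEnter on obj, then Runnable -> Native.
  jobject raw_result = native_fn(env, obj);  // The native implementation runs here.
  // Native -> Runnable, MonitorExit on obj, then decode the returned local reference.
  return JniMethodEndWithReferenceSynchronized(raw_result, obj, self);
  // A void or primitive-returning method would call JniMethodEndSynchronized(obj, self) instead.
}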
static void GoToRunnable(Thread* self) NO_THREAD_SAFETY_ANALYSIS { if (kIsDebugBuild) { @@ -90,11 +95,8 @@ static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self) } // TODO: annotalysis disabled as monitor semantics are maintained in Java code. -extern "C" void artUnlockObjectFromJni(mirror::Object* locked, Thread* self) +static inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self) NO_THREAD_SAFETY_ANALYSIS REQUIRES(!Roles::uninterruptible_) { - // Note: No thread suspension is allowed for successful unlocking, otherwise plain - // `mirror::Object*` return value saved by the assembly stub would need to be updated. - uintptr_t old_poison_object_cookie = kIsDebugBuild ? self->GetPoisonObjectCookie() : 0u; // Save any pending exception over monitor exit call. ObjPtr<mirror::Throwable> saved_exception = nullptr; if (UNLIKELY(self->IsExceptionPending())) { @@ -102,22 +104,17 @@ extern "C" void artUnlockObjectFromJni(mirror::Object* locked, Thread* self) self->ClearException(); } // Decode locked object and unlock, before popping local references. - locked->MonitorExit(self); + self->DecodeJObject(locked)->MonitorExit(self); if (UNLIKELY(self->IsExceptionPending())) { - LOG(FATAL) << "Exception during implicit MonitorExit for synchronized native method:\n" - << self->GetException()->Dump() - << (saved_exception != nullptr - ? "\nAn exception was already pending:\n" + saved_exception->Dump() - : ""); - UNREACHABLE(); + LOG(FATAL) << "Synchronized JNI code returning with an exception:\n" + << saved_exception->Dump() + << "\nEncountered second exception during implicit MonitorExit:\n" + << self->GetException()->Dump(); } // Restore pending exception. if (saved_exception != nullptr) { self->SetException(saved_exception); } - if (kIsDebugBuild) { - DCHECK_EQ(old_poison_object_cookie, self->GetPoisonObjectCookie()); - } } // TODO: These should probably be templatized or macro-ized. @@ -127,6 +124,11 @@ extern void JniMethodEnd(Thread* self) { GoToRunnable(self); } +extern void JniMethodEndSynchronized(jobject locked, Thread* self) { + GoToRunnable(self); + UnlockJniSynchronizedMethod(locked, self); // Must decode before pop. +} + extern mirror::Object* JniDecodeReferenceResult(jobject result, Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(!self->IsExceptionPending()); @@ -166,6 +168,14 @@ extern mirror::Object* JniMethodEndWithReference(jobject result, Thread* self) { return JniMethodEndWithReferenceHandleResult(result, self); } +extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result, + jobject locked, + Thread* self) { + GoToRunnable(self); + UnlockJniSynchronizedMethod(locked, self); + return JniMethodEndWithReferenceHandleResult(result, self); +} + extern uint64_t GenericJniMethodEnd(Thread* self, uint32_t saved_local_ref_cookie, jvalue result, @@ -196,9 +206,9 @@ extern uint64_t GenericJniMethodEnd(Thread* self, // locked object. 
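UnlockJniSynchronizedMethod above has to cope with an exception that the native code may have left pending: it stashes that exception across the implicit MonitorExit, treats an exception raised by the unlock itself as fatal, and then re-installs the stashed one. A self-contained sketch of that pattern; FakeThread and the std::string exception are stand-ins, not ART types.

#include <cstdlib>
#include <optional>
#include <string>
#include <utility>

struct FakeThread {
  std::optional<std::string> pending_exception;  // Stand-in for the thread's pending Throwable.
};

void UnlockPreservingPendingException(FakeThread& self, void (*monitor_exit)(FakeThread&)) {
  // Save and clear any exception the native code left behind.
  std::optional<std::string> saved = std::exchange(self.pending_exception, std::nullopt);
  monitor_exit(self);                  // Implicit MonitorExit; may raise its own exception.
  if (self.pending_exception.has_value()) {
    std::abort();                      // Unlock failure is unrecoverable (LOG(FATAL) in ART).
  }
  self.pending_exception = std::move(saved);  // Restore whatever was pending before the unlock.
}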
if (called->IsSynchronized()) { DCHECK(normal_native) << "@FastNative/@CriticalNative and synchronize is not supported"; - ObjPtr<mirror::Object> lock = GetGenericJniSynchronizationObject(self, called); + jobject lock = GetGenericJniSynchronizationObject(self, called); DCHECK(lock != nullptr); - artUnlockObjectFromJni(lock.Ptr(), self); + UnlockJniSynchronizedMethod(lock, self); } char return_shorty_char = called->GetShorty()[0]; if (return_shorty_char == 'L') { @@ -248,14 +258,32 @@ extern void JniMonitoredMethodStart(Thread* self) { MONITOR_JNI(PaletteNotifyBeginJniInvocation); } +extern void JniMonitoredMethodStartSynchronized(jobject to_lock, Thread* self) { + JniMethodStartSynchronized(to_lock, self); + MONITOR_JNI(PaletteNotifyBeginJniInvocation); +} + extern void JniMonitoredMethodEnd(Thread* self) { MONITOR_JNI(PaletteNotifyEndJniInvocation); JniMethodEnd(self); } +extern void JniMonitoredMethodEndSynchronized(jobject locked, Thread* self) { + MONITOR_JNI(PaletteNotifyEndJniInvocation); + JniMethodEndSynchronized(locked, self); +} + extern mirror::Object* JniMonitoredMethodEndWithReference(jobject result, Thread* self) { MONITOR_JNI(PaletteNotifyEndJniInvocation); return JniMethodEndWithReference(result, self); } +extern mirror::Object* JniMonitoredMethodEndWithReferenceSynchronized( + jobject result, + jobject locked, + Thread* self) { + MONITOR_JNI(PaletteNotifyEndJniInvocation); + return JniMethodEndWithReferenceSynchronized(result, locked, self); +} + } // namespace art diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index e214577f7b..c14dee42ec 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -2062,14 +2062,11 @@ void BuildGenericJniFrameVisitor::Visit() { * needed and return to the stub. * * The return value is the pointer to the native code, null on failure. - * - * NO_THREAD_SAFETY_ANALYSIS: Depending on the use case, the trampoline may - * or may not lock a synchronization object and transition out of Runnable. */ extern "C" const void* artQuickGenericJniTrampoline(Thread* self, ArtMethod** managed_sp, uintptr_t* reserved_area) - REQUIRES_SHARED(Locks::mutator_lock_) NO_THREAD_SAFETY_ANALYSIS { + REQUIRES_SHARED(Locks::mutator_lock_) { // Note: We cannot walk the stack properly until fixed up below. ArtMethod* called = *managed_sp; DCHECK(called->IsNative()) << called->PrettyMethod(true); @@ -2124,14 +2121,14 @@ extern "C" const void* artQuickGenericJniTrampoline(Thread* self, if (LIKELY(normal_native)) { // Start JNI. if (called->IsSynchronized()) { - ObjPtr<mirror::Object> lock = GetGenericJniSynchronizationObject(self, called); - DCHECK(lock != nullptr); - lock->MonitorEnter(self); + jobject lock = GetGenericJniSynchronizationObject(self, called); + JniMethodStartSynchronized(lock, self); if (self->IsExceptionPending()) { return nullptr; // Report error. 
} + } else { + JniMethodStart(self); } - JniMethodStart(self); } else { DCHECK(!called->IsSynchronized()) << "@FastNative/@CriticalNative and synchronize is not supported"; diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index c3f1dba967..c19e000d1e 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -217,16 +217,18 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pGetObjInstance, pGetObjStatic, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pGetObjStatic, pAputObject, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAputObject, pJniMethodStart, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodEnd, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodEndWithReference, + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodStartSynchronized, + sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStartSynchronized, pJniMethodEnd, + sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodEndSynchronized, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndSynchronized, pJniMethodEndWithReference, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReference, + pJniMethodEndWithReferenceSynchronized, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReferenceSynchronized, pJniDecodeReferenceResult, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniDecodeReferenceResult, - pJniLockObject, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniLockObject, - pJniUnlockObject, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniUnlockObject, pQuickGenericJniTrampoline, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pQuickGenericJniTrampoline, pLockObject, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pLockObject, pUnlockObject, sizeof(void*)); diff --git a/runtime/oat.h b/runtime/oat.h index 0b6bf7db91..acb3d30fa2 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,8 +32,8 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } }; - // Last oat version changed reason: JNI: Rewrite locking for synchronized methods. - static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '1', '3', '\0' } }; + // Last oat version changed reason: JNI: Faster mutator locking during transition. 
+ static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '1', '2', '\0' } }; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; static constexpr const char* kDebuggableKey = "debuggable"; diff --git a/runtime/thread.cc b/runtime/thread.cc index 46aa38e035..9fb8d62147 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -3475,11 +3475,12 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) { QUICK_ENTRY_POINT_INFO(pGetObjStatic) QUICK_ENTRY_POINT_INFO(pAputObject) QUICK_ENTRY_POINT_INFO(pJniMethodStart) + QUICK_ENTRY_POINT_INFO(pJniMethodStartSynchronized) QUICK_ENTRY_POINT_INFO(pJniMethodEnd) + QUICK_ENTRY_POINT_INFO(pJniMethodEndSynchronized) QUICK_ENTRY_POINT_INFO(pJniMethodEndWithReference) + QUICK_ENTRY_POINT_INFO(pJniMethodEndWithReferenceSynchronized) QUICK_ENTRY_POINT_INFO(pJniDecodeReferenceResult) - QUICK_ENTRY_POINT_INFO(pJniLockObject) - QUICK_ENTRY_POINT_INFO(pJniUnlockObject) QUICK_ENTRY_POINT_INFO(pQuickGenericJniTrampoline) QUICK_ENTRY_POINT_INFO(pLockObject) QUICK_ENTRY_POINT_INFO(pUnlockObject) |
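The change to GetGenericJniSynchronizationObject earlier in this patch relies on a jobject being nothing more than an indirection: a pointer to a slot that holds the actual reference. Handing out the address of the `this` spill slot in the SaveRefsAndArgs frame (or of the method's declaring-class field) therefore yields something Thread::DecodeJObject can resolve later. A stand-alone sketch of that idea; the types, the 32-bit compressed-reference assumption, and SynchronizationObjectSlot are illustrative, not ART definitions.

#include <cstddef>
#include <cstdint>

using CompressedRef = uint32_t;            // A stack reference slot holds a 32-bit compressed ref.
using FakeJObject = const CompressedRef*;  // A "jobject" is simply a pointer to such a slot.

// Mirrors the instance-method branch: the first out vreg (the `this` reference) sits at a
// fixed offset past the callee-save frame, exactly as computed in the patched code.
FakeJObject SynchronizationObjectSlot(const uint8_t* top_quick_frame,
                                      std::size_t frame_size,
                                      std::size_t runtime_pointer_size) {
  return reinterpret_cast<FakeJObject>(top_quick_frame + frame_size + runtime_pointer_size);
}

CompressedRef DecodeStackReference(FakeJObject ref) {
  return *ref;                             // What decoding such a reference boils down to.
}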