32 files changed, 1008 insertions, 651 deletions
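At a high level, this change removes the synchronized variants of the JNI transition entrypoints and instead has the compiled JNI stub lock and unlock the synchronization object through the new pJniLockObject / pJniUnlockObject entrypoints, passing the object in a dedicated register. The sketch below summarizes the signature change; the type aliases are taken from the jni_compiler_test.cc hunk that follows, while the surrounding declarations are stubbed here only to keep the sketch self-contained and are not ART's actual headers.

    // Stubs for illustration only; in ART these come from jni.h and the runtime headers.
    class _jobject;
    using jobject = _jobject*;
    class Thread;
    namespace mirror { class Object; }

    // Before: the synchronized start/end entrypoints received the locked object.
    using StartSynchronizedType = void (*)(jobject, Thread*);
    using EndSynchronizedType = void (*)(jobject, Thread*);
    using EndWithReferenceSynchronizedType = mirror::Object* (*)(jobject, jobject, Thread*);

    // After: locking is handled by separate JniLockObject/JniUnlockObject calls,
    // so the start/end entrypoints only take the current thread (plus the result
    // to decode for the reference-returning variant).
    using JniStartType = void (*)(Thread*);
    using JniEndType = void (*)(Thread*);
    using JniEndWithReferenceType = mirror::Object* (*)(jobject, Thread*);

The net effect on the entrypoint table is visible in the oat_writer_test.cc hunk, where the expected QuickEntryPoints size drops from 169 to 168 pointers.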
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index e3d0abb7d3..2f96d44977 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -39,6 +39,7 @@ #include "mirror/stack_trace_element-inl.h" #include "nativehelper/ScopedLocalRef.h" #include "nativeloader/native_loader.h" +#include "oat_quick_method_header.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" @@ -388,44 +389,41 @@ class JniCompilerTest : public CommonCompilerTest { jmethodID jmethod_; private: + // Helper class that overrides original entrypoints with alternative versions + // that check that the object (`this` or class) is locked. class ScopedSynchronizedEntryPointOverrides { public: ScopedSynchronizedEntryPointOverrides() { QuickEntryPoints* qpoints = &Thread::Current()->tlsPtr_.quick_entrypoints; - jni_method_start_synchronized_original_ = qpoints->pJniMethodStartSynchronized; - qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronizedOverride; - jni_method_end_synchronized_original_ = qpoints->pJniMethodEndSynchronized; - qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronizedOverride; - jni_method_end_with_reference_synchronized_original_ = - qpoints->pJniMethodEndWithReferenceSynchronized; - qpoints->pJniMethodEndWithReferenceSynchronized = - JniMethodEndWithReferenceSynchronizedOverride; + jni_method_start_original_ = qpoints->pJniMethodStart; + qpoints->pJniMethodStart = JniMethodStartSynchronizedOverride; + jni_method_end_original_ = qpoints->pJniMethodEnd; + qpoints->pJniMethodEnd = JniMethodEndSynchronizedOverride; + jni_method_end_with_reference_original_ = qpoints->pJniMethodEndWithReference; + qpoints->pJniMethodEndWithReference = JniMethodEndWithReferenceSynchronizedOverride; } ~ScopedSynchronizedEntryPointOverrides() { QuickEntryPoints* qpoints = &Thread::Current()->tlsPtr_.quick_entrypoints; - qpoints->pJniMethodStartSynchronized = jni_method_start_synchronized_original_; - qpoints->pJniMethodEndSynchronized = jni_method_end_synchronized_original_; - qpoints->pJniMethodEndWithReferenceSynchronized = - jni_method_end_with_reference_synchronized_original_; + qpoints->pJniMethodStart = jni_method_start_original_; + qpoints->pJniMethodEnd = jni_method_end_original_; + qpoints->pJniMethodEndWithReference = jni_method_end_with_reference_original_; } }; - static void JniMethodStartSynchronizedOverride(jobject to_lock, Thread* self); - static void JniMethodEndSynchronizedOverride(jobject locked, Thread* self); + static void AssertCallerObjectLocked(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_); + static void JniMethodStartSynchronizedOverride(Thread* self); + static void JniMethodEndSynchronizedOverride(Thread* self); static mirror::Object* JniMethodEndWithReferenceSynchronizedOverride( - jobject result, - jobject locked, - Thread* self); + jobject result, Thread* self); - using StartSynchronizedType = void (*)(jobject, Thread*); - using EndSynchronizedType = void (*)(jobject, Thread*); - using EndWithReferenceSynchronizedType = mirror::Object* (*)(jobject, jobject, Thread*); + using JniStartType = void (*)(Thread*); + using JniEndType = void (*)(Thread*); + using JniEndWithReferenceType = mirror::Object* (*)(jobject, Thread*); - static StartSynchronizedType jni_method_start_synchronized_original_; - static EndSynchronizedType jni_method_end_synchronized_original_; - static EndWithReferenceSynchronizedType jni_method_end_with_reference_synchronized_original_; - static jobject locked_object_; + 
static JniStartType jni_method_start_original_; + static JniEndType jni_method_end_original_; + static JniEndWithReferenceType jni_method_end_with_reference_original_; bool check_generic_jni_; }; @@ -433,28 +431,49 @@ class JniCompilerTest : public CommonCompilerTest { jclass JniCompilerTest::jklass_; jobject JniCompilerTest::jobj_; jobject JniCompilerTest::class_loader_; -JniCompilerTest::StartSynchronizedType JniCompilerTest::jni_method_start_synchronized_original_; -JniCompilerTest::EndSynchronizedType JniCompilerTest::jni_method_end_synchronized_original_; -JniCompilerTest::EndWithReferenceSynchronizedType - JniCompilerTest::jni_method_end_with_reference_synchronized_original_; -jobject JniCompilerTest::locked_object_; +JniCompilerTest::JniStartType JniCompilerTest::jni_method_start_original_; +JniCompilerTest::JniEndType JniCompilerTest::jni_method_end_original_; +JniCompilerTest::JniEndWithReferenceType JniCompilerTest::jni_method_end_with_reference_original_; + +void JniCompilerTest::AssertCallerObjectLocked(Thread* self) { + ArtMethod** caller_frame = self->GetManagedStack()->GetTopQuickFrame(); + CHECK(caller_frame != nullptr); + ArtMethod* caller = *caller_frame; + CHECK(caller != nullptr); + CHECK(caller->IsNative()); + CHECK(!caller->IsFastNative()); + CHECK(!caller->IsCriticalNative()); + CHECK(caller->IsSynchronized()); + ObjPtr<mirror::Object> lock; + if (caller->IsStatic()) { + lock = caller->GetDeclaringClass(); + } else { + uint8_t* sp = reinterpret_cast<uint8_t*>(caller_frame); + const void* code_ptr = EntryPointToCodePointer(caller->GetEntryPointFromQuickCompiledCode()); + OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr); + size_t frame_size = method_header->GetFrameSizeInBytes(); + StackReference<mirror::Object>* this_ref = reinterpret_cast<StackReference<mirror::Object>*>( + sp + frame_size + static_cast<size_t>(kRuntimePointerSize)); + lock = this_ref->AsMirrorPtr(); + } + CHECK_EQ(Monitor::GetLockOwnerThreadId(lock), self->GetThreadId()); +} -void JniCompilerTest::JniMethodStartSynchronizedOverride(jobject to_lock, Thread* self) { - locked_object_ = to_lock; - jni_method_start_synchronized_original_(to_lock, self); +void JniCompilerTest::JniMethodStartSynchronizedOverride(Thread* self) NO_THREAD_SAFETY_ANALYSIS { + AssertCallerObjectLocked(self); + jni_method_start_original_(self); } -void JniCompilerTest::JniMethodEndSynchronizedOverride(jobject locked, Thread* self) { - EXPECT_EQ(locked_object_, locked); - jni_method_end_synchronized_original_(locked, self); +void JniCompilerTest::JniMethodEndSynchronizedOverride(Thread* self) NO_THREAD_SAFETY_ANALYSIS { + jni_method_end_original_(self); + AssertCallerObjectLocked(self); } mirror::Object* JniCompilerTest::JniMethodEndWithReferenceSynchronizedOverride( - jobject result, - jobject locked, - Thread* self) { - EXPECT_EQ(locked_object_, locked); - return jni_method_end_with_reference_synchronized_original_(result, locked, self); + jobject result, Thread* self) NO_THREAD_SAFETY_ANALYSIS { + mirror::Object* raw_result = jni_method_end_with_reference_original_(result, self); + AssertCallerObjectLocked(self); + return raw_result; } // Test the normal compiler and normal generic JNI only. 
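Further down, the per-architecture assembly wraps the existing thin-lock fast paths into LOCK_OBJECT_FAST_PATH / UNLOCK_OBJECT_FAST_PATH macros so they can be shared with the new art_quick_lock_object_jni / art_quick_unlock_object_jni entry points, which receive the object in the locking argument register (R4 on arm, X15/W15 on arm64, EBP on x86, RBX on x86_64). The C++ sketch below is a rough illustration of the locking fast path those macros implement; the lock-word layout and constants are simplified assumptions for readability, not ART's real LockWord encoding, and the assembly retries a failed store-exclusive while the sketch simply reports the CAS result.

    #include <atomic>
    #include <cstdint>

    // Assumed, simplified lock word layout (illustration only):
    // low 16 bits = owner thread id, next 12 bits = recursive count,
    // next 2 bits = lock state (0 = thin/unlocked), top 2 bits = GC state.
    constexpr uint32_t kOwnerMask   = 0xffffu;
    constexpr uint32_t kCountShift  = 16u;
    constexpr uint32_t kCountOne    = 1u << kCountShift;
    constexpr uint32_t kCountMask   = 0xfffu << kCountShift;
    constexpr uint32_t kStateMask   = 0x3u << 28;
    constexpr uint32_t kGcStateMask = 0x3u << 30;

    // Returns true if the lock was taken on the fast path; false means
    // "take the slow path" (artLockObjectFromCode in the real entrypoints).
    bool TryLockFastPath(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
      uint32_t old_word = lock_word.load(std::memory_order_relaxed);
      if ((old_word & ~kGcStateMask) == 0u) {
        // Unlocked: install our thread id, preserving the GC state bits.
        return lock_word.compare_exchange_strong(old_word, old_word | thread_id,
                                                 std::memory_order_acquire);
      }
      if ((old_word & (kStateMask | kOwnerMask)) == thread_id) {
        // Thin lock already owned by this thread: bump the recursive count,
        // bailing out to the slow path if the count would overflow.
        uint32_t new_word = old_word + kCountOne;
        if ((new_word & kCountMask) == 0u) {
          return false;
        }
        return lock_word.compare_exchange_strong(old_word, new_word,
                                                 std::memory_order_relaxed);
      }
      // Inflated lock, or a thin lock held by another thread: slow path.
      return false;
    }

The corresponding unlock reloads the object (or the declaring class for static methods) from the stack before calling JniUnlockObject, as section 7.4 of the jni_compiler.cc changes below shows.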
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc index 68c7a94540..da438bdba6 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.cc +++ b/compiler/jni/quick/arm/calling_convention_arm.cc @@ -531,10 +531,10 @@ FrameOffset ArmJniCallingConvention::CurrentParamStackOffset() { return FrameOffset(offset); } -ManagedRegister ArmJniCallingConvention::HiddenArgumentRegister() const { - CHECK(IsCriticalNative()); - // R4 is neither managed callee-save, nor argument register, nor scratch register. - // (It is native callee-save but the value coming from managed code can be clobbered.) +// R4 is neither managed callee-save, nor argument register. It is suitable for use as the +// locking argument for synchronized methods and hidden argument for @CriticalNative methods. +// (It is native callee-save but the value coming from managed code can be clobbered.) +static void AssertR4IsNeitherCalleeSaveNorArgumentRegister() { // TODO: Change to static_assert; std::none_of should be constexpr since C++20. DCHECK(std::none_of(kCalleeSaveRegisters, kCalleeSaveRegisters + std::size(kCalleeSaveRegisters), @@ -543,7 +543,20 @@ ManagedRegister ArmJniCallingConvention::HiddenArgumentRegister() const { })); DCHECK(std::none_of(kJniArgumentRegisters, kJniArgumentRegisters + std::size(kJniArgumentRegisters), - [](Register reg) { return reg == R4; })); + [](Register arg) { return arg == R4; })); +} + +ManagedRegister ArmJniCallingConvention::LockingArgumentRegister() const { + DCHECK(!IsFastNative()); + DCHECK(!IsCriticalNative()); + DCHECK(IsSynchronized()); + AssertR4IsNeitherCalleeSaveNorArgumentRegister(); + return ArmManagedRegister::FromCoreRegister(R4); +} + +ManagedRegister ArmJniCallingConvention::HiddenArgumentRegister() const { + CHECK(IsCriticalNative()); + AssertR4IsNeitherCalleeSaveNorArgumentRegister(); return ArmManagedRegister::FromCoreRegister(R4); } diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h index 149ba39eb4..94dacc46e5 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.h +++ b/compiler/jni/quick/arm/calling_convention_arm.h @@ -81,6 +81,10 @@ class ArmJniCallingConvention final : public JniCallingConvention { return false; } + // Locking argument register, used to pass the synchronization object for calls + // to `JniLockObject()` and `JniUnlockObject()`. + ManagedRegister LockingArgumentRegister() const override; + // Hidden argument register, used to pass the method pointer for @CriticalNative call. ManagedRegister HiddenArgumentRegister() const override; diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index 7b9a597805..d8b0373096 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -363,9 +363,9 @@ FrameOffset Arm64JniCallingConvention::CurrentParamStackOffset() { return FrameOffset(offset); } -ManagedRegister Arm64JniCallingConvention::HiddenArgumentRegister() const { - CHECK(IsCriticalNative()); - // X15 is neither managed callee-save, nor argument register, nor scratch register. +// X15 is neither managed callee-save, nor argument register. It is suitable for use as the +// locking argument for synchronized methods and hidden argument for @CriticalNative methods. 
+static void AssertX15IsNeitherCalleeSaveNorArgumentRegister() { // TODO: Change to static_assert; std::none_of should be constexpr since C++20. DCHECK(std::none_of(kCalleeSaveRegisters, kCalleeSaveRegisters + std::size(kCalleeSaveRegisters), @@ -374,7 +374,20 @@ ManagedRegister Arm64JniCallingConvention::HiddenArgumentRegister() const { })); DCHECK(std::none_of(kXArgumentRegisters, kXArgumentRegisters + std::size(kXArgumentRegisters), - [](XRegister reg) { return reg == X15; })); + [](XRegister arg) { return arg == X15; })); +} + +ManagedRegister Arm64JniCallingConvention::LockingArgumentRegister() const { + DCHECK(!IsFastNative()); + DCHECK(!IsCriticalNative()); + DCHECK(IsSynchronized()); + AssertX15IsNeitherCalleeSaveNorArgumentRegister(); + return Arm64ManagedRegister::FromWRegister(W15); +} + +ManagedRegister Arm64JniCallingConvention::HiddenArgumentRegister() const { + DCHECK(IsCriticalNative()); + AssertX15IsNeitherCalleeSaveNorArgumentRegister(); return Arm64ManagedRegister::FromXRegister(X15); } diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h index ade88e4e97..003b0c3f15 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.h +++ b/compiler/jni/quick/arm64/calling_convention_arm64.h @@ -72,6 +72,10 @@ class Arm64JniCallingConvention final : public JniCallingConvention { return HasSmallReturnType(); } + // Locking argument register, used to pass the synchronization object for calls + // to `JniLockObject()` and `JniUnlockObject()`. + ManagedRegister LockingArgumentRegister() const override; + // Hidden argument register, used to pass the method pointer for @CriticalNative call. ManagedRegister HiddenArgumentRegister() const override; diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h index faa83daf7c..0be523362f 100644 --- a/compiler/jni/quick/calling_convention.h +++ b/compiler/jni/quick/calling_convention.h @@ -363,6 +363,10 @@ class JniCallingConvention : public CallingConvention { return !IsCriticalNative(); } + // Locking argument register, used to pass the synchronization object for calls + // to `JniLockObject()` and `JniUnlockObject()`. + virtual ManagedRegister LockingArgumentRegister() const = 0; + // Hidden argument register, used to pass the method pointer for @CriticalNative call. virtual ManagedRegister HiddenArgumentRegister() const = 0; diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index 4c1b2f792d..863f47b819 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -81,26 +81,17 @@ enum class JniEntrypoint { template <PointerSize kPointerSize> static ThreadOffset<kPointerSize> GetJniEntrypointThreadOffset(JniEntrypoint which, - bool reference_return, - bool is_synchronized) { + bool reference_return) { if (which == JniEntrypoint::kStart) { // JniMethodStart - ThreadOffset<kPointerSize> jni_start = - is_synchronized - ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized) - : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart); - + ThreadOffset<kPointerSize> jni_start = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart); return jni_start; } else { // JniMethodEnd ThreadOffset<kPointerSize> jni_end(-1); if (reference_return) { // Pass result. - jni_end = is_synchronized - ? 
QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized) - : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference); + jni_end = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference); } else { - jni_end = is_synchronized - ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized) - : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd); + jni_end = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd); } return jni_end; @@ -194,26 +185,6 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp ManagedRuntimeCallingConvention::Create( &allocator, is_static, is_synchronized, shorty, instruction_set)); - // Calling conventions to call into JNI method "end" possibly passing a returned reference, the - // method and the current thread. - const char* jni_end_shorty; - if (reference_return && is_synchronized) { - jni_end_shorty = "IL"; - } else if (reference_return) { - jni_end_shorty = "I"; - } else { - jni_end_shorty = "V"; - } - - std::unique_ptr<JniCallingConvention> end_jni_conv( - JniCallingConvention::Create(&allocator, - is_static, - is_synchronized, - is_fast_native, - is_critical_native, - jni_end_shorty, - instruction_set)); - // Assembler that holds generated instructions std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm = GetMacroAssembler<kPointerSize>(&allocator, instruction_set, instruction_set_features); @@ -249,7 +220,28 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp __ Bind(jclass_read_barrier_return.get()); } - // 1.3. Write out the end of the quick frames. + // 1.3 Spill reference register arguments. + constexpr FrameOffset kInvalidReferenceOffset = + JNIMacroAssembler<kPointerSize>::kInvalidReferenceOffset; + ArenaVector<ArgumentLocation> src_args(allocator.Adapter()); + ArenaVector<ArgumentLocation> dest_args(allocator.Adapter()); + ArenaVector<FrameOffset> refs(allocator.Adapter()); + if (LIKELY(!is_critical_native)) { + mr_conv->ResetIterator(FrameOffset(current_frame_size)); + for (; mr_conv->HasNext(); mr_conv->Next()) { + if (mr_conv->IsCurrentParamInRegister() && mr_conv->IsCurrentParamAReference()) { + // Spill the reference as raw data. + src_args.emplace_back(mr_conv->CurrentParamRegister(), kObjectReferenceSize); + dest_args.emplace_back(mr_conv->CurrentParamStackOffset(), kObjectReferenceSize); + refs.push_back(kInvalidReferenceOffset); + } + } + __ MoveArguments(ArrayRef<ArgumentLocation>(dest_args), + ArrayRef<ArgumentLocation>(src_args), + ArrayRef<FrameOffset>(refs)); + } + + // 1.4. Write out the end of the quick frames. After this, we can walk the stack. // NOTE: @CriticalNative does not need to store the stack pointer to the thread // because garbage collections are disabled within the execution of a // @CriticalNative method. @@ -257,10 +249,32 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>()); } - // 2. Call into appropriate `JniMethodStart*()` to transition out of Runnable for normal native. + // 2. Lock the object (if synchronized) and transition out of runnable (if normal native). - // 2.1. Move frame down to allow space for out going args. - // This prepares for both the `JniMethodStart*()` call as well as the main native call. + // 2.1. Lock the synchronization object (`this` or class) for synchronized methods. 
+ if (UNLIKELY(is_synchronized)) { + // We are using a custom calling convention for locking where the assembly thunk gets + // the object to lock in a register (even on x86), it can use callee-save registers + // as temporaries (they were saved above) and must preserve argument registers. + ManagedRegister to_lock = main_jni_conv->LockingArgumentRegister(); + if (is_static) { + // Pass the declaring class. It was already marked if needed. + DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u); + __ Load(to_lock, method_register, MemberOffset(0u), kObjectReferenceSize); + } else { + // Pass the `this` argument. + mr_conv->ResetIterator(FrameOffset(current_frame_size)); + if (mr_conv->IsCurrentParamInRegister()) { + __ Move(to_lock, mr_conv->CurrentParamRegister(), kObjectReferenceSize); + } else { + __ Load(to_lock, mr_conv->CurrentParamStackOffset(), kObjectReferenceSize); + } + } + __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniLockObject)); + } + + // 2.2. Move frame down to allow space for out going args. + // This prepares for both the `JniMethodStart()` call as well as the main native call. size_t current_out_arg_size = main_out_arg_size; if (UNLIKELY(is_critical_native)) { DCHECK_EQ(main_out_arg_size, current_frame_size); @@ -269,41 +283,37 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp current_frame_size += main_out_arg_size; } - // 2.2. Spill all register arguments to preserve them across the `JniMethodStart*()` call. + // 2.3. Spill all register arguments to preserve them across the `JniLockObject()` + // call (if synchronized) and `JniMethodStart()` call (if normal native). // Native stack arguments are spilled directly to their argument stack slots and // references are converted to `jobject`. Native register arguments are spilled to - // the reserved slots in the caller frame, references are not converted to `jobject`. - constexpr FrameOffset kInvalidReferenceOffset = - JNIMacroAssembler<kPointerSize>::kInvalidReferenceOffset; - ArenaVector<ArgumentLocation> src_args(allocator.Adapter()); - ArenaVector<ArgumentLocation> dest_args(allocator.Adapter()); - ArenaVector<FrameOffset> refs(allocator.Adapter()); + // the reserved slots in the caller frame, references are not converted to `jobject`; + // references from registers are actually skipped as they were already spilled above. + // TODO: Implement fast-path for transition to Native and avoid this spilling. + src_args.clear(); + dest_args.clear(); + refs.clear(); if (LIKELY(!is_critical_native && !is_fast_native)) { mr_conv->ResetIterator(FrameOffset(current_frame_size)); main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); main_jni_conv->Next(); // Skip JNIEnv*. + // Add a no-op move for the `jclass` / `this` argument to avoid the + // next argument being treated as non-null if it's a reference. + // Note: We have already spilled `this` as raw reference above. Since `this` + // cannot be null, the argument move before the native call does not need + // to reload the reference, and that argument move also needs to see the + // `this` argument to avoid treating another reference as non-null. + // Note: Using the method register for the no-op move even for `this`. + src_args.emplace_back(method_register, kRawPointerSize); + dest_args.emplace_back(method_register, kRawPointerSize); + refs.push_back(kInvalidReferenceOffset); if (is_static) { main_jni_conv->Next(); // Skip `jclass`. 
- // Add a no-op move for the `jclass` argument to avoid the next - // argument being treated as non-null if it's a reference. - src_args.emplace_back(method_register, kRawPointerSize); - dest_args.emplace_back(method_register, kRawPointerSize); - refs.push_back(kInvalidReferenceOffset); } else { - // Spill `this` as raw reference without conversion to `jobject` even if the `jobject` - // argument is passed on stack. Since `this` cannot be null, the argument move before - // the native call does not need to reload the reference, and that argument move also - // needs to see the `this` argument to avoid treating another reference as non-null. - // This also leaves enough space on stack for `JniMethodStartSynchronized()` - // for architectures that pass the second argument on the stack (x86). + // Skip `this` DCHECK(mr_conv->HasNext()); DCHECK(main_jni_conv->HasNext()); DCHECK(mr_conv->IsCurrentParamAReference()); - src_args.push_back(mr_conv->IsCurrentParamInRegister() - ? ArgumentLocation(mr_conv->CurrentParamRegister(), kObjectReferenceSize) - : ArgumentLocation(mr_conv->CurrentParamStackOffset(), kObjectReferenceSize)); - dest_args.emplace_back(mr_conv->CurrentParamStackOffset(), kObjectReferenceSize); - refs.push_back(kInvalidReferenceOffset); mr_conv->Next(); main_jni_conv->Next(); } @@ -311,13 +321,19 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp DCHECK(main_jni_conv->HasNext()); static_assert(kObjectReferenceSize == 4u); bool is_reference = mr_conv->IsCurrentParamAReference(); - bool spill_jobject = is_reference && !main_jni_conv->IsCurrentParamInRegister(); + bool src_in_reg = mr_conv->IsCurrentParamInRegister(); + bool dest_in_reg = main_jni_conv->IsCurrentParamInRegister(); + if (is_reference && src_in_reg && dest_in_reg) { + // We have already spilled the raw reference above. + continue; + } + bool spill_jobject = is_reference && !dest_in_reg; size_t src_size = (!is_reference && mr_conv->IsCurrentParamALongOrDouble()) ? 8u : 4u; size_t dest_size = spill_jobject ? kRawPointerSize : src_size; - src_args.push_back(mr_conv->IsCurrentParamInRegister() + src_args.push_back(src_in_reg ? ArgumentLocation(mr_conv->CurrentParamRegister(), src_size) : ArgumentLocation(mr_conv->CurrentParamStackOffset(), src_size)); - dest_args.push_back(main_jni_conv->IsCurrentParamInRegister() + dest_args.push_back(dest_in_reg ? ArgumentLocation(mr_conv->CurrentParamStackOffset(), dest_size) : ArgumentLocation(main_jni_conv->CurrentParamStackOffset(), dest_size)); refs.push_back(spill_jobject ? mr_conv->CurrentParamStackOffset() : kInvalidReferenceOffset); @@ -327,41 +343,14 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp ArrayRef<FrameOffset>(refs)); } // if (!is_critical_native) - // 2.3. Call into appropriate JniMethodStart passing Thread* so that transition out of Runnable + // 2.4. Call into `JniMethodStart()` passing Thread* so that transition out of Runnable // can occur. We abuse the JNI calling convention here, that is guaranteed to support - // passing two pointer arguments, `JNIEnv*` and `jclass`/`jobject`. - std::unique_ptr<JNIMacroLabel> monitor_enter_exception_slow_path = - UNLIKELY(is_synchronized) ? __ CreateLabel() : nullptr; + // passing two pointer arguments, `JNIEnv*` and `jclass`/`jobject`, and we use just one. if (LIKELY(!is_critical_native && !is_fast_native)) { // Skip this for @CriticalNative and @FastNative methods. They do not call JniMethodStart. 
ThreadOffset<kPointerSize> jni_start = - GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kStart, - reference_return, - is_synchronized); + GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kStart, reference_return); main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); - if (is_synchronized) { - // Pass object for locking. - if (is_static) { - // Pass the pointer to the method's declaring class as the first argument. - DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u); - SetNativeParameter(jni_asm.get(), main_jni_conv.get(), method_register); - } else { - // TODO: Use the register that still holds the `this` reference. - mr_conv->ResetIterator(FrameOffset(current_frame_size)); - FrameOffset this_offset = mr_conv->CurrentParamStackOffset(); - if (main_jni_conv->IsCurrentParamOnStack()) { - FrameOffset out_off = main_jni_conv->CurrentParamStackOffset(); - __ CreateJObject(out_off, this_offset, /*null_allowed=*/ false); - } else { - ManagedRegister out_reg = main_jni_conv->CurrentParamRegister(); - __ CreateJObject(out_reg, - this_offset, - ManagedRegister::NoRegister(), - /*null_allowed=*/ false); - } - } - main_jni_conv->Next(); - } if (main_jni_conv->IsCurrentParamInRegister()) { __ GetCurrentThread(main_jni_conv->CurrentParamRegister()); __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start)); @@ -369,10 +358,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset()); __ CallFromThread(jni_start); } - method_register = ManagedRegister::NoRegister(); // Method register is clobbered. - if (is_synchronized) { // Check for exceptions from monitor enter. - __ ExceptionPoll(monitor_enter_exception_slow_path.get()); - } + method_register = ManagedRegister::NoRegister(); // Method register is clobbered by the call. } // 3. Push local reference frame. @@ -539,7 +525,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp } } - // 5. Call into appropriate JniMethodEnd to transition out of Runnable for normal native. + // 5. Transition to Runnable (if normal native). // 5.1. Spill or move the return value if needed. // TODO: Use `callee_save_temp` instead of stack slot when possible. @@ -597,72 +583,30 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp } if (LIKELY(!is_critical_native)) { - // 5.4. Increase frame size for out args if needed by the end_jni_conv. - const size_t end_out_arg_size = end_jni_conv->OutFrameSize(); - if (end_out_arg_size > current_out_arg_size) { - DCHECK(!is_fast_native); - size_t out_arg_size_diff = end_out_arg_size - current_out_arg_size; - current_out_arg_size = end_out_arg_size; - __ IncreaseFrameSize(out_arg_size_diff); - current_frame_size += out_arg_size_diff; - return_save_location = FrameOffset(return_save_location.SizeValue() + out_arg_size_diff); - } - end_jni_conv->ResetIterator(FrameOffset(end_out_arg_size)); - - // 5.5. Call JniMethodEnd for normal native. + // 5.4. Call JniMethodEnd for normal native. // For @FastNative with reference return, decode the `jobject`. + // We abuse the JNI calling convention here, that is guaranteed to support passing + // two pointer arguments, `JNIEnv*` and `jclass`/`jobject`, enough for all cases. + main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size)); if (LIKELY(!is_fast_native) || reference_return) { ThreadOffset<kPointerSize> jni_end = is_fast_native ? 
QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniDecodeReferenceResult) - : GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd, - reference_return, - is_synchronized); + : GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd, reference_return); if (reference_return) { // Pass result. - SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister()); - end_jni_conv->Next(); + SetNativeParameter(jni_asm.get(), main_jni_conv.get(), main_jni_conv->ReturnRegister()); + main_jni_conv->Next(); } - if (is_synchronized) { - // Pass object for unlocking. - if (is_static) { - // Load reference to the method's declaring class. The method register has been - // clobbered by the above call, so we need to load the method from the stack. - FrameOffset method_offset = - FrameOffset(current_out_arg_size + mr_conv->MethodStackOffset().SizeValue()); - DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u); - if (end_jni_conv->IsCurrentParamOnStack()) { - FrameOffset out_off = end_jni_conv->CurrentParamStackOffset(); - __ Copy(out_off, method_offset, kRawPointerSize); - } else { - ManagedRegister out_reg = end_jni_conv->CurrentParamRegister(); - __ Load(out_reg, method_offset, kRawPointerSize); - } - } else { - mr_conv->ResetIterator(FrameOffset(current_frame_size)); - FrameOffset this_offset = mr_conv->CurrentParamStackOffset(); - if (end_jni_conv->IsCurrentParamOnStack()) { - FrameOffset out_off = end_jni_conv->CurrentParamStackOffset(); - __ CreateJObject(out_off, this_offset, /*null_allowed=*/ false); - } else { - ManagedRegister out_reg = end_jni_conv->CurrentParamRegister(); - __ CreateJObject(out_reg, - this_offset, - ManagedRegister::NoRegister(), - /*null_allowed=*/ false); - } - } - end_jni_conv->Next(); - } - if (end_jni_conv->IsCurrentParamInRegister()) { - __ GetCurrentThread(end_jni_conv->CurrentParamRegister()); - __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end)); + if (main_jni_conv->IsCurrentParamInRegister()) { + __ GetCurrentThread(main_jni_conv->CurrentParamRegister()); + __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_end)); } else { - __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset()); + __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset()); __ CallFromThread(jni_end); } } - // 5.6. Reload return value if it was spilled. + // 5.5. Reload return value if it was spilled. if (spill_return_value) { __ Load(mr_conv->ReturnRegister(), return_save_location, mr_conv->SizeOfReturnValue()); } @@ -698,7 +642,26 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp __ Bind(suspend_check_resume.get()); } - // 7.4. Remove activation - need to restore callee save registers since the GC + // 7.4 Unlock the synchronization object for synchronized methods. + if (UNLIKELY(is_synchronized)) { + ManagedRegister to_lock = main_jni_conv->LockingArgumentRegister(); + mr_conv->ResetIterator(FrameOffset(current_frame_size)); + if (is_static) { + // Pass the declaring class. + DCHECK(method_register.IsNoRegister()); // TODO: Preserve the method in `callee_save_temp`. + ManagedRegister temp = __ CoreRegisterWithSize(callee_save_temp, kRawPointerSize); + FrameOffset method_offset = mr_conv->MethodStackOffset(); + __ Load(temp, method_offset, kRawPointerSize); + DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u); + __ Load(to_lock, temp, MemberOffset(0u), kObjectReferenceSize); + } else { + // Pass the `this` argument from its spill slot. 
+ __ Load(to_lock, mr_conv->CurrentParamStackOffset(), kObjectReferenceSize); + } + __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniUnlockObject)); + } + + // 7.5. Remove activation - need to restore callee save registers since the GC // may have changed them. DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size)); if (LIKELY(!is_critical_native) || !main_jni_conv->UseTailCall()) { @@ -768,14 +731,6 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp // 8.3. Exception poll slow path(s). if (LIKELY(!is_critical_native)) { - if (UNLIKELY(is_synchronized)) { - DCHECK(!is_fast_native); - __ Bind(monitor_enter_exception_slow_path.get()); - if (main_out_arg_size != 0) { - jni_asm->cfi().AdjustCFAOffset(main_out_arg_size); - __ DecreaseFrameSize(main_out_arg_size); - } - } __ Bind(exception_slow_path.get()); if (UNLIKELY(is_fast_native) && reference_return) { // We performed the exception check early, so we need to adjust SP and pop IRT frame. diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc index 947320237c..2fb063f3fd 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.cc +++ b/compiler/jni/quick/x86/calling_convention_x86.cc @@ -294,6 +294,15 @@ FrameOffset X86JniCallingConvention::CurrentParamStackOffset() { FrameOffset(displacement_.Int32Value() - OutFrameSize() + (itr_slots_ * kFramePointerSize)); } +ManagedRegister X86JniCallingConvention::LockingArgumentRegister() const { + DCHECK(!IsFastNative()); + DCHECK(!IsCriticalNative()); + DCHECK(IsSynchronized()); + // The callee-save register EBP is suitable as a locking argument. + static_assert(kCalleeSaveRegisters[0].Equals(X86ManagedRegister::FromCpuRegister(EBP))); + return X86ManagedRegister::FromCpuRegister(EBP); +} + ManagedRegister X86JniCallingConvention::HiddenArgumentRegister() const { CHECK(IsCriticalNative()); // EAX is neither managed callee-save, nor argument register, nor scratch register. diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h index 7b62161907..f028090c75 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.h +++ b/compiler/jni/quick/x86/calling_convention_x86.h @@ -77,6 +77,10 @@ class X86JniCallingConvention final : public JniCallingConvention { return HasSmallReturnType(); } + // Locking argument register, used to pass the synchronization object for calls + // to `JniLockObject()` and `JniUnlockObject()`. + ManagedRegister LockingArgumentRegister() const override; + // Hidden argument register, used to pass the method pointer for @CriticalNative call. ManagedRegister HiddenArgumentRegister() const override; diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc index ddf3d74adc..469de42eff 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc @@ -299,6 +299,15 @@ FrameOffset X86_64JniCallingConvention::CurrentParamStackOffset() { return FrameOffset(offset); } +ManagedRegister X86_64JniCallingConvention::LockingArgumentRegister() const { + DCHECK(!IsFastNative()); + DCHECK(!IsCriticalNative()); + DCHECK(IsSynchronized()); + // The callee-save register RBX is suitable as a locking argument.
+ static_assert(kCalleeSaveRegisters[0].Equals(X86_64ManagedRegister::FromCpuRegister(RBX))); + return X86_64ManagedRegister::FromCpuRegister(RBX); +} + ManagedRegister X86_64JniCallingConvention::HiddenArgumentRegister() const { CHECK(IsCriticalNative()); // RAX is neither managed callee-save, nor argument register, nor scratch register. diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h index ee8603d9ce..fda5c0e354 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h @@ -72,6 +72,10 @@ class X86_64JniCallingConvention final : public JniCallingConvention { return HasSmallReturnType(); } + // Locking argument register, used to pass the synchronization object for calls + // to `JniLockObject()` and `JniUnlockObject()`. + ManagedRegister LockingArgumentRegister() const override; + // Hidden argument register, used to pass the method pointer for @CriticalNative call. ManagedRegister HiddenArgumentRegister() const override; diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc index 9ea6f04cb6..bd8aa083eb 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc @@ -546,32 +546,6 @@ void ArmVIXLJNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, DCHECK_EQ(arg_count, srcs.size()); DCHECK_EQ(arg_count, refs.size()); - // Spill reference registers. Spill two references together with STRD where possible. - for (size_t i = 0; i != arg_count; ++i) { - if (refs[i] != kInvalidReferenceOffset) { - DCHECK_EQ(srcs[i].GetSize(), kObjectReferenceSize); - if (srcs[i].IsRegister()) { - DCHECK_EQ(srcs[i].GetSize(), kObjectReferenceSize); - // Use STRD if we're storing 2 consecutive references within the available STRD range. - if (i + 1u != arg_count && - refs[i + 1u] != kInvalidReferenceOffset && - srcs[i + 1u].IsRegister() && - refs[i].SizeValue() < kStrdOffsetCutoff) { - DCHECK_EQ(srcs[i + 1u].GetSize(), kObjectReferenceSize); - DCHECK_EQ(refs[i + 1u].SizeValue(), refs[i].SizeValue() + kObjectReferenceSize); - ___ Strd(AsVIXLRegister(srcs[i].GetRegister().AsArm()), - AsVIXLRegister(srcs[i + 1u].GetRegister().AsArm()), - MemOperand(sp, refs[i].SizeValue())); - ++i; - } else { - Store(refs[i], srcs[i].GetRegister(), kObjectReferenceSize); - } - } else { - DCHECK_EQ(srcs[i].GetFrameOffset(), refs[i]); - } - } - } - // Convert reference registers to `jobject` values. // TODO: Delay this for references that are copied to another register. for (size_t i = 0; i != arg_count; ++i) { diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc index 0f1203e232..561cbbd54b 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc @@ -382,30 +382,6 @@ void Arm64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, DCHECK_EQ(arg_count, srcs.size()); DCHECK_EQ(arg_count, refs.size()); - // Spill reference registers. Spill two references together with STP where possible. - for (size_t i = 0; i != arg_count; ++i) { - if (refs[i] != kInvalidReferenceOffset) { - DCHECK_EQ(srcs[i].GetSize(), kObjectReferenceSize); - if (srcs[i].IsRegister()) { - // Use STP if we're storing 2 consecutive references within the available STP range. 
- if (i + 1u != arg_count && - refs[i + 1u].SizeValue() == refs[i].SizeValue() + kObjectReferenceSize && - srcs[i + 1u].IsRegister() && - refs[i].SizeValue() < kStpWOffsetCutoff) { - DCHECK_EQ(srcs[i + 1u].GetSize(), kObjectReferenceSize); - ___ Stp(reg_w(srcs[i].GetRegister().AsArm64().AsWRegister()), - reg_w(srcs[i + 1u].GetRegister().AsArm64().AsWRegister()), - MEM_OP(sp, refs[i].SizeValue())); - ++i; - } else { - Store(refs[i], srcs[i].GetRegister(), kObjectReferenceSize); - } - } else { - DCHECK_EQ(srcs[i].GetFrameOffset(), refs[i]); - } - } - } - auto get_mask = [](ManagedRegister reg) -> uint64_t { Arm64ManagedRegister arm64_reg = reg.AsArm64(); if (arm64_reg.IsXRegister()) { @@ -429,12 +405,12 @@ void Arm64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, }; // More than 8 core or FP reg args are very rare, so we do not optimize for - // that case by using LDP/STP, except for situations that arise for normal - // native even with low number of arguments. We use STP for the non-reference - // spilling which also covers the initial spill for native reference register - // args as they are spilled as raw 32-bit values. We also optimize loading - // args to registers with LDP, whether references or not, except for the - // initial non-null reference which we do not need to load at all. + // that case by using LDP/STP, except for situations that arise even with low + // number of arguments. We use STP for the non-reference spilling which also + // covers the initial spill for native reference register args as they are + // spilled as raw 32-bit values. We also optimize loading args to registers + // with LDP, whether references or not, except for the initial non-null + // reference which we do not need to load at all. // Collect registers to move while storing/copying args to stack slots. // Convert processed references to `jobject`. diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 541458b236..b35066f434 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -153,7 +153,7 @@ const char* const VixlJniHelpersResults = { " 21c: d9 f8 24 80 ldr.w r8, [r9, #36]\n" " 220: 70 47 bx lr\n" " 222: d9 f8 8c 00 ldr.w r0, [r9, #140]\n" - " 226: d9 f8 c8 e2 ldr.w lr, [r9, #712]\n" + " 226: d9 f8 c4 e2 ldr.w lr, [r9, #708]\n" " 22a: f0 47 blx lr\n" }; diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc index d0afa72155..7dff279944 100644 --- a/compiler/utils/x86/jni_macro_assembler_x86.cc +++ b/compiler/utils/x86/jni_macro_assembler_x86.cc @@ -332,6 +332,10 @@ void X86JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, DCHECK_EQ(src.GetSize(), dest.GetSize()); // Even for references. if (src.IsRegister()) { if (UNLIKELY(dest.IsRegister())) { + if (dest.GetRegister().Equals(src.GetRegister())) { + // JNI compiler sometimes adds a no-op move. + continue; + } // Native ABI has only stack arguments but we may pass one "hidden arg" in register. CHECK(!found_hidden_arg); found_hidden_arg = true; @@ -341,7 +345,6 @@ void X86JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, Move(dest.GetRegister(), src.GetRegister(), dest.GetSize()); } else { if (ref != kInvalidReferenceOffset) { - Store(ref, srcs[i].GetRegister(), kObjectReferenceSize); // Note: We can clobber `src` here as the register cannot hold more than one argument. 
// This overload of `CreateJObject()` currently does not use the scratch // register ECX, so this shall not clobber another argument. diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc index 1425a4cc41..2da1b470ac 100644 --- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc @@ -388,7 +388,6 @@ void X86_64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, DCHECK_EQ(src.GetSize(), dest.GetSize()); } if (src.IsRegister() && ref != kInvalidReferenceOffset) { - Store(ref, src.GetRegister(), kObjectReferenceSize); // Note: We can clobber `src` here as the register cannot hold more than one argument. // This overload of `CreateJObject()` is currently implemented as "test and branch"; // if it was using a conditional move, it would be better to do this at move time. diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc index 7bcff2bafc..cca5bc2fc3 100644 --- a/dex2oat/linker/oat_writer_test.cc +++ b/dex2oat/linker/oat_writer_test.cc @@ -505,7 +505,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(64U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(4U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(169 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), + EXPECT_EQ(168 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), sizeof(QuickEntryPoints)); } diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 5ef1d3e17a..ca63914759 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -493,52 +493,66 @@ END art_quick_do_long_jump */ TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER - /* - * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the - * possibly null object to lock. - */ - .extern artLockObjectFromCode -ENTRY art_quick_lock_object - ldr r1, [rSELF, #THREAD_ID_OFFSET] - cbz r0, .Lslow_lock -.Lretry_lock: - ldrex r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - eor r3, r2, r1 @ Prepare the value to store if unlocked +.macro LOCK_OBJECT_FAST_PATH obj, tmp1, tmp2, tmp3, slow_lock, can_be_null + ldr \tmp1, [rSELF, #THREAD_ID_OFFSET] + .if \can_be_null + cbz \obj, \slow_lock + .endif +1: + ldrex \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + eor \tmp3, \tmp2, \tmp1 @ Prepare the value to store if unlocked @ (thread id, count of 0 and preserved read barrier bits), @ or prepare to compare thread id for recursive lock check @ (lock_word.ThreadId() ^ self->ThreadId()). - ands ip, r2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits. - bne .Lnot_unlocked @ Check if unlocked. - @ unlocked case - store r3: original lock word plus thread id, preserved read barrier bits. - strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] - cbnz r2, .Llock_strex_fail @ If store failed, retry. + ands ip, \tmp2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits. + bne 2f @ Check if unlocked. + @ unlocked case - store tmp3: original lock word plus thread id, preserved read barrier bits. + strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + cbnz \tmp2, 3f @ If store failed, retry. dmb ish @ Full (LoadLoad|LoadStore) memory barrier. 
bx lr -.Lnot_unlocked: @ r2: original lock word, r1: thread_id, r3: r2 ^ r1 +2: @ tmp2: original lock word, tmp1: thread_id, tmp3: tmp2 ^ tmp1 #if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT #error "Expecting thin lock count and gc state in consecutive bits." #endif - @ Check lock word state and thread id together, - bfc r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE) - cbnz r3, .Lslow_lock @ if either of the top two bits are set, or the lock word's + @ Check lock word state and thread id together. + bfc \tmp3, \ + #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, \ + #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE) + cbnz \tmp3, \slow_lock @ if either of the top two bits are set, or the lock word's @ thread id did not match, go slow path. - add r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Increment the recursive lock count. + add \tmp3, \tmp2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Increment the recursive lock count. @ Extract the new thin lock count for overflow check. - ubfx r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE - cbz r2, .Lslow_lock @ Zero as the new count indicates overflow, go slow path. - strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits. - cbnz r2, .Llock_strex_fail @ If strex failed, retry. + ubfx \tmp2, \tmp3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE + cbz \tmp2, \slow_lock @ Zero as the new count indicates overflow, go slow path. + @ strex necessary for read barrier bits. + strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + cbnz \tmp2, 3f @ If strex failed, retry. bx lr -.Llock_strex_fail: - b .Lretry_lock @ retry -// Note: the slow path is actually the art_quick_lock_object_no_inline (tail call). +3: + b 1b @ retry +.endm + + /* + * Entry from managed code that tries to lock the object in a fast path and + * calls `artLockObjectFromCode()` for the difficult cases, may block for GC. + * r0 holds the possibly null object to lock. + */ +ENTRY art_quick_lock_object + // Note: the slow path is actually the art_quick_lock_object_no_inline (tail call). + LOCK_OBJECT_FAST_PATH r0, r1, r2, r3, .Llock_object_slow, /*can_be_null*/ 1 END art_quick_lock_object + /* + * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC. + * r0 holds the possibly null object to lock. + */ + .extern artLockObjectFromCode ENTRY art_quick_lock_object_no_inline // This is also the slow path for art_quick_lock_object. Note that we // need a local label, the assembler complains about target being out of // range if we try to jump to `art_quick_lock_object_no_inline`. -.Lslow_lock: +.Llock_object_slow: SETUP_SAVE_REFS_ONLY_FRAME r1 @ save callee saves in case we block mov r1, rSELF @ pass Thread::Current bl artLockObjectFromCode @ (Object* obj, Thread*) @@ -548,62 +562,78 @@ ENTRY art_quick_lock_object_no_inline DELIVER_PENDING_EXCEPTION END art_quick_lock_object_no_inline - /* - * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure. - * r0 holds the possibly null object to lock. 
- */ - .extern artUnlockObjectFromCode -ENTRY art_quick_unlock_object - ldr r1, [rSELF, #THREAD_ID_OFFSET] - cbz r0, .Lslow_unlock -.Lretry_unlock: +.macro UNLOCK_OBJECT_FAST_PATH obj, tmp1, tmp2, tmp3, slow_unlock, can_be_null + ldr \tmp1, [rSELF, #THREAD_ID_OFFSET] + .if \can_be_null + cbz \obj, \slow_unlock + .endif +1: #ifndef USE_READ_BARRIER - ldr r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + ldr \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] #else @ Need to use atomic instructions for read barrier. - ldrex r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + ldrex \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] #endif - eor r3, r2, r1 @ Prepare the value to store if simply locked + eor \tmp3, \tmp2, \tmp1 @ Prepare the value to store if simply locked @ (mostly 0s, and preserved read barrier bits), @ or prepare to compare thread id for recursive lock check @ (lock_word.ThreadId() ^ self->ThreadId()). - ands ip, r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits. - bne .Lnot_simply_locked @ Locked recursively or by other thread? + ands ip, \tmp3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits. + bne 2f @ Locked recursively or by other thread? @ Transition to unlocked. dmb ish @ Full (LoadStore|StoreStore) memory barrier. #ifndef USE_READ_BARRIER - str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + str \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] #else - strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits - cbnz r2, .Lunlock_strex_fail @ If the store failed, retry. + @ strex necessary for read barrier bits + strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + cbnz \tmp2, 3f @ If the store failed, retry. #endif bx lr -.Lnot_simply_locked: @ r2: original lock word, r1: thread_id, r3: r2 ^ r1 +2: @ tmp2: original lock word, tmp1: thread_id, tmp3: tmp2 ^ tmp1 #if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT #error "Expecting thin lock count and gc state in consecutive bits." #endif @ Check lock word state and thread id together, - bfc r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE) - cbnz r3, .Lslow_unlock @ if either of the top two bits are set, or the lock word's + bfc \tmp3, \ + #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, \ + #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE) + cbnz \tmp3, \slow_unlock @ if either of the top two bits are set, or the lock word's @ thread id did not match, go slow path. - sub r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Decrement recursive lock count. + sub \tmp3, \tmp2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Decrement recursive lock count. #ifndef USE_READ_BARRIER - str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + str \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] #else - strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits. - cbnz r2, .Lunlock_strex_fail @ If the store failed, retry. + @ strex necessary for read barrier bits. + strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + cbnz \tmp2, 3f @ If the store failed, retry. #endif bx lr -.Lunlock_strex_fail: - b .Lretry_unlock @ retry -// Note: the slow path is actually the art_quick_unlock_object_no_inline (tail call). +3: + b 1b @ retry +.endm + + /* + * Entry from managed code that tries to unlock the object in a fast path and calls + * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure. + * r0 holds the possibly null object to unlock. 
+ */ +ENTRY art_quick_unlock_object + // Note: the slow path is actually the art_quick_unlock_object_no_inline (tail call). + UNLOCK_OBJECT_FAST_PATH r0, r1, r2, r3, .Lunlock_object_slow, /*can_be_null*/ 1 END art_quick_unlock_object + /* + * Entry from managed code that calls `artUnlockObjectFromCode()` + * and delivers exception on failure. + * r0 holds the possibly null object to unlock. + */ + .extern artUnlockObjectFromCode ENTRY art_quick_unlock_object_no_inline // This is also the slow path for art_quick_unlock_object. Note that we // need a local label, the assembler complains about target being out of // range if we try to jump to `art_quick_unlock_object_no_inline`. -.Lslow_unlock: +.Lunlock_object_slow: @ save callee saves in case exception allocation triggers GC SETUP_SAVE_REFS_ONLY_FRAME r1 mov r1, rSELF @ pass Thread::Current @@ -615,6 +645,80 @@ ENTRY art_quick_unlock_object_no_inline END art_quick_unlock_object_no_inline /* + * Entry from JNI stub that tries to lock the object in a fast path and + * calls `artLockObjectFromCode()` (the same as for managed code) for the + * difficult cases, may block for GC. + * Custom calling convention: + * r4 holds the non-null object to lock. + * Callee-save registers have been saved and can be used as temporaries. + * All argument registers need to be preserved. + */ +ENTRY art_quick_lock_object_jni + LOCK_OBJECT_FAST_PATH r4, r5, r6, r7, .Llock_object_jni_slow, /*can_be_null*/ 0 + +.Llock_object_jni_slow: + // Save GPR args r0-r3 and return address. Also save r4 for stack alignment. + push {r0-r4, lr} + .cfi_adjust_cfa_offset 24 + .cfi_rel_offset lr, 20 + // Save FPR args. + vpush {s0-s15} + .cfi_adjust_cfa_offset 64 + // Call `artLockObjectFromCode()` + mov r0, r4 @ Pass the object to lock. + mov r1, rSELF @ Pass Thread::Current(). + bl artLockObjectFromCode @ (Object* obj, Thread*) + // Restore FPR args. + vpop {s0-s15} + .cfi_adjust_cfa_offset -64 + // Check result. + cbnz r0, 1f + // Restore GPR args and r4 and return. + pop {r0-r4, pc} +1: + // GPR args are irrelevant when throwing an exception but pop them anyway with the LR we need. + pop {r0-r4, lr} + .cfi_adjust_cfa_offset -24 + .cfi_restore lr + // Make a tail call to `artDeliverPendingExceptionFromCode()`. + // Rely on the JNI transition frame constructed in the JNI stub. + mov r0, rSELF @ Pass Thread::Current(). + b artDeliverPendingExceptionFromCode @ (Thread*) +END art_quick_lock_object_jni + + /* + * Entry from JNI stub that tries to unlock the object in a fast path and calls + * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock + * is fatal, so we do not need to check for exceptions in the slow path. + * Custom calling convention: + * r4 holds the non-null object to unlock. + * Callee-save registers have been saved and can be used as temporaries. + * Return registers r0-r1 and s0-s1 need to be preserved. + */ + .extern artLockObjectFromJni +ENTRY art_quick_unlock_object_jni + UNLOCK_OBJECT_FAST_PATH r4, r5, r6, r7, .Lunlock_object_jni_slow, /*can_be_null*/ 0 + + .Lunlock_object_jni_slow: + // Save GPR return registers and return address. Also save r4 for stack alignment. + push {r0-r1, r4, lr} + .cfi_adjust_cfa_offset 16 + .cfi_rel_offset lr, 12 + // Save FPR return registers. + vpush {s0-s1} + .cfi_adjust_cfa_offset 8 + // Call `artUnlockObjectFromJni()` + mov r0, r4 @ Pass the object to unlock. + mov r1, rSELF @ Pass Thread::Current(). + bl artUnlockObjectFromJni @ (Object* obj, Thread*) + // Restore FPR return registers. 
+ vpop {s0-s1} + .cfi_adjust_cfa_offset -8 + // Restore GPR return registers and r4 and return. + pop {r0-r1, r4, pc} +END art_quick_unlock_object_jni + + /* * Entry from managed code that calls artInstanceOfFromCode and on failure calls * artThrowClassCastExceptionForObject. */ diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index e5dbeda42d..657ff7831f 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -881,42 +881,52 @@ ENTRY art_quick_do_long_jump br xIP1 END art_quick_do_long_jump - /* - * Entry from managed code that calls artLockObjectFromCode, may block for GC. x0 holds the - * possibly null object to lock. - * - * Derived from arm32 code. - */ - .extern artLockObjectFromCode -ENTRY art_quick_lock_object - ldr w1, [xSELF, #THREAD_ID_OFFSET] - cbz w0, art_quick_lock_object_no_inline +.macro LOCK_OBJECT_FAST_PATH obj, slow_lock, can_be_null + // Use scratch registers x8-x11 as temporaries. + ldr w9, [xSELF, #THREAD_ID_OFFSET] + .if \can_be_null + cbz \obj, \slow_lock + .endif // Exclusive load/store has no immediate anymore. - add x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET -.Lretry_lock: - ldaxr w2, [x4] // Acquire needed only in most common case. - eor w3, w2, w1 // Prepare the value to store if unlocked + add x8, \obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET +1: + ldaxr w10, [x8] // Acquire needed only in most common case. + eor w11, w10, w9 // Prepare the value to store if unlocked // (thread id, count of 0 and preserved read barrier bits), // or prepare to compare thread id for recursive lock check // (lock_word.ThreadId() ^ self->ThreadId()). - tst w2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // Test the non-gc bits. - b.ne .Lnot_unlocked // Check if unlocked. - // unlocked case - store w3: original lock word plus thread id, preserved read barrier bits. - stxr w2, w3, [x4] - cbnz w2, .Lretry_lock // If the store failed, retry. + tst w10, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // Test the non-gc bits. + b.ne 2f // Check if unlocked. + // Unlocked case - store w11: original lock word plus thread id, preserved read barrier bits. + stxr w10, w11, [x8] + cbnz w10, 1b // If the store failed, retry. ret -.Lnot_unlocked: // w2: original lock word, w1: thread id, w3: w2 ^ w1 +2: // w10: original lock word, w9: thread id, w11: w10 ^ w11 // Check lock word state and thread id together, - tst w3, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED) - b.ne art_quick_lock_object_no_inline - add w3, w2, #LOCK_WORD_THIN_LOCK_COUNT_ONE // Increment the recursive lock count. - tst w3, #LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED // Test the new thin lock count. - b.eq art_quick_lock_object_no_inline // Zero as the new count indicates overflow, go slow path. - stxr w2, w3, [x4] - cbnz w2, .Lretry_lock // If the store failed, retry. + tst w11, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED) + b.ne \slow_lock + add w11, w10, #LOCK_WORD_THIN_LOCK_COUNT_ONE // Increment the recursive lock count. + tst w11, #LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED // Test the new thin lock count. + b.eq \slow_lock // Zero as the new count indicates overflow, go slow path. + stxr w10, w11, [x8] + cbnz w10, 1b // If the store failed, retry. ret +.endm + + /* + * Entry from managed code that tries to lock the object in a fast path and + * calls `artLockObjectFromCode()` for the difficult cases, may block for GC. + * x0 holds the possibly null object to lock. 
+ */
+ENTRY art_quick_lock_object
+    LOCK_OBJECT_FAST_PATH x0, art_quick_lock_object_no_inline, /*can_be_null*/ 1
 END art_quick_lock_object
+    /*
+     * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
+     * x0 holds the possibly null object to lock.
+     */
+    .extern artLockObjectFromCode
 ENTRY art_quick_lock_object_no_inline
     // This is also the slow path for art_quick_lock_object.
     SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
@@ -927,52 +937,63 @@ ENTRY art_quick_lock_object_no_inline
     RETURN_IF_W0_IS_ZERO_OR_DELIVER
 END art_quick_lock_object_no_inline
-    /*
-     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
-     * x0 holds the possibly null object to lock.
-     *
-     * Derived from arm32 code.
-     */
-    .extern artUnlockObjectFromCode
-ENTRY art_quick_unlock_object
-    ldr    w1, [xSELF, #THREAD_ID_OFFSET]
-    cbz    x0, art_quick_unlock_object_no_inline
+.macro UNLOCK_OBJECT_FAST_PATH obj, slow_unlock, can_be_null
+    // Use scratch registers x8-x11 as temporaries.
+    ldr    w9, [xSELF, #THREAD_ID_OFFSET]
+    .if \can_be_null
+        cbz    \obj, \slow_unlock
+    .endif
     // Exclusive load/store has no immediate anymore.
-    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET
-.Lretry_unlock:
+    add    x8, \obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET
+1:
 #ifndef USE_READ_BARRIER
-    ldr    w2, [x4]
+    ldr    w10, [x8]
 #else
-    ldxr   w2, [x4]                   // Need to use atomic instructions for read barrier.
+    ldxr   w10, [x8]                  // Need to use atomic instructions for read barrier.
 #endif
-    eor    w3, w2, w1                 // Prepare the value to store if simply locked
+    eor    w11, w10, w9               // Prepare the value to store if simply locked
                                       // (mostly 0s, and preserved read barrier bits),
                                       // or prepare to compare thread id for recursive lock check
                                       // (lock_word.ThreadId() ^ self->ThreadId()).
-    tst    w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // Test the non-gc bits.
-    b.ne   .Lnot_simply_locked        // Locked recursively or by other thread?
+    tst    w11, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // Test the non-gc bits.
+    b.ne   2f                         // Locked recursively or by other thread?
     // Transition to unlocked.
 #ifndef USE_READ_BARRIER
-    stlr   w3, [x4]
+    stlr   w11, [x8]
 #else
-    stlxr  w2, w3, [x4]               // Need to use atomic instructions for read barrier.
-    cbnz   w2, .Lretry_unlock         // If the store failed, retry.
+    stlxr  w10, w11, [x8]             // Need to use atomic instructions for read barrier.
+    cbnz   w10, 1b                    // If the store failed, retry.
 #endif
     ret
-.Lnot_simply_locked:
-    // Check lock word state and thread id together,
-    tst    w3, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
+2:
+    // Check lock word state and thread id together.
+    tst    w11, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
-    b.ne   art_quick_unlock_object_no_inline
+    b.ne   \slow_unlock
-    sub    w3, w2, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // decrement count
+    sub    w11, w10, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // decrement count
 #ifndef USE_READ_BARRIER
-    str    w3, [x4]
+    str    w11, [x8]
 #else
-    stxr   w2, w3, [x4]               // Need to use atomic instructions for read barrier.
-    cbnz   w2, .Lretry_unlock         // If the store failed, retry.
+    stxr   w10, w11, [x8]             // Need to use atomic instructions for read barrier.
+    cbnz   w10, 1b                    // If the store failed, retry.
 #endif
     ret
+.endm
+
+    /*
+     * Entry from managed code that tries to unlock the object in a fast path and calls
+     * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
+     * x0 holds the possibly null object to unlock.
+ */ +ENTRY art_quick_unlock_object + UNLOCK_OBJECT_FAST_PATH x0, art_quick_unlock_object_no_inline, /*can_be_null*/ 1 END art_quick_unlock_object + /* + * Entry from managed code that calls `artUnlockObjectFromCode()` + * and delivers exception on failure. + * x0 holds the possibly null object to unlock. + */ + .extern artUnlockObjectFromCode ENTRY art_quick_unlock_object_no_inline // This is also the slow path for art_quick_unlock_object. SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case exception allocation triggers GC @@ -984,6 +1005,91 @@ ENTRY art_quick_unlock_object_no_inline END art_quick_unlock_object_no_inline /* + * Entry from JNI stub that tries to lock the object in a fast path and + * calls `artLockObjectFromCode()` (the same as for managed code) for the + * difficult cases, may block for GC. + * Custom calling convention: + * x15 holds the non-null object to lock. + * Callee-save registers have been saved and can be used as temporaries. + * All argument registers need to be preserved. + */ +ENTRY art_quick_lock_object_jni + LOCK_OBJECT_FAST_PATH x15, .Llock_object_jni_slow, /*can_be_null*/ 0 + +.Llock_object_jni_slow: + // Save register args x0-x7, d0-d7 and return address. + stp x0, x1, [sp, #-(8 * 8 + 8 * 8 + /*padding*/ 8 + 8)]! + .cfi_adjust_cfa_offset (8 * 8 + 8 * 8 + /*padding*/ 8 + 8) + stp x2, x3, [sp, #16] + stp x4, x5, [sp, #32] + stp x6, x7, [sp, #48] + stp d0, d1, [sp, #64] + stp d2, d3, [sp, #80] + stp d4, d5, [sp, #96] + stp d6, d7, [sp, #112] + str lr, [sp, #136] + .cfi_rel_offset lr, 136 + // Call `artLockObjectFromCode()` + mov x0, x15 // Pass the object to lock. + mov x1, xSELF // Pass Thread::Current(). + bl artLockObjectFromCode // (Object* obj, Thread*) + // Restore return address. + ldr lr, [sp, #136] + .cfi_restore lr + // Check result. + cbnz x0, 1f + // Restore register args x0-x7, d0-d7 and return. + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] + ldp d0, d1, [sp, #64] + ldp d2, d3, [sp, #80] + ldp d4, d5, [sp, #96] + ldp d6, d7, [sp, #112] + ldp x0, x1, [sp], #(8 * 8 + 8 * 8 + /*padding*/ 8 + 8) + .cfi_adjust_cfa_offset -(8 * 8 + 8 * 8 + /*padding*/ 8 + 8) + ret + .cfi_adjust_cfa_offset (8 * 8 + 8 * 8 + /*padding*/ 8 + 8) +1: + // All args are irrelevant when throwing an exception. Remove the spill area. + DECREASE_FRAME (8 * 8 + 8 * 8 + /*padding*/ 8 + 8) + // Make a tail call to `artDeliverPendingExceptionFromCode()`. + // Rely on the JNI transition frame constructed in the JNI stub. + mov x0, xSELF // Pass Thread::Current(). + b artDeliverPendingExceptionFromCode // (Thread*) +END art_quick_lock_object_jni + + /* + * Entry from JNI stub that tries to unlock the object in a fast path and calls + * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock + * is fatal, so we do not need to check for exceptions in the slow path. + * Custom calling convention: + * x15 holds the non-null object to unlock. + * Callee-save registers have been saved and can be used as temporaries. + * Return registers r0 and d0 need to be preserved. + */ +ENTRY art_quick_unlock_object_jni + UNLOCK_OBJECT_FAST_PATH x15, .Lunlock_object_jni_slow, /*can_be_null*/ 0 + + .Lunlock_object_jni_slow: + // Save return registers and return address. + stp x0, lr, [sp, #-32]! + .cfi_adjust_cfa_offset 32 + .cfi_rel_offset lr, 8 + str d0, [sp, #16] + // Call `artUnlockObjectFromJni()` + mov x0, x15 // Pass the object to unlock. + mov x1, xSELF // Pass Thread::Current(). 
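The LOCK_OBJECT_FAST_PATH macros above encode the thin-lock protocol: XOR the lock word with the current thread id so that one value serves both as the word to store when the object is unlocked and as the owner comparison for a recursive lock. Below is a minimal C++ sketch of that logic using std::atomic; the bit-layout constants and the ThinLockFastPath helper are illustrative stand-ins (the real values come from the generated asm_support headers), not ART code.

    #include <atomic>
    #include <cstdint>

    // Illustrative lock word layout (2 state bits, 2 GC/read-barrier bits,
    // 12 count bits, 16 owner bits); the real constants are generated.
    constexpr uint32_t kStateMaskShifted   = 0xc0000000u;
    constexpr uint32_t kGcStateMaskShifted = 0x30000000u;
    constexpr uint32_t kThinLockCountMask  = 0x0fff0000u;
    constexpr uint32_t kThinLockCountOne   = 0x00010000u;
    constexpr uint32_t kThinLockOwnerMask  = 0x0000ffffu;

    // Returns true on fast-path success; false means "take the slow path".
    bool ThinLockFastPath(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
      while (true) {
        uint32_t old_word = lock_word.load(std::memory_order_relaxed);
        // XOR trick: for an unlocked word this is the new value to store (owner id,
        // count 0, GC bits preserved); otherwise it compares the owner id with ours.
        uint32_t tmp = old_word ^ thread_id;
        if ((old_word & ~kGcStateMaskShifted) == 0u) {            // Unlocked?
          if (lock_word.compare_exchange_weak(old_word, tmp, std::memory_order_acquire)) {
            return true;
          }
          continue;                                               // Lost a race, retry.
        }
        if ((tmp & (kStateMaskShifted | kThinLockOwnerMask)) != 0u) {
          return false;  // Inflated monitor, hash code, or owned by another thread.
        }
        uint32_t new_word = old_word + kThinLockCountOne;         // Recursive lock.
        if ((new_word & kThinLockCountMask) == 0u) {
          return false;  // Count overflow: inflate in the slow path.
        }
        if (lock_word.compare_exchange_weak(old_word, new_word, std::memory_order_relaxed)) {
          return true;
        }
      }
    }

    int main() {
      std::atomic<uint32_t> lock_word{0x10000000u};  // Unlocked, one GC state bit set.
      return (ThinLockFastPath(lock_word, 42u) &&    // First acquisition.
              ThinLockFastPath(lock_word, 42u))      // Recursive acquisition.
                 ? 0 : 1;
    }

The acquire ordering on the successful CAS plays the role of the `ldaxr`/`lock cmpxchg` acquire semantics in the stubs.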
+ bl artUnlockObjectFromJni // (Object* obj, Thread*) + // Restore return registers and return. + ldr d0, [sp, #16] + ldp x0, lr, [sp], #32 + .cfi_adjust_cfa_offset -32 + .cfi_restore lr + ret +END art_quick_unlock_object_jni + + /* * Entry from managed code that calls artInstanceOfFromCode and on failure calls * artThrowClassCastExceptionForObject. */ diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 2f6af4f5de..d16f15ca21 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1133,145 +1133,236 @@ ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromC TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO -DEFINE_FUNCTION art_quick_lock_object - testl %eax, %eax // null check object/eax - jz .Lslow_lock -.Lretry_lock: - movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx // ecx := lock word - test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx // test the 2 high bits. - jne .Lslow_lock // slow path if either of the two high bits are set. - movl %ecx, %edx // save lock word (edx) to keep read barrier bits. - andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits. - test %ecx, %ecx - jnz .Lalready_thin // lock word contains a thin lock - // unlocked case - edx: original lock word, eax: obj. - movl %eax, %ecx // remember object in case of retry - movl %edx, %eax // eax: lock word zero except for read barrier bits. - movl %fs:THREAD_ID_OFFSET, %edx // load thread id. - or %eax, %edx // edx: thread id with count of 0 + read barrier bits. - lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) // eax: old val, edx: new val. - jnz .Llock_cmpxchg_fail // cmpxchg failed retry +MACRO4(LOCK_OBJECT_FAST_PATH, obj, tmp, saved_eax, slow_lock) +1: + movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax // EAX := lock word + movl %fs:THREAD_ID_OFFSET, REG_VAR(tmp) // tmp: thread id. + xorl %eax, REG_VAR(tmp) // tmp: thread id with count 0 + read barrier bits. + testl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %eax // Test the non-gc bits. + jnz 2f // Check if unlocked. + // Unlocked case - store tmp: original lock word plus thread id, preserved read barrier bits. + // EAX: old val, tmp: new val. + lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) + jnz 1b // cmpxchg failed retry + .ifnc \saved_eax, none + movl REG_VAR(saved_eax), %eax // Restore EAX. + .endif ret -.Lalready_thin: // edx: lock word (with high 2 bits zero and original rb bits), eax: obj. - movl %fs:THREAD_ID_OFFSET, %ecx // ecx := thread id - cmpw %cx, %dx // do we hold the lock already? - jne .Lslow_lock - movl %edx, %ecx // copy the lock word to check count overflow. - andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the read barrier bits. - addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count for overflow check. - test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // overflowed if the first gc state bit is set. - jne .Lslow_lock // count overflowed so go slow - movl %eax, %ecx // save obj to use eax for cmpxchg. - movl %edx, %eax // copy the lock word as the old val for cmpxchg. - addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real. - // update lockword, cmpxchg necessary for read barrier bits. - lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) // eax: old val, edx: new val. 
- jnz .Llock_cmpxchg_fail // cmpxchg failed retry +2: // EAX: original lock word, tmp: thread id ^ EAX + // Check lock word state and thread id together, + testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \ + REG_VAR(tmp) + jne \slow_lock // Slow path if either of the two high bits are set. + // Increment the recursive lock count. + leal LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp) + testl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED), REG_VAR(tmp) + jz \slow_lock // If count overflowed, go to slow lock. + // Update lockword for recursive lock, cmpxchg necessary for read barrier bits. + // EAX: old val, tmp: new val. + lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) + jnz 1b // cmpxchg failed retry + .ifnc \saved_eax, none + movl REG_VAR(saved_eax), %eax // Restore EAX. + .endif ret -.Llock_cmpxchg_fail: - movl %ecx, %eax // restore eax - jmp .Lretry_lock -.Lslow_lock: - SETUP_SAVE_REFS_ONLY_FRAME ebx // save ref containing registers for GC - // Outgoing argument set up - subl LITERAL(8), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(8) - pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() - CFI_ADJUST_CFA_OFFSET(4) - PUSH eax // pass object - call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*) - addl LITERAL(16), %esp // pop arguments - CFI_ADJUST_CFA_OFFSET(-16) - RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address - RETURN_IF_EAX_ZERO +END_MACRO + + /* + * Entry from managed code that tries to lock the object in a fast path and + * calls `artLockObjectFromCode()` for the difficult cases, may block for GC. + * EAX holds the possibly null object to lock. + */ +DEFINE_FUNCTION art_quick_lock_object + testl %eax, %eax + jz SYMBOL(art_quick_lock_object_no_inline) + movl %eax, %ecx // Move obj to a different register. + LOCK_OBJECT_FAST_PATH ecx, edx, /*saved_eax*/ none, .Llock_object_slow +.Llock_object_slow: + movl %ecx, %eax // Move obj back to EAX. + jmp SYMBOL(art_quick_lock_object_no_inline) END_FUNCTION art_quick_lock_object + /* + * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC. + * EAX holds the possibly null object to lock. + */ DEFINE_FUNCTION art_quick_lock_object_no_inline + // This is also the slow path for art_quick_lock_object. SETUP_SAVE_REFS_ONLY_FRAME ebx // save ref containing registers for GC // Outgoing argument set up - subl LITERAL(8), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(8) + INCREASE_FRAME 8 // alignment padding pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) - PUSH eax // pass object + PUSH_ARG eax // pass object call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*) - addl LITERAL(16), %esp // pop arguments - CFI_ADJUST_CFA_OFFSET(-16) + DECREASE_FRAME 16 // pop arguments RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address RETURN_IF_EAX_ZERO END_FUNCTION art_quick_lock_object_no_inline - -DEFINE_FUNCTION art_quick_unlock_object - testl %eax, %eax // null check object/eax - jz .Lslow_unlock -.Lretry_unlock: - movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx // ecx := lock word - movl %fs:THREAD_ID_OFFSET, %edx // edx := thread id - test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx - jnz .Lslow_unlock // lock word contains a monitor - cmpw %cx, %dx // does the thread id match? - jne .Lslow_unlock - movl %ecx, %edx // copy the lock word to detect new count of 0. - andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx // zero the gc bits. 
- cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx - jae .Lrecursive_thin_unlock - // update lockword, cmpxchg necessary for read barrier bits. - movl %eax, %edx // edx: obj - movl %ecx, %eax // eax: old lock word. - andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // ecx: new lock word zero except original rb bits. +MACRO4(UNLOCK_OBJECT_FAST_PATH, obj, tmp, saved_eax, slow_unlock) +1: + movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax // EAX := lock word + movl %fs:THREAD_ID_OFFSET, REG_VAR(tmp) // tmp := thread id + xorl %eax, REG_VAR(tmp) // tmp := thread id ^ lock word + test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), REG_VAR(tmp) + jnz 2f // Check if simply locked. + // Transition to unlocked. #ifndef USE_READ_BARRIER - movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) + movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) #else - lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) // eax: old val, ecx: new val. - jnz .Lunlock_cmpxchg_fail // cmpxchg failed retry + lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) + jnz 1b // cmpxchg failed retry #endif + .ifnc \saved_eax, none + movl REG_VAR(saved_eax), %eax // Restore EAX. + .endif ret -.Lrecursive_thin_unlock: // ecx: original lock word, eax: obj - // update lockword, cmpxchg necessary for read barrier bits. - movl %eax, %edx // edx: obj - movl %ecx, %eax // eax: old lock word. - subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // ecx: new lock word with decremented count. +2: // EAX: original lock word, tmp: lock_word ^ thread id + // Check lock word state and thread id together. + testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \ + REG_VAR(tmp) + jnz \slow_unlock + // Update lockword for recursive unlock, cmpxchg necessary for read barrier bits. + // tmp: new lock word with decremented count. + leal -LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp) #ifndef USE_READ_BARRIER - mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) + movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) #else - lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) // eax: old val, ecx: new val. - jnz .Lunlock_cmpxchg_fail // cmpxchg failed retry + lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) + jnz 1b // cmpxchg failed retry #endif + .ifnc \saved_eax, none + movl REG_VAR(saved_eax), %eax // Restore EAX. + .endif ret -.Lunlock_cmpxchg_fail: // edx: obj - movl %edx, %eax // restore eax - jmp .Lretry_unlock -.Lslow_unlock: - SETUP_SAVE_REFS_ONLY_FRAME ebx // save ref containing registers for GC - // Outgoing argument set up - subl LITERAL(8), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(8) - pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() - CFI_ADJUST_CFA_OFFSET(4) - PUSH eax // pass object - call SYMBOL(artUnlockObjectFromCode) // artUnlockObjectFromCode(object, Thread*) - addl LITERAL(16), %esp // pop arguments - CFI_ADJUST_CFA_OFFSET(-16) - RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address - RETURN_IF_EAX_ZERO +END_MACRO + + /* + * Entry from managed code that tries to unlock the object in a fast path and calls + * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure. + * EAX holds the possibly null object to unlock. + */ +DEFINE_FUNCTION art_quick_unlock_object + testl %eax, %eax + jz SYMBOL(art_quick_unlock_object_no_inline) + movl %eax, %ecx // Move obj to a different register. 
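The UNLOCK_OBJECT_FAST_PATH macros are the mirror image: if the XOR leaves only GC/read-barrier bits, the thin lock is held exactly once by this thread and can be released with a release store (or CAS when read barriers are enabled); otherwise either the recursion count is decremented or the slow path handles an inflated monitor. A short C++ sketch under the same illustrative assumptions as the lock sketch above:

    #include <atomic>
    #include <cstdint>

    // Same illustrative layout as the lock sketch.
    constexpr uint32_t kStateMaskShifted   = 0xc0000000u;
    constexpr uint32_t kGcStateMaskShifted = 0x30000000u;
    constexpr uint32_t kThinLockCountOne   = 0x00010000u;
    constexpr uint32_t kThinLockOwnerMask  = 0x0000ffffu;

    // Returns false when the slow path (inflated monitor or wrong owner) must run.
    bool ThinUnlockFastPath(std::atomic<uint32_t>& lock_word, uint32_t thread_id) {
      while (true) {
        uint32_t old_word = lock_word.load(std::memory_order_relaxed);
        uint32_t tmp = old_word ^ thread_id;
        if ((tmp & ~kGcStateMaskShifted) == 0u) {
          // Held exactly once by us: clear the owner, keep GC/read-barrier bits;
          // release ordering matches the stlr / lock cmpxchg variants.
          if (lock_word.compare_exchange_weak(old_word, tmp, std::memory_order_release)) {
            return true;
          }
          continue;
        }
        if ((tmp & (kStateMaskShifted | kThinLockOwnerMask)) != 0u) {
          return false;  // Inflated, hash code, or not owned by this thread.
        }
        // Recursive unlock: just decrement the count.
        if (lock_word.compare_exchange_weak(old_word, old_word - kThinLockCountOne,
                                            std::memory_order_relaxed)) {
          return true;
        }
      }
    }

    int main() {
      std::atomic<uint32_t> lock_word{0x1000002au};  // Thin-locked by thread 42, count 0.
      return (ThinUnlockFastPath(lock_word, 42u) && lock_word.load() == 0x10000000u) ? 0 : 1;
    }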
+ UNLOCK_OBJECT_FAST_PATH ecx, edx, /*saved_eax*/ none, .Lunlock_object_slow +.Lunlock_object_slow: + movl %ecx, %eax // Move obj back to EAX. + jmp SYMBOL(art_quick_unlock_object_no_inline) END_FUNCTION art_quick_unlock_object + /* + * Entry from managed code that calls `artUnlockObjectFromCode()` + * and delivers exception on failure. + * EAX holds the possibly null object to unlock. + */ DEFINE_FUNCTION art_quick_unlock_object_no_inline + // This is also the slow path for art_quick_unlock_object. SETUP_SAVE_REFS_ONLY_FRAME ebx // save ref containing registers for GC // Outgoing argument set up - subl LITERAL(8), %esp // alignment padding - CFI_ADJUST_CFA_OFFSET(8) + INCREASE_FRAME 8 // alignment padding pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() CFI_ADJUST_CFA_OFFSET(4) - PUSH eax // pass object + PUSH_ARG eax // pass object call SYMBOL(artUnlockObjectFromCode) // artUnlockObjectFromCode(object, Thread*) - addl LITERAL(16), %esp // pop arguments - CFI_ADJUST_CFA_OFFSET(-16) + DECREASE_FRAME 16 // pop arguments RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address RETURN_IF_EAX_ZERO END_FUNCTION art_quick_unlock_object_no_inline + /* + * Entry from JNI stub that tries to lock the object in a fast path and + * calls `artLockObjectFromCode()` (the same as for managed code) for the + * difficult cases, may block for GC. + * Custom calling convention: + * EBP holds the non-null object to lock. + * Callee-save registers have been saved and can be used as temporaries (except EBP). + * All argument registers need to be preserved. + */ +DEFINE_FUNCTION art_quick_lock_object_jni + movl %eax, %edi // Preserve EAX in a callee-save register. + LOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi .Llock_object_jni_slow + +.Llock_object_jni_slow: + // Save register args EAX, ECX, EDX, EBX, mmx0-mmx3 and align stack. + PUSH_ARG ebx + PUSH_ARG edx + PUSH_ARG ecx + PUSH_ARG edi // Original contents of EAX. + INCREASE_FRAME (/*FPRs*/ 4 * 8 + /*padding*/ 4) // Make xmm<n> spill slots 8-byte aligned. + movsd %xmm0, 0(%esp) + movsd %xmm1, 8(%esp) + movsd %xmm2, 16(%esp) + movsd %xmm3, 24(%esp) + // Note: The stack is not 16-byte aligned here but it shall be after pushing args for the call. + // Call `artLockObjectFromCode()` + pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). + CFI_ADJUST_CFA_OFFSET(4) + PUSH_ARG ebp // Pass the object to lock. + call SYMBOL(artLockObjectFromCode) // (object, Thread*) + // Check result. + testl %eax, %eax + jnz 1f + // Restore register args EAX, ECX, EDX, EBX, mmx0-mmx3 and return. + movsd 8(%esp), %xmm0 + movsd 16(%esp), %xmm1 + movsd 24(%esp), %xmm2 + movsd 32(%esp), %xmm3 + DECREASE_FRAME /*call args*/ 8 + /*FPR args*/ 4 * 8 + /*padding*/ 4 + POP_ARG eax + POP_ARG ecx + POP_ARG edx + POP_ARG ebx + ret + .cfi_adjust_cfa_offset (/*call args*/ 8 + /*FPRs*/ 4 * 8 + /*padding*/ 4 + /*GPRs*/ 4 * 4) +1: + // All args are irrelevant when throwing an exception. + // Remove the spill area except for new padding to align stack. + DECREASE_FRAME \ + (/*call args*/ 8 + /*FPRs*/ 4 * 8 + /*padding*/ 4 + /*GPRs*/ 4 * 4 - /*new padding*/ 8) + // Rely on the JNI transition frame constructed in the JNI stub. + pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() + CFI_ADJUST_CFA_OFFSET(4) + call SYMBOL(artDeliverPendingExceptionFromCode) // (Thread*) + UNREACHABLE +END_FUNCTION art_quick_lock_object_jni + + /* + * Entry from JNI stub that tries to unlock the object in a fast path and calls + * `artUnlockObjectFromJni()` for the difficult cases. 
Note that failure to unlock + * is fatal, so we do not need to check for exceptions in the slow path. + * Custom calling convention: + * EBP holds the non-null object to unlock. + * Callee-save registers have been saved and can be used as temporaries (except EBP). + * Return registers EAX, EDX and mmx0 need to be preserved. + */ + .extern artLockObjectFromJni +DEFINE_FUNCTION art_quick_unlock_object_jni + movl %eax, %edi // Preserve EAX in a different register. + UNLOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi, .Lunlock_object_jni_slow + + .Lunlock_object_jni_slow: + // Save return registers. + PUSH_ARG edx + PUSH_ARG edi // Original contents of EAX. + INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 4 + movsd %xmm0, 0(%esp) + // Note: The stack is not 16-byte aligned here but it shall be after pushing args for the call. + // Call `artUnlockObjectFromJni()` + pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). + CFI_ADJUST_CFA_OFFSET(4) + PUSH_ARG ebp // Pass the object to unlock. + call SYMBOL(artUnlockObjectFromJni) // (object, Thread*) + // Restore return registers and return. + movsd 8(%esp), %xmm0 + DECREASE_FRAME /*call args*/ 8 + /*xmm0*/ 8 + /*padding*/ 4 + POP_ARG eax + POP_ARG edx + ret +END_FUNCTION art_quick_unlock_object_jni + DEFINE_FUNCTION art_quick_instance_of PUSH eax // alignment padding PUSH ecx // pass arg2 - obj->klass diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 136198fe55..06715858a1 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1068,48 +1068,50 @@ ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromC TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO -DEFINE_FUNCTION art_quick_lock_object - testl %edi, %edi // Null check object/rdi. - jz .Lslow_lock -.Lretry_lock: - movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx // ecx := lock word. - test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx // Test the 2 high bits. - jne .Lslow_lock // Slow path if either of the two high bits are set. - movl %ecx, %edx // save lock word (edx) to keep read barrier bits. - andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits. - test %ecx, %ecx - jnz .Lalready_thin // Lock word contains a thin lock. - // unlocked case - edx: original lock word, edi: obj. - movl %edx, %eax // eax: lock word zero except for read barrier bits. - movl %gs:THREAD_ID_OFFSET, %edx // edx := thread id - or %eax, %edx // edx: thread id with count of 0 + read barrier bits. - lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) - jnz .Lretry_lock // cmpxchg failed retry +MACRO3(LOCK_OBJECT_FAST_PATH, obj, tmp, slow_lock) +1: + movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax // EAX := lock word + movl %gs:THREAD_ID_OFFSET, REG_VAR(tmp) // tmp: thread id. + xorl %eax, REG_VAR(tmp) // tmp: thread id with count 0 + read barrier bits. + testl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %eax // Test the non-gc bits. + jnz 2f // Check if unlocked. + // Unlocked case - store tmp: original lock word plus thread id, preserved read barrier bits. + lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) + jnz 1b // cmpxchg failed retry ret -.Lalready_thin: // edx: lock word (with high 2 bits zero and original rb bits), edi: obj. - movl %gs:THREAD_ID_OFFSET, %ecx // ecx := thread id - cmpw %cx, %dx // do we hold the lock already? 
- jne .Lslow_lock - movl %edx, %ecx // copy the lock word to check count overflow. - andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits. - addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count - test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if the upper bit (28) is set - jne .Lslow_lock // count overflowed so go slow - movl %edx, %eax // copy the lock word as the old val for cmpxchg. - addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real. - // update lockword, cmpxchg necessary for read barrier bits. - lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, edx: new val. - jnz .Lretry_lock // cmpxchg failed retry +2: // EAX: original lock word, tmp: thread id ^ EAX + // Check lock word state and thread id together, + testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \ + REG_VAR(tmp) + jne \slow_lock // Slow path if either of the two high bits are set. + // Increment the recursive lock count. + leal LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp) + testl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED), REG_VAR(tmp) + je \slow_lock // If count overflowed, go to slow lock. + // Update lockword for recursive lock, cmpxchg necessary for read barrier bits. + // EAX: old val, tmp: new val. + lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)) + jnz 1b // cmpxchg failed retry ret -.Lslow_lock: - SETUP_SAVE_REFS_ONLY_FRAME - movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() - call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*) - RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address - RETURN_IF_EAX_ZERO +END_MACRO + + /* + * Entry from managed code that tries to lock the object in a fast path and + * calls `artLockObjectFromCode()` for the difficult cases, may block for GC. + * RDI holds the possibly null object to lock. + */ +DEFINE_FUNCTION art_quick_lock_object + testq %rdi, %rdi // Null check object. + jz art_quick_lock_object_no_inline + LOCK_OBJECT_FAST_PATH rdi, ecx, art_quick_lock_object_no_inline END_FUNCTION art_quick_lock_object + /* + * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC. + * RDI holds the possibly null object to lock. + */ DEFINE_FUNCTION art_quick_lock_object_no_inline + // This is also the slow path for art_quick_lock_object. SETUP_SAVE_REFS_ONLY_FRAME movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*) @@ -1117,50 +1119,63 @@ DEFINE_FUNCTION art_quick_lock_object_no_inline RETURN_IF_EAX_ZERO END_FUNCTION art_quick_lock_object_no_inline -DEFINE_FUNCTION art_quick_unlock_object - testl %edi, %edi // null check object/edi - jz .Lslow_unlock -.Lretry_unlock: - movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx // ecx := lock word - movl %gs:THREAD_ID_OFFSET, %edx // edx := thread id - test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx - jnz .Lslow_unlock // lock word contains a monitor - cmpw %cx, %dx // does the thread id match? - jne .Lslow_unlock - movl %ecx, %edx // copy the lock word to detect new count of 0. - andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx // zero the gc bits. - cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx - jae .Lrecursive_thin_unlock - // update lockword, cmpxchg necessary for read barrier bits. - movl %ecx, %eax // eax: old lock word. 
-    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
+MACRO4(UNLOCK_OBJECT_FAST_PATH, obj, tmp, saved_rax, slow_unlock)
+1:
+    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax  // EAX := lock word
+    movl %gs:THREAD_ID_OFFSET, REG_VAR(tmp)  // tmp := thread id
+    xorl %eax, REG_VAR(tmp)                  // tmp := thread id ^ lock word
+    test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), REG_VAR(tmp)
+    jnz  2f                                  // Check if simply locked.
+    // Transition to unlocked.
 #ifndef USE_READ_BARRIER
-    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
+    movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
 #else
-    lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
-    jnz  .Lretry_unlock                      // cmpxchg failed retry
+    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+    jnz  1b                                  // cmpxchg failed retry
 #endif
+    .ifnc \saved_rax, none
+        movq REG_VAR(saved_rax), %rax        // Restore RAX.
+    .endif
     ret
-.Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj
-    // update lockword, cmpxchg necessary for read barrier bits.
-    movl %ecx, %eax  // eax: old lock word.
-    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
+2:  // EAX: original lock word, tmp: lock_word ^ thread id
+    // Check lock word state and thread id together.
+    testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
+      REG_VAR(tmp)
+    jnz  \slow_unlock
+    // Update lockword for recursive unlock, cmpxchg necessary for read barrier bits.
+    // tmp: new lock word with decremented count.
+    leal -LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
 #ifndef USE_READ_BARRIER
-    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
+    movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
 #else
-    lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
-    jnz  .Lretry_unlock  // cmpxchg failed retry
+    lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+    jnz  1b  // cmpxchg failed retry
 #endif
+    .ifnc \saved_rax, none
+        movq REG_VAR(saved_rax), %rax  // Restore RAX.
+    .endif
     ret
-.Lslow_unlock:
-    SETUP_SAVE_REFS_ONLY_FRAME
-    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
-    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
-    RESTORE_SAVE_REFS_ONLY_FRAME  // restore frame up to return address
-    RETURN_IF_EAX_ZERO
+END_MACRO
+
+    /*
+     * Entry from managed code that tries to unlock the object in a fast path and calls
+     * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
+     * RDI holds the possibly null object to unlock.
+     */
+DEFINE_FUNCTION art_quick_unlock_object
+    testq %rdi, %rdi                         // Null check object.
+    jz    art_quick_unlock_object_no_inline
+    UNLOCK_OBJECT_FAST_PATH rdi, ecx, /*saved_rax*/ none, art_quick_unlock_object_no_inline
 END_FUNCTION art_quick_unlock_object
+    /*
+     * Entry from managed code that calls `artUnlockObjectFromCode()`
+     * and delivers exception on failure.
+     * RDI holds the possibly null object to unlock.
+     */
 DEFINE_FUNCTION art_quick_unlock_object_no_inline
+    // This is also the slow path for art_quick_unlock_object.
     SETUP_SAVE_REFS_ONLY_FRAME
     movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
@@ -1168,6 +1183,97 @@ DEFINE_FUNCTION art_quick_unlock_object_no_inline
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object_no_inline
+    /*
+     * Entry from JNI stub that tries to lock the object in a fast path and
+     * calls `artLockObjectFromCode()` (the same as for managed code) for the
+     * difficult cases, may block for GC.
+     * Custom calling convention:
+     *     RBX holds the non-null object to lock.
+     *     Callee-save registers have been saved and can be used as temporaries (except RBX).
+     *     All argument registers need to be preserved.
+     */
+DEFINE_FUNCTION art_quick_lock_object_jni
+    LOCK_OBJECT_FAST_PATH rbx, ebp, .Llock_object_jni_slow
+
+.Llock_object_jni_slow:
+    // Save register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and align stack.
+    PUSH_ARG r9
+    PUSH_ARG r8
+    PUSH_ARG rcx
+    PUSH_ARG rdx
+    PUSH_ARG rsi
+    PUSH_ARG rdi
+    INCREASE_FRAME (/*FPRs*/ 8 * 8 + /*padding*/ 8)
+    movsd %xmm0, 0(%rsp)
+    movsd %xmm1, 8(%rsp)
+    movsd %xmm2, 16(%rsp)
+    movsd %xmm3, 24(%rsp)
+    movsd %xmm4, 32(%rsp)
+    movsd %xmm5, 40(%rsp)
+    movsd %xmm6, 48(%rsp)
+    movsd %xmm7, 56(%rsp)
+    // Call `artLockObjectFromCode()`
+    movq %rbx, %rdi                        // Pass the object to lock.
+    movq %gs:THREAD_SELF_OFFSET, %rsi      // Pass Thread::Current().
+    call SYMBOL(artLockObjectFromCode)     // (object, Thread*)
+    // Check result.
+    testl %eax, %eax
+    jnz   1f
+    // Restore register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and return.
+    movsd 0(%rsp), %xmm0
+    movsd 8(%rsp), %xmm1
+    movsd 16(%rsp), %xmm2
+    movsd 24(%rsp), %xmm3
+    movsd 32(%rsp), %xmm4
+    movsd 40(%rsp), %xmm5
+    movsd 48(%rsp), %xmm6
+    movsd 56(%rsp), %xmm7
+    DECREASE_FRAME /*FPR args*/ 8 * 8 + /*padding*/ 8
+    POP_ARG rdi
+    POP_ARG rsi
+    POP_ARG rdx
+    POP_ARG rcx
+    POP_ARG r8
+    POP_ARG r9
+    ret
+    .cfi_adjust_cfa_offset (/*FPRs*/ 8 * 8 + /*padding*/ 8 + /*GPRs*/ 6 * 8)
+1:
+    // All args are irrelevant when throwing an exception. Remove the spill area.
+    DECREASE_FRAME (/*FPRs*/ 8 * 8 + /*padding*/ 8 + /*GPRs*/ 6 * 8)
+    // Rely on the JNI transition frame constructed in the JNI stub.
+    movq %gs:THREAD_SELF_OFFSET, %rdi                // Pass Thread::Current().
+    jmp  SYMBOL(artDeliverPendingExceptionFromCode)  // (Thread*); tail call.
+END_FUNCTION art_quick_lock_object_jni
+
+    /*
+     * Entry from JNI stub that tries to unlock the object in a fast path and calls
+     * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock
+     * is fatal, so we do not need to check for exceptions in the slow path.
+     * Custom calling convention:
+     *     RBX holds the non-null object to unlock.
+     *     Callee-save registers have been saved and can be used as temporaries (except RBX).
+     *     Return registers RAX and mmx0 need to be preserved.
+     */
+DEFINE_FUNCTION art_quick_unlock_object_jni
+    movq %rax, %r12                        // Preserve RAX in a different register.
+    UNLOCK_OBJECT_FAST_PATH rbx, ebp, /*saved_rax*/ r12, .Lunlock_object_jni_slow
+
+.Lunlock_object_jni_slow:
+    // Save return registers.
+    PUSH_ARG r12                           // Original contents of RAX.
+    INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8
+    movsd %xmm0, 0(%rsp)
+    // Call `artUnlockObjectFromJni()`
+    movq %rbx, %rdi                        // Pass the object to unlock.
+    movq %gs:THREAD_SELF_OFFSET, %rsi      // Pass Thread::Current().
+    call SYMBOL(artUnlockObjectFromJni)    // (object, Thread*)
+    // Restore return registers and return.
+ movsd 0(%rsp), %xmm0 + DECREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8 + POP_ARG rax + ret +END_FUNCTION art_quick_unlock_object_jni + DEFINE_FUNCTION art_quick_check_instance_of // Type check using the bit string passes null as the target class. In that case just throw. testl %esi, %esi diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h index 3333b5fe0e..fd6bf1fb86 100644 --- a/runtime/entrypoints/entrypoint_utils-inl.h +++ b/runtime/entrypoints/entrypoint_utils-inl.h @@ -777,23 +777,27 @@ inline bool NeedsClinitCheckBeforeCall(ArtMethod* method) { return method->IsStatic() && !method->IsConstructor(); } -inline jobject GetGenericJniSynchronizationObject(Thread* self, ArtMethod* called) +inline ObjPtr<mirror::Object> GetGenericJniSynchronizationObject(Thread* self, ArtMethod* called) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(!called->IsCriticalNative()); DCHECK(!called->IsFastNative()); DCHECK(self->GetManagedStack()->GetTopQuickFrame() != nullptr); DCHECK_EQ(*self->GetManagedStack()->GetTopQuickFrame(), called); + // We do not need read barriers here. + // On method entry, all reference arguments are to-space references and we mark the + // declaring class of a static native method if needed. When visiting thread roots at + // the start of a GC, we visit all these references to ensure they point to the to-space. if (called->IsStatic()) { // Static methods synchronize on the declaring class object. - // The `jclass` is a pointer to the method's declaring class. - return reinterpret_cast<jobject>(called->GetDeclaringClassAddressWithoutBarrier()); + return called->GetDeclaringClass<kWithoutReadBarrier>(); } else { // Instance methods synchronize on the `this` object. // The `this` reference is stored in the first out vreg in the caller's frame. - // The `jobject` is a pointer to the spill slot. uint8_t* sp = reinterpret_cast<uint8_t*>(self->GetManagedStack()->GetTopQuickFrame()); size_t frame_size = RuntimeCalleeSaveFrame::GetFrameSize(CalleeSaveType::kSaveRefsAndArgs); - return reinterpret_cast<jobject>(sp + frame_size + static_cast<size_t>(kRuntimePointerSize)); + StackReference<mirror::Object>* this_ref = reinterpret_cast<StackReference<mirror::Object>*>( + sp + frame_size + static_cast<size_t>(kRuntimePointerSize)); + return this_ref->AsMirrorPtr(); } } diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h index 72b4c030f8..4731a867d2 100644 --- a/runtime/entrypoints/entrypoint_utils.h +++ b/runtime/entrypoints/entrypoint_utils.h @@ -217,7 +217,7 @@ bool NeedsClinitCheckBeforeCall(ArtMethod* method) REQUIRES_SHARED(Locks::mutato // Returns the synchronization object for a native method for a GenericJni frame // we have just created or are about to exit. The synchronization object is // the class object for static methods and the `this` object otherwise. 
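Putting the pieces together, a synchronized native method now locks the synchronization object (the `this` reference, or the declaring class for a static method) while still Runnable, transitions to Native, runs the native code, transitions back, and only then unlocks. The sketch below is a hedged illustration of that ordering with hypothetical stand-ins (LockJniObject/UnlockJniObject and the tiny Object/Thread structs); it is not the real pJniLockObject/artUnlockObjectFromJni API.

    #include <cassert>
    #include <mutex>

    // Hypothetical stand-ins; names and types are illustrative, not ART declarations.
    struct Object { std::recursive_mutex monitor; };
    struct Thread {};

    void LockJniObject(Object* obj, Thread*) { obj->monitor.lock(); }     // ~ pJniLockObject stub.
    void UnlockJniObject(Object* obj, Thread*) { obj->monitor.unlock(); } // ~ artUnlockObjectFromJni.
    void JniMethodStart(Thread*) {}  // Runnable -> Native transition.
    void JniMethodEnd(Thread*) {}    // Native -> Runnable transition.

    // Order of operations for a synchronized native method after this change:
    // lock while Runnable, transition out, run native code, transition back, unlock.
    int CallSynchronizedNative(Object* sync_object, Thread* self, int (*native_fn)()) {
      LockJniObject(sync_object, self);    // Slow path may block for GC or inflate the lock.
      JniMethodStart(self);
      int result = native_fn();            // Native code runs outside the Runnable state.
      JniMethodEnd(self);
      UnlockJniObject(sync_object, self);  // Failure to unlock is fatal in the runtime.
      return result;
    }

    int main() {
      Object obj;
      Thread self;
      assert(CallSynchronizedNative(&obj, &self, [] { return 42; }) == 42);
      return 0;
    }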
-jobject GetGenericJniSynchronizationObject(Thread* self, ArtMethod* called) +ObjPtr<mirror::Object> GetGenericJniSynchronizationObject(Thread* self, ArtMethod* called) REQUIRES_SHARED(Locks::mutator_lock_); // Update .bss method entrypoint if the `callee_reference` has an associated oat file diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h index 6ecf3fd59c..f43e25fec1 100644 --- a/runtime/entrypoints/quick/quick_default_externs.h +++ b/runtime/entrypoints/quick/quick_default_externs.h @@ -114,9 +114,13 @@ extern "C" void art_quick_invoke_super_trampoline_with_access_check(uint32_t, vo extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*); -// JNI read barrier entrypoint. +// JNI read barrier entrypoint. Note: Preserves all registers. extern "C" void art_read_barrier_jni(art::ArtMethod* method); +// JNI lock/unlock entrypoints. Note: Custom calling convention. +extern "C" void art_quick_lock_object_jni(art::mirror::Object*); +extern "C" void art_quick_unlock_object_jni(art::mirror::Object*); + // Polymorphic invoke entrypoints. extern "C" void art_quick_invoke_polymorphic(uint32_t, void*); extern "C" void art_quick_invoke_custom(uint32_t, void*); diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h index 9f1766d3f2..df52e2344d 100644 --- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h +++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h @@ -74,13 +74,12 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qp // JNI qpoints->pJniMethodStart = JniMethodStart; - qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized; qpoints->pJniMethodEnd = JniMethodEnd; - qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized; qpoints->pJniMethodEndWithReference = JniMethodEndWithReference; - qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized; qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline; qpoints->pJniDecodeReferenceResult = JniDecodeReferenceResult; + qpoints->pJniLockObject = art_quick_lock_object_jni; + qpoints->pJniUnlockObject = art_quick_unlock_object_jni; // Locks if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) { @@ -137,12 +136,8 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qp PaletteShouldReportJniInvocations(&should_report); if (should_report) { qpoints->pJniMethodStart = JniMonitoredMethodStart; - qpoints->pJniMethodStartSynchronized = JniMonitoredMethodStartSynchronized; qpoints->pJniMethodEnd = JniMonitoredMethodEnd; - qpoints->pJniMethodEndSynchronized = JniMonitoredMethodEndSynchronized; qpoints->pJniMethodEndWithReference = JniMonitoredMethodEndWithReference; - qpoints->pJniMethodEndWithReferenceSynchronized = - JniMonitoredMethodEndWithReferenceSynchronized; } } diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h index 377a63ee41..cf5c697b76 100644 --- a/runtime/entrypoints/quick/quick_entrypoints.h +++ b/runtime/entrypoints/quick/quick_entrypoints.h @@ -55,35 +55,19 @@ struct PACKED(4) QuickEntryPoints { // JNI entrypoints. // TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI. 
extern void JniMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; -extern void JniMethodStartSynchronized(jobject to_lock, Thread* self) - NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; extern void JniMethodEnd(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; -extern void JniMethodEndSynchronized(jobject locked, Thread* self) - NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; extern mirror::Object* JniMethodEndWithReference(jobject result, Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; -extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result, - jobject locked, - Thread* self) - NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; extern mirror::Object* JniDecodeReferenceResult(jobject result, Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; // JNI entrypoints when monitoring entry/exit. extern void JniMonitoredMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; -extern void JniMonitoredMethodStartSynchronized(jobject to_lock, Thread* self) - NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; extern void JniMonitoredMethodEnd(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; -extern void JniMonitoredMethodEndSynchronized(jobject locked, Thread* self) - NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; extern mirror::Object* JniMonitoredMethodEndWithReference(jobject result, Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; -extern mirror::Object* JniMonitoredMethodEndWithReferenceSynchronized(jobject result, - jobject locked, - Thread* self) - NO_THREAD_SAFETY_ANALYSIS HOT_ATTR; extern "C" mirror::String* artStringBuilderAppend(uint32_t format, @@ -93,6 +77,8 @@ extern "C" mirror::String* artStringBuilderAppend(uint32_t format, extern "C" void artReadBarrierJni(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR; +extern "C" void artUnlockObjectFromJni(mirror::Object* locked, Thread* self) + REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR; // Read barrier entrypoints. // diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index a77e849d32..09ce9438ea 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -73,12 +73,11 @@ V(AputObject, void, mirror::Array*, int32_t, mirror::Object*) \ \ V(JniMethodStart, void, Thread*) \ - V(JniMethodStartSynchronized, void, jobject, Thread*) \ V(JniMethodEnd, void, Thread*) \ - V(JniMethodEndSynchronized, void, jobject, Thread*) \ V(JniMethodEndWithReference, mirror::Object*, jobject, Thread*) \ - V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject, jobject, Thread*) \ V(JniDecodeReferenceResult, mirror::Object*, jobject, Thread*) \ + V(JniLockObject, void, mirror::Object*) \ + V(JniUnlockObject, void, mirror::Object*) \ V(QuickGenericJniTrampoline, void, ArtMethod*) \ \ V(LockObject, void, mirror::Object*) \ diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc index 2ea3c2aca9..95072130a9 100644 --- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc @@ -69,11 +69,6 @@ extern void JniMethodStart(Thread* self) { self->TransitionFromRunnableToSuspended(kNative); } -extern void JniMethodStartSynchronized(jobject to_lock, Thread* self) { - self->DecodeJObject(to_lock)->MonitorEnter(self); - JniMethodStart(self); -} - // TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI. 
static void GoToRunnable(Thread* self) NO_THREAD_SAFETY_ANALYSIS { if (kIsDebugBuild) { @@ -95,8 +90,11 @@ static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self) } // TODO: annotalysis disabled as monitor semantics are maintained in Java code. -static inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self) +extern "C" void artUnlockObjectFromJni(mirror::Object* locked, Thread* self) NO_THREAD_SAFETY_ANALYSIS REQUIRES(!Roles::uninterruptible_) { + // Note: No thread suspension is allowed for successful unlocking, otherwise plain + // `mirror::Object*` return value saved by the assembly stub would need to be updated. + uintptr_t old_poison_object_cookie = kIsDebugBuild ? self->GetPoisonObjectCookie() : 0u; // Save any pending exception over monitor exit call. ObjPtr<mirror::Throwable> saved_exception = nullptr; if (UNLIKELY(self->IsExceptionPending())) { @@ -104,17 +102,22 @@ static inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self) self->ClearException(); } // Decode locked object and unlock, before popping local references. - self->DecodeJObject(locked)->MonitorExit(self); + locked->MonitorExit(self); if (UNLIKELY(self->IsExceptionPending())) { - LOG(FATAL) << "Synchronized JNI code returning with an exception:\n" - << saved_exception->Dump() - << "\nEncountered second exception during implicit MonitorExit:\n" - << self->GetException()->Dump(); + LOG(FATAL) << "Exception during implicit MonitorExit for synchronized native method:\n" + << self->GetException()->Dump() + << (saved_exception != nullptr + ? "\nAn exception was already pending:\n" + saved_exception->Dump() + : ""); + UNREACHABLE(); } // Restore pending exception. if (saved_exception != nullptr) { self->SetException(saved_exception); } + if (kIsDebugBuild) { + DCHECK_EQ(old_poison_object_cookie, self->GetPoisonObjectCookie()); + } } // TODO: These should probably be templatized or macro-ized. @@ -124,11 +127,6 @@ extern void JniMethodEnd(Thread* self) { GoToRunnable(self); } -extern void JniMethodEndSynchronized(jobject locked, Thread* self) { - GoToRunnable(self); - UnlockJniSynchronizedMethod(locked, self); // Must decode before pop. -} - extern mirror::Object* JniDecodeReferenceResult(jobject result, Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(!self->IsExceptionPending()); @@ -168,14 +166,6 @@ extern mirror::Object* JniMethodEndWithReference(jobject result, Thread* self) { return JniMethodEndWithReferenceHandleResult(result, self); } -extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result, - jobject locked, - Thread* self) { - GoToRunnable(self); - UnlockJniSynchronizedMethod(locked, self); - return JniMethodEndWithReferenceHandleResult(result, self); -} - extern uint64_t GenericJniMethodEnd(Thread* self, uint32_t saved_local_ref_cookie, jvalue result, @@ -206,9 +196,9 @@ extern uint64_t GenericJniMethodEnd(Thread* self, // locked object. 
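artUnlockObjectFromJni() keeps the long-standing rule that an exception already raised by the native method must survive the implicit MonitorExit, and that a failure to unlock is fatal. A self-contained sketch of that save/clear/restore pattern follows, with a hypothetical FakeThread standing in for the thread's pending-exception slot; it illustrates the pattern only, not ART's Thread API.

    #include <cassert>
    #include <optional>
    #include <string>
    #include <utility>

    // Hypothetical stand-in for a thread with a single pending-exception slot.
    struct FakeThread {
      std::optional<std::string> pending_exception;
    };

    // `monitor_exit` may itself "throw" by setting a pending exception.
    void UnlockPreservingException(FakeThread& self, void (*monitor_exit)(FakeThread&)) {
      // Save and clear any exception raised by the native method itself.
      std::optional<std::string> saved = std::move(self.pending_exception);
      self.pending_exception.reset();
      monitor_exit(self);
      // An exception from the implicit MonitorExit would be fatal in the runtime;
      // here we just assert that the unlock succeeded.
      assert(!self.pending_exception.has_value());
      // Re-install the original exception so the caller still sees it.
      self.pending_exception = std::move(saved);
    }

    int main() {
      FakeThread self;
      self.pending_exception = "thrown by native code";
      UnlockPreservingException(self, [](FakeThread&) { /* successful unlock */ });
      assert(self.pending_exception == "thrown by native code");
      return 0;
    }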
if (called->IsSynchronized()) { DCHECK(normal_native) << "@FastNative/@CriticalNative and synchronize is not supported"; - jobject lock = GetGenericJniSynchronizationObject(self, called); + ObjPtr<mirror::Object> lock = GetGenericJniSynchronizationObject(self, called); DCHECK(lock != nullptr); - UnlockJniSynchronizedMethod(lock, self); + artUnlockObjectFromJni(lock.Ptr(), self); } char return_shorty_char = called->GetShorty()[0]; if (return_shorty_char == 'L') { @@ -258,32 +248,14 @@ extern void JniMonitoredMethodStart(Thread* self) { MONITOR_JNI(PaletteNotifyBeginJniInvocation); } -extern void JniMonitoredMethodStartSynchronized(jobject to_lock, Thread* self) { - JniMethodStartSynchronized(to_lock, self); - MONITOR_JNI(PaletteNotifyBeginJniInvocation); -} - extern void JniMonitoredMethodEnd(Thread* self) { MONITOR_JNI(PaletteNotifyEndJniInvocation); JniMethodEnd(self); } -extern void JniMonitoredMethodEndSynchronized(jobject locked, Thread* self) { - MONITOR_JNI(PaletteNotifyEndJniInvocation); - JniMethodEndSynchronized(locked, self); -} - extern mirror::Object* JniMonitoredMethodEndWithReference(jobject result, Thread* self) { MONITOR_JNI(PaletteNotifyEndJniInvocation); return JniMethodEndWithReference(result, self); } -extern mirror::Object* JniMonitoredMethodEndWithReferenceSynchronized( - jobject result, - jobject locked, - Thread* self) { - MONITOR_JNI(PaletteNotifyEndJniInvocation); - return JniMethodEndWithReferenceSynchronized(result, locked, self); -} - } // namespace art diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index c14dee42ec..e214577f7b 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -2062,11 +2062,14 @@ void BuildGenericJniFrameVisitor::Visit() { * needed and return to the stub. * * The return value is the pointer to the native code, null on failure. + * + * NO_THREAD_SAFETY_ANALYSIS: Depending on the use case, the trampoline may + * or may not lock a synchronization object and transition out of Runnable. */ extern "C" const void* artQuickGenericJniTrampoline(Thread* self, ArtMethod** managed_sp, uintptr_t* reserved_area) - REQUIRES_SHARED(Locks::mutator_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) NO_THREAD_SAFETY_ANALYSIS { // Note: We cannot walk the stack properly until fixed up below. ArtMethod* called = *managed_sp; DCHECK(called->IsNative()) << called->PrettyMethod(true); @@ -2121,14 +2124,14 @@ extern "C" const void* artQuickGenericJniTrampoline(Thread* self, if (LIKELY(normal_native)) { // Start JNI. if (called->IsSynchronized()) { - jobject lock = GetGenericJniSynchronizationObject(self, called); - JniMethodStartSynchronized(lock, self); + ObjPtr<mirror::Object> lock = GetGenericJniSynchronizationObject(self, called); + DCHECK(lock != nullptr); + lock->MonitorEnter(self); if (self->IsExceptionPending()) { return nullptr; // Report error. 
} - } else { - JniMethodStart(self); } + JniMethodStart(self); } else { DCHECK(!called->IsSynchronized()) << "@FastNative/@CriticalNative and synchronize is not supported"; diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index c19e000d1e..c3f1dba967 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -217,18 +217,16 @@ class EntrypointsOrderTest : public CommonRuntimeTest { EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pGetObjInstance, pGetObjStatic, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pGetObjStatic, pAputObject, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAputObject, pJniMethodStart, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodStartSynchronized, - sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStartSynchronized, pJniMethodEnd, - sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodEndSynchronized, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndSynchronized, pJniMethodEndWithReference, + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodEnd, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodEndWithReference, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReference, - pJniMethodEndWithReferenceSynchronized, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReferenceSynchronized, pJniDecodeReferenceResult, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniDecodeReferenceResult, + pJniLockObject, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniLockObject, + pJniUnlockObject, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniUnlockObject, pQuickGenericJniTrampoline, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pQuickGenericJniTrampoline, pLockObject, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pLockObject, pUnlockObject, sizeof(void*)); diff --git a/runtime/oat.h b/runtime/oat.h index acb3d30fa2..0b6bf7db91 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,8 +32,8 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } }; - // Last oat version changed reason: JNI: Faster mutator locking during transition. - static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '1', '2', '\0' } }; + // Last oat version changed reason: JNI: Rewrite locking for synchronized methods. 
+ static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '1', '3', '\0' } }; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; static constexpr const char* kDebuggableKey = "debuggable"; diff --git a/runtime/thread.cc b/runtime/thread.cc index 9fb8d62147..46aa38e035 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -3475,12 +3475,11 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) { QUICK_ENTRY_POINT_INFO(pGetObjStatic) QUICK_ENTRY_POINT_INFO(pAputObject) QUICK_ENTRY_POINT_INFO(pJniMethodStart) - QUICK_ENTRY_POINT_INFO(pJniMethodStartSynchronized) QUICK_ENTRY_POINT_INFO(pJniMethodEnd) - QUICK_ENTRY_POINT_INFO(pJniMethodEndSynchronized) QUICK_ENTRY_POINT_INFO(pJniMethodEndWithReference) - QUICK_ENTRY_POINT_INFO(pJniMethodEndWithReferenceSynchronized) QUICK_ENTRY_POINT_INFO(pJniDecodeReferenceResult) + QUICK_ENTRY_POINT_INFO(pJniLockObject) + QUICK_ENTRY_POINT_INFO(pJniUnlockObject) QUICK_ENTRY_POINT_INFO(pQuickGenericJniTrampoline) QUICK_ENTRY_POINT_INFO(pLockObject) QUICK_ENTRY_POINT_INFO(pUnlockObject) |
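A closing note on the entrypoints_order_test.cc update: hand-written assembly reaches these entrypoints via fixed offsets from Thread, so the test pins down the size and adjacency of every field, including the new pJniLockObject/pJniUnlockObject slots. The same idea can be expressed with compile-time checks; the ToyEntryPoints struct below is illustrative only, not the real QuickEntryPoints layout.

    #include <cstddef>

    // A toy entrypoint table mirroring the idea behind QuickEntryPoints:
    // assembly addresses these fields by fixed offsets, so size and adjacency
    // must be pinned down. Field names here are illustrative.
    struct ToyEntryPoints {
      void (*pJniMethodStart)(void*);
      void (*pJniMethodEnd)(void*);
      void (*pJniMethodEndWithReference)(void*, void*);
      void (*pJniDecodeReferenceResult)(void*, void*);
      void (*pJniLockObject)(void*);
      void (*pJniUnlockObject)(void*);
      void (*pQuickGenericJniTrampoline)(void*);
    };

    static_assert(offsetof(ToyEntryPoints, pJniLockObject) ==
                      offsetof(ToyEntryPoints, pJniDecodeReferenceResult) + sizeof(void*),
                  "pJniLockObject must directly follow pJniDecodeReferenceResult");
    static_assert(offsetof(ToyEntryPoints, pJniUnlockObject) ==
                      offsetof(ToyEntryPoints, pJniLockObject) + sizeof(void*),
                  "pJniUnlockObject must directly follow pJniLockObject");

    int main() { return 0; }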