-rw-r--r--  compiler/jni/jni_compiler_test.cc  99
-rw-r--r--  compiler/jni/quick/arm/calling_convention_arm.cc  23
-rw-r--r--  compiler/jni/quick/arm/calling_convention_arm.h  4
-rw-r--r--  compiler/jni/quick/arm64/calling_convention_arm64.cc  21
-rw-r--r--  compiler/jni/quick/arm64/calling_convention_arm64.h  4
-rw-r--r--  compiler/jni/quick/calling_convention.h  4
-rw-r--r--  compiler/jni/quick/jni_compiler.cc  277
-rw-r--r--  compiler/jni/quick/x86/calling_convention_x86.cc  9
-rw-r--r--  compiler/jni/quick/x86/calling_convention_x86.h  4
-rw-r--r--  compiler/jni/quick/x86_64/calling_convention_x86_64.cc  9
-rw-r--r--  compiler/jni/quick/x86_64/calling_convention_x86_64.h  4
-rw-r--r--  compiler/utils/arm/jni_macro_assembler_arm_vixl.cc  26
-rw-r--r--  compiler/utils/arm64/jni_macro_assembler_arm64.cc  36
-rw-r--r--  compiler/utils/assembler_thumb_test_expected.cc.inc  2
-rw-r--r--  compiler/utils/x86/jni_macro_assembler_x86.cc  5
-rw-r--r--  compiler/utils/x86_64/jni_macro_assembler_x86_64.cc  1
-rw-r--r--  dex2oat/linker/oat_writer_test.cc  2
-rw-r--r--  runtime/arch/arm/quick_entrypoints_arm.S  218
-rw-r--r--  runtime/arch/arm64/quick_entrypoints_arm64.S  214
-rw-r--r--  runtime/arch/x86/quick_entrypoints_x86.S  301
-rw-r--r--  runtime/arch/x86_64/quick_entrypoints_x86_64.S  246
-rw-r--r--  runtime/entrypoints/entrypoint_utils-inl.h  14
-rw-r--r--  runtime/entrypoints/entrypoint_utils.h  2
-rw-r--r--  runtime/entrypoints/quick/quick_default_externs.h  6
-rw-r--r--  runtime/entrypoints/quick/quick_default_init_entrypoints.h  9
-rw-r--r--  runtime/entrypoints/quick/quick_entrypoints.h  18
-rw-r--r--  runtime/entrypoints/quick/quick_entrypoints_list.h  5
-rw-r--r--  runtime/entrypoints/quick/quick_jni_entrypoints.cc  60
-rw-r--r--  runtime/entrypoints/quick/quick_trampoline_entrypoints.cc  13
-rw-r--r--  runtime/entrypoints_order_test.cc  14
-rw-r--r--  runtime/oat.h  4
-rw-r--r--  runtime/thread.cc  5
32 files changed, 1008 insertions, 651 deletions
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index e3d0abb7d3..2f96d44977 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -39,6 +39,7 @@
#include "mirror/stack_trace_element-inl.h"
#include "nativehelper/ScopedLocalRef.h"
#include "nativeloader/native_loader.h"
+#include "oat_quick_method_header.h"
#include "runtime.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
@@ -388,44 +389,41 @@ class JniCompilerTest : public CommonCompilerTest {
jmethodID jmethod_;
private:
+ // Helper class that overrides original entrypoints with alternative versions
+ // that check that the object (`this` or class) is locked.
class ScopedSynchronizedEntryPointOverrides {
public:
ScopedSynchronizedEntryPointOverrides() {
QuickEntryPoints* qpoints = &Thread::Current()->tlsPtr_.quick_entrypoints;
- jni_method_start_synchronized_original_ = qpoints->pJniMethodStartSynchronized;
- qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronizedOverride;
- jni_method_end_synchronized_original_ = qpoints->pJniMethodEndSynchronized;
- qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronizedOverride;
- jni_method_end_with_reference_synchronized_original_ =
- qpoints->pJniMethodEndWithReferenceSynchronized;
- qpoints->pJniMethodEndWithReferenceSynchronized =
- JniMethodEndWithReferenceSynchronizedOverride;
+ jni_method_start_original_ = qpoints->pJniMethodStart;
+ qpoints->pJniMethodStart = JniMethodStartSynchronizedOverride;
+ jni_method_end_original_ = qpoints->pJniMethodEnd;
+ qpoints->pJniMethodEnd = JniMethodEndSynchronizedOverride;
+ jni_method_end_with_reference_original_ = qpoints->pJniMethodEndWithReference;
+ qpoints->pJniMethodEndWithReference = JniMethodEndWithReferenceSynchronizedOverride;
}
~ScopedSynchronizedEntryPointOverrides() {
QuickEntryPoints* qpoints = &Thread::Current()->tlsPtr_.quick_entrypoints;
- qpoints->pJniMethodStartSynchronized = jni_method_start_synchronized_original_;
- qpoints->pJniMethodEndSynchronized = jni_method_end_synchronized_original_;
- qpoints->pJniMethodEndWithReferenceSynchronized =
- jni_method_end_with_reference_synchronized_original_;
+ qpoints->pJniMethodStart = jni_method_start_original_;
+ qpoints->pJniMethodEnd = jni_method_end_original_;
+ qpoints->pJniMethodEndWithReference = jni_method_end_with_reference_original_;
}
};
- static void JniMethodStartSynchronizedOverride(jobject to_lock, Thread* self);
- static void JniMethodEndSynchronizedOverride(jobject locked, Thread* self);
+ static void AssertCallerObjectLocked(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
+ static void JniMethodStartSynchronizedOverride(Thread* self);
+ static void JniMethodEndSynchronizedOverride(Thread* self);
static mirror::Object* JniMethodEndWithReferenceSynchronizedOverride(
- jobject result,
- jobject locked,
- Thread* self);
+ jobject result, Thread* self);
- using StartSynchronizedType = void (*)(jobject, Thread*);
- using EndSynchronizedType = void (*)(jobject, Thread*);
- using EndWithReferenceSynchronizedType = mirror::Object* (*)(jobject, jobject, Thread*);
+ using JniStartType = void (*)(Thread*);
+ using JniEndType = void (*)(Thread*);
+ using JniEndWithReferenceType = mirror::Object* (*)(jobject, Thread*);
- static StartSynchronizedType jni_method_start_synchronized_original_;
- static EndSynchronizedType jni_method_end_synchronized_original_;
- static EndWithReferenceSynchronizedType jni_method_end_with_reference_synchronized_original_;
- static jobject locked_object_;
+ static JniStartType jni_method_start_original_;
+ static JniEndType jni_method_end_original_;
+ static JniEndWithReferenceType jni_method_end_with_reference_original_;
bool check_generic_jni_;
};
@@ -433,28 +431,49 @@ class JniCompilerTest : public CommonCompilerTest {
jclass JniCompilerTest::jklass_;
jobject JniCompilerTest::jobj_;
jobject JniCompilerTest::class_loader_;
-JniCompilerTest::StartSynchronizedType JniCompilerTest::jni_method_start_synchronized_original_;
-JniCompilerTest::EndSynchronizedType JniCompilerTest::jni_method_end_synchronized_original_;
-JniCompilerTest::EndWithReferenceSynchronizedType
- JniCompilerTest::jni_method_end_with_reference_synchronized_original_;
-jobject JniCompilerTest::locked_object_;
+JniCompilerTest::JniStartType JniCompilerTest::jni_method_start_original_;
+JniCompilerTest::JniEndType JniCompilerTest::jni_method_end_original_;
+JniCompilerTest::JniEndWithReferenceType JniCompilerTest::jni_method_end_with_reference_original_;
+
+void JniCompilerTest::AssertCallerObjectLocked(Thread* self) {
+ ArtMethod** caller_frame = self->GetManagedStack()->GetTopQuickFrame();
+ CHECK(caller_frame != nullptr);
+ ArtMethod* caller = *caller_frame;
+ CHECK(caller != nullptr);
+ CHECK(caller->IsNative());
+ CHECK(!caller->IsFastNative());
+ CHECK(!caller->IsCriticalNative());
+ CHECK(caller->IsSynchronized());
+ ObjPtr<mirror::Object> lock;
+ if (caller->IsStatic()) {
+ lock = caller->GetDeclaringClass();
+ } else {
+ uint8_t* sp = reinterpret_cast<uint8_t*>(caller_frame);
+ const void* code_ptr = EntryPointToCodePointer(caller->GetEntryPointFromQuickCompiledCode());
+ OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
+ size_t frame_size = method_header->GetFrameSizeInBytes();
+ StackReference<mirror::Object>* this_ref = reinterpret_cast<StackReference<mirror::Object>*>(
+ sp + frame_size + static_cast<size_t>(kRuntimePointerSize));
+ lock = this_ref->AsMirrorPtr();
+ }
+ CHECK_EQ(Monitor::GetLockOwnerThreadId(lock), self->GetThreadId());
+}
-void JniCompilerTest::JniMethodStartSynchronizedOverride(jobject to_lock, Thread* self) {
- locked_object_ = to_lock;
- jni_method_start_synchronized_original_(to_lock, self);
+void JniCompilerTest::JniMethodStartSynchronizedOverride(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
+ AssertCallerObjectLocked(self);
+ jni_method_start_original_(self);
}
-void JniCompilerTest::JniMethodEndSynchronizedOverride(jobject locked, Thread* self) {
- EXPECT_EQ(locked_object_, locked);
- jni_method_end_synchronized_original_(locked, self);
+void JniCompilerTest::JniMethodEndSynchronizedOverride(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
+ jni_method_end_original_(self);
+ AssertCallerObjectLocked(self);
}
mirror::Object* JniCompilerTest::JniMethodEndWithReferenceSynchronizedOverride(
- jobject result,
- jobject locked,
- Thread* self) {
- EXPECT_EQ(locked_object_, locked);
- return jni_method_end_with_reference_synchronized_original_(result, locked, self);
+ jobject result, Thread* self) NO_THREAD_SAFETY_ANALYSIS {
+ mirror::Object* raw_result = jni_method_end_with_reference_original_(result, self);
+ AssertCallerObjectLocked(self);
+ return raw_result;
}
// Test the normal compiler and normal generic JNI only.
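The override helpers above follow a plain RAII pattern: swap a function pointer in the thread's entrypoint table on construction and restore the original on destruction, so a test only needs to keep one such object on the stack for the duration of the call. A minimal, self-contained sketch of that pattern (the names below are illustrative stand-ins, not ART's types):

// Stand-in for the thread-local entrypoint table and pJniMethodStart.
struct EntryPoints {
  void (*jni_method_start)(void* self);
};

EntryPoints g_entrypoints{};
void (*g_original_start)(void* self) = nullptr;
bool g_override_called = false;

// Checking wrapper: record (or assert) something, then delegate to the
// original entrypoint saved by the scoped override below.
void CheckingJniMethodStart(void* self) {
  g_override_called = true;
  g_original_start(self);
}

// Installs the override in the constructor and restores the original in the
// destructor, mirroring ScopedSynchronizedEntryPointOverrides above.
class ScopedEntryPointOverride {
 public:
  ScopedEntryPointOverride() {
    g_original_start = g_entrypoints.jni_method_start;
    g_entrypoints.jni_method_start = CheckingJniMethodStart;
  }
  ~ScopedEntryPointOverride() {
    g_entrypoints.jni_method_start = g_original_start;
  }
};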
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index 68c7a94540..da438bdba6 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -531,10 +531,10 @@ FrameOffset ArmJniCallingConvention::CurrentParamStackOffset() {
return FrameOffset(offset);
}
-ManagedRegister ArmJniCallingConvention::HiddenArgumentRegister() const {
- CHECK(IsCriticalNative());
- // R4 is neither managed callee-save, nor argument register, nor scratch register.
- // (It is native callee-save but the value coming from managed code can be clobbered.)
+// R4 is neither managed callee-save, nor argument register. It is suitable for use as the
+// locking argument for synchronized methods and hidden argument for @CriticalNative methods.
+// (It is native callee-save but the value coming from managed code can be clobbered.)
+static void AssertR4IsNeitherCalleeSaveNorArgumentRegister() {
// TODO: Change to static_assert; std::none_of should be constexpr since C++20.
DCHECK(std::none_of(kCalleeSaveRegisters,
kCalleeSaveRegisters + std::size(kCalleeSaveRegisters),
@@ -543,7 +543,20 @@ ManagedRegister ArmJniCallingConvention::HiddenArgumentRegister() const {
}));
DCHECK(std::none_of(kJniArgumentRegisters,
kJniArgumentRegisters + std::size(kJniArgumentRegisters),
- [](Register reg) { return reg == R4; }));
+ [](Register arg) { return arg == R4; }));
+}
+
+ManagedRegister ArmJniCallingConvention::LockingArgumentRegister() const {
+ DCHECK(!IsFastNative());
+ DCHECK(!IsCriticalNative());
+ DCHECK(IsSynchronized());
+ AssertR4IsNeitherCalleeSaveNorArgumentRegister();
+ return ArmManagedRegister::FromCoreRegister(R4);
+}
+
+ManagedRegister ArmJniCallingConvention::HiddenArgumentRegister() const {
+ CHECK(IsCriticalNative());
+ AssertR4IsNeitherCalleeSaveNorArgumentRegister();
return ArmManagedRegister::FromCoreRegister(R4);
}
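The TODO above notes that `std::none_of` becomes usable in constant expressions with C++20, so the runtime `DCHECK`s could turn into compile-time checks. A sketch of what that would look like, using placeholder register tables rather than ART's real `kCalleeSaveRegisters` / `kJniArgumentRegisters`:

#include <algorithm>
#include <array>

enum Register { R0, R1, R2, R3, R4, R5, R6, R7, R8, R10, R11, LR };

// Placeholder tables; the real ones are defined by the ARM calling convention.
constexpr std::array<Register, 7> kCalleeSaveRegs{R5, R6, R7, R8, R10, R11, LR};
constexpr std::array<Register, 4> kJniArgumentRegs{R0, R1, R2, R3};

// With -std=c++20, std::none_of is constexpr, so the "R4 is neither
// callee-save nor an argument register" invariant can be checked at compile time.
static_assert(std::none_of(kCalleeSaveRegs.begin(), kCalleeSaveRegs.end(),
                           [](Register reg) { return reg == R4; }));
static_assert(std::none_of(kJniArgumentRegs.begin(), kJniArgumentRegs.end(),
                           [](Register reg) { return reg == R4; }));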
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 149ba39eb4..94dacc46e5 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -81,6 +81,10 @@ class ArmJniCallingConvention final : public JniCallingConvention {
return false;
}
+ // Locking argument register, used to pass the synchronization object for calls
+ // to `JniLockObject()` and `JniUnlockObject()`.
+ ManagedRegister LockingArgumentRegister() const override;
+
// Hidden argument register, used to pass the method pointer for @CriticalNative call.
ManagedRegister HiddenArgumentRegister() const override;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 7b9a597805..d8b0373096 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -363,9 +363,9 @@ FrameOffset Arm64JniCallingConvention::CurrentParamStackOffset() {
return FrameOffset(offset);
}
-ManagedRegister Arm64JniCallingConvention::HiddenArgumentRegister() const {
- CHECK(IsCriticalNative());
- // X15 is neither managed callee-save, nor argument register, nor scratch register.
+// X15 is neither managed callee-save, nor argument register. It is suitable for use as the
+// locking argument for synchronized methods and hidden argument for @CriticalNative methods.
+static void AssertX15IsNeitherCalleeSaveNorArgumentRegister() {
// TODO: Change to static_assert; std::none_of should be constexpr since C++20.
DCHECK(std::none_of(kCalleeSaveRegisters,
kCalleeSaveRegisters + std::size(kCalleeSaveRegisters),
@@ -374,7 +374,20 @@ ManagedRegister Arm64JniCallingConvention::HiddenArgumentRegister() const {
}));
DCHECK(std::none_of(kXArgumentRegisters,
kXArgumentRegisters + std::size(kXArgumentRegisters),
- [](XRegister reg) { return reg == X15; }));
+ [](XRegister arg) { return arg == X15; }));
+}
+
+ManagedRegister Arm64JniCallingConvention::LockingArgumentRegister() const {
+ DCHECK(!IsFastNative());
+ DCHECK(!IsCriticalNative());
+ DCHECK(IsSynchronized());
+ AssertX15IsNeitherCalleeSaveNorArgumentRegister();
+ return Arm64ManagedRegister::FromWRegister(W15);
+}
+
+ManagedRegister Arm64JniCallingConvention::HiddenArgumentRegister() const {
+ DCHECK(IsCriticalNative());
+ AssertX15IsNeitherCalleeSaveNorArgumentRegister();
return Arm64ManagedRegister::FromXRegister(X15);
}
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index ade88e4e97..003b0c3f15 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -72,6 +72,10 @@ class Arm64JniCallingConvention final : public JniCallingConvention {
return HasSmallReturnType();
}
+ // Locking argument register, used to pass the synchronization object for calls
+ // to `JniLockObject()` and `JniUnlockObject()`.
+ ManagedRegister LockingArgumentRegister() const override;
+
// Hidden argument register, used to pass the method pointer for @CriticalNative call.
ManagedRegister HiddenArgumentRegister() const override;
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index faa83daf7c..0be523362f 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -363,6 +363,10 @@ class JniCallingConvention : public CallingConvention {
return !IsCriticalNative();
}
+ // Locking argument register, used to pass the synchronization object for calls
+ // to `JniLockObject()` and `JniUnlockObject()`.
+ virtual ManagedRegister LockingArgumentRegister() const = 0;
+
// Hidden argument register, used to pass the method pointer for @CriticalNative call.
virtual ManagedRegister HiddenArgumentRegister() const = 0;
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 4c1b2f792d..863f47b819 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -81,26 +81,17 @@ enum class JniEntrypoint {
template <PointerSize kPointerSize>
static ThreadOffset<kPointerSize> GetJniEntrypointThreadOffset(JniEntrypoint which,
- bool reference_return,
- bool is_synchronized) {
+ bool reference_return) {
if (which == JniEntrypoint::kStart) { // JniMethodStart
- ThreadOffset<kPointerSize> jni_start =
- is_synchronized
- ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized)
- : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart);
-
+ ThreadOffset<kPointerSize> jni_start = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart);
return jni_start;
} else { // JniMethodEnd
ThreadOffset<kPointerSize> jni_end(-1);
if (reference_return) {
// Pass result.
- jni_end = is_synchronized
- ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized)
- : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
+ jni_end = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
} else {
- jni_end = is_synchronized
- ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized)
- : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd);
+ jni_end = QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd);
}
return jni_end;
@@ -194,26 +185,6 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
ManagedRuntimeCallingConvention::Create(
&allocator, is_static, is_synchronized, shorty, instruction_set));
- // Calling conventions to call into JNI method "end" possibly passing a returned reference, the
- // method and the current thread.
- const char* jni_end_shorty;
- if (reference_return && is_synchronized) {
- jni_end_shorty = "IL";
- } else if (reference_return) {
- jni_end_shorty = "I";
- } else {
- jni_end_shorty = "V";
- }
-
- std::unique_ptr<JniCallingConvention> end_jni_conv(
- JniCallingConvention::Create(&allocator,
- is_static,
- is_synchronized,
- is_fast_native,
- is_critical_native,
- jni_end_shorty,
- instruction_set));
-
// Assembler that holds generated instructions
std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm =
GetMacroAssembler<kPointerSize>(&allocator, instruction_set, instruction_set_features);
@@ -249,7 +220,28 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
__ Bind(jclass_read_barrier_return.get());
}
- // 1.3. Write out the end of the quick frames.
+ // 1.3 Spill reference register arguments.
+ constexpr FrameOffset kInvalidReferenceOffset =
+ JNIMacroAssembler<kPointerSize>::kInvalidReferenceOffset;
+ ArenaVector<ArgumentLocation> src_args(allocator.Adapter());
+ ArenaVector<ArgumentLocation> dest_args(allocator.Adapter());
+ ArenaVector<FrameOffset> refs(allocator.Adapter());
+ if (LIKELY(!is_critical_native)) {
+ mr_conv->ResetIterator(FrameOffset(current_frame_size));
+ for (; mr_conv->HasNext(); mr_conv->Next()) {
+ if (mr_conv->IsCurrentParamInRegister() && mr_conv->IsCurrentParamAReference()) {
+ // Spill the reference as raw data.
+ src_args.emplace_back(mr_conv->CurrentParamRegister(), kObjectReferenceSize);
+ dest_args.emplace_back(mr_conv->CurrentParamStackOffset(), kObjectReferenceSize);
+ refs.push_back(kInvalidReferenceOffset);
+ }
+ }
+ __ MoveArguments(ArrayRef<ArgumentLocation>(dest_args),
+ ArrayRef<ArgumentLocation>(src_args),
+ ArrayRef<FrameOffset>(refs));
+ }
+
+ // 1.4. Write out the end of the quick frames. After this, we can walk the stack.
// NOTE: @CriticalNative does not need to store the stack pointer to the thread
// because garbage collections are disabled within the execution of a
// @CriticalNative method.
@@ -257,10 +249,32 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
__ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
}
- // 2. Call into appropriate `JniMethodStart*()` to transition out of Runnable for normal native.
+ // 2. Lock the object (if synchronized) and transition out of runnable (if normal native).
- // 2.1. Move frame down to allow space for out going args.
- // This prepares for both the `JniMethodStart*()` call as well as the main native call.
+ // 2.1. Lock the synchronization object (`this` or class) for synchronized methods.
+ if (UNLIKELY(is_synchronized)) {
+ // We are using a custom calling convention for locking where the assembly thunk gets
+ // the object to lock in a register (even on x86), it can use callee-save registers
+ // as temporaries (they were saved above) and must preserve argument registers.
+ ManagedRegister to_lock = main_jni_conv->LockingArgumentRegister();
+ if (is_static) {
+ // Pass the declaring class. It was already marked if needed.
+ DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u);
+ __ Load(to_lock, method_register, MemberOffset(0u), kObjectReferenceSize);
+ } else {
+ // Pass the `this` argument.
+ mr_conv->ResetIterator(FrameOffset(current_frame_size));
+ if (mr_conv->IsCurrentParamInRegister()) {
+ __ Move(to_lock, mr_conv->CurrentParamRegister(), kObjectReferenceSize);
+ } else {
+ __ Load(to_lock, mr_conv->CurrentParamStackOffset(), kObjectReferenceSize);
+ }
+ }
+ __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniLockObject));
+ }
+
+ // 2.2. Move frame down to allow space for out going args.
+ // This prepares for both the `JniMethodStart()` call as well as the main native call.
size_t current_out_arg_size = main_out_arg_size;
if (UNLIKELY(is_critical_native)) {
DCHECK_EQ(main_out_arg_size, current_frame_size);
@@ -269,41 +283,37 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
current_frame_size += main_out_arg_size;
}
- // 2.2. Spill all register arguments to preserve them across the `JniMethodStart*()` call.
+ // 2.3. Spill all register arguments to preserve them across the `JniLockObject()`
+ // call (if synchronized) and `JniMethodStart()` call (if normal native).
// Native stack arguments are spilled directly to their argument stack slots and
// references are converted to `jobject`. Native register arguments are spilled to
- // the reserved slots in the caller frame, references are not converted to `jobject`.
- constexpr FrameOffset kInvalidReferenceOffset =
- JNIMacroAssembler<kPointerSize>::kInvalidReferenceOffset;
- ArenaVector<ArgumentLocation> src_args(allocator.Adapter());
- ArenaVector<ArgumentLocation> dest_args(allocator.Adapter());
- ArenaVector<FrameOffset> refs(allocator.Adapter());
+ // the reserved slots in the caller frame, references are not converted to `jobject`;
+ // references from registers are actually skipped as they were already spilled above.
+ // TODO: Implement fast-path for transition to Native and avoid this spilling.
+ src_args.clear();
+ dest_args.clear();
+ refs.clear();
if (LIKELY(!is_critical_native && !is_fast_native)) {
mr_conv->ResetIterator(FrameOffset(current_frame_size));
main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
main_jni_conv->Next(); // Skip JNIEnv*.
+ // Add a no-op move for the `jclass` / `this` argument to avoid the
+ // next argument being treated as non-null if it's a reference.
+ // Note: We have already spilled `this` as raw reference above. Since `this`
+ // cannot be null, the argument move before the native call does not need
+ // to reload the reference, and that argument move also needs to see the
+ // `this` argument to avoid treating another reference as non-null.
+ // Note: Using the method register for the no-op move even for `this`.
+ src_args.emplace_back(method_register, kRawPointerSize);
+ dest_args.emplace_back(method_register, kRawPointerSize);
+ refs.push_back(kInvalidReferenceOffset);
if (is_static) {
main_jni_conv->Next(); // Skip `jclass`.
- // Add a no-op move for the `jclass` argument to avoid the next
- // argument being treated as non-null if it's a reference.
- src_args.emplace_back(method_register, kRawPointerSize);
- dest_args.emplace_back(method_register, kRawPointerSize);
- refs.push_back(kInvalidReferenceOffset);
} else {
- // Spill `this` as raw reference without conversion to `jobject` even if the `jobject`
- // argument is passed on stack. Since `this` cannot be null, the argument move before
- // the native call does not need to reload the reference, and that argument move also
- // needs to see the `this` argument to avoid treating another reference as non-null.
- // This also leaves enough space on stack for `JniMethodStartSynchronized()`
- // for architectures that pass the second argument on the stack (x86).
+ // Skip `this`
DCHECK(mr_conv->HasNext());
DCHECK(main_jni_conv->HasNext());
DCHECK(mr_conv->IsCurrentParamAReference());
- src_args.push_back(mr_conv->IsCurrentParamInRegister()
- ? ArgumentLocation(mr_conv->CurrentParamRegister(), kObjectReferenceSize)
- : ArgumentLocation(mr_conv->CurrentParamStackOffset(), kObjectReferenceSize));
- dest_args.emplace_back(mr_conv->CurrentParamStackOffset(), kObjectReferenceSize);
- refs.push_back(kInvalidReferenceOffset);
mr_conv->Next();
main_jni_conv->Next();
}
@@ -311,13 +321,19 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
DCHECK(main_jni_conv->HasNext());
static_assert(kObjectReferenceSize == 4u);
bool is_reference = mr_conv->IsCurrentParamAReference();
- bool spill_jobject = is_reference && !main_jni_conv->IsCurrentParamInRegister();
+ bool src_in_reg = mr_conv->IsCurrentParamInRegister();
+ bool dest_in_reg = main_jni_conv->IsCurrentParamInRegister();
+ if (is_reference && src_in_reg && dest_in_reg) {
+ // We have already spilled the raw reference above.
+ continue;
+ }
+ bool spill_jobject = is_reference && !dest_in_reg;
size_t src_size = (!is_reference && mr_conv->IsCurrentParamALongOrDouble()) ? 8u : 4u;
size_t dest_size = spill_jobject ? kRawPointerSize : src_size;
- src_args.push_back(mr_conv->IsCurrentParamInRegister()
+ src_args.push_back(src_in_reg
? ArgumentLocation(mr_conv->CurrentParamRegister(), src_size)
: ArgumentLocation(mr_conv->CurrentParamStackOffset(), src_size));
- dest_args.push_back(main_jni_conv->IsCurrentParamInRegister()
+ dest_args.push_back(dest_in_reg
? ArgumentLocation(mr_conv->CurrentParamStackOffset(), dest_size)
: ArgumentLocation(main_jni_conv->CurrentParamStackOffset(), dest_size));
refs.push_back(spill_jobject ? mr_conv->CurrentParamStackOffset() : kInvalidReferenceOffset);
@@ -327,41 +343,14 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
ArrayRef<FrameOffset>(refs));
} // if (!is_critical_native)
- // 2.3. Call into appropriate JniMethodStart passing Thread* so that transition out of Runnable
+ // 2.4. Call into `JniMethodStart()` passing Thread* so that transition out of Runnable
// can occur. We abuse the JNI calling convention here, that is guaranteed to support
- // passing two pointer arguments, `JNIEnv*` and `jclass`/`jobject`.
- std::unique_ptr<JNIMacroLabel> monitor_enter_exception_slow_path =
- UNLIKELY(is_synchronized) ? __ CreateLabel() : nullptr;
+ // passing two pointer arguments, `JNIEnv*` and `jclass`/`jobject`, and we use just one.
if (LIKELY(!is_critical_native && !is_fast_native)) {
// Skip this for @CriticalNative and @FastNative methods. They do not call JniMethodStart.
ThreadOffset<kPointerSize> jni_start =
- GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kStart,
- reference_return,
- is_synchronized);
+ GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kStart, reference_return);
main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
- if (is_synchronized) {
- // Pass object for locking.
- if (is_static) {
- // Pass the pointer to the method's declaring class as the first argument.
- DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u);
- SetNativeParameter(jni_asm.get(), main_jni_conv.get(), method_register);
- } else {
- // TODO: Use the register that still holds the `this` reference.
- mr_conv->ResetIterator(FrameOffset(current_frame_size));
- FrameOffset this_offset = mr_conv->CurrentParamStackOffset();
- if (main_jni_conv->IsCurrentParamOnStack()) {
- FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
- __ CreateJObject(out_off, this_offset, /*null_allowed=*/ false);
- } else {
- ManagedRegister out_reg = main_jni_conv->CurrentParamRegister();
- __ CreateJObject(out_reg,
- this_offset,
- ManagedRegister::NoRegister(),
- /*null_allowed=*/ false);
- }
- }
- main_jni_conv->Next();
- }
if (main_jni_conv->IsCurrentParamInRegister()) {
__ GetCurrentThread(main_jni_conv->CurrentParamRegister());
__ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_start));
@@ -369,10 +358,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
__ GetCurrentThread(main_jni_conv->CurrentParamStackOffset());
__ CallFromThread(jni_start);
}
- method_register = ManagedRegister::NoRegister(); // Method register is clobbered.
- if (is_synchronized) { // Check for exceptions from monitor enter.
- __ ExceptionPoll(monitor_enter_exception_slow_path.get());
- }
+ method_register = ManagedRegister::NoRegister(); // Method register is clobbered by the call.
}
// 3. Push local reference frame.
@@ -539,7 +525,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
}
}
- // 5. Call into appropriate JniMethodEnd to transition out of Runnable for normal native.
+ // 5. Transition to Runnable (if normal native).
// 5.1. Spill or move the return value if needed.
// TODO: Use `callee_save_temp` instead of stack slot when possible.
@@ -597,72 +583,30 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
}
if (LIKELY(!is_critical_native)) {
- // 5.4. Increase frame size for out args if needed by the end_jni_conv.
- const size_t end_out_arg_size = end_jni_conv->OutFrameSize();
- if (end_out_arg_size > current_out_arg_size) {
- DCHECK(!is_fast_native);
- size_t out_arg_size_diff = end_out_arg_size - current_out_arg_size;
- current_out_arg_size = end_out_arg_size;
- __ IncreaseFrameSize(out_arg_size_diff);
- current_frame_size += out_arg_size_diff;
- return_save_location = FrameOffset(return_save_location.SizeValue() + out_arg_size_diff);
- }
- end_jni_conv->ResetIterator(FrameOffset(end_out_arg_size));
-
- // 5.5. Call JniMethodEnd for normal native.
+ // 5.4. Call JniMethodEnd for normal native.
// For @FastNative with reference return, decode the `jobject`.
+ // We abuse the JNI calling convention here, that is guaranteed to support passing
+ // two pointer arguments, `JNIEnv*` and `jclass`/`jobject`, enough for all cases.
+ main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
if (LIKELY(!is_fast_native) || reference_return) {
ThreadOffset<kPointerSize> jni_end = is_fast_native
? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniDecodeReferenceResult)
- : GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd,
- reference_return,
- is_synchronized);
+ : GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd, reference_return);
if (reference_return) {
// Pass result.
- SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister());
- end_jni_conv->Next();
+ SetNativeParameter(jni_asm.get(), main_jni_conv.get(), main_jni_conv->ReturnRegister());
+ main_jni_conv->Next();
}
- if (is_synchronized) {
- // Pass object for unlocking.
- if (is_static) {
- // Load reference to the method's declaring class. The method register has been
- // clobbered by the above call, so we need to load the method from the stack.
- FrameOffset method_offset =
- FrameOffset(current_out_arg_size + mr_conv->MethodStackOffset().SizeValue());
- DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u);
- if (end_jni_conv->IsCurrentParamOnStack()) {
- FrameOffset out_off = end_jni_conv->CurrentParamStackOffset();
- __ Copy(out_off, method_offset, kRawPointerSize);
- } else {
- ManagedRegister out_reg = end_jni_conv->CurrentParamRegister();
- __ Load(out_reg, method_offset, kRawPointerSize);
- }
- } else {
- mr_conv->ResetIterator(FrameOffset(current_frame_size));
- FrameOffset this_offset = mr_conv->CurrentParamStackOffset();
- if (end_jni_conv->IsCurrentParamOnStack()) {
- FrameOffset out_off = end_jni_conv->CurrentParamStackOffset();
- __ CreateJObject(out_off, this_offset, /*null_allowed=*/ false);
- } else {
- ManagedRegister out_reg = end_jni_conv->CurrentParamRegister();
- __ CreateJObject(out_reg,
- this_offset,
- ManagedRegister::NoRegister(),
- /*null_allowed=*/ false);
- }
- }
- end_jni_conv->Next();
- }
- if (end_jni_conv->IsCurrentParamInRegister()) {
- __ GetCurrentThread(end_jni_conv->CurrentParamRegister());
- __ Call(end_jni_conv->CurrentParamRegister(), Offset(jni_end));
+ if (main_jni_conv->IsCurrentParamInRegister()) {
+ __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
+ __ Call(main_jni_conv->CurrentParamRegister(), Offset(jni_end));
} else {
- __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset());
+ __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset());
__ CallFromThread(jni_end);
}
}
- // 5.6. Reload return value if it was spilled.
+ // 5.5. Reload return value if it was spilled.
if (spill_return_value) {
__ Load(mr_conv->ReturnRegister(), return_save_location, mr_conv->SizeOfReturnValue());
}
@@ -698,7 +642,26 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
__ Bind(suspend_check_resume.get());
}
- // 7.4. Remove activation - need to restore callee save registers since the GC
+ // 7.4 Unlock the synchronization object for synchronized methods.
+ if (UNLIKELY(is_synchronized)) {
+ ManagedRegister to_lock = main_jni_conv->LockingArgumentRegister();
+ mr_conv->ResetIterator(FrameOffset(current_frame_size));
+ if (is_static) {
+ // Pass the declaring class.
+ DCHECK(method_register.IsNoRegister()); // TODO: Preserve the method in `callee_save_temp`.
+ ManagedRegister temp = __ CoreRegisterWithSize(callee_save_temp, kRawPointerSize);
+ FrameOffset method_offset = mr_conv->MethodStackOffset();
+ __ Load(temp, method_offset, kRawPointerSize);
+ DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u);
+ __ Load(to_lock, temp, MemberOffset(0u), kObjectReferenceSize);
+ } else {
+ // Pass the `this` argument from its spill slot.
+ __ Load(to_lock, mr_conv->CurrentParamStackOffset(), kObjectReferenceSize);
+ }
+ __ CallFromThread(QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniUnlockObject));
+ }
+
+ // 7.5. Remove activation - need to restore callee save registers since the GC
// may have changed them.
DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size));
if (LIKELY(!is_critical_native) || !main_jni_conv->UseTailCall()) {
@@ -768,14 +731,6 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp
// 8.3. Exception poll slow path(s).
if (LIKELY(!is_critical_native)) {
- if (UNLIKELY(is_synchronized)) {
- DCHECK(!is_fast_native);
- __ Bind(monitor_enter_exception_slow_path.get());
- if (main_out_arg_size != 0) {
- jni_asm->cfi().AdjustCFAOffset(main_out_arg_size);
- __ DecreaseFrameSize(main_out_arg_size);
- }
- }
__ Bind(exception_slow_path.get());
if (UNLIKELY(is_fast_native) && reference_return) {
// We performed the exception check early, so we need to adjust SP and pop IRT frame.
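Taken together, the compiler changes above move locking out of the `JniMethodStart`/`JniMethodEnd` entrypoints and into the new `pJniLockObject` / `pJniUnlockObject` calls, made with the object in the dedicated locking argument register. A compilable model of the resulting ordering for a normal (non-@FastNative, non-@CriticalNative) synchronized instance method; the functions are empty placeholders for the real entrypoints, and the model omits exception polls, suspend checks and reference decoding:

struct Thread {};
struct Object {};

// Placeholders standing in for the entrypoints and the native call.
void JniLockObject(Object*, Thread*) {}    // pJniLockObject (step 2.1 above)
void JniMethodStart(Thread*) {}            // pJniMethodStart (step 2.4)
void CallNativeMethod() {}                 // the native call (hunk not shown)
void JniMethodEnd(Thread*) {}              // pJniMethodEnd (step 5.4)
void JniUnlockObject(Object*, Thread*) {}  // pJniUnlockObject (step 7.4)

// Order of operations the stub performs for a synchronized instance method.
void SynchronizedJniStubModel(Object* this_ref, Thread* self) {
  JniLockObject(this_ref, self);    // lock while still Runnable
  JniMethodStart(self);             // transition out of Runnable
  CallNativeMethod();               // run the native code
  JniMethodEnd(self);               // transition back to Runnable
  JniUnlockObject(this_ref, self);  // unlock after the transition back
}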
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 947320237c..2fb063f3fd 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -294,6 +294,15 @@ FrameOffset X86JniCallingConvention::CurrentParamStackOffset() {
FrameOffset(displacement_.Int32Value() - OutFrameSize() + (itr_slots_ * kFramePointerSize));
}
+ManagedRegister X86JniCallingConvention::LockingArgumentRegister() const {
+ DCHECK(!IsFastNative());
+ DCHECK(!IsCriticalNative());
+ DCHECK(IsSynchronized());
+  // The callee-save register EBP is suitable as a locking argument.
+ static_assert(kCalleeSaveRegisters[0].Equals(X86ManagedRegister::FromCpuRegister(EBP)));
+ return X86ManagedRegister::FromCpuRegister(EBP);
+}
+
ManagedRegister X86JniCallingConvention::HiddenArgumentRegister() const {
CHECK(IsCriticalNative());
// EAX is neither managed callee-save, nor argument register, nor scratch register.
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index 7b62161907..f028090c75 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -77,6 +77,10 @@ class X86JniCallingConvention final : public JniCallingConvention {
return HasSmallReturnType();
}
+ // Locking argument register, used to pass the synchronization object for calls
+ // to `JniLockObject()` and `JniUnlockObject()`.
+ ManagedRegister LockingArgumentRegister() const override;
+
// Hidden argument register, used to pass the method pointer for @CriticalNative call.
ManagedRegister HiddenArgumentRegister() const override;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index ddf3d74adc..469de42eff 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -299,6 +299,15 @@ FrameOffset X86_64JniCallingConvention::CurrentParamStackOffset() {
return FrameOffset(offset);
}
+ManagedRegister X86_64JniCallingConvention::LockingArgumentRegister() const {
+ DCHECK(!IsFastNative());
+ DCHECK(!IsCriticalNative());
+ DCHECK(IsSynchronized());
+  // The callee-save register RBX is suitable as a locking argument.
+ static_assert(kCalleeSaveRegisters[0].Equals(X86_64ManagedRegister::FromCpuRegister(RBX)));
+ return X86_64ManagedRegister::FromCpuRegister(RBX);
+}
+
ManagedRegister X86_64JniCallingConvention::HiddenArgumentRegister() const {
CHECK(IsCriticalNative());
// RAX is neither managed callee-save, nor argument register, nor scratch register.
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index ee8603d9ce..fda5c0e354 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -72,6 +72,10 @@ class X86_64JniCallingConvention final : public JniCallingConvention {
return HasSmallReturnType();
}
+ // Locking argument register, used to pass the synchronization object for calls
+ // to `JniLockObject()` and `JniUnlockObject()`.
+ ManagedRegister LockingArgumentRegister() const override;
+
// Hidden argument register, used to pass the method pointer for @CriticalNative call.
ManagedRegister HiddenArgumentRegister() const override;
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 9ea6f04cb6..bd8aa083eb 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -546,32 +546,6 @@ void ArmVIXLJNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
DCHECK_EQ(arg_count, srcs.size());
DCHECK_EQ(arg_count, refs.size());
- // Spill reference registers. Spill two references together with STRD where possible.
- for (size_t i = 0; i != arg_count; ++i) {
- if (refs[i] != kInvalidReferenceOffset) {
- DCHECK_EQ(srcs[i].GetSize(), kObjectReferenceSize);
- if (srcs[i].IsRegister()) {
- DCHECK_EQ(srcs[i].GetSize(), kObjectReferenceSize);
- // Use STRD if we're storing 2 consecutive references within the available STRD range.
- if (i + 1u != arg_count &&
- refs[i + 1u] != kInvalidReferenceOffset &&
- srcs[i + 1u].IsRegister() &&
- refs[i].SizeValue() < kStrdOffsetCutoff) {
- DCHECK_EQ(srcs[i + 1u].GetSize(), kObjectReferenceSize);
- DCHECK_EQ(refs[i + 1u].SizeValue(), refs[i].SizeValue() + kObjectReferenceSize);
- ___ Strd(AsVIXLRegister(srcs[i].GetRegister().AsArm()),
- AsVIXLRegister(srcs[i + 1u].GetRegister().AsArm()),
- MemOperand(sp, refs[i].SizeValue()));
- ++i;
- } else {
- Store(refs[i], srcs[i].GetRegister(), kObjectReferenceSize);
- }
- } else {
- DCHECK_EQ(srcs[i].GetFrameOffset(), refs[i]);
- }
- }
- }
-
// Convert reference registers to `jobject` values.
// TODO: Delay this for references that are copied to another register.
for (size_t i = 0; i != arg_count; ++i) {
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 0f1203e232..561cbbd54b 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -382,30 +382,6 @@ void Arm64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
DCHECK_EQ(arg_count, srcs.size());
DCHECK_EQ(arg_count, refs.size());
- // Spill reference registers. Spill two references together with STP where possible.
- for (size_t i = 0; i != arg_count; ++i) {
- if (refs[i] != kInvalidReferenceOffset) {
- DCHECK_EQ(srcs[i].GetSize(), kObjectReferenceSize);
- if (srcs[i].IsRegister()) {
- // Use STP if we're storing 2 consecutive references within the available STP range.
- if (i + 1u != arg_count &&
- refs[i + 1u].SizeValue() == refs[i].SizeValue() + kObjectReferenceSize &&
- srcs[i + 1u].IsRegister() &&
- refs[i].SizeValue() < kStpWOffsetCutoff) {
- DCHECK_EQ(srcs[i + 1u].GetSize(), kObjectReferenceSize);
- ___ Stp(reg_w(srcs[i].GetRegister().AsArm64().AsWRegister()),
- reg_w(srcs[i + 1u].GetRegister().AsArm64().AsWRegister()),
- MEM_OP(sp, refs[i].SizeValue()));
- ++i;
- } else {
- Store(refs[i], srcs[i].GetRegister(), kObjectReferenceSize);
- }
- } else {
- DCHECK_EQ(srcs[i].GetFrameOffset(), refs[i]);
- }
- }
- }
-
auto get_mask = [](ManagedRegister reg) -> uint64_t {
Arm64ManagedRegister arm64_reg = reg.AsArm64();
if (arm64_reg.IsXRegister()) {
@@ -429,12 +405,12 @@ void Arm64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
};
// More than 8 core or FP reg args are very rare, so we do not optimize for
- // that case by using LDP/STP, except for situations that arise for normal
- // native even with low number of arguments. We use STP for the non-reference
- // spilling which also covers the initial spill for native reference register
- // args as they are spilled as raw 32-bit values. We also optimize loading
- // args to registers with LDP, whether references or not, except for the
- // initial non-null reference which we do not need to load at all.
+ // that case by using LDP/STP, except for situations that arise even with low
+ // number of arguments. We use STP for the non-reference spilling which also
+ // covers the initial spill for native reference register args as they are
+ // spilled as raw 32-bit values. We also optimize loading args to registers
+ // with LDP, whether references or not, except for the initial non-null
+ // reference which we do not need to load at all.
// Collect registers to move while storing/copying args to stack slots.
// Convert processed references to `jobject`.
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 541458b236..b35066f434 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -153,7 +153,7 @@ const char* const VixlJniHelpersResults = {
" 21c: d9 f8 24 80 ldr.w r8, [r9, #36]\n"
" 220: 70 47 bx lr\n"
" 222: d9 f8 8c 00 ldr.w r0, [r9, #140]\n"
- " 226: d9 f8 c8 e2 ldr.w lr, [r9, #712]\n"
+ " 226: d9 f8 c4 e2 ldr.w lr, [r9, #708]\n"
" 22a: f0 47 blx lr\n"
};
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index d0afa72155..7dff279944 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -332,6 +332,10 @@ void X86JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
DCHECK_EQ(src.GetSize(), dest.GetSize()); // Even for references.
if (src.IsRegister()) {
if (UNLIKELY(dest.IsRegister())) {
+ if (dest.GetRegister().Equals(src.GetRegister())) {
+ // JNI compiler sometimes adds a no-op move.
+ continue;
+ }
// Native ABI has only stack arguments but we may pass one "hidden arg" in register.
CHECK(!found_hidden_arg);
found_hidden_arg = true;
@@ -341,7 +345,6 @@ void X86JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
Move(dest.GetRegister(), src.GetRegister(), dest.GetSize());
} else {
if (ref != kInvalidReferenceOffset) {
- Store(ref, srcs[i].GetRegister(), kObjectReferenceSize);
// Note: We can clobber `src` here as the register cannot hold more than one argument.
// This overload of `CreateJObject()` currently does not use the scratch
// register ECX, so this shall not clobber another argument.
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index 1425a4cc41..2da1b470ac 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -388,7 +388,6 @@ void X86_64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
DCHECK_EQ(src.GetSize(), dest.GetSize());
}
if (src.IsRegister() && ref != kInvalidReferenceOffset) {
- Store(ref, src.GetRegister(), kObjectReferenceSize);
// Note: We can clobber `src` here as the register cannot hold more than one argument.
// This overload of `CreateJObject()` is currently implemented as "test and branch";
// if it was using a conditional move, it would be better to do this at move time.
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index 7bcff2bafc..cca5bc2fc3 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -505,7 +505,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) {
EXPECT_EQ(64U, sizeof(OatHeader));
EXPECT_EQ(4U, sizeof(OatMethodOffsets));
EXPECT_EQ(4U, sizeof(OatQuickMethodHeader));
- EXPECT_EQ(169 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+ EXPECT_EQ(168 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
sizeof(QuickEntryPoints));
}
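The two expected-value updates above and in `assembler_thumb_test_expected.cc.inc` are consistent with the entrypoint table shrinking by one slot, assuming this change removes the three `*Synchronized` entrypoints and adds `pJniLockObject` and `pJniUnlockObject` (the list change itself lives in `quick_entrypoints_list.h` and is not shown here). A quick check of that arithmetic:

// Assumed net change: -3 synchronized entrypoints, +2 lock/unlock entrypoints.
static_assert(169 - 3 + 2 == 168, "one fewer QuickEntryPoints slot");
// Offsets of entrypoints past the removed slot shift by one 4-byte pointer
// on 32-bit ARM, matching the 712 -> 708 change in the expected Thumb output.
static_assert(712 - 4 == 708, "offset shift of one pointer on 32-bit ARM");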
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 5ef1d3e17a..ca63914759 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -493,52 +493,66 @@ END art_quick_do_long_jump
*/
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
- /*
- * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
- * possibly null object to lock.
- */
- .extern artLockObjectFromCode
-ENTRY art_quick_lock_object
- ldr r1, [rSELF, #THREAD_ID_OFFSET]
- cbz r0, .Lslow_lock
-.Lretry_lock:
- ldrex r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
- eor r3, r2, r1 @ Prepare the value to store if unlocked
+.macro LOCK_OBJECT_FAST_PATH obj, tmp1, tmp2, tmp3, slow_lock, can_be_null
+ ldr \tmp1, [rSELF, #THREAD_ID_OFFSET]
+ .if \can_be_null
+ cbz \obj, \slow_lock
+ .endif
+1:
+ ldrex \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ eor \tmp3, \tmp2, \tmp1 @ Prepare the value to store if unlocked
@ (thread id, count of 0 and preserved read barrier bits),
@ or prepare to compare thread id for recursive lock check
@ (lock_word.ThreadId() ^ self->ThreadId()).
- ands ip, r2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits.
- bne .Lnot_unlocked @ Check if unlocked.
- @ unlocked case - store r3: original lock word plus thread id, preserved read barrier bits.
- strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
- cbnz r2, .Llock_strex_fail @ If store failed, retry.
+ ands ip, \tmp2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits.
+ bne 2f @ Check if unlocked.
+ @ unlocked case - store tmp3: original lock word plus thread id, preserved read barrier bits.
+ strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ cbnz \tmp2, 3f @ If store failed, retry.
dmb ish @ Full (LoadLoad|LoadStore) memory barrier.
bx lr
-.Lnot_unlocked: @ r2: original lock word, r1: thread_id, r3: r2 ^ r1
+2: @ tmp2: original lock word, tmp1: thread_id, tmp3: tmp2 ^ tmp1
#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
#error "Expecting thin lock count and gc state in consecutive bits."
#endif
- @ Check lock word state and thread id together,
- bfc r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
- cbnz r3, .Lslow_lock @ if either of the top two bits are set, or the lock word's
+ @ Check lock word state and thread id together.
+ bfc \tmp3, \
+ #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, \
+ #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
+ cbnz \tmp3, \slow_lock @ if either of the top two bits are set, or the lock word's
@ thread id did not match, go slow path.
- add r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Increment the recursive lock count.
+ add \tmp3, \tmp2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Increment the recursive lock count.
@ Extract the new thin lock count for overflow check.
- ubfx r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE
- cbz r2, .Lslow_lock @ Zero as the new count indicates overflow, go slow path.
- strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits.
- cbnz r2, .Llock_strex_fail @ If strex failed, retry.
+ ubfx \tmp2, \tmp3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #LOCK_WORD_THIN_LOCK_COUNT_SIZE
+ cbz \tmp2, \slow_lock @ Zero as the new count indicates overflow, go slow path.
+ @ strex necessary for read barrier bits.
+ strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ cbnz \tmp2, 3f @ If strex failed, retry.
bx lr
-.Llock_strex_fail:
- b .Lretry_lock @ retry
-// Note: the slow path is actually the art_quick_lock_object_no_inline (tail call).
+3:
+ b 1b @ retry
+.endm
+
+ /*
+ * Entry from managed code that tries to lock the object in a fast path and
+ * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
+ * r0 holds the possibly null object to lock.
+ */
+ENTRY art_quick_lock_object
+ // Note: the slow path is actually the art_quick_lock_object_no_inline (tail call).
+ LOCK_OBJECT_FAST_PATH r0, r1, r2, r3, .Llock_object_slow, /*can_be_null*/ 1
END art_quick_lock_object
+ /*
+ * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
+ * r0 holds the possibly null object to lock.
+ */
+ .extern artLockObjectFromCode
ENTRY art_quick_lock_object_no_inline
// This is also the slow path for art_quick_lock_object. Note that we
// need a local label, the assembler complains about target being out of
// range if we try to jump to `art_quick_lock_object_no_inline`.
-.Lslow_lock:
+.Llock_object_slow:
SETUP_SAVE_REFS_ONLY_FRAME r1 @ save callee saves in case we block
mov r1, rSELF @ pass Thread::Current
bl artLockObjectFromCode @ (Object* obj, Thread*)
@@ -548,62 +562,78 @@ ENTRY art_quick_lock_object_no_inline
DELIVER_PENDING_EXCEPTION
END art_quick_lock_object_no_inline
- /*
- * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
- * r0 holds the possibly null object to lock.
- */
- .extern artUnlockObjectFromCode
-ENTRY art_quick_unlock_object
- ldr r1, [rSELF, #THREAD_ID_OFFSET]
- cbz r0, .Lslow_unlock
-.Lretry_unlock:
+.macro UNLOCK_OBJECT_FAST_PATH obj, tmp1, tmp2, tmp3, slow_unlock, can_be_null
+ ldr \tmp1, [rSELF, #THREAD_ID_OFFSET]
+ .if \can_be_null
+ cbz \obj, \slow_unlock
+ .endif
+1:
#ifndef USE_READ_BARRIER
- ldr r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ ldr \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
@ Need to use atomic instructions for read barrier.
- ldrex r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ ldrex \tmp2, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#endif
- eor r3, r2, r1 @ Prepare the value to store if simply locked
+ eor \tmp3, \tmp2, \tmp1 @ Prepare the value to store if simply locked
@ (mostly 0s, and preserved read barrier bits),
@ or prepare to compare thread id for recursive lock check
@ (lock_word.ThreadId() ^ self->ThreadId()).
- ands ip, r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits.
- bne .Lnot_simply_locked @ Locked recursively or by other thread?
+ ands ip, \tmp3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ Test the non-gc bits.
+ bne 2f @ Locked recursively or by other thread?
@ Transition to unlocked.
dmb ish @ Full (LoadStore|StoreStore) memory barrier.
#ifndef USE_READ_BARRIER
- str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ str \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
- strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
- cbnz r2, .Lunlock_strex_fail @ If the store failed, retry.
+ @ strex necessary for read barrier bits
+ strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ cbnz \tmp2, 3f @ If the store failed, retry.
#endif
bx lr
-.Lnot_simply_locked: @ r2: original lock word, r1: thread_id, r3: r2 ^ r1
+2: @ tmp2: original lock word, tmp1: thread_id, tmp3: tmp2 ^ tmp1
#if LOCK_WORD_THIN_LOCK_COUNT_SHIFT + LOCK_WORD_THIN_LOCK_COUNT_SIZE != LOCK_WORD_GC_STATE_SHIFT
#error "Expecting thin lock count and gc state in consecutive bits."
#endif
@ Check lock word state and thread id together,
- bfc r3, #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
- cbnz r3, .Lslow_unlock @ if either of the top two bits are set, or the lock word's
+ bfc \tmp3, \
+ #LOCK_WORD_THIN_LOCK_COUNT_SHIFT, \
+ #(LOCK_WORD_THIN_LOCK_COUNT_SIZE + LOCK_WORD_GC_STATE_SIZE)
+ cbnz \tmp3, \slow_unlock @ if either of the top two bits are set, or the lock word's
@ thread id did not match, go slow path.
- sub r3, r2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Decrement recursive lock count.
+ sub \tmp3, \tmp2, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ Decrement recursive lock count.
#ifndef USE_READ_BARRIER
- str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ str \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
- strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits.
- cbnz r2, .Lunlock_strex_fail @ If the store failed, retry.
+ @ strex necessary for read barrier bits.
+ strex \tmp2, \tmp3, [\obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+ cbnz \tmp2, 3f @ If the store failed, retry.
#endif
bx lr
-.Lunlock_strex_fail:
- b .Lretry_unlock @ retry
-// Note: the slow path is actually the art_quick_unlock_object_no_inline (tail call).
+3:
+ b 1b @ retry
+.endm
+
+ /*
+ * Entry from managed code that tries to unlock the object in a fast path and calls
+ * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
+ * r0 holds the possibly null object to unlock.
+ */
+ENTRY art_quick_unlock_object
+ // Note: the slow path is actually the art_quick_unlock_object_no_inline (tail call).
+ UNLOCK_OBJECT_FAST_PATH r0, r1, r2, r3, .Lunlock_object_slow, /*can_be_null*/ 1
END art_quick_unlock_object
+ /*
+ * Entry from managed code that calls `artUnlockObjectFromCode()`
+ * and delivers exception on failure.
+ * r0 holds the possibly null object to unlock.
+ */
+ .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object_no_inline
// This is also the slow path for art_quick_unlock_object. Note that we
// need a local label, the assembler complains about target being out of
// range if we try to jump to `art_quick_unlock_object_no_inline`.
-.Lslow_unlock:
+.Lunlock_object_slow:
@ save callee saves in case exception allocation triggers GC
SETUP_SAVE_REFS_ONLY_FRAME r1
mov r1, rSELF @ pass Thread::Current
@@ -615,6 +645,80 @@ ENTRY art_quick_unlock_object_no_inline
END art_quick_unlock_object_no_inline
/*
+ * Entry from JNI stub that tries to lock the object in a fast path and
+ * calls `artLockObjectFromCode()` (the same as for managed code) for the
+ * difficult cases, may block for GC.
+ * Custom calling convention:
+ * r4 holds the non-null object to lock.
+ * Callee-save registers have been saved and can be used as temporaries.
+ * All argument registers need to be preserved.
+ */
+ENTRY art_quick_lock_object_jni
+ LOCK_OBJECT_FAST_PATH r4, r5, r6, r7, .Llock_object_jni_slow, /*can_be_null*/ 0
+
+.Llock_object_jni_slow:
+ // Save GPR args r0-r3 and return address. Also save r4 for stack alignment.
+ push {r0-r4, lr}
+ .cfi_adjust_cfa_offset 24
+ .cfi_rel_offset lr, 20
+ // Save FPR args.
+ vpush {s0-s15}
+ .cfi_adjust_cfa_offset 64
+ // Call `artLockObjectFromCode()`
+ mov r0, r4 @ Pass the object to lock.
+ mov r1, rSELF @ Pass Thread::Current().
+ bl artLockObjectFromCode @ (Object* obj, Thread*)
+ // Restore FPR args.
+ vpop {s0-s15}
+ .cfi_adjust_cfa_offset -64
+ // Check result.
+ cbnz r0, 1f
+ // Restore GPR args and r4 and return.
+ pop {r0-r4, pc}
+1:
+ // GPR args are irrelevant when throwing an exception but pop them anyway with the LR we need.
+ pop {r0-r4, lr}
+ .cfi_adjust_cfa_offset -24
+ .cfi_restore lr
+ // Make a tail call to `artDeliverPendingExceptionFromCode()`.
+ // Rely on the JNI transition frame constructed in the JNI stub.
+ mov r0, rSELF @ Pass Thread::Current().
+ b artDeliverPendingExceptionFromCode @ (Thread*)
+END art_quick_lock_object_jni
+
+ /*
+ * Entry from JNI stub that tries to unlock the object in a fast path and calls
+ * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock
+ * is fatal, so we do not need to check for exceptions in the slow path.
+ * Custom calling convention:
+ * r4 holds the non-null object to unlock.
+ * Callee-save registers have been saved and can be used as temporaries.
+ * Return registers r0-r1 and s0-s1 need to be preserved.
+ */
+    .extern artUnlockObjectFromJni
+ENTRY art_quick_unlock_object_jni
+ UNLOCK_OBJECT_FAST_PATH r4, r5, r6, r7, .Lunlock_object_jni_slow, /*can_be_null*/ 0
+
+ .Lunlock_object_jni_slow:
+ // Save GPR return registers and return address. Also save r4 for stack alignment.
+ push {r0-r1, r4, lr}
+ .cfi_adjust_cfa_offset 16
+ .cfi_rel_offset lr, 12
+ // Save FPR return registers.
+ vpush {s0-s1}
+ .cfi_adjust_cfa_offset 8
+ // Call `artUnlockObjectFromJni()`
+ mov r0, r4 @ Pass the object to unlock.
+ mov r1, rSELF @ Pass Thread::Current().
+ bl artUnlockObjectFromJni @ (Object* obj, Thread*)
+ // Restore FPR return registers.
+ vpop {s0-s1}
+ .cfi_adjust_cfa_offset -8
+ // Restore GPR return registers and r4 and return.
+ pop {r0-r1, r4, pc}
+END art_quick_unlock_object_jni
+
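
The slow paths above reduce to a small C++-level contract. A minimal sketch (illustrative only, not ART code; the two helper names are invented here, and the register save/restore plus the tail call are what the assembly actually spends its lines on):

    // Sketch of the JNI lock/unlock slow-path contract (helper names invented).
    namespace art { class Thread; namespace mirror { class Object; } }
    extern "C" int artLockObjectFromCode(art::mirror::Object* obj, art::Thread* self);
    extern "C" void artUnlockObjectFromJni(art::mirror::Object* obj, art::Thread* self);
    extern "C" void artDeliverPendingExceptionFromCode(art::Thread* self);

    void LockObjectJniSlowPath(art::mirror::Object* obj, art::Thread* self) {
      if (artLockObjectFromCode(obj, self) != 0) {
        // Locking failed and an exception is pending: deliver it, relying on
        // the JNI transition frame already constructed by the JNI stub.
        artDeliverPendingExceptionFromCode(self);  // Does not return.
      }
      // Success: argument registers were preserved and control returns to the stub.
    }

    void UnlockObjectJniSlowPath(art::mirror::Object* obj, art::Thread* self) {
      // A failure to unlock is fatal inside artUnlockObjectFromJni(), so there
      // is no exception check here; only the return registers are preserved.
      artUnlockObjectFromJni(obj, self);
    }
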
+ /*
* Entry from managed code that calls artInstanceOfFromCode and on failure calls
* artThrowClassCastExceptionForObject.
*/
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index e5dbeda42d..657ff7831f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -881,42 +881,52 @@ ENTRY art_quick_do_long_jump
br xIP1
END art_quick_do_long_jump
- /*
- * Entry from managed code that calls artLockObjectFromCode, may block for GC. x0 holds the
- * possibly null object to lock.
- *
- * Derived from arm32 code.
- */
- .extern artLockObjectFromCode
-ENTRY art_quick_lock_object
- ldr w1, [xSELF, #THREAD_ID_OFFSET]
- cbz w0, art_quick_lock_object_no_inline
+.macro LOCK_OBJECT_FAST_PATH obj, slow_lock, can_be_null
+ // Use scratch registers x8-x11 as temporaries.
+ ldr w9, [xSELF, #THREAD_ID_OFFSET]
+ .if \can_be_null
+ cbz \obj, \slow_lock
+ .endif
// Exclusive load/store has no immediate anymore.
- add x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET
-.Lretry_lock:
- ldaxr w2, [x4] // Acquire needed only in most common case.
- eor w3, w2, w1 // Prepare the value to store if unlocked
+ add x8, \obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET
+1:
+ ldaxr w10, [x8] // Acquire needed only in most common case.
+ eor w11, w10, w9 // Prepare the value to store if unlocked
// (thread id, count of 0 and preserved read barrier bits),
// or prepare to compare thread id for recursive lock check
// (lock_word.ThreadId() ^ self->ThreadId()).
- tst w2, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // Test the non-gc bits.
- b.ne .Lnot_unlocked // Check if unlocked.
- // unlocked case - store w3: original lock word plus thread id, preserved read barrier bits.
- stxr w2, w3, [x4]
- cbnz w2, .Lretry_lock // If the store failed, retry.
+ tst w10, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // Test the non-gc bits.
+ b.ne 2f // Check if unlocked.
+ // Unlocked case - store w11: original lock word plus thread id, preserved read barrier bits.
+ stxr w10, w11, [x8]
+ cbnz w10, 1b // If the store failed, retry.
ret
-.Lnot_unlocked: // w2: original lock word, w1: thread id, w3: w2 ^ w1
+2: // w10: original lock word, w9: thread id, w11: w10 ^ w9
// Check lock word state and thread id together,
- tst w3, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
- b.ne art_quick_lock_object_no_inline
- add w3, w2, #LOCK_WORD_THIN_LOCK_COUNT_ONE // Increment the recursive lock count.
- tst w3, #LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED // Test the new thin lock count.
- b.eq art_quick_lock_object_no_inline // Zero as the new count indicates overflow, go slow path.
- stxr w2, w3, [x4]
- cbnz w2, .Lretry_lock // If the store failed, retry.
+ tst w11, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
+ b.ne \slow_lock
+ add w11, w10, #LOCK_WORD_THIN_LOCK_COUNT_ONE // Increment the recursive lock count.
+ tst w11, #LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED // Test the new thin lock count.
+ b.eq \slow_lock // Zero as the new count indicates overflow, go slow path.
+ stxr w10, w11, [x8]
+ cbnz w10, 1b // If the store failed, retry.
ret
+.endm
+
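
The macro above is the thin-lock acquire fast path. Roughly the same decision logic in C++, for readers who prefer it to the assembly (a sketch: the mask parameters stand in for the LOCK_WORD_* constants used here, and the memory ordering is approximated with acquire CAS loops):

    #include <atomic>
    #include <cstdint>

    // Returns true if the lock was taken on the fast path; false means "go to
    // the slow path" (inflated lock, owned by another thread, or count overflow).
    bool TryLockFast(std::atomic<uint32_t>& lock_word,   // [obj + MIRROR_OBJECT_LOCK_WORD_OFFSET]
                     uint32_t thread_id,
                     uint32_t gc_state_mask_toggled,     // LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED
                     uint32_t state_and_owner_mask,      // STATE | THIN_LOCK_OWNER, shifted
                     uint32_t count_one,                 // LOCK_WORD_THIN_LOCK_COUNT_ONE
                     uint32_t count_mask) {              // LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED
      while (true) {
        uint32_t old_word = lock_word.load(std::memory_order_relaxed);
        uint32_t xored = old_word ^ thread_id;  // Value to store if unlocked, or owner comparison.
        if ((old_word & gc_state_mask_toggled) == 0) {
          // Unlocked: install the thread id with count 0, preserving read barrier bits.
          if (lock_word.compare_exchange_weak(old_word, xored, std::memory_order_acquire)) {
            return true;
          }
          continue;  // Store failed, retry (the `cbnz w10, 1b` above).
        }
        if ((xored & state_and_owner_mask) != 0) {
          return false;  // Inflated lock or a different owner.
        }
        uint32_t new_word = old_word + count_one;  // Recursive lock.
        if ((new_word & count_mask) == 0) {
          return false;  // Count overflowed.
        }
        if (lock_word.compare_exchange_weak(old_word, new_word, std::memory_order_acquire)) {
          return true;
        }
      }
    }
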
+ /*
+ * Entry from managed code that tries to lock the object in a fast path and
+ * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
+ * x0 holds the possibly null object to lock.
+ */
+ENTRY art_quick_lock_object
+ LOCK_OBJECT_FAST_PATH x0, art_quick_lock_object_no_inline, /*can_be_null*/ 1
END art_quick_lock_object
+ /*
+ * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
+ * x0 holds the possibly null object to lock.
+ */
+ .extern artLockObjectFromCode
ENTRY art_quick_lock_object_no_inline
// This is also the slow path for art_quick_lock_object.
SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case we block
@@ -927,52 +937,63 @@ ENTRY art_quick_lock_object_no_inline
RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object_no_inline
- /*
- * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
- * x0 holds the possibly null object to lock.
- *
- * Derived from arm32 code.
- */
- .extern artUnlockObjectFromCode
-ENTRY art_quick_unlock_object
- ldr w1, [xSELF, #THREAD_ID_OFFSET]
- cbz x0, art_quick_unlock_object_no_inline
+.macro UNLOCK_OBJECT_FAST_PATH obj, slow_unlock, can_be_null
+ // Use scratch registers x8-x11 as temporaries.
+ ldr w9, [xSELF, #THREAD_ID_OFFSET]
+ .if \can_be_null
+ cbz \obj, \slow_unlock
+ .endif
// Exclusive load/store has no immediate anymore.
- add x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET
-.Lretry_unlock:
+ add x8, \obj, #MIRROR_OBJECT_LOCK_WORD_OFFSET
+1:
#ifndef USE_READ_BARRIER
- ldr w2, [x4]
+ ldr w10, [x8]
#else
- ldxr w2, [x4] // Need to use atomic instructions for read barrier.
+ ldxr w10, [x8] // Need to use atomic instructions for read barrier.
#endif
- eor w3, w2, w1 // Prepare the value to store if simply locked
+ eor w11, w10, w9 // Prepare the value to store if simply locked
// (mostly 0s, and preserved read barrier bits),
// or prepare to compare thread id for recursive lock check
// (lock_word.ThreadId() ^ self->ThreadId()).
- tst w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // Test the non-gc bits.
- b.ne .Lnot_simply_locked // Locked recursively or by other thread?
+ tst w11, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED // Test the non-gc bits.
+ b.ne 2f // Locked recursively or by other thread?
// Transition to unlocked.
#ifndef USE_READ_BARRIER
- stlr w3, [x4]
+ stlr w11, [x8]
#else
- stlxr w2, w3, [x4] // Need to use atomic instructions for read barrier.
- cbnz w2, .Lretry_unlock // If the store failed, retry.
+ stlxr w10, w11, [x8] // Need to use atomic instructions for read barrier.
+ cbnz w10, 1b // If the store failed, retry.
#endif
ret
-.Lnot_simply_locked:
- // Check lock word state and thread id together,
- tst w3, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
+2:
+ // Check lock word state and thread id together.
+ tst w11, #(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED)
- b.ne art_quick_unlock_object_no_inline
+ b.ne \slow_unlock
- sub w3, w2, #LOCK_WORD_THIN_LOCK_COUNT_ONE // decrement count
+ sub w11, w10, #LOCK_WORD_THIN_LOCK_COUNT_ONE // decrement count
#ifndef USE_READ_BARRIER
- str w3, [x4]
+ str w11, [x8]
#else
- stxr w2, w3, [x4] // Need to use atomic instructions for read barrier.
- cbnz w2, .Lretry_unlock // If the store failed, retry.
+ stxr w10, w11, [x8] // Need to use atomic instructions for read barrier.
+ cbnz w10, 1b // If the store failed, retry.
#endif
ret
+.endm
+
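
And the matching release path: with read barriers enabled the store must be atomic (the `stlxr`/`stxr` branches above), without them a plain or release store suffices; the sketch below uses the CAS form throughout and mirrors the lock sketch's parameters.

    #include <atomic>
    #include <cstdint>

    // Returns true if the lock was released on the fast path; false means "go
    // to the slow path" (inflated lock or not owned by this thread).
    bool TryUnlockFast(std::atomic<uint32_t>& lock_word,
                       uint32_t thread_id,
                       uint32_t gc_state_mask_toggled,
                       uint32_t state_and_owner_mask,
                       uint32_t count_one) {
      while (true) {
        uint32_t old_word = lock_word.load(std::memory_order_relaxed);
        uint32_t xored = old_word ^ thread_id;
        if ((xored & gc_state_mask_toggled) == 0) {
          // Held once by this thread: transition to unlocked, keeping only the
          // read barrier bits, with release semantics.
          if (lock_word.compare_exchange_weak(old_word, xored, std::memory_order_release)) {
            return true;
          }
          continue;
        }
        if ((xored & state_and_owner_mask) != 0) {
          return false;  // Inflated lock or a different owner.
        }
        // Recursive unlock: just decrement the count.
        if (lock_word.compare_exchange_weak(old_word, old_word - count_one,
                                            std::memory_order_relaxed)) {
          return true;
        }
      }
    }
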
+ /*
+ * Entry from managed code that tries to unlock the object in a fast path and calls
+ * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
+ * x0 holds the possibly null object to unlock.
+ */
+ENTRY art_quick_unlock_object
+ UNLOCK_OBJECT_FAST_PATH x0, art_quick_unlock_object_no_inline, /*can_be_null*/ 1
END art_quick_unlock_object
+ /*
+ * Entry from managed code that calls `artUnlockObjectFromCode()`
+ * and delivers exception on failure.
+ * x0 holds the possibly null object to unlock.
+ */
+ .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object_no_inline
// This is also the slow path for art_quick_unlock_object.
SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case exception allocation triggers GC
@@ -984,6 +1005,91 @@ ENTRY art_quick_unlock_object_no_inline
END art_quick_unlock_object_no_inline
/*
+ * Entry from JNI stub that tries to lock the object in a fast path and
+ * calls `artLockObjectFromCode()` (the same as for managed code) for the
+ * difficult cases, may block for GC.
+ * Custom calling convention:
+ * x15 holds the non-null object to lock.
+ * Callee-save registers have been saved and can be used as temporaries.
+ * All argument registers need to be preserved.
+ */
+ENTRY art_quick_lock_object_jni
+ LOCK_OBJECT_FAST_PATH x15, .Llock_object_jni_slow, /*can_be_null*/ 0
+
+.Llock_object_jni_slow:
+ // Save register args x0-x7, d0-d7 and return address.
+ stp x0, x1, [sp, #-(8 * 8 + 8 * 8 + /*padding*/ 8 + 8)]!
+ .cfi_adjust_cfa_offset (8 * 8 + 8 * 8 + /*padding*/ 8 + 8)
+ stp x2, x3, [sp, #16]
+ stp x4, x5, [sp, #32]
+ stp x6, x7, [sp, #48]
+ stp d0, d1, [sp, #64]
+ stp d2, d3, [sp, #80]
+ stp d4, d5, [sp, #96]
+ stp d6, d7, [sp, #112]
+ str lr, [sp, #136]
+ .cfi_rel_offset lr, 136
+ // Call `artLockObjectFromCode()`
+ mov x0, x15 // Pass the object to lock.
+ mov x1, xSELF // Pass Thread::Current().
+ bl artLockObjectFromCode // (Object* obj, Thread*)
+ // Restore return address.
+ ldr lr, [sp, #136]
+ .cfi_restore lr
+ // Check result.
+ cbnz x0, 1f
+ // Restore register args x0-x7, d0-d7 and return.
+ ldp x2, x3, [sp, #16]
+ ldp x4, x5, [sp, #32]
+ ldp x6, x7, [sp, #48]
+ ldp d0, d1, [sp, #64]
+ ldp d2, d3, [sp, #80]
+ ldp d4, d5, [sp, #96]
+ ldp d6, d7, [sp, #112]
+ ldp x0, x1, [sp], #(8 * 8 + 8 * 8 + /*padding*/ 8 + 8)
+ .cfi_adjust_cfa_offset -(8 * 8 + 8 * 8 + /*padding*/ 8 + 8)
+ ret
+ .cfi_adjust_cfa_offset (8 * 8 + 8 * 8 + /*padding*/ 8 + 8)
+1:
+ // All args are irrelevant when throwing an exception. Remove the spill area.
+ DECREASE_FRAME (8 * 8 + 8 * 8 + /*padding*/ 8 + 8)
+ // Make a tail call to `artDeliverPendingExceptionFromCode()`.
+ // Rely on the JNI transition frame constructed in the JNI stub.
+ mov x0, xSELF // Pass Thread::Current().
+ b artDeliverPendingExceptionFromCode // (Thread*)
+END art_quick_lock_object_jni
+
+ /*
+ * Entry from JNI stub that tries to unlock the object in a fast path and calls
+ * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock
+ * is fatal, so we do not need to check for exceptions in the slow path.
+ * Custom calling convention:
+ * x15 holds the non-null object to unlock.
+ * Callee-save registers have been saved and can be used as temporaries.
+ * Return registers x0 and d0 need to be preserved.
+ */
+ENTRY art_quick_unlock_object_jni
+ UNLOCK_OBJECT_FAST_PATH x15, .Lunlock_object_jni_slow, /*can_be_null*/ 0
+
+ .Lunlock_object_jni_slow:
+ // Save return registers and return address.
+ stp x0, lr, [sp, #-32]!
+ .cfi_adjust_cfa_offset 32
+ .cfi_rel_offset lr, 8
+ str d0, [sp, #16]
+ // Call `artUnlockObjectFromJni()`
+ mov x0, x15 // Pass the object to unlock.
+ mov x1, xSELF // Pass Thread::Current().
+ bl artUnlockObjectFromJni // (Object* obj, Thread*)
+ // Restore return registers and return.
+ ldr d0, [sp, #16]
+ ldp x0, lr, [sp], #32
+ .cfi_adjust_cfa_offset -32
+ .cfi_restore lr
+ ret
+END art_quick_unlock_object_jni
+
+ /*
* Entry from managed code that calls artInstanceOfFromCode and on failure calls
* artThrowClassCastExceptionForObject.
*/
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 2f6af4f5de..d16f15ca21 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1133,145 +1133,236 @@ ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromC
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
-DEFINE_FUNCTION art_quick_lock_object
- testl %eax, %eax // null check object/eax
- jz .Lslow_lock
-.Lretry_lock:
- movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx // ecx := lock word
- test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx // test the 2 high bits.
- jne .Lslow_lock // slow path if either of the two high bits are set.
- movl %ecx, %edx // save lock word (edx) to keep read barrier bits.
- andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits.
- test %ecx, %ecx
- jnz .Lalready_thin // lock word contains a thin lock
- // unlocked case - edx: original lock word, eax: obj.
- movl %eax, %ecx // remember object in case of retry
- movl %edx, %eax // eax: lock word zero except for read barrier bits.
- movl %fs:THREAD_ID_OFFSET, %edx // load thread id.
- or %eax, %edx // edx: thread id with count of 0 + read barrier bits.
- lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) // eax: old val, edx: new val.
- jnz .Llock_cmpxchg_fail // cmpxchg failed retry
+MACRO4(LOCK_OBJECT_FAST_PATH, obj, tmp, saved_eax, slow_lock)
+1:
+ movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax // EAX := lock word
+ movl %fs:THREAD_ID_OFFSET, REG_VAR(tmp) // tmp: thread id.
+ xorl %eax, REG_VAR(tmp) // tmp: thread id with count 0 + read barrier bits.
+ testl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %eax // Test the non-gc bits.
+ jnz 2f // Check if unlocked.
+ // Unlocked case - store tmp: original lock word plus thread id, preserved read barrier bits.
+ // EAX: old val, tmp: new val.
+ lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+ jnz 1b // cmpxchg failed retry
+ .ifnc \saved_eax, none
+ movl REG_VAR(saved_eax), %eax // Restore EAX.
+ .endif
ret
-.Lalready_thin: // edx: lock word (with high 2 bits zero and original rb bits), eax: obj.
- movl %fs:THREAD_ID_OFFSET, %ecx // ecx := thread id
- cmpw %cx, %dx // do we hold the lock already?
- jne .Lslow_lock
- movl %edx, %ecx // copy the lock word to check count overflow.
- andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the read barrier bits.
- addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count for overflow check.
- test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // overflowed if the first gc state bit is set.
- jne .Lslow_lock // count overflowed so go slow
- movl %eax, %ecx // save obj to use eax for cmpxchg.
- movl %edx, %eax // copy the lock word as the old val for cmpxchg.
- addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real.
- // update lockword, cmpxchg necessary for read barrier bits.
- lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%ecx) // eax: old val, edx: new val.
- jnz .Llock_cmpxchg_fail // cmpxchg failed retry
+2: // EAX: original lock word, tmp: thread id ^ EAX
+ // Check lock word state and thread id together,
+ testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
+ REG_VAR(tmp)
+ jne \slow_lock // Slow path if either of the two high bits are set.
+ // Increment the recursive lock count.
+ leal LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
+ testl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED), REG_VAR(tmp)
+ jz \slow_lock // If count overflowed, go to slow lock.
+ // Update lockword for recursive lock, cmpxchg necessary for read barrier bits.
+ // EAX: old val, tmp: new val.
+ lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+ jnz 1b // cmpxchg failed retry
+ .ifnc \saved_eax, none
+ movl REG_VAR(saved_eax), %eax // Restore EAX.
+ .endif
ret
-.Llock_cmpxchg_fail:
- movl %ecx, %eax // restore eax
- jmp .Lretry_lock
-.Lslow_lock:
- SETUP_SAVE_REFS_ONLY_FRAME ebx // save ref containing registers for GC
- // Outgoing argument set up
- subl LITERAL(8), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(8)
- pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
- CFI_ADJUST_CFA_OFFSET(4)
- PUSH eax // pass object
- call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*)
- addl LITERAL(16), %esp // pop arguments
- CFI_ADJUST_CFA_OFFSET(-16)
- RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
- RETURN_IF_EAX_ZERO
+END_MACRO
+
+ /*
+ * Entry from managed code that tries to lock the object in a fast path and
+ * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
+ * EAX holds the possibly null object to lock.
+ */
+DEFINE_FUNCTION art_quick_lock_object
+ testl %eax, %eax
+ jz SYMBOL(art_quick_lock_object_no_inline)
+ movl %eax, %ecx // Move obj to a different register.
+ LOCK_OBJECT_FAST_PATH ecx, edx, /*saved_eax*/ none, .Llock_object_slow
+.Llock_object_slow:
+ movl %ecx, %eax // Move obj back to EAX.
+ jmp SYMBOL(art_quick_lock_object_no_inline)
END_FUNCTION art_quick_lock_object
+ /*
+ * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
+ * EAX holds the possibly null object to lock.
+ */
DEFINE_FUNCTION art_quick_lock_object_no_inline
+ // This is also the slow path for art_quick_lock_object.
SETUP_SAVE_REFS_ONLY_FRAME ebx // save ref containing registers for GC
// Outgoing argument set up
- subl LITERAL(8), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(8)
+ INCREASE_FRAME 8 // alignment padding
pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
CFI_ADJUST_CFA_OFFSET(4)
- PUSH eax // pass object
+ PUSH_ARG eax // pass object
call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*)
- addl LITERAL(16), %esp // pop arguments
- CFI_ADJUST_CFA_OFFSET(-16)
+ DECREASE_FRAME 16 // pop arguments
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object_no_inline
-
-DEFINE_FUNCTION art_quick_unlock_object
- testl %eax, %eax // null check object/eax
- jz .Lslow_unlock
-.Lretry_unlock:
- movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax), %ecx // ecx := lock word
- movl %fs:THREAD_ID_OFFSET, %edx // edx := thread id
- test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx
- jnz .Lslow_unlock // lock word contains a monitor
- cmpw %cx, %dx // does the thread id match?
- jne .Lslow_unlock
- movl %ecx, %edx // copy the lock word to detect new count of 0.
- andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx // zero the gc bits.
- cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
- jae .Lrecursive_thin_unlock
- // update lockword, cmpxchg necessary for read barrier bits.
- movl %eax, %edx // edx: obj
- movl %ecx, %eax // eax: old lock word.
- andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // ecx: new lock word zero except original rb bits.
+MACRO4(UNLOCK_OBJECT_FAST_PATH, obj, tmp, saved_eax, slow_unlock)
+1:
+ movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax // EAX := lock word
+ movl %fs:THREAD_ID_OFFSET, REG_VAR(tmp) // tmp := thread id
+ xorl %eax, REG_VAR(tmp) // tmp := thread id ^ lock word
+ test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), REG_VAR(tmp)
+ jnz 2f // Check if simply locked.
+ // Transition to unlocked.
#ifndef USE_READ_BARRIER
- movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+ movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
#else
- lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) // eax: old val, ecx: new val.
- jnz .Lunlock_cmpxchg_fail // cmpxchg failed retry
+ lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+ jnz 1b // cmpxchg failed retry
#endif
+ .ifnc \saved_eax, none
+ movl REG_VAR(saved_eax), %eax // Restore EAX.
+ .endif
ret
-.Lrecursive_thin_unlock: // ecx: original lock word, eax: obj
- // update lockword, cmpxchg necessary for read barrier bits.
- movl %eax, %edx // edx: obj
- movl %ecx, %eax // eax: old lock word.
- subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // ecx: new lock word with decremented count.
+2: // EAX: original lock word, tmp: lock_word ^ thread id
+ // Check lock word state and thread id together.
+ testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
+ REG_VAR(tmp)
+ jnz \slow_unlock
+ // Update lockword for recursive unlock, cmpxchg necessary for read barrier bits.
+ // tmp: new lock word with decremented count.
+ leal -LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
#ifndef USE_READ_BARRIER
- mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+ movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
#else
- lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx) // eax: old val, ecx: new val.
- jnz .Lunlock_cmpxchg_fail // cmpxchg failed retry
+ lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+ jnz 1b // cmpxchg failed retry
#endif
+ .ifnc \saved_eax, none
+ movl REG_VAR(saved_eax), %eax // Restore EAX.
+ .endif
ret
-.Lunlock_cmpxchg_fail: // edx: obj
- movl %edx, %eax // restore eax
- jmp .Lretry_unlock
-.Lslow_unlock:
- SETUP_SAVE_REFS_ONLY_FRAME ebx // save ref containing registers for GC
- // Outgoing argument set up
- subl LITERAL(8), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(8)
- pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
- CFI_ADJUST_CFA_OFFSET(4)
- PUSH eax // pass object
- call SYMBOL(artUnlockObjectFromCode) // artUnlockObjectFromCode(object, Thread*)
- addl LITERAL(16), %esp // pop arguments
- CFI_ADJUST_CFA_OFFSET(-16)
- RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
- RETURN_IF_EAX_ZERO
+END_MACRO
+
+ /*
+ * Entry from managed code that tries to unlock the object in a fast path and calls
+ * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
+ * EAX holds the possibly null object to unlock.
+ */
+DEFINE_FUNCTION art_quick_unlock_object
+ testl %eax, %eax
+ jz SYMBOL(art_quick_unlock_object_no_inline)
+ movl %eax, %ecx // Move obj to a different register.
+ UNLOCK_OBJECT_FAST_PATH ecx, edx, /*saved_eax*/ none, .Lunlock_object_slow
+.Lunlock_object_slow:
+ movl %ecx, %eax // Move obj back to EAX.
+ jmp SYMBOL(art_quick_unlock_object_no_inline)
END_FUNCTION art_quick_unlock_object
+ /*
+ * Entry from managed code that calls `artUnlockObjectFromCode()`
+ * and delivers exception on failure.
+ * EAX holds the possibly null object to unlock.
+ */
DEFINE_FUNCTION art_quick_unlock_object_no_inline
+ // This is also the slow path for art_quick_unlock_object.
SETUP_SAVE_REFS_ONLY_FRAME ebx // save ref containing registers for GC
// Outgoing argument set up
- subl LITERAL(8), %esp // alignment padding
- CFI_ADJUST_CFA_OFFSET(8)
+ INCREASE_FRAME 8 // alignment padding
pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
CFI_ADJUST_CFA_OFFSET(4)
- PUSH eax // pass object
+ PUSH_ARG eax // pass object
call SYMBOL(artUnlockObjectFromCode) // artUnlockObjectFromCode(object, Thread*)
- addl LITERAL(16), %esp // pop arguments
- CFI_ADJUST_CFA_OFFSET(-16)
+ DECREASE_FRAME 16 // pop arguments
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object_no_inline
+ /*
+ * Entry from JNI stub that tries to lock the object in a fast path and
+ * calls `artLockObjectFromCode()` (the same as for managed code) for the
+ * difficult cases, may block for GC.
+ * Custom calling convention:
+ * EBP holds the non-null object to lock.
+ * Callee-save registers have been saved and can be used as temporaries (except EBP).
+ * All argument registers need to be preserved.
+ */
+DEFINE_FUNCTION art_quick_lock_object_jni
+ movl %eax, %edi // Preserve EAX in a callee-save register.
+ LOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi, .Llock_object_jni_slow
+
+.Llock_object_jni_slow:
+ // Save register args EAX, ECX, EDX, EBX, mmx0-mmx3 and align stack.
+ PUSH_ARG ebx
+ PUSH_ARG edx
+ PUSH_ARG ecx
+ PUSH_ARG edi // Original contents of EAX.
+ INCREASE_FRAME (/*FPRs*/ 4 * 8 + /*padding*/ 4) // Make xmm<n> spill slots 8-byte aligned.
+ movsd %xmm0, 0(%esp)
+ movsd %xmm1, 8(%esp)
+ movsd %xmm2, 16(%esp)
+ movsd %xmm3, 24(%esp)
+ // Note: The stack is not 16-byte aligned here but it shall be after pushing args for the call.
+ // Call `artLockObjectFromCode()`
+ pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current().
+ CFI_ADJUST_CFA_OFFSET(4)
+ PUSH_ARG ebp // Pass the object to lock.
+ call SYMBOL(artLockObjectFromCode) // (object, Thread*)
+ // Check result.
+ testl %eax, %eax
+ jnz 1f
+ // Restore register args EAX, ECX, EDX, EBX, mmx0-mmx3 and return.
+ movsd 8(%esp), %xmm0
+ movsd 16(%esp), %xmm1
+ movsd 24(%esp), %xmm2
+ movsd 32(%esp), %xmm3
+ DECREASE_FRAME /*call args*/ 8 + /*FPR args*/ 4 * 8 + /*padding*/ 4
+ POP_ARG eax
+ POP_ARG ecx
+ POP_ARG edx
+ POP_ARG ebx
+ ret
+ .cfi_adjust_cfa_offset (/*call args*/ 8 + /*FPRs*/ 4 * 8 + /*padding*/ 4 + /*GPRs*/ 4 * 4)
+1:
+ // All args are irrelevant when throwing an exception.
+ // Remove the spill area except for new padding to align stack.
+ DECREASE_FRAME \
+ (/*call args*/ 8 + /*FPRs*/ 4 * 8 + /*padding*/ 4 + /*GPRs*/ 4 * 4 - /*new padding*/ 8)
+ // Rely on the JNI transition frame constructed in the JNI stub.
+ pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
+ CFI_ADJUST_CFA_OFFSET(4)
+ call SYMBOL(artDeliverPendingExceptionFromCode) // (Thread*)
+ UNREACHABLE
+END_FUNCTION art_quick_lock_object_jni
+
+ /*
+ * Entry from JNI stub that tries to unlock the object in a fast path and calls
+ * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock
+ * is fatal, so we do not need to check for exceptions in the slow path.
+ * Custom calling convention:
+ * EBP holds the non-null object to unlock.
+ * Callee-save registers have been saved and can be used as temporaries (except EBP).
+ * Return registers EAX, EDX and mmx0 need to be preserved.
+ */
+ .extern artUnlockObjectFromJni
+DEFINE_FUNCTION art_quick_unlock_object_jni
+ movl %eax, %edi // Preserve EAX in a different register.
+ UNLOCK_OBJECT_FAST_PATH ebp, esi, /*saved_eax*/ edi, .Lunlock_object_jni_slow
+
+ .Lunlock_object_jni_slow:
+ // Save return registers.
+ PUSH_ARG edx
+ PUSH_ARG edi // Original contents of EAX.
+ INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 4
+ movsd %xmm0, 0(%esp)
+ // Note: The stack is not 16-byte aligned here but it shall be after pushing args for the call.
+ // Call `artUnlockObjectFromJni()`
+ pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current().
+ CFI_ADJUST_CFA_OFFSET(4)
+ PUSH_ARG ebp // Pass the object to unlock.
+ call SYMBOL(artUnlockObjectFromJni) // (object, Thread*)
+ // Restore return registers and return.
+ movsd 8(%esp), %xmm0
+ DECREASE_FRAME /*call args*/ 8 + /*xmm0*/ 8 + /*padding*/ 4
+ POP_ARG eax
+ POP_ARG edx
+ ret
+END_FUNCTION art_quick_unlock_object_jni
+
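
One x86-specific wrinkle worth noting: `lock cmpxchg` hard-wires EAX as the expected value and rewrites it on failure, so the 32-bit JNI variants first stash the caller's EAX in a callee-save register (EDI) and hand it back through the macros' `saved_eax` argument. A tiny sketch of the same primitive in portable C++ (an illustration of the instruction's semantics, not ART code):

    #include <atomic>
    #include <cstdint>

    // Compiles to `lock cmpxchg` on x86: compares *lock_word against `expected`
    // (which the hardware keeps in EAX) and stores `desired` only on a match;
    // on a mismatch `expected` is rewritten with the observed value, which is
    // what forces the assembly above to reload and retry at label 1.
    bool InstallLockWord(std::atomic<uint32_t>& lock_word,
                         uint32_t expected,
                         uint32_t desired) {
      return lock_word.compare_exchange_strong(expected, desired,
                                               std::memory_order_acquire);
    }
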
DEFINE_FUNCTION art_quick_instance_of
PUSH eax // alignment padding
PUSH ecx // pass arg2 - obj->klass
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 136198fe55..06715858a1 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1068,48 +1068,50 @@ ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromC
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
-DEFINE_FUNCTION art_quick_lock_object
- testl %edi, %edi // Null check object/rdi.
- jz .Lslow_lock
-.Lretry_lock:
- movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx // ecx := lock word.
- test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx // Test the 2 high bits.
- jne .Lslow_lock // Slow path if either of the two high bits are set.
- movl %ecx, %edx // save lock word (edx) to keep read barrier bits.
- andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits.
- test %ecx, %ecx
- jnz .Lalready_thin // Lock word contains a thin lock.
- // unlocked case - edx: original lock word, edi: obj.
- movl %edx, %eax // eax: lock word zero except for read barrier bits.
- movl %gs:THREAD_ID_OFFSET, %edx // edx := thread id
- or %eax, %edx // edx: thread id with count of 0 + read barrier bits.
- lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
- jnz .Lretry_lock // cmpxchg failed retry
+MACRO3(LOCK_OBJECT_FAST_PATH, obj, tmp, slow_lock)
+1:
+ movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax // EAX := lock word
+ movl %gs:THREAD_ID_OFFSET, REG_VAR(tmp) // tmp: thread id.
+ xorl %eax, REG_VAR(tmp) // tmp: thread id with count 0 + read barrier bits.
+ testl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %eax // Test the non-gc bits.
+ jnz 2f // Check if unlocked.
+ // Unlocked case - store tmp: original lock word plus thread id, preserved read barrier bits.
+ lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+ jnz 1b // cmpxchg failed retry
ret
-.Lalready_thin: // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
- movl %gs:THREAD_ID_OFFSET, %ecx // ecx := thread id
- cmpw %cx, %dx // do we hold the lock already?
- jne .Lslow_lock
- movl %edx, %ecx // copy the lock word to check count overflow.
- andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx // zero the gc bits.
- addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx // increment recursion count
- test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx // overflowed if the upper bit (28) is set
- jne .Lslow_lock // count overflowed so go slow
- movl %edx, %eax // copy the lock word as the old val for cmpxchg.
- addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx // increment recursion count again for real.
- // update lockword, cmpxchg necessary for read barrier bits.
- lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, edx: new val.
- jnz .Lretry_lock // cmpxchg failed retry
+2: // EAX: original lock word, tmp: thread id ^ EAX
+ // Check lock word state and thread id together,
+ testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
+ REG_VAR(tmp)
+ jne \slow_lock // Slow path if either of the two high bits are set.
+ // Increment the recursive lock count.
+ leal LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
+ testl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_MASK_SHIFTED), REG_VAR(tmp)
+ je \slow_lock // If count overflowed, go to slow lock.
+ // Update lockword for recursive lock, cmpxchg necessary for read barrier bits.
+ // EAX: old val, tmp: new val.
+ lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+ jnz 1b // cmpxchg failed retry
ret
-.Lslow_lock:
- SETUP_SAVE_REFS_ONLY_FRAME
- movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
- call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*)
- RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
- RETURN_IF_EAX_ZERO
+END_MACRO
+
+ /*
+ * Entry from managed code that tries to lock the object in a fast path and
+ * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
+ * RDI holds the possibly null object to lock.
+ */
+DEFINE_FUNCTION art_quick_lock_object
+ testq %rdi, %rdi // Null check object.
+ jz art_quick_lock_object_no_inline
+ LOCK_OBJECT_FAST_PATH rdi, ecx, art_quick_lock_object_no_inline
END_FUNCTION art_quick_lock_object
+ /*
+ * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
+ * RDI holds the possibly null object to lock.
+ */
DEFINE_FUNCTION art_quick_lock_object_no_inline
+ // This is also the slow path for art_quick_lock_object.
SETUP_SAVE_REFS_ONLY_FRAME
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*)
@@ -1117,50 +1119,63 @@ DEFINE_FUNCTION art_quick_lock_object_no_inline
RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object_no_inline
-DEFINE_FUNCTION art_quick_unlock_object
- testl %edi, %edi // null check object/edi
- jz .Lslow_unlock
-.Lretry_unlock:
- movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx // ecx := lock word
- movl %gs:THREAD_ID_OFFSET, %edx // edx := thread id
- test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx
- jnz .Lslow_unlock // lock word contains a monitor
- cmpw %cx, %dx // does the thread id match?
- jne .Lslow_unlock
- movl %ecx, %edx // copy the lock word to detect new count of 0.
- andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx // zero the gc bits.
- cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
- jae .Lrecursive_thin_unlock
- // update lockword, cmpxchg necessary for read barrier bits.
- movl %ecx, %eax // eax: old lock word.
- andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx // ecx: new lock word zero except original gc bits.
+MACRO4(UNLOCK_OBJECT_FAST_PATH, obj, tmp, saved_rax, slow_unlock)
+1:
+ movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj)), %eax // EAX := lock word
+ movl %gs:THREAD_ID_OFFSET, REG_VAR(tmp) // tmp := thread id
+ xorl %eax, REG_VAR(tmp) // tmp := thread id ^ lock word
+ test LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), REG_VAR(tmp)
+ jnz 2f // Check if simply locked.
+ // Transition to unlocked.
#ifndef USE_READ_BARRIER
- movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
+ movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
#else
- lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, ecx: new val.
- jnz .Lretry_unlock // cmpxchg failed retry
+ lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+ jnz 1b // cmpxchg failed retry
#endif
+ .ifnc \saved_rax, none
+ movq REG_VAR(saved_rax), %rax // Restore RAX.
+ .endif
ret
-.Lrecursive_thin_unlock: // ecx: original lock word, edi: obj
- // update lockword, cmpxchg necessary for read barrier bits.
- movl %ecx, %eax // eax: old lock word.
- subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
+2: // EAX: original lock word, tmp: lock_word ^ thread id
+ // Check lock word state and thread id together.
+ testl LITERAL(LOCK_WORD_STATE_MASK_SHIFTED | LOCK_WORD_THIN_LOCK_OWNER_MASK_SHIFTED), \
+ REG_VAR(tmp)
+ jnz \slow_unlock
+ // Update lockword for recursive unlock, cmpxchg necessary for read barrier bits.
+ // tmp: new lock word with decremented count.
+ leal -LOCK_WORD_THIN_LOCK_COUNT_ONE(%eax), REG_VAR(tmp)
#ifndef USE_READ_BARRIER
- mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
+ movl REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
#else
- lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi) // eax: old val, ecx: new val.
- jnz .Lretry_unlock // cmpxchg failed retry
+ lock cmpxchg REG_VAR(tmp), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(obj))
+ jnz 1b // cmpxchg failed retry
#endif
+ .ifnc \saved_rax, none
+ movq REG_VAR(saved_rax), %rax // Restore RAX.
+ .endif
ret
-.Lslow_unlock:
- SETUP_SAVE_REFS_ONLY_FRAME
- movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
- call SYMBOL(artUnlockObjectFromCode) // artUnlockObjectFromCode(object, Thread*)
- RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
- RETURN_IF_EAX_ZERO
+END_MACRO
+
+ /*
+ * Entry from managed code that tries to unlock the object in a fast path and calls
+ * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
+ * RDI holds the possibly null object to unlock.
+ */
+DEFINE_FUNCTION art_quick_unlock_object
+ testq %rdi, %rdi // Null check object.
+ jz art_quick_unlock_object_no_inline
+ UNLOCK_OBJECT_FAST_PATH rdi, ecx, /*saved_rax*/ none, art_quick_unlock_object_no_inline
END_FUNCTION art_quick_unlock_object
+ /*
+ * Entry from managed code that calls `artUnlockObjectFromCode()`
+ * and delivers exception on failure.
+ * RDI holds the possibly null object to unlock.
+ */
DEFINE_FUNCTION art_quick_unlock_object_no_inline
+ // This is also the slow path for art_quick_unlock_object.
SETUP_SAVE_REFS_ONLY_FRAME
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
call SYMBOL(artUnlockObjectFromCode) // artUnlockObjectFromCode(object, Thread*)
@@ -1168,6 +1183,97 @@ DEFINE_FUNCTION art_quick_unlock_object_no_inline
RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object_no_inline
+ /*
+ * Entry from JNI stub that tries to lock the object in a fast path and
+ * calls `artLockObjectFromCode()` (the same as for managed code) for the
+ * difficult cases, may block for GC.
+ * Custom calling convention:
+ * RBX holds the non-null object to lock.
+ * Callee-save registers have been saved and can be used as temporaries (except RBX).
+ * All argument registers need to be preserved.
+ */
+DEFINE_FUNCTION art_quick_lock_object_jni
+ LOCK_OBJECT_FAST_PATH rbx, ebp, .Llock_object_jni_slow
+
+.Llock_object_jni_slow:
+ // Save register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and align stack.
+ PUSH_ARG r9
+ PUSH_ARG r8
+ PUSH_ARG rcx
+ PUSH_ARG rdx
+ PUSH_ARG rsi
+ PUSH_ARG rdi
+ INCREASE_FRAME (/*FPRs*/ 8 * 8 + /*padding*/ 8)
+ movsd %xmm0, 0(%rsp)
+ movsd %xmm1, 8(%rsp)
+ movsd %xmm2, 16(%rsp)
+ movsd %xmm3, 24(%rsp)
+ movsd %xmm4, 32(%rsp)
+ movsd %xmm5, 40(%rsp)
+ movsd %xmm6, 48(%rsp)
+ movsd %xmm7, 56(%rsp)
+ // Call `artLockObjectFromCode()`
+ movq %rbx, %rdi // Pass the object to lock.
+ movq %gs:THREAD_SELF_OFFSET, %rsi // Pass Thread::Current().
+ call SYMBOL(artLockObjectFromCode) // (object, Thread*)
+ // Check result.
+ testl %eax, %eax
+ jnz 1f
+ // Restore register args RDI, RSI, RDX, RCX, R8, R9, mmx0-mmx7 and return.
+ movsd 0(%rsp), %xmm0
+ movsd 8(%rsp), %xmm1
+ movsd 16(%rsp), %xmm2
+ movsd 24(%rsp), %xmm3
+ movsd 32(%rsp), %xmm4
+ movsd 40(%rsp), %xmm5
+ movsd 48(%rsp), %xmm6
+ movsd 56(%rsp), %xmm7
+ DECREASE_FRAME /*FPR args*/ 8 * 8 + /*padding*/ 8
+ POP_ARG rdi
+ POP_ARG rsi
+ POP_ARG rdx
+ POP_ARG rcx
+ POP_ARG r8
+ POP_ARG r9
+ ret
+ .cfi_adjust_cfa_offset (/*FPRs*/ 8 * 8 + /*padding*/ 8 + /*GPRs*/ 6 * 8)
+1:
+ // All args are irrelevant when throwing an exception. Remove the spill area.
+ DECREASE_FRAME (/*FPRs*/ 8 * 8 + /*padding*/ 8 + /*GPRs*/ 6 * 8)
+ // Rely on the JNI transition frame constructed in the JNI stub.
+ movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread::Current().
+ jmp SYMBOL(artDeliverPendingExceptionFromCode) // (Thread*); tail call.
+END_FUNCTION art_quick_lock_object_jni
+
+ /*
+ * Entry from JNI stub that tries to unlock the object in a fast path and calls
+ * `artUnlockObjectFromJni()` for the difficult cases. Note that failure to unlock
+ * is fatal, so we do not need to check for exceptions in the slow path.
+ * Custom calling convention:
+ * RBX holds the non-null object to unlock.
+ * Callee-save registers have been saved and can be used as temporaries (except RBX).
+ * Return registers RAX and mmx0 need to be preserved.
+ */
+DEFINE_FUNCTION art_quick_unlock_object_jni
+ movq %rax, %r12 // Preserve RAX in a different register.
+ UNLOCK_OBJECT_FAST_PATH rbx, ebp, /*saved_rax*/ r12, .Lunlock_object_jni_slow
+
+ .Lunlock_object_jni_slow:
+ // Save return registers and return address.
+ PUSH_ARG r12 // Original contents of RAX.
+ INCREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8
+ movsd %xmm0, 0(%rsp)
+ // Call `artUnlockObjectFromJni()`
+ movq %rbx, %rdi // Pass the object to unlock.
+ movq %gs:THREAD_SELF_OFFSET, %rsi // Pass Thread::Current().
+ call SYMBOL(artUnlockObjectFromJni) // (object, Thread*)
+ // Restore return registers and return.
+ movsd 0(%rsp), %xmm0
+ DECREASE_FRAME /*mmx0*/ 8 + /*padding*/ 8
+ POP_ARG rax
+ ret
+END_FUNCTION art_quick_unlock_object_jni
+
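
Across all four architectures the object reaches these stubs in a register outside the managed argument set (r4, x15, EBP, RBX respectively), presumably because every argument register must stay live across the lock call for the upcoming native invocation. For scale, a hypothetical native method that fills every x86-64 argument register (illustrative only; the class and method names are made up):

    #include <jni.h>

    // JNIEnv*/jobject take RDI/RSI, the jints take RDX, RCX, R8, R9, and the
    // jdoubles take XMM0-XMM7; nothing is left over for the lock object, hence
    // the custom calling convention documented above.
    extern "C" JNIEXPORT jdouble JNICALL
    Java_Example_manyArgs(JNIEnv*, jobject,
                          jint a, jint b, jint c, jint d,
                          jdouble f0, jdouble f1, jdouble f2, jdouble f3,
                          jdouble f4, jdouble f5, jdouble f6, jdouble f7) {
      return a + b + c + d + f0 + f1 + f2 + f3 + f4 + f5 + f6 + f7;
    }
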
DEFINE_FUNCTION art_quick_check_instance_of
// Type check using the bit string passes null as the target class. In that case just throw.
testl %esi, %esi
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 3333b5fe0e..fd6bf1fb86 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -777,23 +777,27 @@ inline bool NeedsClinitCheckBeforeCall(ArtMethod* method) {
return method->IsStatic() && !method->IsConstructor();
}
-inline jobject GetGenericJniSynchronizationObject(Thread* self, ArtMethod* called)
+inline ObjPtr<mirror::Object> GetGenericJniSynchronizationObject(Thread* self, ArtMethod* called)
REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(!called->IsCriticalNative());
DCHECK(!called->IsFastNative());
DCHECK(self->GetManagedStack()->GetTopQuickFrame() != nullptr);
DCHECK_EQ(*self->GetManagedStack()->GetTopQuickFrame(), called);
+ // We do not need read barriers here.
+ // On method entry, all reference arguments are to-space references and we mark the
+ // declaring class of a static native method if needed. When visiting thread roots at
+ // the start of a GC, we visit all these references to ensure they point to the to-space.
if (called->IsStatic()) {
// Static methods synchronize on the declaring class object.
- // The `jclass` is a pointer to the method's declaring class.
- return reinterpret_cast<jobject>(called->GetDeclaringClassAddressWithoutBarrier());
+ return called->GetDeclaringClass<kWithoutReadBarrier>();
} else {
// Instance methods synchronize on the `this` object.
// The `this` reference is stored in the first out vreg in the caller's frame.
- // The `jobject` is a pointer to the spill slot.
uint8_t* sp = reinterpret_cast<uint8_t*>(self->GetManagedStack()->GetTopQuickFrame());
size_t frame_size = RuntimeCalleeSaveFrame::GetFrameSize(CalleeSaveType::kSaveRefsAndArgs);
- return reinterpret_cast<jobject>(sp + frame_size + static_cast<size_t>(kRuntimePointerSize));
+ StackReference<mirror::Object>* this_ref = reinterpret_cast<StackReference<mirror::Object>*>(
+ sp + frame_size + static_cast<size_t>(kRuntimePointerSize));
+ return this_ref->AsMirrorPtr();
}
}
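
With the `jobject` indirection gone, callers receive the synchronization object directly as an `ObjPtr`. Condensed, the two call sites updated later in this change (quick_trampoline_entrypoints.cc and quick_jni_entrypoints.cc) use it as follows (a sketch that only makes sense inside the ART tree):

    {  // Method entry (artQuickGenericJniTrampoline) for a synchronized native method.
      ObjPtr<mirror::Object> lock = GetGenericJniSynchronizationObject(self, called);
      lock->MonitorEnter(self);                  // May leave an exception pending; the caller checks.
    }
    {  // Method exit (GenericJniMethodEnd).
      ObjPtr<mirror::Object> lock = GetGenericJniSynchronizationObject(self, called);
      artUnlockObjectFromJni(lock.Ptr(), self);  // Failure to unlock is fatal.
    }
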
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 72b4c030f8..4731a867d2 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -217,7 +217,7 @@ bool NeedsClinitCheckBeforeCall(ArtMethod* method) REQUIRES_SHARED(Locks::mutato
// Returns the synchronization object for a native method for a GenericJni frame
// we have just created or are about to exit. The synchronization object is
// the class object for static methods and the `this` object otherwise.
-jobject GetGenericJniSynchronizationObject(Thread* self, ArtMethod* called)
+ObjPtr<mirror::Object> GetGenericJniSynchronizationObject(Thread* self, ArtMethod* called)
REQUIRES_SHARED(Locks::mutator_lock_);
// Update .bss method entrypoint if the `callee_reference` has an associated oat file
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index 6ecf3fd59c..f43e25fec1 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -114,9 +114,13 @@ extern "C" void art_quick_invoke_super_trampoline_with_access_check(uint32_t, vo
extern "C" void art_quick_invoke_virtual_trampoline_with_access_check(uint32_t, void*);
-// JNI read barrier entrypoint.
+// JNI read barrier entrypoint. Note: Preserves all registers.
extern "C" void art_read_barrier_jni(art::ArtMethod* method);
+// JNI lock/unlock entrypoints. Note: Custom calling convention.
+extern "C" void art_quick_lock_object_jni(art::mirror::Object*);
+extern "C" void art_quick_unlock_object_jni(art::mirror::Object*);
+
// Polymorphic invoke entrypoints.
extern "C" void art_quick_invoke_polymorphic(uint32_t, void*);
extern "C" void art_quick_invoke_custom(uint32_t, void*);
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 9f1766d3f2..df52e2344d 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -74,13 +74,12 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qp
// JNI
qpoints->pJniMethodStart = JniMethodStart;
- qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
qpoints->pJniMethodEnd = JniMethodEnd;
- qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
- qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
qpoints->pJniDecodeReferenceResult = JniDecodeReferenceResult;
+ qpoints->pJniLockObject = art_quick_lock_object_jni;
+ qpoints->pJniUnlockObject = art_quick_unlock_object_jni;
// Locks
if (UNLIKELY(VLOG_IS_ON(systrace_lock_logging))) {
@@ -137,12 +136,8 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qp
PaletteShouldReportJniInvocations(&should_report);
if (should_report) {
qpoints->pJniMethodStart = JniMonitoredMethodStart;
- qpoints->pJniMethodStartSynchronized = JniMonitoredMethodStartSynchronized;
qpoints->pJniMethodEnd = JniMonitoredMethodEnd;
- qpoints->pJniMethodEndSynchronized = JniMonitoredMethodEndSynchronized;
qpoints->pJniMethodEndWithReference = JniMonitoredMethodEndWithReference;
- qpoints->pJniMethodEndWithReferenceSynchronized =
- JniMonitoredMethodEndWithReferenceSynchronized;
}
}
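
These two table slots are what compiled JNI stubs call through; the custom calling convention is described next to the `art_quick_*_jni` assembly above. Expressed as plain C++ for illustration (a sketch; real stubs make the call from generated code via a fixed thread offset, not through a C++ pointer like this):

    // `qpoints` is the per-thread QuickEntryPoints table populated above.
    void LockForSynchronizedJni(QuickEntryPoints* qpoints, mirror::Object* obj) {
      qpoints->pJniLockObject(obj);    // Dispatches to art_quick_lock_object_jni.
    }
    void UnlockForSynchronizedJni(QuickEntryPoints* qpoints, mirror::Object* obj) {
      qpoints->pJniUnlockObject(obj);  // Dispatches to art_quick_unlock_object_jni.
    }
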
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 377a63ee41..cf5c697b76 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -55,35 +55,19 @@ struct PACKED(4) QuickEntryPoints {
// JNI entrypoints.
// TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI.
extern void JniMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
-extern void JniMethodStartSynchronized(jobject to_lock, Thread* self)
- NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
extern void JniMethodEnd(Thread* self)
NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
-extern void JniMethodEndSynchronized(jobject locked, Thread* self)
- NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
extern mirror::Object* JniMethodEndWithReference(jobject result, Thread* self)
NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
-extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result,
- jobject locked,
- Thread* self)
- NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
extern mirror::Object* JniDecodeReferenceResult(jobject result, Thread* self)
NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
// JNI entrypoints when monitoring entry/exit.
extern void JniMonitoredMethodStart(Thread* self) NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
-extern void JniMonitoredMethodStartSynchronized(jobject to_lock, Thread* self)
- NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
extern void JniMonitoredMethodEnd(Thread* self)
NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
-extern void JniMonitoredMethodEndSynchronized(jobject locked, Thread* self)
- NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
extern mirror::Object* JniMonitoredMethodEndWithReference(jobject result, Thread* self)
NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
-extern mirror::Object* JniMonitoredMethodEndWithReferenceSynchronized(jobject result,
- jobject locked,
- Thread* self)
- NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
extern "C" mirror::String* artStringBuilderAppend(uint32_t format,
@@ -93,6 +77,8 @@ extern "C" mirror::String* artStringBuilderAppend(uint32_t format,
extern "C" void artReadBarrierJni(ArtMethod* method)
REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR;
+extern "C" void artUnlockObjectFromJni(mirror::Object* locked, Thread* self)
+ REQUIRES_SHARED(Locks::mutator_lock_) HOT_ATTR;
// Read barrier entrypoints.
//
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index a77e849d32..09ce9438ea 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -73,12 +73,11 @@
V(AputObject, void, mirror::Array*, int32_t, mirror::Object*) \
\
V(JniMethodStart, void, Thread*) \
- V(JniMethodStartSynchronized, void, jobject, Thread*) \
V(JniMethodEnd, void, Thread*) \
- V(JniMethodEndSynchronized, void, jobject, Thread*) \
V(JniMethodEndWithReference, mirror::Object*, jobject, Thread*) \
- V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject, jobject, Thread*) \
V(JniDecodeReferenceResult, mirror::Object*, jobject, Thread*) \
+ V(JniLockObject, void, mirror::Object*) \
+ V(JniUnlockObject, void, mirror::Object*) \
V(QuickGenericJniTrampoline, void, ArtMethod*) \
\
V(LockObject, void, mirror::Object*) \
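
Each `V(name, return type, argument types...)` row in this list is expanded in several places to declare the entrypoint field, its offset, and so on. A stripped-down sketch of the pattern (names prefixed with Example are invented; only the two new rows are shown):

    namespace art { namespace mirror { class Object; } }

    #define EXAMPLE_JNI_ENTRYPOINT_LIST(V)            \
      V(JniLockObject, void, art::mirror::Object*)    \
      V(JniUnlockObject, void, art::mirror::Object*)

    struct ExampleEntryPoints {
    #define DECLARE_ENTRYPOINT(name, rettype, ...) rettype (*p##name)(__VA_ARGS__);
      EXAMPLE_JNI_ENTRYPOINT_LIST(DECLARE_ENTRYPOINT)
    #undef DECLARE_ENTRYPOINT
    };
    // ExampleEntryPoints now has fields pJniLockObject and pJniUnlockObject,
    // matching the names checked in entrypoints_order_test.cc and thread.cc below.
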
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 2ea3c2aca9..95072130a9 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -69,11 +69,6 @@ extern void JniMethodStart(Thread* self) {
self->TransitionFromRunnableToSuspended(kNative);
}
-extern void JniMethodStartSynchronized(jobject to_lock, Thread* self) {
- self->DecodeJObject(to_lock)->MonitorEnter(self);
- JniMethodStart(self);
-}
-
// TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI.
static void GoToRunnable(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
if (kIsDebugBuild) {
@@ -95,8 +90,11 @@ static void PopLocalReferences(uint32_t saved_local_ref_cookie, Thread* self)
}
// TODO: annotalysis disabled as monitor semantics are maintained in Java code.
-static inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self)
+extern "C" void artUnlockObjectFromJni(mirror::Object* locked, Thread* self)
NO_THREAD_SAFETY_ANALYSIS REQUIRES(!Roles::uninterruptible_) {
+ // Note: No thread suspension is allowed for successful unlocking, otherwise the plain
+ // `mirror::Object*` return value saved by the assembly stub would need to be updated.
+ uintptr_t old_poison_object_cookie = kIsDebugBuild ? self->GetPoisonObjectCookie() : 0u;
// Save any pending exception over monitor exit call.
ObjPtr<mirror::Throwable> saved_exception = nullptr;
if (UNLIKELY(self->IsExceptionPending())) {
@@ -104,17 +102,22 @@ static inline void UnlockJniSynchronizedMethod(jobject locked, Thread* self)
self->ClearException();
}
// Decode locked object and unlock, before popping local references.
- self->DecodeJObject(locked)->MonitorExit(self);
+ locked->MonitorExit(self);
if (UNLIKELY(self->IsExceptionPending())) {
- LOG(FATAL) << "Synchronized JNI code returning with an exception:\n"
- << saved_exception->Dump()
- << "\nEncountered second exception during implicit MonitorExit:\n"
- << self->GetException()->Dump();
+ LOG(FATAL) << "Exception during implicit MonitorExit for synchronized native method:\n"
+ << self->GetException()->Dump()
+ << (saved_exception != nullptr
+ ? "\nAn exception was already pending:\n" + saved_exception->Dump()
+ : "");
+ UNREACHABLE();
}
// Restore pending exception.
if (saved_exception != nullptr) {
self->SetException(saved_exception);
}
+ if (kIsDebugBuild) {
+ DCHECK_EQ(old_poison_object_cookie, self->GetPoisonObjectCookie());
+ }
}
// TODO: These should probably be templatized or macro-ized.
@@ -124,11 +127,6 @@ extern void JniMethodEnd(Thread* self) {
GoToRunnable(self);
}
-extern void JniMethodEndSynchronized(jobject locked, Thread* self) {
- GoToRunnable(self);
- UnlockJniSynchronizedMethod(locked, self); // Must decode before pop.
-}
-
extern mirror::Object* JniDecodeReferenceResult(jobject result, Thread* self)
REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(!self->IsExceptionPending());
@@ -168,14 +166,6 @@ extern mirror::Object* JniMethodEndWithReference(jobject result, Thread* self) {
return JniMethodEndWithReferenceHandleResult(result, self);
}
-extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result,
- jobject locked,
- Thread* self) {
- GoToRunnable(self);
- UnlockJniSynchronizedMethod(locked, self);
- return JniMethodEndWithReferenceHandleResult(result, self);
-}
-
extern uint64_t GenericJniMethodEnd(Thread* self,
uint32_t saved_local_ref_cookie,
jvalue result,
@@ -206,9 +196,9 @@ extern uint64_t GenericJniMethodEnd(Thread* self,
// locked object.
if (called->IsSynchronized()) {
DCHECK(normal_native) << "@FastNative/@CriticalNative and synchronize is not supported";
- jobject lock = GetGenericJniSynchronizationObject(self, called);
+ ObjPtr<mirror::Object> lock = GetGenericJniSynchronizationObject(self, called);
DCHECK(lock != nullptr);
- UnlockJniSynchronizedMethod(lock, self);
+ artUnlockObjectFromJni(lock.Ptr(), self);
}
char return_shorty_char = called->GetShorty()[0];
if (return_shorty_char == 'L') {
@@ -258,32 +248,14 @@ extern void JniMonitoredMethodStart(Thread* self) {
MONITOR_JNI(PaletteNotifyBeginJniInvocation);
}
-extern void JniMonitoredMethodStartSynchronized(jobject to_lock, Thread* self) {
- JniMethodStartSynchronized(to_lock, self);
- MONITOR_JNI(PaletteNotifyBeginJniInvocation);
-}
-
extern void JniMonitoredMethodEnd(Thread* self) {
MONITOR_JNI(PaletteNotifyEndJniInvocation);
JniMethodEnd(self);
}
-extern void JniMonitoredMethodEndSynchronized(jobject locked, Thread* self) {
- MONITOR_JNI(PaletteNotifyEndJniInvocation);
- JniMethodEndSynchronized(locked, self);
-}
-
extern mirror::Object* JniMonitoredMethodEndWithReference(jobject result, Thread* self) {
MONITOR_JNI(PaletteNotifyEndJniInvocation);
return JniMethodEndWithReference(result, self);
}
-extern mirror::Object* JniMonitoredMethodEndWithReferenceSynchronized(
- jobject result,
- jobject locked,
- Thread* self) {
- MONITOR_JNI(PaletteNotifyEndJniInvocation);
- return JniMethodEndWithReferenceSynchronized(result, locked, self);
-}
-
} // namespace art
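
The exception save/restore above matters for synchronized native methods that return with a pending exception: the implicit MonitorExit must not clobber it, and a second exception from MonitorExit itself is fatal. A hypothetical native method that exercises that path (class and method names are made up for illustration; the Java declaration would be `synchronized native`):

    #include <jni.h>

    extern "C" JNIEXPORT void JNICALL
    Java_Example_throwingSynchronizedNative(JNIEnv* env, jobject /*thiz*/) {
      jclass ise = env->FindClass("java/lang/IllegalStateException");
      env->ThrowNew(ise, "must still be pending after the implicit unlock");
      // Returning here runs the synchronized-method exit path, which calls
      // artUnlockObjectFromJni(); the pending exception is saved around
      // MonitorExit and restored before returning to the caller.
    }
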
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index c14dee42ec..e214577f7b 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -2062,11 +2062,14 @@ void BuildGenericJniFrameVisitor::Visit() {
* needed and return to the stub.
*
* The return value is the pointer to the native code, null on failure.
+ *
+ * NO_THREAD_SAFETY_ANALYSIS: Depending on the use case, the trampoline may
+ * or may not lock a synchronization object and transition out of Runnable.
*/
extern "C" const void* artQuickGenericJniTrampoline(Thread* self,
ArtMethod** managed_sp,
uintptr_t* reserved_area)
- REQUIRES_SHARED(Locks::mutator_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) NO_THREAD_SAFETY_ANALYSIS {
// Note: We cannot walk the stack properly until fixed up below.
ArtMethod* called = *managed_sp;
DCHECK(called->IsNative()) << called->PrettyMethod(true);
@@ -2121,14 +2124,14 @@ extern "C" const void* artQuickGenericJniTrampoline(Thread* self,
if (LIKELY(normal_native)) {
// Start JNI.
if (called->IsSynchronized()) {
- jobject lock = GetGenericJniSynchronizationObject(self, called);
- JniMethodStartSynchronized(lock, self);
+ ObjPtr<mirror::Object> lock = GetGenericJniSynchronizationObject(self, called);
+ DCHECK(lock != nullptr);
+ lock->MonitorEnter(self);
if (self->IsExceptionPending()) {
return nullptr; // Report error.
}
- } else {
- JniMethodStart(self);
}
+ JniMethodStart(self);
} else {
DCHECK(!called->IsSynchronized())
<< "@FastNative/@CriticalNative and synchronize is not supported";
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index c19e000d1e..c3f1dba967 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -217,18 +217,16 @@ class EntrypointsOrderTest : public CommonRuntimeTest {
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pGetObjInstance, pGetObjStatic, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pGetObjStatic, pAputObject, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAputObject, pJniMethodStart, sizeof(void*));
- EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodStartSynchronized,
- sizeof(void*));
- EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStartSynchronized, pJniMethodEnd,
- sizeof(void*));
- EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodEndSynchronized, sizeof(void*));
- EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndSynchronized, pJniMethodEndWithReference,
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodStart, pJniMethodEnd, sizeof(void*));
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEnd, pJniMethodEndWithReference,
sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReference,
- pJniMethodEndWithReferenceSynchronized, sizeof(void*));
- EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReferenceSynchronized,
pJniDecodeReferenceResult, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniDecodeReferenceResult,
+ pJniLockObject, sizeof(void*));
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniLockObject,
+ pJniUnlockObject, sizeof(void*));
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniUnlockObject,
pQuickGenericJniTrampoline, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pQuickGenericJniTrampoline, pLockObject, sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pLockObject, pUnlockObject, sizeof(void*));
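
These checks pin the new fields next to their neighbours so that assembly and generated code can address the table with fixed offsets from the Thread. The same invariant, written as a self-contained sketch (ExampleEntryPoints is illustrative; the real struct is QuickEntryPoints):

    #include <cstddef>

    struct ExampleEntryPoints {
      void (*pJniDecodeReferenceResult)(void*);
      void (*pJniLockObject)(void*);
      void (*pJniUnlockObject)(void*);
    };
    static_assert(offsetof(ExampleEntryPoints, pJniUnlockObject) -
                      offsetof(ExampleEntryPoints, pJniLockObject) == sizeof(void*),
                  "adjacent entrypoints must be exactly one pointer apart");
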
diff --git a/runtime/oat.h b/runtime/oat.h
index acb3d30fa2..0b6bf7db91 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@ class InstructionSetFeatures;
class PACKED(4) OatHeader {
public:
static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
- // Last oat version changed reason: JNI: Faster mutator locking during transition.
- static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '1', '2', '\0' } };
+ // Last oat version changed reason: JNI: Rewrite locking for synchronized methods.
+ static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '1', '3', '\0' } };
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 9fb8d62147..46aa38e035 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3475,12 +3475,11 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) {
QUICK_ENTRY_POINT_INFO(pGetObjStatic)
QUICK_ENTRY_POINT_INFO(pAputObject)
QUICK_ENTRY_POINT_INFO(pJniMethodStart)
- QUICK_ENTRY_POINT_INFO(pJniMethodStartSynchronized)
QUICK_ENTRY_POINT_INFO(pJniMethodEnd)
- QUICK_ENTRY_POINT_INFO(pJniMethodEndSynchronized)
QUICK_ENTRY_POINT_INFO(pJniMethodEndWithReference)
- QUICK_ENTRY_POINT_INFO(pJniMethodEndWithReferenceSynchronized)
QUICK_ENTRY_POINT_INFO(pJniDecodeReferenceResult)
+ QUICK_ENTRY_POINT_INFO(pJniLockObject)
+ QUICK_ENTRY_POINT_INFO(pJniUnlockObject)
QUICK_ENTRY_POINT_INFO(pQuickGenericJniTrampoline)
QUICK_ENTRY_POINT_INFO(pLockObject)
QUICK_ENTRY_POINT_INFO(pUnlockObject)