JNI: Faster mutator locking during transition.

Add a mutator lock pointer to `Thread`. This makes retrieving
the pointer faster on ARM and ARM64 and makes it accessible
to JNI stubs if we decide to inline `JniMethodStart()` and
`JniMethodEnd()`.
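
As a minimal illustration (a simplified sketch, not the actual ART
declarations): on ARM and ARM64 the current `Thread*` lives in a
reserved register, so reading a `Thread` member is a single
register-relative load, while reading `Locks::mutator_lock_` first
has to materialize the global's address.

  class MutatorMutex;

  struct Locks {
    // Global lock pointer: its address must be materialized before the load.
    static MutatorMutex* mutator_lock_;
  };

  class Thread {
   public:
    // Single load off `this` (the thread register on ARM/ARM64).
    MutatorMutex* GetMutatorLock() { return tlsPtr_.mutator_lock; }
   private:
    struct TlsPtr {
      MutatorMutex* mutator_lock;  // cached copy of Locks::mutator_lock_
    } tlsPtr_;
  };
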
Pass the lock level `kMutatorLock` explicitly from the
`MutatorMutex` functions so the compiler can evaluate many of
the conditions statically and avoid unnecessary code.
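
A self-contained sketch of why the explicit level helps (names and
levels simplified, not the real `BaseMutex` code): once the caller
passes the compile-time constant `kMutatorLock` and the `*Impl`
function is inlined, the level comparisons fold away and only the
`SetHeldMutex()` update remains.

  enum LockLevel {
    kThreadWaitLock,
    kThreadWaitWakeLock,
    kMutatorLock,
    kMonitorLock,
    kLockLevelCount
  };

  struct Thread {
    void SetHeldMutex(LockLevel level, void* mutex) { held_mutexes[level] = mutex; }
    void* held_mutexes[kLockLevelCount] = {};
  };

  inline void RegisterAsUnlockedImpl(Thread* self, LockLevel level) {
    if (level != kMonitorLock) {       // statically true for kMutatorLock
      if (level == kThreadWaitLock) {  // statically false for kMutatorLock
        level = kThreadWaitWakeLock;   // (the real code also checks GetHeldMutex())
      }
      self->SetHeldMutex(level, nullptr);
    }
  }

  int main() {
    Thread self;
    RegisterAsUnlockedImpl(&self, kMutatorLock);  // `level` is a constant here
  }
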
Golem results for art-opt-cc (higher is better):
linux-armv7                      before after
NativeDowncallStaticNormal       6.3694 7.2394 (+13.66%)
NativeDowncallStaticNormal6      6.0663 6.8527 (+12.96%)
NativeDowncallStaticNormalRefs6  5.7061 6.3945 (+12.06%)
NativeDowncallVirtualNormal      5.7088 7.2081 (+26.26%)
NativeDowncallVirtualNormal6     5.4563 6.7929 (+24.49%)
NativeDowncallVirtualNormalRefs6 5.1595 6.3415 (+22.91%)

linux-armv8                      before after
NativeDowncallStaticNormal       6.4229 7.0423 (+9.642%)
NativeDowncallStaticNormal6      6.2651 6.8527 (+9.379%)
NativeDowncallStaticNormalRefs6  5.8824 6.3976 (+8.760%)
NativeDowncallVirtualNormal      6.2651 6.8527 (+9.379%)
NativeDowncallVirtualNormal6     6.0663 6.6163 (+9.066%)
NativeDowncallVirtualNormalRefs6 5.6630 6.1408 (+8.436%)

There does not seem to be a measurable difference for x86
and x86-64.
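
(As a worked example of how the percentages are derived: for
NativeDowncallStaticNormal on linux-armv7, 7.2394 / 6.3694 ≈ 1.1366,
i.e. a 13.66% higher score.)
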
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 172332525
Change-Id: I2ad511a2fe7bac250549c43789cf3fb5e2de9e25
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 2a1a08d..dba1e12 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -93,7 +93,12 @@
CheckUnattachedThread(level_);
return;
}
- LockLevel level = level_;
+ RegisterAsLockedImpl(self, level_);
+}
+
+inline void BaseMutex::RegisterAsLockedImpl(Thread* self, LockLevel level) {
+ DCHECK(self != nullptr);
+ DCHECK_EQ(level_, level);
// It would be nice to avoid this condition checking in the non-debug case,
// but that would make the various methods that check if a mutex is held not
// work properly for thread wait locks. Since the vast majority of lock
@@ -159,8 +164,13 @@
CheckUnattachedThread(level_);
return;
}
- if (level_ != kMonitorLock) {
- auto level = level_;
+ RegisterAsUnlockedImpl(self, level_);
+}
+
+inline void BaseMutex::RegisterAsUnlockedImpl(Thread* self, LockLevel level) {
+ DCHECK(self != nullptr);
+ DCHECK_EQ(level_, level);
+ if (level != kMonitorLock) {
if (UNLIKELY(level == kThreadWaitLock) && self->GetHeldMutex(kThreadWaitWakeLock) == this) {
level = kThreadWaitWakeLock;
}
@@ -292,11 +302,11 @@
inline void MutatorMutex::TransitionFromRunnableToSuspended(Thread* self) {
AssertSharedHeld(self);
- RegisterAsUnlocked(self);
+ RegisterAsUnlockedImpl(self, kMutatorLock);
}
inline void MutatorMutex::TransitionFromSuspendedToRunnable(Thread* self) {
- RegisterAsLocked(self);
+ RegisterAsLockedImpl(self, kMutatorLock);
AssertSharedHeld(self);
}
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 8f2a8ea..87e9525 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -106,8 +106,11 @@
// Add this mutex to those owned by self, and perform appropriate checking.
// For this call only, self may also be another suspended thread.
void RegisterAsLocked(Thread* self);
+ void RegisterAsLockedImpl(Thread* self, LockLevel level);
void RegisterAsUnlocked(Thread* self);
+ void RegisterAsUnlockedImpl(Thread* self, LockLevel level);
+
void CheckSafeToWait(Thread* self);
friend class ScopedContentionRecorder;
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 609f081..c19e000 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -126,7 +126,8 @@
sizeof(void*) * kNumRosAllocThreadLocalSizeBracketsInThread);
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_top, thread_local_alloc_stack_end,
sizeof(void*));
- EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_end, held_mutexes, sizeof(void*));
+ EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_end, mutator_lock, sizeof(void*));
+ EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mutator_lock, held_mutexes, sizeof(void*));
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, held_mutexes, flip_function,
sizeof(void*) * kLockLevelCount);
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, flip_function, method_verifier, sizeof(void*));
diff --git a/runtime/oat.h b/runtime/oat.h
index bc9a2ca..acb3d30 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
class PACKED(4) OatHeader {
public:
static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
- // Last oat version changed reason: Inlining across dex files for bss within OAT.
- static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '1', '1', '\0' } };
+ // Last oat version changed reason: JNI: Faster mutator locking during transition.
+ static constexpr std::array<uint8_t, 4> kOatVersion { { '2', '1', '2', '\0' } };
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 9d96e9d..f5bf5fb 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -75,7 +75,7 @@
for (int i = kLockLevelCount - 1; i >= 0; --i) {
BaseMutex* held_mutex = self->GetHeldMutex(static_cast<LockLevel>(i));
if (held_mutex != nullptr &&
- held_mutex != Locks::mutator_lock_ &&
+ held_mutex != GetMutatorLock() &&
held_mutex != cond_var_mutex) {
CHECK(Locks::IsExpectedOnWeakRefAccess(held_mutex))
<< "Holding unexpected mutex " << held_mutex->GetName()
@@ -150,7 +150,7 @@
if (check_locks) {
bool bad_mutexes_held = false;
for (int i = kLockLevelCount - 1; i >= 0; --i) {
- // We expect no locks except the mutator_lock_. User code suspension lock is OK as long as
+ // We expect no locks except the mutator lock. User code suspension lock is OK as long as
// we aren't going to be held suspended due to SuspendReason::kForUserCode.
if (i != kMutatorLock && i != kUserCodeSuspensionLock) {
BaseMutex* held_mutex = GetHeldMutex(static_cast<LockLevel>(i));
@@ -234,8 +234,8 @@
DCHECK_EQ(this, Thread::Current());
// Change to non-runnable state, thereby appearing suspended to the system.
TransitionToSuspendedAndRunCheckpoints(new_state);
- // Mark the release of the share of the mutator_lock_.
- Locks::mutator_lock_->TransitionFromRunnableToSuspended(this);
+ // Mark the release of the share of the mutator lock.
+ GetMutatorLock()->TransitionFromRunnableToSuspended(this);
// Once suspended - check the active suspend barrier flag
PassActiveSuspendBarriers();
}
@@ -246,7 +246,7 @@
int16_t old_state = old_state_and_flags.as_struct.state;
DCHECK_NE(static_cast<ThreadState>(old_state), kRunnable);
do {
- Locks::mutator_lock_->AssertNotHeld(this); // Otherwise we starve GC..
+ GetMutatorLock()->AssertNotHeld(this); // Otherwise we starve GC.
old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
DCHECK_EQ(old_state_and_flags.as_struct.state, old_state);
if (LIKELY(old_state_and_flags.as_struct.flags == 0)) {
@@ -260,8 +260,8 @@
if (LIKELY(tls32_.state_and_flags.as_atomic_int.CompareAndSetWeakAcquire(
old_state_and_flags.as_int,
new_state_and_flags.as_int))) {
- // Mark the acquisition of a share of the mutator_lock_.
- Locks::mutator_lock_->TransitionFromSuspendedToRunnable(this);
+ // Mark the acquisition of a share of the mutator lock.
+ GetMutatorLock()->TransitionFromSuspendedToRunnable(this);
break;
}
} else if ((old_state_and_flags.as_struct.flags & kActiveSuspendBarrier) != 0) {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index d54330a..9fb8d62 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2297,6 +2297,8 @@
is_runtime_thread_(false) {
wait_mutex_ = new Mutex("a thread wait mutex", LockLevel::kThreadWaitLock);
wait_cond_ = new ConditionVariable("a thread wait condition variable", *wait_mutex_);
+ tlsPtr_.mutator_lock = Locks::mutator_lock_;
+ DCHECK(tlsPtr_.mutator_lock != nullptr);
tlsPtr_.instrumentation_stack =
new std::map<uintptr_t, instrumentation::InstrumentationStackFrame>;
tlsPtr_.name = new std::string(kThreadNameDuringStartup);
diff --git a/runtime/thread.h b/runtime/thread.h
index 7e60582..9478980 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1346,6 +1346,11 @@
return old_state;
}
+ MutatorMutex* GetMutatorLock() RETURN_CAPABILITY(Locks::mutator_lock_) {
+ DCHECK_EQ(tlsPtr_.mutator_lock, Locks::mutator_lock_);
+ return tlsPtr_.mutator_lock;
+ }
+
void VerifyStackImpl() REQUIRES_SHARED(Locks::mutator_lock_);
void DumpState(std::ostream& os) const REQUIRES_SHARED(Locks::mutator_lock_);
@@ -1639,6 +1644,7 @@
thread_local_objects(0),
thread_local_alloc_stack_top(nullptr),
thread_local_alloc_stack_end(nullptr),
+ mutator_lock(nullptr),
flip_function(nullptr),
method_verifier(nullptr),
thread_local_mark_stack(nullptr),
@@ -1782,6 +1788,10 @@
StackReference<mirror::Object>* thread_local_alloc_stack_top;
StackReference<mirror::Object>* thread_local_alloc_stack_end;
+ // Pointer to the mutator lock.
+ // This is the same as `Locks::mutator_lock_` but cached for faster state transitions.
+ MutatorMutex* mutator_lock;
+
// Support for Mutex lock hierarchy bug detection.
BaseMutex* held_mutexes[kLockLevelCount];