author Hans Boehm <hboehm@google.com> 2017-12-12 11:05:32 -0800
committer Hans Boehm <hboehm@google.com> 2017-12-15 10:53:19 -0800
commit ae915a0182db98769ed4851fab440e79e012babd (patch)
tree 05d9411ed11fe95ccbcc73f4300c819563bbbf3d
parent 9e73b32fed15d262b0393f114b9602ac7ef88917 (diff)
Improve scoped spinlock implementations
Both ScopedAllMutexesLock and ScopedExpectedMutexesOnWeakRefAccessLock really implement simple spinlocks, but they do so in an unorthodox way, with a CAS on unlock. I see no reason for that. Use the standard (and faster) idiom instead.

The NanoSleep(100) waiting logic was probably suboptimal and definitely misleading. I timed NanoSleep(100) on a Linux 4.4 host, and it takes about 60 usecs, i.e. 60,000 nsecs. By comparison, a no-op sched_yield takes about 1 usec. This replaces it with waiting logic that should be generically usable. This is no doubt overkill, but the hope is that we can eventually reuse this where it matters more.

Test: Built and booted AOSP.
Change-Id: I6e47508ecb8d5e5d0b4f08c8e8f073ad7b1d192e
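For readers following along outside the ART tree, here is a minimal sketch of the idiom the message describes, written against plain std::atomic rather than ART's Atomic<> wrapper. The names ScopedSpinLock, gGuard and BackOffSketch are illustrative only and do not appear in the patch; the real change operates on the existing guard words via CompareExchangeWeakAcquire and StoreRelease.

// Illustrative sketch only; not part of the patch. Mirrors the acquire-with-backoff /
// plain-release-unlock idiom using std::atomic instead of ART's Atomic<> wrapper.
#include <atomic>
#include <cstdint>
#include <ctime>
#include <sched.h>

namespace {

std::atomic<const void*> gGuard{nullptr};  // nullptr means "unlocked".

// Roughly mirrors the BackOff() added in the diff: spin for small i,
// yield for medium i, sleep for large i.
void BackOffSketch(uint32_t i) {
  static constexpr uint32_t kSpinMax = 10;
  static constexpr uint32_t kYieldMax = 20;
  if (i <= kSpinMax) {
    volatile uint32_t x = 0;
    for (uint32_t spin = 0; spin < 10 * i; ++spin) {
      ++x;  // Volatile busy work the compiler cannot elide.
    }
  } else if (i <= kYieldMax) {
    sched_yield();
  } else {
    timespec ts = {0, static_cast<long>(1000 * (i - kYieldMax))};
    nanosleep(&ts, nullptr);
  }
}

class ScopedSpinLock {
 public:
  explicit ScopedSpinLock(const void* owner) : owner_(owner) {
    // Acquire: CAS from "unlocked" (nullptr) to our tag, backing off on each failure.
    const void* expected = nullptr;
    for (uint32_t i = 0;
         !gGuard.compare_exchange_weak(expected, owner_, std::memory_order_acquire);
         ++i) {
      expected = nullptr;  // compare_exchange_weak rewrites expected on failure.
      BackOffSketch(i);
    }
  }
  ~ScopedSpinLock() {
    // Release: only the holder can write the guard back to "unlocked", so a plain
    // release store suffices; no CAS is needed on the unlock path.
    gGuard.store(nullptr, std::memory_order_release);
  }
 private:
  const void* owner_;
};

}  // namespace

The design point carried by the patch is visible in the destructor: since only the lock holder ever resets the guard, the unlock path needs no CAS retry loop at all, and the CAS only appears on the acquire side where contention is actually possible.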
-rw-r--r-- runtime/base/mutex.cc | 45
1 file changed, 33 insertions(+), 12 deletions(-)
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 3adebe7e00..7324dff974 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -102,18 +102,40 @@ static bool ComputeRelativeTimeSpec(timespec* result_ts, const timespec& lhs, co
}
#endif
+// Wait for an amount of time that roughly increases in the argument i.
+// Spin for small arguments and yield/sleep for longer ones.
+static void BackOff(uint32_t i) {
+ static constexpr uint32_t kSpinMax = 10;
+ static constexpr uint32_t kYieldMax = 20;
+ if (i <= kSpinMax) {
+ // TODO: Esp. in very latency-sensitive cases, consider replacing this with an explicit
+ // test-and-test-and-set loop in the caller. Possibly skip entirely on a uniprocessor.
+ volatile uint32_t x = 0;
+ const uint32_t spin_count = 10 * i;
+ for (uint32_t spin = 0; spin < spin_count; ++spin) {
+ ++x; // Volatile; hence should not be optimized away.
+ }
+ // TODO: Consider adding x86 PAUSE and/or ARM YIELD here.
+ } else if (i <= kYieldMax) {
+ sched_yield();
+ } else {
+ NanoSleep(1000ull * (i - kYieldMax));
+ }
+}
+
class ScopedAllMutexesLock FINAL {
public:
explicit ScopedAllMutexesLock(const BaseMutex* mutex) : mutex_(mutex) {
- while (!gAllMutexData->all_mutexes_guard.CompareExchangeWeakAcquire(0, mutex)) {
- NanoSleep(100);
+ for (uint32_t i = 0;
+ !gAllMutexData->all_mutexes_guard.CompareExchangeWeakAcquire(0, mutex);
+ ++i) {
+ BackOff(i);
}
}
~ScopedAllMutexesLock() {
- while (!gAllMutexData->all_mutexes_guard.CompareExchangeWeakRelease(mutex_, 0)) {
- NanoSleep(100);
- }
+ DCHECK_EQ(gAllMutexData->all_mutexes_guard.LoadRelaxed(), mutex_);
+ gAllMutexData->all_mutexes_guard.StoreRelease(0);
}
private:
@@ -123,17 +145,16 @@ class ScopedAllMutexesLock FINAL {
class Locks::ScopedExpectedMutexesOnWeakRefAccessLock FINAL {
public:
explicit ScopedExpectedMutexesOnWeakRefAccessLock(const BaseMutex* mutex) : mutex_(mutex) {
- while (!Locks::expected_mutexes_on_weak_ref_access_guard_.CompareExchangeWeakAcquire(0,
- mutex)) {
- NanoSleep(100);
+ for (uint32_t i = 0;
+ !Locks::expected_mutexes_on_weak_ref_access_guard_.CompareExchangeWeakAcquire(0, mutex);
+ ++i) {
+ BackOff(i);
}
}
~ScopedExpectedMutexesOnWeakRefAccessLock() {
- while (!Locks::expected_mutexes_on_weak_ref_access_guard_.CompareExchangeWeakRelease(mutex_,
- 0)) {
- NanoSleep(100);
- }
+ DCHECK_EQ(Locks::expected_mutexes_on_weak_ref_access_guard_.LoadRelaxed(), mutex_);
+ Locks::expected_mutexes_on_weak_ref_access_guard_.StoreRelease(0);
}
private: