Support unlimited pending checkpoints
Prevents the spinning that used to happen if RunCheckpoint was called
with 3 pending checkpoints. This spinning was done when holding
thread_list_lock_ and thread_suspend_count_lock_ and could deadlock
if any of the pending checkpoints required any of these locks.
The fix is to use an overflow list instead of having a fixed limit of
3.
Changed suspend stress test to have more threads and only compare last
line since there may be libbacktrace spam like:
"+E/libbacktrace(69891): void SignalHandler(int, siginfo_t *, void *):
Timed out waiting for unwind thread to indicate it completed."
Bug: 28988206
Change-Id: I2ae611506147d5199d59a08eee0395f7fa35d448
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 8eb3742..480644a 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -128,7 +128,7 @@
art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value())
// Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 168 * __SIZEOF_POINTER__)
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 166 * __SIZEOF_POINTER__)
ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value())
// Offset of field Thread::tlsPtr_.thread_local_pos.
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index c621672..91deea0 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -112,10 +112,12 @@
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, name, pthread_self, sizeof(void*));
EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, pthread_self, last_no_thread_suspension_cause,
sizeof(void*));
- EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, last_no_thread_suspension_cause, checkpoint_functions,
+ EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, last_no_thread_suspension_cause, checkpoint_function,
sizeof(void*));
- EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_functions, jni_entrypoints,
- sizeof(void*) * 6);
+ EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, checkpoint_function, active_suspend_barriers,
+ sizeof(void*));
+ EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, active_suspend_barriers, jni_entrypoints,
+ sizeof(Thread::tls_ptr_sized_values::active_suspend_barriers));
// Skip across the entrypoints structures.
diff --git a/runtime/oat.h b/runtime/oat.h
index 57675dc..286394e 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- static constexpr uint8_t kOatVersion[] = { '0', '7', '9', '\0' };
+ static constexpr uint8_t kOatVersion[] = { '0', '8', '0', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/thread.cc b/runtime/thread.cc
index f1f4a12..28f863e 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1122,32 +1122,36 @@
}
void Thread::RunCheckpointFunction() {
- Closure *checkpoints[kMaxCheckpoints];
-
- // Grab the suspend_count lock and copy the current set of
- // checkpoints. Then clear the list and the flag. The RequestCheckpoint
- // function will also grab this lock so we prevent a race between setting
- // the kCheckpointRequest flag and clearing it.
- {
- MutexLock mu(this, *Locks::thread_suspend_count_lock_);
- for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
- checkpoints[i] = tlsPtr_.checkpoint_functions[i];
- tlsPtr_.checkpoint_functions[i] = nullptr;
+ bool done = false;
+ do {
+ // Grab the suspend_count lock and copy the checkpoints one by one. When the last checkpoint is
+ // copied, clear the list and the flag. The RequestCheckpoint function will also grab this lock
+ // to prevent a race between setting the kCheckpointRequest flag and clearing it.
+ Closure* checkpoint = nullptr;
+ {
+ MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+ if (tlsPtr_.checkpoint_function != nullptr) {
+ checkpoint = tlsPtr_.checkpoint_function;
+ if (!checkpoint_overflow_.empty()) {
+ // Overflow list not empty, copy the first one out and continue.
+ tlsPtr_.checkpoint_function = checkpoint_overflow_.front();
+ checkpoint_overflow_.pop_front();
+ } else {
+ // No overflow checkpoints, this means that we are on the last pending checkpoint.
+ tlsPtr_.checkpoint_function = nullptr;
+ AtomicClearFlag(kCheckpointRequest);
+ done = true;
+ }
+ } else {
+ LOG(FATAL) << "Checkpoint flag set without pending checkpoint";
+ }
}
- AtomicClearFlag(kCheckpointRequest);
- }
- // Outside the lock, run all the checkpoint functions that
- // we collected.
- bool found_checkpoint = false;
- for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
- if (checkpoints[i] != nullptr) {
- ScopedTrace trace("Run checkpoint function");
- checkpoints[i]->Run(this);
- found_checkpoint = true;
- }
- }
- CHECK(found_checkpoint);
+ // Outside the lock, run the checkpoint functions that we collected.
+ ScopedTrace trace("Run checkpoint function");
+ DCHECK(checkpoint != nullptr);
+ checkpoint->Run(this);
+ } while (!done);
}
bool Thread::RequestCheckpoint(Closure* function) {
@@ -1157,20 +1161,6 @@
return false; // Fail, thread is suspended and so can't run a checkpoint.
}
- uint32_t available_checkpoint = kMaxCheckpoints;
- for (uint32_t i = 0 ; i < kMaxCheckpoints; ++i) {
- if (tlsPtr_.checkpoint_functions[i] == nullptr) {
- available_checkpoint = i;
- break;
- }
- }
- if (available_checkpoint == kMaxCheckpoints) {
- // No checkpoint functions available, we can't run a checkpoint
- return false;
- }
- tlsPtr_.checkpoint_functions[available_checkpoint] = function;
-
- // Checkpoint function installed now install flag bit.
// We must be runnable to request a checkpoint.
DCHECK_EQ(old_state_and_flags.as_struct.state, kRunnable);
union StateAndFlags new_state_and_flags;
@@ -1178,11 +1168,13 @@
new_state_and_flags.as_struct.flags |= kCheckpointRequest;
bool success = tls32_.state_and_flags.as_atomic_int.CompareExchangeStrongSequentiallyConsistent(
old_state_and_flags.as_int, new_state_and_flags.as_int);
- if (UNLIKELY(!success)) {
- // The thread changed state before the checkpoint was installed.
- CHECK_EQ(tlsPtr_.checkpoint_functions[available_checkpoint], function);
- tlsPtr_.checkpoint_functions[available_checkpoint] = nullptr;
- } else {
+ if (success) {
+ // Succeeded setting checkpoint flag, now insert the actual checkpoint.
+ if (tlsPtr_.checkpoint_function == nullptr) {
+ tlsPtr_.checkpoint_function = function;
+ } else {
+ checkpoint_overflow_.push_back(function);
+ }
CHECK_EQ(ReadFlag(kCheckpointRequest), true);
TriggerSuspend();
}
@@ -1624,9 +1616,7 @@
std::fill(tlsPtr_.rosalloc_runs,
tlsPtr_.rosalloc_runs + kNumRosAllocThreadLocalSizeBracketsInThread,
gc::allocator::RosAlloc::GetDedicatedFullRun());
- for (uint32_t i = 0; i < kMaxCheckpoints; ++i) {
- tlsPtr_.checkpoint_functions[i] = nullptr;
- }
+ tlsPtr_.checkpoint_function = nullptr;
for (uint32_t i = 0; i < kMaxSuspendBarriers; ++i) {
tlsPtr_.active_suspend_barriers[i] = nullptr;
}
@@ -1767,9 +1757,8 @@
}
CHECK_NE(GetState(), kRunnable);
CHECK_NE(ReadFlag(kCheckpointRequest), true);
- CHECK(tlsPtr_.checkpoint_functions[0] == nullptr);
- CHECK(tlsPtr_.checkpoint_functions[1] == nullptr);
- CHECK(tlsPtr_.checkpoint_functions[2] == nullptr);
+ CHECK(tlsPtr_.checkpoint_function == nullptr);
+ CHECK_EQ(checkpoint_overflow_.size(), 0u);
CHECK(tlsPtr_.flip_function == nullptr);
CHECK_EQ(tls32_.suspended_at_suspend_check, false);
diff --git a/runtime/thread.h b/runtime/thread.h
index 3c367ee..7ae9be5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1220,9 +1220,6 @@
static void ThreadExitCallback(void* arg);
- // Maximum number of checkpoint functions.
- static constexpr uint32_t kMaxCheckpoints = 3;
-
// Maximum number of suspend barriers.
static constexpr uint32_t kMaxSuspendBarriers = 3;
@@ -1452,9 +1449,9 @@
// If no_thread_suspension_ is > 0, what is causing that assertion.
const char* last_no_thread_suspension_cause;
- // Pending checkpoint function or null if non-pending. Installation guarding by
- // Locks::thread_suspend_count_lock_.
- Closure* checkpoint_functions[kMaxCheckpoints];
+ // Pending checkpoint function or null if non-pending. If this checkpoint is set and someone\
+ // requests another checkpoint, it goes to the checkpoint overflow list.
+ Closure* checkpoint_function GUARDED_BY(Locks::thread_suspend_count_lock_);
// Pending barriers that require passing or NULL if non-pending. Installation guarding by
// Locks::thread_suspend_count_lock_.
@@ -1517,6 +1514,9 @@
// Debug disable read barrier count, only is checked for debug builds and only in the runtime.
uint8_t debug_disallow_read_barrier_ = 0;
+ // Pending extra checkpoints if checkpoint_function_ is already used.
+ std::list<Closure*> checkpoint_overflow_ GUARDED_BY(Locks::thread_suspend_count_lock_);
+
friend class Dbg; // For SetStateUnsafe.
friend class gc::collector::SemiSpace; // For getting stack traces.
friend class Runtime; // For CreatePeer.
diff --git a/test/149-suspend-all-stress/check b/test/149-suspend-all-stress/check
new file mode 100755
index 0000000..d30b888
--- /dev/null
+++ b/test/149-suspend-all-stress/check
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Only compare the last line.
+tail -n 1 "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
\ No newline at end of file
diff --git a/test/149-suspend-all-stress/expected.txt b/test/149-suspend-all-stress/expected.txt
index f993efc..134d8d0 100644
--- a/test/149-suspend-all-stress/expected.txt
+++ b/test/149-suspend-all-stress/expected.txt
@@ -1,2 +1 @@
-JNI_OnLoad called
Finishing
diff --git a/test/149-suspend-all-stress/src/Main.java b/test/149-suspend-all-stress/src/Main.java
index aa94fc9..6a27c4b 100644
--- a/test/149-suspend-all-stress/src/Main.java
+++ b/test/149-suspend-all-stress/src/Main.java
@@ -17,7 +17,7 @@
import java.util.Map;
public class Main implements Runnable {
- static final int numberOfThreads = 4;
+ static final int numberOfThreads = 8;
public static void main(String[] args) throws Exception {
System.loadLibrary(args[0]);
diff --git a/test/149-suspend-all-stress/suspend_all.cc b/test/149-suspend-all-stress/suspend_all.cc
index de479a5..5d2be08 100644
--- a/test/149-suspend-all-stress/suspend_all.cc
+++ b/test/149-suspend-all-stress/suspend_all.cc
@@ -22,7 +22,7 @@
extern "C" JNIEXPORT void JNICALL Java_Main_suspendAndResume(JNIEnv*, jclass) {
static constexpr size_t kInitialSleepUS = 100 * 1000; // 100ms.
- static constexpr size_t kIterations = 500;
+ static constexpr size_t kIterations = 250;
usleep(kInitialSleepUS); // Leave some time for threads to get in here before we start suspending.
enum Operation {
kOPSuspendAll,