summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alex Light <allight@google.com> 2017-11-01 09:29:53 -0700
committer Alex Light <allight@google.com> 2017-11-01 11:47:01 -0700
commitdf00a1ed1a0b633a1e66f1f650f53c22ea260e5b (patch)
tree82b965a44ce46f51def31e1e311a26ee991ee868
parenta2cbb2b0723a3fa05cc44e13cbe90543b3236883 (diff)
Prevent abort in situations with recursive checkpoints
In situations where there were multiple checkpoints queued and the first one causes the thread to suspend itself again then the RunCheckpointFunction function will hit a LOG(FATAL) and abort. This is because the recursive checkpoint will clear out the checkpoint backlog and the first RunCheckpointFunction invocation will unexpectedly find itself without any more checkpoints to run and abort. To fix this, and simplify the code at the same time, we have changed the RunCheckpointFunction method to (as its name suggests) only run a single checkpoint function. It will pop this function off the stack of pending checkpoints and run it relying on the caller to ensure that all pending checkpoints are handled. This is fine since, due to the multithreaded nature of checkpoints, the caller must call RunCheckpointFunction in a loop anyway to ensure it does not advance until all checkpoints have been handled. We add test 203-multi-checkpoints that tests that the checkpoint system does not fall over if there are multiple checkpoints some of which can suspend. Bug: 67838964 Test: ./test.py --host -j50 Change-Id: Ib6a3e083e6069d4839647d194bee6849d973633e
-rw-r--r--runtime/thread.cc48
-rw-r--r--runtime/thread.h3
-rw-r--r--test/203-multi-checkpoint/expected.txt5
-rw-r--r--test/203-multi-checkpoint/info.txt4
-rw-r--r--test/203-multi-checkpoint/multi_checkpoint.cc90
-rw-r--r--test/203-multi-checkpoint/src/Main.java59
-rw-r--r--test/Android.bp1
7 files changed, 181 insertions, 29 deletions
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 47ffb4e13f..83ebe9931d 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1346,36 +1346,26 @@ void Thread::ClearSuspendBarrier(AtomicInteger* target) {
}
void Thread::RunCheckpointFunction() {
- bool done = false;
- do {
- // Grab the suspend_count lock and copy the checkpoints one by one. When the last checkpoint is
- // copied, clear the list and the flag. The RequestCheckpoint function will also grab this lock
- // to prevent a race between setting the kCheckpointRequest flag and clearing it.
- Closure* checkpoint = nullptr;
- {
- MutexLock mu(this, *Locks::thread_suspend_count_lock_);
- if (tlsPtr_.checkpoint_function != nullptr) {
- checkpoint = tlsPtr_.checkpoint_function;
- if (!checkpoint_overflow_.empty()) {
- // Overflow list not empty, copy the first one out and continue.
- tlsPtr_.checkpoint_function = checkpoint_overflow_.front();
- checkpoint_overflow_.pop_front();
- } else {
- // No overflow checkpoints, this means that we are on the last pending checkpoint.
- tlsPtr_.checkpoint_function = nullptr;
- AtomicClearFlag(kCheckpointRequest);
- done = true;
- }
- } else {
- LOG(FATAL) << "Checkpoint flag set without pending checkpoint";
- }
+ // Grab the suspend_count lock, get the next checkpoint and update all the checkpoint fields. If
+ // there are no more checkpoints we will also clear the kCheckpointRequest flag.
+ Closure* checkpoint;
+ {
+ MutexLock mu(this, *Locks::thread_suspend_count_lock_);
+ checkpoint = tlsPtr_.checkpoint_function;
+ if (!checkpoint_overflow_.empty()) {
+ // Overflow list not empty, copy the first one out and continue.
+ tlsPtr_.checkpoint_function = checkpoint_overflow_.front();
+ checkpoint_overflow_.pop_front();
+ } else {
+ // No overflow checkpoints. Clear the kCheckpointRequest flag
+ tlsPtr_.checkpoint_function = nullptr;
+ AtomicClearFlag(kCheckpointRequest);
}
-
- // Outside the lock, run the checkpoint functions that we collected.
- ScopedTrace trace("Run checkpoint function");
- DCHECK(checkpoint != nullptr);
- checkpoint->Run(this);
- } while (!done);
+ }
+ // Outside the lock, run the checkpoint function.
+ ScopedTrace trace("Run checkpoint function");
+ CHECK(checkpoint != nullptr) << "Checkpoint flag set without pending checkpoint";
+ checkpoint->Run(this);
}
void Thread::RunEmptyCheckpoint() {
diff --git a/runtime/thread.h b/runtime/thread.h
index 42b38da0b9..3b917bab9b 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1356,6 +1356,9 @@ class Thread {
WARN_UNUSED
REQUIRES(Locks::thread_suspend_count_lock_);
+ // Runs a single checkpoint function. If there are no more pending checkpoint functions it will
+ // clear the kCheckpointRequest flag. The caller is responsible for calling this in a loop until
+ // the kCheckpointRequest flag is cleared.
void RunCheckpointFunction();
void RunEmptyCheckpoint();
diff --git a/test/203-multi-checkpoint/expected.txt b/test/203-multi-checkpoint/expected.txt
new file mode 100644
index 0000000000..e1e30e3f1d
--- /dev/null
+++ b/test/203-multi-checkpoint/expected.txt
@@ -0,0 +1,5 @@
+JNI_OnLoad called
+Other thread running
+pushing checkpoints
+checkpoints pushed
+Passed!
diff --git a/test/203-multi-checkpoint/info.txt b/test/203-multi-checkpoint/info.txt
new file mode 100644
index 0000000000..a96ba97d2a
--- /dev/null
+++ b/test/203-multi-checkpoint/info.txt
@@ -0,0 +1,4 @@
+Test that we correctly handle checkpoints that suspend.
+
+This could cause problems with asserts when there were multiple checkpoints
+queued and earlier ones suspended.
diff --git a/test/203-multi-checkpoint/multi_checkpoint.cc b/test/203-multi-checkpoint/multi_checkpoint.cc
new file mode 100644
index 0000000000..0799b6ed2d
--- /dev/null
+++ b/test/203-multi-checkpoint/multi_checkpoint.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "art_method-inl.h"
+#include "base/mutex-inl.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
+#include "thread_pool.h"
+
+namespace art {
+
+struct TestClosure : public Closure {
+ bool first_run_start;
+ bool first_run_end;
+ bool second_run;
+ bool second_run_interleaved;
+
+ void Run(Thread* self) OVERRIDE {
+ CHECK_EQ(self, Thread::Current()) << "Not running on target thread!";
+ if (!first_run_start) {
+ CHECK(!second_run);
+ first_run_start = true;
+ // Suspend ourself so that we will perform the second run.
+ {
+ ScopedObjectAccess soa(self);
+ self->FullSuspendCheck();
+ }
+ first_run_end = true;
+ } else {
+ CHECK(!second_run);
+ CHECK(first_run_start);
+ second_run = true;
+ second_run_interleaved = !first_run_end;
+ }
+ }
+
+ void Check() {
+ CHECK(first_run_start);
+ CHECK(first_run_end);
+ CHECK(second_run);
+ CHECK(second_run_interleaved);
+ }
+};
+
+static TestClosure gTestClosure = {};
+
+extern "C" JNIEXPORT void JNICALL Java_Main_checkCheckpointsRun(JNIEnv*, jclass) {
+ gTestClosure.Check();
+}
+
+struct SetupClosure : public Closure {
+ void Run(Thread* self) OVERRIDE {
+ CHECK_EQ(self, Thread::Current()) << "Not running on target thread!";
+ ScopedObjectAccess soa(self);
+ MutexLock tscl_mu(self, *Locks::thread_suspend_count_lock_);
+ // Both should succeed since we are in runnable and have the lock.
+ CHECK(self->RequestCheckpoint(&gTestClosure)) << "Could not set first checkpoint.";
+ CHECK(self->RequestCheckpoint(&gTestClosure)) << "Could not set second checkpoint.";
+ }
+};
+
+static SetupClosure gSetupClosure = {};
+
+extern "C" JNIEXPORT void JNICALL Java_Main_pushCheckpoints(JNIEnv*, jclass, jobject thr) {
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+ MutexLock tll_mu(self, *Locks::thread_list_lock_);
+ Thread* target = Thread::FromManagedThread(soa, thr);
+ while (true) {
+ MutexLock tscl_mu(self, *Locks::thread_suspend_count_lock_);
+ if (target->RequestCheckpoint(&gSetupClosure)) {
+ break;
+ }
+ }
+}
+
+} // namespace art
diff --git a/test/203-multi-checkpoint/src/Main.java b/test/203-multi-checkpoint/src/Main.java
new file mode 100644
index 0000000000..187f622730
--- /dev/null
+++ b/test/203-multi-checkpoint/src/Main.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.concurrent.Semaphore;
+
+public class Main {
+ static final Semaphore start = new Semaphore(0);
+ static volatile boolean continue_loop = true;
+
+ public static void main(String[] args) throws Exception {
+ System.loadLibrary(args[0]);
+ Thread t = new Thread(Main::runTargetThread, "Target Thread");
+
+ t.start();
+ // Wait for other thread to start.
+ start.acquire();
+
+ System.out.println("pushing checkpoints");
+ pushCheckpoints(t);
+
+ System.out.println("checkpoints pushed");
+ continue_loop = false;
+
+ t.join();
+
+ checkCheckpointsRun();
+
+ System.out.println("Passed!");
+ }
+
+ public static native void pushCheckpoints(Thread t);
+ public static native boolean checkCheckpointsRun();
+
+ public static void doNothing() {}
+ public static void runTargetThread() {
+ System.out.println("Other thread running");
+ try {
+ start.release();
+ while (continue_loop) {
+ doNothing();
+ }
+ } catch (Exception e) {
+ throw new Error("Exception occurred!", e);
+ }
+ }
+}
diff --git a/test/Android.bp b/test/Android.bp
index 16b30f988f..17ef1141df 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -359,6 +359,7 @@ cc_defaults {
"141-class-unload/jni_unload.cc",
"148-multithread-gc-annotations/gc_coverage.cc",
"149-suspend-all-stress/suspend_all.cc",
+ "203-multi-checkpoint/multi_checkpoint.cc",
"154-gc-loop/heap_interface.cc",
"454-get-vreg/get_vreg_jni.cc",
"457-regs/regs_jni.cc",