Make suspend checks test only specific flags.
Make 20 bits in `Thread.tls32_.state_and_flags` available
for new uses.
Code size changes per suspend check:
- x86/x86-64: +3B (CMP r/m32, imm8 -> TEST r/m32, imm32)
- arm: none (CMP -> TST, both 32-bit with high register)
- arm64: +4B (CBNZ/CBZ -> TST+BNE/BEQ)
Note: Using implicit suspend checks on arm64 would sidestep
this code size increase entirely.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: If5b0be0183efba3f397596b22e03a8b7afb87f85
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 933e270..775bfcf 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1994,12 +1994,12 @@
Register temp = temps.AcquireW();
__ Ldr(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
- static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
+ __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
if (successor == nullptr) {
- __ Cbnz(temp, slow_path->GetEntryLabel());
+ __ B(ne, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
} else {
- __ Cbz(temp, codegen_->GetLabelOf(successor));
+ __ B(eq, codegen_->GetLabelOf(successor));
__ B(slow_path->GetEntryLabel());
// slow_path will return to GetLabelOf(successor).
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index c514c22..841d59b 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -7168,12 +7168,12 @@
vixl32::Register temp = temps.Acquire();
GetAssembler()->LoadFromOffset(
kLoadWord, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
- static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
+ __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
if (successor == nullptr) {
- __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+ __ B(ne, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
} else {
- __ CompareAndBranchIfZero(temp, codegen_->GetLabelOf(successor));
+ __ B(eq, codegen_->GetLabelOf(successor));
__ B(slow_path->GetEntryLabel());
}
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index f19eaae..5434407 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6683,14 +6683,13 @@
DCHECK_EQ(slow_path->GetSuccessor(), successor);
}
- static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
- __ fs()->cmpl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
- Immediate(0));
+ __ fs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
+ Immediate(Thread::SuspendOrCheckpointRequestFlags()));
if (successor == nullptr) {
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotZero, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
} else {
- __ j(kEqual, codegen_->GetLabelOf(successor));
+ __ j(kZero, codegen_->GetLabelOf(successor));
__ jmp(slow_path->GetEntryLabel());
}
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index b0bdffe..fa61c67 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -6018,15 +6018,14 @@
DCHECK_EQ(slow_path->GetSuccessor(), successor);
}
- static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
- __ gs()->cmpl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
- /* no_rip= */ true),
- Immediate(0));
+ __ gs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
+ /* no_rip= */ true),
+ Immediate(Thread::SuspendOrCheckpointRequestFlags()));
if (successor == nullptr) {
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotZero, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
} else {
- __ j(kEqual, codegen_->GetLabelOf(successor));
+ __ j(kZero, codegen_->GetLabelOf(successor));
__ jmp(slow_path->GetEntryLabel());
}
}
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 2b3c2dd..3d45abd 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -1058,8 +1058,7 @@
tr,
Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
- static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
- ___ Cmp(scratch, 0);
+ ___ Tst(scratch, Thread::SuspendOrCheckpointRequestFlags());
___ BPreferNear(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
// TODO: think about using CBNZ here.
}
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index e2d29fd..a505db0 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -893,8 +893,8 @@
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
Register scratch = temps.AcquireW();
___ Ldr(scratch, MEM_OP(reg_x(TR), Thread::ThreadFlagsOffset<kArm64PointerSize>().Int32Value()));
- static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
- ___ Cbnz(scratch, Arm64JNIMacroLabel::Cast(label)->AsArm64());
+ ___ Tst(scratch, Thread::SuspendOrCheckpointRequestFlags());
+ ___ B(ne, Arm64JNIMacroLabel::Cast(label)->AsArm64());
}
void Arm64JNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) {
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 904cca4..4ba3aa1 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -590,9 +590,9 @@
}
void X86JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
- static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
- __ fs()->cmpl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>()), Immediate(0));
- __ j(kNotEqual, X86JNIMacroLabel::Cast(label)->AsX86());
+ __ fs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>()),
+ Immediate(Thread::SuspendOrCheckpointRequestFlags()));
+ __ j(kNotZero, X86JNIMacroLabel::Cast(label)->AsX86());
}
void X86JNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) {
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index 2fb2797..de99e74 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -672,10 +672,9 @@
}
void X86_64JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
- static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
- __ gs()->cmpl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>(), true),
- Immediate(0));
- __ j(kNotEqual, X86_64JNIMacroLabel::Cast(label)->AsX86_64());
+ __ gs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>(), true),
+ Immediate(Thread::SuspendOrCheckpointRequestFlags()));
+ __ j(kNotZero, X86_64JNIMacroLabel::Cast(label)->AsX86_64());
}
void X86_64JNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) {
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index b3d7f38..3cf7dd7 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -186,11 +186,7 @@
// When we are in @FastNative, we are already Runnable.
DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
// Only do a suspend check on the way out of JNI just like compiled stubs.
- if (UNLIKELY(self->TestAllFlags())) {
- // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there
- // is a flag raised.
- self->CheckSuspend();
- }
+ self->CheckSuspend();
}
// We need the mutator lock (i.e., calling GoToRunnable()) before accessing the shorty or the
// locked object.
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 3ac1292..7acee5e 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -40,10 +40,7 @@
}
inline void Thread::AllowThreadSuspension() {
- DCHECK_EQ(Thread::Current(), this);
- if (UNLIKELY(TestAllFlags())) {
- CheckSuspend();
- }
+ CheckSuspend();
// Invalidate the current thread's object pointers (ObjPtr) to catch possible moving GC bugs due
// to missing handles.
PoisonObjectPointers();
@@ -51,16 +48,17 @@
inline void Thread::CheckSuspend() {
DCHECK_EQ(Thread::Current(), this);
- for (;;) {
+ while (true) {
StateAndFlags state_and_flags(tls32_.state_and_flags.load(std::memory_order_relaxed));
- if (state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest)) {
+ if (LIKELY(!state_and_flags.IsAnyOfFlagsSet(SuspendOrCheckpointRequestFlags()))) {
+ break;
+ } else if (state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest)) {
RunCheckpointFunction();
} else if (state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest)) {
FullSuspendCheck();
- } else if (state_and_flags.IsFlagSet(ThreadFlag::kEmptyCheckpointRequest)) {
- RunEmptyCheckpoint();
} else {
- break;
+ DCHECK(state_and_flags.IsFlagSet(ThreadFlag::kEmptyCheckpointRequest));
+ RunEmptyCheckpoint();
}
}
}
@@ -256,11 +254,12 @@
GetMutatorLock()->AssertNotHeld(this); // Otherwise we starve GC.
// Optimize for the return from native code case - this is the fast path.
// Atomically change from suspended to runnable if no suspend request pending.
- StateAndFlags new_state_and_flags = old_state_and_flags;
- new_state_and_flags.SetState(ThreadState::kRunnable);
- static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
- if (LIKELY(new_state_and_flags.GetValue() == 0u)) { // No flags set?
+ constexpr uint32_t kCheckedFlags =
+ SuspendOrCheckpointRequestFlags() | enum_cast<uint32_t>(ThreadFlag::kActiveSuspendBarrier);
+ if (LIKELY(!old_state_and_flags.IsAnyOfFlagsSet(kCheckedFlags))) {
// CAS the value with a memory barrier.
+ StateAndFlags new_state_and_flags = old_state_and_flags;
+ new_state_and_flags.SetState(ThreadState::kRunnable);
if (LIKELY(tls32_.state_and_flags.CompareAndSetWeakAcquire(old_state_and_flags.GetValue(),
new_state_and_flags.GetValue()))) {
// Mark the acquisition of a share of the mutator lock.
@@ -272,10 +271,14 @@
} else if (UNLIKELY(old_state_and_flags.IsFlagSet(ThreadFlag::kCheckpointRequest) ||
old_state_and_flags.IsFlagSet(ThreadFlag::kEmptyCheckpointRequest))) {
// Impossible
+ StateAndFlags flags = old_state_and_flags;
+ static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
+ flags.SetState(ThreadState::kRunnable); // Note: Keeping unused bits.
LOG(FATAL) << "Transitioning to runnable with checkpoint flag, "
- << " flags=" << new_state_and_flags.GetValue() // State set to kRunnable = 0.
+ << " flags=" << flags.GetValue() // State set to kRunnable = 0.
<< " state=" << old_state_and_flags.GetState();
- } else if (old_state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest)) {
+ } else {
+ DCHECK(old_state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest));
// Wait while our suspend count is non-zero.
// We pass null to the MutexLock as we may be in a situation where the
diff --git a/runtime/thread.h b/runtime/thread.h
index f1dd7b8..2673ef5 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1110,13 +1110,6 @@
return state_and_flags.IsFlagSet(flag);
}
- bool TestAllFlags() const {
- StateAndFlags state_and_flags(tls32_.state_and_flags.load(std::memory_order_relaxed));
- static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
- state_and_flags.SetState(ThreadState::kRunnable); // Clear state bits.
- return state_and_flags.GetValue() != 0u;
- }
-
void AtomicSetFlag(ThreadFlag flag) {
tls32_.state_and_flags.fetch_or(enum_cast<uint32_t>(flag), std::memory_order_seq_cst);
}
@@ -1316,6 +1309,17 @@
return WhichPowerOf2(InterpreterCache::kSize);
}
+ static constexpr uint32_t AllThreadFlags() {
+ return enum_cast<uint32_t>(ThreadFlag::kLastFlag) |
+ (enum_cast<uint32_t>(ThreadFlag::kLastFlag) - 1u);
+ }
+
+ static constexpr uint32_t SuspendOrCheckpointRequestFlags() {
+ return enum_cast<uint32_t>(ThreadFlag::kSuspendRequest) |
+ enum_cast<uint32_t>(ThreadFlag::kCheckpointRequest) |
+ enum_cast<uint32_t>(ThreadFlag::kEmptyCheckpointRequest);
+ }
+
private:
explicit Thread(bool daemon);
~Thread() REQUIRES(!Locks::mutator_lock_, !Locks::thread_suspend_count_lock_);
@@ -1482,6 +1486,11 @@
value_ = value;
}
+ bool IsAnyOfFlagsSet(uint32_t flags) const {
+ DCHECK_EQ(flags & ~AllThreadFlags(), 0u);
+ return (value_ & flags) != 0u;
+ }
+
bool IsFlagSet(ThreadFlag flag) const {
return (value_ & enum_cast<uint32_t>(flag)) != 0u;
}
diff --git a/test/706-checker-scheduler/src/Main.java b/test/706-checker-scheduler/src/Main.java
index d4d3923..41fee9a 100644
--- a/test/706-checker-scheduler/src/Main.java
+++ b/test/706-checker-scheduler/src/Main.java
@@ -606,7 +606,7 @@
/// CHECK: add
/// CHECK: adds
/// CHECK: ldr
- /// CHECK: cmp
+ /// CHECK: tst
/// CHECK: beq
/// CHECK-START-ARM64: void Main.testCrossItersDependencies() disassembly (after)
@@ -614,7 +614,8 @@
/// CHECK: add
/// CHECK: add
/// CHECK: ldr
- /// CHECK: cbz
+ /// CHECK: tst
+ /// CHECK: b.eq
private static void testCrossItersDependencies() {
int[] data = {1, 2, 3, 0};
int sub = 0;
diff --git a/tools/cpp-define-generator/thread.def b/tools/cpp-define-generator/thread.def
index fff5755..6dc6c0e 100644
--- a/tools/cpp-define-generator/thread.def
+++ b/tools/cpp-define-generator/thread.def
@@ -21,10 +21,6 @@
ASM_DEFINE(THREAD_CARD_TABLE_OFFSET,
art::Thread::CardTableOffset<art::kRuntimePointerSize>().Int32Value())
-ASM_DEFINE(THREAD_CHECKPOINT_REQUEST,
- static_cast<uint32_t>(art::ThreadFlag::kCheckpointRequest))
-ASM_DEFINE(THREAD_EMPTY_CHECKPOINT_REQUEST,
- static_cast<uint32_t>(art::ThreadFlag::kEmptyCheckpointRequest))
ASM_DEFINE(THREAD_EXCEPTION_OFFSET,
art::Thread::ExceptionOffset<art::kRuntimePointerSize>().Int32Value())
ASM_DEFINE(THREAD_FLAGS_OFFSET,
@@ -56,9 +52,7 @@
ASM_DEFINE(THREAD_SELF_OFFSET,
art::Thread::SelfOffset<art::kRuntimePointerSize>().Int32Value())
ASM_DEFINE(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST,
- static_cast<uint32_t>(art::ThreadFlag::kSuspendRequest) |
- static_cast<uint32_t>(art::ThreadFlag::kCheckpointRequest) |
- static_cast<uint32_t>(art::ThreadFlag::kEmptyCheckpointRequest))
+ art::Thread::SuspendOrCheckpointRequestFlags())
ASM_DEFINE(THREAD_SUSPEND_REQUEST,
static_cast<uint32_t>(art::ThreadFlag::kSuspendRequest))
ASM_DEFINE(THREAD_TOP_QUICK_FRAME_OFFSET,