Make suspend check test specific flags.
Test only the suspend and checkpoint request flags in the compiled
suspend check instead of comparing the whole 32-bit word against zero
(which relied on `ThreadState::kRunnable` being 0). This makes 20 bits
in `Thread.tls32_.state_and_flags` available for new uses.
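
As a minimal sketch of what "testing specific flags" means (the flag
names, bit positions, and the body of SuspendOrCheckpointRequestFlags()
below are illustrative assumptions, not the actual ART definitions):

  #include <cstdint>

  // Hypothetical flag bits; the real values live in ART's thread flags.
  enum class ThreadFlag : uint32_t {
    kSuspendRequest = 1u << 0,     // assumed bit position
    kCheckpointRequest = 1u << 1,  // assumed bit position
  };

  // Stand-in for the Thread::SuspendOrCheckpointRequestFlags() helper
  // used in the arm64 hunk below.
  constexpr uint32_t SuspendOrCheckpointRequestFlags() {
    return static_cast<uint32_t>(ThreadFlag::kSuspendRequest) |
           static_cast<uint32_t>(ThreadFlag::kCheckpointRequest);
  }

  // Old check: the fast path required the whole 32-bit word to be zero,
  // which relied on ThreadState::kRunnable == 0 and reserved every bit
  // for "take the slow path" meanings.
  bool OldNeedsSlowPath(uint32_t state_and_flags) {
    return state_and_flags != 0u;
  }

  // New check: only the request flags are inspected, so the remaining
  // bits of state_and_flags can be reused without affecting the check.
  bool NewNeedsSlowPath(uint32_t state_and_flags) {
    return (state_and_flags & SuspendOrCheckpointRequestFlags()) != 0u;
  }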
Code size changes per suspend check (see the sketch below the note):
- x86/x86-64: +3B (CMP r/m32, imm8 -> TEST r/m32, imm32)
- arm: none (CMP -> TST, both 32-bit with high register)
- arm64: +4B (CBNZ/CBZ -> TST + B.NE/B.EQ)
Note: Using implicit suspend checks on arm64 would sidestep
this code size increase entirely.
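
A worked sketch of where the deltas above come from, assuming fixed
4-byte A64 instructions and the standard x86 immediate encodings (the
constants below are illustrative, not taken from the generated code):

  #include <cstdint>

  constexpr uint32_t kA64InsnSize = 4u;

  // arm64, old: LDR; CBNZ/CBZ        -> 2 instructions
  // arm64, new: LDR; TST; B.NE/B.EQ  -> 3 instructions
  constexpr uint32_t kArm64Delta = 3u * kA64InsnSize - 2u * kA64InsnSize;
  static_assert(kArm64Delta == 4u, "arm64: one extra 4-byte instruction");

  // x86/x86-64: CMP r/m32 has a sign-extended imm8 form (opcode 0x83),
  // but TEST r/m32 only encodes a full imm32 (opcode 0xF7), so the flag
  // mask costs three extra immediate bytes.
  constexpr uint32_t kX86Delta = sizeof(int32_t) - sizeof(int8_t);
  static_assert(kX86Delta == 3u, "x86: imm32 instead of sign-extended imm8");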
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: If5b0be0183efba3f397596b22e03a8b7afb87f85
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 933e270..775bfcf 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1994,12 +1994,12 @@
   Register temp = temps.AcquireW();
   __ Ldr(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
-  static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
+  __ Tst(temp, Thread::SuspendOrCheckpointRequestFlags());
   if (successor == nullptr) {
-    __ Cbnz(temp, slow_path->GetEntryLabel());
+    __ B(ne, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
   } else {
-    __ Cbz(temp, codegen_->GetLabelOf(successor));
+    __ B(eq, codegen_->GetLabelOf(successor));
     __ B(slow_path->GetEntryLabel());
     // slow_path will return to GetLabelOf(successor).
   }