Always access Thread state and flags as 32-bit location.
Rewrite access to Thread's state and flags to use 32-bit
atomic operations. Avoid `volatile` accesses that prevent
compiler optimizations.
Change `ThreadState` and `ThreadFlag` to `enum class`es.
Golem results for art-opt-cc (higher is better):
linux-ia32 before after
NativeDowncallStaticNormal 28.162 35.323 (+25.43%)
NativeDowncallStaticNormal6 26.447 32.951 (+24.59%)
NativeDowncallStaticNormalRefs6
NativeDowncallVirtualNormal 27.972 35.027 (+25.22%)
NativeDowncallVirtualNormal6 26.096 32.131 (+23.13%)
NativeDowncallVirtualNormalRefs6 25.922 31.873 (+22.95%)
linux-x64 before after
NativeDowncallStaticNormal 26.987 34.380 (+27.40%)
NativeDowncallStaticNormal6 25.424 31.096 (+22.31%)
NativeDowncallStaticNormalRefs6 25.086 30.602 (+21.99%)
NativeDowncallVirtualNormal 26.812 33.234 (+23.95%)
NativeDowncallVirtualNormal6 25.086 30.617 (+22.05%)
NativeDowncallVirtualNormalRefs6 25.086 30.602 (+21.99%)
linux-armv7 before after
NativeDowncallStaticNormal 7.2394 7.9523 (+9.848%)
NativeDowncallStaticNormal6 6.8527 7.4888 (+9.283%)
NativeDowncallStaticNormalRefs6 6.3976 6.9444 (+8.547%)
NativeDowncallVirtualNormal 7.2081 7.9130 (+9.779%)
NativeDowncallVirtualNormal6 6.8527 7.4888 (+9.283%)
NativeDowncallVirtualNormalRefs6 6.3168 6.8527 (+8.483%)
linux-armv8 before after
NativeDowncallStaticNormal 7.0389 7.5973 (+7.933%)
NativeDowncallStaticNormal6 6.8527 7.3783 (+7.670%)
NativeDowncallStaticNormalRefs6 6.2924 6.8226 (+8.427%)
NativeDowncallVirtualNormal 6.8527 7.3783 (+7.670%)
NativeDowncallVirtualNormal6 6.5604 7.0423 (+7.344%)
NativeDowncallVirtualNormalRefs6 6.1408 6.5329 (+6.386%)
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing --interpreter
Bug: 172332525
Bug: 143299880
Change-Id: Ib55d457ad8f5d9e1159b681dfd279d1f9cfb2af7
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 2f96d44..d63ae17 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -536,9 +536,9 @@
static void expectValidThreadState() {
// Normal JNI always transitions to "Native". Other JNIs stay in the "Runnable" state.
if (IsCurrentJniNormal()) {
- EXPECT_EQ(kNative, Thread::Current()->GetState());
+ EXPECT_EQ(ThreadState::kNative, Thread::Current()->GetState());
} else {
- EXPECT_EQ(kRunnable, Thread::Current()->GetState());
+ EXPECT_EQ(ThreadState::kRunnable, Thread::Current()->GetState());
}
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 2cf2571..bcb5ac5 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1994,7 +1994,8 @@
UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
Register temp = temps.AcquireW();
- __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
+ __ Ldr(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
+ static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
if (successor == nullptr) {
__ Cbnz(temp, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 62c285d..aa06c5a 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -7181,7 +7181,8 @@
UseScratchRegisterScope temps(GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
GetAssembler()->LoadFromOffset(
- kLoadUnsignedHalfword, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
+ kLoadWord, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
+ static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
if (successor == nullptr) {
__ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
__ Bind(slow_path->GetReturnLabel());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 11c15d6..758a471 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6699,7 +6699,8 @@
DCHECK_EQ(slow_path->GetSuccessor(), successor);
}
- __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
+ static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
+ __ fs()->cmpl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
Immediate(0));
if (successor == nullptr) {
__ j(kNotEqual, slow_path->GetEntryLabel());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e601b40..c402e83 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -6016,7 +6016,8 @@
DCHECK_EQ(slow_path->GetSuccessor(), successor);
}
- __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
+ static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
+ __ gs()->cmpl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
/* no_rip= */ true),
Immediate(0));
if (successor == nullptr) {
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 16abf9d..6d7a953 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -1043,7 +1043,7 @@
// All signature polymorphic methods are native.
DCHECK(method == nullptr || !method->IsSignaturePolymorphic());
// Go to native so that we don't block GC during compilation.
- ScopedThreadSuspension sts(soa.Self(), kNative);
+ ScopedThreadSuspension sts(soa.Self(), ThreadState::kNative);
// Try to compile a fully intrinsified implementation.
if (method != nullptr && UNLIKELY(method->IsIntrinsic())) {
DCHECK(compiler_options.IsBootImage());
@@ -1159,7 +1159,7 @@
compiling_class);
CodeVectorAllocator code_allocator(&allocator);
// Go to native so that we don't block GC during compilation.
- ScopedThreadSuspension sts(soa.Self(), kNative);
+ ScopedThreadSuspension sts(soa.Self(), ThreadState::kNative);
std::unique_ptr<CodeGenerator> codegen(
TryCompileIntrinsic(&allocator,
&arena_stack,
@@ -1328,7 +1328,7 @@
compiling_class);
// Go to native so that we don't block GC during compilation.
- ScopedThreadSuspension sts(self, kNative);
+ ScopedThreadSuspension sts(self, ThreadState::kNative);
codegen.reset(
TryCompile(&allocator,
&arena_stack,
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index bd8aa083..2b3c2dd 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -1053,11 +1053,12 @@
void ArmVIXLJNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
vixl32::Register scratch = temps.Acquire();
- asm_.LoadFromOffset(kLoadUnsignedHalfword,
+ asm_.LoadFromOffset(kLoadWord,
scratch,
tr,
Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
+ static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
___ Cmp(scratch, 0);
___ BPreferNear(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
// TODO: think about using CBNZ here.
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 561cbbd..e2d29fd 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -892,7 +892,8 @@
void Arm64JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
Register scratch = temps.AcquireW();
- ___ Ldrh(scratch, MEM_OP(reg_x(TR), Thread::ThreadFlagsOffset<kArm64PointerSize>().Int32Value()));
+ ___ Ldr(scratch, MEM_OP(reg_x(TR), Thread::ThreadFlagsOffset<kArm64PointerSize>().Int32Value()));
+ static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
___ Cbnz(scratch, Arm64JNIMacroLabel::Cast(label)->AsArm64());
}
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 7dff279..904cca4 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -590,7 +590,8 @@
}
void X86JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
- __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>()), Immediate(0));
+ static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
+ __ fs()->cmpl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>()), Immediate(0));
__ j(kNotEqual, X86JNIMacroLabel::Cast(label)->AsX86());
}
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index 2da1b47..2fb2797 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -672,7 +672,8 @@
}
void X86_64JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) {
- __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>(), true),
+ static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
+ __ gs()->cmpl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>(), true),
Immediate(0));
__ j(kNotEqual, X86_64JNIMacroLabel::Cast(label)->AsX86_64());
}