Always access Thread state and flags as a 32-bit location.

Rewrite access to Thread's state and flags to use 32-bit
atomic operations. Avoid `volatile` accesses that prevent
compiler optimizations.
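
As an illustration (a simplified sketch, not the exact ART
declarations; the names StateAndFlags and value_ are stand-ins),
the access pattern changes roughly like this:

    #include <atomic>
    #include <cstdint>

    // Old pattern: 16-bit `volatile` halves for flags and state,
    // which the compiler could neither coalesce nor optimize.
    // New pattern: one 32-bit word accessed through std::atomic
    // with explicit memory ordering.
    class StateAndFlags {
     public:
      uint32_t LoadRelaxed() const {
        return value_.load(std::memory_order_relaxed);
      }
      uint32_t LoadAcquire() const {
        return value_.load(std::memory_order_acquire);
      }
      bool CompareAndSetWeakRelaxed(uint32_t expected, uint32_t desired) {
        return value_.compare_exchange_weak(
            expected, desired, std::memory_order_relaxed);
      }
     private:
      std::atomic<uint32_t> value_;
    };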

Change `ThreadState` and `ThreadFlag` to `enum class`es.
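
For example (a sketch only; the underlying types and the exact set
of enumerators are assumptions, except that the code generators
below rely on ThreadState::kRunnable having the underlying value 0):

    #include <cstdint>

    // Scoped enums prevent implicit conversions to int, so call
    // sites must spell out ThreadState::kNative etc., as in the
    // optimizing_compiler.cc hunks below.
    enum class ThreadState : uint8_t {
      kRunnable = 0,  // Checked via static_assert in the code generators.
      kNative,
      // ...
    };

    enum class ThreadFlag : uint32_t {
      kSuspendRequest = 1u << 0,
      kCheckpointRequest = 1u << 1,
      // ...
    };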

Golem results for art-opt-cc (higher is better):
linux-ia32                       before after
NativeDowncallStaticNormal       28.162 35.323 (+25.43%)
NativeDowncallStaticNormal6      26.447 32.951 (+24.59%)
NativeDowncallStaticNormalRefs6
NativeDowncallVirtualNormal      27.972 35.027 (+25.22%)
NativeDowncallVirtualNormal6     26.096 32.131 (+23.13%)
NativeDowncallVirtualNormalRefs6 25.922 31.873 (+22.95%)
linux-x64                        before after
NativeDowncallStaticNormal       26.987 34.380 (+27.40%)
NativeDowncallStaticNormal6      25.424 31.096 (+22.31%)
NativeDowncallStaticNormalRefs6  25.086 30.602 (+21.99%)
NativeDowncallVirtualNormal      26.812 33.234 (+23.95%)
NativeDowncallVirtualNormal6     25.086 30.617 (+22.05%)
NativeDowncallVirtualNormalRefs6 25.086 30.602 (+21.99%)
linux-armv7                      before after
NativeDowncallStaticNormal       7.2394 7.9523 (+9.848%)
NativeDowncallStaticNormal6      6.8527 7.4888 (+9.283%)
NativeDowncallStaticNormalRefs6  6.3976 6.9444 (+8.547%)
NativeDowncallVirtualNormal      7.2081 7.9130 (+9.779%)
NativeDowncallVirtualNormal6     6.8527 7.4888 (+9.283%)
NativeDowncallVirtualNormalRefs6 6.3168 6.8527 (+8.483%)
linux-armv8                      before after
NativeDowncallStaticNormal       7.0389 7.5973 (+7.933%)
NativeDowncallStaticNormal6      6.8527 7.3783 (+7.670%)
NativeDowncallStaticNormalRefs6  6.2924 6.8226 (+8.427%)
NativeDowncallVirtualNormal      6.8527 7.3783 (+7.670%)
NativeDowncallVirtualNormal6     6.5604 7.0423 (+7.344%)
NativeDowncallVirtualNormalRefs6 6.1408 6.5329 (+6.386%)

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing --interpreter
Bug: 172332525
Bug: 143299880
Change-Id: Ib55d457ad8f5d9e1159b681dfd279d1f9cfb2af7
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 2cf2571..bcb5ac5 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1994,7 +1994,8 @@
   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
   Register temp = temps.AcquireW();
 
-  __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
+  __ Ldr(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
+  static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
   if (successor == nullptr) {
     __ Cbnz(temp, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 62c285d..aa06c5a 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -7181,7 +7181,8 @@
   UseScratchRegisterScope temps(GetVIXLAssembler());
   vixl32::Register temp = temps.Acquire();
   GetAssembler()->LoadFromOffset(
-      kLoadUnsignedHalfword, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
+      kLoadWord, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
+  static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
   if (successor == nullptr) {
     __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
     __ Bind(slow_path->GetReturnLabel());
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 11c15d6..758a471 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6699,7 +6699,8 @@
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ fs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
+  static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
+  __ fs()->cmpl(Address::Absolute(Thread::ThreadFlagsOffset<kX86PointerSize>().Int32Value()),
                 Immediate(0));
   if (successor == nullptr) {
     __ j(kNotEqual, slow_path->GetEntryLabel());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e601b40..c402e83 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -6016,7 +6016,8 @@
     DCHECK_EQ(slow_path->GetSuccessor(), successor);
   }
 
-  __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
+  static_assert(static_cast<std::underlying_type_t<ThreadState>>(ThreadState::kRunnable) == 0u);
+  __ gs()->cmpl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
                                   /* no_rip= */ true),
                 Immediate(0));
   if (successor == nullptr) {
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 16abf9d..6d7a953 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -1043,7 +1043,7 @@
     // All signature polymorphic methods are native.
     DCHECK(method == nullptr || !method->IsSignaturePolymorphic());
     // Go to native so that we don't block GC during compilation.
-    ScopedThreadSuspension sts(soa.Self(), kNative);
+    ScopedThreadSuspension sts(soa.Self(), ThreadState::kNative);
     // Try to compile a fully intrinsified implementation.
     if (method != nullptr && UNLIKELY(method->IsIntrinsic())) {
       DCHECK(compiler_options.IsBootImage());
@@ -1159,7 +1159,7 @@
           compiling_class);
       CodeVectorAllocator code_allocator(&allocator);
       // Go to native so that we don't block GC during compilation.
-      ScopedThreadSuspension sts(soa.Self(), kNative);
+      ScopedThreadSuspension sts(soa.Self(), ThreadState::kNative);
       std::unique_ptr<CodeGenerator> codegen(
           TryCompileIntrinsic(&allocator,
                               &arena_stack,
@@ -1328,7 +1328,7 @@
         compiling_class);
 
     // Go to native so that we don't block GC during compilation.
-    ScopedThreadSuspension sts(self, kNative);
+    ScopedThreadSuspension sts(self, ThreadState::kNative);
     codegen.reset(
         TryCompile(&allocator,
                    &arena_stack,