JNI: Fix transition to suspended to be "release".

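This fixes a mistake in
    https://android-review.googlesource.com/1903370 ,
where the CAS for the Runnable -> Native thread state transition was
emitted with "acquire" semantics instead of "release".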

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: I39c9677509987c763f03cb782053c9eec260d5c0
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index b06f428..2c1b4be 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -1062,7 +1062,7 @@
   vixl32::Register scratch = AsVIXLRegister(scratch_regs[0].AsArm());
   vixl32::Register scratch2 = AsVIXLRegister(scratch_regs[1].AsArm());
 
-  // CAS acquire, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
+  // CAS release, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
   vixl32::Label retry;
   ___ Bind(&retry);
   ___ Ldrex(scratch, MemOperand(tr, thread_flags_offset.Int32Value()));
@@ -1070,10 +1070,10 @@
   // If any flags are set, go to the slow path.
   ___ Cmp(scratch, kRunnableStateValue);
   ___ B(ne, ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+  ___ Dmb(DmbOptions::ISH);  // Memory barrier "any-store" for the "release" operation.
   ___ Strex(scratch, scratch2, MemOperand(tr, thread_flags_offset.Int32Value()));
   ___ Cmp(scratch, 0);
   ___ B(ne, &retry);
-  ___ Dmb(DmbOptions::ISH);  // Memory barrier "load-any" for the "acquire" operation.
 
   // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`; `scratch` holds 0 at this point.
   ___ Str(scratch, MemOperand(tr, thread_held_mutex_mutator_lock_offset.Int32Value()));
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 8ae1d04..e84fe04 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -901,16 +901,16 @@
   Register scratch = temps.AcquireW();
   Register scratch2 = temps.AcquireW();
 
-  // CAS acquire, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
+  // CAS release, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
   vixl::aarch64::Label retry;
   ___ Bind(&retry);
-  static_assert(thread_flags_offset.Int32Value() == 0);  // LDAXR/STXR require exact address.
-  ___ Ldaxr(scratch, MEM_OP(reg_x(TR)));
+  static_assert(thread_flags_offset.Int32Value() == 0);  // LDXR/STLXR require exact address.
+  ___ Ldxr(scratch, MEM_OP(reg_x(TR)));
   ___ Mov(scratch2, kNativeStateValue);
   // If any flags are set, go to the slow path.
   static_assert(kRunnableStateValue == 0u);
   ___ Cbnz(scratch, Arm64JNIMacroLabel::Cast(label)->AsArm64());
-  ___ Stxr(scratch, scratch2, MEM_OP(reg_x(TR)));
+  ___ Stlxr(scratch, scratch2, MEM_OP(reg_x(TR)));
   ___ Cbnz(scratch, &retry);
 
   // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`.
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index fc92c30..8be2a32 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -602,7 +602,7 @@
   Register saved_eax = scratch_regs[0].AsX86().AsCpuRegister();
   Register scratch = scratch_regs[1].AsX86().AsCpuRegister();
 
-  // CAS acquire, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
+  // CAS release, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
   __ movl(saved_eax, EAX);  // Save EAX.
   static_assert(kRunnableStateValue == 0u);
   __ xorl(EAX, EAX);
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index 3ddb689..b25d5c7 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -682,7 +682,7 @@
   CpuRegister rax(RAX);  // RAX can be freely clobbered. It does not hold any argument.
   CpuRegister scratch = GetScratchRegister();
 
-  // CAS acquire, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
+  // CAS release, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags.
   static_assert(kRunnableStateValue == 0u);
   __ xorl(rax, rax);
   __ movl(scratch, Immediate(kNativeStateValue));