ARM: Fix CASObject intrinsic slow path.

Avoid clobbering the from-space reference of the old value
by the STREXD result.

Test: testrunner.py --target --32 --optimizing
Bug: 71781600
Bug: 208892843
Change-Id: I14d51f0aa1e26de82be7077ac5597bbeec476dcd
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index fe4c27e..2b49790 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -3446,6 +3446,8 @@
            (type == DataType::Type::kReference && expected.IsRegisterPair()));
     DCHECK(new_value.IsRegister());
     DCHECK(old_value.IsRegister());
+    // Make sure the unmarked old value for reference CAS slow path is not clobbered by STREX.
+    DCHECK(!expected.Contains(LocationFrom(store_result)));
   }
 
   ArmVIXLAssembler* assembler = codegen->GetAssembler();
@@ -3498,12 +3500,14 @@
   EmitStoreExclusive(codegen, type, ptr, store_result, new_value);
   if (strong) {
     // Instruction scheduling: Loading a constant between STREX* and using its result
-    // is essentially free, so prepare the success value here if needed.
-    if (success.IsValid()) {
-      DCHECK(!success.Is(store_result));
+    // is essentially free, so prepare the success value here if needed and possible.
+    if (success.IsValid() && !success.Is(store_result)) {
       __ Mov(success, 1);  // Indicate success if the store succeeds.
     }
     __ Cmp(store_result, 0);
+    if (success.IsValid() && success.Is(store_result)) {
+      __ Mov(LeaveFlags, success, 1);  // Indicate success if the store succeeds.
+    }
     __ B(ne, &loop_head, /*is_far_target=*/ false);
   } else {
     // Weak CAS (VarHandle.CompareAndExchange variants) always indicates success.
@@ -3716,7 +3720,7 @@
             new_value,
             /*old_value=*/ tmp,
             /*old_value_temp=*/ out,
-            /*store_result=*/ tmp,
+            /*store_result=*/ out,
             /*success=*/ out,
             codegen);
     codegen->AddSlowPath(slow_path);
@@ -4877,6 +4881,8 @@
     // The `old_value_temp` is used first for the marked `old_value` and then for the unmarked
     // reloaded old value for subsequent CAS in the slow path.
     vixl32::Register old_value_temp = store_result;
+    // The slow path store result must not clobber `old_value`.
+    vixl32::Register slow_path_store_result = return_success ? RegisterFrom(out) : store_result;
     ReadBarrierCasSlowPathARMVIXL* rb_slow_path =
         new (codegen->GetScopedAllocator()) ReadBarrierCasSlowPathARMVIXL(
             invoke,
@@ -4887,7 +4893,7 @@
             RegisterFrom(new_value),
             RegisterFrom(old_value),
             old_value_temp,
-            store_result,
+            slow_path_store_result,
             success,
             codegen);
     codegen->AddSlowPath(rb_slow_path);