ARM: Use r4 for stack overflow check to reduce code size.

The code savings are 2 bytes per stack overflow check, but
the net effect on the emitted code size can be rounded up
or down because code is aligned to kArmAlignment (8).
Current testing shows the following total size (in bytes)
of the arm boot*.oat files for aosp_taimen-userdebug:
  - before: 17939352
  - after: 17881764 (-56KiB, -0.3%)

Test: Pixel 2 XL boots.
Test: testrunner.py --target -t 018-stack-overflow
Bug: 71627785
Change-Id: Icb732b59e9e681b29790e7e07de2710da33245b1
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 577fe00..6cbde72 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2490,8 +2490,12 @@
   }
 
   if (!skip_overflow_check) {
-    UseScratchRegisterScope temps(GetVIXLAssembler());
-    vixl32::Register temp = temps.Acquire();
+    // Using r4 instead of IP saves 2 bytes. Start by asserting that r4 is available here.
+    for (vixl32::Register reg : kParameterCoreRegistersVIXL) {
+      DCHECK(!reg.Is(r4));
+    }
+    DCHECK(!kCoreCalleeSaves.Includes(r4));
+    vixl32::Register temp = r4;
     __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm)));
     // The load must immediately precede RecordPcInfo.
     ExactAssemblyScope aas(GetVIXLAssembler(),