x86/x86-64: Fix BoundsCheck slow path clobbering EDX/RDX.

Test: New run-test 731-bounds-check-slow-path.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing --interpreter --jit --jvm
Bug: 227613778
Bug: 227365246
Bug: 216608614
Bug: 216629762
Change-Id: Ia5fd372128f4c5d40a482491b7f9a4b8772f579c
diff --git a/TEST_MAPPING b/TEST_MAPPING
index ad8df10..827a29f 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -1187,6 +1187,9 @@
       "name": "art-run-test-730-checker-inlining-super[com.google.android.art.apex]"
     },
     {
+      "name": "art-run-test-731-bounds-check-slow-path[com.google.android.art.apex]"
+    },
+    {
       "name": "art-run-test-805-TooDeepClassInstanceOf[com.google.android.art.apex]"
     },
     {
@@ -2479,6 +2482,9 @@
       "name": "art-run-test-730-checker-inlining-super"
     },
     {
+      "name": "art-run-test-731-bounds-check-slow-path"
+    },
+    {
       "name": "art-run-test-805-TooDeepClassInstanceOf"
     },
     {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 4025684..8c6b802 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -150,41 +150,62 @@
     LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
-    // We're moving two locations to locations that could overlap, so we need a parallel
-    // move resolver.
     if (instruction_->CanThrowIntoCatchBlock()) {
       // Live registers will be restored in the catch block if caught.
-      SaveLiveRegisters(codegen, instruction_->GetLocations());
+      SaveLiveRegisters(codegen, locations);
     }
 
-    // Are we using an array length from memory?
-    HInstruction* array_length = instruction_->InputAt(1);
+    Location index_loc = locations->InAt(0);
     Location length_loc = locations->InAt(1);
     InvokeRuntimeCallingConvention calling_convention;
-    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
-      // Load the array length into our temporary.
-      HArrayLength* length = array_length->AsArrayLength();
-      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
+    Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+    Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+
+    // Are we using an array length from memory?
+    if (!length_loc.IsValid()) {
+      DCHECK(instruction_->InputAt(1)->IsArrayLength());
+      HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
+      DCHECK(array_length->IsEmittedAtUseSite());
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
       Location array_loc = array_length->GetLocations()->InAt(0);
-      Address array_len(array_loc.AsRegister<Register>(), len_offset);
-      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
-      // Check for conflicts with index.
-      if (length_loc.Equals(locations->InAt(0))) {
-        // We know we aren't using parameter 2.
-        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
+      if (!index_loc.Equals(length_arg)) {
+        // The index is not clobbered by loading the length directly to `length_arg`.
+        __ movl(length_arg.AsRegister<Register>(),
+                Address(array_loc.AsRegister<Register>(), len_offset));
+        x86_codegen->Move32(index_arg, index_loc);
+      } else if (!array_loc.Equals(index_arg)) {
+        // The array reference is not clobbered by the index move.
+        x86_codegen->Move32(index_arg, index_loc);
+        __ movl(length_arg.AsRegister<Register>(),
+                Address(array_loc.AsRegister<Register>(), len_offset));
+      } else {
+        // We do not have a temporary we could use, so swap the registers using the
+        // parallel move resolver and replace the array with the length afterwards.
+        codegen->EmitParallelMoves(
+            index_loc,
+            index_arg,
+            DataType::Type::kInt32,
+            array_loc,
+            length_arg,
+            DataType::Type::kReference);
+        __ movl(length_arg.AsRegister<Register>(),
+                Address(length_arg.AsRegister<Register>(), len_offset));
       }
-      __ movl(length_loc.AsRegister<Register>(), array_len);
-      if (mirror::kUseStringCompression && length->IsStringLength()) {
-        __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
+      if (mirror::kUseStringCompression && array_length->IsStringLength()) {
+        __ shrl(length_arg.AsRegister<Register>(), Immediate(1));
       }
+    } else {
+      // We're moving two locations to locations that could overlap,
+      // so we need a parallel move resolver.
+      codegen->EmitParallelMoves(
+          index_loc,
+          index_arg,
+          DataType::Type::kInt32,
+          length_loc,
+          length_arg,
+          DataType::Type::kInt32);
     }
-    x86_codegen->EmitParallelMoves(
-        locations->InAt(0),
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        DataType::Type::kInt32,
-        length_loc,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
-        DataType::Type::kInt32);
+
     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
         ? kQuickThrowStringBounds
         : kQuickThrowArrayBounds;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 8c1a533..d919fa7 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -203,39 +203,54 @@
     __ Bind(GetEntryLabel());
     if (instruction_->CanThrowIntoCatchBlock()) {
       // Live registers will be restored in the catch block if caught.
-      SaveLiveRegisters(codegen, instruction_->GetLocations());
-    }
-    // Are we using an array length from memory?
-    HInstruction* array_length = instruction_->InputAt(1);
-    Location length_loc = locations->InAt(1);
-    InvokeRuntimeCallingConvention calling_convention;
-    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
-      // Load the array length into our temporary.
-      HArrayLength* length = array_length->AsArrayLength();
-      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
-      Location array_loc = array_length->GetLocations()->InAt(0);
-      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
-      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
-      // Check for conflicts with index.
-      if (length_loc.Equals(locations->InAt(0))) {
-        // We know we aren't using parameter 2.
-        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
-      }
-      __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
-      if (mirror::kUseStringCompression && length->IsStringLength()) {
-        __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
-      }
+      SaveLiveRegisters(codegen, locations);
     }
 
-    // We're moving two locations to locations that could overlap, so we need a parallel
-    // move resolver.
-    codegen->EmitParallelMoves(
-        locations->InAt(0),
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        DataType::Type::kInt32,
-        length_loc,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
-        DataType::Type::kInt32);
+    Location index_loc = locations->InAt(0);
+    Location length_loc = locations->InAt(1);
+    InvokeRuntimeCallingConvention calling_convention;
+    Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+    Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
+
+    // Are we using an array length from memory?
+    if (!length_loc.IsValid()) {
+      DCHECK(instruction_->InputAt(1)->IsArrayLength());
+      HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
+      DCHECK(array_length->IsEmittedAtUseSite());
+      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
+      Location array_loc = array_length->GetLocations()->InAt(0);
+      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
+      if (!index_loc.Equals(length_arg)) {
+        // The index is not clobbered by loading the length directly to `length_arg`.
+        __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
+        x86_64_codegen->Move(index_arg, index_loc);
+      } else if (!array_loc.Equals(index_arg)) {
+        // The array reference is not clobbered by the index move.
+        x86_64_codegen->Move(index_arg, index_loc);
+        __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
+      } else {
+        // Load the array length into `TMP`.
+        DCHECK(codegen->IsBlockedCoreRegister(TMP));
+        __ movl(CpuRegister(TMP), array_len);
+        // Single move to CPU register does not clobber `TMP`.
+        x86_64_codegen->Move(index_arg, index_loc);
+        __ movl(length_arg.AsRegister<CpuRegister>(), CpuRegister(TMP));
+      }
+      if (mirror::kUseStringCompression && array_length->IsStringLength()) {
+        __ shrl(length_arg.AsRegister<CpuRegister>(), Immediate(1));
+      }
+    } else {
+      // We're moving two locations to locations that could overlap,
+      // so we need a parallel move resolver.
+      codegen->EmitParallelMoves(
+          index_loc,
+          index_arg,
+          DataType::Type::kInt32,
+          length_loc,
+          length_arg,
+          DataType::Type::kInt32);
+    }
+
     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
         ? kQuickThrowStringBounds
         : kQuickThrowArrayBounds;
diff --git a/test/731-bounds-check-slow-path/Android.bp b/test/731-bounds-check-slow-path/Android.bp
new file mode 100644
index 0000000..bc9e84b
--- /dev/null
+++ b/test/731-bounds-check-slow-path/Android.bp
@@ -0,0 +1,40 @@
+// Generated by `regen-test-files`. Do not edit manually.
+
+// Build rules for ART run-test `731-bounds-check-slow-path`.
+
+package {
+    // See: http://go/android-license-faq
+    // A large-scale-change added 'default_applicable_licenses' to import
+    // all of the 'license_kinds' from "art_license"
+    // to get the below license kinds:
+    //   SPDX-license-identifier-Apache-2.0
+    default_applicable_licenses: ["art_license"],
+}
+
+// Test's Dex code.
+java_test {
+    name: "art-run-test-731-bounds-check-slow-path",
+    defaults: ["art-run-test-defaults"],
+    test_config_template: ":art-run-test-target-template",
+    srcs: ["src/**/*.java"],
+    data: [
+        ":art-run-test-731-bounds-check-slow-path-expected-stdout",
+        ":art-run-test-731-bounds-check-slow-path-expected-stderr",
+    ],
+}
+
+// Test's expected standard output.
+genrule {
+    name: "art-run-test-731-bounds-check-slow-path-expected-stdout",
+    out: ["art-run-test-731-bounds-check-slow-path-expected-stdout.txt"],
+    srcs: ["expected-stdout.txt"],
+    cmd: "cp -f $(in) $(out)",
+}
+
+// Test's expected standard error.
+genrule {
+    name: "art-run-test-731-bounds-check-slow-path-expected-stderr",
+    out: ["art-run-test-731-bounds-check-slow-path-expected-stderr.txt"],
+    srcs: ["expected-stderr.txt"],
+    cmd: "cp -f $(in) $(out)",
+}
diff --git a/test/731-bounds-check-slow-path/expected-stderr.txt b/test/731-bounds-check-slow-path/expected-stderr.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/731-bounds-check-slow-path/expected-stderr.txt
diff --git a/test/731-bounds-check-slow-path/expected-stdout.txt b/test/731-bounds-check-slow-path/expected-stdout.txt
new file mode 100644
index 0000000..f3ed3fd
--- /dev/null
+++ b/test/731-bounds-check-slow-path/expected-stdout.txt
@@ -0,0 +1,3 @@
+i17 i18 b = 297,33580744,0
+l i19 i20 = 146,172,60
+i16 b i17 = -848052580,0,401
diff --git a/test/731-bounds-check-slow-path/info.txt b/test/731-bounds-check-slow-path/info.txt
new file mode 100644
index 0000000..0129f67
--- /dev/null
+++ b/test/731-bounds-check-slow-path/info.txt
@@ -0,0 +1,3 @@
+Regression tests for x86-64 bug in bounds check slow path that resulted
+in a live register being clobbered when throwing AIOOBE. The value must
+be correctly preserved for a catch handler in the same method.
diff --git a/test/731-bounds-check-slow-path/src/Main.java b/test/731-bounds-check-slow-path/src/Main.java
new file mode 100644
index 0000000..76b5723
--- /dev/null
+++ b/test/731-bounds-check-slow-path/src/Main.java
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    public static void main(String args[]) {
+        Test227365246 test227365246 = new Test227365246();
+        test227365246.$noinline$mainTest(args);
+
+        Test216608614 test216608614 = new Test216608614();
+        test216608614.$noinline$mainTest(args);
+
+        Test216629762 test216629762 = new Test216629762();
+        test216629762.$noinline$mainTest(args);
+    }
+}
+
+class Test227365246 {
+    int N = 400;
+    int iFld;
+
+    void $noinline$mainTest(String[] strArr1) {
+        int i17, i18 = 5788, i19, i21, i22 = 127, i23;
+        byte[] byArr = new byte[N];
+        for (i17 = 14; 297 > i17; ++i17)
+            for (int ax$2 = 151430; ax$2 < 235417; ax$2 += 2) {}
+        try {
+            for (i19 = 4; 179 > i19; ++i19) {
+                i18 *= i18;
+                for (i21 = 1; i21 < 58; i21++)
+                    for (i23 = i21; 1 + 400 > i23; i23++) {
+                        byArr[i23] -= i22;
+                        i18 += i23;
+                        switch (i19 % 5) {
+                            case 107:
+                                i19 >>= iFld;
+                        }
+                    }
+            }
+        } catch (ArrayIndexOutOfBoundsException exc1) {
+        }
+        System.out.println("i17 i18 b = " + i17 + "," + i18 + "," + 0);
+    }
+}
+
+class Test216608614 {
+    int N = 400;
+    long lFld;
+    double dFld;
+    int iArrFld[]=new int[N];
+    void $noinline$mainTest(String[] strArr1) {
+        // Note: The original bug report started with `l=-1213929899L` but this took
+        // too long when running with interpreter without JIT and we want to allow
+        // this test to run for all configurations. Starting with `l=-1000000L` was
+        // enough to allow JIT to compile the method for OSR and trigger the bug on host.
+        long l=-1000000L;
+        int i19= 46, i20=100, i21, i22=13, i25;
+        try {
+            do
+                for (; i19 < 172; ++i19)
+                    lFld = (long) dFld;
+            while (++l < 146);
+            for (i21 = 8;; ++i21)
+                for (i25 = 1; i25 < 2; i25++) {
+                    i20 = i22 % 1650388388;
+                    i20 = iArrFld[i21];
+                    i22 = 60;
+                }
+        } catch (ArrayIndexOutOfBoundsException exc1) {
+        } finally {
+        }
+        System.out.println("l i19 i20 = " + l + "," + i19 + "," + i20);
+    }
+}
+
+class Test216629762 {
+    static int N = 400;
+    int iFld=29275;
+    volatile double dFld;
+    static long lArrFld[][]=new long[N][N];
+
+    void $noinline$mainTest(String[] strArr1) {
+        int i8, i10=181, i11, i12=-57574, i13=69, i15= 6, i16= 186, i17= 227;
+        try {
+            for (i11 = 6; i11 < 278 + 400; ++i11)
+                i12 *= iFld;
+            for (;; i13++) {
+                i10 /= i10;
+                i16 += i15;
+                lArrFld[i13][i15] >>= 31616;
+                for (i17 = 1; i17 <  1 + 400; i17++)
+                dFld += dFld;
+            }
+        }
+        catch (ArrayIndexOutOfBoundsException exc2) {
+            i16 += i12;
+        }
+        System.out.println("i16 b i17 = " + i16 + "," + 0  + "," + i17);
+    }
+}