SVE: Extract Intermediate Address for SVE Vector Memory Operations

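This patch introduces an optimization that factors out the common
"base + offset" part of the address computation into an
IntermediateAddress when performing a predicated SVE vector memory
operation (VecLoad/VecStore), so that the intermediate address can be
shared between accesses to the same array after GVN.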

With SVE enabled by default:

Test: ./art/test.py --simulate-arm64 --run-test --optimizing
      (with the VIXL simulator patch applied)

Test: ./art/test.py --target --64 --optimizing
      (on Arm FVP with SVE; see the steps in test/README.arm_fvp.md)

Test: 527-checker-array-access-simd, 655-checker-simd-arm-opt.

Change-Id: Icd49e57d5550d1530445a94e5d49e217a999d06d
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index e1a4718..7401f0d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -6909,9 +6909,7 @@
   Register base = InputRegisterAt(instruction, 0);
   Location index = locations->InAt(1);
 
-  // TODO: Support intermediate address sharing for SVE accesses.
   DCHECK(!instruction->InputAt(1)->IsIntermediateAddressIndex());
-  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
   DCHECK(!index.IsConstant());
 
   uint32_t offset = is_string_char_at
@@ -6919,6 +6917,10 @@
       : mirror::Array::DataOffset(size).Uint32Value();
   size_t shift = ComponentSizeShiftWidth(size);
 
+  if (instruction->InputAt(0)->IsIntermediateAddress()) {
+    return SVEMemOperand(base.X(), XRegisterFrom(index), LSL, shift);
+  }
+
   *scratch = temps_scope->AcquireSameSizeAs(base);
   __ Add(*scratch, base, offset);
   return SVEMemOperand(scratch->X(), XRegisterFrom(index), LSL, shift);
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index ff0859b..a6ec020 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -277,18 +277,30 @@
 }
 
 void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) {
-  // TODO: Extract regular HIntermediateAddress.
   if (!instruction->IsPredicated() && !instruction->IsStringCharAt() &&
       TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
     RecordSimplification();
+  } else if (instruction->IsPredicated()) {
+    size_t size = DataType::Size(instruction->GetPackedType());
+    size_t offset = mirror::Array::DataOffset(size).Uint32Value();
+    if (TryExtractArrayAccessAddress(
+            instruction, instruction->GetArray(), instruction->GetIndex(), offset)) {
+      RecordSimplification();
+    }
   }
 }
 
 void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) {
-  // TODO: Extract regular HIntermediateAddress.
   if (!instruction->IsPredicated() &&
       TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
     RecordSimplification();
+  } else if (instruction->IsPredicated()) {
+    size_t size = DataType::Size(instruction->GetPackedType());
+    size_t offset = mirror::Array::DataOffset(size).Uint32Value();
+    if (TryExtractArrayAccessAddress(
+            instruction, instruction->GetArray(), instruction->GetIndex(), offset)) {
+      RecordSimplification();
+    }
   }
 }
 
diff --git a/test/527-checker-array-access-simd/src/Main.java b/test/527-checker-array-access-simd/src/Main.java
index 173165a..a08b1f0 100644
--- a/test/527-checker-array-access-simd/src/Main.java
+++ b/test/527-checker-array-access-simd/src/Main.java
@@ -59,9 +59,11 @@
   ///     CHECK-DAG:             <<LoopP:j\d+>>         VecPredWhile
   ///     CHECK-DAG:             <<Index:i\d+>>         Phi
   ///     CHECK-DAG:                                    If
-  ///     CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array>>,<<Index>>,<<LoopP>>]
+  ///     CHECK-DAG:             <<IntAddr1:i\d+>>      IntermediateAddress [<<Array>>,{{i\d+}}]
+  ///     CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<IntAddr1>>,<<Index>>,<<LoopP>>]
   ///     CHECK-DAG:             <<Add:d\d+>>           VecAdd [<<Load>>,<<Repl>>,<<LoopP>>]
-  ///     CHECK-DAG:                                    VecStore [<<Array>>,<<Index>>,<<Add>>,<<LoopP>>]
+  ///     CHECK-DAG:             <<IntAddr2:i\d+>>      IntermediateAddress [<<Array>>,{{i\d+}}]
+  ///     CHECK-DAG:                                    VecStore [<<IntAddr2>>,<<Index>>,<<Add>>,<<LoopP>>]
   //
   /// CHECK-ELSE:
   //
@@ -90,10 +92,10 @@
   ///     CHECK-DAG:             <<LoopP:j\d+>>         VecPredWhile
   ///     CHECK-DAG:             <<Index:i\d+>>         Phi
   ///     CHECK-DAG:                                    If
-  ///     CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array>>,<<Index>>,<<LoopP>>]
+  ///     CHECK-DAG:             <<IntAddr:i\d+>>       IntermediateAddress [<<Array>>,{{i\d+}}]
+  ///     CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<IntAddr>>,<<Index>>,<<LoopP>>]
   ///     CHECK-DAG:             <<Add:d\d+>>           VecAdd [<<Load>>,<<Repl>>,<<LoopP>>]
-  ///     CHECK-NOT:                                    IntermediateAddress
-  ///     CHECK-DAG:                                    VecStore [<<Array>>,<<Index>>,<<Add>>,<<LoopP>>]
+  ///     CHECK-DAG:                                    VecStore [<<IntAddr>>,<<Index>>,<<Add>>,<<LoopP>>]
   //
   /// CHECK-ELSE:
   //
@@ -116,7 +118,6 @@
   //
   //      IntermediateAddressIndex is not supported for SVE.
   ///     CHECK-NOT:                                    IntermediateAddressIndex
-  ///     CHECK-NOT:                                    IntermediateAddress
   //
   /// CHECK-ELSE:
   //
@@ -168,9 +169,11 @@
   ///     CHECK-DAG:             <<LoopP:j\d+>>         VecPredWhile
   ///     CHECK-DAG:             <<Index:i\d+>>         Phi
   ///     CHECK-DAG:                                    If
-  ///     CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array>>,<<Index>>,<<LoopP>>]
+  ///     CHECK-DAG:             <<IntAddr1:i\d+>>      IntermediateAddress [<<Array>>,{{i\d+}}]
+  ///     CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<IntAddr1>>,<<Index>>,<<LoopP>>]
   ///     CHECK-DAG:             <<Add:d\d+>>           VecAdd [<<Load>>,<<Repl>>,<<LoopP>>]
-  ///     CHECK-DAG:                                    VecStore [<<Array>>,<<Index>>,<<Add>>,<<LoopP>>]
+  ///     CHECK-DAG:             <<IntAddr2:i\d+>>      IntermediateAddress [<<Array>>,{{i\d+}}]
+  ///     CHECK-DAG:                                    VecStore [<<IntAddr2>>,<<Index>>,<<Add>>,<<LoopP>>]
   //
   /// CHECK-ELSE:
   //
@@ -199,10 +202,10 @@
   ///     CHECK-DAG:             <<LoopP:j\d+>>         VecPredWhile
   ///     CHECK-DAG:             <<Index:i\d+>>         Phi
   ///     CHECK-DAG:                                    If
-  ///     CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array>>,<<Index>>,<<LoopP>>]
+  ///     CHECK-DAG:             <<IntAddr:i\d+>>       IntermediateAddress [<<Array>>,{{i\d+}}]
+  ///     CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<IntAddr>>,<<Index>>,<<LoopP>>]
   ///     CHECK-DAG:             <<Add:d\d+>>           VecAdd [<<Load>>,<<Repl>>,<<LoopP>>]
-  ///     CHECK-NOT:                                    IntermediateAddress
-  ///     CHECK-DAG:                                    VecStore [<<Array>>,<<Index>>,<<Add>>,<<LoopP>>]
+  ///     CHECK-DAG:                                    VecStore [<<IntAddr>>,<<Index>>,<<Add>>,<<LoopP>>]
   //
   /// CHECK-ELSE:
   //
@@ -224,7 +227,6 @@
   //
   //      IntermediateAddressIndex is not supported for SVE.
   ///     CHECK-NOT:                                    IntermediateAddressIndex
-  ///     CHECK-NOT:                                    IntermediateAddress
   //
   /// CHECK-ELSE:
   //
@@ -275,7 +277,8 @@
   ///     CHECK-DAG:             <<LoopP:j\d+>>         VecPredWhile
   ///     CHECK-DAG:             <<Index:i\d+>>         Phi
   ///     CHECK-DAG:                                    If
-  ///     CHECK-DAG:                                    VecStore [<<Array>>,<<Index>>,<<Repl>>,<<LoopP>>]
+  ///     CHECK-DAG:             <<IntAddr:i\d+>>       IntermediateAddress [<<Array>>,{{i\d+}}]
+  ///     CHECK-DAG:                                    VecStore [<<IntAddr>>,<<Index>>,<<Repl>>,<<LoopP>>]
   //
   /// CHECK-ELSE:
   //
@@ -327,9 +330,11 @@
   ///     CHECK-DAG:             <<LoopP:j\d+>>         VecPredWhile
   ///     CHECK-DAG:             <<Index:i\d+>>         Phi
   ///     CHECK-DAG:                                    If
-  ///     CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array1>>,<<Index>>,<<LoopP>>]
+  ///     CHECK-DAG:             <<IntAddr1:i\d+>>      IntermediateAddress [<<Array1>>,{{i\d+}}]
+  ///     CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<IntAddr1>>,<<Index>>,<<LoopP>>]
   ///     CHECK-DAG:             <<Cnv:d\d+>>           VecCnv [<<Load>>,<<LoopP>>]
-  ///     CHECK-DAG:                                    VecStore [<<Array2>>,<<Index>>,<<Cnv>>,<<LoopP>>]
+  ///     CHECK-DAG:             <<IntAddr2:i\d+>>      IntermediateAddress [<<Array2>>,{{i\d+}}]
+  ///     CHECK-DAG:                                    VecStore [<<IntAddr2>>,<<Index>>,<<Cnv>>,<<LoopP>>]
   //
   /// CHECK-ELSE:
   //
@@ -356,10 +361,11 @@
   ///     CHECK-DAG:             <<LoopP:j\d+>>         VecPredWhile
   ///     CHECK-DAG:             <<Index:i\d+>>         Phi
   ///     CHECK-DAG:                                    If
-  ///     CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<Array1>>,<<Index>>,<<LoopP>>]
+  ///     CHECK-DAG:             <<IntAddr1:i\d+>>      IntermediateAddress [<<Array1>>,{{i\d+}}]
+  ///     CHECK-DAG:             <<Load:d\d+>>          VecLoad [<<IntAddr1>>,<<Index>>,<<LoopP>>]
   ///     CHECK-DAG:             <<Cnv:d\d+>>           VecCnv [<<Load>>,<<LoopP>>]
-  ///     CHECK-NOT:                                    IntermediateAddress
-  ///     CHECK-DAG:                                    VecStore [<<Array2>>,<<Index>>,<<Cnv>>,<<LoopP>>]
+  ///     CHECK-DAG:             <<IntAddr2:i\d+>>      IntermediateAddress [<<Array2>>,{{i\d+}}]
+  ///     CHECK-DAG:                                    VecStore [<<IntAddr2>>,<<Index>>,<<Cnv>>,<<LoopP>>]
   //
   /// CHECK-ELSE:
   //
@@ -381,7 +387,6 @@
   //
   //      IntermediateAddressIndex is not supported for SVE.
   ///     CHECK-NOT:                                    IntermediateAddressIndex
-  ///     CHECK-NOT:                                    IntermediateAddress
   //
   /// CHECK-ELSE:
   //
diff --git a/test/655-checker-simd-arm-opt/expected-stdout.txt b/test/655-checker-simd-arm-opt/expected-stdout.txt
index b0aad4d..fd4f3bd 100644
--- a/test/655-checker-simd-arm-opt/expected-stdout.txt
+++ b/test/655-checker-simd-arm-opt/expected-stdout.txt
@@ -1 +1,2 @@
-passed
+encodableConstants passed
+SVEIntermediateAddress passed
diff --git a/test/655-checker-simd-arm-opt/src/Main.java b/test/655-checker-simd-arm-opt/src/Main.java
index 980593d..5412aab 100644
--- a/test/655-checker-simd-arm-opt/src/Main.java
+++ b/test/655-checker-simd-arm-opt/src/Main.java
@@ -18,6 +18,7 @@
  * Checker test for arm and arm64 simd optimizations.
  */
 public class Main {
+  static int[] arr;
 
   private static void expectEquals(int expected, int result) {
     if (expected != result) {
@@ -97,6 +98,33 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.SVEIntermediateAddress(int) loop_optimization (after)
+  /// CHECK-DAG: VecLoad  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.SVEIntermediateAddress(int) instruction_simplifier_arm64 (after)
+  /// CHECK-IF:     hasIsaFeature("sve")
+  ///     CHECK-DAG: <<IntAddr1:i\d+>> IntermediateAddress [{{l\d+}},{{i\d+}}]            loop:<<Loop:B\d+>>  outer_loop:none
+  ///     CHECK-DAG:                   VecLoad [<<IntAddr1>>,{{i\d+}},{{j\d+}}]           loop:<<Loop>>       outer_loop:none
+  ///     CHECK-DAG:                   VecAdd                                             loop:<<Loop>>       outer_loop:none
+  ///     CHECK-DAG: <<IntAddr2:i\d+>> IntermediateAddress [{{l\d+}},{{i\d+}}]            loop:<<Loop>>       outer_loop:none
+  ///     CHECK-DAG:                   VecStore [<<IntAddr2>>,{{i\d+}},{{d\d+}},{{j\d+}}] loop:<<Loop>>       outer_loop:none
+  /// CHECK-FI:
+  //
+  /// CHECK-START-ARM64: void Main.SVEIntermediateAddress(int) GVN$after_arch (after)
+  /// CHECK-IF:     hasIsaFeature("sve")
+  ///     CHECK-DAG: <<IntAddr:i\d+>>  IntermediateAddress [{{l\d+}},{{i\d+}}]            loop:<<Loop:B\d+>>  outer_loop:none
+  ///     CHECK-DAG:                   VecLoad [<<IntAddr>>,{{i\d+}},{{j\d+}}]            loop:<<Loop>>       outer_loop:none
+  ///     CHECK-DAG:                   VecAdd                                             loop:<<Loop>>       outer_loop:none
+  ///     CHECK-DAG:                   VecStore [<<IntAddr>>,{{i\d+}},{{d\d+}},{{j\d+}}]  loop:<<Loop>>       outer_loop:none
+  /// CHECK-FI:
+  static void SVEIntermediateAddress(int x) {
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      arr[i] += x;
+    }
+  }
+
   private static int sumArray(byte[] b, short[] s, char[] c, int[] a, long[] l, float[] f, double[] d) {
     int sum = 0;
     for (int i = 0; i < ARRAY_SIZE; i++) {
@@ -107,7 +135,7 @@
 
   public static final int ARRAY_SIZE = 128;
 
-  public static void main(String[] args) {
+  public static void checkEncodableConstants() {
     byte[] b = new byte[ARRAY_SIZE];
     short[] s = new short[ARRAY_SIZE];
     char[] c = new char[ARRAY_SIZE];
@@ -119,6 +147,28 @@
     encodableConstants(b, s, c, a, l, f, d);
     expectEquals(32640, sumArray(b, s, c, a, l, f, d));
 
-    System.out.println("passed");
+    System.out.println("encodableConstants passed");
+  }
+
+  public static void checkSVEIntermediateAddress() {
+    arr = new int[ARRAY_SIZE];
+
+    // Setup.
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      arr[i] = i;
+    }
+
+    // Arithmetic operations.
+    SVEIntermediateAddress(2);
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      expectEquals(i + 2, arr[i]);
+    }
+
+    System.out.println("SVEIntermediateAddress passed");
+  }
+
+  public static void main(String[] args) {
+    checkEncodableConstants();
+    checkSVEIntermediateAddress();
   }
 }