ARM64: Encode constants when possible.

Small optimization which improves HVecReplicateScalar by encoding
immediates directly into the NEON instruction when possible, instead of
generating the constant in a GPR and transferring it into a NEON register.

Test: test-art-target, test-art-host.
Change-Id: I2113bbd98c0dc8433d2b7048921b9ed7c35ef1c5
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index a41adca..f422b9f 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -22,6 +22,8 @@
 namespace art {
 namespace arm64 {
 
+using helpers::ARM64EncodableConstantOrRegister;
+using helpers::Arm64CanEncodeConstantAsImmediate;
 using helpers::DRegisterFrom;
 using helpers::VRegisterFrom;
 using helpers::HeapOperand;
@@ -34,6 +36,7 @@
 
 void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  HInstruction* input = instruction->InputAt(0);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -41,13 +44,19 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
-      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
       locations->SetOut(Location::RequiresFpuRegister());
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      if (input->IsConstant() &&
+          Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
+        locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
+        locations->SetOut(Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(0, Location::RequiresFpuRegister());
+        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      }
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -57,33 +66,58 @@
 
 void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Location src_loc = locations->InAt(0);
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
+      if (src_loc.IsConstant()) {
+        __ Movi(dst.V16B(), Int64ConstantFrom(src_loc));
+      } else {
+        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
+      }
       break;
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
+      if (src_loc.IsConstant()) {
+        __ Movi(dst.V8H(), Int64ConstantFrom(src_loc));
+      } else {
+        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
+      }
       break;
     case Primitive::kPrimInt:
       DCHECK_EQ(4u, instruction->GetVectorLength());
-      __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
+      if (src_loc.IsConstant()) {
+        __ Movi(dst.V4S(), Int64ConstantFrom(src_loc));
+      } else {
+        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
+      }
       break;
     case Primitive::kPrimLong:
       DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ Dup(dst.V2D(), XRegisterFrom(locations->InAt(0)));
+      if (src_loc.IsConstant()) {
+        __ Movi(dst.V2D(), Int64ConstantFrom(src_loc));
+      } else {
+        __ Dup(dst.V2D(), XRegisterFrom(src_loc));
+      }
       break;
     case Primitive::kPrimFloat:
       DCHECK_EQ(4u, instruction->GetVectorLength());
-      __ Dup(dst.V4S(), VRegisterFrom(locations->InAt(0)).V4S(), 0);
+      if (src_loc.IsConstant()) {
+        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
+      } else {
+        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
+      }
       break;
     case Primitive::kPrimDouble:
       DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ Dup(dst.V2D(), VRegisterFrom(locations->InAt(0)).V2D(), 0);
+      if (src_loc.IsConstant()) {
+        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
+      } else {
+        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
+      }
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 721f74e..e73fd7d 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -234,9 +234,20 @@
   }
 }
 
-inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
-  DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant())
-      << constant->DebugName();
+inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
+  int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+
+  // TODO: Improve this when IsSIMDConstantEncodable method is implemented in VIXL.
+  if (instr->IsVecReplicateScalar()) {
+    if (constant->IsLongConstant()) {
+      return false;
+    } else if (constant->IsFloatConstant()) {
+      return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
+    } else if (constant->IsDoubleConstant()) {
+      return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
+    }
+    return IsUint<8>(value);
+  }
 
   // For single uses we let VIXL handle the constant generation since it will
   // use registers that are not managed by the register allocator (wip0, wip1).
@@ -249,8 +260,6 @@
     return true;
   }
 
-  int64_t value = CodeGenerator::GetInt64ValueOf(constant);
-
   if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
     // Uses logical operations.
     return vixl::aarch64::Assembler::IsImmLogical(value, vixl::aarch64::kXRegSize);
@@ -276,7 +285,7 @@
 inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
                                                         HInstruction* instr) {
   if (constant->IsConstant()
-      && CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+      && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
     return Location::ConstantLocation(constant->AsConstant());
   }
 
diff --git a/test/655-checker-simd-arm-opt/expected.txt b/test/655-checker-simd-arm-opt/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/655-checker-simd-arm-opt/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/655-checker-simd-arm-opt/info.txt b/test/655-checker-simd-arm-opt/info.txt
new file mode 100644
index 0000000..198cc95
--- /dev/null
+++ b/test/655-checker-simd-arm-opt/info.txt
@@ -0,0 +1 @@
+Checker test for arm and arm64 simd optimizations.
diff --git a/test/655-checker-simd-arm-opt/src/Main.java b/test/655-checker-simd-arm-opt/src/Main.java
new file mode 100644
index 0000000..7b61dd7
--- /dev/null
+++ b/test/655-checker-simd-arm-opt/src/Main.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Checker test for arm and arm64 simd optimizations.
+ */
+public class Main {
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.encodableConstants(byte[], short[], char[], int[], long[], float[], double[]) disassembly (after)
+  /// CHECK-DAG: <<C1:i\d+>>   IntConstant 1
+  /// CHECK-DAG: <<C2:i\d+>>   IntConstant 2
+  /// CHECK-DAG: <<C3:i\d+>>   IntConstant 3
+  /// CHECK-DAG: <<C4:i\d+>>   IntConstant 4
+  /// CHECK-DAG: <<L5:j\d+>>   LongConstant 5
+  /// CHECK-DAG: <<F2:f\d+>>   FloatConstant 2
+  /// CHECK-DAG: <<D20:d\d+>>  DoubleConstant 20
+  //
+  /// CHECK-DAG:               VecReplicateScalar [<<C1>>]
+  /// CHECK-DAG:               movi v{{[0-9]+}}.16b, #0x1
+  /// CHECK-DAG:               VecReplicateScalar [<<C2>>]
+  /// CHECK-DAG:               movi v{{[0-9]+}}.8h, #0x2, lsl #0
+  /// CHECK-DAG:               VecReplicateScalar [<<C3>>]
+  /// CHECK-DAG:               movi v{{[0-9]+}}.8h, #0x3, lsl #0
+  /// CHECK-DAG:               VecReplicateScalar [<<C4>>]
+  /// CHECK-DAG:               movi v{{[0-9]+}}.4s, #0x4, lsl #0
+  /// CHECK-DAG:               VecReplicateScalar [<<L5>>]
+  /// CHECK-DAG:               dup v{{[0-9]+}}.2d, x{{[0-9]+}}
+  /// CHECK-DAG:               VecReplicateScalar [<<F2>>]
+  /// CHECK-DAG:               fmov v{{[0-9]+}}.4s, #0x0
+  /// CHECK-DAG:               VecReplicateScalar [<<D20>>]
+  /// CHECK-DAG:               fmov v{{[0-9]+}}.2d, #0x34
+  private static void encodableConstants(byte[] b, short[] s, char[] c, int[] a, long[] l, float[] f, double[] d) {
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      b[i] += 1;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      s[i] += 2;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      c[i] += 3;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      a[i] += 4;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      l[i] += 5;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      f[i] += 2.0f;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      d[i] += 20.0;
+    }
+  }
+
+  private static int sumArray(byte[] b, short[] s, char[] c, int[] a, long[] l, float[] f, double[] d) {
+    int sum = 0;
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      sum += b[i] + s[i] + c[i] + a[i] + l[i] + f[i] + d[i];
+    }
+    return sum;
+  }
+
+  public static final int ARRAY_SIZE = 100;
+
+  public static void main(String[] args) {
+    byte[] b = new byte[ARRAY_SIZE];
+    short[] s = new short[ARRAY_SIZE];
+    char[] c = new char[ARRAY_SIZE];
+    int[] a = new int[ARRAY_SIZE];
+    long[] l = new long[ARRAY_SIZE];
+    float[] f = new float[ARRAY_SIZE];
+    double[] d = new double[ARRAY_SIZE];
+
+    encodableConstants(b, s, c, a, l, f, d);
+    expectEquals(3700, sumArray(b, s, c, a, l, f, d));
+
+    System.out.println("passed");
+  }
+}