Merge "ARM64: Encode constants when it is possible."
am: db1a013335

Change-Id: Ibf44b31accb4c38f7737ca7ce5c052c71d2257b0
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index a41adca..f422b9f 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -22,6 +22,8 @@
 namespace art {
 namespace arm64 {
 
+using helpers::ARM64EncodableConstantOrRegister;
+using helpers::Arm64CanEncodeConstantAsImmediate;
 using helpers::DRegisterFrom;
 using helpers::VRegisterFrom;
 using helpers::HeapOperand;
@@ -34,6 +36,7 @@
 
 void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  HInstruction* input = instruction->InputAt(0);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -41,13 +44,19 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
-      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
       locations->SetOut(Location::RequiresFpuRegister());
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      if (input->IsConstant() &&
+          Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
+        locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
+        locations->SetOut(Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(0, Location::RequiresFpuRegister());
+        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      }
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -57,33 +66,58 @@
 
 void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Location src_loc = locations->InAt(0);
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
+      if (src_loc.IsConstant()) {
+        __ Movi(dst.V16B(), Int64ConstantFrom(src_loc));
+      } else {
+        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
+      }
       break;
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
+      if (src_loc.IsConstant()) {
+        __ Movi(dst.V8H(), Int64ConstantFrom(src_loc));
+      } else {
+        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
+      }
       break;
     case Primitive::kPrimInt:
       DCHECK_EQ(4u, instruction->GetVectorLength());
-      __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
+      if (src_loc.IsConstant()) {
+        __ Movi(dst.V4S(), Int64ConstantFrom(src_loc));
+      } else {
+        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
+      }
       break;
     case Primitive::kPrimLong:
       DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ Dup(dst.V2D(), XRegisterFrom(locations->InAt(0)));
+      if (src_loc.IsConstant()) {
+        __ Movi(dst.V2D(), Int64ConstantFrom(src_loc));
+      } else {
+        __ Dup(dst.V2D(), XRegisterFrom(src_loc));
+      }
       break;
     case Primitive::kPrimFloat:
       DCHECK_EQ(4u, instruction->GetVectorLength());
-      __ Dup(dst.V4S(), VRegisterFrom(locations->InAt(0)).V4S(), 0);
+      if (src_loc.IsConstant()) {
+        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
+      } else {
+        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
+      }
       break;
     case Primitive::kPrimDouble:
       DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ Dup(dst.V2D(), VRegisterFrom(locations->InAt(0)).V2D(), 0);
+      if (src_loc.IsConstant()) {
+        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
+      } else {
+        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
+      }
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 721f74e..e73fd7d 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -234,9 +234,20 @@
   }
 }
 
-inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
-  DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant())
-      << constant->DebugName();
+inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
+  int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+
+  // TODO: Improve this when IsSIMDConstantEncodable method is implemented in VIXL.
+  if (instr->IsVecReplicateScalar()) {
+    if (constant->IsLongConstant()) {
+      return false;
+    } else if (constant->IsFloatConstant()) {
+      return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
+    } else if (constant->IsDoubleConstant()) {
+      return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
+    }
+    return IsUint<8>(value);
+  }
 
   // For single uses we let VIXL handle the constant generation since it will
   // use registers that are not managed by the register allocator (wip0, wip1).
@@ -249,8 +260,6 @@
     return true;
   }
 
-  int64_t value = CodeGenerator::GetInt64ValueOf(constant);
-
   if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
     // Uses logical operations.
     return vixl::aarch64::Assembler::IsImmLogical(value, vixl::aarch64::kXRegSize);
@@ -276,7 +285,7 @@
 inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
                                                         HInstruction* instr) {
   if (constant->IsConstant()
-      && CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+      && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
     return Location::ConstantLocation(constant->AsConstant());
   }
 
diff --git a/test/655-checker-simd-arm-opt/expected.txt b/test/655-checker-simd-arm-opt/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/655-checker-simd-arm-opt/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/655-checker-simd-arm-opt/info.txt b/test/655-checker-simd-arm-opt/info.txt
new file mode 100644
index 0000000..198cc95
--- /dev/null
+++ b/test/655-checker-simd-arm-opt/info.txt
@@ -0,0 +1 @@
+Checker test for ARM and ARM64 SIMD optimizations.
diff --git a/test/655-checker-simd-arm-opt/src/Main.java b/test/655-checker-simd-arm-opt/src/Main.java
new file mode 100644
index 0000000..7b61dd7
--- /dev/null
+++ b/test/655-checker-simd-arm-opt/src/Main.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Checker test for ARM and ARM64 SIMD optimizations.
+ */
+public class Main {
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.encodableConstants(byte[], short[], char[], int[], long[], float[], double[]) disassembly (after)
+  /// CHECK-DAG: <<C1:i\d+>>   IntConstant 1
+  /// CHECK-DAG: <<C2:i\d+>>   IntConstant 2
+  /// CHECK-DAG: <<C3:i\d+>>   IntConstant 3
+  /// CHECK-DAG: <<C4:i\d+>>   IntConstant 4
+  /// CHECK-DAG: <<L5:j\d+>>   LongConstant 5
+  /// CHECK-DAG: <<F2:f\d+>>   FloatConstant 2
+  /// CHECK-DAG: <<D20:d\d+>>  DoubleConstant 20
+  //
+  /// CHECK-DAG:               VecReplicateScalar [<<C1>>]
+  /// CHECK-DAG:               movi v{{[0-9]+}}.16b, #0x1
+  /// CHECK-DAG:               VecReplicateScalar [<<C2>>]
+  /// CHECK-DAG:               movi v{{[0-9]+}}.8h, #0x2, lsl #0
+  /// CHECK-DAG:               VecReplicateScalar [<<C3>>]
+  /// CHECK-DAG:               movi v{{[0-9]+}}.8h, #0x3, lsl #0
+  /// CHECK-DAG:               VecReplicateScalar [<<C4>>]
+  /// CHECK-DAG:               movi v{{[0-9]+}}.4s, #0x4, lsl #0
+  /// CHECK-DAG:               VecReplicateScalar [<<L5>>]
+  /// CHECK-DAG:               dup v{{[0-9]+}}.2d, x{{[0-9]+}}
+  /// CHECK-DAG:               VecReplicateScalar [<<F2>>]
+  /// CHECK-DAG:               fmov v{{[0-9]+}}.4s, #0x0
+  /// CHECK-DAG:               VecReplicateScalar [<<D20>>]
+  /// CHECK-DAG:               fmov v{{[0-9]+}}.2d, #0x34
+  private static void encodableConstants(byte[] b, short[] s, char[] c, int[] a, long[] l, float[] f, double[] d) {
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      b[i] += 1;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      s[i] += 2;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      c[i] += 3;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      a[i] += 4;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      l[i] += 5;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      f[i] += 2.0f;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      d[i] += 20.0;
+    }
+  }
+
+  private static int sumArray(byte[] b, short[] s, char[] c, int[] a, long[] l, float[] f, double[] d) {
+    int sum = 0;
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      sum += b[i] + s[i] + c[i] + a[i] + l[i] + f[i] + d[i];
+    }
+    return sum;
+  }
+
+  public static final int ARRAY_SIZE = 100;
+
+  public static void main(String[] args) {
+    byte[] b = new byte[ARRAY_SIZE];
+    short[] s = new short[ARRAY_SIZE];
+    char[] c = new char[ARRAY_SIZE];
+    int[] a = new int[ARRAY_SIZE];
+    long[] l = new long[ARRAY_SIZE];
+    float[] f = new float[ARRAY_SIZE];
+    double[] d = new double[ARRAY_SIZE];
+
+    encodableConstants(b, s, c, a, l, f, d);
+    expectEquals(3700, sumArray(b, s, c, a, l, f, d));
+
+    System.out.println("passed");
+  }
+}