Merge "ARM64: Encode constants when it is possible."
am: db1a013335
Change-Id: Ibf44b31accb4c38f7737ca7ce5c052c71d2257b0
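This change lets the ARM64 backend accept encodable constants directly as inputs to VecReplicateScalar, emitting a single MOVI (or FMOV for FP8-encodable float/double values) with an immediate instead of materializing the constant in a core or FP register and duplicating it. A minimal before/after sketch for replicating the byte constant 1 across a vector (register numbers are illustrative, and the explicit mov in the "before" case assumes the constant previously had to be materialized in a core register for the dup):

    Before (constant must live in a core register):
        mov   w0, #1
        dup   v0.16b, w0
    After (constant encoded directly in the instruction):
        movi  v0.16b, #0x1

The fmov expectations in the new checker test below show the corresponding floating-point case.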
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index a41adca..f422b9f 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -22,6 +22,8 @@
namespace art {
namespace arm64 {
+using helpers::ARM64EncodableConstantOrRegister;
+using helpers::Arm64CanEncodeConstantAsImmediate;
using helpers::DRegisterFrom;
using helpers::VRegisterFrom;
using helpers::HeapOperand;
@@ -34,6 +36,7 @@
void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ HInstruction* input = instruction->InputAt(0);
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -41,13 +44,19 @@
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ if (input->IsConstant() &&
+ Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
+ locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ }
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -57,33 +66,58 @@
void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
+ Location src_loc = locations->InAt(0);
VRegister dst = VRegisterFrom(locations->Out());
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
+ if (src_loc.IsConstant()) {
+ __ Movi(dst.V16B(), Int64ConstantFrom(src_loc));
+ } else {
+ __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
+ }
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
DCHECK_EQ(8u, instruction->GetVectorLength());
- __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
+ if (src_loc.IsConstant()) {
+ __ Movi(dst.V8H(), Int64ConstantFrom(src_loc));
+ } else {
+ __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
+ }
break;
case Primitive::kPrimInt:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
+ if (src_loc.IsConstant()) {
+ __ Movi(dst.V4S(), Int64ConstantFrom(src_loc));
+ } else {
+ __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
+ }
break;
case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Dup(dst.V2D(), XRegisterFrom(locations->InAt(0)));
+ if (src_loc.IsConstant()) {
+ __ Movi(dst.V2D(), Int64ConstantFrom(src_loc));
+ } else {
+ __ Dup(dst.V2D(), XRegisterFrom(src_loc));
+ }
break;
case Primitive::kPrimFloat:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ Dup(dst.V4S(), VRegisterFrom(locations->InAt(0)).V4S(), 0);
+ if (src_loc.IsConstant()) {
+ __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
+ } else {
+ __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
+ }
break;
case Primitive::kPrimDouble:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ Dup(dst.V2D(), VRegisterFrom(locations->InAt(0)).V2D(), 0);
+ if (src_loc.IsConstant()) {
+ __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
+ } else {
+ __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
+ }
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 721f74e..e73fd7d 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -234,9 +234,20 @@
}
}
-inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
- DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant())
- << constant->DebugName();
+inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
+ int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+
+  // TODO: Improve this when the IsSIMDConstantEncodable method is implemented in VIXL.
+ if (instr->IsVecReplicateScalar()) {
+ if (constant->IsLongConstant()) {
+ return false;
+ } else if (constant->IsFloatConstant()) {
+ return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
+ } else if (constant->IsDoubleConstant()) {
+ return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
+ }
+ return IsUint<8>(value);
+ }
// For single uses we let VIXL handle the constant generation since it will
// use registers that are not managed by the register allocator (wip0, wip1).
@@ -249,8 +260,6 @@
return true;
}
- int64_t value = CodeGenerator::GetInt64ValueOf(constant);
-
if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
// Uses logical operations.
return vixl::aarch64::Assembler::IsImmLogical(value, vixl::aarch64::kXRegSize);
@@ -276,7 +285,7 @@
inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
HInstruction* instr) {
if (constant->IsConstant()
- && CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+ && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
return Location::ConstantLocation(constant->AsConstant());
}
diff --git a/test/655-checker-simd-arm-opt/expected.txt b/test/655-checker-simd-arm-opt/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/655-checker-simd-arm-opt/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/655-checker-simd-arm-opt/info.txt b/test/655-checker-simd-arm-opt/info.txt
new file mode 100644
index 0000000..198cc95
--- /dev/null
+++ b/test/655-checker-simd-arm-opt/info.txt
@@ -0,0 +1 @@
+Checker test for ARM and ARM64 SIMD optimizations.
diff --git a/test/655-checker-simd-arm-opt/src/Main.java b/test/655-checker-simd-arm-opt/src/Main.java
new file mode 100644
index 0000000..7b61dd7
--- /dev/null
+++ b/test/655-checker-simd-arm-opt/src/Main.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Checker test for ARM and ARM64 SIMD optimizations.
+ */
+public class Main {
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ /// CHECK-START-ARM64: void Main.encodableConstants(byte[], short[], char[], int[], long[], float[], double[]) disassembly (after)
+ /// CHECK-DAG: <<C1:i\d+>> IntConstant 1
+ /// CHECK-DAG: <<C2:i\d+>> IntConstant 2
+ /// CHECK-DAG: <<C3:i\d+>> IntConstant 3
+ /// CHECK-DAG: <<C4:i\d+>> IntConstant 4
+ /// CHECK-DAG: <<L5:j\d+>> LongConstant 5
+ /// CHECK-DAG: <<F2:f\d+>> FloatConstant 2
+ /// CHECK-DAG: <<D20:d\d+>> DoubleConstant 20
+ //
+ /// CHECK-DAG: VecReplicateScalar [<<C1>>]
+ /// CHECK-DAG: movi v{{[0-9]+}}.16b, #0x1
+ /// CHECK-DAG: VecReplicateScalar [<<C2>>]
+ /// CHECK-DAG: movi v{{[0-9]+}}.8h, #0x2, lsl #0
+ /// CHECK-DAG: VecReplicateScalar [<<C3>>]
+ /// CHECK-DAG: movi v{{[0-9]+}}.8h, #0x3, lsl #0
+ /// CHECK-DAG: VecReplicateScalar [<<C4>>]
+ /// CHECK-DAG: movi v{{[0-9]+}}.4s, #0x4, lsl #0
+ /// CHECK-DAG: VecReplicateScalar [<<L5>>]
+ /// CHECK-DAG: dup v{{[0-9]+}}.2d, x{{[0-9]+}}
+ /// CHECK-DAG: VecReplicateScalar [<<F2>>]
+ /// CHECK-DAG: fmov v{{[0-9]+}}.4s, #0x0
+ /// CHECK-DAG: VecReplicateScalar [<<D20>>]
+ /// CHECK-DAG: fmov v{{[0-9]+}}.2d, #0x34
+ private static void encodableConstants(byte[] b, short[] s, char[] c, int[] a, long[] l, float[] f, double[] d) {
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ b[i] += 1;
+ }
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ s[i] += 2;
+ }
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ c[i] += 3;
+ }
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ a[i] += 4;
+ }
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ l[i] += 5;
+ }
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ f[i] += 2.0f;
+ }
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ d[i] += 20.0;
+ }
+ }
+
+ private static int sumArray(byte[] b, short[] s, char[] c, int[] a, long[] l, float[] f, double[] d) {
+ int sum = 0;
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ sum += b[i] + s[i] + c[i] + a[i] + l[i] + f[i] + d[i];
+ }
+ return sum;
+ }
+
+ public static final int ARRAY_SIZE = 100;
+
+ public static void main(String[] args) {
+ byte[] b = new byte[ARRAY_SIZE];
+ short[] s = new short[ARRAY_SIZE];
+ char[] c = new char[ARRAY_SIZE];
+ int[] a = new int[ARRAY_SIZE];
+ long[] l = new long[ARRAY_SIZE];
+ float[] f = new float[ARRAY_SIZE];
+ double[] d = new double[ARRAY_SIZE];
+
+ encodableConstants(b, s, c, a, l, f, d);
+ expectEquals(3700, sumArray(b, s, c, a, l, f, d));
+
+ System.out.println("passed");
+ }
+}