ARM64: Enable SDOT/UDOT instruction emission.
Enables emission of SDOT/UDOT instructions on arm64
targets that support the DOTPROD feature. Currently only
the vector VecDotProd instruction can emit them.
Test: test-art-target.
Test: test-art-target --instruction-set-features runtime.
Test: 684-checker-simd-dotprod.
Change-Id: I57a16e340a42879ff19a3b2439ea11525dbeaccc
diff --git a/compiler/optimizing/code_generator_vector_arm64_neon.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc
index 714d984..2a4c785 100644
--- a/compiler/optimizing/code_generator_vector_arm64_neon.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc
@@ -40,13 +40,9 @@
#define __ GetVIXLAssembler()->
-// Build-time switch for Armv8.4-a dot product instructions.
-// TODO: Enable dot product when there is a device to test it on.
-static constexpr bool kArm64EmitDotProdInstructions = false;
-
// Returns whether dot product instructions should be emitted.
static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
- return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
+ return codegen_->GetInstructionSetFeatures().HasDotProd();
}
void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc
index d6fa0f6..1761dfc 100644
--- a/compiler/optimizing/code_generator_vector_arm64_sve.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc
@@ -40,13 +40,9 @@
#define __ GetVIXLAssembler()->
-// Build-time switch for Armv8.4-a dot product instructions.
-// TODO: Enable dot product when there is a device to test it on.
-static constexpr bool kArm64EmitDotProdInstructions = false;
-
// Returns whether dot product instructions should be emitted.
static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
- return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
+ return codegen_->GetInstructionSetFeatures().HasDotProd();
}
void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
diff --git a/test/684-checker-simd-dotprod/src/other/TestByte.java b/test/684-checker-simd-dotprod/src/other/TestByte.java
index 9acfc59..608d021 100644
--- a/test/684-checker-simd-dotprod/src/other/TestByte.java
+++ b/test/684-checker-simd-dotprod/src/other/TestByte.java
@@ -34,7 +34,7 @@
/// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (after)
+ /// CHECK-START-ARM64: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (after)
/// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none
/// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none
/// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none
@@ -48,6 +48,16 @@
//
/// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none
/// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none
+
+
+ /// CHECK-START-ARM64: int other.TestByte.testDotProdSimple(byte[], byte[]) disassembly (after)
+ /// CHECK: VecDotProd
+ /// CHECK-IF: hasIsaFeature("dotprod")
+ /// CHECK-NEXT: sdot v{{\d+}}.4s, v{{\d+}}.16b, v{{\d+}}.16b
+ /// CHECK-ELSE:
+ /// CHECK-NOT: sdot
+ /// CHECK-NOT: udot
+ /// CHECK-FI:
public static final int testDotProdSimple(byte[] a, byte[] b) {
int s = 1;
for (int i = 0; i < b.length; i++) {
@@ -72,7 +82,7 @@
/// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (after)
+ /// CHECK-START-ARM64: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (after)
/// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none
/// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none
/// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none
@@ -109,7 +119,7 @@
/// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (after)
+ /// CHECK-START-ARM64: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (after)
/// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none
/// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none
/// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none
@@ -123,6 +133,15 @@
//
/// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none
/// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none
+
+ /// CHECK-START-ARM64: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) disassembly (after)
+ /// CHECK: VecDotProd
+ /// CHECK-IF: hasIsaFeature("dotprod")
+ /// CHECK-NEXT: udot v{{\d+}}.4s, v{{\d+}}.16b, v{{\d+}}.16b
+ /// CHECK-ELSE:
+ /// CHECK-NOT: sdot
+ /// CHECK-NOT: udot
+ /// CHECK-FI:
public static final int testDotProdSimpleUnsigned(byte[] a, byte[] b) {
int s = 1;
for (int i = 0; i < b.length; i++) {
@@ -147,7 +166,7 @@
/// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (after)
+ /// CHECK-START-ARM64: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (after)
/// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none
/// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none
/// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none
@@ -188,7 +207,7 @@
/// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (after)
+ /// CHECK-START-ARM64: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (after)
/// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none
/// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none
/// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none
@@ -229,7 +248,7 @@
/// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none
- /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (after)
+ /// CHECK-START-ARM64: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (after)
/// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none
/// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none
/// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none
@@ -255,7 +274,7 @@
return s - 1;
}
- /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSignedWidening(byte[], byte[]) loop_optimization (after)
+ /// CHECK-START-ARM64: int other.TestByte.testDotProdSignedWidening(byte[], byte[]) loop_optimization (after)
/// CHECK-DAG: VecDotProd type:Int8
public static final int testDotProdSignedWidening(byte[] a, byte[] b) {
int s = 1;
@@ -266,7 +285,7 @@
return s - 1;
}
- /// CHECK-START-{ARM64}: int other.TestByte.testDotProdParamSigned(int, byte[]) loop_optimization (after)
+ /// CHECK-START-ARM64: int other.TestByte.testDotProdParamSigned(int, byte[]) loop_optimization (after)
/// CHECK-DAG: VecDotProd type:Int8
public static final int testDotProdParamSigned(int x, byte[] b) {
int s = 1;
@@ -277,7 +296,7 @@
return s - 1;
}
- /// CHECK-START-{ARM64}: int other.TestByte.testDotProdParamUnsigned(int, byte[]) loop_optimization (after)
+ /// CHECK-START-ARM64: int other.TestByte.testDotProdParamUnsigned(int, byte[]) loop_optimization (after)
/// CHECK-DAG: VecDotProd type:Uint8
public static final int testDotProdParamUnsigned(int x, byte[] b) {
int s = 1;