Revert^2 "RFC: ART: ARM64: Support SDOT/UDOT instructions."
This reverts commit f65c7be257b9ec7453b9dc7ffd73e8901403e199.
Pixel 3 BoardConfig kryo385 issue is fixed now and no longer blocks
this patch.
NOTE: The feature is TURNED OFF by default.
Test: 684-checker-simd-dotprod.
Test: test-art-target, test-art-host.
Change-Id: Ib0fb8603b32004984ce8114447d56514d798111d
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 5a18c1f..df95c88 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm64.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
@@ -37,6 +38,15 @@
#define __ GetVIXLAssembler()->
+// Build-time switch for emitting Armv8.4-A dot product (SDOT/UDOT) instructions.
+// TODO: Enable dot product when there is a device to test it on.
+static constexpr bool kArm64EmitDotProdInstructions = false;
+
+// True only if kArm64EmitDotProdInstructions is set and the ISA features report HasDotProd().
+static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
+ return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
+}
+
void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
HInstruction* input = instruction->InputAt(0);
@@ -1285,8 +1295,9 @@
locations->SetInAt(2, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
- // For Int8 and Uint8 we need a temp register.
- if (DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) {
+ // For Int8 and Uint8 we need a temp register unless dot product instructions are emitted.
+ if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
+ !ShouldEmitDotProductInstructions(codegen_)) {
locations->AddTemp(Location::RequiresFpuRegister());
}
}
@@ -1308,25 +1319,32 @@
switch (inputs_data_size) {
case 1u: {
DCHECK_EQ(16u, a->GetVectorLength());
- VRegister tmp = VRegisterFrom(locations->GetTemp(0));
if (instruction->IsZeroExtending()) {
- // TODO: Use Armv8.4-A UDOT instruction when it is available.
- __ Umull(tmp.V8H(), left.V8B(), right.V8B());
- __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
- __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+ if (ShouldEmitDotProductInstructions(codegen_)) {
+ __ Udot(acc.V4S(), left.V16B(), right.V16B());
+ } else {
+ VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+ __ Umull(tmp.V8H(), left.V8B(), right.V8B());
+ __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
+ __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
- __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
- __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
- __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+ __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
+ __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
+ __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+ }
} else {
- // TODO: Use Armv8.4-A SDOT instruction when it is available.
- __ Smull(tmp.V8H(), left.V8B(), right.V8B());
- __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
- __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+ if (ShouldEmitDotProductInstructions(codegen_)) {
+ __ Sdot(acc.V4S(), left.V16B(), right.V16B());
+ } else {
+ VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+ __ Smull(tmp.V8H(), left.V8B(), right.V8B());
+ __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
+ __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
- __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
- __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
- __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+ __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
+ __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
+ __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+ }
}
break;
}