Revert^2 "RFC: ART: ARM64: Support SDOT/UDOT instructions."

This reverts commit f65c7be257b9ec7453b9dc7ffd73e8901403e199.

The Pixel 3 BoardConfig kryo385 issue has been fixed and no longer
blocks this patch.

NOTE: The feature is TURNED OFF by default.

Test: 684-checker-simd-dotprod.
Test: test-art-target, test-art-host.
Change-Id: Ib0fb8603b32004984ce8114447d56514d798111d
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 5a18c1f..df95c88 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -16,6 +16,7 @@
 
 #include "code_generator_arm64.h"
 
+#include "arch/arm64/instruction_set_features_arm64.h"
 #include "mirror/array-inl.h"
 #include "mirror/string.h"
 
@@ -37,6 +38,15 @@
 
 #define __ GetVIXLAssembler()->
 
+// Build-time switch for emitting the Armv8.4-A dot product instructions (SDOT/UDOT).
+// TODO: Enable dot product when there is a device to test it on.
+static constexpr bool kArm64EmitDotProdInstructions = false;
+
+// Returns true only if the build-time switch is on and codegen's ISA features report HasDotProd().
+static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
+  return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
+}
+
 void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
   HInstruction* input = instruction->InputAt(0);
@@ -1285,8 +1295,9 @@
   locations->SetInAt(2, Location::RequiresFpuRegister());
   locations->SetOut(Location::SameAsFirstInput());
 
-  // For Int8 and Uint8 we need a temp register.
-  if (DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) {
+  // For Int8 and Uint8 general case we need a temp register.
+  if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
+      !ShouldEmitDotProductInstructions(codegen_)) {
     locations->AddTemp(Location::RequiresFpuRegister());
   }
 }
@@ -1308,25 +1319,32 @@
   switch (inputs_data_size) {
     case 1u: {
       DCHECK_EQ(16u, a->GetVectorLength());
-      VRegister tmp = VRegisterFrom(locations->GetTemp(0));
       if (instruction->IsZeroExtending()) {
-        // TODO: Use Armv8.4-A UDOT instruction when it is available.
-        __ Umull(tmp.V8H(), left.V8B(), right.V8B());
-        __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
-        __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+        if (ShouldEmitDotProductInstructions(codegen_)) {
+          __ Udot(acc.V4S(), left.V16B(), right.V16B());
+        } else {
+          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+          __ Umull(tmp.V8H(), left.V8B(), right.V8B());
+          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
+          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
 
-        __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
-        __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
-        __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+          __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
+          __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
+          __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+        }
       } else {
-        // TODO: Use Armv8.4-A SDOT instruction when it is available.
-        __ Smull(tmp.V8H(), left.V8B(), right.V8B());
-        __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
-        __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+        if (ShouldEmitDotProductInstructions(codegen_)) {
+          __ Sdot(acc.V4S(), left.V16B(), right.V16B());
+        } else {
+          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+          __ Smull(tmp.V8H(), left.V8B(), right.V8B());
+          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
+          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
 
-        __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
-        __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
-        __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+          __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
+          __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
+          __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+        }
       }
       break;
     }