RFC: ARM64: Split arm64 codegen into scalar and vector (SVE and NEON).

This is the first CL in a series introducing arm64 SVE support in
ART. The patch splits the codegen functionality into scalar and
vector parts, and for the latter introduces separate NEON and SVE
implementations. The SVE implementation is currently an exact copy
of the NEON one, for the sake of testing and an easy diff when the
next CL adds actual SVE instruction support.

The patch effectively doesn't change any behavior: NEON mode is
still used for vector instructions, and tests pass.
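
For context, the visitor-selection pattern this patch introduces can
be modelled by the minimal, self-contained C++ sketch below. The
types are simplified stand-ins (the real classes take HGraph* and
CodeGeneratorARM64* and override many more visitors); only the names
kArm64AllowSVE, ShouldUseSVE() and the virtual SIMD helper come from
this patch, everything else is illustrative.

    #include <iostream>

    static constexpr bool kArm64AllowSVE = false;  // SVE not enabled yet, as in this patch

    // Base visitor: scalar codegen plus pure-virtual SIMD helpers that
    // the NEON/SVE subclasses implement differently.
    class InstructionCodeGeneratorARM64 {
     public:
      virtual ~InstructionCodeGeneratorARM64() {}
      virtual void MoveToSIMDStackSlot() = 0;  // the real helper takes Location args
    };

    class InstructionCodeGeneratorARM64Neon : public InstructionCodeGeneratorARM64 {
     public:
      void MoveToSIMDStackSlot() override { std::cout << "NEON: 128-bit Q-register move\n"; }
    };

    class InstructionCodeGeneratorARM64Sve : public InstructionCodeGeneratorARM64 {
     public:
      void MoveToSIMDStackSlot() override { std::cout << "SVE: scalable-vector move\n"; }
    };

    class CodeGeneratorARM64 {
     public:
      CodeGeneratorARM64() {
        // Both visitors exist as members; a pointer picks the active
        // one once, at construction time.
        instruction_visitor_ = ShouldUseSVE()
            ? static_cast<InstructionCodeGeneratorARM64*>(&visitor_sve_)
            : &visitor_neon_;
      }
      bool ShouldUseSVE() const { return kArm64AllowSVE && has_sve_; }
      InstructionCodeGeneratorARM64* GetInstructionCodeGeneratorArm64() {
        return instruction_visitor_;
      }
     private:
      bool has_sve_ = false;  // stand-in for GetInstructionSetFeatures().HasSVE()
      InstructionCodeGeneratorARM64Neon visitor_neon_;
      InstructionCodeGeneratorARM64Sve visitor_sve_;
      InstructionCodeGeneratorARM64* instruction_visitor_;
    };

    int main() {
      CodeGeneratorARM64 codegen;
      // With kArm64AllowSVE == false this always dispatches to NEON.
      codegen.GetInstructionCodeGeneratorArm64()->MoveToSIMDStackSlot();
      return 0;
    }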

Test: test-art-target.
Change-Id: I5f7f2c8218330998e5a733a56f42473526cd58e6
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 84bf491..ff2be47 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -223,6 +223,9 @@
   virtual const Assembler& GetAssembler() const = 0;
   virtual size_t GetWordSize() const = 0;
 
+  // Returns whether the target supports predicated SIMD instructions.
+  virtual bool SupportsPredicatedSIMD() const { return false; }
+
   // Get FP register width in bytes for spilling/restoring in the slow paths.
   //
   // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 001fcb1..09c801b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -75,7 +75,6 @@
 using helpers::OutputCPURegister;
 using helpers::OutputFPRegister;
 using helpers::OutputRegister;
-using helpers::QRegisterFrom;
 using helpers::RegisterFrom;
 using helpers::StackOperandFrom;
 using helpers::VIXLRegCodeFromART;
@@ -177,6 +176,7 @@
 
   CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
   const unsigned v_reg_size_in_bits = codegen->GetSlowPathFPWidth() * 8;
+  DCHECK_LE(codegen->GetSIMDRegisterWidth(), kQRegSizeInBytes);
   CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size_in_bits, fp_spills);
 
   MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
@@ -426,10 +426,10 @@
     LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);  // Only saves live 128-bit regs for SIMD.
+    SaveLiveRegisters(codegen, locations);  // Only saves live vector regs for SIMD.
     arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, locations);  // Only restores live 128-bit regs for SIMD.
+    RestoreLiveRegisters(codegen, locations);  // Only restores live vector regs for SIMD.
     if (successor_ == nullptr) {
       __ B(GetReturnLabel());
     } else {
@@ -883,8 +883,10 @@
                     stats),
       block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
       jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
-      location_builder_(graph, this),
-      instruction_visitor_(graph, this),
+      location_builder_neon_(graph, this),
+      instruction_visitor_neon_(graph, this),
+      location_builder_sve_(graph, this),
+      instruction_visitor_sve_(graph, this),
       move_resolver_(graph->GetAllocator(), this),
       assembler_(graph->GetAllocator(),
                  compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
@@ -909,6 +911,19 @@
                                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
+
+  bool use_sve = ShouldUseSVE();
+  if (use_sve) {
+    location_builder_ = &location_builder_sve_;
+    instruction_visitor_ = &instruction_visitor_sve_;
+  } else {
+    location_builder_ = &location_builder_neon_;
+    instruction_visitor_ = &instruction_visitor_neon_;
+  }
+}
+
+bool CodeGeneratorARM64::ShouldUseSVE() const {
+  return kArm64AllowSVE && GetInstructionSetFeatures().HasSVE();
 }
 
 #define __ GetVIXLAssembler()->
@@ -1038,9 +1053,9 @@
     scratch = LocationFrom(vixl_temps_.AcquireX());
   } else {
     DCHECK_EQ(kind, Location::kFpuRegister);
-    scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
-        ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
-        : vixl_temps_.AcquireD());
+    scratch = codegen_->GetGraph()->HasSIMD()
+        ? codegen_->GetInstructionCodeGeneratorArm64()->AllocateSIMDScratchLocation(&vixl_temps_)
+        : LocationFrom(vixl_temps_.AcquireD());
   }
   AddScratchLocation(scratch);
   return scratch;
@@ -1051,7 +1066,11 @@
     vixl_temps_.Release(XRegisterFrom(loc));
   } else {
     DCHECK(loc.IsFpuRegister());
-    vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
+    if (codegen_->GetGraph()->HasSIMD()) {
+      codegen_->GetInstructionCodeGeneratorArm64()->FreeSIMDScratchLocation(loc, &vixl_temps_);
+    } else {
+      vixl_temps_.Release(DRegisterFrom(loc));
+    }
   }
   RemoveScratchLocation(loc);
 }
@@ -1434,7 +1453,7 @@
       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
       __ Ldr(dst, StackOperandFrom(source));
     } else if (source.IsSIMDStackSlot()) {
-      __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
+      GetInstructionCodeGeneratorArm64()->LoadSIMDRegFromStack(destination, source);
     } else if (source.IsConstant()) {
       DCHECK(CoherentConstantAndType(source, dst_type));
       MoveConstant(dst, source.GetConstant());
@@ -1458,30 +1477,14 @@
       } else {
         DCHECK(destination.IsFpuRegister());
         if (GetGraph()->HasSIMD()) {
-          __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
+          GetInstructionCodeGeneratorArm64()->MoveSIMDRegToSIMDReg(destination, source);
         } else {
           __ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type));
         }
       }
     }
   } else if (destination.IsSIMDStackSlot()) {
-    if (source.IsFpuRegister()) {
-      __ Str(QRegisterFrom(source), StackOperandFrom(destination));
-    } else {
-      DCHECK(source.IsSIMDStackSlot());
-      UseScratchRegisterScope temps(GetVIXLAssembler());
-      if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
-        Register temp = temps.AcquireX();
-        __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
-        __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
-        __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
-        __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
-      } else {
-        VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
-        __ Ldr(temp, StackOperandFrom(source));
-        __ Str(temp, StackOperandFrom(destination));
-      }
-    }
+    GetInstructionCodeGeneratorArm64()->MoveToSIMDStackSlot(destination, source);
   } else {  // The destination is not a register. It must be a stack slot.
     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
     if (source.IsRegister() || source.IsFpuRegister()) {
@@ -6372,6 +6375,39 @@
   }
 }
 
+MemOperand InstructionCodeGeneratorARM64::VecNeonAddress(
+    HVecMemoryOperation* instruction,
+    UseScratchRegisterScope* temps_scope,
+    size_t size,
+    bool is_string_char_at,
+    /*out*/ Register* scratch) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register base = InputRegisterAt(instruction, 0);
+
+  if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
+    DCHECK(!is_string_char_at);
+    return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
+  }
+
+  Location index = locations->InAt(1);
+  uint32_t offset = is_string_char_at
+      ? mirror::String::ValueOffset().Uint32Value()
+      : mirror::Array::DataOffset(size).Uint32Value();
+  size_t shift = ComponentSizeShiftWidth(size);
+
+  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
+  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
+
+  if (index.IsConstant()) {
+    offset += Int64FromLocation(index) << shift;
+    return HeapOperand(base, offset);
+  } else {
+    *scratch = temps_scope->AcquireSameSizeAs(base);
+    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
+    return HeapOperand(*scratch, offset);
+  }
+}
+
 #undef __
 #undef QUICK_ENTRY_POINT
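
Note for reviewers: the address arithmetic of the moved VecNeonAddress
helper above (constant index folded into the heap offset, register
index routed through a scratch base register) can be checked with the
stand-alone sketch below. The offsets and element sizes are
illustrative assumptions, not ART's actual mirror::Array layout.

    #include <cstdint>
    #include <cstdio>

    // Mirrors ComponentSizeShiftWidth(size): log2 of the element size.
    static size_t ComponentShift(size_t elem_size) {
      size_t shift = 0;
      while ((size_t{1} << shift) < elem_size) ++shift;
      return shift;
    }

    // Constant-index case: the scaled index folds into the operand
    // offset, so no scratch register is needed.
    static uint32_t ConstantIndexOffset(uint32_t data_offset,
                                        int64_t index,
                                        size_t elem_size) {
      return data_offset + static_cast<uint32_t>(index << ComponentShift(elem_size));
    }

    int main() {
      // Illustrative only: element 5 of a 4-byte-element array whose
      // data starts 16 bytes into the object -> HeapOperand(base, 36).
      std::printf("offset = %u\n", ConstantIndexOffset(16, 5, 4));  // 16 + (5 << 2) = 36
      // Register-index case (not modelled here) instead emits:
      //   Add(scratch, base, index << shift); HeapOperand(scratch, data_offset).
      return 0;
    }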
 
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 47a0194..627cf72 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -53,6 +53,9 @@
 static constexpr int kMaxMacroInstructionSizeInBytes = 15 * vixl::aarch64::kInstructionSize;
 static constexpr int kInvokeCodeMarginSizeInBytes = 6 * kMaxMacroInstructionSizeInBytes;
 
+// SVE is currently not enabled.
+static constexpr bool kArm64AllowSVE = false;
+
 static const vixl::aarch64::Register kParameterCoreRegisters[] = {
   vixl::aarch64::x1,
   vixl::aarch64::x2,
@@ -262,7 +265,7 @@
 #define DECLARE_VISIT_INSTRUCTION(name, super) \
   void Visit##name(H##name* instr) override;
 
-  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
@@ -276,7 +279,15 @@
   Arm64Assembler* GetAssembler() const { return assembler_; }
   vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
 
- private:
+  // SIMD helpers.
+  virtual Location AllocateSIMDScratchLocation(vixl::aarch64::UseScratchRegisterScope* scope) = 0;
+  virtual void FreeSIMDScratchLocation(Location loc,
+                                       vixl::aarch64::UseScratchRegisterScope* scope) = 0;
+  virtual void LoadSIMDRegFromStack(Location destination, Location source) = 0;
+  virtual void MoveSIMDRegToSIMDReg(Location destination, Location source) = 0;
+  virtual void MoveToSIMDStackSlot(Location destination, Location source) = 0;
+
+ protected:
   void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
                                         vixl::aarch64::Register class_reg);
   void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
@@ -340,7 +351,11 @@
   void GenerateIntRemForPower2Denom(HRem *instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
-  vixl::aarch64::MemOperand VecAddress(
+  // Helper to set up locations for vector memory operations. Returns the memory operand and,
+  // if used, sets the output parameter scratch to a temporary register used in this operand,
+  // so that the client can release it right after the memory operand use.
+  // NEON version.
+  vixl::aarch64::MemOperand VecNeonAddress(
       HVecMemoryOperation* instruction,
       // This function may acquire a scratch register.
       vixl::aarch64::UseScratchRegisterScope* temps_scope,
@@ -362,7 +377,7 @@
 #define DECLARE_VISIT_INSTRUCTION(name, super) \
   void Visit##name(H##name* instr) override;
 
-  FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
   FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION)
 
@@ -373,7 +388,7 @@
                << " (id " << instruction->GetId() << ")";
   }
 
- private:
+ protected:
   void HandleBinaryOp(HBinaryOperation* instr);
   void HandleFieldSet(HInstruction* instruction);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -387,6 +402,72 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
 };
 
+class InstructionCodeGeneratorARM64Neon : public InstructionCodeGeneratorARM64 {
+ public:
+  InstructionCodeGeneratorARM64Neon(HGraph* graph, CodeGeneratorARM64* codegen) :
+      InstructionCodeGeneratorARM64(graph, codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) \
+  void Visit##name(H##name* instr) override;
+
+  FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+  Location AllocateSIMDScratchLocation(vixl::aarch64::UseScratchRegisterScope* scope) override;
+  void FreeSIMDScratchLocation(Location loc,
+                               vixl::aarch64::UseScratchRegisterScope* scope) override;
+  void LoadSIMDRegFromStack(Location destination, Location source) override;
+  void MoveSIMDRegToSIMDReg(Location destination, Location source) override;
+  void MoveToSIMDStackSlot(Location destination, Location source) override;
+};
+
+class LocationsBuilderARM64Neon : public LocationsBuilderARM64 {
+ public:
+  LocationsBuilderARM64Neon(HGraph* graph, CodeGeneratorARM64* codegen) :
+      LocationsBuilderARM64(graph, codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) \
+  void Visit##name(H##name* instr) override;
+
+  FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+};
+
+class InstructionCodeGeneratorARM64Sve : public InstructionCodeGeneratorARM64 {
+ public:
+  InstructionCodeGeneratorARM64Sve(HGraph* graph, CodeGeneratorARM64* codegen) :
+      InstructionCodeGeneratorARM64(graph, codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) \
+  void Visit##name(H##name* instr) override;
+
+  FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+  Location AllocateSIMDScratchLocation(vixl::aarch64::UseScratchRegisterScope* scope) override;
+  void FreeSIMDScratchLocation(Location loc,
+                               vixl::aarch64::UseScratchRegisterScope* scope) override;
+  void LoadSIMDRegFromStack(Location destination, Location source) override;
+  void MoveSIMDRegToSIMDReg(Location destination, Location source) override;
+  void MoveToSIMDStackSlot(Location destination, Location source) override;
+};
+
+class LocationsBuilderARM64Sve : public LocationsBuilderARM64 {
+ public:
+  LocationsBuilderARM64Sve(HGraph* graph, CodeGeneratorARM64* codegen) :
+      LocationsBuilderARM64(graph, codegen) {}
+
+#define DECLARE_VISIT_INSTRUCTION(name, super) \
+  void Visit##name(H##name* instr) override;
+
+  FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+};
+
 class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
  public:
   ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen)
@@ -435,6 +516,8 @@
     return kArm64WordSize;
   }
 
+  bool SupportsPredicatedSIMD() const override { return ShouldUseSVE(); }
+
   size_t GetSlowPathFPWidth() const override {
     return GetGraph()->HasSIMD()
         ? GetSIMDRegisterWidth()
@@ -455,8 +538,11 @@
     return block_entry_label->GetLocation();
   }
 
-  HGraphVisitor* GetLocationBuilder() override { return &location_builder_; }
-  HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; }
+  HGraphVisitor* GetLocationBuilder() override { return location_builder_; }
+  InstructionCodeGeneratorARM64* GetInstructionCodeGeneratorArm64() {
+    return instruction_visitor_;
+  }
+  HGraphVisitor* GetInstructionVisitor() override { return GetInstructionCodeGeneratorArm64(); }
   Arm64Assembler* GetAssembler() override { return &assembler_; }
   const Arm64Assembler& GetAssembler() const override { return assembler_; }
   vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
@@ -899,14 +985,22 @@
   static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
                                           ArenaVector<linker::LinkerPatch>* linker_patches);
 
+  // Returns whether SVE features are supported and should be used.
+  bool ShouldUseSVE() const;
+
   // Labels for each block that will be compiled.
   // We use a deque so that the `vixl::aarch64::Label` objects do not move in memory.
   ArenaDeque<vixl::aarch64::Label> block_labels_;  // Indexed by block id.
   vixl::aarch64::Label frame_entry_label_;
   ArenaVector<std::unique_ptr<JumpTableARM64>> jump_tables_;
 
-  LocationsBuilderARM64 location_builder_;
-  InstructionCodeGeneratorARM64 instruction_visitor_;
+  LocationsBuilderARM64Neon location_builder_neon_;
+  InstructionCodeGeneratorARM64Neon instruction_visitor_neon_;
+  LocationsBuilderARM64Sve location_builder_sve_;
+  InstructionCodeGeneratorARM64Sve instruction_visitor_sve_;
+
+  LocationsBuilderARM64* location_builder_;
+  InstructionCodeGeneratorARM64* instruction_visitor_;
   ParallelMoveResolverARM64 move_resolver_;
   Arm64Assembler assembler_;
 
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc
similarity index 87%
rename from compiler/optimizing/code_generator_vector_arm64.cc
rename to compiler/optimizing/code_generator_vector_arm64_neon.cc
index df95c88..78720c3 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc
@@ -31,9 +31,11 @@
 using helpers::HeapOperand;
 using helpers::InputRegisterAt;
 using helpers::Int64FromLocation;
+using helpers::LocationFrom;
 using helpers::OutputRegister;
+using helpers::QRegisterFrom;
+using helpers::StackOperandFrom;
 using helpers::VRegisterFrom;
-using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
 
 #define __ GetVIXLAssembler()->
@@ -47,7 +49,7 @@
   return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
 }
 
-void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+void LocationsBuilderARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
   HInstruction* input = instruction->InputAt(0);
   switch (instruction->GetPackedType()) {
@@ -78,7 +80,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   Location src_loc = locations->InAt(0);
   VRegister dst = VRegisterFrom(locations->Out());
@@ -140,7 +142,7 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void LocationsBuilderARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
@@ -164,7 +166,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecExtractScalar(HVecExtractScalar* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister src = VRegisterFrom(locations->InAt(0));
   switch (instruction->GetPackedType()) {
@@ -215,11 +217,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
+void LocationsBuilderARM64Neon::VisitVecReduce(HVecReduce* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecReduce(HVecReduce* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister src = VRegisterFrom(locations->InAt(0));
   VRegister dst = DRegisterFrom(locations->Out());
@@ -255,11 +257,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
+void LocationsBuilderARM64Neon::VisitVecCnv(HVecCnv* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecCnv(HVecCnv* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister src = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -273,11 +275,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
+void LocationsBuilderARM64Neon::VisitVecNeg(HVecNeg* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecNeg(HVecNeg* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister src = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -314,11 +316,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
+void LocationsBuilderARM64Neon::VisitVecAbs(HVecAbs* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecAbs(HVecAbs* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister src = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -353,11 +355,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
+void LocationsBuilderARM64Neon::VisitVecNot(HVecNot* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecNot(HVecNot* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister src = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -404,11 +406,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
+void LocationsBuilderARM64Neon::VisitVecAdd(HVecAdd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecAdd(HVecAdd* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -446,11 +448,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+void LocationsBuilderARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -478,11 +480,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+void LocationsBuilderARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -518,11 +520,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
+void LocationsBuilderARM64Neon::VisitVecSub(HVecSub* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecSub(HVecSub* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -560,11 +562,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+void LocationsBuilderARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecSaturationSub(HVecSaturationSub* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -592,11 +594,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
+void LocationsBuilderARM64Neon::VisitVecMul(HVecMul* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecMul(HVecMul* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -630,11 +632,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
+void LocationsBuilderARM64Neon::VisitVecDiv(HVecDiv* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecDiv(HVecDiv* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -654,11 +656,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
+void LocationsBuilderARM64Neon::VisitVecMin(HVecMin* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecMin(HVecMin* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -702,11 +704,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
+void LocationsBuilderARM64Neon::VisitVecMax(HVecMax* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecMax(HVecMax* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -750,12 +752,12 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
+void LocationsBuilderARM64Neon::VisitVecAnd(HVecAnd* instruction) {
   // TODO: Allow constants supported by BIC (vector, immediate).
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecAnd(HVecAnd* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -778,20 +780,20 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
+void LocationsBuilderARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
   LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
 }
 
-void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecAndNot(HVecAndNot* instruction) {
   // TODO: Use BIC (vector, register).
   LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
 }
 
-void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
+void LocationsBuilderARM64Neon::VisitVecOr(HVecOr* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecOr(HVecOr* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -814,11 +816,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
+void LocationsBuilderARM64Neon::VisitVecXor(HVecXor* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecXor(HVecXor* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -861,11 +863,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
+void LocationsBuilderARM64Neon::VisitVecShl(HVecShl* instruction) {
   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecShl(HVecShl* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -895,11 +897,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
+void LocationsBuilderARM64Neon::VisitVecShr(HVecShr* instruction) {
   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecShr(HVecShr* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -929,11 +931,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
+void LocationsBuilderARM64Neon::VisitVecUShr(HVecUShr* instruction) {
   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecUShr(HVecUShr* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -963,7 +965,7 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+void LocationsBuilderARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
 
   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
@@ -995,7 +997,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecSetScalars(HVecSetScalars* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister dst = VRegisterFrom(locations->Out());
 
@@ -1057,14 +1059,14 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+void LocationsBuilderARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
 }
 
 // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
 // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
 // However vector MultiplyAccumulate instruction is not affected.
-void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister acc = VRegisterFrom(locations->InAt(0));
   VRegister left = VRegisterFrom(locations->InAt(1));
@@ -1105,7 +1107,7 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+void LocationsBuilderARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
   // Some conversions require temporary registers.
   LocationSummary* locations = instruction->GetLocations();
@@ -1147,7 +1149,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister acc = VRegisterFrom(locations->InAt(0));
   VRegister left = VRegisterFrom(locations->InAt(1));
@@ -1287,7 +1289,7 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) {
+void LocationsBuilderARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
   DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
   locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -1302,7 +1304,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecDotProd(HVecDotProd* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   DCHECK(locations->InAt(0).Equals(locations->Out()));
   VRegister acc = VRegisterFrom(locations->InAt(0));
@@ -1392,47 +1394,11 @@
   }
 }
 
-// Helper to set up locations for vector memory operations. Returns the memory operand and,
-// if used, sets the output parameter scratch to a temporary register used in this operand,
-// so that the client can release it right after the memory operand use.
-MemOperand InstructionCodeGeneratorARM64::VecAddress(
-    HVecMemoryOperation* instruction,
-    UseScratchRegisterScope* temps_scope,
-    size_t size,
-    bool is_string_char_at,
-    /*out*/ Register* scratch) {
-  LocationSummary* locations = instruction->GetLocations();
-  Register base = InputRegisterAt(instruction, 0);
-
-  if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
-    DCHECK(!is_string_char_at);
-    return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
-  }
-
-  Location index = locations->InAt(1);
-  uint32_t offset = is_string_char_at
-      ? mirror::String::ValueOffset().Uint32Value()
-      : mirror::Array::DataOffset(size).Uint32Value();
-  size_t shift = ComponentSizeShiftWidth(size);
-
-  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
-  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
-
-  if (index.IsConstant()) {
-    offset += Int64FromLocation(index) << shift;
-    return HeapOperand(base, offset);
-  } else {
-    *scratch = temps_scope->AcquireSameSizeAs(base);
-    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
-    return HeapOperand(*scratch, offset);
-  }
-}
-
-void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
+void LocationsBuilderARM64Neon::VisitVecLoad(HVecLoad* instruction) {
   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecLoad(HVecLoad* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   size_t size = DataType::Size(instruction->GetPackedType());
   VRegister reg = VRegisterFrom(locations->Out());
@@ -1456,7 +1422,7 @@
         temps.Release(length);  // no longer needed
         // Zero extend 8 compressed bytes into 8 chars.
         __ Ldr(DRegisterFrom(locations->Out()).V8B(),
-               VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
+               VecNeonAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
         __ Uxtl(reg.V8H(), reg.V8B());
         __ B(&done);
         if (scratch.IsValid()) {
@@ -1464,7 +1430,8 @@
         }
         // Load 8 direct uncompressed chars.
         __ Bind(&uncompressed_load);
-        __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
+        __ Ldr(reg,
+               VecNeonAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
         __ Bind(&done);
         return;
       }
@@ -1478,7 +1445,8 @@
     case DataType::Type::kFloat64:
       DCHECK_LE(2u, instruction->GetVectorLength());
       DCHECK_LE(instruction->GetVectorLength(), 16u);
-      __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
+      __ Ldr(reg,
+             VecNeonAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -1486,11 +1454,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
+void LocationsBuilderARM64Neon::VisitVecStore(HVecStore* instruction) {
   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
+void InstructionCodeGeneratorARM64Neon::VisitVecStore(HVecStore* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   size_t size = DataType::Size(instruction->GetPackedType());
   VRegister reg = VRegisterFrom(locations->InAt(2));
@@ -1509,7 +1477,8 @@
     case DataType::Type::kFloat64:
       DCHECK_LE(2u, instruction->GetVectorLength());
       DCHECK_LE(instruction->GetVectorLength(), 16u);
-      __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
+      __ Str(reg,
+             VecNeonAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -1517,6 +1486,54 @@
   }
 }
 
+Location InstructionCodeGeneratorARM64Neon::AllocateSIMDScratchLocation(
+    vixl::aarch64::UseScratchRegisterScope* scope) {
+  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+  return LocationFrom(scope->AcquireVRegisterOfSize(kQRegSize));
+}
+
+void InstructionCodeGeneratorARM64Neon::FreeSIMDScratchLocation(Location loc,
+    vixl::aarch64::UseScratchRegisterScope* scope) {
+  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+  scope->Release(QRegisterFrom(loc));
+}
+
+void InstructionCodeGeneratorARM64Neon::LoadSIMDRegFromStack(Location destination,
+                                                             Location source) {
+  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+  __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
+}
+
+void InstructionCodeGeneratorARM64Neon::MoveSIMDRegToSIMDReg(Location destination,
+                                                             Location source) {
+  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+  __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
+}
+
+void InstructionCodeGeneratorARM64Neon::MoveToSIMDStackSlot(Location destination,
+                                                            Location source) {
+  DCHECK(destination.IsSIMDStackSlot());
+  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+
+  if (source.IsFpuRegister()) {
+    __ Str(QRegisterFrom(source), StackOperandFrom(destination));
+  } else {
+    DCHECK(source.IsSIMDStackSlot());
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
+      Register temp = temps.AcquireX();
+      __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
+      __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
+      __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
+      __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
+    } else {
+      VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
+      __ Ldr(temp, StackOperandFrom(source));
+      __ Str(temp, StackOperandFrom(destination));
+    }
+  }
+}
+
 #undef __
 
 }  // namespace arm64
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc
similarity index 87%
copy from compiler/optimizing/code_generator_vector_arm64.cc
copy to compiler/optimizing/code_generator_vector_arm64_sve.cc
index df95c88..5460ff2 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2020 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -31,9 +31,11 @@
 using helpers::HeapOperand;
 using helpers::InputRegisterAt;
 using helpers::Int64FromLocation;
+using helpers::LocationFrom;
 using helpers::OutputRegister;
+using helpers::QRegisterFrom;
+using helpers::StackOperandFrom;
 using helpers::VRegisterFrom;
-using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
 
 #define __ GetVIXLAssembler()->
@@ -47,7 +49,7 @@
   return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
 }
 
-void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
   HInstruction* input = instruction->InputAt(0);
   switch (instruction->GetPackedType()) {
@@ -78,7 +80,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   Location src_loc = locations->InAt(0);
   VRegister dst = VRegisterFrom(locations->Out());
@@ -140,7 +142,7 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void LocationsBuilderARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case DataType::Type::kBool:
@@ -164,7 +166,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister src = VRegisterFrom(locations->InAt(0));
   switch (instruction->GetPackedType()) {
@@ -215,11 +217,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
+void LocationsBuilderARM64Sve::VisitVecReduce(HVecReduce* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister src = VRegisterFrom(locations->InAt(0));
   VRegister dst = DRegisterFrom(locations->Out());
@@ -255,11 +257,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
+void LocationsBuilderARM64Sve::VisitVecCnv(HVecCnv* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister src = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -273,11 +275,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
+void LocationsBuilderARM64Sve::VisitVecNeg(HVecNeg* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister src = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -314,11 +316,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
+void LocationsBuilderARM64Sve::VisitVecAbs(HVecAbs* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister src = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -353,11 +355,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
+void LocationsBuilderARM64Sve::VisitVecNot(HVecNot* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister src = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -404,11 +406,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
+void LocationsBuilderARM64Sve::VisitVecAdd(HVecAdd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -446,11 +448,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+void LocationsBuilderARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -478,11 +480,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+void LocationsBuilderARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -518,11 +520,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
+void LocationsBuilderARM64Sve::VisitVecSub(HVecSub* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -560,11 +562,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+void LocationsBuilderARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -592,11 +594,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
+void LocationsBuilderARM64Sve::VisitVecMul(HVecMul* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -630,11 +632,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
+void LocationsBuilderARM64Sve::VisitVecDiv(HVecDiv* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -654,11 +656,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
+void LocationsBuilderARM64Sve::VisitVecMin(HVecMin* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecMin(HVecMin* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -702,11 +704,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
+void LocationsBuilderARM64Sve::VisitVecMax(HVecMax* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecMax(HVecMax* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -750,12 +752,12 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
+void LocationsBuilderARM64Sve::VisitVecAnd(HVecAnd* instruction) {
   // TODO: Allow constants supported by BIC (vector, immediate).
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -778,20 +780,20 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
+void LocationsBuilderARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
   LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
 }
 
-void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
   // TODO: Use BIC (vector, register).
   LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
 }
 
-void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
+void LocationsBuilderARM64Sve::VisitVecOr(HVecOr* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -814,11 +816,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
+void LocationsBuilderARM64Sve::VisitVecXor(HVecXor* instruction) {
   CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister rhs = VRegisterFrom(locations->InAt(1));
@@ -861,11 +863,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
+void LocationsBuilderARM64Sve::VisitVecShl(HVecShl* instruction) {
   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -895,11 +897,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
+void LocationsBuilderARM64Sve::VisitVecShr(HVecShr* instruction) {
   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -929,11 +931,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
+void LocationsBuilderARM64Sve::VisitVecUShr(HVecUShr* instruction) {
   CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister lhs = VRegisterFrom(locations->InAt(0));
   VRegister dst = VRegisterFrom(locations->Out());
@@ -963,7 +965,7 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
 
   DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
@@ -995,7 +997,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister dst = VRegisterFrom(locations->Out());
 
@@ -1057,14 +1059,14 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+void LocationsBuilderARM64Sve::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
 }
 
 // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
 // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
 // However, the vector MultiplyAccumulate instruction is not affected.
-void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister acc = VRegisterFrom(locations->InAt(0));
   VRegister left = VRegisterFrom(locations->InAt(1));
@@ -1105,7 +1107,7 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+void LocationsBuilderARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
   CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
   // Some conversions require temporary registers.
   LocationSummary* locations = instruction->GetLocations();
@@ -1147,7 +1149,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   VRegister acc = VRegisterFrom(locations->InAt(0));
   VRegister left = VRegisterFrom(locations->InAt(1));
@@ -1287,7 +1289,7 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) {
+void LocationsBuilderARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
   DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
   locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -1302,7 +1304,7 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   DCHECK(locations->InAt(0).Equals(locations->Out()));
   VRegister acc = VRegisterFrom(locations->InAt(0));
@@ -1392,47 +1394,11 @@
   }
 }
 
-// Helper to set up locations for vector memory operations. Returns the memory operand and,
-// if used, sets the output parameter scratch to a temporary register used in this operand,
-// so that the client can release it right after the memory operand use.
-MemOperand InstructionCodeGeneratorARM64::VecAddress(
-    HVecMemoryOperation* instruction,
-    UseScratchRegisterScope* temps_scope,
-    size_t size,
-    bool is_string_char_at,
-    /*out*/ Register* scratch) {
-  LocationSummary* locations = instruction->GetLocations();
-  Register base = InputRegisterAt(instruction, 0);
-
-  if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
-    DCHECK(!is_string_char_at);
-    return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
-  }
-
-  Location index = locations->InAt(1);
-  uint32_t offset = is_string_char_at
-      ? mirror::String::ValueOffset().Uint32Value()
-      : mirror::Array::DataOffset(size).Uint32Value();
-  size_t shift = ComponentSizeShiftWidth(size);
-
-  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
-  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
-
-  if (index.IsConstant()) {
-    offset += Int64FromLocation(index) << shift;
-    return HeapOperand(base, offset);
-  } else {
-    *scratch = temps_scope->AcquireSameSizeAs(base);
-    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
-    return HeapOperand(*scratch, offset);
-  }
-}
-
-void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
+void LocationsBuilderARM64Sve::VisitVecLoad(HVecLoad* instruction) {
   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   size_t size = DataType::Size(instruction->GetPackedType());
   VRegister reg = VRegisterFrom(locations->Out());
@@ -1456,7 +1422,7 @@
         temps.Release(length);  // no longer needed
         // Zero extend 8 compressed bytes into 8 chars.
         __ Ldr(DRegisterFrom(locations->Out()).V8B(),
-               VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
+               VecNeonAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
         __ Uxtl(reg.V8H(), reg.V8B());
         __ B(&done);
         if (scratch.IsValid()) {
@@ -1464,7 +1430,8 @@
         }
         // Load 8 direct uncompressed chars.
         __ Bind(&uncompressed_load);
-        __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
+        __ Ldr(reg,
+               VecNeonAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
         __ Bind(&done);
         return;
       }
@@ -1478,7 +1445,8 @@
     case DataType::Type::kFloat64:
       DCHECK_LE(2u, instruction->GetVectorLength());
       DCHECK_LE(instruction->GetVectorLength(), 16u);
-      __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
+      __ Ldr(reg,
+             VecNeonAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -1486,11 +1454,11 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
+void LocationsBuilderARM64Sve::VisitVecStore(HVecStore* instruction) {
   CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
 }
 
-void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
+void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   size_t size = DataType::Size(instruction->GetPackedType());
   VRegister reg = VRegisterFrom(locations->InAt(2));
@@ -1509,7 +1477,8 @@
     case DataType::Type::kFloat64:
       DCHECK_LE(2u, instruction->GetVectorLength());
       DCHECK_LE(instruction->GetVectorLength(), 16u);
-      __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
+      __ Str(reg,
+             VecNeonAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -1517,6 +1486,54 @@
   }
 }
 
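+// SIMD scratch-register and stack-move helpers for the SVE backend. In this
+// implementation a SIMD value always occupies a 128-bit Q register, which the
+// DCHECKs below assert via GetSIMDRegisterWidth().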
+Location InstructionCodeGeneratorARM64Sve::AllocateSIMDScratchLocation(
+    vixl::aarch64::UseScratchRegisterScope* scope) {
+  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+  return LocationFrom(scope->AcquireVRegisterOfSize(kQRegSize));
+}
+
+void InstructionCodeGeneratorARM64Sve::FreeSIMDScratchLocation(
+    Location loc, vixl::aarch64::UseScratchRegisterScope* scope) {
+  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+  scope->Release(QRegisterFrom(loc));
+}
+
+void InstructionCodeGeneratorARM64Sve::LoadSIMDRegFromStack(Location destination,
+                                                            Location source) {
+  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+  __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
+}
+
+void InstructionCodeGeneratorARM64Sve::MoveSIMDRegToSIMDReg(Location destination,
+                                                            Location source) {
+  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+  __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
+}
+
+void InstructionCodeGeneratorARM64Sve::MoveToSIMDStackSlot(Location destination,
+                                                           Location source) {
+  DCHECK(destination.IsSIMDStackSlot());
+  DCHECK_EQ(codegen_->GetSIMDRegisterWidth(), kQRegSizeInBytes);
+
+  if (source.IsFpuRegister()) {
+    __ Str(QRegisterFrom(source), StackOperandFrom(destination));
+  } else {
+    DCHECK(source.IsSIMDStackSlot());
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
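+      // No scratch V register is available: copy the 128-bit slot in two
+      // 64-bit halves through a core (X) register instead.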
+      Register temp = temps.AcquireX();
+      __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
+      __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
+      __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
+      __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
+    } else {
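+      // A Q-sized scratch V register is available: move the slot in one go.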
+      VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
+      __ Ldr(temp, StackOperandFrom(source));
+      __ Str(temp, StackOperandFrom(destination));
+    }
+  }
+}
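+
+// A hypothetical usage sketch (not part of this CL): a caller that shuffles
+// SIMD values, e.g. a move resolver doing a stack-to-stack move, can stay
+// register-width-agnostic by going through these hooks. `tmp' is illustrative:
+//
+//   UseScratchRegisterScope temps(GetVIXLAssembler());
+//   Location tmp = AllocateSIMDScratchLocation(&temps);
+//   LoadSIMDRegFromStack(tmp, source);
+//   MoveToSIMDStackSlot(destination, tmp);
+//   FreeSIMDScratchLocation(tmp, &temps);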
+
 #undef __
 
 }  // namespace arm64
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 3427893..e02a393 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1387,7 +1387,7 @@
   DISALLOW_COPY_AND_ASSIGN(HLoopInformationOutwardIterator);
 };
 
-#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M)                         \
+#define FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(M)                  \
   M(Above, Condition)                                                   \
   M(AboveOrEqual, Condition)                                            \
   M(Abs, UnaryOperation)                                                \
@@ -1477,7 +1477,9 @@
   M(TryBoundary, Instruction)                                           \
   M(TypeConversion, Instruction)                                        \
   M(UShr, BinaryOperation)                                              \
-  M(Xor, BinaryOperation)                                               \
+  M(Xor, BinaryOperation)
+
+#define FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(M)                  \
   M(VecReplicateScalar, VecUnaryOperation)                              \
   M(VecExtractScalar, VecUnaryOperation)                                \
   M(VecReduce, VecUnaryOperation)                                       \
@@ -1508,6 +1510,10 @@
   M(VecLoad, VecMemoryOperation)                                        \
   M(VecStore, VecMemoryOperation)                                       \
 
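+// FOR_EACH_CONCRETE_INSTRUCTION_COMMON remains the concatenation of the
+// scalar and vector lists, so existing users of the combined macro are
+// unaffected by the split.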
+#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M)                         \
+  FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(M)                        \
+  FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(M)
+
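+// As a hypothetical sketch (the macro name DECLARE_VISIT is illustrative), a
+// visitor that only handles the vector instructions can now be declared from
+// the vector list alone:
+//
+//   #define DECLARE_VISIT(Name, Super) \
+//     void Visit##Name(H##Name* instruction) override;
+//   FOR_EACH_CONCRETE_INSTRUCTION_VECTOR_COMMON(DECLARE_VISIT)
+//   #undef DECLARE_VISIT
+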
 /*
  * Instructions, shared across several (not all) architectures.
  */