Opt compiler: Implement parallel move resolver without using swap.

The algorithm of ParallelMoveResolverNoSwap() is almost the same as
ParallelMoveResolverWithSwap(), except for the way circular dependencies
are resolved. NoSwap() uses an additional scratch register to break a
cycle instead of swapping. For example, the cycle (0->1) (1->2) (2->0)
is performed as (2->scratch) (1->2) (0->1) (scratch->0).
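
A minimal standalone C++ sketch of the scratch-based cycle resolution
above (illustration only, not the ART resolver; register contents are
modeled as a plain array):

  #include <cstdio>

  int main() {
    // Model three registers by their current contents.
    int regs[3] = {10, 20, 30};
    int scratch;

    // Desired parallel move: (0->1) (1->2) (2->0), i.e. every register
    // receives the old value of its source. Break the cycle by spilling
    // one value to the scratch location first.
    scratch = regs[2];  // (2->scratch)
    regs[2] = regs[1];  // (1->2)
    regs[1] = regs[0];  // (0->1)
    regs[0] = scratch;  // (scratch->0)

    // Expected result: reg0=30 reg1=10 reg2=20, using N+1 = 4 moves.
    std::printf("reg0=%d reg1=%d reg2=%d\n", regs[0], regs[1], regs[2]);
    return 0;
  }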

On architectures without native swap support, NoSwap() can reduce the
number of emitted moves from 3x(N-1) to (N+1) when there is a circular
dependency of N moves.
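
For the three-move cycle above (N = 3), a swap-based resolver that must
synthesize each swap from three moves emits 3x(3-1) = 6 moves, whereas
the scratch-based sequence uses only 3+1 = 4.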

In addition, the NoSwap() algorithm does not depend on architecture
register layout information, so it can support register pairs on arm32
and X/W, D/S registers on arm64 without additional modification.

Change-Id: Idf56bd5469bb78c0e339e43ab16387428a082318
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 07c6dd0..c862683 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -194,15 +194,17 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM64);
 };
 
-class ParallelMoveResolverARM64 : public ParallelMoveResolver {
+class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
  public:
   ParallelMoveResolverARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen)
-      : ParallelMoveResolver(allocator), codegen_(codegen) {}
+      : ParallelMoveResolverNoSwap(allocator), codegen_(codegen), vixl_temps_() {}
 
+ protected:
+  void PrepareForEmitNativeCode() OVERRIDE;
+  void FinishEmitNativeCode() OVERRIDE;
+  Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE;
+  void FreeScratchLocation(Location loc) OVERRIDE;
   void EmitMove(size_t index) OVERRIDE;
-  void EmitSwap(size_t index) OVERRIDE;
-  void RestoreScratch(int reg) OVERRIDE;
-  void SpillScratch(int reg) OVERRIDE;
 
  private:
   Arm64Assembler* GetAssembler() const;
@@ -211,6 +213,7 @@
   }
 
   CodeGeneratorARM64* const codegen_;
+  vixl::UseScratchRegisterScope vixl_temps_;
 
   DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARM64);
 };
@@ -318,7 +321,6 @@
   // locations, and is used for optimisation and debugging.
   void MoveLocation(Location destination, Location source,
                     Primitive::Type type = Primitive::kPrimVoid);
-  void SwapLocations(Location loc_1, Location loc_2);
   void Load(Primitive::Type type, vixl::CPURegister dst, const vixl::MemOperand& src);
   void Store(Primitive::Type type, vixl::CPURegister rt, const vixl::MemOperand& dst);
   void LoadCurrentMethod(vixl::Register current_method);