2 files changed, 31 insertions, 6 deletions
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index f108595a00..00ad3e34b7 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -5681,13 +5681,13 @@ void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
   // TODO(VIXL32): Double check the performance of this implementation.
   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
-  vixl32::Register temp = temps.Acquire();
-  vixl32::SRegister temp_s = temps.AcquireS();
+  vixl32::SRegister temp_1 = temps.AcquireS();
+  vixl32::SRegister temp_2 = temps.AcquireS();
 
-  __ Ldr(temp, MemOperand(sp, mem1));
-  __ Vldr(temp_s, MemOperand(sp, mem2));
-  __ Str(temp, MemOperand(sp, mem2));
-  __ Vstr(temp_s, MemOperand(sp, mem1));
+  __ Vldr(temp_1, MemOperand(sp, mem1));
+  __ Vldr(temp_2, MemOperand(sp, mem2));
+  __ Vstr(temp_1, MemOperand(sp, mem2));
+  __ Vstr(temp_2, MemOperand(sp, mem1));
 }
 
 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index ac83bd9b0c..879b4ce59e 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -1041,6 +1041,31 @@ TEST_F(CodegenTest, ComparisonsLong) {
   }
 }
 
+#ifdef ART_ENABLE_CODEGEN_arm
+TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) {
+  std::unique_ptr<const ArmInstructionSetFeatures> features(
+      ArmInstructionSetFeatures::FromCppDefines());
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = CreateGraph(&allocator);
+  arm::CodeGeneratorARMVIXL codegen(graph, *features.get(), CompilerOptions());
+
+  codegen.Initialize();
+
+  // This will result in calling EmitSwap -> void ParallelMoveResolverARMVIXL::Exchange(int mem1,
+  // int mem2) which was faulty (before the fix). So previously GPR and FP scratch registers were
+  // used as temps; however GPR scratch register is required for big stack offsets which don't fit
+  // LDR encoding. So the following code is a regression test for that situation.
+  HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
+  move->AddMove(Location::StackSlot(0), Location::StackSlot(8192), Primitive::kPrimInt, nullptr);
+  move->AddMove(Location::StackSlot(8192), Location::StackSlot(0), Primitive::kPrimInt, nullptr);
+  codegen.GetMoveResolver()->EmitNativeCode(move);
+
+  InternalCodeAllocator code_allocator;
+  codegen.Finalize(&code_allocator);
+}
+#endif
+
 #ifdef ART_ENABLE_CODEGEN_mips
 TEST_F(CodegenTest, MipsClobberRA) {
   std::unique_ptr<const MipsInstructionSetFeatures> features_mips(