Handle cycles with double stack slots in ARM64 parallel moves.
When acquiring a scratch register to emit a move between two
double stack slots, ask for an FP register first, to avoid
depleting the core scratch register pool, which is used in
vixl::aarch64::MacroAssembler::LoadStoreMacro when the
offset does not fit in the immediate field of the load
instruction.
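For illustration, a rough sketch of the fallback sequence
(hypothetical operands; the exact code VIXL emits may differ) for
storing a double at an offset such as 257, which is neither a
multiple of 8 (scaled STR immediate) nor within the +/-255 unscaled
(STUR) range:

  mov x16, #257        // core scratch register from the VIXL pool
  str d31, [sp, x16]   // register-offset addressing instead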
Test: make test-art-target (on ARM64)
Bug: 34760542
Change-Id: Ie9b37d007ed6ec5886931a35dcb22a9aff73bbbe
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index cf824a1..26c8254 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1452,6 +1452,19 @@
(cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
}
+// Allocate a scratch register from the VIXL pool, querying the
+// floating-point register pool first, and then the core register
+// pool. This is essentially a reimplementation of
+// vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
+// with a different allocation strategy.
+static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
+ vixl::aarch64::UseScratchRegisterScope* temps,
+ int size_in_bits) {
+ return masm->GetScratchFPRegisterList()->IsEmpty()
+ ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
+ : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
+}
+
void CodeGeneratorARM64::MoveLocation(Location destination,
Location source,
Primitive::Type dst_type) {
@@ -1563,8 +1576,16 @@
// a move is blocked by another move requiring a scratch FP
// register, which would reserve D31). To prevent this issue, we
// ask for a scratch register of any type (core or FP).
- CPURegister temp =
- temps.AcquireCPURegisterOfSize(destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize);
+ //
+ // Also, we ask for an FP scratch register first, as the demand
+ // for core scratch registers is higher. This is why we use
+ // AcquireFPOrCoreCPURegisterOfSize instead of
+ // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
+ // allocates core scratch registers first.
+ CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
+ GetVIXLAssembler(),
+ &temps,
+ (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
__ Ldr(temp, StackOperandFrom(source));
__ Str(temp, StackOperandFrom(destination));
}
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index e3f3df0..763d6da 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -1067,6 +1067,39 @@
}
#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+// Regression test for b/34760542.
+TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
+ std::unique_ptr<const Arm64InstructionSetFeatures> features(
+ Arm64InstructionSetFeatures::FromCppDefines());
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ HGraph* graph = CreateGraph(&allocator);
+ arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions());
+
+ codegen.Initialize();
+
+ // The following ParallelMove used to fail this assertion:
+ //
+ // Assertion failed (!available->IsEmpty())
+ //
+ // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable.
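+ //
+ // The offset 257 cannot be encoded in a 64-bit LDR/STR immediate:
+ // it is not a multiple of 8 (scaled form) and is outside the
+ // +/-255 unscaled (LDUR/STUR) range, so LoadStoreMacro has to
+ // acquire a core scratch register to materialize the address.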
+ HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
+ move->AddMove(Location::DoubleStackSlot(0),
+ Location::DoubleStackSlot(257),
+ Primitive::kPrimDouble,
+ nullptr);
+ move->AddMove(Location::DoubleStackSlot(257),
+ Location::DoubleStackSlot(0),
+ Primitive::kPrimDouble,
+ nullptr);
+ codegen.GetMoveResolver()->EmitNativeCode(move);
+
+ InternalCodeAllocator code_allocator;
+ codegen.Finalize(&code_allocator);
+}
+#endif
+
#ifdef ART_ENABLE_CODEGEN_mips
TEST_F(CodegenTest, MipsClobberRA) {
std::unique_ptr<const MipsInstructionSetFeatures> features_mips(