Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc  10
-rw-r--r--  compiler/optimizing/codegen_test.cc          24
2 files changed, 27 insertions, 7 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index eee832a732..0bc4bd7524 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1498,7 +1498,7 @@ Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind ki
if (kind == Location::kRegister) {
scratch = LocationFrom(vixl_temps_.AcquireX());
} else {
- DCHECK(kind == Location::kFpuRegister);
+ DCHECK_EQ(kind, Location::kFpuRegister);
scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
: vixl_temps_.AcquireD());
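(A side note on the hunk above, not part of the commit itself: DCHECK_EQ(a, b) is generally preferred over DCHECK(a == b) in ART because the _EQ form also reports the two operand values when the check fires, which makes the abort message easier to act on. The behaviour sketched in the comments below is indicative only.)

  // Plain form: a failure only echoes the expression that was false.
  DCHECK(kind == Location::kFpuRegister);
  // _EQ form: the abort message also carries the actual values of
  // 'kind' and 'Location::kFpuRegister'.
  DCHECK_EQ(kind, Location::kFpuRegister);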
@@ -1726,9 +1726,9 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
(cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
}
-// Allocate a scratch register from the VIXL pool, querying first into
-// the floating-point register pool, and then the the core register
-// pool. This is essentially a reimplementation of
+// Allocate a scratch register from the VIXL pool, querying first
+// the floating-point register pool, and then the core register
+// pool. This is essentially a reimplementation of
// vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
// using a different allocation strategy.
static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
@@ -1876,7 +1876,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
// ask for a scratch register of any type (core or FP).
//
// Also, we start by asking for a FP scratch register first, as the
- // demand of scratch core registers is higher. This is why we
+ // demand of scratch core registers is higher. This is why we
// use AcquireFPOrCoreCPURegisterOfSize instead of
// UseScratchRegisterScope::AcquireCPURegisterOfSize, which
// allocates core scratch registers first.
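To make the new comments in code_generator_arm64.cc more concrete, here is a small, self-contained toy of the allocation order they describe: a scratch register is taken from the floating-point pool first and from the core pool only as a fallback, so that the core temporaries (IP0/IP1) stay available for address computations. The types, names and pool representation below are illustrative only; they are not ART's or VIXL's API.

  #include <optional>
  #include <vector>

  // Toy model of the FP-first scratch allocation strategy described above.
  enum class ScratchKind { kFpu, kCore };
  struct Scratch { ScratchKind kind; int index; };

  struct ScratchPools {
    std::vector<int> fpu;   // e.g. {31} for D31
    std::vector<int> core;  // e.g. {16, 17} for IP0/IP1
  };

  // Query the floating-point pool first, then the core pool, mirroring
  // what the comment says AcquireFPOrCoreCPURegisterOfSize does.
  std::optional<Scratch> AcquireFpuThenCore(ScratchPools* pools) {
    if (!pools->fpu.empty()) {
      Scratch s{ScratchKind::kFpu, pools->fpu.back()};
      pools->fpu.pop_back();
      return s;
    }
    if (!pools->core.empty()) {
      Scratch s{ScratchKind::kCore, pools->core.back()};
      pools->core.pop_back();
      return s;
    }
    return std::nullopt;  // both pools empty: the VIXL abort case
  }

In the failing scenario exercised by the test below, querying the core pool first (as VIXL's own AcquireCPURegisterOfSize does) runs the core pool dry; querying the FP pool first leaves IP1 free for the load/store that still needs it.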
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 4ba5c5580f..7e3c377198 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -754,7 +754,28 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
//
// Assertion failed (!available->IsEmpty())
//
- // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable.
+ // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable,
+ // because of the following situation:
+ //
+ // 1. a temp register (IP0) is allocated as a scratch register by
+ // the parallel move resolver to solve a cycle (swap):
+ //
+ // [ source=DS0 destination=DS257 type=PrimDouble instruction=null ]
+ // [ source=DS257 destination=DS0 type=PrimDouble instruction=null ]
+ //
+ // 2. within CodeGeneratorARM64::MoveLocation, another temp
+ // register (IP1) is allocated to generate the swap between two
+ // double stack slots;
+ //
+ // 3. VIXL requires a third temp register to emit the `Ldr` or
+ // `Str` operation from CodeGeneratorARM64::MoveLocation (as
+ // one of the stack slots' offsets cannot be encoded as an
+ // immediate), but the pool of (core) temp registers is now
+ // empty.
+ //
+ // The solution used so far is to use a floating-point temp register
+ // (D31) in step #2, so that IP1 is available for step #3.
+
HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
move->AddMove(Location::DoubleStackSlot(0),
Location::DoubleStackSlot(257),
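The three-step comment in the hunk above hinges on step 3: the Ldr/Str needs yet another core register because the stack offset involved (slot 257 in the moves just above, possibly adjusted by the frame layout) cannot be encoded directly in an AArch64 load/store of a 64-bit value. A rough, self-contained model of that encoding limit follows; FitsLdrStrImmediate64 is an illustrative helper, not a VIXL or ART function, and it only covers the two immediate forms relevant here.

  #include <cstdint>
  #include <iostream>

  // For a 64-bit LDR/STR, AArch64 offers (simplifying) two immediate forms:
  //   - scaled unsigned 12-bit offset: multiples of 8 in [0, 32760];
  //   - unscaled signed 9-bit offset (LDUR/STUR): [-256, 255].
  // Any other offset has to be materialized in a scratch register first.
  bool FitsLdrStrImmediate64(int64_t offset) {
    bool scaled = offset >= 0 && offset <= 32760 && (offset % 8) == 0;
    bool unscaled = offset >= -256 && offset <= 255;
    return scaled || unscaled;
  }

  int main() {
    std::cout << FitsLdrStrImmediate64(0) << "\n";    // 1: encodable directly
    std::cout << FitsLdrStrImmediate64(256) << "\n";  // 1: multiple of 8, in range
    std::cout << FitsLdrStrImmediate64(257) << "\n";  // 0: needs a scratch register
    return 0;
  }

With IP0 already taken by the parallel move resolver and IP1 by MoveLocation, that extra scratch register is exactly what is missing; taking the FP register D31 instead of IP1 in step 2 is what keeps a core register available for step 3.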
@@ -807,7 +828,6 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) {
InternalCodeAllocator code_allocator;
codegen.Finalize(&code_allocator);
}
-
#endif
#ifdef ART_ENABLE_CODEGEN_mips