Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 10 +++++-----
-rw-r--r--  compiler/optimizing/codegen_test.cc         | 24 ++++++++++++++++++++++--
2 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index eee832a732..0bc4bd7524 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1498,7 +1498,7 @@ Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind ki
   if (kind == Location::kRegister) {
     scratch = LocationFrom(vixl_temps_.AcquireX());
   } else {
-    DCHECK(kind == Location::kFpuRegister);
+    DCHECK_EQ(kind, Location::kFpuRegister);
     scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
         ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
         : vixl_temps_.AcquireD());
@@ -1726,9 +1726,9 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
          (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
 }
 
-// Allocate a scratch register from the VIXL pool, querying first into
-// the floating-point register pool, and then the the core register
-// pool.  This is essentially a reimplementation of
+// Allocate a scratch register from the VIXL pool, querying first
+// the floating-point register pool, and then the core register
+// pool. This is essentially a reimplementation of
 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
 // using a different allocation strategy.
 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
@@ -1876,7 +1876,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
       // ask for a scratch register of any type (core or FP).
       //
       // Also, we start by asking for a FP scratch register first, as the
-      // demand of scratch core registers is higher.  This is why we
+      // demand of scratch core registers is higher. This is why we
       // use AcquireFPOrCoreCPURegisterOfSize instead of
       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
       // allocates core scratch registers first.
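The comment rewritten by the second hunk (and referenced again in the third) describes the allocation strategy behind AcquireFPOrCoreCPURegisterOfSize: query the floating-point scratch pool first and fall back to the core pool, because core scratch registers are in higher demand, whereas VIXL's own AcquireCPURegisterOfSize allocates core registers first. As a minimal sketch of that strategy, here is a self-contained C++ model; ScratchPool and AcquireFPOrCore are invented names for illustration, not APIs of ART or VIXL.

    // Standalone model (not ART or VIXL code): two scratch-register pools,
    // with the FP pool queried first, mirroring the strategy described in
    // the comment above.
    #include <iostream>
    #include <optional>
    #include <string>
    #include <vector>

    // A trivial pool of named scratch registers.
    class ScratchPool {
     public:
      explicit ScratchPool(std::vector<std::string> regs) : regs_(std::move(regs)) {}

      // Hand out the next free register, if any remain.
      std::optional<std::string> Acquire() {
        if (regs_.empty()) return std::nullopt;
        std::string reg = regs_.back();
        regs_.pop_back();
        return reg;
      }

     private:
      std::vector<std::string> regs_;
    };

    // Query the FP pool first, then fall back to the core pool, because
    // demand for core scratch registers is higher; VIXL's
    // AcquireCPURegisterOfSize allocates in the opposite (core-first) order.
    std::optional<std::string> AcquireFPOrCore(ScratchPool* fp, ScratchPool* core) {
      if (auto reg = fp->Acquire()) return reg;
      return core->Acquire();
    }

    int main() {
      ScratchPool core({"ip1", "ip0"});  // AArch64 core temps.
      ScratchPool fp({"d31"});           // FP temp used for stack-slot moves.

      std::cout << *AcquireFPOrCore(&fp, &core) << "\n";  // d31: FP pool first.
      std::cout << *AcquireFPOrCore(&fp, &core) << "\n";  // ip0: FP pool empty.
      return 0;
    }

The point mirrored here is purely one of ordering: both strategies hand out the same registers, but preferring the FP pool keeps the scarcer core temps free for later users that can only accept a core register.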
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 4ba5c5580f..7e3c377198 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -754,7 +754,28 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
   //
   //   Assertion failed (!available->IsEmpty())
   //
-  // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable.
+  // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable,
+  // because of the following situation:
+  //
+  //   1. a temp register (IP0) is allocated as a scratch register by
+  //      the parallel move resolver to solve a cycle (swap):
+  //
+  //        [ source=DS0 destination=DS257 type=PrimDouble instruction=null ]
+  //        [ source=DS257 destination=DS0 type=PrimDouble instruction=null ]
+  //
+  //   2. within CodeGeneratorARM64::MoveLocation, another temp
+  //      register (IP1) is allocated to generate the swap between two
+  //      double stack slots;
+  //
+  //   3. VIXL requires a third temp register to emit the `Ldr` or
+  //      `Str` operation from CodeGeneratorARM64::MoveLocation (as
+  //      one of the stack slots' offsets cannot be encoded as an
+  //      immediate), but the pool of (core) temp registers is now
+  //      empty.
+  //
+  // The solution used so far is to use a floating-point temp register
+  // (D31) in step #2, so that IP1 is available for step #3.
+
   HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
   move->AddMove(Location::DoubleStackSlot(0),
                 Location::DoubleStackSlot(257),
@@ -807,7 +828,6 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) {
   InternalCodeAllocator code_allocator;
   codegen.Finalize(&code_allocator);
 }
-
 #endif
 
 #ifdef ART_ENABLE_CODEGEN_mips
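The comment added by the first codegen_test.cc hunk is, at bottom, a resource-accounting argument: swapping two double stack slots needs three temps (the resolver's scratch, a temp for the swap itself, and a temp to materialize the out-of-range stack offset for Ldr/Str), but only two core temps (IP0, IP1) are available. A self-contained C++ sketch of that accounting follows; Take, Simulate, and the pool contents are invented for illustration, and the real fix lives in CodeGeneratorARM64::MoveLocation.

    // Standalone model (not ART or VIXL code) of the temp-register budget
    // in the scenario above: the resolver's scratch, the swap temp, and
    // the address temp for the unencodable stack-slot offset.
    #include <iostream>
    #include <optional>
    #include <string>
    #include <vector>

    using Pool = std::vector<std::string>;

    // Pop one register from a pool, if any remain.
    std::optional<std::string> Take(Pool& pool) {
      if (pool.empty()) return std::nullopt;
      std::string reg = pool.back();
      pool.pop_back();
      return reg;
    }

    // Simulate the three acquisitions from the comment; `swap_uses_fp_temp`
    // selects the fix (step #2 takes D31 instead of a core temp).
    bool Simulate(bool swap_uses_fp_temp) {
      Pool core = {"ip1", "ip0"};  // The only two core temps.
      Pool fp = {"d31"};

      auto step1 = Take(core);  // #1: parallel move resolver scratch (IP0).
      auto step2 = swap_uses_fp_temp ? Take(fp) : Take(core);  // #2: swap temp.
      auto step3 = Take(core);  // #3: temp for the Ldr/Str address.

      return step1 && step2 && step3;  // Did all three acquisitions succeed?
    }

    int main() {
      std::cout << "core swap temp: " << (Simulate(false) ? "ok" : "pool empty") << "\n";
      std::cout << "FP swap temp:   " << (Simulate(true) ? "ok" : "pool empty") << "\n";
      return 0;
    }

Running it prints "pool empty" for the core-only variant and "ok" once the swap temp comes from the FP pool (the D31 of step #2 above), which is exactly the failure mode and remedy the new comment describes.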