author Roland Levillain <rpl@google.com> 2017-05-10 17:24:56 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2017-05-10 17:24:58 +0000
commit 92f4672f811a4eccdc596f7c2235804abd196fde (patch)
tree   03449148ef022c9c3afc50bc960f2851e0098f97 /compiler/optimizing
parent 5528d1c7cf686d2edea51ff12881e495c2124f09 (diff)
parent 952b23505e2512c9327e6d20c8304493bf8fcf7c (diff)
Merge "Improve the documentation of an ARM64 parallel move resolver corner case."
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc  10
-rw-r--r--  compiler/optimizing/codegen_test.cc           24
2 files changed, 27 insertions, 7 deletions
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 2af3e3a3ea..d59f8b435c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1515,7 +1515,7 @@ Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind ki
if (kind == Location::kRegister) {
scratch = LocationFrom(vixl_temps_.AcquireX());
} else {
- DCHECK(kind == Location::kFpuRegister);
+ DCHECK_EQ(kind, Location::kFpuRegister);
scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
: vixl_temps_.AcquireD());
@@ -1743,9 +1743,9 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
(cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
}
-// Allocate a scratch register from the VIXL pool, querying first into
-// the floating-point register pool, and then the the core register
-// pool. This is essentially a reimplementation of
+// Allocate a scratch register from the VIXL pool, querying first
+// the floating-point register pool, and then the core register
+// pool. This is essentially a reimplementation of
// vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
// using a different allocation strategy.
static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
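Note: the comment above describes an acquisition order that is the reverse of VIXL's own AcquireCPURegisterOfSize, querying the FP scratch pool before the core one. A minimal sketch of that order is below; it is not the ART implementation, the helper name and the `fp_available`/`core_available` flags are hypothetical stand-ins for the real checks against the VIXL scratch pools, and the SIMD case would acquire a Q-sized register instead of a D register, as in the first hunk above.

    // Sketch only, not ART code: acquire an FP scratch register first and
    // fall back to a core one, the opposite of what VIXL's
    // UseScratchRegisterScope::AcquireCPURegisterOfSize does.
    #include <cassert>
    #include "aarch64/macro-assembler-aarch64.h"  // VIXL header path assumed.

    using vixl::aarch64::CPURegister;
    using vixl::aarch64::UseScratchRegisterScope;

    static CPURegister AcquireFPFirstSketch(UseScratchRegisterScope* temps,
                                            bool fp_available,
                                            bool core_available) {
      if (fp_available) {
        return temps->AcquireD();  // FP scratch, e.g. D31.
      }
      assert(core_available);      // Otherwise no scratch register is left.
      return temps->AcquireX();    // Core scratch, e.g. IP0 or IP1.
    }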
@@ -1893,7 +1893,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
// ask for a scratch register of any type (core or FP).
//
// Also, we start by asking for a FP scratch register first, as the
- // demand of scratch core registers is higher. This is why we
+ // demand of scratch core registers is higher. This is why we
// use AcquireFPOrCoreCPURegisterOfSize instead of
// UseScratchRegisterScope::AcquireCPURegisterOfSize, which
// allocates core scratch registers first.
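To make the "demand of scratch core registers is higher" point concrete, here is a plain C++ model (not ART code) of the scratch-pool accounting in the stack-slot swap that the test below exercises: the core pool holds IP0 and IP1, the FP pool holds D31, and emitting the swap needs one resolver scratch, one data scratch, and possibly one more core register for an address that VIXL cannot encode as an immediate.

    // Plain C++ model, not ART code: scratch-pool accounting for swapping
    // two double stack slots, following steps #1-#3 of the test comment below.
    #include <cassert>

    struct ScratchPools {
      int core = 2;  // IP0, IP1 (the VIXL core scratch registers).
      int fp = 1;    // D31 (the VIXL FP scratch register).
    };

    // Returns true if the swap can be emitted without exhausting the pools.
    static bool CanEmitDoubleStackSlotSwap(ScratchPools pools, bool fp_first) {
      --pools.core;    // Step 1: the resolver takes IP0 to break the cycle.
      if (fp_first) {
        --pools.fp;    // Step 2 (FP-first): MoveLocation takes D31.
      } else {
        --pools.core;  // Step 2 (core-first): MoveLocation would take IP1.
      }
      // Step 3: VIXL still needs one core register to materialize a
      // stack-slot address whose offset does not encode as an immediate.
      return pools.core > 0;
    }

    int main() {
      assert(!CanEmitDoubleStackSlotSwap(ScratchPools{}, /*fp_first=*/false));
      assert(CanEmitDoubleStackSlotSwap(ScratchPools{}, /*fp_first=*/true));
      return 0;
    }

With the FP-first order, IP1 remains available for step #3, which is exactly the solution described in the test comment below.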
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index 4ba5c5580f..7e3c377198 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -754,7 +754,28 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
//
// Assertion failed (!available->IsEmpty())
//
- // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable.
+ // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable,
+ // because of the following situation:
+ //
+ // 1. a temp register (IP0) is allocated as a scratch register by
+ // the parallel move resolver to solve a cycle (swap):
+ //
+ // [ source=DS0 destination=DS257 type=PrimDouble instruction=null ]
+ // [ source=DS257 destination=DS0 type=PrimDouble instruction=null ]
+ //
+ // 2. within CodeGeneratorARM64::MoveLocation, another temp
+ // register (IP1) is allocated to generate the swap between two
+ // double stack slots;
+ //
+ // 3. VIXL requires a third temp register to emit the `Ldr` or
+ // `Str` operation from CodeGeneratorARM64::MoveLocation (as
+ // one of the stack slots' offsets cannot be encoded as an
+ // immediate), but the pool of (core) temp registers is now
+ // empty.
+ //
+ // The solution used so far is to use a floating-point temp register
+ // (D31) in step #2, so that IP1 is available for step #3.
+
HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
move->AddMove(Location::DoubleStackSlot(0),
Location::DoubleStackSlot(257),
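Step #3 of the comment above relies on one of the two offsets not being encodable as a load/store immediate, which is presumably why slot 257 is paired with slot 0. A standalone check of the relevant AArch64 forms (not ART code; it assumes the slot index corresponds, up to an aligned frame adjustment, to a byte offset) illustrates this:

    // Not ART code. A 64-bit LDR/STR immediate must fit either the unscaled
    // 9-bit signed form (LDUR/STUR, -256..255) or the unsigned 12-bit form
    // scaled by 8 (0..32760, multiple of 8). Offset 257 fits neither, so the
    // macro assembler needs a core scratch register to compute the address,
    // while offset 0 encodes directly.
    #include <cstdio>

    static bool EncodesAsUnscaledImm(long long offset) {
      return offset >= -256 && offset <= 255;
    }

    static bool EncodesAsScaledImm64(long long offset) {
      return offset >= 0 && offset <= 32760 && offset % 8 == 0;
    }

    int main() {
      for (long long offset : {0LL, 257LL}) {
        std::printf("offset %3lld encodable: %d\n", offset,
                    EncodesAsUnscaledImm(offset) || EncodesAsScaledImm64(offset));
      }
      return 0;
    }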
@@ -807,7 +828,6 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) {
InternalCodeAllocator code_allocator;
codegen.Finalize(&code_allocator);
}
-
#endif
#ifdef ART_ENABLE_CODEGEN_mips