Relax the only-one-back-edge restriction.

The rule gets in the way of better register allocation, as merging
all back edges into a single one creates an artificial join point
between otherwise independent paths.
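
With multiple back edges, the same HSuspendCheck presumably reaches
GenerateSuspendCheck once per back edge, so the codegen change below
caches the slow path on the instruction and reuses it instead of
allocating a fresh one each time (the DCHECKs assert that every back
edge resumes at the same loop header). The following is a minimal
standalone sketch of that create-once, reuse-thereafter pattern; the
types here (SlowPath, GetOrCreateSlowPath, a plain vector in place of
the arena) are simplified stand-ins for illustration, not the ART code.

    #include <cassert>
    #include <memory>
    #include <vector>

    struct HBasicBlock {};

    class SlowPath {
     public:
      explicit SlowPath(HBasicBlock* successor) : successor_(successor) {}
      HBasicBlock* GetSuccessor() const { return successor_; }
     private:
      HBasicBlock* const successor_;  // Where to resume after the runtime call.
    };

    class HSuspendCheck {
     public:
      SlowPath* GetSlowPath() const { return slow_path_; }
      void SetSlowPath(SlowPath* path) { slow_path_ = path; }
     private:
      SlowPath* slow_path_ = nullptr;  // Shared by all back edges of the loop.
    };

    // Called once per back edge; every call for the same suspend check
    // must observe the same successor, hence the assert on the else arm.
    SlowPath* GetOrCreateSlowPath(HSuspendCheck* check,
                                  HBasicBlock* successor,
                                  std::vector<std::unique_ptr<SlowPath>>* arena) {
      SlowPath* slow_path = check->GetSlowPath();
      if (slow_path == nullptr) {
        arena->push_back(std::make_unique<SlowPath>(successor));
        slow_path = arena->back().get();
        check->SetSlowPath(slow_path);
      } else {
        assert(slow_path->GetSuccessor() == successor);
      }
      return slow_path;
    }

Caching the slow path on the instruction itself, rather than keying a
side map, avoids any lookup and makes the one-slow-path-per-suspend-check
invariant explicit.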

Change-Id: Ia4392890f95bcea56d143138f28ddce6c572ad58
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index e4c37de..f56e446 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -112,6 +112,10 @@
     return &return_label_;
   }
 
+  HBasicBlock* GetSuccessor() const {
+    return successor_;
+  }
+
  private:
   HSuspendCheck* const instruction_;
   // If not null, the block to branch to after the suspend check.
@@ -3539,8 +3543,18 @@
 void InstructionCodeGeneratorARM::GenerateSuspendCheck(HSuspendCheck* instruction,
                                                        HBasicBlock* successor) {
   SuspendCheckSlowPathARM* slow_path =
-      new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
-  codegen_->AddSlowPath(slow_path);
+      down_cast<SuspendCheckSlowPathARM*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
 
   __ LoadFromOffset(
       kLoadUnsignedHalfword, IP, TR, Thread::ThreadFlagsOffset<kArmWordSize>().Int32Value());