ART: Fix Quick-style LR vs PC core spill mask bug

It's always been a bug that Quick marked PC as spilled instead of
LR. The root cause was a mutation of the spill mask at frame exit,
when LR is being restored into PC to return. A local should have
been used to keep the actual spill mask safe and sound.

This has only worked because nobody ever uses LR, even after long
jumps for exception dispatch. However, single-frame deoptimization
needs this to work, and I'd rather fix this than be forced to
have machine-specific fixups.

Also fix this in the optimizing compiler, and bump the oat version.

Change-Id: Ib032a533408bf464097fc96dcbfc5b6a68bf59a1
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 438ef69..a4c58b0 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -48,7 +48,7 @@
 // with baseline.
 static constexpr Register kCoreSavedRegisterForBaseline = R5;
 static constexpr Register kCoreCalleeSaves[] =
-    { R5, R6, R7, R8, R10, R11, PC };
+    { R5, R6, R7, R8, R10, R11, LR };
 static constexpr SRegister kFpuCalleeSaves[] =
     { S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31 };
 
@@ -409,8 +409,8 @@
       method_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()),
       call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter()),
       relative_call_patches_(graph->GetArena()->Adapter()) {
-  // Save the PC register to mimic Quick.
-  AddAllocatedRegister(Location::RegisterLocation(PC));
+  // Always save the LR register to mimic Quick.
+  AddAllocatedRegister(Location::RegisterLocation(LR));
 }
 
 void CodeGeneratorARM::Finalize(CodeAllocator* allocator) {
@@ -599,12 +599,9 @@
     RecordPcInfo(nullptr, 0);
   }
 
-  // PC is in the list of callee-save to mimic Quick, but we need to push
-  // LR at entry instead.
-  uint32_t push_mask = (core_spill_mask_ & (~(1 << PC))) | 1 << LR;
-  __ PushList(push_mask);
-  __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(push_mask));
-  __ cfi().RelOffsetForMany(DWARFReg(kMethodRegisterArgument), 0, push_mask, kArmWordSize);
+  __ PushList(core_spill_mask_);
+  __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask_));
+  __ cfi().RelOffsetForMany(DWARFReg(kMethodRegisterArgument), 0, core_spill_mask_, kArmWordSize);
   if (fpu_spill_mask_ != 0) {
     SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
     __ vpushs(start_register, POPCOUNT(fpu_spill_mask_));
@@ -632,7 +629,10 @@
     __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_));
     __ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_);
   }
-  __ PopList(core_spill_mask_);
+  // Pop LR into PC to return.
+  DCHECK_NE(core_spill_mask_ & (1 << LR), 0U);
+  uint32_t pop_mask = (core_spill_mask_ & (~(1 << LR))) | 1 << PC;
+  __ PopList(pop_mask);
   __ cfi().RestoreState();
   __ cfi().DefCFAOffset(GetFrameSize());
 }