Register promotion support for 64-bit targets

Not yet sufficiently tested on 64-bit targets, but should be
fairly close to working.

A significant amount of refactoring could still be done (in
later CLs).

With this change the vmap scheme is left unchanged. As a result,
if a vreg is promoted both as a 32-bit view and as the low half of
a 64-bit view, the two views must share the same physical register.
We may relax this restriction later to allow more flexibility for
32-bit ARM.

For example, if v4, v5, v4/v5, and v5/v6 are all hot enough to
promote, we'd end up with something like:

v4 (as an int)    -> r10
v4/v5 (as a long) -> r10
v5 (as an int)    -> r11
v5/v6 (as a long) -> r11
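
As a standalone sketch of the constraint (the struct and names below
are illustrative only, not actual ART code):

  #include <cassert>

  // Hypothetical model: a vreg promoted both as a 32-bit view and as
  // the low half of a 64-bit view must share the same physical register.
  struct Promotion {
    int int_reg;   // physical reg backing the 32-bit view (e.g. 10 for r10)
    int wide_low;  // physical reg backing the low half of the 64-bit view
  };

  int main() {
    Promotion v4{10, 10};  // v4 -> r10, low half of v4/v5 -> r10
    Promotion v5{11, 11};  // v5 -> r11, low half of v5/v6 -> r11
    assert(v4.int_reg == v4.wide_low);  // required by the unchanged vmap scheme
    assert(v5.int_reg == v5.wide_low);
    return 0;
  }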

Also fix a couple of ARM64 bugs along the way.

Change-Id: I6a152b9c164d9f1a053622266e165428045362f3
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 3e0b3cf..56dcbe5 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -445,17 +445,59 @@
 
   NewLIR0(kPseudoMethodExit);
 
-  /* Need to restore any FP callee saves? */
-  if (fp_spill_mask_) {
-    int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
-    UnSpillFPRegs(rs_sp, spill_offset, fp_spill_mask_);
-  }
-  if (core_spill_mask_) {
-    int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
-    UnSpillCoreRegs(rs_sp, spill_offset, core_spill_mask_);
+  // Restore the callee saves and drop the stack frame.
+  // Two variants:
+  //
+  //  1. (Original): Try to address directly, then drop the whole frame. Limitation: the ldp
+  //                  offset is a scaled signed 7-bit immediate. There should have been a DCHECK!
+  //
+  //  2. (New): Drop the non-save part first, then restore as in the original, which is now
+  //            guaranteed to be in range, then drop the rest.
+  //
+  // TODO: In methods with few spills but a huge frame, it would be better to do non-immediate loads
+  //       in variant 1.
+
+  if (frame_size_ <= 504) {
+    // "Magic" constant, 63 (max signed 7b) * 8. Do variant 1.
+    // Could be tighter, as the last load is below frame_size_ offset.
+    if (fp_spill_mask_) {
+      int spill_offset = frame_size_ - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+      UnSpillFPRegs(rs_sp, spill_offset, fp_spill_mask_);
+    }
+    if (core_spill_mask_) {
+      int spill_offset = frame_size_ - kArm64PointerSize * num_core_spills_;
+      UnSpillCoreRegs(rs_sp, spill_offset, core_spill_mask_);
+    }
+
+    OpRegImm64(kOpAdd, rs_sp, frame_size_);
+  } else {
+    // Variant 2. Drop the non-save part of the frame first.
+    int drop = 0;
+    // TODO: Always use the first formula, as num_fp_spills_ would be zero?
+    if (fp_spill_mask_) {
+      drop = frame_size_ - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+    } else {
+      drop = frame_size_ - kArm64PointerSize * num_core_spills_;
+    }
+
+    // The drop needs to be 16-byte aligned, so that SP stays aligned.
+    drop = RoundDown(drop, 16);
+
+    OpRegImm64(kOpAdd, rs_sp, drop);
+
+    if (fp_spill_mask_) {
+      int offset = frame_size_ - drop - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+      UnSpillFPRegs(rs_sp, offset, fp_spill_mask_);
+    }
+    if (core_spill_mask_) {
+      int offset = frame_size_ - drop - kArm64PointerSize * num_core_spills_;
+      UnSpillCoreRegs(rs_sp, offset, core_spill_mask_);
+    }
+
+    OpRegImm64(kOpAdd, rs_sp, frame_size_ - drop);
   }
 
-  OpRegImm64(kOpAdd, rs_sp, frame_size_);
+  // Finally return.
   NewLIR0(kA64Ret);
 }
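
For a concrete feel of the variant-2 arithmetic, here is a standalone
sketch (the frame numbers are made up for illustration; only
kArm64PointerSize and the 63 * 8 = 504 ldp bound come from the change
above):

  #include <cassert>

  constexpr int kArm64PointerSize = 8;
  constexpr int kMaxLdpOffset = 63 * 8;  // scaled signed 7-bit immediate -> 504

  // Same rounding the patch relies on; keeps SP 16-byte aligned.
  int RoundDown(int x, int n) { return x - (x % n); }

  int main() {
    // Hypothetical frame: large enough that variant 1 would not encode.
    int frame_size = 1024;
    int num_core_spills = 4;
    int num_fp_spills = 0;

    int save_offset = frame_size - kArm64PointerSize * (num_fp_spills + num_core_spills);
    assert(save_offset > kMaxLdpOffset);  // variant 1's ldp offset is out of range

    // Variant 2: drop the non-save part first (16-byte aligned)...
    int drop = RoundDown(save_offset, 16);
    // ...leaving small, encodable restore offsets relative to the new SP.
    int offset = frame_size - drop - kArm64PointerSize * (num_fp_spills + num_core_spills);
    assert(0 <= offset && offset <= kMaxLdpOffset);

    // The two SP adjustments together drop exactly the whole frame.
    assert(drop + (frame_size - drop) == frame_size);
    return 0;
  }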