Register promotion support for 64-bit targets
Not sufficiently tested for 64-bit targets, but should be
fairly close.
A significant amount of refactoring could still be done (in
later CLs).
With this change we do not touch the vmap scheme. As a result,
if a vreg is promoted both as a 32-bit view and as the low half
of a 64-bit view, it must share the same physical register. We
may relax this restriction later on to allow more flexibility
for 32-bit Arm.
For example, if v4, v5, v4/v5 and v5/v6 are all hot enough to
promote, we'd end up with something like:
v4 (as an int) -> r10
v4/v5 (as a long) -> r10
v5 (as an int) -> r11
v5/v6 (as a long) -> r11
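As a rough sketch (not code in this CL; v_map uses the existing
PromotionMap fields that also appear in the diff below, and
wide_low_map is a hypothetical name for the entry describing the
low half of the 64-bit view), the invariant amounts to:
  if (v_map->core_location == kLocPhysReg &&
      wide_low_map->core_location == kLocPhysReg) {
    // The 32-bit view and the low half of the 64-bit view must
    // resolve to the same physical register.
    DCHECK_EQ(v_map->core_reg, wide_low_map->core_reg);
  }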
Fix a couple of ARM64 bugs on the way...
Change-Id: I6a152b9c164d9f1a053622266e165428045362f3
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 3e0b3cf..56dcbe5 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -445,17 +445,59 @@
NewLIR0(kPseudoMethodExit);
- /* Need to restore any FP callee saves? */
- if (fp_spill_mask_) {
- int spill_offset = frame_size_ - kArm64PointerSize*(num_fp_spills_ + num_core_spills_);
- UnSpillFPRegs(rs_sp, spill_offset, fp_spill_mask_);
- }
- if (core_spill_mask_) {
- int spill_offset = frame_size_ - kArm64PointerSize*num_core_spills_;
- UnSpillCoreRegs(rs_sp, spill_offset, core_spill_mask_);
+ // Restore callee saves and drop the stack frame.
+ // 2 versions:
+ //
+ // 1. (Original): Try to address directly, then drop the whole frame.
+ // Limitation: ldp takes a signed 7-bit scaled immediate, so this only reaches small offsets.
+ // There should have been a DCHECK!
+ //
+ // 2. (New): Drop the non-save part of the frame first. Then unspill as in the original,
+ // which is now guaranteed to be in range. Then drop the rest.
+ //
+ // TODO: In methods with few spills but a huge frame, it would be better to use non-immediate
+ // loads in variant 1.
+
+ if (frame_size_ <= 504) {
+ // "Magic" constant, 63 (max signed 7b) * 8. Do variant 1.
+ // Could be tighter, as the last load is below frame_size_ offset.
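+ // Worked example (illustrative numbers, not from a real method): with
+ // frame_size_ == 496 and four core spills (no FP spills), spill_offset is
+ // 496 - 8 * 4 = 464, which ldp can still reach, and the whole frame is then
+ // dropped with a single add of 496.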
+ if (fp_spill_mask_) {
+ int spill_offset = frame_size_ - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+ UnSpillFPRegs(rs_sp, spill_offset, fp_spill_mask_);
+ }
+ if (core_spill_mask_) {
+ int spill_offset = frame_size_ - kArm64PointerSize * num_core_spills_;
+ UnSpillCoreRegs(rs_sp, spill_offset, core_spill_mask_);
+ }
+
+ OpRegImm64(kOpAdd, rs_sp, frame_size_);
+ } else {
+ // Second variant. Drop the non-save part of the frame first.
+ int drop = 0;
+ // TODO: Always use the first formula, as num_fp_spills_ would be zero?
+ if (fp_spill_mask_) {
+ drop = frame_size_ - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+ } else {
+ drop = frame_size_ - kArm64PointerSize * num_core_spills_;
+ }
+
+ // Drop needs to be 16B aligned, so that SP stays aligned.
+ drop = RoundDown(drop, 16);
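+ // Worked example (illustrative numbers): frame_size_ == 1040 with four core
+ // spills and no FP spills gives drop = RoundDown(1040 - 8 * 4, 16) = 1008; the
+ // core registers are then reloaded at offset 1040 - 1008 - 32 = 0, and the
+ // remaining 32 bytes are popped by the final add below.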
+
+ OpRegImm64(kOpAdd, rs_sp, drop);
+
+ if (fp_spill_mask_) {
+ int offset = frame_size_ - drop - kArm64PointerSize * (num_fp_spills_ + num_core_spills_);
+ UnSpillFPRegs(rs_sp, offset, fp_spill_mask_);
+ }
+ if (core_spill_mask_) {
+ int offset = frame_size_ - drop - kArm64PointerSize * num_core_spills_;
+ UnSpillCoreRegs(rs_sp, offset, core_spill_mask_);
+ }
+
+ OpRegImm64(kOpAdd, rs_sp, frame_size_ - drop);
}
- OpRegImm64(kOpAdd, rs_sp, frame_size_);
+ // Finally return.
NewLIR0(kA64Ret);
}
diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index f71713f..7db6ab6 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h
@@ -123,8 +123,6 @@
void ClobberCallerSave();
void FreeCallTemps();
void LockCallTemps();
- void MarkPreservedSingle(int v_reg, RegStorage reg);
- void MarkPreservedDouble(int v_reg, RegStorage reg);
void CompilerInitializeRegAlloc();
// Required for target - miscellaneous.
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index 18a4e8f..51c8723 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -19,6 +19,7 @@
#include "arm64_lir.h"
#include "codegen_arm64.h"
#include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array.h"
@@ -1054,6 +1055,7 @@
if (UNLIKELY(reg2 < 0)) {
NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
} else {
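+ // For 64-bit registers, ldp encodes its offset as a signed 7-bit immediate
+ // in 8-byte units, so 63 is the largest value that fits.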
+ DCHECK_LE(offset, 63);
NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
}
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index dcb0050..6985de6 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -22,6 +22,7 @@
#include "dex/compiler_internals.h"
#include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
namespace art {
@@ -648,29 +649,6 @@
num_core_spills_++;
}
-/*
- * Mark a callee-save fp register as promoted.
- */
-void Arm64Mir2Lir::MarkPreservedSingle(int v_reg, RegStorage reg) {
- DCHECK(reg.IsFloat());
- int adjusted_reg_num = reg.GetRegNum() - A64_FP_CALLEE_SAVE_BASE;
- // Ensure fp_vmap_table is large enough
- int table_size = fp_vmap_table_.size();
- for (int i = table_size; i < (adjusted_reg_num + 1); i++) {
- fp_vmap_table_.push_back(INVALID_VREG);
- }
- // Add the current mapping
- fp_vmap_table_[adjusted_reg_num] = v_reg;
- // Size of fp_vmap_table is high-water mark, use to set mask
- num_fp_spills_ = fp_vmap_table_.size();
- fp_spill_mask_ = ((1 << num_fp_spills_) - 1) << A64_FP_CALLEE_SAVE_BASE;
-}
-
-void Arm64Mir2Lir::MarkPreservedDouble(int v_reg, RegStorage reg) {
- DCHECK(reg.IsDouble());
- MarkPreservedSingle(v_reg, reg);
-}
-
/* Clobber all regs that might be used by an external C call */
void Arm64Mir2Lir::ClobberCallerSave() {
Clobber(rs_x0);
@@ -904,7 +882,7 @@
int n = *num_gpr_used;
if (n < 8) {
*num_gpr_used = n + 1;
- if (loc->wide) {
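+ // References, like wide values, get a full 64-bit GPR here.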
+ if (loc->wide || loc->ref) {
*op_size = k64;
return RegStorage::Solo64(n);
} else {
@@ -965,35 +943,64 @@
ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
for (int i = 0; i < cu_->num_ins; i++) {
- PromotionMap* v_map = &promotion_map_[start_vreg + i];
RegLocation* t_loc = &ArgLocs[i];
OpSize op_size;
RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size);
if (reg.Valid()) {
- if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
- OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
- } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
- OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+ // If arriving in a register.
+
+ // We have already updated the arg location with promotion info,
+ // so we can rely on it.
+ if (t_loc->location == kLocPhysReg) {
+ // Just copy it.
+ OpRegCopy(t_loc->reg, reg);
} else {
- StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size, kNotVolatile);
- if (reg.Is64Bit()) {
- if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) {
- LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots";
- }
- i += 1;
+ // Needs flush.
+ if (t_loc->ref) {
+ StoreRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, kNotVolatile);
+ } else {
+ StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32,
+ kNotVolatile);
}
}
} else {
- // If arriving in frame & promoted
- if (v_map->core_location == kLocPhysReg) {
- LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i),
- RegStorage::Solo32(v_map->core_reg));
- }
- if (v_map->fp_location == kLocPhysReg) {
- LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+ // If arriving in frame & promoted.
+ if (t_loc->location == kLocPhysReg) {
+ if (t_loc->ref) {
+ LoadRefDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile);
+ } else {
+ LoadBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), t_loc->reg,
+ t_loc->wide ? k64 : k32, kNotVolatile);
+ }
}
}
+ if (t_loc->wide) {
+ // Increment i to skip the high half of this wide arg.
+ i++;
+ }
}
}
@@ -1067,7 +1074,11 @@
loc = UpdateLoc(loc);
if (loc.location == kLocPhysReg) {
ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg);
- StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
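+ // References need the reference store helper; other values in this path are
+ // flushed as 32-bit.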
+ if (loc.ref) {
+ StoreRefDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile);
+ } else {
+ StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile);
+ }
}
next_arg++;
}
diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc
index ca78e5b..aaee91b 100644
--- a/compiler/dex/quick/arm64/utility_arm64.cc
+++ b/compiler/dex/quick/arm64/utility_arm64.cc
@@ -17,6 +17,7 @@
#include "arm64_lir.h"
#include "codegen_arm64.h"
#include "dex/quick/mir_to_lir-inl.h"
+#include "dex/reg_storage_eq.h"
namespace art {