x86_64: Hard Float ABI support in QCG
This patch resolves the following ART limitations on x86_64:
- passing "float"/"double" arguments via FPRs
- passing "long" arguments via a single GPR instead of a register pair
- passing more than 3 arguments via GPRs.
Work done:
- Extended SpecialTargetRegister enum with kARG4, kARG5, fARG4..fARG7.
- Created an initial LoadArgRegs/GenDalvikX/FlushIns implementation in X86Mir2Lir.
- Added support for an unlimited number of long/double/float arguments.
- Refactored (v2)
Change-Id: I5deadd320b4341d5b2f50ba6fa4a98031abc3902
Signed-off-by: Serguei Katkov <serguei.i.katkov@intel.com>
Signed-off-by: Dmitry Petrochenko <dmitry.petrochenko@intel.com>
Signed-off-by: Chao-ying Fu <chao-ying.fu@intel.com>
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
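
(Editorial aside, not part of the patch.) A minimal standalone sketch of the
argument-to-register assignment that InToRegStorageX86_64Mapper::GetNextReg()
below implements, assuming the register order set up in target_x86.cc
(RSI, RDX, RCX, R8, R9 for core/long arguments, with RDI carrying the Method*,
and XMM0..XMM7 for float/double arguments); the names ArgRegMapper and NextReg
are hypothetical.

    #include <cstddef>
    #include <iostream>
    #include <string>

    // Hypothetical stand-in for the in-to-register mapper: core and FP
    // argument registers are handed out independently; an empty string
    // means the argument spills to the stack.
    class ArgRegMapper {
     public:
      std::string NextReg(bool is_fp) {
        static const char* const kCore[] = {"RSI", "RDX", "RCX", "R8", "R9"};
        static const char* const kFp[] = {"XMM0", "XMM1", "XMM2", "XMM3",
                                          "XMM4", "XMM5", "XMM6", "XMM7"};
        if (is_fp) {
          return fp_ < sizeof(kFp) / sizeof(kFp[0]) ? kFp[fp_++] : "";
        }
        return core_ < sizeof(kCore) / sizeof(kCore[0]) ? kCore[core_++] : "";
      }
     private:
      size_t core_ = 0;
      size_t fp_ = 0;
    };

    int main() {
      ArgRegMapper mapper;
      // A (J F D) argument list: the long takes a single GPR (no pair),
      // and the float/double arguments land in XMM registers.
      std::cout << mapper.NextReg(false) << " "    // long   -> RSI
                << mapper.NextReg(true)  << " "    // float  -> XMM0
                << mapper.NextReg(true)  << "\n";  // double -> XMM1
      return 0;
    }
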
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index eb48cc3..f0b4787 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -48,10 +48,16 @@
kArg1,
kArg2,
kArg3,
+ kArg4,
+ kArg5,
kFArg0,
kFArg1,
kFArg2,
kFArg3,
+ kFArg4,
+ kFArg5,
+ kFArg6,
+ kFArg7,
kRet0,
kRet1,
kInvokeTgt,
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 5b9c763..547c0f6 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -707,7 +707,7 @@
// which has problems with long, float, double
constexpr char arm64_supported_types[] = "ZBSCILVJFD";
// (x84_64) We still have troubles with compiling longs/doubles/floats
-constexpr char x86_64_supported_types[] = "ZBSCILV";
+constexpr char x86_64_supported_types[] = "ZBSCILVJFD";
// TODO: Remove this when we are able to compile everything.
static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) {
@@ -718,7 +718,7 @@
// 1 is for the return type. Currently, we only accept 2 parameters at the most.
// (x86_64): For now we have the same limitation. But we might want to split this
// check in future into two separate cases for arm64 and x86_64.
- if (shorty_size > (1 + 2)) {
+ if ((shorty_size > (1 + 2)) && (instruction_set != kX86_64)) {
return false;
}
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index bd9c8b4..3b30cde 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -113,6 +113,7 @@
case kHiddenArg: res_reg = rs_r12; break;
case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
case kCount: res_reg = RegStorage::InvalidReg(); break;
+ default: res_reg = RegStorage::InvalidReg();
}
return res_reg;
}
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index b287399..ce95286 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -127,6 +127,7 @@
case kHiddenArg: res_reg = rs_x12; break;
case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
case kCount: res_reg = RegStorage::InvalidReg(); break;
+ default: res_reg = RegStorage::InvalidReg();
}
return res_reg;
}
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 842533b..ee68fe2 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -290,26 +290,51 @@
}
LoadValueDirectWideFixed(arg1, r_tmp);
} else {
- RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+ RegStorage r_tmp;
+ if (cu_->instruction_set == kX86_64) {
+ r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+ } else {
+ r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+ }
LoadValueDirectWideFixed(arg1, r_tmp);
}
}
} else {
RegStorage r_tmp;
if (arg0.fp) {
- r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1));
+ if (cu_->instruction_set == kX86_64) {
+ r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg0).GetReg());
+ } else {
+ r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1));
+ }
} else {
- r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+ if (cu_->instruction_set == kX86_64) {
+ r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg());
+ } else {
+ r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+ }
}
LoadValueDirectWideFixed(arg0, r_tmp);
if (arg1.wide == 0) {
- LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2));
+ if (cu_->instruction_set == kX86_64) {
+ LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1));
+ } else {
+ LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2));
+ }
} else {
RegStorage r_tmp;
if (arg1.fp) {
- r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
+ if (cu_->instruction_set == kX86_64) {
+ r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg1).GetReg());
+ } else {
+ r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
+ }
} else {
- r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+ if (cu_->instruction_set == kX86_64) {
+ r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+ } else {
+ r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+ }
}
LoadValueDirectWideFixed(arg1, r_tmp);
}
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index c1a7c99..381c7ce 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -98,6 +98,7 @@
case kHiddenArg: res_reg = rs_rT0; break;
case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
case kCount: res_reg = rs_rMIPS_COUNT; break;
+ default: res_reg = RegStorage::InvalidReg();
}
return res_reg;
}
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 1f12b6f..a85be5e 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -68,20 +68,51 @@
// TODO: needs revisit for 64-bit.
RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) {
- RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
- RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
- RegStorage::InvalidReg();
-
int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
- if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+
+ if (cu_->instruction_set == kX86) {
/*
* When doing a call for x86, it moves the stack pointer in order to push return.
* Thus, we add another 4 bytes to figure out the out of caller (in of callee).
- * TODO: This needs revisited for 64-bit.
*/
offset += sizeof(uint32_t);
}
+ if (cu_->instruction_set == kX86_64) {
+ /*
+ * When doing a call on x86-64, the stack pointer moves to push the return address.
+ * Thus, we add another 8 bytes to locate the caller's outs (the callee's ins).
+ */
+ offset += sizeof(uint64_t);
+ }
+
+ if (cu_->instruction_set == kX86_64) {
+ RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+ if (!reg_arg.Valid()) {
+ RegStorage new_reg = wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class);
+ LoadBaseDisp(TargetReg(kSp), offset, new_reg, wide ? k64 : k32);
+ return new_reg;
+ } else {
+ // Check if we need to copy the arg to a different reg_class.
+ if (!RegClassMatches(reg_class, reg_arg)) {
+ if (wide) {
+ RegStorage new_reg = AllocTypedTempWide(false, reg_class);
+ OpRegCopyWide(new_reg, reg_arg);
+ reg_arg = new_reg;
+ } else {
+ RegStorage new_reg = AllocTypedTemp(false, reg_class);
+ OpRegCopy(new_reg, reg_arg);
+ reg_arg = new_reg;
+ }
+ }
+ }
+ return reg_arg;
+ }
+
+ RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
+ RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
+ RegStorage::InvalidReg();
+
// If the VR is wide and there is no register for high part, we need to load it.
if (wide && !reg_arg_high.Valid()) {
// If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg.
@@ -129,15 +160,22 @@
void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
- if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+ if (cu_->instruction_set == kX86) {
/*
* When doing a call for x86, it moves the stack pointer in order to push return.
* Thus, we add another 4 bytes to figure out the out of caller (in of callee).
- * TODO: This needs revisited for 64-bit.
*/
offset += sizeof(uint32_t);
}
+ if (cu_->instruction_set == kX86_64) {
+ /*
+ * When doing a call on x86-64, the stack pointer moves to push the return address.
+ * Thus, we add another 8 bytes to locate the caller's outs (the callee's ins).
+ */
+ offset += sizeof(uint64_t);
+ }
+
if (!rl_dest.wide) {
RegStorage reg = GetArgMappingToPhysicalReg(in_position);
if (reg.Valid()) {
@@ -146,6 +184,16 @@
Load32Disp(TargetReg(kSp), offset, rl_dest.reg);
}
} else {
+ if (cu_->instruction_set == kX86_64) {
+ RegStorage reg = GetArgMappingToPhysicalReg(in_position);
+ if (reg.Valid()) {
+ OpRegCopy(rl_dest.reg, reg);
+ } else {
+ LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64);
+ }
+ return;
+ }
+
RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
RegStorage reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1);
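
(Editorial aside, not part of the patch.) The hunks above add 8 bytes to the out-VR
offset on x86-64 because an x86-64 CALL pushes an 8-byte return address, shifting
where the caller's out VRs sit relative to the stack pointer; a toy illustration of
just that adjustment, with the helper name CalleeInOffset being hypothetical:

    #include <cstdint>
    #include <iostream>

    // The caller's out slot at 'caller_out_offset' is seen by the callee
    // 8 bytes further away, past the pushed return address.
    int64_t CalleeInOffset(int64_t caller_out_offset) {
      return caller_out_offset + static_cast<int64_t>(sizeof(uint64_t));
    }

    int main() {
      std::cout << CalleeInOffset(0) << "\n";  // first stack argument: SP + 8
      return 0;
    }
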
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index ed94a8d..9718acd 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -910,13 +910,13 @@
void GenInvoke(CallInfo* info);
void GenInvokeNoInline(CallInfo* info);
virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
- int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
+ virtual int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
NextCallInsn next_call_insn,
const MethodReference& target_method,
uint32_t vtable_idx,
uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
bool skip_this);
- int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+ virtual int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
NextCallInsn next_call_insn,
const MethodReference& target_method,
uint32_t vtable_idx,
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 648c148..8113f8e 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -20,9 +20,43 @@
#include "dex/compiler_internals.h"
#include "x86_lir.h"
+#include <map>
+
namespace art {
class X86Mir2Lir : public Mir2Lir {
+ protected:
+ class InToRegStorageMapper {
+ public:
+ virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
+ virtual ~InToRegStorageMapper() {}
+ };
+
+ class InToRegStorageX86_64Mapper : public InToRegStorageMapper {
+ public:
+ InToRegStorageX86_64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
+ virtual ~InToRegStorageX86_64Mapper() {}
+ virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide);
+ private:
+ int cur_core_reg_;
+ int cur_fp_reg_;
+ };
+
+ class InToRegStorageMapping {
+ public:
+ InToRegStorageMapping() : initialized_(false) {}
+ void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
+ int GetMaxMappedIn() { return max_mapped_in_; }
+ bool IsThereStackMapped() { return is_there_stack_mapped_; }
+ RegStorage Get(int in_position);
+ bool IsInitialized() { return initialized_; }
+ private:
+ std::map<int, RegStorage> mapping_;
+ int max_mapped_in_;
+ bool is_there_stack_mapped_;
+ bool initialized_;
+ };
+
public:
X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit);
@@ -56,6 +90,7 @@
// Required for target - register utilities.
RegStorage TargetReg(SpecialTargetRegister reg);
RegStorage GetArgMappingToPhysicalReg(int arg_num);
+ RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num);
RegLocation GetReturnAlt();
RegLocation GetReturnWideAlt();
RegLocation LocCReturn();
@@ -306,6 +341,22 @@
*/
void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
+ void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+
+ int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
+ NextCallInsn next_call_insn,
+ const MethodReference& target_method,
+ uint32_t vtable_idx,
+ uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+ bool skip_this);
+
+ int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+ NextCallInsn next_call_insn,
+ const MethodReference& target_method,
+ uint32_t vtable_idx,
+ uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+ bool skip_this);
+
/*
* @brief Generate a relative call to the method that will be patched at link time.
* @param target_method The MethodReference of the method to be invoked.
@@ -794,6 +845,8 @@
* @param mir A kMirOpConst128b MIR instruction to match.
*/
LIR *AddVectorLiteral(MIR *mir);
+
+ InToRegStorageMapping in_to_reg_storage_mapping_;
};
} // namespace art
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 4d8fd1b..160ec62 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -132,10 +132,18 @@
X86NativeRegisterPool rX86_ARG1;
X86NativeRegisterPool rX86_ARG2;
X86NativeRegisterPool rX86_ARG3;
+#ifdef TARGET_REX_SUPPORT
+X86NativeRegisterPool rX86_ARG4;
+X86NativeRegisterPool rX86_ARG5;
+#endif
X86NativeRegisterPool rX86_FARG0;
X86NativeRegisterPool rX86_FARG1;
X86NativeRegisterPool rX86_FARG2;
X86NativeRegisterPool rX86_FARG3;
+X86NativeRegisterPool rX86_FARG4;
+X86NativeRegisterPool rX86_FARG5;
+X86NativeRegisterPool rX86_FARG6;
+X86NativeRegisterPool rX86_FARG7;
X86NativeRegisterPool rX86_RET0;
X86NativeRegisterPool rX86_RET1;
X86NativeRegisterPool rX86_INVOKE_TGT;
@@ -145,10 +153,16 @@
RegStorage rs_rX86_ARG1;
RegStorage rs_rX86_ARG2;
RegStorage rs_rX86_ARG3;
+RegStorage rs_rX86_ARG4;
+RegStorage rs_rX86_ARG5;
RegStorage rs_rX86_FARG0;
RegStorage rs_rX86_FARG1;
RegStorage rs_rX86_FARG2;
RegStorage rs_rX86_FARG3;
+RegStorage rs_rX86_FARG4;
+RegStorage rs_rX86_FARG5;
+RegStorage rs_rX86_FARG6;
+RegStorage rs_rX86_FARG7;
RegStorage rs_rX86_RET0;
RegStorage rs_rX86_RET1;
RegStorage rs_rX86_INVOKE_TGT;
@@ -188,35 +202,27 @@
case kArg1: res_reg = rs_rX86_ARG1; break;
case kArg2: res_reg = rs_rX86_ARG2; break;
case kArg3: res_reg = rs_rX86_ARG3; break;
+ case kArg4: res_reg = rs_rX86_ARG4; break;
+ case kArg5: res_reg = rs_rX86_ARG5; break;
case kFArg0: res_reg = rs_rX86_FARG0; break;
case kFArg1: res_reg = rs_rX86_FARG1; break;
case kFArg2: res_reg = rs_rX86_FARG2; break;
case kFArg3: res_reg = rs_rX86_FARG3; break;
+ case kFArg4: res_reg = rs_rX86_FARG4; break;
+ case kFArg5: res_reg = rs_rX86_FARG5; break;
+ case kFArg6: res_reg = rs_rX86_FARG6; break;
+ case kFArg7: res_reg = rs_rX86_FARG7; break;
case kRet0: res_reg = rs_rX86_RET0; break;
case kRet1: res_reg = rs_rX86_RET1; break;
case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
case kHiddenArg: res_reg = rs_rAX; break;
case kHiddenFpArg: res_reg = rs_fr0; break;
case kCount: res_reg = rs_rX86_COUNT; break;
+ default: res_reg = RegStorage::InvalidReg();
}
return res_reg;
}
-RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
- // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
- // TODO: This is not 64-bit compliant and depends on new internal ABI.
- switch (arg_num) {
- case 0:
- return rs_rX86_ARG1;
- case 1:
- return rs_rX86_ARG2;
- case 2:
- return rs_rX86_ARG3;
- default:
- return RegStorage::InvalidReg();
- }
-}
-
/*
* Decode the register id.
*/
@@ -482,6 +488,20 @@
LockTemp(rs_rX86_ARG1);
LockTemp(rs_rX86_ARG2);
LockTemp(rs_rX86_ARG3);
+#ifdef TARGET_REX_SUPPORT
+ if (Gen64Bit()) {
+ LockTemp(rs_rX86_ARG4);
+ LockTemp(rs_rX86_ARG5);
+ LockTemp(rs_rX86_FARG0);
+ LockTemp(rs_rX86_FARG1);
+ LockTemp(rs_rX86_FARG2);
+ LockTemp(rs_rX86_FARG3);
+ LockTemp(rs_rX86_FARG4);
+ LockTemp(rs_rX86_FARG5);
+ LockTemp(rs_rX86_FARG6);
+ LockTemp(rs_rX86_FARG7);
+ }
+#endif
}
/* To be used when explicitly managing register use */
@@ -490,6 +510,20 @@
FreeTemp(rs_rX86_ARG1);
FreeTemp(rs_rX86_ARG2);
FreeTemp(rs_rX86_ARG3);
+#ifdef TARGET_REX_SUPPORT
+ if (Gen64Bit()) {
+ FreeTemp(rs_rX86_ARG4);
+ FreeTemp(rs_rX86_ARG5);
+ FreeTemp(rs_rX86_FARG0);
+ FreeTemp(rs_rX86_FARG1);
+ FreeTemp(rs_rX86_FARG2);
+ FreeTemp(rs_rX86_FARG3);
+ FreeTemp(rs_rX86_FARG4);
+ FreeTemp(rs_rX86_FARG5);
+ FreeTemp(rs_rX86_FARG6);
+ FreeTemp(rs_rX86_FARG7);
+ }
+#endif
}
bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
@@ -688,11 +722,37 @@
rs_rX86_ARG1 = rs_rSI;
rs_rX86_ARG2 = rs_rDX;
rs_rX86_ARG3 = rs_rCX;
+#ifdef TARGET_REX_SUPPORT
+ rs_rX86_ARG4 = rs_r8;
+ rs_rX86_ARG5 = rs_r9;
+#else
+ rs_rX86_ARG4 = RegStorage::InvalidReg();
+ rs_rX86_ARG5 = RegStorage::InvalidReg();
+#endif
+ rs_rX86_FARG0 = rs_fr0;
+ rs_rX86_FARG1 = rs_fr1;
+ rs_rX86_FARG2 = rs_fr2;
+ rs_rX86_FARG3 = rs_fr3;
+ rs_rX86_FARG4 = rs_fr4;
+ rs_rX86_FARG5 = rs_fr5;
+ rs_rX86_FARG6 = rs_fr6;
+ rs_rX86_FARG7 = rs_fr7;
rX86_ARG0 = rDI;
rX86_ARG1 = rSI;
rX86_ARG2 = rDX;
rX86_ARG3 = rCX;
- // TODO: ARG4(r8), ARG5(r9), floating point args.
+#ifdef TARGET_REX_SUPPORT
+ rX86_ARG4 = r8;
+ rX86_ARG5 = r9;
+#endif
+ rX86_FARG0 = fr0;
+ rX86_FARG1 = fr1;
+ rX86_FARG2 = fr2;
+ rX86_FARG3 = fr3;
+ rX86_FARG4 = fr4;
+ rX86_FARG5 = fr5;
+ rX86_FARG6 = fr6;
+ rX86_FARG7 = fr7;
} else {
rs_rX86_SP = rs_rX86_SP_32;
@@ -700,23 +760,32 @@
rs_rX86_ARG1 = rs_rCX;
rs_rX86_ARG2 = rs_rDX;
rs_rX86_ARG3 = rs_rBX;
+ rs_rX86_ARG4 = RegStorage::InvalidReg();
+ rs_rX86_ARG5 = RegStorage::InvalidReg();
+ rs_rX86_FARG0 = rs_rAX;
+ rs_rX86_FARG1 = rs_rCX;
+ rs_rX86_FARG2 = rs_rDX;
+ rs_rX86_FARG3 = rs_rBX;
+ rs_rX86_FARG4 = RegStorage::InvalidReg();
+ rs_rX86_FARG5 = RegStorage::InvalidReg();
+ rs_rX86_FARG6 = RegStorage::InvalidReg();
+ rs_rX86_FARG7 = RegStorage::InvalidReg();
rX86_ARG0 = rAX;
rX86_ARG1 = rCX;
rX86_ARG2 = rDX;
rX86_ARG3 = rBX;
+ rX86_FARG0 = rAX;
+ rX86_FARG1 = rCX;
+ rX86_FARG2 = rDX;
+ rX86_FARG3 = rBX;
+ // TODO(64): Initialize with invalid reg
+// rX86_ARG4 = RegStorage::InvalidReg();
+// rX86_ARG5 = RegStorage::InvalidReg();
}
- rs_rX86_FARG0 = rs_rAX;
- rs_rX86_FARG1 = rs_rCX;
- rs_rX86_FARG2 = rs_rDX;
- rs_rX86_FARG3 = rs_rBX;
rs_rX86_RET0 = rs_rAX;
rs_rX86_RET1 = rs_rDX;
rs_rX86_INVOKE_TGT = rs_rAX;
rs_rX86_COUNT = rs_rCX;
- rX86_FARG0 = rAX;
- rX86_FARG1 = rCX;
- rX86_FARG2 = rDX;
- rX86_FARG3 = rBX;
rX86_RET0 = rAX;
rX86_RET1 = rDX;
rX86_INVOKE_TGT = rAX;
@@ -1676,4 +1745,458 @@
return new_value;
}
+// ------------ ABI support: mapping of args to physical registers -------------
+RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide) {
+ const RegStorage coreArgMappingToPhysicalReg[] = {rs_rX86_ARG1, rs_rX86_ARG2, rs_rX86_ARG3, rs_rX86_ARG4, rs_rX86_ARG5};
+ const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage);
+ const RegStorage fpArgMappingToPhysicalReg[] = {rs_rX86_FARG0, rs_rX86_FARG1, rs_rX86_FARG2, rs_rX86_FARG3,
+ rs_rX86_FARG4, rs_rX86_FARG5, rs_rX86_FARG6, rs_rX86_FARG7};
+ const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage);
+
+ RegStorage result = RegStorage::InvalidReg();
+ if (is_double_or_float) {
+ if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
+ result = fpArgMappingToPhysicalReg[cur_fp_reg_++];
+ if (result.Valid()) {
+ result = is_wide ? RegStorage::FloatSolo64(result.GetReg()) : RegStorage::FloatSolo32(result.GetReg());
+ }
+ }
+ } else {
+ if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+ result = coreArgMappingToPhysicalReg[cur_core_reg_++];
+ if (result.Valid()) {
+ result = is_wide ? RegStorage::Solo64(result.GetReg()) : RegStorage::Solo32(result.GetReg());
+ }
+ }
+ }
+ return result;
+}
+
+RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) {
+ DCHECK(IsInitialized());
+ auto res = mapping_.find(in_position);
+ return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
+}
+
+void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper) {
+ DCHECK(mapper != nullptr);
+ max_mapped_in_ = -1;
+ is_there_stack_mapped_ = false;
+ for (int in_position = 0; in_position < count; in_position++) {
+ RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide);
+ if (reg.Valid()) {
+ mapping_[in_position] = reg;
+ max_mapped_in_ = std::max(max_mapped_in_, in_position);
+ if (reg.Is64BitSolo()) {
+ // We covered 2 args, so skip the next one
+ in_position++;
+ }
+ } else {
+ is_there_stack_mapped_ = true;
+ }
+ }
+ initialized_ = true;
+}
+
+RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+ if (!Gen64Bit()) {
+ return GetCoreArgMappingToPhysicalReg(arg_num);
+ }
+
+ if (!in_to_reg_storage_mapping_.IsInitialized()) {
+ int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+ RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
+
+ InToRegStorageX86_64Mapper mapper;
+ in_to_reg_storage_mapping_.Initialize(arg_locs, cu_->num_ins, &mapper);
+ }
+ return in_to_reg_storage_mapping_.Get(arg_num);
+}
+
+RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) {
+ // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
+ // Not used for 64-bit, TODO: Move X86_32 to the same framework
+ switch (core_arg_num) {
+ case 0:
+ return rs_rX86_ARG1;
+ case 1:
+ return rs_rX86_ARG2;
+ case 2:
+ return rs_rX86_ARG3;
+ default:
+ return RegStorage::InvalidReg();
+ }
+}
+
+// ---------End of ABI support: mapping of args to physical registers -------------
+
+/*
+ * If there are any ins passed in registers that have not been promoted
+ * to a callee-save register, flush them to the frame. Perform initial
+ * assignment of promoted arguments.
+ *
+ * ArgLocs is an array of location records describing the incoming arguments
+ * with one location record per word of argument.
+ */
+void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
+ if (!Gen64Bit()) return Mir2Lir::FlushIns(ArgLocs, rl_method);
+ /*
+ * Dummy up a RegLocation for the incoming Method*
+ * It will attempt to keep kArg0 live (or copy it to home location
+ * if promoted).
+ */
+
+ RegLocation rl_src = rl_method;
+ rl_src.location = kLocPhysReg;
+ rl_src.reg = TargetReg(kArg0);
+ rl_src.home = false;
+ MarkLive(rl_src);
+ StoreValue(rl_method, rl_src);
+ // If Method* has been promoted, explicitly flush
+ if (rl_method.location == kLocPhysReg) {
+ StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0));
+ }
+
+ if (cu_->num_ins == 0) {
+ return;
+ }
+
+ int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+ /*
+ * Copy incoming arguments to their proper home locations.
+ * NOTE: an older version of dx had an issue in which
+ * it would reuse static method argument registers.
+ * This could result in the same Dalvik virtual register
+ * being promoted to both core and fp regs. To account for this,
+ * we only copy to the corresponding promoted physical register
+ * if it matches the type of the SSA name for the incoming
+ * argument. It is also possible that long and double arguments
+ * end up half-promoted. In those cases, we must flush the promoted
+ * half to memory as well.
+ */
+ for (int i = 0; i < cu_->num_ins; i++) {
+ PromotionMap* v_map = &promotion_map_[start_vreg + i];
+ RegStorage reg = RegStorage::InvalidReg();
+ // get reg corresponding to input
+ reg = GetArgMappingToPhysicalReg(i);
+
+ if (reg.Valid()) {
+ // If arriving in register
+ bool need_flush = true;
+ RegLocation* t_loc = &ArgLocs[i];
+ if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
+ OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
+ need_flush = false;
+ } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
+ OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+ need_flush = false;
+ } else {
+ need_flush = true;
+ }
+
+ // For wide args, force flush if not fully promoted
+ if (t_loc->wide) {
+ PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1);
+ // Is only half promoted?
+ need_flush |= (p_map->core_location != v_map->core_location) ||
+ (p_map->fp_location != v_map->fp_location);
+ }
+ if (need_flush) {
+ if (t_loc->wide && t_loc->fp) {
+ StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64);
+ // Increment i to skip the next one
+ i++;
+ } else if (t_loc->wide && !t_loc->fp) {
+ StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64);
+ // Increment i to skip the next one
+ i++;
+ } else {
+ Store32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), reg);
+ }
+ }
+ } else {
+ // If arriving in frame & promoted
+ if (v_map->core_location == kLocPhysReg) {
+ Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg));
+ }
+ if (v_map->fp_location == kLocPhysReg) {
+ Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+ }
+ }
+ }
+}
+
+/*
+ * Load up to 5 arguments, the first three of which will be in
+ * kArg1 .. kArg3. On entry kArg0 contains the current method pointer,
+ * and as part of the load sequence, it must be replaced with
+ * the target method pointer. Note, this may also be called
+ * for "range" variants if the number of arguments is 5 or fewer.
+ */
+int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
+ int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
+ const MethodReference& target_method,
+ uint32_t vtable_idx, uintptr_t direct_code,
+ uintptr_t direct_method, InvokeType type, bool skip_this) {
+ if (!Gen64Bit()) {
+ return Mir2Lir::GenDalvikArgsNoRange(info,
+ call_state, pcrLabel, next_call_insn,
+ target_method,
+ vtable_idx, direct_code,
+ direct_method, type, skip_this);
+ }
+ return GenDalvikArgsRange(info,
+ call_state, pcrLabel, next_call_insn,
+ target_method,
+ vtable_idx, direct_code,
+ direct_method, type, skip_this);
+}
+
+/*
+ * May have 0+ arguments (also used for jumbo). Note that
+ * source virtual registers may be in physical registers, so may
+ * need to be flushed to home location before copying. This
+ * applies to arg3 and above (see below).
+ *
+ * Two general strategies:
+ * If < 20 arguments
+ * Pass args 3-18 using vldm/vstm block copy
+ * Pass arg0, arg1 & arg2 in kArg1-kArg3
+ * If 20+ arguments
+ * Pass args arg19+ using memcpy block copy
+ * Pass arg0, arg1 & arg2 in kArg1-kArg3
+ *
+ */
+int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
+ LIR** pcrLabel, NextCallInsn next_call_insn,
+ const MethodReference& target_method,
+ uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
+ InvokeType type, bool skip_this) {
+ if (!Gen64Bit()) {
+ return Mir2Lir::GenDalvikArgsRange(info, call_state,
+ pcrLabel, next_call_insn,
+ target_method,
+ vtable_idx, direct_code, direct_method,
+ type, skip_this);
+ }
+
+ /* If no arguments, just return */
+ if (info->num_arg_words == 0)
+ return call_state;
+
+ const int start_index = skip_this ? 1 : 0;
+
+ InToRegStorageX86_64Mapper mapper;
+ InToRegStorageMapping in_to_reg_storage_mapping;
+ in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
+ const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
+ const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
+ in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1;
+ int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);
+
+ // First of all, check whether it makes sense to use bulk copying.
+ // The optimization is applicable only to the range case.
+ // TODO: make a constant instead of 2
+ if (info->is_range && regs_left_to_pass_via_stack >= 2) {
+ // Scan the rest of the args - if in phys_reg flush to memory
+ for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) {
+ RegLocation loc = info->args[next_arg];
+ if (loc.wide) {
+ loc = UpdateLocWide(loc);
+ if (loc.location == kLocPhysReg) {
+ StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64);
+ }
+ next_arg += 2;
+ } else {
+ loc = UpdateLoc(loc);
+ if (loc.location == kLocPhysReg) {
+ StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32);
+ }
+ next_arg++;
+ }
+ }
+
+ // Logic below assumes that Method pointer is at offset zero from SP.
+ DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);
+
+ // The rest can be copied together
+ int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
+ int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped, cu_->instruction_set);
+
+ int current_src_offset = start_offset;
+ int current_dest_offset = outs_offset;
+
+ while (regs_left_to_pass_via_stack > 0) {
+ // This is based on the knowledge that the stack itself is 16-byte aligned.
+ bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
+ bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
+ size_t bytes_to_move;
+
+ /*
+ * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do
+ * a 128-bit move because we won't get the chance to try to align. If there are more than
+ * 4 registers left to move, consider doing a 128-bit only if either src or dest are aligned.
+ * We do this because we could potentially do a smaller move to align.
+ */
+ if (regs_left_to_pass_via_stack == 4 ||
+ (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
+ // Moving 128-bits via xmm register.
+ bytes_to_move = sizeof(uint32_t) * 4;
+
+ // Allocate a free xmm temp. Since we are working through the calling sequence,
+ // we expect to have an xmm temporary available. AllocTempDouble will abort if
+ // there are no free registers.
+ RegStorage temp = AllocTempDouble();
+
+ LIR* ld1 = nullptr;
+ LIR* ld2 = nullptr;
+ LIR* st1 = nullptr;
+ LIR* st2 = nullptr;
+
+ /*
+ * The logic is similar for both loads and stores. If we have 16-byte alignment,
+ * do an aligned move. If we have 8-byte alignment, then do the move in two
+ * parts. This approach prevents possible cache line splits. Finally, fall back
+ * to doing an unaligned move. In most cases we likely won't split the cache
+ * line but we cannot prove it and thus take a conservative approach.
+ */
+ bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
+ bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
+
+ if (src_is_16b_aligned) {
+ ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP);
+ } else if (src_is_8b_aligned) {
+ ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP);
+ ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1),
+ kMovHi128FP);
+ } else {
+ ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP);
+ }
+
+ if (dest_is_16b_aligned) {
+ st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP);
+ } else if (dest_is_8b_aligned) {
+ st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP);
+ st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1),
+ temp, kMovHi128FP);
+ } else {
+ st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP);
+ }
+
+ // TODO If we could keep track of aliasing information for memory accesses that are wider
+ // than 64-bit, we wouldn't need to set up a barrier.
+ if (ld1 != nullptr) {
+ if (ld2 != nullptr) {
+ // For 64-bit load we can actually set up the aliasing information.
+ AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
+ AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
+ } else {
+ // Set barrier for 128-bit load.
+ SetMemRefType(ld1, true /* is_load */, kDalvikReg);
+ ld1->u.m.def_mask = ENCODE_ALL;
+ }
+ }
+ if (st1 != nullptr) {
+ if (st2 != nullptr) {
+ // For 64-bit store we can actually set up the aliasing information.
+ AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
+ AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
+ } else {
+ // Set barrier for 128-bit store.
+ SetMemRefType(st1, false /* is_load */, kDalvikReg);
+ st1->u.m.def_mask = ENCODE_ALL;
+ }
+ }
+
+ // Free the temporary used for the data movement.
+ FreeTemp(temp);
+ } else {
+ // Moving 32-bits via general purpose register.
+ bytes_to_move = sizeof(uint32_t);
+
+ // Instead of allocating a new temp, simply reuse one of the registers being used
+ // for argument passing.
+ RegStorage temp = TargetReg(kArg3);
+
+ // Now load the argument VR and store to the outs.
+ Load32Disp(TargetReg(kSp), current_src_offset, temp);
+ Store32Disp(TargetReg(kSp), current_dest_offset, temp);
+ }
+
+ current_src_offset += bytes_to_move;
+ current_dest_offset += bytes_to_move;
+ regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
+ }
+ DCHECK_EQ(regs_left_to_pass_via_stack, 0);
+ }
+
+ // Now handle the arguments that were not mapped to registers (stack-mapped).
+ if (in_to_reg_storage_mapping.IsThereStackMapped()) {
+ RegStorage regSingle = TargetReg(kArg2);
+ RegStorage regWide = RegStorage::Solo64(TargetReg(kArg3).GetReg());
+ for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) {
+ RegLocation rl_arg = info->args[i];
+ rl_arg = UpdateRawLoc(rl_arg);
+ RegStorage reg = in_to_reg_storage_mapping.Get(i);
+ if (!reg.Valid()) {
+ int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+
+ if (rl_arg.wide) {
+ if (rl_arg.location == kLocPhysReg) {
+ StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64);
+ } else {
+ LoadValueDirectWideFixed(rl_arg, regWide);
+ StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64);
+ }
+ i++;
+ } else {
+ if (rl_arg.location == kLocPhysReg) {
+ StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32);
+ } else {
+ LoadValueDirectFixed(rl_arg, regSingle);
+ StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32);
+ }
+ }
+ call_state = next_call_insn(cu_, info, call_state, target_method,
+ vtable_idx, direct_code, direct_method, type);
+ }
+ }
+ }
+
+ // Finish with mapped registers
+ for (int i = start_index; i <= last_mapped_in; i++) {
+ RegLocation rl_arg = info->args[i];
+ rl_arg = UpdateRawLoc(rl_arg);
+ RegStorage reg = in_to_reg_storage_mapping.Get(i);
+ if (reg.Valid()) {
+ if (rl_arg.wide) {
+ LoadValueDirectWideFixed(rl_arg, reg);
+ i++;
+ } else {
+ LoadValueDirectFixed(rl_arg, reg);
+ }
+ call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+ direct_code, direct_method, type);
+ }
+ }
+
+ call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+ direct_code, direct_method, type);
+ if (pcrLabel) {
+ if (Runtime::Current()->ExplicitNullChecks()) {
+ *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+ } else {
+ *pcrLabel = nullptr;
+ // In lieu of generating a check for kArg1 being null, we need to
+ // perform a load when doing implicit checks.
+ RegStorage tmp = AllocTemp();
+ Load32Disp(TargetReg(kArg1), 0, tmp);
+ MarkPossibleNullPointerException(info->opt_flags);
+ FreeTemp(tmp);
+ }
+ }
+ return call_state;
+}
+
} // namespace art
+
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index bb8df89..f290548 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -334,10 +334,18 @@
extern X86NativeRegisterPool rX86_ARG1;
extern X86NativeRegisterPool rX86_ARG2;
extern X86NativeRegisterPool rX86_ARG3;
+#ifdef TARGET_REX_SUPPORT
+extern X86NativeRegisterPool rX86_ARG4;
+extern X86NativeRegisterPool rX86_ARG5;
+#endif
extern X86NativeRegisterPool rX86_FARG0;
extern X86NativeRegisterPool rX86_FARG1;
extern X86NativeRegisterPool rX86_FARG2;
extern X86NativeRegisterPool rX86_FARG3;
+extern X86NativeRegisterPool rX86_FARG4;
+extern X86NativeRegisterPool rX86_FARG5;
+extern X86NativeRegisterPool rX86_FARG6;
+extern X86NativeRegisterPool rX86_FARG7;
extern X86NativeRegisterPool rX86_RET0;
extern X86NativeRegisterPool rX86_RET1;
extern X86NativeRegisterPool rX86_INVOKE_TGT;
@@ -347,10 +355,16 @@
extern RegStorage rs_rX86_ARG1;
extern RegStorage rs_rX86_ARG2;
extern RegStorage rs_rX86_ARG3;
+extern RegStorage rs_rX86_ARG4;
+extern RegStorage rs_rX86_ARG5;
extern RegStorage rs_rX86_FARG0;
extern RegStorage rs_rX86_FARG1;
extern RegStorage rs_rX86_FARG2;
extern RegStorage rs_rX86_FARG3;
+extern RegStorage rs_rX86_FARG4;
+extern RegStorage rs_rX86_FARG5;
+extern RegStorage rs_rX86_FARG6;
+extern RegStorage rs_rX86_FARG7;
extern RegStorage rs_rX86_RET0;
extern RegStorage rs_rX86_RET1;
extern RegStorage rs_rX86_INVOKE_TGT;