x86_64: Hard Float ABI support in QCG
This patch resolves the following ART limitations by adding support for:
- passing "float"/"double" arguments via FPRs
- passing "long" arguments via a single GPR rather than a register pair
- passing more than 3 arguments via GPRs.
Work done:
- Extended the SpecialTargetRegister enum with kArg4, kArg5, kFArg4..kFArg7.
- Created initial LoadArgRegs/GenDalvikArgs(No)Range/FlushIns versions in X86Mir2Lir.
- Added support for an unlimited number of long/double/float arguments.
- Refactored (v2)
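
For reference, a minimal standalone sketch (illustrative only, not part of
the patch) of where arguments land under the new mapping, assuming the
register assignments made in target_x86.cc: kArg1..kArg5 = RSI, RDX, RCX,
R8, R9 (RDI carries Method*) and kFArg0..kFArg7 = XMM0..XMM7:

  #include <cstdio>

  int main() {
    const char* gprs[] = {"RSI", "RDX", "RCX", "R8", "R9"};
    const char* fprs[] = {"XMM0", "XMM1", "XMM2", "XMM3",
                          "XMM4", "XMM5", "XMM6", "XMM7"};
    const char shorty[] = "VJFDIIIIII";  // return type first, then the args
    int gpr = 0, fpr = 0;
    for (int i = 1; shorty[i] != '\0'; ++i) {
      bool fp = (shorty[i] == 'F' || shorty[i] == 'D');
      if (fp && fpr < 8) {
        printf("arg%d (%c) -> %s\n", i, shorty[i], fprs[fpr++]);  // float/double in FPR
      } else if (!fp && gpr < 5) {
        printf("arg%d (%c) -> %s\n", i, shorty[i], gprs[gpr++]);  // long takes one GPR
      } else {
        printf("arg%d (%c) -> stack\n", i, shorty[i]);
      }
    }
    // Note: wide args (J/D) also occupy two vreg words; omitted here for brevity.
    return 0;
  }
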
Change-Id: I5deadd320b4341d5b2f50ba6fa4a98031abc3902
Signed-off-by: Serguei Katkov <serguei.i.katkov@intel.com>
Signed-off-by: Dmitry Petrochenko <dmitry.petrochenko@intel.com>
Signed-off-by: Chao-ying Fu <chao-ying.fu@intel.com>
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index eb48cc3..f0b4787 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -48,10 +48,16 @@
kArg1,
kArg2,
kArg3,
+ kArg4,
+ kArg5,
kFArg0,
kFArg1,
kFArg2,
kFArg3,
+ kFArg4,
+ kFArg5,
+ kFArg6,
+ kFArg7,
kRet0,
kRet1,
kInvokeTgt,
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index 5b9c763..547c0f6 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -707,7 +707,7 @@
// which has problems with long, float, double
constexpr char arm64_supported_types[] = "ZBSCILVJFD";
// (x84_64) We still have troubles with compiling longs/doubles/floats
-constexpr char x86_64_supported_types[] = "ZBSCILV";
+constexpr char x86_64_supported_types[] = "ZBSCILVJFD";
// TODO: Remove this when we are able to compile everything.
static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) {
@@ -718,7 +718,7 @@
// 1 is for the return type. Currently, we only accept 2 parameters at the most.
// (x86_64): For now we have the same limitation. But we might want to split this
// check in future into two separate cases for arm64 and x86_64.
- if (shorty_size > (1 + 2)) {
+ if ((shorty_size > (1 + 2)) && (instruction_set != kX86_64)) {
return false;
}
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index bd9c8b4..3b30cde 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -113,6 +113,7 @@
case kHiddenArg: res_reg = rs_r12; break;
case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
case kCount: res_reg = RegStorage::InvalidReg(); break;
+ default: res_reg = RegStorage::InvalidReg();
}
return res_reg;
}
diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc
index b287399..ce95286 100644
--- a/compiler/dex/quick/arm64/target_arm64.cc
+++ b/compiler/dex/quick/arm64/target_arm64.cc
@@ -127,6 +127,7 @@
case kHiddenArg: res_reg = rs_x12; break;
case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
case kCount: res_reg = RegStorage::InvalidReg(); break;
+ default: res_reg = RegStorage::InvalidReg();
}
return res_reg;
}
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 842533b..ee68fe2 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -290,26 +290,51 @@
}
LoadValueDirectWideFixed(arg1, r_tmp);
} else {
- RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+ RegStorage r_tmp;
+ if (cu_->instruction_set == kX86_64) {
+ r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+ } else {
+ r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+ }
LoadValueDirectWideFixed(arg1, r_tmp);
}
}
} else {
RegStorage r_tmp;
if (arg0.fp) {
- r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1));
+ if (cu_->instruction_set == kX86_64) {
+ r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg0).GetReg());
+ } else {
+ r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1));
+ }
} else {
- r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+ if (cu_->instruction_set == kX86_64) {
+ r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg());
+ } else {
+ r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+ }
}
LoadValueDirectWideFixed(arg0, r_tmp);
if (arg1.wide == 0) {
- LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2));
+ if (cu_->instruction_set == kX86_64) {
+ LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1));
+ } else {
+ LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2));
+ }
} else {
RegStorage r_tmp;
if (arg1.fp) {
- r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
+ if (cu_->instruction_set == kX86_64) {
+ r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg1).GetReg());
+ } else {
+ r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3));
+ }
} else {
- r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+ if (cu_->instruction_set == kX86_64) {
+ r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+ } else {
+ r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+ }
}
LoadValueDirectWideFixed(arg1, r_tmp);
}
diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc
index c1a7c99..381c7ce 100644
--- a/compiler/dex/quick/mips/target_mips.cc
+++ b/compiler/dex/quick/mips/target_mips.cc
@@ -98,6 +98,7 @@
case kHiddenArg: res_reg = rs_rT0; break;
case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break;
case kCount: res_reg = rs_rMIPS_COUNT; break;
+ default: res_reg = RegStorage::InvalidReg();
}
return res_reg;
}
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 1f12b6f..a85be5e 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -68,20 +68,51 @@
// TODO: needs revisit for 64-bit.
RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) {
- RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
- RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
- RegStorage::InvalidReg();
-
int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
- if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+
+ if (cu_->instruction_set == kX86) {
/*
* When doing a call for x86, it moves the stack pointer in order to push return.
* Thus, we add another 4 bytes to figure out the out of caller (in of callee).
- * TODO: This needs revisited for 64-bit.
*/
offset += sizeof(uint32_t);
}
+ if (cu_->instruction_set == kX86_64) {
+ /*
+ * When doing a call for x86-64, it moves the stack pointer in order to push the return address.
+ * Thus, we add another 8 bytes to figure out the outs of the caller (ins of the callee).
+ */
+ offset += sizeof(uint64_t);
+ }
+
+ if (cu_->instruction_set == kX86_64) {
+ RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position);
+ if (!reg_arg.Valid()) {
+ RegStorage new_reg = wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class);
+ LoadBaseDisp(TargetReg(kSp), offset, new_reg, wide ? k64 : k32);
+ return new_reg;
+ } else {
+ // Check if we need to copy the arg to a different reg_class.
+ if (!RegClassMatches(reg_class, reg_arg)) {
+ if (wide) {
+ RegStorage new_reg = AllocTypedTempWide(false, reg_class);
+ OpRegCopyWide(new_reg, reg_arg);
+ reg_arg = new_reg;
+ } else {
+ RegStorage new_reg = AllocTypedTemp(false, reg_class);
+ OpRegCopy(new_reg, reg_arg);
+ reg_arg = new_reg;
+ }
+ }
+ }
+ return reg_arg;
+ }
+
+ RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
+ RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) :
+ RegStorage::InvalidReg();
+
// If the VR is wide and there is no register for high part, we need to load it.
if (wide && !reg_arg_high.Valid()) {
// If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg.
@@ -129,15 +160,22 @@
void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) {
int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set);
- if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) {
+ if (cu_->instruction_set == kX86) {
/*
* When doing a call for x86, it moves the stack pointer in order to push return.
* Thus, we add another 4 bytes to figure out the out of caller (in of callee).
- * TODO: This needs revisited for 64-bit.
*/
offset += sizeof(uint32_t);
}
+ if (cu_->instruction_set == kX86_64) {
+ /*
+ * When doing a call for x86-64, it moves the stack pointer in order to push the return address.
+ * Thus, we add another 8 bytes to figure out the outs of the caller (ins of the callee).
+ */
+ offset += sizeof(uint64_t);
+ }
+
if (!rl_dest.wide) {
RegStorage reg = GetArgMappingToPhysicalReg(in_position);
if (reg.Valid()) {
@@ -146,6 +184,16 @@
Load32Disp(TargetReg(kSp), offset, rl_dest.reg);
}
} else {
+ if (cu_->instruction_set == kX86_64) {
+ RegStorage reg = GetArgMappingToPhysicalReg(in_position);
+ if (reg.Valid()) {
+ OpRegCopy(rl_dest.reg, reg);
+ } else {
+ LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64);
+ }
+ return;
+ }
+
RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position);
RegStorage reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1);
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index ed94a8d..9718acd 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -910,13 +910,13 @@
void GenInvoke(CallInfo* info);
void GenInvokeNoInline(CallInfo* info);
virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
- int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
+ virtual int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
NextCallInsn next_call_insn,
const MethodReference& target_method,
uint32_t vtable_idx,
uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
bool skip_this);
- int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+ virtual int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
NextCallInsn next_call_insn,
const MethodReference& target_method,
uint32_t vtable_idx,
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 648c148..8113f8e 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -20,9 +20,43 @@
#include "dex/compiler_internals.h"
#include "x86_lir.h"
+#include <map>
+
namespace art {
class X86Mir2Lir : public Mir2Lir {
+ protected:
+ class InToRegStorageMapper {
+ public:
+ virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0;
+ virtual ~InToRegStorageMapper() {}
+ };
+
+ class InToRegStorageX86_64Mapper : public InToRegStorageMapper {
+ public:
+ InToRegStorageX86_64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {}
+ virtual ~InToRegStorageX86_64Mapper() {}
+ virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide);
+ private:
+ int cur_core_reg_;
+ int cur_fp_reg_;
+ };
+
+ class InToRegStorageMapping {
+ public:
+ InToRegStorageMapping() : initialized_(false) {}
+ void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper);
+ int GetMaxMappedIn() { return max_mapped_in_; }
+ bool IsThereStackMapped() { return is_there_stack_mapped_; }
+ RegStorage Get(int in_position);
+ bool IsInitialized() { return initialized_; }
+ private:
+ std::map<int, RegStorage> mapping_;
+ int max_mapped_in_;
+ bool is_there_stack_mapped_;
+ bool initialized_;
+ };
+
public:
X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit);
@@ -56,6 +90,7 @@
// Required for target - register utilities.
RegStorage TargetReg(SpecialTargetRegister reg);
RegStorage GetArgMappingToPhysicalReg(int arg_num);
+ RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num);
RegLocation GetReturnAlt();
RegLocation GetReturnWideAlt();
RegLocation LocCReturn();
@@ -306,6 +341,22 @@
*/
void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg);
+ void FlushIns(RegLocation* ArgLocs, RegLocation rl_method);
+
+ int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel,
+ NextCallInsn next_call_insn,
+ const MethodReference& target_method,
+ uint32_t vtable_idx,
+ uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+ bool skip_this);
+
+ int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel,
+ NextCallInsn next_call_insn,
+ const MethodReference& target_method,
+ uint32_t vtable_idx,
+ uintptr_t direct_code, uintptr_t direct_method, InvokeType type,
+ bool skip_this);
+
/*
* @brief Generate a relative call to the method that will be patched at link time.
* @param target_method The MethodReference of the method to be invoked.
@@ -794,6 +845,8 @@
* @param mir A kMirOpConst128b MIR instruction to match.
*/
LIR *AddVectorLiteral(MIR *mir);
+
+ InToRegStorageMapping in_to_reg_storage_mapping_;
};
} // namespace art
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 4d8fd1b..160ec62 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -132,10 +132,18 @@
X86NativeRegisterPool rX86_ARG1;
X86NativeRegisterPool rX86_ARG2;
X86NativeRegisterPool rX86_ARG3;
+#ifdef TARGET_REX_SUPPORT
+X86NativeRegisterPool rX86_ARG4;
+X86NativeRegisterPool rX86_ARG5;
+#endif
X86NativeRegisterPool rX86_FARG0;
X86NativeRegisterPool rX86_FARG1;
X86NativeRegisterPool rX86_FARG2;
X86NativeRegisterPool rX86_FARG3;
+X86NativeRegisterPool rX86_FARG4;
+X86NativeRegisterPool rX86_FARG5;
+X86NativeRegisterPool rX86_FARG6;
+X86NativeRegisterPool rX86_FARG7;
X86NativeRegisterPool rX86_RET0;
X86NativeRegisterPool rX86_RET1;
X86NativeRegisterPool rX86_INVOKE_TGT;
@@ -145,10 +153,16 @@
RegStorage rs_rX86_ARG1;
RegStorage rs_rX86_ARG2;
RegStorage rs_rX86_ARG3;
+RegStorage rs_rX86_ARG4;
+RegStorage rs_rX86_ARG5;
RegStorage rs_rX86_FARG0;
RegStorage rs_rX86_FARG1;
RegStorage rs_rX86_FARG2;
RegStorage rs_rX86_FARG3;
+RegStorage rs_rX86_FARG4;
+RegStorage rs_rX86_FARG5;
+RegStorage rs_rX86_FARG6;
+RegStorage rs_rX86_FARG7;
RegStorage rs_rX86_RET0;
RegStorage rs_rX86_RET1;
RegStorage rs_rX86_INVOKE_TGT;
@@ -188,35 +202,27 @@
case kArg1: res_reg = rs_rX86_ARG1; break;
case kArg2: res_reg = rs_rX86_ARG2; break;
case kArg3: res_reg = rs_rX86_ARG3; break;
+ case kArg4: res_reg = rs_rX86_ARG4; break;
+ case kArg5: res_reg = rs_rX86_ARG5; break;
case kFArg0: res_reg = rs_rX86_FARG0; break;
case kFArg1: res_reg = rs_rX86_FARG1; break;
case kFArg2: res_reg = rs_rX86_FARG2; break;
case kFArg3: res_reg = rs_rX86_FARG3; break;
+ case kFArg4: res_reg = rs_rX86_FARG4; break;
+ case kFArg5: res_reg = rs_rX86_FARG5; break;
+ case kFArg6: res_reg = rs_rX86_FARG6; break;
+ case kFArg7: res_reg = rs_rX86_FARG7; break;
case kRet0: res_reg = rs_rX86_RET0; break;
case kRet1: res_reg = rs_rX86_RET1; break;
case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break;
case kHiddenArg: res_reg = rs_rAX; break;
case kHiddenFpArg: res_reg = rs_fr0; break;
case kCount: res_reg = rs_rX86_COUNT; break;
+ default: res_reg = RegStorage::InvalidReg();
}
return res_reg;
}
-RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
- // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
- // TODO: This is not 64-bit compliant and depends on new internal ABI.
- switch (arg_num) {
- case 0:
- return rs_rX86_ARG1;
- case 1:
- return rs_rX86_ARG2;
- case 2:
- return rs_rX86_ARG3;
- default:
- return RegStorage::InvalidReg();
- }
-}
-
/*
* Decode the register id.
*/
@@ -482,6 +488,20 @@
LockTemp(rs_rX86_ARG1);
LockTemp(rs_rX86_ARG2);
LockTemp(rs_rX86_ARG3);
+#ifdef TARGET_REX_SUPPORT
+ if (Gen64Bit()) {
+ LockTemp(rs_rX86_ARG4);
+ LockTemp(rs_rX86_ARG5);
+ LockTemp(rs_rX86_FARG0);
+ LockTemp(rs_rX86_FARG1);
+ LockTemp(rs_rX86_FARG2);
+ LockTemp(rs_rX86_FARG3);
+ LockTemp(rs_rX86_FARG4);
+ LockTemp(rs_rX86_FARG5);
+ LockTemp(rs_rX86_FARG6);
+ LockTemp(rs_rX86_FARG7);
+ }
+#endif
}
/* To be used when explicitly managing register use */
@@ -490,6 +510,20 @@
FreeTemp(rs_rX86_ARG1);
FreeTemp(rs_rX86_ARG2);
FreeTemp(rs_rX86_ARG3);
+#ifdef TARGET_REX_SUPPORT
+ if (Gen64Bit()) {
+ FreeTemp(rs_rX86_ARG4);
+ FreeTemp(rs_rX86_ARG5);
+ FreeTemp(rs_rX86_FARG0);
+ FreeTemp(rs_rX86_FARG1);
+ FreeTemp(rs_rX86_FARG2);
+ FreeTemp(rs_rX86_FARG3);
+ FreeTemp(rs_rX86_FARG4);
+ FreeTemp(rs_rX86_FARG5);
+ FreeTemp(rs_rX86_FARG6);
+ FreeTemp(rs_rX86_FARG7);
+ }
+#endif
}
bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
@@ -688,11 +722,37 @@
rs_rX86_ARG1 = rs_rSI;
rs_rX86_ARG2 = rs_rDX;
rs_rX86_ARG3 = rs_rCX;
+#ifdef TARGET_REX_SUPPORT
+ rs_rX86_ARG4 = rs_r8;
+ rs_rX86_ARG5 = rs_r9;
+#else
+ rs_rX86_ARG4 = RegStorage::InvalidReg();
+ rs_rX86_ARG5 = RegStorage::InvalidReg();
+#endif
+ rs_rX86_FARG0 = rs_fr0;
+ rs_rX86_FARG1 = rs_fr1;
+ rs_rX86_FARG2 = rs_fr2;
+ rs_rX86_FARG3 = rs_fr3;
+ rs_rX86_FARG4 = rs_fr4;
+ rs_rX86_FARG5 = rs_fr5;
+ rs_rX86_FARG6 = rs_fr6;
+ rs_rX86_FARG7 = rs_fr7;
rX86_ARG0 = rDI;
rX86_ARG1 = rSI;
rX86_ARG2 = rDX;
rX86_ARG3 = rCX;
- // TODO: ARG4(r8), ARG5(r9), floating point args.
+#ifdef TARGET_REX_SUPPORT
+ rX86_ARG4 = r8;
+ rX86_ARG5 = r9;
+#endif
+ rX86_FARG0 = fr0;
+ rX86_FARG1 = fr1;
+ rX86_FARG2 = fr2;
+ rX86_FARG3 = fr3;
+ rX86_FARG4 = fr4;
+ rX86_FARG5 = fr5;
+ rX86_FARG6 = fr6;
+ rX86_FARG7 = fr7;
} else {
rs_rX86_SP = rs_rX86_SP_32;
@@ -700,23 +760,32 @@
rs_rX86_ARG1 = rs_rCX;
rs_rX86_ARG2 = rs_rDX;
rs_rX86_ARG3 = rs_rBX;
+ rs_rX86_ARG4 = RegStorage::InvalidReg();
+ rs_rX86_ARG5 = RegStorage::InvalidReg();
+ rs_rX86_FARG0 = rs_rAX;
+ rs_rX86_FARG1 = rs_rCX;
+ rs_rX86_FARG2 = rs_rDX;
+ rs_rX86_FARG3 = rs_rBX;
+ rs_rX86_FARG4 = RegStorage::InvalidReg();
+ rs_rX86_FARG5 = RegStorage::InvalidReg();
+ rs_rX86_FARG6 = RegStorage::InvalidReg();
+ rs_rX86_FARG7 = RegStorage::InvalidReg();
rX86_ARG0 = rAX;
rX86_ARG1 = rCX;
rX86_ARG2 = rDX;
rX86_ARG3 = rBX;
+ rX86_FARG0 = rAX;
+ rX86_FARG1 = rCX;
+ rX86_FARG2 = rDX;
+ rX86_FARG3 = rBX;
+ // TODO(64): Initialize with invalid reg
+// rX86_ARG4 = RegStorage::InvalidReg();
+// rX86_ARG5 = RegStorage::InvalidReg();
}
- rs_rX86_FARG0 = rs_rAX;
- rs_rX86_FARG1 = rs_rCX;
- rs_rX86_FARG2 = rs_rDX;
- rs_rX86_FARG3 = rs_rBX;
rs_rX86_RET0 = rs_rAX;
rs_rX86_RET1 = rs_rDX;
rs_rX86_INVOKE_TGT = rs_rAX;
rs_rX86_COUNT = rs_rCX;
- rX86_FARG0 = rAX;
- rX86_FARG1 = rCX;
- rX86_FARG2 = rDX;
- rX86_FARG3 = rBX;
rX86_RET0 = rAX;
rX86_RET1 = rDX;
rX86_INVOKE_TGT = rAX;
@@ -1676,4 +1745,458 @@
return new_value;
}
+// ------------ ABI support: mapping of args to physical registers -------------
+RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide) {
+ const RegStorage coreArgMappingToPhysicalReg[] = {rs_rX86_ARG1, rs_rX86_ARG2, rs_rX86_ARG3, rs_rX86_ARG4, rs_rX86_ARG5};
+ const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage);
+ const RegStorage fpArgMappingToPhysicalReg[] = {rs_rX86_FARG0, rs_rX86_FARG1, rs_rX86_FARG2, rs_rX86_FARG3,
+ rs_rX86_FARG4, rs_rX86_FARG5, rs_rX86_FARG6, rs_rX86_FARG7};
+ const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage);
+
+ RegStorage result = RegStorage::InvalidReg();
+ if (is_double_or_float) {
+ if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) {
+ result = fpArgMappingToPhysicalReg[cur_fp_reg_++];
+ if (result.Valid()) {
+ result = is_wide ? RegStorage::FloatSolo64(result.GetReg()) : RegStorage::FloatSolo32(result.GetReg());
+ }
+ }
+ } else {
+ if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) {
+ result = coreArgMappingToPhysicalReg[cur_core_reg_++];
+ if (result.Valid()) {
+ result = is_wide ? RegStorage::Solo64(result.GetReg()) : RegStorage::Solo32(result.GetReg());
+ }
+ }
+ }
+ return result;
+}
+
+RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) {
+ DCHECK(IsInitialized());
+ auto res = mapping_.find(in_position);
+ return res != mapping_.end() ? res->second : RegStorage::InvalidReg();
+}
+
+void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper) {
+ DCHECK(mapper != nullptr);
+ max_mapped_in_ = -1;
+ is_there_stack_mapped_ = false;
+ for (int in_position = 0; in_position < count; in_position++) {
+ RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide);
+ if (reg.Valid()) {
+ mapping_[in_position] = reg;
+ max_mapped_in_ = std::max(max_mapped_in_, in_position);
+ if (reg.Is64BitSolo()) {
+ // We covered 2 args, so skip the next one
+ in_position++;
+ }
+ } else {
+ is_there_stack_mapped_ = true;
+ }
+ }
+ initialized_ = true;
+}
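+// Illustrative example (assumed ins, for exposition): for ins (long, float, int)
+// the mapping becomes 0 -> RSI as a 64-bit solo (covering vreg words 0 and 1),
+// 2 -> XMM0, 3 -> RDX; anything beyond the register budget is stack-mapped.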
+
+RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) {
+ if (!Gen64Bit()) {
+ return GetCoreArgMappingToPhysicalReg(arg_num);
+ }
+
+ if (!in_to_reg_storage_mapping_.IsInitialized()) {
+ int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+ RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg];
+
+ InToRegStorageX86_64Mapper mapper;
+ in_to_reg_storage_mapping_.Initialize(arg_locs, cu_->num_ins, &mapper);
+ }
+ return in_to_reg_storage_mapping_.Get(arg_num);
+}
+
+RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) {
+ // For the 32-bit internal ABI, the first 3 arguments are passed in registers.
+ // Not used for 64-bit. TODO: Move X86_32 to the same framework.
+ switch (core_arg_num) {
+ case 0:
+ return rs_rX86_ARG1;
+ case 1:
+ return rs_rX86_ARG2;
+ case 2:
+ return rs_rX86_ARG3;
+ default:
+ return RegStorage::InvalidReg();
+ }
+}
+
+// ---------End of ABI support: mapping of args to physical registers -------------
+
+/*
+ * If there are any ins passed in registers that have not been promoted
+ * to a callee-save register, flush them to the frame. Perform initial
+ * assignment of promoted arguments.
+ *
+ * ArgLocs is an array of location records describing the incoming arguments
+ * with one location record per word of argument.
+ */
+void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) {
+ if (!Gen64Bit()) return Mir2Lir::FlushIns(ArgLocs, rl_method);
+ /*
+ * Dummy up a RegLocation for the incoming Method*
+ * It will attempt to keep kArg0 live (or copy it to home location
+ * if promoted).
+ */
+
+ RegLocation rl_src = rl_method;
+ rl_src.location = kLocPhysReg;
+ rl_src.reg = TargetReg(kArg0);
+ rl_src.home = false;
+ MarkLive(rl_src);
+ StoreValue(rl_method, rl_src);
+ // If Method* has been promoted, explicitly flush
+ if (rl_method.location == kLocPhysReg) {
+ StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0));
+ }
+
+ if (cu_->num_ins == 0) {
+ return;
+ }
+
+ int start_vreg = cu_->num_dalvik_registers - cu_->num_ins;
+ /*
+ * Copy incoming arguments to their proper home locations.
+ * NOTE: an older version of dx had an issue in which
+ * it would reuse static method argument registers.
+ * This could result in the same Dalvik virtual register
+ * being promoted to both core and fp regs. To account for this,
+ * we only copy to the corresponding promoted physical register
+ * if it matches the type of the SSA name for the incoming
+ * argument. It is also possible that long and double arguments
+ * end up half-promoted. In those cases, we must flush the promoted
+ * half to memory as well.
+ */
+ for (int i = 0; i < cu_->num_ins; i++) {
+ PromotionMap* v_map = &promotion_map_[start_vreg + i];
+ RegStorage reg = RegStorage::InvalidReg();
+ // get reg corresponding to input
+ reg = GetArgMappingToPhysicalReg(i);
+
+ if (reg.Valid()) {
+ // If arriving in register
+ bool need_flush = true;
+ RegLocation* t_loc = &ArgLocs[i];
+ if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) {
+ OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg);
+ need_flush = false;
+ } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) {
+ OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg);
+ need_flush = false;
+ } else {
+ need_flush = true;
+ }
+
+ // For wide args, force flush if not fully promoted
+ if (t_loc->wide) {
+ PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1);
+ // Is only half promoted?
+ need_flush |= (p_map->core_location != v_map->core_location) ||
+ (p_map->fp_location != v_map->fp_location);
+ }
+ if (need_flush) {
+ if (t_loc->wide) {
+ StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64);
+ // Increment i to skip the next one
+ i++;
+ } else {
+ Store32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), reg);
+ }
+ }
+ } else {
+ // If arriving in frame & promoted
+ if (v_map->core_location == kLocPhysReg) {
+ Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg));
+ }
+ if (v_map->fp_location == kLocPhysReg) {
+ Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg));
+ }
+ }
+ }
+}
+
+/*
+ * Load up to 5 arguments, the first three of which will be in
+ * kArg1 .. kArg3. On entry kArg0 contains the current method pointer,
+ * and as part of the load sequence, it must be replaced with
+ * the target method pointer. Note, this may also be called
+ * for "range" variants if the number of arguments is 5 or fewer.
+ */
+int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info,
+ int call_state, LIR** pcrLabel, NextCallInsn next_call_insn,
+ const MethodReference& target_method,
+ uint32_t vtable_idx, uintptr_t direct_code,
+ uintptr_t direct_method, InvokeType type, bool skip_this) {
+ if (!Gen64Bit()) {
+ return Mir2Lir::GenDalvikArgsNoRange(info,
+ call_state, pcrLabel, next_call_insn,
+ target_method,
+ vtable_idx, direct_code,
+ direct_method, type, skip_this);
+ }
+ return GenDalvikArgsRange(info,
+ call_state, pcrLabel, next_call_insn,
+ target_method,
+ vtable_idx, direct_code,
+ direct_method, type, skip_this);
+}
+
+/*
+ * May have 0+ arguments (also used for jumbo). Note that
+ * source virtual registers may be in physical registers, so may
+ * need to be flushed to home location before copying. This
+ * applies to arg3 and above (see below).
+ *
+ * Two general strategies:
+ * If < 20 arguments
+ * Pass args 3-18 using vldm/vstm block copy
+ * Pass arg0, arg1 & arg2 in kArg1-kArg3
+ * If 20+ arguments
+ * Pass args arg19+ using memcpy block copy
+ * Pass arg0, arg1 & arg2 in kArg1-kArg3
+ *
+ */
+int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state,
+ LIR** pcrLabel, NextCallInsn next_call_insn,
+ const MethodReference& target_method,
+ uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method,
+ InvokeType type, bool skip_this) {
+ if (!Gen64Bit()) {
+ return Mir2Lir::GenDalvikArgsRange(info, call_state,
+ pcrLabel, next_call_insn,
+ target_method,
+ vtable_idx, direct_code, direct_method,
+ type, skip_this);
+ }
+
+ /* If no arguments, just return */
+ if (info->num_arg_words == 0)
+ return call_state;
+
+ const int start_index = skip_this ? 1 : 0;
+
+ InToRegStorageX86_64Mapper mapper;
+ InToRegStorageMapping in_to_reg_storage_mapping;
+ in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper);
+ const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn();
+ const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 :
+ in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1;
+ int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped);
+
+ // First of all, check whether it makes sense to use bulk copying.
+ // The optimization is applicable only for the range case.
+ // TODO: make a constant instead of 2
+ if (info->is_range && regs_left_to_pass_via_stack >= 2) {
+ // Scan the rest of the args - if in phys_reg flush to memory
+ for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) {
+ RegLocation loc = info->args[next_arg];
+ if (loc.wide) {
+ loc = UpdateLocWide(loc);
+ if (loc.location == kLocPhysReg) {
+ StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64);
+ }
+ next_arg += 2;
+ } else {
+ loc = UpdateLoc(loc);
+ if (loc.location == kLocPhysReg) {
+ StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32);
+ }
+ next_arg++;
+ }
+ }
+
+ // Logic below assumes that Method pointer is at offset zero from SP.
+ DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0);
+
+ // The rest can be copied together
+ int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low);
+ int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped, cu_->instruction_set);
+
+ int current_src_offset = start_offset;
+ int current_dest_offset = outs_offset;
+
+ while (regs_left_to_pass_via_stack > 0) {
+ // This is based on the knowledge that the stack itself is 16-byte aligned.
+ bool src_is_16b_aligned = (current_src_offset & 0xF) == 0;
+ bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0;
+ size_t bytes_to_move;
+
+ /*
+ * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a
+ * 128-bit move because we won't get the chance to try to align. If there are more than
+ * 4 registers left to move, consider doing a 128-bit move only if either src or dest is aligned.
+ * We do this because we could potentially do a smaller move to align.
+ */
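+ // Illustrative example: with 5 words left and a 16-byte-aligned source, one 128-bit
+ // move covers 4 words and a final 32-bit move handles the remaining word.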
+ if (regs_left_to_pass_via_stack == 4 ||
+ (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) {
+ // Moving 128-bits via xmm register.
+ bytes_to_move = sizeof(uint32_t) * 4;
+
+ // Allocate a free xmm temp. Since we are working through the calling sequence,
+ // we expect to have an xmm temporary available. AllocTempDouble will abort if
+ // there are no free registers.
+ RegStorage temp = AllocTempDouble();
+
+ LIR* ld1 = nullptr;
+ LIR* ld2 = nullptr;
+ LIR* st1 = nullptr;
+ LIR* st2 = nullptr;
+
+ /*
+ * The logic is similar for both loads and stores. If we have 16-byte alignment,
+ * do an aligned move. If we have 8-byte alignment, then do the move in two
+ * parts. This approach prevents possible cache line splits. Finally, fall back
+ * to doing an unaligned move. In most cases we likely won't split the cache
+ * line but we cannot prove it and thus take a conservative approach.
+ */
+ bool src_is_8b_aligned = (current_src_offset & 0x7) == 0;
+ bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0;
+
+ if (src_is_16b_aligned) {
+ ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP);
+ } else if (src_is_8b_aligned) {
+ ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP);
+ ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1),
+ kMovHi128FP);
+ } else {
+ ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP);
+ }
+
+ if (dest_is_16b_aligned) {
+ st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP);
+ } else if (dest_is_8b_aligned) {
+ st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP);
+ st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1),
+ temp, kMovHi128FP);
+ } else {
+ st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP);
+ }
+
+ // TODO If we could keep track of aliasing information for memory accesses that are wider
+ // than 64-bit, we wouldn't need to set up a barrier.
+ if (ld1 != nullptr) {
+ if (ld2 != nullptr) {
+ // For 64-bit load we can actually set up the aliasing information.
+ AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true);
+ AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true);
+ } else {
+ // Set barrier for 128-bit load.
+ SetMemRefType(ld1, true /* is_load */, kDalvikReg);
+ ld1->u.m.def_mask = ENCODE_ALL;
+ }
+ }
+ if (st1 != nullptr) {
+ if (st2 != nullptr) {
+ // For 64-bit store we can actually set up the aliasing information.
+ AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true);
+ AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true);
+ } else {
+ // Set barrier for 128-bit store.
+ SetMemRefType(st1, false /* is_load */, kDalvikReg);
+ st1->u.m.def_mask = ENCODE_ALL;
+ }
+ }
+
+ // Free the temporary used for the data movement.
+ FreeTemp(temp);
+ } else {
+ // Moving 32-bits via general purpose register.
+ bytes_to_move = sizeof(uint32_t);
+
+ // Instead of allocating a new temp, simply reuse one of the registers being used
+ // for argument passing.
+ RegStorage temp = TargetReg(kArg3);
+
+ // Now load the argument VR and store to the outs.
+ Load32Disp(TargetReg(kSp), current_src_offset, temp);
+ Store32Disp(TargetReg(kSp), current_dest_offset, temp);
+ }
+
+ current_src_offset += bytes_to_move;
+ current_dest_offset += bytes_to_move;
+ regs_left_to_pass_via_stack -= (bytes_to_move >> 2);
+ }
+ DCHECK_EQ(regs_left_to_pass_via_stack, 0);
+ }
+
+ // Now handle the args that were not mapped to physical registers.
+ if (in_to_reg_storage_mapping.IsThereStackMapped()) {
+ RegStorage regSingle = TargetReg(kArg2);
+ RegStorage regWide = RegStorage::Solo64(TargetReg(kArg3).GetReg());
+ for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) {
+ RegLocation rl_arg = info->args[i];
+ rl_arg = UpdateRawLoc(rl_arg);
+ RegStorage reg = in_to_reg_storage_mapping.Get(i);
+ if (!reg.Valid()) {
+ int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set);
+
+ if (rl_arg.wide) {
+ if (rl_arg.location == kLocPhysReg) {
+ StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64);
+ } else {
+ LoadValueDirectWideFixed(rl_arg, regWide);
+ StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64);
+ }
+ i++;
+ } else {
+ if (rl_arg.location == kLocPhysReg) {
+ StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32);
+ } else {
+ LoadValueDirectFixed(rl_arg, regSingle);
+ StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32);
+ }
+ }
+ call_state = next_call_insn(cu_, info, call_state, target_method,
+ vtable_idx, direct_code, direct_method, type);
+ }
+ }
+ }
+
+ // Finish with mapped registers
+ for (int i = start_index; i <= last_mapped_in; i++) {
+ RegLocation rl_arg = info->args[i];
+ rl_arg = UpdateRawLoc(rl_arg);
+ RegStorage reg = in_to_reg_storage_mapping.Get(i);
+ if (reg.Valid()) {
+ if (rl_arg.wide) {
+ LoadValueDirectWideFixed(rl_arg, reg);
+ i++;
+ } else {
+ LoadValueDirectFixed(rl_arg, reg);
+ }
+ call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+ direct_code, direct_method, type);
+ }
+ }
+
+ call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx,
+ direct_code, direct_method, type);
+ if (pcrLabel) {
+ if (Runtime::Current()->ExplicitNullChecks()) {
+ *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags);
+ } else {
+ *pcrLabel = nullptr;
+ // In lieu of generating a check for kArg1 being null, we need to
+ // perform a load when doing implicit checks.
+ RegStorage tmp = AllocTemp();
+ Load32Disp(TargetReg(kArg1), 0, tmp);
+ MarkPossibleNullPointerException(info->opt_flags);
+ FreeTemp(tmp);
+ }
+ }
+ return call_state;
+}
+
} // namespace art
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index bb8df89..f290548 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -334,10 +334,18 @@
extern X86NativeRegisterPool rX86_ARG1;
extern X86NativeRegisterPool rX86_ARG2;
extern X86NativeRegisterPool rX86_ARG3;
+#ifdef TARGET_REX_SUPPORT
+extern X86NativeRegisterPool rX86_ARG4;
+extern X86NativeRegisterPool rX86_ARG5;
+#endif
extern X86NativeRegisterPool rX86_FARG0;
extern X86NativeRegisterPool rX86_FARG1;
extern X86NativeRegisterPool rX86_FARG2;
extern X86NativeRegisterPool rX86_FARG3;
+extern X86NativeRegisterPool rX86_FARG4;
+extern X86NativeRegisterPool rX86_FARG5;
+extern X86NativeRegisterPool rX86_FARG6;
+extern X86NativeRegisterPool rX86_FARG7;
extern X86NativeRegisterPool rX86_RET0;
extern X86NativeRegisterPool rX86_RET1;
extern X86NativeRegisterPool rX86_INVOKE_TGT;
@@ -347,10 +355,16 @@
extern RegStorage rs_rX86_ARG1;
extern RegStorage rs_rX86_ARG2;
extern RegStorage rs_rX86_ARG3;
+extern RegStorage rs_rX86_ARG4;
+extern RegStorage rs_rX86_ARG5;
extern RegStorage rs_rX86_FARG0;
extern RegStorage rs_rX86_FARG1;
extern RegStorage rs_rX86_FARG2;
extern RegStorage rs_rX86_FARG3;
+extern RegStorage rs_rX86_FARG4;
+extern RegStorage rs_rX86_FARG5;
+extern RegStorage rs_rX86_FARG6;
+extern RegStorage rs_rX86_FARG7;
extern RegStorage rs_rX86_RET0;
extern RegStorage rs_rX86_RET1;
extern RegStorage rs_rX86_INVOKE_TGT;
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1d524cb..5374f22 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -172,8 +172,12 @@
// | Padding |
// | RDI/Method* | <- sp
static constexpr bool kQuickSoftFloatAbi = false; // This is a hard float ABI.
- static constexpr size_t kNumQuickGprArgs = 5; // 3 arguments passed in GPRs.
- static constexpr size_t kNumQuickFprArgs = 8; // 0 arguments passed in FPRs.
+#ifdef TARGET_REX_SUPPORT
+ static constexpr size_t kNumQuickGprArgs = 5; // 5 arguments passed in GPRs.
+#else
+ static constexpr size_t kNumQuickGprArgs = 3; // 3 arguments passed in GPRs if r8..r15 not enabled.
+#endif
+ static constexpr size_t kNumQuickFprArgs = 8; // 8 arguments passed in FPRs.
static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16; // Offset of first FPR arg.
static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80; // Offset of first GPR arg.
static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 168; // Offset of return address.
diff --git a/test/700-LoadArgRegs/expected.txt b/test/700-LoadArgRegs/expected.txt
new file mode 100644
index 0000000..4908e5b
--- /dev/null
+++ b/test/700-LoadArgRegs/expected.txt
@@ -0,0 +1,75 @@
+11
+21, 22
+31, 32, 33
+41, 42, 43, 44
+51, 52, 53, 54, 55
+61, 62, 63, 64, 65, 66
+71, 72, 73, 74, 75, 76, 77
+81, 82, 83, 84, 85, 86, 87, 88
+91, 92, 93, 94, 95, 96, 97, 98, 99
+101, 102, 103, 104, 105, 106, 107, 108, 109, 110
+111, 112, 113, 114, 115, 116, 117, 118, 119, 1110, 1111
+121, 122, 123, 124, 125, 126, 127, 128, 129, 1210, 1211, 1212
+61, 62, 63, 64, 65, 66
+true
+true, false
+true, false, true
+true, false, true, false
+true, false, true, false, true
+true, false, true, false, true, false
+true, false, true, false, true, false, true
+a
+a, b
+a, b, c
+a, b, c, d
+a, b, c, d, e
+a, b, c, d, e, f
+a, b, c, d, e, f, g
+11
+11, b
+11, b, true
+11, b, true, 12
+11, b, true, 12, e
+11, b, true, 12, e, false
+11, b, true, 12, e, false, 13
+1.1
+2.1, 2.2
+3.1, 3.2, 3.3
+4.1, 4.2, 4.3, 4.4
+5.1, 5.2, 5.3, 5.4, 5.5
+6.1, 6.2, 6.3, 6.4, 6.5, 6.6
+7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7
+8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8
+9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9
+10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, 10.9, 10.1
+1.01
+2.01, 2.02
+3.01, 3.02, 3.03
+4.01, 4.02, 4.03, 4.04
+5.01, 5.02, 5.03, 5.04, 5.05
+6.01, 6.02, 6.03, 6.04, 6.05, 6.06
+7.01, 7.02, 7.03, 7.04, 7.05, 7.06, 7.07
+8.01, 8.02, 8.03, 8.04, 8.05, 8.06, 8.07, 8.08
+9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09
+-1.1, 9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09
+10.01, 10.02, 10.03, 10.04, 10.05, 10.06, 10.07, 10.08, 10.09, 10.01
+100011
+100021, 100022
+100031, 100032, 100033
+100041, 100042, 100043, 100044
+100051, 100052, 100053, 100054, 100055
+100061, 100062, 100063, 100064, 100065, 100066
+100071, 100072, 100073, 100074, 100075, 100076, 100077
+100081, 100082, 100083, 100084, 100085, 100086, 100087, 100088
+100091, 100092, 100093, 100094, 100095, 100096, 100097, 100098, 100099
+100100100100011
+-11
+-21, -22
+-31, -32, -33
+-41, -42, -43, -44
+-51, -52, -53, -54, -55
+-61, -62, -63, -64, -65, -66
+-71, -72, -73, -74, -75, -76, -77
+-81, -82, -83, -84, -85, -86, -87, -88
+-91, -92, -93, -94, -95, -96, -97, -98, -99
+-1, -91, -92, -93, -94, -95, -96, -97, -98, -99
diff --git a/test/700-LoadArgRegs/info.txt b/test/700-LoadArgRegs/info.txt
new file mode 100644
index 0000000..dcaa46e
--- /dev/null
+++ b/test/700-LoadArgRegs/info.txt
@@ -0,0 +1 @@
+Simple tests for passing int/float/long/double arguments.
diff --git a/test/700-LoadArgRegs/src/Main.java b/test/700-LoadArgRegs/src/Main.java
new file mode 100644
index 0000000..281ab16
--- /dev/null
+++ b/test/700-LoadArgRegs/src/Main.java
@@ -0,0 +1,288 @@
+public class Main {
+
+ static void testI1(int p1) {
+ System.out.println(p1);
+ }
+ static void testI2(int p1, int p2) {
+ System.out.println(p1+", "+p2);
+ }
+ static void testI3(int p1, int p2, int p3) {
+ System.out.println(p1+", "+p2+", "+p3);
+ }
+ static void testI4(int p1, int p2, int p3, int p4) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4);
+ }
+ static void testI5(int p1, int p2, int p3, int p4, int p5) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+ }
+ static void testI6(int p1, int p2, int p3, int p4, int p5, int p6) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+ }
+ static void testI7(int p1, int p2, int p3, int p4, int p5, int p6, int p7) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+ }
+ static void testI8(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8);
+ }
+ static void testI9(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9);
+ }
+ static void testI10(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9, int p10) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10);
+ }
+ static void testI11(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9, int p10, int p11) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10+", "+p11);
+ }
+ static void testI12(int p1, int p2, int p3, int p4, int p5, int p6, int p7, int p8, int p9, int p10, int p11, int p12) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10+", "+p11+", "+p12);
+ }
+ void testI6_nonstatic(int p1, int p2, int p3, int p4, int p5, int p6) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+ }
+
+ static void testB1(boolean p1) {
+ System.out.println(p1);
+ }
+ static void testB2(boolean p1, boolean p2) {
+ System.out.println(p1+", "+p2);
+ }
+ static void testB3(boolean p1, boolean p2, boolean p3) {
+ System.out.println(p1+", "+p2+", "+p3);
+ }
+ static void testB4(boolean p1, boolean p2, boolean p3, boolean p4) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4);
+ }
+ static void testB5(boolean p1, boolean p2, boolean p3, boolean p4, boolean p5) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+ }
+ static void testB6(boolean p1, boolean p2, boolean p3, boolean p4, boolean p5, boolean p6) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+ }
+ static void testB7(boolean p1, boolean p2, boolean p3, boolean p4, boolean p5, boolean p6, boolean p7) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+ }
+
+ static void testO1(Object p1) {
+ System.out.println(p1);
+ }
+ static void testO2(Object p1, Object p2) {
+ System.out.println(p1+", "+p2);
+ }
+ static void testO3(Object p1, Object p2, Object p3) {
+ System.out.println(p1+", "+p2+", "+p3);
+ }
+ static void testO4(Object p1, Object p2, Object p3, Object p4) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4);
+ }
+ static void testO5(Object p1, Object p2, Object p3, Object p4, Object p5) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+ }
+ static void testO6(Object p1, Object p2, Object p3, Object p4, Object p5, Object p6) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+ }
+ static void testO7(Object p1, Object p2, Object p3, Object p4, Object p5, Object p6, Object p7) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+ }
+
+ static void testIOB1(int p1) {
+ System.out.println(p1);
+ }
+ static void testIOB2(int p1, Object p2) {
+ System.out.println(p1+", "+p2);
+ }
+ static void testIOB3(int p1, Object p2, boolean p3) {
+ System.out.println(p1+", "+p2+", "+p3);
+ }
+ static void testIOB4(int p1, Object p2, boolean p3, int p4) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4);
+ }
+ static void testIOB5(int p1, Object p2, boolean p3, int p4, Object p5) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+ }
+ static void testIOB6(int p1, Object p2, boolean p3, int p4, Object p5, boolean p6) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+ }
+ static void testIOB7(int p1, Object p2, boolean p3, int p4, Object p5, boolean p6, int p7) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+ }
+
+ static void testF1(float p1) {
+ System.out.println(p1);
+ }
+ static void testF2(float p1, float p2) {
+ System.out.println(p1+", "+p2);
+ }
+ static void testF3(float p1, float p2, float p3) {
+ System.out.println(p1+", "+p2+", "+p3);
+ }
+ static void testF4(float p1, float p2, float p3, float p4) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4);
+ }
+ static void testF5(float p1, float p2, float p3, float p4, float p5) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5);
+ }
+ static void testF6(float p1, float p2, float p3, float p4, float p5, float p6) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6);
+ }
+ static void testF7(float p1, float p2, float p3, float p4, float p5, float p6, float p7) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7);
+ }
+ static void testF8(float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8);
+ }
+ static void testF9(float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8, float p9) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9);
+ }
+ static void testF10(float p1, float p2, float p3, float p4, float p5, float p6, float p7, float p8, float p9, float p10) {
+ System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10);
+ }
+
+ static void testD1 (double p1) { System.out.println(p1); }
+ static void testD2 (double p1, double p2) { System.out.println(p1+", "+p2); }
+ static void testD3 (double p1, double p2, double p3) { System.out.println(p1+", "+p2+", "+p3); }
+ static void testD4 (double p1, double p2, double p3, double p4) { System.out.println(p1+", "+p2+", "+p3+", "+p4); }
+ static void testD5 (double p1, double p2, double p3, double p4, double p5) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5); }
+ static void testD6 (double p1, double p2, double p3, double p4, double p5, double p6) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6); }
+ static void testD7 (double p1, double p2, double p3, double p4, double p5, double p6, double p7) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7); }
+ static void testD8 (double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8); }
+ static void testD9 (double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8, double p9) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+ static void testD9f (float p0, double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8, double p9) { System.out.println(p0+", "+p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+ static void testD10(double p1, double p2, double p3, double p4, double p5, double p6, double p7, double p8, double p9, double p10) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9+", "+p10); }
+
+ static void testI() {
+ testI1(11);
+ testI2(21, 22);
+ testI3(31, 32, 33);
+ testI4(41, 42, 43, 44);
+ testI5(51, 52, 53, 54, 55);
+ testI6(61, 62, 63, 64, 65, 66);
+ testI7(71, 72, 73, 74, 75, 76, 77);
+ testI8(81, 82, 83, 84, 85, 86, 87, 88);
+ testI9(91, 92, 93, 94, 95, 96, 97, 98, 99);
+ testI10(101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+ testI11(111, 112, 113, 114, 115, 116, 117, 118, 119, 1110, 1111);
+ testI12(121, 122, 123, 124, 125, 126, 127, 128, 129, 1210, 1211, 1212);
+ new Main().testI6_nonstatic(61, 62, 63, 64, 65, 66);
+ }
+
+ static void testB() {
+ testB1(true);
+ testB2(true, false);
+ testB3(true, false, true);
+ testB4(true, false, true, false);
+ testB5(true, false, true, false, true);
+ testB6(true, false, true, false, true, false);
+ testB7(true, false, true, false, true, false, true);
+ }
+
+ static void testO() {
+ testO1("a");
+ testO2("a", "b");
+ testO3("a", "b", "c");
+ testO4("a", "b", "c", "d");
+ testO5("a", "b", "c", "d", "e");
+ testO6("a", "b", "c", "d", "e", "f");
+ testO7("a", "b", "c", "d", "e", "f", "g");
+ }
+
+ static void testIOB() {
+ testIOB1(11);
+ testIOB2(11, "b");
+ testIOB3(11, "b", true);
+ testIOB4(11, "b", true, 12);
+ testIOB5(11, "b", true, 12, "e");
+ testIOB6(11, "b", true, 12, "e", false);
+ testIOB7(11, "b", true, 12, "e", false, 13);
+ }
+
+ static void testF() {
+ testF1(1.1f);
+ testF2(2.1f, 2.2f);
+ testF3(3.1f, 3.2f, 3.3f);
+ testF4(4.1f, 4.2f, 4.3f, 4.4f);
+ testF5(5.1f, 5.2f, 5.3f, 5.4f, 5.5f);
+ testF6(6.1f, 6.2f, 6.3f, 6.4f, 6.5f, 6.6f);
+ testF7(7.1f, 7.2f, 7.3f, 7.4f, 7.5f, 7.6f, 7.7f);
+ testF8(8.1f, 8.2f, 8.3f, 8.4f, 8.5f, 8.6f, 8.7f, 8.8f);
+ testF9(9.1f, 9.2f, 9.3f, 9.4f, 9.5f, 9.6f, 9.7f, 9.8f, 9.9f);
+ testF10(10.1f, 10.2f, 10.3f, 10.4f, 10.5f, 10.6f, 10.7f, 10.8f, 10.9f, 10.1f);
+ }
+
+ static void testD() {
+
+ testD1(1.01);
+ testD2(2.01, 2.02);
+ testD3(3.01, 3.02, 3.03);
+ testD4(4.01, 4.02, 4.03, 4.04);
+ testD5(5.01, 5.02, 5.03, 5.04, 5.05);
+ testD6(6.01, 6.02, 6.03, 6.04, 6.05, 6.06);
+ testD7(7.01, 7.02, 7.03, 7.04, 7.05, 7.06, 7.07);
+ testD8(8.01, 8.02, 8.03, 8.04, 8.05, 8.06, 8.07, 8.08);
+ testD9(9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09);
+ testD9f(-1.1f, 9.01, 9.02, 9.03, 9.04, 9.05, 9.06, 9.07, 9.08, 9.09);
+
+ // TODO: 10.01 as first arg fails: 10.009994506835938
+ testD10(10.01, 10.02, 10.03, 10.04, 10.05, 10.06, 10.07, 10.08, 10.09, 10.01);
+ }
+
+ static void testL1(long p1) { System.out.println(p1); }
+// static void testL2x(long p1, long p2) { testL2(p1+p2, p2); } // TODO(64) GenAddLong 64BIT_TEMP
+ static void testL2(long p1, long p2) { System.out.println(p1+", "+p2); }
+ static void testL3(long p1, long p2, long p3) { System.out.println(p1+", "+p2+", "+p3); }
+ static void testL4(long p1, long p2, long p3, long p4) { System.out.println(p1+", "+p2+", "+p3+", "+p4); }
+ static void testL5(long p1, long p2, long p3, long p4, long p5) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5); }
+ static void testL6(long p1, long p2, long p3, long p4, long p5, long p6) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6); }
+ static void testL7(long p1, long p2, long p3, long p4, long p5, long p6, long p7) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7); }
+ static void testL8(long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8); }
+ static void testL9(long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8, long p9) { System.out.println(p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+
+ static void testL9i(int p0, long p1, long p2, long p3, long p4, long p5, long p6, long p7, long p8, long p9) { System.out.println(p0+", "+p1+", "+p2+", "+p3+", "+p4+", "+p5+", "+p6+", "+p7+", "+p8+", "+p9); }
+
+ static void testL() {
+// testL2x(100021, 100022);
+ testL1(100011);
+ testL2(100021, 100022);
+ testL3(100031, 100032, 100033);
+ testL4(100041, 100042, 100043, 100044);
+ testL5(100051, 100052, 100053, 100054, 100055);
+ testL6(100061, 100062, 100063, 100064, 100065, 100066);
+ testL7(100071, 100072, 100073, 100074, 100075, 100076, 100077);
+ testL8(100081, 100082, 100083, 100084, 100085, 100086, 100087, 100088);
+ testL9(100091, 100092, 100093, 100094, 100095, 100096, 100097, 100098, 100099);
+ }
+
+ static void testLL() {
+ testL1(100100100100011L);
+
+ testL1(-11L);
+ testL2(-21L, -22L);
+ testL3(-31L, -32L, -33L);
+ testL4(-41L, -42L, -43L, -44L);
+ testL5(-51L, -52L, -53L, -54L, -55L);
+ testL6(-61L, -62L, -63L, -64L, -65L, -66L);
+ testL7(-71L, -72L, -73L, -74L, -75L, -76L, -77L);
+ testL8(-81L, -82L, -83L, -84L, -85L, -86L, -87L, -88L);
+ testL9(-91L, -92L, -93L, -94L, -95L, -96L, -97L, -98L, -99L);
+ testL9i(-1, -91L, -92L, -93L, -94L, -95L, -96L, -97L, -98L, -99L);
+
+ // TODO(64) GenAddLong 64BIT_TEMP
+// testL2x(100100100100011L, 1L);
+// testL2x(100100100100011L, 100100100100011L);
+ }
+
+ static public void main(String[] args) throws Exception {
+
+ testI();
+ testB();
+ testO();
+ testIOB();
+ testF();
+
+ testD();
+
+ testL();
+
+ testLL();
+
+ }
+}