| field | value |
|---|---|
| author | 2014-12-08 18:38:42 +0000 |
| committer | 2014-12-08 18:38:43 +0000 |
| commit | 6c964c98400b8c0949d5e369968da2d4809b772f (patch) |
| tree | 82c1893c0dbbd5a9b849b9c236fc775b4d20f3cc |
| parent | c4925d4c02dc8f8d51cb2653b5e7a99f6c9fd7d7 (diff) |
| parent | 717a3e447c6f7a922cf9c3efe522747a187a045d (diff) |
Merge "Re-factor Quick ABI support"
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | compiler/dex/mir_graph.cc | 6 |
| -rw-r--r-- | compiler/dex/mir_graph.h | 1 |
| -rw-r--r-- | compiler/dex/quick/arm/codegen_arm.h | 74 |
| -rw-r--r-- | compiler/dex/quick/arm/target_arm.cc | 293 |
| -rw-r--r-- | compiler/dex/quick/arm64/codegen_arm64.h | 57 |
| -rw-r--r-- | compiler/dex/quick/arm64/target_arm64.cc | 403 |
| -rw-r--r-- | compiler/dex/quick/codegen_util.cc | 3 |
| -rwxr-xr-x | compiler/dex/quick/gen_invoke.cc | 605 |
| -rw-r--r-- | compiler/dex/quick/mips/codegen_mips.h | 21 |
| -rw-r--r-- | compiler/dex/quick/mips/target_mips.cc | 26 |
| -rw-r--r-- | compiler/dex/quick/mir_to_lir-inl.h | 18 |
| -rw-r--r-- | compiler/dex/quick/mir_to_lir.cc | 187 |
| -rw-r--r-- | compiler/dex/quick/mir_to_lir.h | 86 |
| -rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 79 |
| -rwxr-xr-x | compiler/dex/quick/x86/target_x86.cc | 566 |
| -rw-r--r-- | test/800-smali/expected.txt | 1 |
| -rw-r--r-- | test/800-smali/smali/FloatIntConstPassing.smali | 29 |
| -rw-r--r-- | test/800-smali/src/Main.java | 1 |
18 files changed, 679 insertions, 1777 deletions
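The hunks below replace the per-backend `InToRegStorageMapper`/`InToRegStorageMapping` copies (and the separate `GenDalvikArgsNoRange`/`GenDalvikArgsRange` paths) with one shorty-driven mapper interface plus a per-target `GenDalvikArgsBulkCopy` hook. The standalone sketch that follows illustrates that contract; `ShortyArg`, `RegStorage`, and the register counts here are simplified stand-ins for the real definitions in mir_to_lir.h and the per-target files in this diff, not the actual ART types.

```cpp
// Condensed sketch of the mapper contract introduced by this change.
// All types below are simplified stand-ins, not the ART definitions.
#include <cstdio>

struct RegStorage {              // stand-in for art::RegStorage
  int reg;
  bool fp;
  bool Valid() const { return reg >= 0; }
  static RegStorage Invalid() { return RegStorage{-1, false}; }
};

class ShortyArg {                // stand-in: wraps one shorty character
 public:
  explicit ShortyArg(char c) : c_(c) {}
  bool IsFP() const { return c_ == 'F' || c_ == 'D'; }
  bool IsWide() const { return c_ == 'J' || c_ == 'D'; }
  bool IsRef() const { return c_ == 'L'; }
 private:
  char c_;
};

// Shared base class: every Quick backend now implements the same two hooks.
class InToRegStorageMapper {
 public:
  virtual RegStorage GetNextReg(ShortyArg arg) = 0;
  virtual void Reset() = 0;
  virtual ~InToRegStorageMapper() {}
};

// Example backend mapper, shaped like InToRegStorageArm64Mapper: integer args
// go to x1..x7, FP args to the first eight FP registers, the rest to the stack.
class InToRegStorageExampleMapper final : public InToRegStorageMapper {
 public:
  RegStorage GetNextReg(ShortyArg arg) override {
    if (arg.IsFP()) {
      return (cur_fp_reg_ < kMaxFpArgs) ? RegStorage{cur_fp_reg_++, true}
                                        : RegStorage::Invalid();
    }
    return (cur_core_reg_ < kMaxCoreArgs) ? RegStorage{1 + cur_core_reg_++, false}
                                          : RegStorage::Invalid();
  }
  void Reset() override { cur_core_reg_ = 0; cur_fp_reg_ = 0; }

 private:
  static constexpr int kMaxCoreArgs = 7;
  static constexpr int kMaxFpArgs = 8;
  int cur_core_reg_ = 0;
  int cur_fp_reg_ = 0;
};

int main() {
  // Common code walks the callee's shorty (skipping the return type) and asks
  // the mapper for a register per argument; an invalid result means the value
  // is passed on the stack, mirroring InToRegStorageMapping::Initialize.
  const char* shorty = "VJFLI";  // void f(long, float, Object, int)
  InToRegStorageExampleMapper mapper;
  mapper.Reset();
  for (const char* p = shorty + 1; *p != '\0'; ++p) {
    RegStorage reg = mapper.GetNextReg(ShortyArg(*p));
    std::printf("arg '%c' -> %s\n", *p, reg.Valid() ? "register" : "stack");
  }
  return 0;
}
```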
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 023abca64e..6b4d737316 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -1590,6 +1590,12 @@ const char* MIRGraph::GetShortyFromTargetIdx(int target_idx) { return cu_->dex_file->GetShorty(method_id.proto_idx_); } +const char* MIRGraph::GetShortyFromMethodReference(const MethodReference& target_method) { + const DexFile::MethodId& method_id = + target_method.dex_file->GetMethodId(target_method.dex_method_index); + return target_method.dex_file->GetShorty(method_id.proto_idx_); +} + /* Debug Utility - dump a compilation unit */ void MIRGraph::DumpMIRGraph() { const char* block_type_names[] = { diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 1a1884131a..da0dd88e84 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -1113,6 +1113,7 @@ class MIRGraph { std::string GetSSANameWithConst(int ssa_reg, bool singles_only); void GetBlockName(BasicBlock* bb, char* name); const char* GetShortyFromTargetIdx(int); + const char* GetShortyFromMethodReference(const MethodReference& target_method); void DumpMIRGraph(); CallInfo* NewMemCallInfo(BasicBlock* bb, MIR* mir, InvokeType type, bool is_range); BasicBlock* NewMemBB(BBType block_type, int block_id); diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 0bc4c3b7bf..0ae7ee3560 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -26,16 +26,6 @@ namespace art { class ArmMir2Lir FINAL : public Mir2Lir { protected: - // TODO: Consolidate hard float target support. - // InToRegStorageMapper and InToRegStorageMapping can be shared with all backends. - // Base class used to get RegStorage for next argument. - class InToRegStorageMapper { - public: - virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0; - virtual ~InToRegStorageMapper() { - } - }; - // Inherited class for ARM backend. class InToRegStorageArmMapper FINAL : public InToRegStorageMapper { public: @@ -43,46 +33,26 @@ class ArmMir2Lir FINAL : public Mir2Lir { : cur_core_reg_(0), cur_fp_reg_(0), cur_fp_double_reg_(0) { } - virtual ~InToRegStorageArmMapper() { - } - - RegStorage GetNextReg(bool is_double_or_float, bool is_wide) OVERRIDE; + RegStorage GetNextReg(ShortyArg arg) OVERRIDE; - private: - uint32_t cur_core_reg_; - uint32_t cur_fp_reg_; - uint32_t cur_fp_double_reg_; - }; - - // Class to map argument to RegStorage. The mapping object is initialized by a mapper. 
- class InToRegStorageMapping FINAL { - public: - InToRegStorageMapping() - : max_mapped_in_(0), is_there_stack_mapped_(false), initialized_(false) { + virtual void Reset() OVERRIDE { + cur_core_reg_ = 0; + cur_fp_reg_ = 0; + cur_fp_double_reg_ = 0; } - int GetMaxMappedIn() const { - return max_mapped_in_; - } - - bool IsThereStackMapped() const { - return is_there_stack_mapped_; - } - - bool IsInitialized() const { - return initialized_; - } - - void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper); - RegStorage Get(int in_position) const; - private: - std::map<int, RegStorage> mapping_; - int max_mapped_in_; - bool is_there_stack_mapped_; - bool initialized_; + size_t cur_core_reg_; + size_t cur_fp_reg_; + size_t cur_fp_double_reg_; }; + InToRegStorageArmMapper in_to_reg_storage_arm_mapper_; + InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE { + in_to_reg_storage_arm_mapper_.Reset(); + return &in_to_reg_storage_arm_mapper_; + } + public: ArmMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -127,7 +97,6 @@ class ArmMir2Lir FINAL : public Mir2Lir { } } - RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE; RegLocation GetReturnAlt() OVERRIDE; RegLocation GetReturnWideAlt() OVERRIDE; RegLocation LocCReturn() OVERRIDE; @@ -290,19 +259,6 @@ class ArmMir2Lir FINAL : public Mir2Lir { LIR* InvokeTrampoline(OpKind op, RegStorage r_tgt, QuickEntrypointEnum trampoline) OVERRIDE; size_t GetInstructionOffset(LIR* lir); - int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this) OVERRIDE; - int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this) OVERRIDE; - private: void GenNegLong(RegLocation rl_dest, RegLocation rl_src); void GenMulLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, @@ -361,7 +317,7 @@ class ArmMir2Lir FINAL : public Mir2Lir { RegStorage::FloatSolo32(reg_num * 2 + 1)); } - InToRegStorageMapping in_to_reg_storage_mapping_; + int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; }; } // namespace art diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 0e8f64556d..7190a49c26 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -896,7 +896,7 @@ void ArmMir2Lir::InstallLiteralPools() { Mir2Lir::InstallLiteralPools(); } -RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(bool is_double_or_float, bool is_wide) { +RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(ShortyArg arg) { const RegStorage coreArgMappingToPhysicalReg[] = {rs_r1, rs_r2, rs_r3}; const int coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg); @@ -906,28 +906,18 @@ RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(bool is_double_or_flo constexpr uint32_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg); static_assert(fpArgMappingToPhysicalRegSize % 2 == 0, "Number of FP Arg regs is not even"); - if (kArm32QuickCodeUseSoftFloat) { - is_double_or_float = false; // Regard double as long, float as int. - is_wide = false; // Map long separately. 
- } - RegStorage result = RegStorage::InvalidReg(); - if (is_double_or_float) { - // TODO: Remove "cur_fp_double_reg_ % 2 != 0" when we return double as double. - if (is_wide || cur_fp_double_reg_ % 2 != 0) { + // Regard double as long, float as int for kArm32QuickCodeUseSoftFloat. + if (arg.IsFP() && !kArm32QuickCodeUseSoftFloat) { + if (arg.IsWide()) { cur_fp_double_reg_ = std::max(cur_fp_double_reg_, RoundUp(cur_fp_reg_, 2)); if (cur_fp_double_reg_ < fpArgMappingToPhysicalRegSize) { - // TODO: Replace by following code in the branch when FlushIns() support 64-bit registers. - // result = RegStorage::MakeRegPair(fpArgMappingToPhysicalReg[cur_fp_double_reg_], - // fpArgMappingToPhysicalReg[cur_fp_double_reg_ + 1]); - // result = As64BitFloatReg(result); - // cur_fp_double_reg_ += 2; - result = fpArgMappingToPhysicalReg[cur_fp_double_reg_]; - cur_fp_double_reg_++; + result = RegStorage::MakeRegPair(fpArgMappingToPhysicalReg[cur_fp_double_reg_], + fpArgMappingToPhysicalReg[cur_fp_double_reg_ + 1]); + result = As64BitFloatReg(result); + cur_fp_double_reg_ += 2; } } else { - // TODO: Remove the check when we return double as double. - DCHECK_EQ(cur_fp_double_reg_ % 2, 0U); if (cur_fp_reg_ % 2 == 0) { cur_fp_reg_ = std::max(cur_fp_double_reg_, cur_fp_reg_); } @@ -939,270 +929,23 @@ RegStorage ArmMir2Lir::InToRegStorageArmMapper::GetNextReg(bool is_double_or_flo } else { if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { result = coreArgMappingToPhysicalReg[cur_core_reg_++]; - // TODO: Enable following code when FlushIns() support 64-bit registers. - // if (is_wide && cur_core_reg_ < coreArgMappingToPhysicalRegSize) { - // result = RegStorage::MakeRegPair(result, coreArgMappingToPhysicalReg[cur_core_reg_++]); - // } + if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = RegStorage::MakeRegPair(result, coreArgMappingToPhysicalReg[cur_core_reg_++]); + } } } return result; } -RegStorage ArmMir2Lir::InToRegStorageMapping::Get(int in_position) const { - DCHECK(IsInitialized()); - auto res = mapping_.find(in_position); - return res != mapping_.end() ? res->second : RegStorage::InvalidReg(); -} - -void ArmMir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, - InToRegStorageMapper* mapper) { - DCHECK(mapper != nullptr); - max_mapped_in_ = -1; - is_there_stack_mapped_ = false; - for (int in_position = 0; in_position < count; in_position++) { - RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, - arg_locs[in_position].wide); - if (reg.Valid()) { - mapping_[in_position] = reg; - // TODO: Enable the following code when FlushIns() support 64-bit argument registers. - // if (arg_locs[in_position].wide) { - // if (reg.Is32Bit()) { - // // As it is a split long, the hi-part is on stack. - // is_there_stack_mapped_ = true; - // } - // // We covered 2 v-registers, so skip the next one - // in_position++; - // } - max_mapped_in_ = std::max(max_mapped_in_, in_position); - } else { - is_there_stack_mapped_ = true; - } - } - initialized_ = true; -} - -// TODO: Should be able to return long, double registers. -// Need check some common code as it will break some assumption. 
-RegStorage ArmMir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - if (!in_to_reg_storage_mapping_.IsInitialized()) { - int start_vreg = mir_graph_->GetFirstInVR(); - RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg]; - - InToRegStorageArmMapper mapper; - in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper); - } - return in_to_reg_storage_mapping_.Get(arg_num); -} - -int ArmMir2Lir::GenDalvikArgsNoRange(CallInfo* info, - int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { +int ArmMir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) { if (kArm32QuickCodeUseSoftFloat) { - return Mir2Lir::GenDalvikArgsNoRange(info, call_state, pcrLabel, next_call_insn, target_method, - vtable_idx, direct_code, direct_method, type, skip_this); - } else { - return GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn, target_method, vtable_idx, - direct_code, direct_method, type, skip_this); - } -} - -int ArmMir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, - LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - if (kArm32QuickCodeUseSoftFloat) { - return Mir2Lir::GenDalvikArgsRange(info, call_state, pcrLabel, next_call_insn, target_method, - vtable_idx, direct_code, direct_method, type, skip_this); - } - - // TODO: Rework the implementation when argument register can be long or double. - - /* If no arguments, just return */ - if (info->num_arg_words == 0) { - return call_state; - } - - const int start_index = skip_this ? 1 : 0; - - InToRegStorageArmMapper mapper; - InToRegStorageMapping in_to_reg_storage_mapping; - in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper); - const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn(); - int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1); - - // First of all, check whether it makes sense to use bulk copying. - // Bulk copying is done only for the range case. - // TODO: make a constant instead of 2 - if (info->is_range && regs_left_to_pass_via_stack >= 2) { - // Scan the rest of the args - if in phys_reg flush to memory - for (int next_arg = last_mapped_in + 1; next_arg < info->num_arg_words;) { - RegLocation loc = info->args[next_arg]; - if (loc.wide) { - // TODO: Only flush hi-part. 
- if (loc.high_word) { - loc = info->args[--next_arg]; - } - loc = UpdateLocWide(loc); - if (loc.location == kLocPhysReg) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile); - } - next_arg += 2; - } else { - loc = UpdateLoc(loc); - if (loc.location == kLocPhysReg) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - if (loc.ref) { - StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile); - } else { - StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, - kNotVolatile); - } - } - next_arg++; - } - } - - // The rest can be copied together - int start_offset = SRegOffset(info->args[last_mapped_in + 1].s_reg_low); - int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + 1, - cu_->instruction_set); - - int current_src_offset = start_offset; - int current_dest_offset = outs_offset; - - // Only davik regs are accessed in this loop; no next_call_insn() calls. - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - while (regs_left_to_pass_via_stack > 0) { - /* - * TODO: Improve by adding block copy for large number of arguments. This - * should be done, if possible, as a target-depending helper. For now, just - * copy a Dalvik vreg at a time. - */ - // Moving 32-bits via general purpose register. - size_t bytes_to_move = sizeof(uint32_t); - - // Instead of allocating a new temp, simply reuse one of the registers being used - // for argument passing. - RegStorage temp = TargetReg(kArg3, kNotWide); - - // Now load the argument VR and store to the outs. - Load32Disp(TargetPtrReg(kSp), current_src_offset, temp); - Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp); - - current_src_offset += bytes_to_move; - current_dest_offset += bytes_to_move; - regs_left_to_pass_via_stack -= (bytes_to_move >> 2); - } - DCHECK_EQ(regs_left_to_pass_via_stack, 0); - } - - // Now handle rest not registers if they are - if (in_to_reg_storage_mapping.IsThereStackMapped()) { - RegStorage regWide = TargetReg(kArg2, kWide); - for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) { - RegLocation rl_arg = info->args[i]; - rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); - // TODO: Only pass split wide hi-part via stack. 
- if (!reg.Valid() || rl_arg.wide) { - int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); - - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - if (rl_arg.wide) { - if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile); - } else { - LoadValueDirectWideFixed(rl_arg, regWide); - StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile); - } - } else { - if (rl_arg.location == kLocPhysReg) { - if (rl_arg.ref) { - StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile); - } else { - StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile); - } - } else { - if (rl_arg.ref) { - RegStorage regSingle = TargetReg(kArg2, kRef); - LoadValueDirectFixed(rl_arg, regSingle); - StoreRefDisp(TargetPtrReg(kSp), out_offset, regSingle, kNotVolatile); - } else { - RegStorage regSingle = TargetReg(kArg2, kNotWide); - LoadValueDirectFixed(rl_arg, regSingle); - StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile); - } - } - } - } - - call_state = next_call_insn(cu_, info, call_state, target_method, - vtable_idx, direct_code, direct_method, type); - } - if (rl_arg.wide) { - i++; - } - } - } - - // Finish with mapped registers - for (int i = start_index; i <= last_mapped_in; i++) { - RegLocation rl_arg = info->args[i]; - rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); - if (reg.Valid()) { - if (reg.Is64Bit()) { - LoadValueDirectWideFixed(rl_arg, reg); - } else { - // TODO: Only split long should be the case we need to care about. - if (rl_arg.wide) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - int high_word = rl_arg.high_word ? 1 : 0; - rl_arg = high_word ? info->args[i - 1] : rl_arg; - if (rl_arg.location == kLocPhysReg) { - RegStorage rs_arg = rl_arg.reg; - if (rs_arg.IsDouble() && rs_arg.Is64BitSolo()) { - rs_arg = As64BitFloatRegPair(rs_arg); - } - RegStorage rs_arg_low = rs_arg.GetLow(); - RegStorage rs_arg_high = rs_arg.GetHigh(); - OpRegCopy(reg, high_word ? rs_arg_high : rs_arg_low); - } else { - Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + high_word), reg); - } - } else { - LoadValueDirectFixed(rl_arg, reg); - } - } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - } - if (reg.Is64Bit()) { - i++; - } - } - - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - if (pcrLabel) { - if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) { - *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); - } else { - *pcrLabel = nullptr; - // In lieu of generating a check for kArg1 being null, we need to - // perform a load when doing implicit checks. - RegStorage tmp = AllocTemp(); - Load32Disp(TargetReg(kArg1, kRef), 0, tmp); - MarkPossibleNullPointerException(info->opt_flags); - FreeTemp(tmp); - } + return Mir2Lir::GenDalvikArgsBulkCopy(info, first, count); } - return call_state; + /* + * TODO: Improve by adding block copy for large number of arguments. For now, just + * copy a Dalvik vreg at a time. 
+ */ + return count; } } // namespace art diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 5e10f80fa5..766ac23ef9 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -27,38 +27,25 @@ namespace art { class Arm64Mir2Lir FINAL : public Mir2Lir { protected: - // TODO: consolidate 64-bit target support. - class InToRegStorageMapper { - public: - virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) = 0; - virtual ~InToRegStorageMapper() {} - }; - class InToRegStorageArm64Mapper : public InToRegStorageMapper { public: InToRegStorageArm64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {} virtual ~InToRegStorageArm64Mapper() {} - virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref); + virtual RegStorage GetNextReg(ShortyArg arg); + virtual void Reset() OVERRIDE { + cur_core_reg_ = 0; + cur_fp_reg_ = 0; + } private: - int cur_core_reg_; - int cur_fp_reg_; + size_t cur_core_reg_; + size_t cur_fp_reg_; }; - class InToRegStorageMapping { - public: - InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false), - initialized_(false) {} - void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper); - int GetMaxMappedIn() { return max_mapped_in_; } - bool IsThereStackMapped() { return is_there_stack_mapped_; } - RegStorage Get(int in_position); - bool IsInitialized() { return initialized_; } - private: - std::map<int, RegStorage> mapping_; - int max_mapped_in_; - bool is_there_stack_mapped_; - bool initialized_; - }; + InToRegStorageArm64Mapper in_to_reg_storage_arm64_mapper_; + InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE { + in_to_reg_storage_arm64_mapper_.Reset(); + return &in_to_reg_storage_arm64_mapper_; + } public: Arm64Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -113,7 +100,6 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { RegStorage TargetPtrReg(SpecialTargetRegister symbolic_reg) OVERRIDE { return As64BitReg(TargetReg(symbolic_reg)); } - RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE; RegLocation GetReturnAlt() OVERRIDE; RegLocation GetReturnWideAlt() OVERRIDE; RegLocation LocCReturn() OVERRIDE; @@ -240,22 +226,6 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { bool InexpensiveConstantLong(int64_t value) OVERRIDE; bool InexpensiveConstantDouble(int64_t value) OVERRIDE; - void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE; - - int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this) OVERRIDE; - - int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this) OVERRIDE; - bool WideGPRsAreAliases() const OVERRIDE { return true; // 64b architecture. 
} @@ -422,10 +392,11 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { void GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags); - InToRegStorageMapping in_to_reg_storage_mapping_; static const A64EncodingMap EncodingMap[kA64Last]; ArenaVector<LIR*> call_method_insns_; + + int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; }; } // namespace art diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 094ff51eee..e7fa8ed475 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -790,27 +790,23 @@ const char* Arm64Mir2Lir::GetTargetInstFmt(int opcode) { return Arm64Mir2Lir::EncodingMap[UNWIDE(opcode)].fmt; } -RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(bool is_double_or_float, - bool is_wide, - bool is_ref) { +RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(ShortyArg arg) { const RegStorage coreArgMappingToPhysicalReg[] = {rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7}; - const int coreArgMappingToPhysicalRegSize = - sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage); + const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg); const RegStorage fpArgMappingToPhysicalReg[] = {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7}; - const int fpArgMappingToPhysicalRegSize = - sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage); + const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg); RegStorage result = RegStorage::InvalidReg(); - if (is_double_or_float) { + if (arg.IsFP()) { if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) { - DCHECK(!is_ref); + DCHECK(!arg.IsRef()); result = fpArgMappingToPhysicalReg[cur_fp_reg_++]; if (result.Valid()) { // TODO: switching between widths remains a bit ugly. Better way? int res_reg = result.GetReg(); - result = is_wide ? RegStorage::FloatSolo64(res_reg) : RegStorage::FloatSolo32(res_reg); + result = arg.IsWide() ? RegStorage::FloatSolo64(res_reg) : RegStorage::FloatSolo32(res_reg); } } } else { @@ -819,388 +815,15 @@ RegStorage Arm64Mir2Lir::InToRegStorageArm64Mapper::GetNextReg(bool is_double_or if (result.Valid()) { // TODO: switching between widths remains a bit ugly. Better way? int res_reg = result.GetReg(); - DCHECK(!(is_wide && is_ref)); - result = (is_wide || is_ref) ? RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg); + DCHECK(!(arg.IsWide() && arg.IsRef())); + result = (arg.IsWide() || arg.IsRef()) ? + RegStorage::Solo64(res_reg) : RegStorage::Solo32(res_reg); } } } return result; } -RegStorage Arm64Mir2Lir::InToRegStorageMapping::Get(int in_position) { - DCHECK(IsInitialized()); - auto res = mapping_.find(in_position); - return res != mapping_.end() ? 
res->second : RegStorage::InvalidReg(); -} - -void Arm64Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, - InToRegStorageMapper* mapper) { - DCHECK(mapper != nullptr); - max_mapped_in_ = -1; - is_there_stack_mapped_ = false; - for (int in_position = 0; in_position < count; in_position++) { - RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, - arg_locs[in_position].wide, - arg_locs[in_position].ref); - if (reg.Valid()) { - mapping_[in_position] = reg; - if (arg_locs[in_position].wide) { - // We covered 2 args, so skip the next one - in_position++; - } - max_mapped_in_ = std::max(max_mapped_in_, in_position); - } else { - is_there_stack_mapped_ = true; - } - } - initialized_ = true; -} - - -// Deprecate. Use the new mechanism. -// TODO(Arm64): reuse info in QuickArgumentVisitor? -static RegStorage GetArgPhysicalReg(RegLocation* loc, int* num_gpr_used, int* num_fpr_used, - OpSize* op_size) { - if (loc->fp) { - int n = *num_fpr_used; - if (n < 8) { - *num_fpr_used = n + 1; - RegStorage::RegStorageKind reg_kind; - if (loc->wide) { - *op_size = kDouble; - reg_kind = RegStorage::k64BitSolo; - } else { - *op_size = kSingle; - reg_kind = RegStorage::k32BitSolo; - } - return RegStorage(RegStorage::kValid | reg_kind | RegStorage::kFloatingPoint | n); - } - } else { - int n = *num_gpr_used; - if (n < 8) { - *num_gpr_used = n + 1; - if (loc->wide || loc->ref) { - *op_size = k64; - return RegStorage::Solo64(n); - } else { - *op_size = k32; - return RegStorage::Solo32(n); - } - } - } - *op_size = kWord; - return RegStorage::InvalidReg(); -} - -RegStorage Arm64Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - if (!in_to_reg_storage_mapping_.IsInitialized()) { - int start_vreg = mir_graph_->GetFirstInVR(); - RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg]; - - InToRegStorageArm64Mapper mapper; - in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper); - } - return in_to_reg_storage_mapping_.Get(arg_num); -} - - -/* - * If there are any ins passed in registers that have not been promoted - * to a callee-save register, flush them to the frame. Perform initial - * assignment of promoted arguments. - * - * ArgLocs is an array of location records describing the incoming arguments - * with one location record per word of argument. - */ -void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { - int num_gpr_used = 1; - int num_fpr_used = 0; - - /* - * Dummy up a RegLocation for the incoming StackReference<mirror::ArtMethod> - * It will attempt to keep kArg0 live (or copy it to home location - * if promoted). - */ - RegLocation rl_src = rl_method; - rl_src.location = kLocPhysReg; - rl_src.reg = TargetReg(kArg0, kRef); - rl_src.home = false; - MarkLive(rl_src); - StoreValue(rl_method, rl_src); - // If Method* has been promoted, explicitly flush - if (rl_method.location == kLocPhysReg) { - StoreRefDisp(TargetPtrReg(kSp), 0, rl_src.reg, kNotVolatile); - } - - if (mir_graph_->GetNumOfInVRs() == 0) { - return; - } - - // Handle dalvik registers. - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - int start_vreg = mir_graph_->GetFirstInVR(); - for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i++) { - RegLocation* t_loc = &ArgLocs[i]; - OpSize op_size; - RegStorage reg = GetArgPhysicalReg(t_loc, &num_gpr_used, &num_fpr_used, &op_size); - - if (reg.Valid()) { - // If arriving in register. - - // We have already updated the arg location with promoted info - // so we can be based on it. 
- if (t_loc->location == kLocPhysReg) { - // Just copy it. - OpRegCopy(t_loc->reg, reg); - } else { - // Needs flush. - if (t_loc->ref) { - StoreRefDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, kNotVolatile); - } else { - StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32, - kNotVolatile); - } - } - } else { - // If arriving in frame & promoted. - if (t_loc->location == kLocPhysReg) { - if (t_loc->ref) { - LoadRefDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile); - } else { - LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), t_loc->reg, - t_loc->wide ? k64 : k32, kNotVolatile); - } - } - } - if (t_loc->wide) { - // Increment i to skip the next one. - i++; - } - // if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) { - // OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg); - // } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { - // OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg); - // } else { - // StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, op_size, kNotVolatile); - // if (reg.Is64Bit()) { - // if (SRegOffset(start_vreg + i) + 4 != SRegOffset(start_vreg + i + 1)) { - // LOG(FATAL) << "64 bit value stored in non-consecutive 4 bytes slots"; - // } - // i += 1; - // } - // } - // } else { - // // If arriving in frame & promoted - // if (v_map->core_location == kLocPhysReg) { - // LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), - // RegStorage::Solo32(v_map->core_reg)); - // } - // if (v_map->fp_location == kLocPhysReg) { - // LoadWordDisp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->fp_reg)); - // } - } -} - -/* - * Load up to 5 arguments, the first three of which will be in - * kArg1 .. kArg3. On entry kArg0 contains the current method pointer, - * and as part of the load sequence, it must be replaced with - * the target method pointer. - */ -int Arm64Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, - int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - return GenDalvikArgsRange(info, - call_state, pcrLabel, next_call_insn, - target_method, - vtable_idx, direct_code, - direct_method, type, skip_this); -} - -/* - * May have 0+ arguments (also used for jumbo). Note that - * source virtual registers may be in physical registers, so may - * need to be flushed to home location before copying. This - * applies to arg3 and above (see below). - * - * FIXME: update comments. - * - * Two general strategies: - * If < 20 arguments - * Pass args 3-18 using vldm/vstm block copy - * Pass arg0, arg1 & arg2 in kArg1-kArg3 - * If 20+ arguments - * Pass args arg19+ using memcpy block copy - * Pass arg0, arg1 & arg2 in kArg1-kArg3 - * - */ -int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, - LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - /* If no arguments, just return */ - if (info->num_arg_words == 0) - return call_state; - - const int start_index = skip_this ? 
1 : 0; - - InToRegStorageArm64Mapper mapper; - InToRegStorageMapping in_to_reg_storage_mapping; - in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper); - const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn(); - int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1); - - // First of all, check whether it makes sense to use bulk copying. - // Bulk copying is done only for the range case. - // TODO: make a constant instead of 2 - if (info->is_range && regs_left_to_pass_via_stack >= 2) { - // Scan the rest of the args - if in phys_reg flush to memory - for (int next_arg = last_mapped_in + 1; next_arg < info->num_arg_words;) { - RegLocation loc = info->args[next_arg]; - if (loc.wide) { - loc = UpdateLocWide(loc); - if (loc.location == kLocPhysReg) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile); - } - next_arg += 2; - } else { - loc = UpdateLoc(loc); - if (loc.location == kLocPhysReg) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - if (loc.ref) { - StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile); - } else { - StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, - kNotVolatile); - } - } - next_arg++; - } - } - - // The rest can be copied together - int start_offset = SRegOffset(info->args[last_mapped_in + 1].s_reg_low); - int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + 1, - cu_->instruction_set); - - int current_src_offset = start_offset; - int current_dest_offset = outs_offset; - - // Only davik regs are accessed in this loop; no next_call_insn() calls. - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - while (regs_left_to_pass_via_stack > 0) { - /* - * TODO: Improve by adding block copy for large number of arguments. This - * should be done, if possible, as a target-depending helper. For now, just - * copy a Dalvik vreg at a time. - */ - // Moving 32-bits via general purpose register. - size_t bytes_to_move = sizeof(uint32_t); - - // Instead of allocating a new temp, simply reuse one of the registers being used - // for argument passing. - RegStorage temp = TargetReg(kArg3, kNotWide); - - // Now load the argument VR and store to the outs. 
- Load32Disp(TargetPtrReg(kSp), current_src_offset, temp); - Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp); - - current_src_offset += bytes_to_move; - current_dest_offset += bytes_to_move; - regs_left_to_pass_via_stack -= (bytes_to_move >> 2); - } - DCHECK_EQ(regs_left_to_pass_via_stack, 0); - } - - // Now handle rest not registers if they are - if (in_to_reg_storage_mapping.IsThereStackMapped()) { - RegStorage regWide = TargetReg(kArg3, kWide); - for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) { - RegLocation rl_arg = info->args[i]; - rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); - if (!reg.Valid()) { - int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); - - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - if (rl_arg.wide) { - if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile); - } else { - LoadValueDirectWideFixed(rl_arg, regWide); - StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile); - } - } else { - if (rl_arg.location == kLocPhysReg) { - if (rl_arg.ref) { - StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile); - } else { - StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile); - } - } else { - if (rl_arg.ref) { - RegStorage regSingle = TargetReg(kArg2, kRef); - LoadValueDirectFixed(rl_arg, regSingle); - StoreRefDisp(TargetPtrReg(kSp), out_offset, regSingle, kNotVolatile); - } else { - RegStorage regSingle = TargetReg(kArg2, kNotWide); - LoadValueDirectFixed(rl_arg, regSingle); - StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile); - } - } - } - } - call_state = next_call_insn(cu_, info, call_state, target_method, - vtable_idx, direct_code, direct_method, type); - } - if (rl_arg.wide) { - i++; - } - } - } - - // Finish with mapped registers - for (int i = start_index; i <= last_mapped_in; i++) { - RegLocation rl_arg = info->args[i]; - rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); - if (reg.Valid()) { - if (rl_arg.wide) { - LoadValueDirectWideFixed(rl_arg, reg); - } else { - LoadValueDirectFixed(rl_arg, reg); - } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - } - if (rl_arg.wide) { - i++; - } - } - - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - if (pcrLabel) { - if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) { - *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); - } else { - *pcrLabel = nullptr; - // In lieu of generating a check for kArg1 being null, we need to - // perform a load when doing implicit checks. - RegStorage tmp = AllocTemp(); - Load32Disp(TargetReg(kArg1, kRef), 0, tmp); - MarkPossibleNullPointerException(info->opt_flags); - FreeTemp(tmp); - } - } - return call_state; -} - void Arm64Mir2Lir::InstallLiteralPools() { // PC-relative calls to methods. patches_.reserve(call_method_insns_.size()); @@ -1218,4 +841,12 @@ void Arm64Mir2Lir::InstallLiteralPools() { Mir2Lir::InstallLiteralPools(); } +int Arm64Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* /*info*/, int /*first*/, int count) { + /* + * TODO: Improve by adding block copy for large number of arguments. For now, just + * copy a Dalvik vreg at a time. 
+ */ + return count; +} + } // namespace art diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 066041c6ad..cc61e93d82 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -997,7 +997,8 @@ Mir2Lir::Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena last_lir_insn_(nullptr), slow_paths_(arena->Adapter(kArenaAllocSlowPaths)), mem_ref_type_(ResourceMask::kHeapRef), - mask_cache_(arena) { + mask_cache_(arena), + in_to_reg_storage_mapping_(arena) { switch_tables_.reserve(4); fill_array_data_.reserve(4); tempreg_info_.reserve(20); diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 31b81bfb92..9462d3d08f 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -401,59 +401,50 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { * half to memory as well. */ ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i++) { - PromotionMap* v_map = &promotion_map_[start_vreg + i]; + RegLocation* t_loc = nullptr; + for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i += t_loc->wide ? 2 : 1) { + // get reg corresponding to input RegStorage reg = GetArgMappingToPhysicalReg(i); + t_loc = &ArgLocs[i]; + + // If the wide input appeared as single, flush it and go + // as it comes from memory. + if (t_loc->wide && reg.Valid() && !reg.Is64Bit()) { + StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg, k32, kNotVolatile); + reg = RegStorage::InvalidReg(); + } if (reg.Valid()) { - // If arriving in register - bool need_flush = true; - RegLocation* t_loc = &ArgLocs[i]; - if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) { - OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg); - need_flush = false; - } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { - OpRegCopy(RegStorage::Solo32(v_map->fp_reg), reg); - need_flush = false; + // If arriving in register. + + // We have already updated the arg location with promoted info + // so we can be based on it. + if (t_loc->location == kLocPhysReg) { + // Just copy it. + if (t_loc->wide) { + OpRegCopyWide(t_loc->reg, reg); + } else { + OpRegCopy(t_loc->reg, reg); + } } else { - need_flush = true; - } - - // For wide args, force flush if not fully promoted - if (t_loc->wide) { - PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1); - // Is only half promoted? - need_flush |= (p_map->core_location != v_map->core_location) || - (p_map->fp_location != v_map->fp_location); - if ((cu_->instruction_set == kThumb2) && t_loc->fp && !need_flush) { - /* - * In Arm, a double is represented as a pair of consecutive single float - * registers starting at an even number. It's possible that both Dalvik vRegs - * representing the incoming double were independently promoted as singles - but - * not in a form usable as a double. If so, we need to flush - even though the - * incoming arg appears fully in register. At this point in the code, both - * halves of the double are promoted. Make sure they are in a usable form. - */ - int lowreg_index = start_vreg + i + (t_loc->high_word ? -1 : 0); - int low_reg = promotion_map_[lowreg_index].fp_reg; - int high_reg = promotion_map_[lowreg_index + 1].fp_reg; - if (((low_reg & 0x1) != 0) || (high_reg != (low_reg + 1))) { - need_flush = true; - } + // Needs flush. 
+ int offset = SRegOffset(start_vreg + i); + if (t_loc->ref) { + StoreRefDisp(TargetPtrReg(kSp), offset, reg, kNotVolatile); + } else { + StoreBaseDisp(TargetPtrReg(kSp), offset, reg, t_loc->wide ? k64 : k32, kNotVolatile); } } - if (need_flush) { - Store32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), reg); - } } else { - // If arriving in frame & promoted - if (v_map->core_location == kLocPhysReg) { - Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), - RegStorage::Solo32(v_map->core_reg)); - } - if (v_map->fp_location == kLocPhysReg) { - Load32Disp(TargetPtrReg(kSp), SRegOffset(start_vreg + i), - RegStorage::Solo32(v_map->fp_reg)); + // If arriving in frame & promoted. + if (t_loc->location == kLocPhysReg) { + int offset = SRegOffset(start_vreg + i); + if (t_loc->ref) { + LoadRefDisp(TargetPtrReg(kSp), offset, t_loc->reg, kNotVolatile); + } else { + LoadBaseDisp(TargetPtrReg(kSp), offset, t_loc->reg, t_loc->wide ? k64 : k32, + kNotVolatile); + } } } } @@ -568,7 +559,7 @@ static int NextSDCallInsn(CompilationUnit* cu, CallInfo* info, * emit the next instruction in a virtual invoke sequence. * We can use kLr as a temp prior to target address loading * Note also that we'll load the first argument ("this") into - * kArg1 here rather than the standard LoadArgRegs. + * kArg1 here rather than the standard GenDalvikArgs. */ static int NextVCallInsn(CompilationUnit* cu, CallInfo* info, int state, const MethodReference& target_method, @@ -612,7 +603,7 @@ static int NextVCallInsn(CompilationUnit* cu, CallInfo* info, * Emit the next instruction in an invoke interface sequence. This will do a lookup in the * class's IMT, calling either the actual method or art_quick_imt_conflict_trampoline if * more than one interface method map to the same index. Note also that we'll load the first - * argument ("this") into kArg1 here rather than the standard LoadArgRegs. + * argument ("this") into kArg1 here rather than the standard GenDalvikArgs. */ static int NextInterfaceCallInsn(CompilationUnit* cu, CallInfo* info, int state, const MethodReference& target_method, @@ -719,158 +710,6 @@ static int NextInterfaceCallInsnWithAccessCheck(CompilationUnit* cu, target_method, 0); } -int Mir2Lir::LoadArgRegs(CallInfo* info, int call_state, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - int last_arg_reg = 3 - 1; - int arg_regs[3] = {TargetReg(kArg1, kNotWide).GetReg(), TargetReg(kArg2, kNotWide).GetReg(), - TargetReg(kArg3, kNotWide).GetReg()}; - - int next_reg = 0; - int next_arg = 0; - if (skip_this) { - next_reg++; - next_arg++; - } - for (; (next_reg <= last_arg_reg) && (next_arg < info->num_arg_words); next_reg++) { - RegLocation rl_arg = info->args[next_arg++]; - rl_arg = UpdateRawLoc(rl_arg); - if (rl_arg.wide && (next_reg <= last_arg_reg - 1)) { - RegStorage r_tmp(RegStorage::k64BitPair, arg_regs[next_reg], arg_regs[next_reg + 1]); - LoadValueDirectWideFixed(rl_arg, r_tmp); - next_reg++; - next_arg++; - } else { - if (rl_arg.wide) { - rl_arg = NarrowRegLoc(rl_arg); - rl_arg.is_const = false; - } - LoadValueDirectFixed(rl_arg, RegStorage::Solo32(arg_regs[next_reg])); - } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - } - return call_state; -} - -/* - * Load up to 5 arguments, the first three of which will be in - * kArg1 .. kArg3. 
On entry kArg0 contains the current method pointer, - * and as part of the load sequence, it must be replaced with - * the target method pointer. Note, this may also be called - * for "range" variants if the number of arguments is 5 or fewer. - */ -int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, - int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - RegLocation rl_arg; - - /* If no arguments, just return */ - if (info->num_arg_words == 0) - return call_state; - - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - - DCHECK_LE(info->num_arg_words, 5); - if (info->num_arg_words > 3) { - int32_t next_use = 3; - // Detect special case of wide arg spanning arg3/arg4 - RegLocation rl_use0 = info->args[0]; - RegLocation rl_use1 = info->args[1]; - RegLocation rl_use2 = info->args[2]; - if (((!rl_use0.wide && !rl_use1.wide) || rl_use0.wide) && rl_use2.wide) { - RegStorage reg; - // Wide spans, we need the 2nd half of uses[2]. - rl_arg = UpdateLocWide(rl_use2); - if (rl_arg.location == kLocPhysReg) { - if (rl_arg.reg.IsPair()) { - reg = rl_arg.reg.GetHigh(); - } else { - RegisterInfo* reg_info = GetRegInfo(rl_arg.reg); - reg_info = reg_info->FindMatchingView(RegisterInfo::kHighSingleStorageMask); - if (reg_info == nullptr) { - // NOTE: For hard float convention we won't split arguments across reg/mem. - UNIMPLEMENTED(FATAL) << "Needs hard float api."; - } - reg = reg_info->GetReg(); - } - } else { - // kArg2 & rArg3 can safely be used here - reg = TargetReg(kArg3, kNotWide); - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - Load32Disp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low) + 4, reg); - } - call_state = next_call_insn(cu_, info, call_state, target_method, - vtable_idx, direct_code, direct_method, type); - } - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - Store32Disp(TargetPtrReg(kSp), (next_use + 1) * 4, reg); - } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - next_use++; - } - // Loop through the rest - while (next_use < info->num_arg_words) { - RegStorage arg_reg; - rl_arg = info->args[next_use]; - rl_arg = UpdateRawLoc(rl_arg); - if (rl_arg.location == kLocPhysReg) { - arg_reg = rl_arg.reg; - } else { - arg_reg = TargetReg(kArg2, rl_arg.wide ? 
kWide : kNotWide); - if (rl_arg.wide) { - LoadValueDirectWideFixed(rl_arg, arg_reg); - } else { - LoadValueDirectFixed(rl_arg, arg_reg); - } - call_state = next_call_insn(cu_, info, call_state, target_method, - vtable_idx, direct_code, direct_method, type); - } - int outs_offset = (next_use + 1) * 4; - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - if (rl_arg.wide) { - StoreBaseDisp(TargetPtrReg(kSp), outs_offset, arg_reg, k64, kNotVolatile); - next_use += 2; - } else { - Store32Disp(TargetPtrReg(kSp), outs_offset, arg_reg); - next_use++; - } - } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - } - } - - call_state = LoadArgRegs(info, call_state, next_call_insn, - target_method, vtable_idx, direct_code, direct_method, - type, skip_this); - - if (pcrLabel) { - if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) { - *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); - } else { - *pcrLabel = nullptr; - if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && - (info->opt_flags & MIR_IGNORE_NULL_CHECK)) { - return call_state; - } - // In lieu of generating a check for kArg1 being null, we need to - // perform a load when doing implicit checks. - GenImplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); - } - } - return call_state; -} - // Default implementation of implicit null pointer check. // Overridden by arch specific as necessary. void Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) { @@ -883,210 +722,195 @@ void Mir2Lir::GenImplicitNullCheck(RegStorage reg, int opt_flags) { FreeTemp(tmp); } - -/* - * May have 0+ arguments (also used for jumbo). Note that - * source virtual registers may be in physical registers, so may - * need to be flushed to home location before copying. This - * applies to arg3 and above (see below). - * - * Two general strategies: - * If < 20 arguments - * Pass args 3-18 using vldm/vstm block copy - * Pass arg0, arg1 & arg2 in kArg1-kArg3 - * If 20+ arguments - * Pass args arg19+ using memcpy block copy - * Pass arg0, arg1 & arg2 in kArg1-kArg3 - * +/** + * @brief Used to flush promoted registers if they are used as argument + * in an invocation. + * @param info the infromation about arguments for invocation. + * @param start the first argument we should start to look from. */ -int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, - LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, - InvokeType type, bool skip_this) { - // If we can treat it as non-range (Jumbo ops will use range form) - if (info->num_arg_words <= 5) - return GenDalvikArgsNoRange(info, call_state, pcrLabel, - next_call_insn, target_method, vtable_idx, - direct_code, direct_method, type, skip_this); - /* - * First load the non-register arguments. Both forms expect all - * of the source arguments to be in their home frame location, so - * scan the s_reg names and flush any that have been promoted to - * frame backing storage. - */ +void Mir2Lir::GenDalvikArgsFlushPromoted(CallInfo* info, int start) { + if (cu_->disable_opt & (1 << kPromoteRegs)) { + // This make sense only if promotion is enabled. 
+ return; + } + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); // Scan the rest of the args - if in phys_reg flush to memory - for (int next_arg = 0; next_arg < info->num_arg_words;) { + for (int next_arg = start; next_arg < info->num_arg_words;) { RegLocation loc = info->args[next_arg]; if (loc.wide) { loc = UpdateLocWide(loc); - if ((next_arg >= 2) && (loc.location == kLocPhysReg)) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (loc.location == kLocPhysReg) { StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile); } next_arg += 2; } else { loc = UpdateLoc(loc); - if ((next_arg >= 3) && (loc.location == kLocPhysReg)) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - Store32Disp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg); + if (loc.location == kLocPhysReg) { + if (loc.ref) { + StoreRefDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, kNotVolatile); + } else { + StoreBaseDisp(TargetPtrReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32, + kNotVolatile); + } } next_arg++; } } +} - // The first 3 arguments are passed via registers. - // TODO: For 64-bit, instead of hardcoding 4 for Method* size, we should either - // get size of uintptr_t or size of object reference according to model being used. - int outs_offset = 4 /* Method* */ + (3 * sizeof(uint32_t)); - int start_offset = SRegOffset(info->args[3].s_reg_low); - int regs_left_to_pass_via_stack = info->num_arg_words - 3; - DCHECK_GT(regs_left_to_pass_via_stack, 0); - - if (cu_->instruction_set == kThumb2 && regs_left_to_pass_via_stack <= 16) { - // Use vldm/vstm pair using kArg3 as a temp - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - OpRegRegImm(kOpAdd, TargetReg(kArg3, kRef), TargetPtrReg(kSp), start_offset); - LIR* ld = nullptr; - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - ld = OpVldm(TargetReg(kArg3, kRef), regs_left_to_pass_via_stack); - } - // TUNING: loosen barrier - ld->u.m.def_mask = &kEncodeAll; - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - OpRegRegImm(kOpAdd, TargetReg(kArg3, kRef), TargetPtrReg(kSp), 4 /* Method* */ + (3 * 4)); - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - LIR* st = nullptr; - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - st = OpVstm(TargetReg(kArg3, kRef), regs_left_to_pass_via_stack); - } - st->u.m.def_mask = &kEncodeAll; - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - } else if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { - int current_src_offset = start_offset; - int current_dest_offset = outs_offset; - - // Only davik regs are accessed in this loop; no next_call_insn() calls. - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - while (regs_left_to_pass_via_stack > 0) { - // This is based on the knowledge that the stack itself is 16-byte aligned. - bool src_is_16b_aligned = (current_src_offset & 0xF) == 0; - bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0; - size_t bytes_to_move; - - /* - * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a - * a 128-bit move because we won't get the chance to try to aligned. 
If there are more than - * 4 registers left to move, consider doing a 128-bit only if either src or dest are aligned. - * We do this because we could potentially do a smaller move to align. - */ - if (regs_left_to_pass_via_stack == 4 || - (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) { - // Moving 128-bits via xmm register. - bytes_to_move = sizeof(uint32_t) * 4; - - // Allocate a free xmm temp. Since we are working through the calling sequence, - // we expect to have an xmm temporary available. AllocTempDouble will abort if - // there are no free registers. - RegStorage temp = AllocTempDouble(); - - LIR* ld1 = nullptr; - LIR* ld2 = nullptr; - LIR* st1 = nullptr; - LIR* st2 = nullptr; - - /* - * The logic is similar for both loads and stores. If we have 16-byte alignment, - * do an aligned move. If we have 8-byte alignment, then do the move in two - * parts. This approach prevents possible cache line splits. Finally, fall back - * to doing an unaligned move. In most cases we likely won't split the cache - * line but we cannot prove it and thus take a conservative approach. - */ - bool src_is_8b_aligned = (current_src_offset & 0x7) == 0; - bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0; - - if (src_is_16b_aligned) { - ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovA128FP); - } else if (src_is_8b_aligned) { - ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovLo128FP); - ld2 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset + (bytes_to_move >> 1), - kMovHi128FP); - } else { - ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovU128FP); - } +/** + * @brief Used to optimize the copying of VRs which are arguments of invocation. + * Please note that you should flush promoted registers first if you copy. + * If implementation does copying it may skip several of the first VRs but must copy + * till the end. Implementation must return the number of skipped VRs + * (it might be all VRs). + * @see GenDalvikArgsFlushPromoted + * @param info the information about arguments for invocation. + * @param first the first argument we should start to look from. + * @param count the number of remaining arguments we can handle. + * @return the number of arguments which we did not handle. Unhandled arguments + * must be attached to the first one. + */ +int Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) { + // call is pretty expensive, let's use it if count is big. 
+ if (count > 16) { + GenDalvikArgsFlushPromoted(info, first); + int start_offset = SRegOffset(info->args[first].s_reg_low); + int outs_offset = StackVisitor::GetOutVROffset(first, cu_->instruction_set); - if (dest_is_16b_aligned) { - st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovA128FP); - } else if (dest_is_8b_aligned) { - st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovLo128FP); - st2 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset + (bytes_to_move >> 1), - temp, kMovHi128FP); - } else { - st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovU128FP); - } + OpRegRegImm(kOpAdd, TargetReg(kArg0, kRef), TargetPtrReg(kSp), outs_offset); + OpRegRegImm(kOpAdd, TargetReg(kArg1, kRef), TargetPtrReg(kSp), start_offset); + CallRuntimeHelperRegRegImm(kQuickMemcpy, TargetReg(kArg0, kRef), TargetReg(kArg1, kRef), + count * 4, false); + count = 0; + } + return count; +} + +int Mir2Lir::GenDalvikArgs(CallInfo* info, int call_state, + LIR** pcrLabel, NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, + InvokeType type, bool skip_this) { + // If no arguments, just return. + if (info->num_arg_words == 0) + return call_state; - // TODO If we could keep track of aliasing information for memory accesses that are wider - // than 64-bit, we wouldn't need to set up a barrier. - if (ld1 != nullptr) { - if (ld2 != nullptr) { - // For 64-bit load we can actually set up the aliasing information. - AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true); - AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, - true); + const int start_index = skip_this ? 1 : 0; + + // Get architecture dependent mapping between output VRs and physical registers + // basing on shorty of method to call. + InToRegStorageMapping in_to_reg_storage_mapping(arena_); + { + const char* target_shorty = mir_graph_->GetShortyFromMethodReference(target_method); + ShortyIterator shorty_iterator(target_shorty, type == kStatic); + in_to_reg_storage_mapping.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper()); + } + + int stack_map_start = std::max(in_to_reg_storage_mapping.GetMaxMappedIn() + 1, start_index); + if ((stack_map_start < info->num_arg_words) && info->args[stack_map_start].high_word) { + // It is possible that the last mapped reg is 32 bit while arg is 64-bit. + // It will be handled together with low part mapped to register. + stack_map_start++; + } + int regs_left_to_pass_via_stack = info->num_arg_words - stack_map_start; + + // If it is a range case we can try to copy remaining VRs (not mapped to physical registers) + // using more optimal algorithm. + if (info->is_range && regs_left_to_pass_via_stack > 1) { + regs_left_to_pass_via_stack = GenDalvikArgsBulkCopy(info, stack_map_start, + regs_left_to_pass_via_stack); + } + + // Now handle any remaining VRs mapped to stack. + if (in_to_reg_storage_mapping.HasArgumentsOnStack()) { + // Two temps but do not use kArg1, it might be this which we can skip. + // Separate single and wide - it can give some advantage. 
+ RegStorage regRef = TargetReg(kArg3, kRef); + RegStorage regSingle = TargetReg(kArg3, kNotWide); + RegStorage regWide = TargetReg(kArg2, kWide); + for (int i = start_index; + i < stack_map_start + regs_left_to_pass_via_stack; i++) { + RegLocation rl_arg = info->args[i]; + rl_arg = UpdateRawLoc(rl_arg); + RegStorage reg = in_to_reg_storage_mapping.Get(i); + if (!reg.Valid()) { + int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); + { + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (rl_arg.wide) { + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile); + } else { + LoadValueDirectWideFixed(rl_arg, regWide); + StoreBaseDisp(TargetPtrReg(kSp), out_offset, regWide, k64, kNotVolatile); + } } else { - // Set barrier for 128-bit load. - ld1->u.m.def_mask = &kEncodeAll; + if (rl_arg.location == kLocPhysReg) { + if (rl_arg.ref) { + StoreRefDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, kNotVolatile); + } else { + StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k32, kNotVolatile); + } + } else { + if (rl_arg.ref) { + LoadValueDirectFixed(rl_arg, regRef); + StoreRefDisp(TargetPtrReg(kSp), out_offset, regRef, kNotVolatile); + } else { + LoadValueDirectFixed(rl_arg, regSingle); + StoreBaseDisp(TargetPtrReg(kSp), out_offset, regSingle, k32, kNotVolatile); + } + } } } - if (st1 != nullptr) { - if (st2 != nullptr) { - // For 64-bit store we can actually set up the aliasing information. - AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true); - AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, - true); + call_state = next_call_insn(cu_, info, call_state, target_method, + vtable_idx, direct_code, direct_method, type); + } + if (rl_arg.wide) { + i++; + } + } + } + + // Finish with VRs mapped to physical registers. + for (int i = start_index; i < stack_map_start; i++) { + RegLocation rl_arg = info->args[i]; + rl_arg = UpdateRawLoc(rl_arg); + RegStorage reg = in_to_reg_storage_mapping.Get(i); + if (reg.Valid()) { + if (rl_arg.wide) { + // if reg is not 64-bit (it is half of 64-bit) then handle it separately. + if (!reg.Is64Bit()) { + // TODO: REVISIT: This adds a spill of low part while we could just copy it. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + if (rl_arg.location == kLocPhysReg) { + int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); + // Dump it to memory and then load only low part + StoreBaseDisp(TargetPtrReg(kSp), out_offset, rl_arg.reg, k64, kNotVolatile); + LoadBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile); } else { - // Set barrier for 128-bit store. - st1->u.m.def_mask = &kEncodeAll; + int out_offset = StackVisitor::GetOutVROffset(i + 1, cu_->instruction_set); + // First, use target reg for high part. + LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low + 1), reg, k32, + kNotVolatile); + StoreBaseDisp(TargetPtrReg(kSp), out_offset, reg, k32, kNotVolatile); + // Now load target reg with low part. + LoadBaseDisp(TargetPtrReg(kSp), SRegOffset(rl_arg.s_reg_low), reg, k32, kNotVolatile); } + } else { + LoadValueDirectWideFixed(rl_arg, reg); } - - // Free the temporary used for the data movement. - FreeTemp(temp); } else { - // Moving 32-bits via general purpose register. - bytes_to_move = sizeof(uint32_t); - - // Instead of allocating a new temp, simply reuse one of the registers being used - // for argument passing. 
- RegStorage temp = TargetReg(kArg3, kNotWide); - - // Now load the argument VR and store to the outs. - Load32Disp(TargetPtrReg(kSp), current_src_offset, temp); - Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp); + LoadValueDirectFixed(rl_arg, reg); } - - current_src_offset += bytes_to_move; - current_dest_offset += bytes_to_move; - regs_left_to_pass_via_stack -= (bytes_to_move >> 2); + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + } + if (rl_arg.wide) { + i++; } - } else { - // Generate memcpy - OpRegRegImm(kOpAdd, TargetReg(kArg0, kRef), TargetPtrReg(kSp), outs_offset); - OpRegRegImm(kOpAdd, TargetReg(kArg1, kRef), TargetPtrReg(kSp), start_offset); - CallRuntimeHelperRegRegImm(kQuickMemcpy, TargetReg(kArg0, kRef), TargetReg(kArg1, kRef), - (info->num_arg_words - 3) * 4, false); } - call_state = LoadArgRegs(info, call_state, next_call_insn, - target_method, vtable_idx, direct_code, direct_method, - type, skip_this); - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); if (pcrLabel) { @@ -1094,18 +918,20 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); } else { *pcrLabel = nullptr; - if (!(cu_->disable_opt & (1 << kNullCheckElimination)) && - (info->opt_flags & MIR_IGNORE_NULL_CHECK)) { - return call_state; - } - // In lieu of generating a check for kArg1 being null, we need to - // perform a load when doing implicit checks. GenImplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); } } return call_state; } +RegStorage Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { + if (!in_to_reg_storage_mapping_.IsInitialized()) { + ShortyIterator shorty_iterator(cu_->shorty, cu_->invoke_type == kStatic); + in_to_reg_storage_mapping_.Initialize(&shorty_iterator, GetResetedInToRegStorageMapper()); + } + return in_to_reg_storage_mapping_.Get(arg_num); +} + RegLocation Mir2Lir::InlineTarget(CallInfo* info) { RegLocation res; if (info->result.location == kLocInvalid) { @@ -1719,17 +1545,10 @@ void Mir2Lir::GenInvokeNoInline(CallInfo* info) { skip_this = fast_path; } MethodReference target_method = method_info.GetTargetMethod(); - if (!info->is_range) { - call_state = GenDalvikArgsNoRange(info, call_state, p_null_ck, - next_call_insn, target_method, method_info.VTableIndex(), - method_info.DirectCode(), method_info.DirectMethod(), - original_type, skip_this); - } else { - call_state = GenDalvikArgsRange(info, call_state, p_null_ck, - next_call_insn, target_method, method_info.VTableIndex(), - method_info.DirectCode(), method_info.DirectMethod(), - original_type, skip_this); - } + call_state = GenDalvikArgs(info, call_state, p_null_ck, + next_call_insn, target_method, method_info.VTableIndex(), + method_info.DirectCode(), method_info.DirectMethod(), + original_type, skip_this); // Finish up any of the call sequence not interleaved in arg loading while (call_state >= 0) { call_state = next_call_insn(cu_, info, call_state, target_method, method_info.VTableIndex(), diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index e08846c325..8f976df09d 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -24,6 +24,26 @@ namespace art { class MipsMir2Lir FINAL : public Mir2Lir { + protected: + class InToRegStorageMipsMapper : public InToRegStorageMapper { + public: + explicit 
InToRegStorageMipsMapper(Mir2Lir* m2l) : m2l_(m2l), cur_core_reg_(0) {} + virtual RegStorage GetNextReg(ShortyArg arg); + virtual void Reset() OVERRIDE { + cur_core_reg_ = 0; + } + protected: + Mir2Lir* m2l_; + private: + size_t cur_core_reg_; + }; + + InToRegStorageMipsMapper in_to_reg_storage_mips_mapper_; + InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE { + in_to_reg_storage_mips_mapper_.Reset(); + return &in_to_reg_storage_mips_mapper_; + } + public: MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -56,7 +76,6 @@ class MipsMir2Lir FINAL : public Mir2Lir { // Required for target - register utilities. RegStorage Solo64ToPair64(RegStorage reg); RegStorage TargetReg(SpecialTargetRegister reg); - RegStorage GetArgMappingToPhysicalReg(int arg_num); RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index 185112dbf9..efa130c65d 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -122,18 +122,20 @@ RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg) { return res_reg; } -RegStorage MipsMir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - // For the 32-bit internal ABI, the first 3 arguments are passed in registers. - switch (arg_num) { - case 0: - return rs_rMIPS_ARG1; - case 1: - return rs_rMIPS_ARG2; - case 2: - return rs_rMIPS_ARG3; - default: - return RegStorage::InvalidReg(); +RegStorage MipsMir2Lir::InToRegStorageMipsMapper::GetNextReg(ShortyArg arg) { + const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3}; + const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg); + + RegStorage result = RegStorage::InvalidReg(); + if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], + arg.IsRef() ? 
kRef : kNotWide); + if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = RegStorage::MakeRegPair( + result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide)); + } } + return result; } /* @@ -602,7 +604,7 @@ RegisterClass MipsMir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volati } MipsMir2Lir::MipsMir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) - : Mir2Lir(cu, mir_graph, arena) { + : Mir2Lir(cu, mir_graph, arena), in_to_reg_storage_mips_mapper_(this) { for (int i = 0; i < kMipsLast; i++) { DCHECK_EQ(MipsMir2Lir::EncodingMap[i].opcode, i) << "Encoding order for " << MipsMir2Lir::EncodingMap[i].name diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h index 0aefc2dea8..144790e9d7 100644 --- a/compiler/dex/quick/mir_to_lir-inl.h +++ b/compiler/dex/quick/mir_to_lir-inl.h @@ -276,6 +276,24 @@ inline void Mir2Lir::CheckRegStorage(RegStorage rs, WidenessCheck wide, RefCheck } } +inline Mir2Lir::ShortyIterator::ShortyIterator(const char* shorty, bool is_static) + : cur_(shorty + 1), pending_this_(!is_static), initialized_(false) { + DCHECK(shorty != nullptr); + DCHECK_NE(*shorty, 0); +} + +inline bool Mir2Lir::ShortyIterator::Next() { + if (!initialized_) { + initialized_ = true; + } else if (pending_this_) { + pending_this_ = false; + } else if (*cur_ != 0) { + cur_++; + } + + return *cur_ != 0 || pending_this_; +} + } // namespace art #endif // ART_COMPILER_DEX_QUICK_MIR_TO_LIR_INL_H_ diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 320c0f4900..bd88091add 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -53,20 +53,14 @@ RegisterClass Mir2Lir::LocToRegClass(RegLocation loc) { return res; } -void Mir2Lir::LockArg(int in_position, bool wide) { - RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); - RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) : - RegStorage::InvalidReg(); +void Mir2Lir::LockArg(int in_position, bool) { + RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); - if (reg_arg_low.Valid()) { - LockTemp(reg_arg_low); - } - if (reg_arg_high.Valid() && reg_arg_low.NotExactlyEquals(reg_arg_high)) { - LockTemp(reg_arg_high); + if (reg_arg.Valid()) { + LockTemp(reg_arg); } } -// TODO: simplify when 32-bit targets go hard-float. RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) { ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); @@ -87,81 +81,38 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) offset += sizeof(uint64_t); } - if (cu_->target64) { - RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); - if (!reg_arg.Valid()) { - RegStorage new_reg = - wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class); - LoadBaseDisp(TargetPtrReg(kSp), offset, new_reg, wide ? k64 : k32, kNotVolatile); - return new_reg; - } else { - // Check if we need to copy the arg to a different reg_class. 
- if (!RegClassMatches(reg_class, reg_arg)) { - if (wide) { - RegStorage new_reg = AllocTypedTempWide(false, reg_class); - OpRegCopyWide(new_reg, reg_arg); - reg_arg = new_reg; - } else { - RegStorage new_reg = AllocTypedTemp(false, reg_class); - OpRegCopy(new_reg, reg_arg); - reg_arg = new_reg; - } - } - } - return reg_arg; - } + RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); - RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); - RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) : - RegStorage::InvalidReg(); - - // If the VR is wide and there is no register for high part, we need to load it. - if (wide && !reg_arg_high.Valid()) { - // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg. - if (!reg_arg_low.Valid()) { - RegStorage new_regs = AllocTypedTempWide(false, reg_class); - LoadBaseDisp(TargetPtrReg(kSp), offset, new_regs, k64, kNotVolatile); - return new_regs; // The reg_class is OK, we can return. - } else { - // Assume that no ABI allows splitting a wide fp reg between a narrow fp reg and memory, - // i.e. the low part is in a core reg. Load the second part in a core reg as well for now. - DCHECK(!reg_arg_low.IsFloat()); - reg_arg_high = AllocTemp(); - int offset_high = offset + sizeof(uint32_t); - Load32Disp(TargetPtrReg(kSp), offset_high, reg_arg_high); - // Continue below to check the reg_class. - } + // TODO: REVISIT: This adds a spill of low part while we could just copy it. + if (reg_arg.Valid() && wide && (reg_arg.GetWideKind() == kNotWide)) { + // For wide register we've got only half of it. + // Flush it to memory then. + StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, k32, kNotVolatile); + reg_arg = RegStorage::InvalidReg(); } - // If the low part is not in a register yet, we need to load it. - if (!reg_arg_low.Valid()) { - // Assume that if the low part of a wide arg is passed in memory, so is the high part, - // thus we don't get here for wide args as it's handled above. Big-endian ABIs could - // conceivably break this assumption but Android supports only little-endian architectures. - DCHECK(!wide); - reg_arg_low = AllocTypedTemp(false, reg_class); - Load32Disp(TargetPtrReg(kSp), offset, reg_arg_low); - return reg_arg_low; // The reg_class is OK, we can return. - } - - RegStorage reg_arg = wide ? RegStorage::MakeRegPair(reg_arg_low, reg_arg_high) : reg_arg_low; - // Check if we need to copy the arg to a different reg_class. - if (!RegClassMatches(reg_class, reg_arg)) { - if (wide) { - RegStorage new_regs = AllocTypedTempWide(false, reg_class); - OpRegCopyWide(new_regs, reg_arg); - reg_arg = new_regs; - } else { - RegStorage new_reg = AllocTypedTemp(false, reg_class); - OpRegCopy(new_reg, reg_arg); - reg_arg = new_reg; + if (!reg_arg.Valid()) { + reg_arg = wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class); + LoadBaseDisp(TargetPtrReg(kSp), offset, reg_arg, wide ? k64 : k32, kNotVolatile); + } else { + // Check if we need to copy the arg to a different reg_class. + if (!RegClassMatches(reg_class, reg_arg)) { + if (wide) { + RegStorage new_reg = AllocTypedTempWide(false, reg_class); + OpRegCopyWide(new_reg, reg_arg); + reg_arg = new_reg; + } else { + RegStorage new_reg = AllocTypedTemp(false, reg_class); + OpRegCopy(new_reg, reg_arg); + reg_arg = new_reg; + } } } return reg_arg; } -// TODO: simpilfy when 32-bit targets go hard float. 
void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { + DCHECK_EQ(rl_dest.location, kLocPhysReg); ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); if (cu_->instruction_set == kX86) { @@ -180,48 +131,23 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { offset += sizeof(uint64_t); } - if (!rl_dest.wide) { - RegStorage reg = GetArgMappingToPhysicalReg(in_position); - if (reg.Valid()) { - OpRegCopy(rl_dest.reg, reg); - } else { - Load32Disp(TargetPtrReg(kSp), offset, rl_dest.reg); - } - } else { - if (cu_->target64) { - RegStorage reg = GetArgMappingToPhysicalReg(in_position); - if (reg.Valid()) { - OpRegCopy(rl_dest.reg, reg); - } else { - LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, k64, kNotVolatile); - } - return; - } - - RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); - RegStorage reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1); + RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); - if (cu_->instruction_set == kX86) { - // Can't handle double split between reg & memory. Flush reg half to memory. - if (rl_dest.reg.IsDouble() && (reg_arg_low.Valid() != reg_arg_high.Valid())) { - DCHECK(reg_arg_low.Valid()); - DCHECK(!reg_arg_high.Valid()); - Store32Disp(TargetPtrReg(kSp), offset, reg_arg_low); - reg_arg_low = RegStorage::InvalidReg(); - } - } + // TODO: REVISIT: This adds a spill of low part while we could just copy it. + if (reg_arg.Valid() && rl_dest.wide && (reg_arg.GetWideKind() == kNotWide)) { + // For wide register we've got only half of it. + // Flush it to memory then. + StoreBaseDisp(TargetPtrReg(kSp), offset, reg_arg, k32, kNotVolatile); + reg_arg = RegStorage::InvalidReg(); + } - if (reg_arg_low.Valid() && reg_arg_high.Valid()) { - OpRegCopyWide(rl_dest.reg, RegStorage::MakeRegPair(reg_arg_low, reg_arg_high)); - } else if (reg_arg_low.Valid() && !reg_arg_high.Valid()) { - OpRegCopy(rl_dest.reg, reg_arg_low); - int offset_high = offset + sizeof(uint32_t); - Load32Disp(TargetPtrReg(kSp), offset_high, rl_dest.reg.GetHigh()); - } else if (!reg_arg_low.Valid() && reg_arg_high.Valid()) { - OpRegCopy(rl_dest.reg.GetHigh(), reg_arg_high); - Load32Disp(TargetPtrReg(kSp), offset, rl_dest.reg.GetLow()); + if (!reg_arg.Valid()) { + LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, rl_dest.wide ? k64 : k32, kNotVolatile); + } else { + if (rl_dest.wide) { + OpRegCopyWide(rl_dest.reg, reg_arg); } else { - LoadBaseDisp(TargetPtrReg(kSp), offset, rl_dest.reg, k64, kNotVolatile); + OpRegCopy(rl_dest.reg, reg_arg); } } } @@ -1372,4 +1298,35 @@ size_t Mir2Lir::GetInstructionOffset(LIR* lir) { UNREACHABLE(); } +void Mir2Lir::InToRegStorageMapping::Initialize(ShortyIterator* shorty, + InToRegStorageMapper* mapper) { + DCHECK(mapper != nullptr); + DCHECK(shorty != nullptr); + max_mapped_in_ = -1; + has_arguments_on_stack_ = false; + while (shorty->Next()) { + ShortyArg arg = shorty->GetArg(); + RegStorage reg = mapper->GetNextReg(arg); + if (reg.Valid()) { + mapping_.Put(count_, reg); + max_mapped_in_ = count_; + // If the VR is wide and was mapped as wide then account for it. + if (arg.IsWide() && reg.Is64Bit()) { + max_mapped_in_++; + } + } else { + has_arguments_on_stack_ = true; + } + count_ += arg.IsWide() ? 
2 : 1; + } + initialized_ = true; +} + +RegStorage Mir2Lir::InToRegStorageMapping::Get(int in_position) { + DCHECK(IsInitialized()); + DCHECK_LT(in_position, count_); + auto res = mapping_.find(in_position); + return res != mapping_.end() ? res->second : RegStorage::InvalidReg(); +} + } // namespace art diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 5d78a6e25c..dd0933018f 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -905,19 +905,14 @@ class Mir2Lir : public Backend { virtual LIR* GenCallInsn(const MirMethodLoweringInfo& method_info); virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); - virtual int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this); - virtual int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this); - + virtual int GenDalvikArgs(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + virtual int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count); + virtual void GenDalvikArgsFlushPromoted(CallInfo* info, int start); /** * @brief Used to determine the register location of destination. * @details This is needed during generation of inline intrinsics because it finds destination @@ -958,12 +953,6 @@ class Mir2Lir : public Backend { bool GenInlinedUnsafeGet(CallInfo* info, bool is_long, bool is_volatile); bool GenInlinedUnsafePut(CallInfo* info, bool is_long, bool is_object, bool is_volatile, bool is_ordered); - virtual int LoadArgRegs(CallInfo* info, int call_state, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this); // Shared by all targets - implemented in gen_loadstore.cc. RegLocation LoadCurrMethod(); @@ -1228,7 +1217,7 @@ class Mir2Lir : public Backend { } } - virtual RegStorage GetArgMappingToPhysicalReg(int arg_num) = 0; + RegStorage GetArgMappingToPhysicalReg(int arg_num); virtual RegLocation GetReturnAlt() = 0; virtual RegLocation GetReturnWideAlt() = 0; virtual RegLocation LocCReturn() = 0; @@ -1780,6 +1769,63 @@ class Mir2Lir : public Backend { // to deduplicate the masks. ResourceMaskCache mask_cache_; + protected: + // ABI support + class ShortyArg { + public: + explicit ShortyArg(char type) : type_(type) { } + bool IsFP() { return type_ == 'F' || type_ == 'D'; } + bool IsWide() { return type_ == 'J' || type_ == 'D'; } + bool IsRef() { return type_ == 'L'; } + char GetType() { return type_; } + private: + char type_; + }; + + class ShortyIterator { + public: + ShortyIterator(const char* shorty, bool is_static); + bool Next(); + ShortyArg GetArg() { return ShortyArg(pending_this_ ? 
'L' : *cur_); } + private: + const char* cur_; + bool pending_this_; + bool initialized_; + }; + + class InToRegStorageMapper { + public: + virtual RegStorage GetNextReg(ShortyArg arg) = 0; + virtual ~InToRegStorageMapper() {} + virtual void Reset() = 0; + }; + + class InToRegStorageMapping { + public: + explicit InToRegStorageMapping(ArenaAllocator* arena) + : mapping_(std::less<int>(), arena->Adapter()), count_(0), + max_mapped_in_(0), has_arguments_on_stack_(false), initialized_(false) {} + void Initialize(ShortyIterator* shorty, InToRegStorageMapper* mapper); + /** + * @return the index of last VR mapped to physical register. In other words + * any VR starting from (return value + 1) index is mapped to memory. + */ + int GetMaxMappedIn() { return max_mapped_in_; } + bool HasArgumentsOnStack() { return has_arguments_on_stack_; } + RegStorage Get(int in_position); + bool IsInitialized() { return initialized_; } + private: + ArenaSafeMap<int, RegStorage> mapping_; + int count_; + int max_mapped_in_; + bool has_arguments_on_stack_; + bool initialized_; + }; + + // Cached mapping of method input to reg storage according to ABI. + InToRegStorageMapping in_to_reg_storage_mapping_; + virtual InToRegStorageMapper* GetResetedInToRegStorageMapper() = 0; + private: static bool SizeMatchesTypeForEntrypoint(OpSize size, Primitive::Type type); }; // Class Mir2Lir diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 9cb0bf53e6..c7d83dda62 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -28,40 +28,48 @@ namespace art { class X86Mir2Lir : public Mir2Lir { protected: - class InToRegStorageMapper { - public: - virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref) = 0; - virtual ~InToRegStorageMapper() {} - }; - class InToRegStorageX86_64Mapper : public InToRegStorageMapper { public: - explicit InToRegStorageX86_64Mapper(Mir2Lir* ml) : ml_(ml), cur_core_reg_(0), cur_fp_reg_(0) {} - virtual ~InToRegStorageX86_64Mapper() {} - virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide, bool is_ref); + explicit InToRegStorageX86_64Mapper(Mir2Lir* m2l) + : m2l_(m2l), cur_core_reg_(0), cur_fp_reg_(0) {} + virtual RegStorage GetNextReg(ShortyArg arg); + virtual void Reset() OVERRIDE { + cur_core_reg_ = 0; + cur_fp_reg_ = 0; + } protected: - Mir2Lir* ml_; + Mir2Lir* m2l_; private: - int cur_core_reg_; - int cur_fp_reg_; + size_t cur_core_reg_; + size_t cur_fp_reg_; }; - class InToRegStorageMapping { + class InToRegStorageX86Mapper : public InToRegStorageMapper { public: - InToRegStorageMapping() : max_mapped_in_(0), is_there_stack_mapped_(false), - initialized_(false) {} - void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper); - int GetMaxMappedIn() { return max_mapped_in_; } - bool IsThereStackMapped() { return is_there_stack_mapped_; } - RegStorage Get(int in_position); - bool IsInitialized() { return initialized_; } + explicit InToRegStorageX86Mapper(Mir2Lir* m2l) : m2l_(m2l), cur_core_reg_(0) {} + virtual RegStorage GetNextReg(ShortyArg arg); + virtual void Reset() OVERRIDE { + cur_core_reg_ = 0; + } + protected: + Mir2Lir* m2l_; private: - std::map<int, RegStorage> mapping_; - int max_mapped_in_; - bool is_there_stack_mapped_; - bool initialized_; + size_t cur_core_reg_; }; + InToRegStorageX86_64Mapper in_to_reg_storage_x86_64_mapper_; + InToRegStorageX86Mapper in_to_reg_storage_x86_mapper_; + InToRegStorageMapper* GetResetedInToRegStorageMapper() OVERRIDE { 
+ InToRegStorageMapper* res; + if (cu_->target64) { + res = &in_to_reg_storage_x86_64_mapper_; + } else { + res = &in_to_reg_storage_x86_mapper_; + } + res->Reset(); + return res; + } + class ExplicitTempRegisterLock { public: ExplicitTempRegisterLock(X86Mir2Lir* mir_to_lir, int n_regs, ...); @@ -71,6 +79,8 @@ class X86Mir2Lir : public Mir2Lir { X86Mir2Lir* const mir_to_lir_; }; + virtual int GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) OVERRIDE; + public: X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena); @@ -125,8 +135,6 @@ class X86Mir2Lir : public Mir2Lir { return TargetReg(symbolic_reg, cu_->target64 ? kWide : kNotWide); } - RegStorage GetArgMappingToPhysicalReg(int arg_num) OVERRIDE; - RegLocation GetReturnAlt() OVERRIDE; RegLocation GetReturnWideAlt() OVERRIDE; RegLocation LocCReturn() OVERRIDE; @@ -350,22 +358,7 @@ class X86Mir2Lir : public Mir2Lir { void LoadClassType(const DexFile& dex_file, uint32_t type_idx, SpecialTargetRegister symbolic_reg) OVERRIDE; - void FlushIns(RegLocation* ArgLocs, RegLocation rl_method) OVERRIDE; - NextCallInsn GetNextSDCallInsn() OVERRIDE; - int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this) OVERRIDE; - - int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, - NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, - uintptr_t direct_code, uintptr_t direct_method, InvokeType type, - bool skip_this) OVERRIDE; /* * @brief Generate a relative call to the method that will be patched at link time. @@ -439,8 +432,6 @@ class X86Mir2Lir : public Mir2Lir { LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_src, OpSize size, int opt_flags = 0); - RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num) const; - int AssignInsnOffsets(); void AssignOffsets(); AssemblerStatus AssembleInstructions(CodeOffset start_addr); @@ -1000,8 +991,6 @@ class X86Mir2Lir : public Mir2Lir { */ static void DumpRegLocation(RegLocation loc); - InToRegStorageMapping in_to_reg_storage_mapping_; - private: void SwapBits(RegStorage result_reg, int shift, int32_t value); void SwapBits64(RegStorage result_reg, int shift, int64_t value); diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index ae80e9f1c5..5f6cdda0d3 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -814,6 +814,7 @@ RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatil X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena) : Mir2Lir(cu, mir_graph, arena), + in_to_reg_storage_x86_64_mapper_(this), in_to_reg_storage_x86_mapper_(this), base_of_code_(nullptr), store_method_addr_(false), store_method_addr_used_(false), method_address_insns_(arena->Adapter()), class_type_address_insns_(arena->Adapter()), @@ -2407,451 +2408,44 @@ LIR* X86Mir2Lir::AddVectorLiteral(int32_t* constants) { } // ------------ ABI support: mapping of args to physical registers ------------- -RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide, - bool is_ref) { +RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(ShortyArg arg) { const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, 
kArg3, kArg4, kArg5}; - const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / - sizeof(SpecialTargetRegister); + const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg); const SpecialTargetRegister fpArgMappingToPhysicalReg[] = {kFArg0, kFArg1, kFArg2, kFArg3, kFArg4, kFArg5, kFArg6, kFArg7}; - const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / - sizeof(SpecialTargetRegister); + const size_t fpArgMappingToPhysicalRegSize = arraysize(fpArgMappingToPhysicalReg); - if (is_double_or_float) { + if (arg.IsFP()) { if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) { - return ml_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++], is_wide ? kWide : kNotWide); + return m2l_->TargetReg(fpArgMappingToPhysicalReg[cur_fp_reg_++], + arg.IsWide() ? kWide : kNotWide); } } else { if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { - return ml_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], - is_ref ? kRef : (is_wide ? kWide : kNotWide)); + return m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], + arg.IsRef() ? kRef : (arg.IsWide() ? kWide : kNotWide)); } } return RegStorage::InvalidReg(); } -RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) { - DCHECK(IsInitialized()); - auto res = mapping_.find(in_position); - return res != mapping_.end() ? res->second : RegStorage::InvalidReg(); -} - -void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, - InToRegStorageMapper* mapper) { - DCHECK(mapper != nullptr); - max_mapped_in_ = -1; - is_there_stack_mapped_ = false; - for (int in_position = 0; in_position < count; in_position++) { - RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, - arg_locs[in_position].wide, arg_locs[in_position].ref); - if (reg.Valid()) { - mapping_[in_position] = reg; - max_mapped_in_ = std::max(max_mapped_in_, in_position); - if (arg_locs[in_position].wide) { - // We covered 2 args, so skip the next one - in_position++; - } - } else { - is_there_stack_mapped_ = true; - } - } - initialized_ = true; -} - -RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - if (!cu_->target64) { - return GetCoreArgMappingToPhysicalReg(arg_num); - } - - if (!in_to_reg_storage_mapping_.IsInitialized()) { - int start_vreg = cu_->mir_graph->GetFirstInVR(); - RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg]; - - InToRegStorageX86_64Mapper mapper(this); - in_to_reg_storage_mapping_.Initialize(arg_locs, mir_graph_->GetNumOfInVRs(), &mapper); - } - return in_to_reg_storage_mapping_.Get(arg_num); -} - -RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) const { - // For the 32-bit internal ABI, the first 3 arguments are passed in registers. - // Not used for 64-bit, TODO: Move X86_32 to the same framework - switch (core_arg_num) { - case 0: return TargetReg32(kArg1); - case 1: return TargetReg32(kArg2); - case 2: return TargetReg32(kArg3); - default: return RegStorage::InvalidReg(); - } -} +RegStorage X86Mir2Lir::InToRegStorageX86Mapper::GetNextReg(ShortyArg arg) { + const SpecialTargetRegister coreArgMappingToPhysicalReg[] = {kArg1, kArg2, kArg3}; + const size_t coreArgMappingToPhysicalRegSize = arraysize(coreArgMappingToPhysicalReg); -// ---------End of ABI support: mapping of args to physical registers ------------- - -/* - * If there are any ins passed in registers that have not been promoted - * to a callee-save register, flush them to the frame. Perform initial - * assignment of promoted arguments. 
- * - * ArgLocs is an array of location records describing the incoming arguments - * with one location record per word of argument. - */ -void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { - if (!cu_->target64) return Mir2Lir::FlushIns(ArgLocs, rl_method); - /* - * Dummy up a RegLocation for the incoming Method* - * It will attempt to keep kArg0 live (or copy it to home location - * if promoted). - */ - - RegLocation rl_src = rl_method; - rl_src.location = kLocPhysReg; - rl_src.reg = TargetReg(kArg0, kRef); - rl_src.home = false; - MarkLive(rl_src); - StoreValue(rl_method, rl_src); - // If Method* has been promoted, explicitly flush - if (rl_method.location == kLocPhysReg) { - const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32; - StoreRefDisp(rs_rSP, 0, As32BitReg(TargetReg(kArg0, kRef)), kNotVolatile); - } - - if (mir_graph_->GetNumOfInVRs() == 0) { - return; - } - - int start_vreg = cu_->mir_graph->GetFirstInVR(); - /* - * Copy incoming arguments to their proper home locations. - * NOTE: an older version of dx had an issue in which - * it would reuse static method argument registers. - * This could result in the same Dalvik virtual register - * being promoted to both core and fp regs. To account for this, - * we only copy to the corresponding promoted physical register - * if it matches the type of the SSA name for the incoming - * argument. It is also possible that long and double arguments - * end up half-promoted. In those cases, we must flush the promoted - * half to memory as well. - */ - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - for (uint32_t i = 0; i < mir_graph_->GetNumOfInVRs(); i++) { - // get reg corresponding to input - RegStorage reg = GetArgMappingToPhysicalReg(i); - - RegLocation* t_loc = &ArgLocs[i]; - if (reg.Valid()) { - // If arriving in register. - - // We have already updated the arg location with promoted info - // so we can be based on it. - if (t_loc->location == kLocPhysReg) { - // Just copy it. - OpRegCopy(t_loc->reg, reg); - } else { - // Needs flush. - if (t_loc->ref) { - StoreRefDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), reg, kNotVolatile); - } else { - StoreBaseDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), reg, t_loc->wide ? k64 : k32, - kNotVolatile); - } - } - } else { - // If arriving in frame & promoted. - if (t_loc->location == kLocPhysReg) { - if (t_loc->ref) { - LoadRefDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), t_loc->reg, kNotVolatile); - } else { - LoadBaseDisp(rs_rX86_SP_64, SRegOffset(start_vreg + i), t_loc->reg, - t_loc->wide ? k64 : k32, kNotVolatile); - } - } - } - if (t_loc->wide) { - // Increment i to skip the next one. - i++; + RegStorage result = RegStorage::InvalidReg(); + if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], + arg.IsRef() ? kRef : kNotWide); + if (arg.IsWide() && cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = RegStorage::MakeRegPair( + result, m2l_->TargetReg(coreArgMappingToPhysicalReg[cur_core_reg_++], kNotWide)); } } + return result; } -/* - * Load up to 5 arguments, the first three of which will be in - * kArg1 .. kArg3. On entry kArg0 contains the current method pointer, - * and as part of the load sequence, it must be replaced with - * the target method pointer. Note, this may also be called - * for "range" variants if the number of arguments is 5 or fewer. 
- */ -int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, - int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, - uintptr_t direct_method, InvokeType type, bool skip_this) { - if (!cu_->target64) { - return Mir2Lir::GenDalvikArgsNoRange(info, - call_state, pcrLabel, next_call_insn, - target_method, - vtable_idx, direct_code, - direct_method, type, skip_this); - } - return GenDalvikArgsRange(info, - call_state, pcrLabel, next_call_insn, - target_method, - vtable_idx, direct_code, - direct_method, type, skip_this); -} - -/* - * May have 0+ arguments (also used for jumbo). Note that - * source virtual registers may be in physical registers, so may - * need to be flushed to home location before copying. This - * applies to arg3 and above (see below). - * - * Two general strategies: - * If < 20 arguments - * Pass args 3-18 using vldm/vstm block copy - * Pass arg0, arg1 & arg2 in kArg1-kArg3 - * If 20+ arguments - * Pass args arg19+ using memcpy block copy - * Pass arg0, arg1 & arg2 in kArg1-kArg3 - * - */ -int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, - LIR** pcrLabel, NextCallInsn next_call_insn, - const MethodReference& target_method, - uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, - InvokeType type, bool skip_this) { - if (!cu_->target64) { - return Mir2Lir::GenDalvikArgsRange(info, call_state, - pcrLabel, next_call_insn, - target_method, - vtable_idx, direct_code, direct_method, - type, skip_this); - } - - /* If no arguments, just return */ - if (info->num_arg_words == 0) - return call_state; - - const int start_index = skip_this ? 1 : 0; - - InToRegStorageX86_64Mapper mapper(this); - InToRegStorageMapping in_to_reg_storage_mapping; - in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper); - const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn(); - const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 : - info->args[last_mapped_in].wide ? 2 : 1; - int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped); - - // Fisrt of all, check whether it make sense to use bulk copying - // Optimization is aplicable only for range case - // TODO: make a constant instead of 2 - if (info->is_range && regs_left_to_pass_via_stack >= 2) { - // Scan the rest of the args - if in phys_reg flush to memory - for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) { - RegLocation loc = info->args[next_arg]; - if (loc.wide) { - loc = UpdateLocWide(loc); - if (loc.location == kLocPhysReg) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(rs_rX86_SP_64, SRegOffset(loc.s_reg_low), loc.reg, k64, kNotVolatile); - } - next_arg += 2; - } else { - loc = UpdateLoc(loc); - if (loc.location == kLocPhysReg) { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - StoreBaseDisp(rs_rX86_SP_64, SRegOffset(loc.s_reg_low), loc.reg, k32, kNotVolatile); - } - next_arg++; - } - } - - // The rest can be copied together - int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low); - int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped, - cu_->instruction_set); - - int current_src_offset = start_offset; - int current_dest_offset = outs_offset; - - // Only davik regs are accessed in this loop; no next_call_insn() calls. 
- ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - while (regs_left_to_pass_via_stack > 0) { - // This is based on the knowledge that the stack itself is 16-byte aligned. - bool src_is_16b_aligned = (current_src_offset & 0xF) == 0; - bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0; - size_t bytes_to_move; - - /* - * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a - * a 128-bit move because we won't get the chance to try to aligned. If there are more than - * 4 registers left to move, consider doing a 128-bit only if either src or dest are aligned. - * We do this because we could potentially do a smaller move to align. - */ - if (regs_left_to_pass_via_stack == 4 || - (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) { - // Moving 128-bits via xmm register. - bytes_to_move = sizeof(uint32_t) * 4; - - // Allocate a free xmm temp. Since we are working through the calling sequence, - // we expect to have an xmm temporary available. AllocTempDouble will abort if - // there are no free registers. - RegStorage temp = AllocTempDouble(); - - LIR* ld1 = nullptr; - LIR* ld2 = nullptr; - LIR* st1 = nullptr; - LIR* st2 = nullptr; - - /* - * The logic is similar for both loads and stores. If we have 16-byte alignment, - * do an aligned move. If we have 8-byte alignment, then do the move in two - * parts. This approach prevents possible cache line splits. Finally, fall back - * to doing an unaligned move. In most cases we likely won't split the cache - * line but we cannot prove it and thus take a conservative approach. - */ - bool src_is_8b_aligned = (current_src_offset & 0x7) == 0; - bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0; - - ScopedMemRefType mem_ref_type2(this, ResourceMask::kDalvikReg); - if (src_is_16b_aligned) { - ld1 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset, kMovA128FP); - } else if (src_is_8b_aligned) { - ld1 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset, kMovLo128FP); - ld2 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset + (bytes_to_move >> 1), - kMovHi128FP); - } else { - ld1 = OpMovRegMem(temp, rs_rX86_SP_64, current_src_offset, kMovU128FP); - } - - if (dest_is_16b_aligned) { - st1 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset, temp, kMovA128FP); - } else if (dest_is_8b_aligned) { - st1 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset, temp, kMovLo128FP); - st2 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset + (bytes_to_move >> 1), - temp, kMovHi128FP); - } else { - st1 = OpMovMemReg(rs_rX86_SP_64, current_dest_offset, temp, kMovU128FP); - } - - // TODO If we could keep track of aliasing information for memory accesses that are wider - // than 64-bit, we wouldn't need to set up a barrier. - if (ld1 != nullptr) { - if (ld2 != nullptr) { - // For 64-bit load we can actually set up the aliasing information. - AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true); - AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true); - } else { - // Set barrier for 128-bit load. - ld1->u.m.def_mask = &kEncodeAll; - } - } - if (st1 != nullptr) { - if (st2 != nullptr) { - // For 64-bit store we can actually set up the aliasing information. - AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true); - AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true); - } else { - // Set barrier for 128-bit store. 
- st1->u.m.def_mask = &kEncodeAll; - } - } - - // Free the temporary used for the data movement. - FreeTemp(temp); - } else { - // Moving 32-bits via general purpose register. - bytes_to_move = sizeof(uint32_t); - - // Instead of allocating a new temp, simply reuse one of the registers being used - // for argument passing. - RegStorage temp = TargetReg(kArg3, kNotWide); - - // Now load the argument VR and store to the outs. - Load32Disp(rs_rX86_SP_64, current_src_offset, temp); - Store32Disp(rs_rX86_SP_64, current_dest_offset, temp); - } - - current_src_offset += bytes_to_move; - current_dest_offset += bytes_to_move; - regs_left_to_pass_via_stack -= (bytes_to_move >> 2); - } - DCHECK_EQ(regs_left_to_pass_via_stack, 0); - } - - // Now handle rest not registers if they are - if (in_to_reg_storage_mapping.IsThereStackMapped()) { - RegStorage regSingle = TargetReg(kArg2, kNotWide); - RegStorage regWide = TargetReg(kArg3, kWide); - for (int i = start_index; - i < last_mapped_in + size_of_the_last_mapped + regs_left_to_pass_via_stack; i++) { - RegLocation rl_arg = info->args[i]; - rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); - if (!reg.Valid()) { - int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); - - { - ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); - if (rl_arg.wide) { - if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(rs_rX86_SP_64, out_offset, rl_arg.reg, k64, kNotVolatile); - } else { - LoadValueDirectWideFixed(rl_arg, regWide); - StoreBaseDisp(rs_rX86_SP_64, out_offset, regWide, k64, kNotVolatile); - } - } else { - if (rl_arg.location == kLocPhysReg) { - StoreBaseDisp(rs_rX86_SP_64, out_offset, rl_arg.reg, k32, kNotVolatile); - } else { - LoadValueDirectFixed(rl_arg, regSingle); - StoreBaseDisp(rs_rX86_SP_64, out_offset, regSingle, k32, kNotVolatile); - } - } - } - call_state = next_call_insn(cu_, info, call_state, target_method, - vtable_idx, direct_code, direct_method, type); - } - if (rl_arg.wide) { - i++; - } - } - } - - // Finish with mapped registers - for (int i = start_index; i <= last_mapped_in; i++) { - RegLocation rl_arg = info->args[i]; - rl_arg = UpdateRawLoc(rl_arg); - RegStorage reg = in_to_reg_storage_mapping.Get(i); - if (reg.Valid()) { - if (rl_arg.wide) { - LoadValueDirectWideFixed(rl_arg, reg); - } else { - LoadValueDirectFixed(rl_arg, reg); - } - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - } - if (rl_arg.wide) { - i++; - } - } - - call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); - if (pcrLabel) { - if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitNullChecks()) { - *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1, kRef), info->opt_flags); - } else { - *pcrLabel = nullptr; - // In lieu of generating a check for kArg1 being null, we need to - // perform a load when doing implicit checks. 
- RegStorage tmp = AllocTemp(); - Load32Disp(TargetReg(kArg1, kRef), 0, tmp); - MarkPossibleNullPointerException(info->opt_flags); - FreeTemp(tmp); - } - } - return call_state; -} +// ---------End of ABI support: mapping of args to physical registers ------------- bool X86Mir2Lir::GenInlinedCharAt(CallInfo* info) { // Location of reference to data array @@ -2980,4 +2574,122 @@ X86Mir2Lir::ExplicitTempRegisterLock::~ExplicitTempRegisterLock() { } } +int X86Mir2Lir::GenDalvikArgsBulkCopy(CallInfo* info, int first, int count) { + if (count < 4) { + // It does not make sense to use this utility if we have no chance to use + // 128-bit move. + return count; + } + GenDalvikArgsFlushPromoted(info, first); + + // The rest can be copied together + int current_src_offset = SRegOffset(info->args[first].s_reg_low); + int current_dest_offset = StackVisitor::GetOutVROffset(first, cu_->instruction_set); + + // Only davik regs are accessed in this loop; no next_call_insn() calls. + ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); + while (count > 0) { + // This is based on the knowledge that the stack itself is 16-byte aligned. + bool src_is_16b_aligned = (current_src_offset & 0xF) == 0; + bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0; + size_t bytes_to_move; + + /* + * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a + * a 128-bit move because we won't get the chance to try to aligned. If there are more than + * 4 registers left to move, consider doing a 128-bit only if either src or dest are aligned. + * We do this because we could potentially do a smaller move to align. + */ + if (count == 4 || (count > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) { + // Moving 128-bits via xmm register. + bytes_to_move = sizeof(uint32_t) * 4; + + // Allocate a free xmm temp. Since we are working through the calling sequence, + // we expect to have an xmm temporary available. AllocTempDouble will abort if + // there are no free registers. + RegStorage temp = AllocTempDouble(); + + LIR* ld1 = nullptr; + LIR* ld2 = nullptr; + LIR* st1 = nullptr; + LIR* st2 = nullptr; + + /* + * The logic is similar for both loads and stores. If we have 16-byte alignment, + * do an aligned move. If we have 8-byte alignment, then do the move in two + * parts. This approach prevents possible cache line splits. Finally, fall back + * to doing an unaligned move. In most cases we likely won't split the cache + * line but we cannot prove it and thus take a conservative approach. 
+ */ + bool src_is_8b_aligned = (current_src_offset & 0x7) == 0; + bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0; + + if (src_is_16b_aligned) { + ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovA128FP); + } else if (src_is_8b_aligned) { + ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovLo128FP); + ld2 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset + (bytes_to_move >> 1), + kMovHi128FP); + } else { + ld1 = OpMovRegMem(temp, TargetPtrReg(kSp), current_src_offset, kMovU128FP); + } + + if (dest_is_16b_aligned) { + st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovA128FP); + } else if (dest_is_8b_aligned) { + st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovLo128FP); + st2 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset + (bytes_to_move >> 1), + temp, kMovHi128FP); + } else { + st1 = OpMovMemReg(TargetPtrReg(kSp), current_dest_offset, temp, kMovU128FP); + } + + // TODO If we could keep track of aliasing information for memory accesses that are wider + // than 64-bit, we wouldn't need to set up a barrier. + if (ld1 != nullptr) { + if (ld2 != nullptr) { + // For 64-bit load we can actually set up the aliasing information. + AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true); + AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, + true); + } else { + // Set barrier for 128-bit load. + ld1->u.m.def_mask = &kEncodeAll; + } + } + if (st1 != nullptr) { + if (st2 != nullptr) { + // For 64-bit store we can actually set up the aliasing information. + AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true); + AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, + true); + } else { + // Set barrier for 128-bit store. + st1->u.m.def_mask = &kEncodeAll; + } + } + + // Free the temporary used for the data movement. + FreeTemp(temp); + } else { + // Moving 32-bits via general purpose register. + bytes_to_move = sizeof(uint32_t); + + // Instead of allocating a new temp, simply reuse one of the registers being used + // for argument passing. + RegStorage temp = TargetReg(kArg3, kNotWide); + + // Now load the argument VR and store to the outs. + Load32Disp(TargetPtrReg(kSp), current_src_offset, temp); + Store32Disp(TargetPtrReg(kSp), current_dest_offset, temp); + } + + current_src_offset += bytes_to_move; + current_dest_offset += bytes_to_move; + count -= (bytes_to_move >> 2); + } + DCHECK_EQ(count, 0); + return count; +} + } // namespace art diff --git a/test/800-smali/expected.txt b/test/800-smali/expected.txt index 0f7001fc68..5f86f1e047 100644 --- a/test/800-smali/expected.txt +++ b/test/800-smali/expected.txt @@ -8,4 +8,5 @@ b/18380491 invoke-super abstract BadCaseInOpRegRegReg CmpLong +FloatIntConstPassing Done! 
diff --git a/test/800-smali/smali/FloatIntConstPassing.smali b/test/800-smali/smali/FloatIntConstPassing.smali new file mode 100644 index 0000000000..a2916c5dbb --- /dev/null +++ b/test/800-smali/smali/FloatIntConstPassing.smali @@ -0,0 +1,29 @@ +.class public LFloatIntConstPassing; + +.super Ljava/lang/Object; + +.method public static getInt(I)I + .registers 2 + const/4 v0, 1 + add-int/2addr v0, p0 + return v0 +.end method + +.method public static getFloat(F)F + .registers 2 + const/4 v0, 0 + mul-float/2addr v0, p0 + return v0 +.end method + +.method public static run()I + .registers 3 + const/4 v0, 1 + invoke-static {v0}, LFloatIntConstPassing;->getInt(I)I + move-result v1 + invoke-static {v0}, LFloatIntConstPassing;->getFloat(F)F + move-result v2 + float-to-int v2, v2 + add-int/2addr v1, v2 + return v1 +.end method diff --git a/test/800-smali/src/Main.java b/test/800-smali/src/Main.java index f2c1ab57e7..a2db05135d 100644 --- a/test/800-smali/src/Main.java +++ b/test/800-smali/src/Main.java @@ -64,6 +64,7 @@ public class Main { new Object[]{0}, new AbstractMethodError(), null)); testCases.add(new TestCase("BadCaseInOpRegRegReg", "BadCaseInOpRegRegReg", "getInt", null, null, 2)); testCases.add(new TestCase("CmpLong", "CmpLong", "run", null, null, 0)); + testCases.add(new TestCase("FloatIntConstPassing", "FloatIntConstPassing", "run", null, null, 2)); } public void runTests() { |