Diffstat (limited to 'compiler/dex/quick')
-rw-r--r--  compiler/dex/quick/arm64/int_arm64.cc          | 131
-rw-r--r--  compiler/dex/quick/codegen_util.cc             |  16
-rw-r--r--  compiler/dex/quick/dex_file_method_inliner.cc  |   4
-rw-r--r--  compiler/dex/quick/mir_to_lir.cc               |   2
-rw-r--r--  compiler/dex/quick/mir_to_lir.h                |  10
-rw-r--r--  compiler/dex/quick/x86/call_x86.cc             |   4
-rw-r--r--  compiler/dex/quick/x86/codegen_x86.h           |   2
-rwxr-xr-x  compiler/dex/quick/x86/target_x86.cc           | 119
-rw-r--r--  compiler/dex/quick/x86/utility_x86.cc          |   2
-rw-r--r--  compiler/dex/quick/x86/x86_lir.h               |   4
10 files changed, 242 insertions, 52 deletions
diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc
index e8f5cb9f09..3ee3e2e61d 100644
--- a/compiler/dex/quick/arm64/int_arm64.cc
+++ b/compiler/dex/quick/arm64/int_arm64.cc
@@ -91,17 +91,121 @@ void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
   RegLocation rl_dest = mir_graph_->GetDest(mir);
   RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg;
   RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
+  rl_src = LoadValue(rl_src, src_reg_class);
+  // rl_src may be aliased with rl_result/rl_dest, so do compare early.
+  OpRegImm(kOpCmp, rl_src.reg, 0);
+
   ArmConditionCode code = ArmConditionEncoding(mir->meta.ccode);
-  RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
-  RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
-  rl_true = LoadValue(rl_true, result_reg_class);
-  rl_false = LoadValue(rl_false, result_reg_class);
-  rl_result = EvalLoc(rl_dest, result_reg_class, true);
-  OpRegImm(kOpCmp, rl_src.reg, 0);
-  NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_true.reg.GetReg(),
-          rl_false.reg.GetReg(), code);
+  // The kMirOpSelect has two variants, one for constants and one for moves.
+  bool is_wide = rl_dest.ref || rl_dest.wide;
+
+  if (mir->ssa_rep->num_uses == 1) {
+    uint32_t true_val = mir->dalvikInsn.vB;
+    uint32_t false_val = mir->dalvikInsn.vC;
+
+    int opcode;             // The opcode.
+    int left_op, right_op;  // The operands.
+    bool rl_result_evaled = false;
+
+    // Check some simple cases.
+    // TODO: Improve this.
+    int zero_reg = (is_wide ? rs_xzr : rs_wzr).GetReg();
+
+    if ((true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0)) {
+      // CSInc cheap based on wzr.
+      if (true_val == 1) {
+        // Negate.
+        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+      }
+
+      left_op = right_op = zero_reg;
+      opcode = is_wide ? WIDE(kA64Csinc4rrrc) : kA64Csinc4rrrc;
+    } else if ((true_val == 0 && false_val == 0xFFFFFFFF) ||
+               (true_val == 0xFFFFFFFF && false_val == 0)) {
+      // CSneg cheap based on wzr.
+      if (true_val == 0xFFFFFFFF) {
+        // Negate.
+        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+      }
+
+      left_op = right_op = zero_reg;
+      opcode = is_wide ? WIDE(kA64Csneg4rrrc) : kA64Csneg4rrrc;
+    } else if (true_val == 0 || false_val == 0) {
+      // Csel half cheap based on wzr.
+      rl_result = EvalLoc(rl_dest, result_reg_class, true);
+      rl_result_evaled = true;
+      if (false_val == 0) {
+        // Negate.
+        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+      }
+      LoadConstantNoClobber(rl_result.reg, true_val == 0 ? false_val : true_val);
+      left_op = zero_reg;
+      right_op = rl_result.reg.GetReg();
+      opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
+    } else if (true_val == 1 || false_val == 1) {
+      // CSInc half cheap based on wzr.
+      rl_result = EvalLoc(rl_dest, result_reg_class, true);
+      rl_result_evaled = true;
+      if (true_val == 1) {
+        // Negate.
+        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+      }
+      LoadConstantNoClobber(rl_result.reg, true_val == 1 ? false_val : true_val);
+      left_op = rl_result.reg.GetReg();
+      right_op = zero_reg;
+      opcode = is_wide ? WIDE(kA64Csinc4rrrc) : kA64Csinc4rrrc;
+    } else if (true_val == 0xFFFFFFFF || false_val == 0xFFFFFFFF) {
+      // CSneg half cheap based on wzr.
+      rl_result = EvalLoc(rl_dest, result_reg_class, true);
+      rl_result_evaled = true;
+      if (true_val == 0xFFFFFFFF) {
+        // Negate.
+        code = ArmConditionEncoding(NegateComparison(mir->meta.ccode));
+      }
+      LoadConstantNoClobber(rl_result.reg, true_val == 0xFFFFFFFF ? false_val : true_val);
+      left_op = rl_result.reg.GetReg();
+      right_op = zero_reg;
+      opcode = is_wide ? WIDE(kA64Csneg4rrrc) : kA64Csneg4rrrc;
+    } else {
+      // Csel. The rest. Use rl_result and a temp.
+      // TODO: To minimize the constants being loaded, check whether one can be inexpensively
+      //       loaded as n - 1 or ~n.
+      rl_result = EvalLoc(rl_dest, result_reg_class, true);
+      rl_result_evaled = true;
+      LoadConstantNoClobber(rl_result.reg, true_val);
+      RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
+      if (rl_dest.wide) {
+        if (t_reg2.Is32Bit()) {
+          t_reg2 = As64BitReg(t_reg2);
+        }
+      }
+      LoadConstantNoClobber(t_reg2, false_val);
+
+      // Use csel.
+      left_op = rl_result.reg.GetReg();
+      right_op = t_reg2.GetReg();
+      opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
+    }
+
+    if (!rl_result_evaled) {
+      rl_result = EvalLoc(rl_dest, result_reg_class, true);
+    }
+
+    NewLIR4(opcode, rl_result.reg.GetReg(), left_op, right_op, code);
+  } else {
+    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
+    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];
+
+    rl_true = LoadValue(rl_true, result_reg_class);
+    rl_false = LoadValue(rl_false, result_reg_class);
+    rl_result = EvalLoc(rl_dest, result_reg_class, true);
+
+    int opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
+    NewLIR4(opcode, rl_result.reg.GetReg(),
+            rl_true.reg.GetReg(), rl_false.reg.GetReg(), code);
+  }
   StoreValue(rl_dest, rl_result);
 }
@@ -110,7 +214,6 @@ void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
   RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
   LIR* taken = &block_label_list_[bb->taken];
   LIR* not_taken = &block_label_list_[bb->fall_through];
-  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
   // Normalize such that if either operand is constant, src2 will be constant.
   ConditionCode ccode = mir->meta.ccode;
   if (rl_src1.is_const) {
@@ -118,16 +221,22 @@
     ccode = FlipComparisonOrder(ccode);
   }
+  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+
   if (rl_src2.is_const) {
-    rl_src2 = UpdateLocWide(rl_src2);
+    // TODO: Optimize for rl_src1.is_const? (Does happen in the boot image at the moment.)
+
     int64_t val = mir_graph_->ConstantValueWide(rl_src2);
     // Special handling using cbz & cbnz.
     if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
       OpCmpImmBranch(ccode, rl_src1.reg, 0, taken);
       OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken);
       return;
+    }
+
+    // Only handle Imm if src2 is not already in a register.
-    } else if (rl_src2.location != kLocPhysReg) {
+    rl_src2 = UpdateLocWide(rl_src2);
+    if (rl_src2.location != kLocPhysReg) {
       OpRegImm64(kOpCmp, rl_src1.reg, val);
       OpCondBranch(ccode, taken);
       OpCondBranch(NegateComparison(ccode), not_taken);
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 5870d22208..048aca3735 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -1046,9 +1046,19 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() {
     }
     // Push a marker to take place of lr.
     vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
-    // fp regs already sorted.
-    for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) {
-      vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment);
+    if (cu_->instruction_set == kThumb2) {
+      // fp regs already sorted.
+      for (uint32_t i = 0; i < fp_vmap_table_.size(); i++) {
+        vmap_encoder.PushBackUnsigned(fp_vmap_table_[i] + VmapTable::kEntryAdjustment);
+      }
+    } else {
+      // For other platforms regs may have been inserted out of order - sort first.
+      std::sort(fp_vmap_table_.begin(), fp_vmap_table_.end());
+      for (size_t i = 0 ; i < fp_vmap_table_.size(); ++i) {
+        // Copy, stripping out the phys register sort key.
+        vmap_encoder.PushBackUnsigned(
+            ~(-1 << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment));
+      }
     }
   } else {
     DCHECK_EQ(POPCOUNT(core_spill_mask_), 0);
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 6191e4b0a1..45dd7f08a6 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -96,7 +96,7 @@ MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke, MIR* move_return) {
 uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) {
   DCHECK_LT(arg, invoke->dalvikInsn.vA);
-  DCHECK(!MIRGraph::IsPseudoMirOp(invoke->dalvikInsn.opcode));
+  DCHECK(!MIR::DecodedInstruction::IsPseudoMirOp(invoke->dalvikInsn.opcode));
   if (Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc) {
     return invoke->dalvikInsn.vC + arg;  // Non-range invoke.
   } else {
@@ -107,7 +107,7 @@ uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) {
 bool WideArgIsInConsecutiveDalvikRegs(MIR* invoke, uint32_t arg) {
   DCHECK_LT(arg + 1, invoke->dalvikInsn.vA);
-  DCHECK(!MIRGraph::IsPseudoMirOp(invoke->dalvikInsn.opcode));
+  DCHECK(!MIR::DecodedInstruction::IsPseudoMirOp(invoke->dalvikInsn.opcode));
   return Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc ||
       invoke->dalvikInsn.arg[arg + 1u] == invoke->dalvikInsn.arg[arg] + 1u;
 }
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index caadc0ad89..07c615f342 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -1185,7 +1185,7 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) {
       work_half->meta.throw_insn = mir;
     }
-    if (MIRGraph::IsPseudoMirOp(opcode)) {
+    if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
       HandleExtendedMethodMIR(bb, mir);
       continue;
     }
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 48855012c3..87509b636c 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -531,7 +531,7 @@ class Mir2Lir : public Backend {
     LIRSlowPath(Mir2Lir* m2l, const DexOffset dexpc, LIR* fromfast,
                 LIR* cont = nullptr) :
       m2l_(m2l), cu_(m2l->cu_), current_dex_pc_(dexpc), fromfast_(fromfast), cont_(cont) {
-      m2l->StartSlowPath(cont);
+      m2l->StartSlowPath(this);
     }
     virtual ~LIRSlowPath() {}
     virtual void Compile() = 0;
@@ -705,17 +705,17 @@ class Mir2Lir : public Backend {
     int AssignLiteralOffset(CodeOffset offset);
     int AssignSwitchTablesOffset(CodeOffset offset);
     int AssignFillArrayDataOffset(CodeOffset offset);
-    LIR* InsertCaseLabel(DexOffset vaddr, int keyVal);
+    virtual LIR* InsertCaseLabel(DexOffset vaddr, int keyVal);
     void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec);
    void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec);
-    virtual void StartSlowPath(LIR *label) {}
+    virtual void StartSlowPath(LIRSlowPath* slowpath) {}
     virtual void BeginInvoke(CallInfo* info) {}
     virtual void EndInvoke(CallInfo* info) {}
 
     // Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation.  No code generated.
-    RegLocation NarrowRegLoc(RegLocation loc);
+    virtual RegLocation NarrowRegLoc(RegLocation loc);
 
     // Shared by all targets - implemented in local_optimizations.cc
     void ConvertMemOpIntoMove(LIR* orig_lir, RegStorage dest, RegStorage src);
@@ -763,7 +763,7 @@ class Mir2Lir : public Backend {
     virtual bool IsTemp(RegStorage reg);
     bool IsPromoted(RegStorage reg);
     bool IsDirty(RegStorage reg);
-    void LockTemp(RegStorage reg);
+    virtual void LockTemp(RegStorage reg);
     void ResetDef(RegStorage reg);
     void NullifyRange(RegStorage reg, int s_reg);
     void MarkDef(RegLocation rl, LIR *start, LIR *finish);
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index 9000514856..8e2a1e3532 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -234,8 +234,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
   NewLIR0(kPseudoMethodEntry);
   /* Spill core callee saves */
   SpillCoreRegs();
-  /* NOTE: promotion of FP regs currently unsupported, thus no FP spill */
-  DCHECK_EQ(num_fp_spills_, 0);
+  SpillFPRegs();
   if (!skip_overflow_check) {
     class StackOverflowSlowPath : public LIRSlowPath {
      public:
@@ -309,6 +308,7 @@ void X86Mir2Lir::GenExitSequence() {
   NewLIR0(kPseudoMethodExit);
   UnSpillCoreRegs();
+  UnSpillFPRegs();
   /* Remove frame except for return address */
   stack_increment_ = OpRegImm(kOpAdd, rs_rX86_SP, frame_size_ - GetInstructionSetPointerSize(cu_->instruction_set));
   NewLIR0(kX86Ret);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index ff7b30eeec..b0c54e86e9 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -319,6 +319,8 @@ class X86Mir2Lir : public Mir2Lir {
   void OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset);
   void SpillCoreRegs();
   void UnSpillCoreRegs();
+  void UnSpillFPRegs();
+  void SpillFPRegs();
   static const X86EncodingMap EncodingMap[kX86Last];
   bool InexpensiveConstantInt(int32_t value);
   bool InexpensiveConstantFloat(int32_t value);
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index e81f505f2f..1ebbbbd5ee 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -52,6 +52,13 @@ static constexpr RegStorage dp_regs_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
     rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
 };
+static constexpr RegStorage xp_regs_arr_32[] = {
+    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
+};
+static constexpr RegStorage xp_regs_arr_64[] = {
+    rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
+    rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
+};
 static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32};
 static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32};
 static constexpr RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64};
@@ -60,6 +67,24 @@ static constexpr RegStorage core_temps_arr_64[] = {
     rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI,
     rs_r8, rs_r9, rs_r10, rs_r11
 };
+
+// How to add register to be available for promotion:
+// 1) Remove register from array defining temp
+// 2) Update ClobberCallerSave
+// 3) Update JNI compiler ABI:
+// 3.1) add reg in JniCallingConvention method
+// 3.2) update CoreSpillMask/FpSpillMask
+// 4) Update entrypoints
+// 4.1) Update constants in asm_support_x86_64.h for new frame size
+// 4.2) Remove entry in SmashCallerSaves
+// 4.3) Update jni_entrypoints to spill/unspill new callee save reg
+// 4.4) Update quick_entrypoints to spill/unspill new callee save reg
+// 5) Update runtime ABI
+// 5.1) Update quick_method_frame_info with new required spills
+// 5.2) Update QuickArgumentVisitor with new offsets to gprs and xmms
+// Note that you cannot use register corresponding to incoming args
+// according to ABI and QCG needs one additional XMM temp for
+// bulk copy in preparation to call.
 static constexpr RegStorage core_temps_arr_64q[] = {
     rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q,
     rs_r8q, rs_r9q, rs_r10q, rs_r11q
@@ -69,14 +94,14 @@ static constexpr RegStorage sp_temps_arr_32[] = {
 };
 static constexpr RegStorage sp_temps_arr_64[] = {
     rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7,
-    rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15
+    rs_fr8, rs_fr9, rs_fr10, rs_fr11
 };
 static constexpr RegStorage dp_temps_arr_32[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
 };
 static constexpr RegStorage dp_temps_arr_64[] = {
     rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7,
-    rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15
+    rs_dr8, rs_dr9, rs_dr10, rs_dr11
 };
@@ -84,7 +109,7 @@ static constexpr RegStorage xp_temps_arr_32[] = {
 };
 static constexpr RegStorage xp_temps_arr_64[] = {
     rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7,
-    rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15
+    rs_xr8, rs_xr9, rs_xr10, rs_xr11
 };
 
 static constexpr ArrayRef<const RegStorage> empty_pool;
@@ -95,6 +120,8 @@ static constexpr ArrayRef<const RegStorage> sp_regs_32(sp_regs_arr_32);
 static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64);
 static constexpr ArrayRef<const RegStorage> dp_regs_32(dp_regs_arr_32);
 static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64);
+static constexpr ArrayRef<const RegStorage> xp_regs_32(xp_regs_arr_32);
+static constexpr ArrayRef<const RegStorage> xp_regs_64(xp_regs_arr_64);
 static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32);
 static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64);
 static constexpr ArrayRef<const RegStorage> reserved_regs_64q(reserved_regs_arr_64q);
@@ -437,21 +464,13 @@ bool X86Mir2Lir::IsByteRegister(RegStorage reg) {
 /* Clobber all regs that might be used by an external C call */
 void X86Mir2Lir::ClobberCallerSave() {
-  Clobber(rs_rAX);
-  Clobber(rs_rCX);
-  Clobber(rs_rDX);
-  Clobber(rs_rBX);
-
-  Clobber(rs_fr0);
-  Clobber(rs_fr1);
-  Clobber(rs_fr2);
-  Clobber(rs_fr3);
-  Clobber(rs_fr4);
-  Clobber(rs_fr5);
-  Clobber(rs_fr6);
-  Clobber(rs_fr7);
-
   if (cu_->target64) {
+    Clobber(rs_rAX);
+    Clobber(rs_rCX);
+    Clobber(rs_rDX);
+    Clobber(rs_rSI);
+    Clobber(rs_rDI);
+
+    Clobber(rs_r8);
     Clobber(rs_r9);
     Clobber(rs_r10);
@@ -461,11 +480,21 @@ void X86Mir2Lir::ClobberCallerSave() {
     Clobber(rs_fr9);
     Clobber(rs_fr10);
     Clobber(rs_fr11);
-    Clobber(rs_fr12);
-    Clobber(rs_fr13);
-    Clobber(rs_fr14);
-    Clobber(rs_fr15);
+  } else {
+    Clobber(rs_rAX);
+    Clobber(rs_rCX);
+    Clobber(rs_rDX);
+    Clobber(rs_rBX);
   }
+
+  Clobber(rs_fr0);
+  Clobber(rs_fr1);
+  Clobber(rs_fr2);
+  Clobber(rs_fr3);
+  Clobber(rs_fr4);
+  Clobber(rs_fr5);
+  Clobber(rs_fr6);
+  Clobber(rs_fr7);
 }
 
 RegLocation X86Mir2Lir::GetReturnWideAlt() {
@@ -599,11 +628,15 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() {
   // Target-specific adjustments.
 
   // Add in XMM registers.
-  const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32;
-  for (RegStorage reg : *xp_temps) {
+  const ArrayRef<const RegStorage> *xp_regs = cu_->target64 ? &xp_regs_64 : &xp_regs_32;
+  for (RegStorage reg : *xp_regs) {
     RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg));
     reginfo_map_.Put(reg.GetReg(), info);
-    info->SetIsTemp(true);
+  }
+  const ArrayRef<const RegStorage> *xp_temps = cu_->target64 ? &xp_temps_64 : &xp_temps_32;
+  for (RegStorage reg : *xp_temps) {
+    RegisterInfo* xp_reg_info = GetRegInfo(reg);
+    xp_reg_info->SetIsTemp(true);
   }
 
   // Alias single precision xmm to double xmms.
@@ -665,9 +698,11 @@ void X86Mir2Lir::SpillCoreRegs() {
   // Spill mask not including fake return address register
   uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
   int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
+  OpSize size = cu_->target64 ? k64 : k32;
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
-      StoreWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
+      StoreBaseDisp(rs_rX86_SP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg),
+                    size, kNotVolatile);
       offset += GetInstructionSetPointerSize(cu_->instruction_set);
     }
   }
 }
@@ -680,14 +715,46 @@ void X86Mir2Lir::UnSpillCoreRegs() {
   // Spill mask not including fake return address register
   uint32_t mask = core_spill_mask_ & ~(1 << rs_rRET.GetRegNum());
   int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * num_core_spills_);
+  OpSize size = cu_->target64 ? k64 : k32;
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
-      LoadWordDisp(rs_rX86_SP, offset, RegStorage::Solo32(reg));
+      LoadBaseDisp(rs_rX86_SP, offset, cu_->target64 ? RegStorage::Solo64(reg) : RegStorage::Solo32(reg),
+                   size, kNotVolatile);
       offset += GetInstructionSetPointerSize(cu_->instruction_set);
     }
   }
 }
+void X86Mir2Lir::SpillFPRegs() {
+  if (num_fp_spills_ == 0) {
+    return;
+  }
+  uint32_t mask = fp_spill_mask_;
+  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
+  for (int reg = 0; mask; mask >>= 1, reg++) {
+    if (mask & 0x1) {
+      StoreBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg),
+                    k64, kNotVolatile);
+      offset += sizeof(double);
+    }
+  }
+}
+void X86Mir2Lir::UnSpillFPRegs() {
+  if (num_fp_spills_ == 0) {
+    return;
+  }
+  uint32_t mask = fp_spill_mask_;
+  int offset = frame_size_ - (GetInstructionSetPointerSize(cu_->instruction_set) * (num_fp_spills_ + num_core_spills_));
+  for (int reg = 0; mask; mask >>= 1, reg++) {
+    if (mask & 0x1) {
+      LoadBaseDisp(rs_rX86_SP, offset, RegStorage::FloatSolo64(reg),
+                   k64, kNotVolatile);
+      offset += sizeof(double);
+    }
+  }
+}
+
 bool X86Mir2Lir::IsUnconditionalBranch(LIR* lir) {
   return (lir->opcode == kX86Jmp8 || lir->opcode == kX86Jmp32);
 }
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index 657160ffd1..5c7c91b5b5 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -917,7 +917,7 @@ void X86Mir2Lir::AnalyzeBB(BasicBlock * bb) {
   for (MIR *mir = bb->first_mir_insn; mir != NULL; mir = mir->next) {
     int opcode = mir->dalvikInsn.opcode;
-    if (MIRGraph::IsPseudoMirOp(opcode)) {
+    if (MIR::DecodedInstruction::IsPseudoMirOp(opcode)) {
       AnalyzeExtendedMIR(opcode, bb, mir);
     } else {
       AnalyzeMIR(opcode, bb, mir);
     }
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 2789923bb9..56573810ca 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -66,7 +66,9 @@ namespace art {
  * XMM6: caller | caller, arg7 | caller, scratch | caller, arg7, scratch
  * XMM7: caller | caller, arg8 | caller, scratch | caller, arg8, scratch
  * --- x86-64/x32 registers
- * XMM8 .. 15: caller save available as scratch registers for ART.
+ * XMM8 .. 11: caller save available as scratch registers for ART.
+ * XMM12 .. 15: callee save available as promoted registers for ART.
+ * This change (XMM12..15) is for QCG only, for others they are caller save.
  *
  * X87 is a necessary evil outside of ART code for x86:
  * ST0: x86 float/double native return value, caller save