| /* |
| * Copyright (C) 2012 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* This file contains codegen for the X86 ISA */ |
| |
| #include "codegen_x86.h" |
| #include "dex/quick/mir_to_lir-inl.h" |
| #include "dex/reg_storage_eq.h" |
| #include "mirror/art_method.h" |
| #include "mirror/array-inl.h" |
| #include "x86_lir.h" |
| |
| namespace art { |
| |
| /* |
| * Compare two 64-bit values |
| * x = y return 0 |
| * x < y return -1 |
| * x > y return 1 |
| */ |
| void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, |
| RegLocation rl_src2) { |
| if (cu_->target64) { |
| rl_src1 = LoadValueWide(rl_src1, kCoreReg); |
| rl_src2 = LoadValueWide(rl_src2, kCoreReg); |
| RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| RegStorage temp_reg = AllocTemp(); |
| OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); |
| NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondG); // result = (src1 > src2) ? 1 : 0 |
| NewLIR2(kX86Set8R, temp_reg.GetReg(), kX86CondL); // temp = (src1 >= src2) ? 0 : 1 |
| NewLIR2(kX86Sub8RR, rl_result.reg.GetReg(), temp_reg.GetReg()); |
| NewLIR2(kX86Movsx8qRR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); |
| |
| StoreValue(rl_dest, rl_result); |
| FreeTemp(temp_reg); |
| return; |
| } |
| |
| // Prepare for explicit register usage |
| ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3); |
| RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1); |
| RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3); |
| LoadValueDirectWideFixed(rl_src1, r_tmp1); |
| LoadValueDirectWideFixed(rl_src2, r_tmp2); |
| // Compute (r1:r0) = (r1:r0) - (r3:r2) |
| OpRegReg(kOpSub, rs_r0, rs_r2); // r0 = r0 - r2 |
| OpRegReg(kOpSbc, rs_r1, rs_r3); // r1 = r1 - r3 - CF |
| NewLIR2(kX86Set8R, rs_r2.GetReg(), kX86CondL); // r2 = (r1:r0) < (r3:r2) ? 1 : 0 |
| NewLIR2(kX86Movzx8RR, rs_r2.GetReg(), rs_r2.GetReg()); |
| OpReg(kOpNeg, rs_r2); // r2 = -r2 |
| OpRegReg(kOpOr, rs_r0, rs_r1); // r0 = high | low - sets ZF |
| NewLIR2(kX86Set8R, rs_r0.GetReg(), kX86CondNz); // r0 = (r1:r0) != (r3:r2) ? 1 : 0 |
| NewLIR2(kX86Movzx8RR, r0, r0); |
| OpRegReg(kOpOr, rs_r0, rs_r2); // r0 = r0 | r2 |
| RegLocation rl_result = LocCReturn(); |
| StoreValue(rl_dest, rl_result); |
| } |
| |
| X86ConditionCode X86ConditionEncoding(ConditionCode cond) { |
| switch (cond) { |
| case kCondEq: return kX86CondEq; |
| case kCondNe: return kX86CondNe; |
| case kCondCs: return kX86CondC; |
| case kCondCc: return kX86CondNc; |
| case kCondUlt: return kX86CondC; |
| case kCondUge: return kX86CondNc; |
| case kCondMi: return kX86CondS; |
| case kCondPl: return kX86CondNs; |
| case kCondVs: return kX86CondO; |
| case kCondVc: return kX86CondNo; |
| case kCondHi: return kX86CondA; |
| case kCondLs: return kX86CondBe; |
| case kCondGe: return kX86CondGe; |
| case kCondLt: return kX86CondL; |
| case kCondGt: return kX86CondG; |
| case kCondLe: return kX86CondLe; |
| case kCondAl: |
| case kCondNv: LOG(FATAL) << "Should not reach here"; |
| } |
| return kX86CondO; |
| } |
| |
| LIR* X86Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) { |
| NewLIR2(src1.Is64Bit() ? kX86Cmp64RR : kX86Cmp32RR, src1.GetReg(), src2.GetReg()); |
| X86ConditionCode cc = X86ConditionEncoding(cond); |
| LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , |
| cc); |
| branch->target = target; |
| return branch; |
| } |
| |
| LIR* X86Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, |
| int check_value, LIR* target) { |
| if ((check_value == 0) && (cond == kCondEq || cond == kCondNe)) { |
| // TODO: when check_value == 0 and reg is rCX, use the jcxz/nz opcode |
| NewLIR2(reg.Is64Bit() ? kX86Test64RR: kX86Test32RR, reg.GetReg(), reg.GetReg()); |
| } else { |
| if (reg.Is64Bit()) { |
| NewLIR2(IS_SIMM8(check_value) ? kX86Cmp64RI8 : kX86Cmp64RI, reg.GetReg(), check_value); |
| } else { |
| NewLIR2(IS_SIMM8(check_value) ? kX86Cmp32RI8 : kX86Cmp32RI, reg.GetReg(), check_value); |
| } |
| } |
| X86ConditionCode cc = X86ConditionEncoding(cond); |
| LIR* branch = NewLIR2(kX86Jcc8, 0 /* lir operand for Jcc offset */ , cc); |
| branch->target = target; |
| return branch; |
| } |
| |
| LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { |
| // If src or dest is a pair, we'll be using low reg. |
| if (r_dest.IsPair()) { |
| r_dest = r_dest.GetLow(); |
| } |
| if (r_src.IsPair()) { |
| r_src = r_src.GetLow(); |
| } |
| if (r_dest.IsFloat() || r_src.IsFloat()) |
| return OpFpRegCopy(r_dest, r_src); |
| LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR, |
| r_dest.GetReg(), r_src.GetReg()); |
| if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { |
| res->flags.is_nop = true; |
| } |
| return res; |
| } |
| |
| void X86Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) { |
| if (r_dest != r_src) { |
| LIR *res = OpRegCopyNoInsert(r_dest, r_src); |
| AppendLIR(res); |
| } |
| } |
| |
| void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { |
| if (r_dest != r_src) { |
| bool dest_fp = r_dest.IsFloat(); |
| bool src_fp = r_src.IsFloat(); |
| if (dest_fp) { |
| if (src_fp) { |
| OpRegCopy(r_dest, r_src); |
| } else { |
| // TODO: Prevent this from happening in the code. The result is often |
| // unused or could have been loaded more easily from memory. |
| if (!r_src.IsPair()) { |
| DCHECK(!r_dest.IsPair()); |
| NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg()); |
| } else { |
| NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg()); |
| RegStorage r_tmp = AllocTempDouble(); |
| NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg()); |
| NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg()); |
| FreeTemp(r_tmp); |
| } |
| } |
| } else { |
| if (src_fp) { |
| if (!r_dest.IsPair()) { |
| DCHECK(!r_src.IsPair()); |
| NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg()); |
| } else { |
| NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg()); |
| RegStorage temp_reg = AllocTempDouble(); |
| NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg()); |
| NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32); |
| NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg()); |
| } |
| } else { |
| DCHECK_EQ(r_dest.IsPair(), r_src.IsPair()); |
| if (!r_src.IsPair()) { |
| // Just copy the register directly. |
| OpRegCopy(r_dest, r_src); |
| } else { |
| // Handle overlap |
| if (r_src.GetHighReg() == r_dest.GetLowReg() && |
| r_src.GetLowReg() == r_dest.GetHighReg()) { |
| // Deal with cycles. |
| RegStorage temp_reg = AllocTemp(); |
| OpRegCopy(temp_reg, r_dest.GetHigh()); |
| OpRegCopy(r_dest.GetHigh(), r_dest.GetLow()); |
| OpRegCopy(r_dest.GetLow(), temp_reg); |
| FreeTemp(temp_reg); |
| } else if (r_src.GetHighReg() == r_dest.GetLowReg()) { |
| OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); |
| OpRegCopy(r_dest.GetLow(), r_src.GetLow()); |
| } else { |
| OpRegCopy(r_dest.GetLow(), r_src.GetLow()); |
| OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| void X86Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code, |
| int32_t true_val, int32_t false_val, RegStorage rs_dest, |
| int dest_reg_class) { |
| DCHECK(!left_op.IsPair() && !right_op.IsPair() && !rs_dest.IsPair()); |
| DCHECK(!left_op.IsFloat() && !right_op.IsFloat() && !rs_dest.IsFloat()); |
| |
| // We really need this check for correctness, otherwise we will need to do more checks in |
| // non zero/one case |
| if (true_val == false_val) { |
| LoadConstantNoClobber(rs_dest, true_val); |
| return; |
| } |
| |
| const bool dest_intersect = IsSameReg(rs_dest, left_op) || IsSameReg(rs_dest, right_op); |
| |
| const bool zero_one_case = (true_val == 0 && false_val == 1) || (true_val == 1 && false_val == 0); |
| if (zero_one_case && IsByteRegister(rs_dest)) { |
| if (!dest_intersect) { |
| LoadConstantNoClobber(rs_dest, 0); |
| } |
| OpRegReg(kOpCmp, left_op, right_op); |
| // Set the low byte of the result to 0 or 1 from the compare condition code. |
| NewLIR2(kX86Set8R, rs_dest.GetReg(), |
| X86ConditionEncoding(true_val == 1 ? code : FlipComparisonOrder(code))); |
| if (dest_intersect) { |
| NewLIR2(rs_dest.Is64Bit() ? kX86Movzx8qRR : kX86Movzx8RR, rs_dest.GetReg(), rs_dest.GetReg()); |
| } |
| } else { |
| // Be careful rs_dest can be changed only after cmp because it can be the same as one of ops |
| // and it cannot use xor because it makes cc flags to be dirty |
| RegStorage temp_reg = AllocTypedTemp(false, dest_reg_class, false); |
| if (temp_reg.Valid()) { |
| if (false_val == 0 && dest_intersect) { |
| code = FlipComparisonOrder(code); |
| std::swap(true_val, false_val); |
| } |
| if (!dest_intersect) { |
| LoadConstantNoClobber(rs_dest, false_val); |
| } |
| LoadConstantNoClobber(temp_reg, true_val); |
| OpRegReg(kOpCmp, left_op, right_op); |
| if (dest_intersect) { |
| LoadConstantNoClobber(rs_dest, false_val); |
| DCHECK(!last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); |
| } |
| OpCondRegReg(kOpCmov, code, rs_dest, temp_reg); |
| FreeTemp(temp_reg); |
| } else { |
| // slow path |
| LIR* cmp_branch = OpCmpBranch(code, left_op, right_op, nullptr); |
| LoadConstantNoClobber(rs_dest, false_val); |
| LIR* that_is_it = NewLIR1(kX86Jmp8, 0); |
| LIR* true_case = NewLIR0(kPseudoTargetLabel); |
| cmp_branch->target = true_case; |
| LoadConstantNoClobber(rs_dest, true_val); |
| LIR* end = NewLIR0(kPseudoTargetLabel); |
| that_is_it->target = end; |
| } |
| } |
| } |
| |
| void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { |
| RegLocation rl_result; |
| RegLocation rl_src = mir_graph_->GetSrc(mir, 0); |
| RegLocation rl_dest = mir_graph_->GetDest(mir); |
| // Avoid using float regs here. |
| RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg; |
| RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg; |
| ConditionCode ccode = mir->meta.ccode; |
| |
| // The kMirOpSelect has two variants, one for constants and one for moves. |
| const bool is_constant_case = (mir->ssa_rep->num_uses == 1); |
| |
| if (is_constant_case) { |
| int true_val = mir->dalvikInsn.vB; |
| int false_val = mir->dalvikInsn.vC; |
| |
| // simplest strange case |
| if (true_val == false_val) { |
| rl_result = EvalLoc(rl_dest, result_reg_class, true); |
| LoadConstantNoClobber(rl_result.reg, true_val); |
| } else { |
| // TODO: use GenSelectConst32 and handle additional opcode patterns such as |
| // "cmp; setcc; movzx" or "cmp; sbb r0,r0; and r0,$mask; add r0,$literal". |
| rl_src = LoadValue(rl_src, src_reg_class); |
| rl_result = EvalLoc(rl_dest, result_reg_class, true); |
| /* |
| * For ccode == kCondEq: |
| * |
| * 1) When the true case is zero and result_reg is not same as src_reg: |
| * xor result_reg, result_reg |
| * cmp $0, src_reg |
| * mov t1, $false_case |
| * cmovnz result_reg, t1 |
| * 2) When the false case is zero and result_reg is not same as src_reg: |
| * xor result_reg, result_reg |
| * cmp $0, src_reg |
| * mov t1, $true_case |
| * cmovz result_reg, t1 |
| * 3) All other cases (we do compare first to set eflags): |
| * cmp $0, src_reg |
| * mov result_reg, $false_case |
| * mov t1, $true_case |
| * cmovz result_reg, t1 |
| */ |
| // FIXME: depending on how you use registers you could get a false != mismatch when dealing |
| // with different views of the same underlying physical resource (i.e. solo32 vs. solo64). |
| const bool result_reg_same_as_src = |
| (rl_src.location == kLocPhysReg && rl_src.reg.GetRegNum() == rl_result.reg.GetRegNum()); |
| const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src); |
| const bool false_zero_case = (false_val == 0 && true_val != 0 && !result_reg_same_as_src); |
| const bool catch_all_case = !(true_zero_case || false_zero_case); |
| |
| if (true_zero_case || false_zero_case) { |
| OpRegReg(kOpXor, rl_result.reg, rl_result.reg); |
| } |
| |
| if (true_zero_case || false_zero_case || catch_all_case) { |
| OpRegImm(kOpCmp, rl_src.reg, 0); |
| } |
| |
| if (catch_all_case) { |
| OpRegImm(kOpMov, rl_result.reg, false_val); |
| } |
| |
| if (true_zero_case || false_zero_case || catch_all_case) { |
| ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode; |
| int immediateForTemp = true_zero_case ? false_val : true_val; |
| RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class); |
| OpRegImm(kOpMov, temp1_reg, immediateForTemp); |
| |
| OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg); |
| |
| FreeTemp(temp1_reg); |
| } |
| } |
| } else { |
| rl_src = LoadValue(rl_src, src_reg_class); |
| RegLocation rl_true = mir_graph_->GetSrc(mir, 1); |
| RegLocation rl_false = mir_graph_->GetSrc(mir, 2); |
| rl_true = LoadValue(rl_true, result_reg_class); |
| rl_false = LoadValue(rl_false, result_reg_class); |
| rl_result = EvalLoc(rl_dest, result_reg_class, true); |
| |
| /* |
| * For ccode == kCondEq: |
| * |
| * 1) When true case is already in place: |
| * cmp $0, src_reg |
| * cmovnz result_reg, false_reg |
| * 2) When false case is already in place: |
| * cmp $0, src_reg |
| * cmovz result_reg, true_reg |
| * 3) When neither cases are in place: |
| * cmp $0, src_reg |
| * mov result_reg, false_reg |
| * cmovz result_reg, true_reg |
| */ |
| |
| // kMirOpSelect is generated just for conditional cases when comparison is done with zero. |
| OpRegImm(kOpCmp, rl_src.reg, 0); |
| |
| if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) { |
| OpCondRegReg(kOpCmov, NegateComparison(ccode), rl_result.reg, rl_false.reg); |
| } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) { |
| OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg); |
| } else { |
| OpRegCopy(rl_result.reg, rl_false.reg); |
| OpCondRegReg(kOpCmov, ccode, rl_result.reg, rl_true.reg); |
| } |
| } |
| |
| StoreValue(rl_dest, rl_result); |
| } |
| |
| void X86Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { |
| LIR* taken = &block_label_list_[bb->taken]; |
| RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); |
| RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); |
| ConditionCode ccode = mir->meta.ccode; |
| |
| if (rl_src1.is_const) { |
| std::swap(rl_src1, rl_src2); |
| ccode = FlipComparisonOrder(ccode); |
| } |
| if (rl_src2.is_const) { |
| // Do special compare/branch against simple const operand |
| int64_t val = mir_graph_->ConstantValueWide(rl_src2); |
| GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode); |
| return; |
| } |
| |
| if (cu_->target64) { |
| rl_src1 = LoadValueWide(rl_src1, kCoreReg); |
| rl_src2 = LoadValueWide(rl_src2, kCoreReg); |
| |
| OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); |
| OpCondBranch(ccode, taken); |
| return; |
| } |
| |
| // Prepare for explicit register usage |
| ExplicitTempRegisterLock(this, 4, &rs_r0, &rs_r1, &rs_r2, &rs_r3); |
| RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1); |
| RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_r2, rs_r3); |
| LoadValueDirectWideFixed(rl_src1, r_tmp1); |
| LoadValueDirectWideFixed(rl_src2, r_tmp2); |
| |
| // Swap operands and condition code to prevent use of zero flag. |
| if (ccode == kCondLe || ccode == kCondGt) { |
| // Compute (r3:r2) = (r3:r2) - (r1:r0) |
| OpRegReg(kOpSub, rs_r2, rs_r0); // r2 = r2 - r0 |
| OpRegReg(kOpSbc, rs_r3, rs_r1); // r3 = r3 - r1 - CF |
| } else { |
| // Compute (r1:r0) = (r1:r0) - (r3:r2) |
| OpRegReg(kOpSub, rs_r0, rs_r2); // r0 = r0 - r2 |
| OpRegReg(kOpSbc, rs_r1, rs_r3); // r1 = r1 - r3 - CF |
| } |
| switch (ccode) { |
| case kCondEq: |
| case kCondNe: |
| OpRegReg(kOpOr, rs_r0, rs_r1); // r0 = r0 | r1 |
| break; |
| case kCondLe: |
| ccode = kCondGe; |
| break; |
| case kCondGt: |
| ccode = kCondLt; |
| break; |
| case kCondLt: |
| case kCondGe: |
| break; |
| default: |
| LOG(FATAL) << "Unexpected ccode: " << ccode; |
| } |
| OpCondBranch(ccode, taken); |
| } |
| |
| void X86Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, |
| int64_t val, ConditionCode ccode) { |
| int32_t val_lo = Low32Bits(val); |
| int32_t val_hi = High32Bits(val); |
| LIR* taken = &block_label_list_[bb->taken]; |
| rl_src1 = LoadValueWide(rl_src1, kCoreReg); |
| bool is_equality_test = ccode == kCondEq || ccode == kCondNe; |
| |
| if (cu_->target64) { |
| if (is_equality_test && val == 0) { |
| // We can simplify of comparing for ==, != to 0. |
| NewLIR2(kX86Test64RR, rl_src1.reg.GetReg(), rl_src1.reg.GetReg()); |
| } else if (is_equality_test && val_hi == 0 && val_lo > 0) { |
| OpRegImm(kOpCmp, rl_src1.reg, val_lo); |
| } else { |
| RegStorage tmp = AllocTypedTempWide(false, kCoreReg); |
| LoadConstantWide(tmp, val); |
| OpRegReg(kOpCmp, rl_src1.reg, tmp); |
| FreeTemp(tmp); |
| } |
| OpCondBranch(ccode, taken); |
| return; |
| } |
| |
| if (is_equality_test && val != 0) { |
| rl_src1 = ForceTempWide(rl_src1); |
| } |
| RegStorage low_reg = rl_src1.reg.GetLow(); |
| RegStorage high_reg = rl_src1.reg.GetHigh(); |
| |
| if (is_equality_test) { |
| // We can simplify of comparing for ==, != to 0. |
| if (val == 0) { |
| if (IsTemp(low_reg)) { |
| OpRegReg(kOpOr, low_reg, high_reg); |
| // We have now changed it; ignore the old values. |
| Clobber(rl_src1.reg); |
| } else { |
| RegStorage t_reg = AllocTemp(); |
| OpRegRegReg(kOpOr, t_reg, low_reg, high_reg); |
| FreeTemp(t_reg); |
| } |
| OpCondBranch(ccode, taken); |
| return; |
| } |
| |
| // Need to compute the actual value for ==, !=. |
| OpRegImm(kOpSub, low_reg, val_lo); |
| NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi); |
| OpRegReg(kOpOr, high_reg, low_reg); |
| Clobber(rl_src1.reg); |
| } else if (ccode == kCondLe || ccode == kCondGt) { |
| // Swap operands and condition code to prevent use of zero flag. |
| RegStorage tmp = AllocTypedTempWide(false, kCoreReg); |
| LoadConstantWide(tmp, val); |
| OpRegReg(kOpSub, tmp.GetLow(), low_reg); |
| OpRegReg(kOpSbc, tmp.GetHigh(), high_reg); |
| ccode = (ccode == kCondLe) ? kCondGe : kCondLt; |
| FreeTemp(tmp); |
| } else { |
| // We can use a compare for the low word to set CF. |
| OpRegImm(kOpCmp, low_reg, val_lo); |
| if (IsTemp(high_reg)) { |
| NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi); |
| // We have now changed it; ignore the old values. |
| Clobber(rl_src1.reg); |
| } else { |
| // mov temp_reg, high_reg; sbb temp_reg, high_constant |
| RegStorage t_reg = AllocTemp(); |
| OpRegCopy(t_reg, high_reg); |
| NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi); |
| FreeTemp(t_reg); |
| } |
| } |
| |
| OpCondBranch(ccode, taken); |
| } |
| |
| void X86Mir2Lir::CalculateMagicAndShift(int64_t divisor, int64_t& magic, int& shift, bool is_long) { |
| // It does not make sense to calculate magic and shift for zero divisor. |
| DCHECK_NE(divisor, 0); |
| |
| /* According to H.S.Warren's Hacker's Delight Chapter 10 and |
| * T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. |
| * The magic number M and shift S can be calculated in the following way: |
| * Let nc be the most positive value of numerator(n) such that nc = kd - 1, |
| * where divisor(d) >=2. |
| * Let nc be the most negative value of numerator(n) such that nc = kd + 1, |
| * where divisor(d) <= -2. |
| * Thus nc can be calculated like: |
| * nc = exp + exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long |
| * nc = -exp + (exp + 1) % d, where d >= 2 and exp = 2^31 for int or 2^63 for long |
| * |
| * So the shift p is the smallest p satisfying |
| * 2^p > nc * (d - 2^p % d), where d >= 2 |
| * 2^p > nc * (d + 2^p % d), where d <= -2. |
| * |
| * the magic number M is calcuated by |
| * M = (2^p + d - 2^p % d) / d, where d >= 2 |
| * M = (2^p - d - 2^p % d) / d, where d <= -2. |
| * |
| * Notice that p is always bigger than or equal to 32/64, so we just return 32-p/64-p as |
| * the shift number S. |
| */ |
| |
| int64_t p = (is_long) ? 63 : 31; |
| const uint64_t exp = (is_long) ? 0x8000000000000000ULL : 0x80000000U; |
| |
| // Initialize the computations. |
| uint64_t abs_d = (divisor >= 0) ? divisor : -divisor; |
| uint64_t tmp = exp + ((is_long) ? static_cast<uint64_t>(divisor) >> 63 : |
| static_cast<uint32_t>(divisor) >> 31); |
| uint64_t abs_nc = tmp - 1 - tmp % abs_d; |
| uint64_t quotient1 = exp / abs_nc; |
| uint64_t remainder1 = exp % abs_nc; |
| uint64_t quotient2 = exp / abs_d; |
| uint64_t remainder2 = exp % abs_d; |
| |
| /* |
| * To avoid handling both positive and negative divisor, Hacker's Delight |
| * introduces a method to handle these 2 cases together to avoid duplication. |
| */ |
| uint64_t delta; |
| do { |
| p++; |
| quotient1 = 2 * quotient1; |
| remainder1 = 2 * remainder1; |
| if (remainder1 >= abs_nc) { |
| quotient1++; |
| remainder1 = remainder1 - abs_nc; |
| } |
| quotient2 = 2 * quotient2; |
| remainder2 = 2 * remainder2; |
| if (remainder2 >= abs_d) { |
| quotient2++; |
| remainder2 = remainder2 - abs_d; |
| } |
| delta = abs_d - remainder2; |
| } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0)); |
| |
| magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1); |
| |
| if (!is_long) { |
| magic = static_cast<int>(magic); |
| } |
| |
| shift = (is_long) ? p - 64 : p - 32; |
| } |
| |
| RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div) { |
| LOG(FATAL) << "Unexpected use of GenDivRemLit for x86"; |
| return rl_dest; |
| } |
| |
| RegLocation X86Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src, |
| int imm, bool is_div) { |
| // Use a multiply (and fixup) to perform an int div/rem by a constant. |
| RegLocation rl_result; |
| |
| if (imm == 1) { |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| if (is_div) { |
| // x / 1 == x. |
| LoadValueDirectFixed(rl_src, rl_result.reg); |
| } else { |
| // x % 1 == 0. |
| LoadConstantNoClobber(rl_result.reg, 0); |
| } |
| } else if (imm == -1) { // handle 0x80000000 / -1 special case. |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| if (is_div) { |
| LoadValueDirectFixed(rl_src, rl_result.reg); |
| |
| // Check if numerator is 0 |
| OpRegImm(kOpCmp, rl_result.reg, 0); |
| LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); |
| |
| // handle 0x80000000 / -1 |
| OpRegImm(kOpCmp, rl_result.reg, 0x80000000); |
| LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); |
| |
| // for x != MIN_INT, x / -1 == -x. |
| NewLIR1(kX86Neg32R, rl_result.reg.GetReg()); |
| |
| // EAX already contains the right value (0x80000000), |
| minint_branch->target = NewLIR0(kPseudoTargetLabel); |
| branch->target = NewLIR0(kPseudoTargetLabel); |
| } else { |
| // x % -1 == 0. |
| LoadConstantNoClobber(rl_result.reg, 0); |
| } |
| } else if (is_div && IsPowerOfTwo(std::abs(imm))) { |
| // Division using shifting. |
| rl_src = LoadValue(rl_src, kCoreReg); |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| if (IsSameReg(rl_result.reg, rl_src.reg)) { |
| RegStorage rs_temp = AllocTypedTemp(false, kCoreReg); |
| rl_result.reg.SetReg(rs_temp.GetReg()); |
| } |
| |
| // Check if numerator is 0 |
| OpRegImm(kOpCmp, rl_src.reg, 0); |
| LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); |
| LoadConstantNoClobber(rl_result.reg, 0); |
| LIR* done = NewLIR1(kX86Jmp8, 0); |
| branch->target = NewLIR0(kPseudoTargetLabel); |
| |
| NewLIR3(kX86Lea32RM, rl_result.reg.GetReg(), rl_src.reg.GetReg(), std::abs(imm) - 1); |
| NewLIR2(kX86Test32RR, rl_src.reg.GetReg(), rl_src.reg.GetReg()); |
| OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg); |
| int shift_amount = LowestSetBit(imm); |
| OpRegImm(kOpAsr, rl_result.reg, shift_amount); |
| if (imm < 0) { |
| OpReg(kOpNeg, rl_result.reg); |
| } |
| done->target = NewLIR0(kPseudoTargetLabel); |
| } else { |
| CHECK(imm <= -2 || imm >= 2); |
| |
| // Use H.S.Warren's Hacker's Delight Chapter 10 and |
| // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. |
| int64_t magic; |
| int shift; |
| CalculateMagicAndShift((int64_t)imm, magic, shift, false /* is_long */); |
| |
| /* |
| * For imm >= 2, |
| * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0 |
| * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0. |
| * For imm <= -2, |
| * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0 |
| * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0. |
| * We implement this algorithm in the following way: |
| * 1. multiply magic number m and numerator n, get the higher 32bit result in EDX |
| * 2. if imm > 0 and magic < 0, add numerator to EDX |
| * if imm < 0 and magic > 0, sub numerator from EDX |
| * 3. if S !=0, SAR S bits for EDX |
| * 4. add 1 to EDX if EDX < 0 |
| * 5. Thus, EDX is the quotient |
| */ |
| |
| FlushReg(rs_r0); |
| Clobber(rs_r0); |
| LockTemp(rs_r0); |
| FlushReg(rs_r2); |
| Clobber(rs_r2); |
| LockTemp(rs_r2); |
| |
| // Assume that the result will be in EDX for divide, and EAX for remainder. |
| rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, is_div ? rs_r2 : rs_r0, |
| INVALID_SREG, INVALID_SREG}; |
| |
| // We need the value at least twice. Load into a temp. |
| rl_src = LoadValue(rl_src, kCoreReg); |
| RegStorage numerator_reg = rl_src.reg; |
| |
| // Check if numerator is 0. |
| OpRegImm(kOpCmp, numerator_reg, 0); |
| LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); |
| // Return result 0 if numerator was 0. |
| LoadConstantNoClobber(rl_result.reg, 0); |
| LIR* done = NewLIR1(kX86Jmp8, 0); |
| branch->target = NewLIR0(kPseudoTargetLabel); |
| |
| // EAX = magic. |
| LoadConstant(rs_r0, magic); |
| |
| // EDX:EAX = magic * numerator. |
| NewLIR1(kX86Imul32DaR, numerator_reg.GetReg()); |
| |
| if (imm > 0 && magic < 0) { |
| // Add numerator to EDX. |
| DCHECK(numerator_reg.Valid()); |
| NewLIR2(kX86Add32RR, rs_r2.GetReg(), numerator_reg.GetReg()); |
| } else if (imm < 0 && magic > 0) { |
| DCHECK(numerator_reg.Valid()); |
| NewLIR2(kX86Sub32RR, rs_r2.GetReg(), numerator_reg.GetReg()); |
| } |
| |
| // Do we need the shift? |
| if (shift != 0) { |
| // Shift EDX by 'shift' bits. |
| NewLIR2(kX86Sar32RI, rs_r2.GetReg(), shift); |
| } |
| |
| // Add 1 to EDX if EDX < 0. |
| |
| // Move EDX to EAX. |
| OpRegCopy(rs_r0, rs_r2); |
| |
| // Move sign bit to bit 0, zeroing the rest. |
| NewLIR2(kX86Shr32RI, rs_r2.GetReg(), 31); |
| |
| // EDX = EDX + EAX. |
| NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r0.GetReg()); |
| |
| // Quotient is in EDX. |
| if (!is_div) { |
| // We need to compute the remainder. |
| // Remainder is divisor - (quotient * imm). |
| DCHECK(numerator_reg.Valid()); |
| OpRegCopy(rs_r0, numerator_reg); |
| |
| // EAX = numerator * imm. |
| OpRegRegImm(kOpMul, rs_r2, rs_r2, imm); |
| |
| // EAX -= EDX. |
| NewLIR2(kX86Sub32RR, rs_r0.GetReg(), rs_r2.GetReg()); |
| |
| // For this case, return the result in EAX. |
| } |
| done->target = NewLIR0(kPseudoTargetLabel); |
| } |
| |
| return rl_result; |
| } |
| |
| RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, |
| bool is_div) { |
| LOG(FATAL) << "Unexpected use of GenDivRem for x86"; |
| return rl_dest; |
| } |
| |
| RegLocation X86Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1, |
| RegLocation rl_src2, bool is_div, int flags) { |
| // We have to use fixed registers, so flush all the temps. |
| |
| // Prepare for explicit register usage. |
| ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2); |
| |
| // Load LHS into EAX. |
| LoadValueDirectFixed(rl_src1, rs_r0); |
| |
| // Load RHS into EBX. |
| LoadValueDirectFixed(rl_src2, rs_r1); |
| |
| // Copy LHS sign bit into EDX. |
| NewLIR0(kx86Cdq32Da); |
| |
| if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) { |
| // Handle division by zero case. |
| GenDivZeroCheck(rs_r1); |
| } |
| |
| // Check if numerator is 0 |
| OpRegImm(kOpCmp, rs_r0, 0); |
| LIR* branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); |
| |
| // Have to catch 0x80000000/-1 case, or we will get an exception! |
| OpRegImm(kOpCmp, rs_r1, -1); |
| LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); |
| |
| // RHS is -1. |
| OpRegImm(kOpCmp, rs_r0, 0x80000000); |
| LIR* minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); |
| |
| branch->target = NewLIR0(kPseudoTargetLabel); |
| |
| // In 0x80000000/-1 case. |
| if (!is_div) { |
| // For DIV, EAX is already right. For REM, we need EDX 0. |
| LoadConstantNoClobber(rs_r2, 0); |
| } |
| LIR* done = NewLIR1(kX86Jmp8, 0); |
| |
| // Expected case. |
| minus_one_branch->target = NewLIR0(kPseudoTargetLabel); |
| minint_branch->target = minus_one_branch->target; |
| NewLIR1(kX86Idivmod32DaR, rs_r1.GetReg()); |
| done->target = NewLIR0(kPseudoTargetLabel); |
| |
| // Result is in EAX for div and EDX for rem. |
| RegLocation rl_result = {kLocPhysReg, 0, 0, 0, 0, 0, 0, 0, 1, rs_r0, INVALID_SREG, INVALID_SREG}; |
| if (!is_div) { |
| rl_result.reg.SetReg(r2); |
| } |
| return rl_result; |
| } |
| |
| bool X86Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { |
| DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); |
| |
| if (is_long && !cu_->target64) { |
| /* |
| * We want to implement the following algorithm |
| * mov eax, low part of arg1 |
| * mov edx, high part of arg1 |
| * mov ebx, low part of arg2 |
| * mov ecx, high part of arg2 |
| * mov edi, eax |
| * sub edi, ebx |
| * mov edi, edx |
| * sbb edi, ecx |
| * is_min ? "cmovgel eax, ebx" : "cmovll eax, ebx" |
| * is_min ? "cmovgel edx, ecx" : "cmovll edx, ecx" |
| * |
| * The algorithm above needs 5 registers: a pair for the first operand |
| * (which later will be used as result), a pair for the second operand |
| * and a temp register (e.g. 'edi') for intermediate calculations. |
| * Ideally we have 6 GP caller-save registers in 32-bit mode. They are: |
| * 'eax', 'ebx', 'ecx', 'edx', 'esi' and 'edi'. So there should be |
| * always enough registers to operate on. Practically, there is a pair |
| * of registers 'edi' and 'esi' which holds promoted values and |
| * sometimes should be treated as 'callee save'. If one of the operands |
| * is in the promoted registers then we have enough register to |
| * operate on. Otherwise there is lack of resources and we have to |
| * save 'edi' before calculations and restore after. |
| */ |
| |
| RegLocation rl_src1 = info->args[0]; |
| RegLocation rl_src2 = info->args[2]; |
| RegLocation rl_dest = InlineTargetWide(info); |
| int res_vreg, src1_vreg, src2_vreg; |
| |
| if (rl_dest.s_reg_low == INVALID_SREG) { |
| // Result is unused, the code is dead. Inlining successful, no code generated. |
| return true; |
| } |
| |
| /* |
| * If the result register is the same as the second element, then we |
| * need to be careful. The reason is that the first copy will |
| * inadvertently clobber the second element with the first one thus |
| * yielding the wrong result. Thus we do a swap in that case. |
| */ |
| res_vreg = mir_graph_->SRegToVReg(rl_dest.s_reg_low); |
| src2_vreg = mir_graph_->SRegToVReg(rl_src2.s_reg_low); |
| if (res_vreg == src2_vreg) { |
| std::swap(rl_src1, rl_src2); |
| } |
| |
| rl_src1 = LoadValueWide(rl_src1, kCoreReg); |
| RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| |
| // Pick the first integer as min/max. |
| OpRegCopyWide(rl_result.reg, rl_src1.reg); |
| |
| /* |
| * If the integers are both in the same register, then there is |
| * nothing else to do because they are equal and we have already |
| * moved one into the result. |
| */ |
| src1_vreg = mir_graph_->SRegToVReg(rl_src1.s_reg_low); |
| src2_vreg = mir_graph_->SRegToVReg(rl_src2.s_reg_low); |
| if (src1_vreg == src2_vreg) { |
| StoreValueWide(rl_dest, rl_result); |
| return true; |
| } |
| |
| // Free registers to make some room for the second operand. |
| // But don't try to free ourselves or promoted registers. |
| if (res_vreg != src1_vreg && |
| IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) { |
| FreeTemp(rl_src1.reg); |
| } |
| rl_src2 = LoadValueWide(rl_src2, kCoreReg); |
| |
| // Do we have a free register for intermediate calculations? |
| RegStorage tmp = AllocTemp(false); |
| if (tmp == RegStorage::InvalidReg()) { |
| /* |
| * No, will use 'edi'. |
| * |
| * As mentioned above we have 4 temporary and 2 promotable |
| * caller-save registers. Therefore, we assume that a free |
| * register can be allocated only if 'esi' and 'edi' are |
| * already used as operands. If number of promotable registers |
| * increases from 2 to 4 then our assumption fails and operand |
| * data is corrupted. |
| * Let's DCHECK it. |
| */ |
| DCHECK(IsTemp(rl_src2.reg.GetLow()) && |
| IsTemp(rl_src2.reg.GetHigh()) && |
| IsTemp(rl_result.reg.GetLow()) && |
| IsTemp(rl_result.reg.GetHigh())); |
| tmp = rs_rDI; |
| NewLIR1(kX86Push32R, tmp.GetReg()); |
| } |
| |
| // Now we are ready to do calculations. |
| OpRegReg(kOpMov, tmp, rl_result.reg.GetLow()); |
| OpRegReg(kOpSub, tmp, rl_src2.reg.GetLow()); |
| OpRegReg(kOpMov, tmp, rl_result.reg.GetHigh()); |
| OpRegReg(kOpSbc, tmp, rl_src2.reg.GetHigh()); |
| |
| // Let's put pop 'edi' here to break a bit the dependency chain. |
| if (tmp == rs_rDI) { |
| NewLIR1(kX86Pop32R, tmp.GetReg()); |
| } |
| |
| // Conditionally move the other integer into the destination register. |
| ConditionCode cc = is_min ? kCondGe : kCondLt; |
| OpCondRegReg(kOpCmov, cc, rl_result.reg.GetLow(), rl_src2.reg.GetLow()); |
| OpCondRegReg(kOpCmov, cc, rl_result.reg.GetHigh(), rl_src2.reg.GetHigh()); |
| StoreValueWide(rl_dest, rl_result); |
| return true; |
| } |
| |
| // Get the two arguments to the invoke and place them in GP registers. |
| RegLocation rl_src1 = info->args[0]; |
| RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1]; |
| rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg); |
| rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg); |
| |
| RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info); |
| RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| |
| /* |
| * If the result register is the same as the second element, then we need to be careful. |
| * The reason is that the first copy will inadvertently clobber the second element with |
| * the first one thus yielding the wrong result. Thus we do a swap in that case. |
| */ |
| if (rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { |
| std::swap(rl_src1, rl_src2); |
| } |
| |
| // Pick the first integer as min/max. |
| OpRegCopy(rl_result.reg, rl_src1.reg); |
| |
| // If the integers are both in the same register, then there is nothing else to do |
| // because they are equal and we have already moved one into the result. |
| if (rl_src1.reg.GetReg() != rl_src2.reg.GetReg()) { |
| // It is possible we didn't pick correctly so do the actual comparison now. |
| OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); |
| |
| // Conditionally move the other integer into the destination register. |
| ConditionCode condition_code = is_min ? kCondGt : kCondLt; |
| OpCondRegReg(kOpCmov, condition_code, rl_result.reg, rl_src2.reg); |
| } |
| |
| if (is_long) { |
| StoreValueWide(rl_dest, rl_result); |
| } else { |
| StoreValue(rl_dest, rl_result); |
| } |
| return true; |
| } |
| |
| bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { |
| RegLocation rl_src_address = info->args[0]; // long address |
| RegLocation rl_address; |
| if (!cu_->target64) { |
| rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0] |
| rl_address = LoadValue(rl_src_address, kCoreReg); |
| } else { |
| rl_address = LoadValueWide(rl_src_address, kCoreReg); |
| } |
| RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info); |
| RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| // Unaligned access is allowed on x86. |
| LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile); |
| if (size == k64) { |
| StoreValueWide(rl_dest, rl_result); |
| } else { |
| DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); |
| StoreValue(rl_dest, rl_result); |
| } |
| return true; |
| } |
| |
| bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { |
| RegLocation rl_src_address = info->args[0]; // long address |
| RegLocation rl_address; |
| if (!cu_->target64) { |
| rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0] |
| rl_address = LoadValue(rl_src_address, kCoreReg); |
| } else { |
| rl_address = LoadValueWide(rl_src_address, kCoreReg); |
| } |
| RegLocation rl_src_value = info->args[2]; // [size] value |
| RegLocation rl_value; |
| if (size == k64) { |
| // Unaligned access is allowed on x86. |
| rl_value = LoadValueWide(rl_src_value, kCoreReg); |
| } else { |
| DCHECK(size == kSignedByte || size == kSignedHalf || size == k32); |
| // In 32-bit mode the only EAX..EDX registers can be used with Mov8MR. |
| if (!cu_->target64 && size == kSignedByte) { |
| rl_src_value = UpdateLocTyped(rl_src_value, kCoreReg); |
| if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) { |
| RegStorage temp = AllocateByteRegister(); |
| OpRegCopy(temp, rl_src_value.reg); |
| rl_value.reg = temp; |
| } else { |
| rl_value = LoadValue(rl_src_value, kCoreReg); |
| } |
| } else { |
| rl_value = LoadValue(rl_src_value, kCoreReg); |
| } |
| } |
| StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile); |
| return true; |
| } |
| |
| void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) { |
| NewLIR5(kX86Lea32RA, r_base.GetReg(), reg1.GetReg(), reg2.GetReg(), scale, offset); |
| } |
| |
| void X86Mir2Lir::OpTlsCmp(ThreadOffset<4> offset, int val) { |
| DCHECK_EQ(kX86, cu_->instruction_set); |
| NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val); |
| } |
| |
| void X86Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) { |
| DCHECK_EQ(kX86_64, cu_->instruction_set); |
| NewLIR2(kX86Cmp16TI8, offset.Int32Value(), val); |
| } |
| |
| static bool IsInReg(X86Mir2Lir *pMir2Lir, const RegLocation &rl, RegStorage reg) { |
| return rl.reg.Valid() && rl.reg.GetReg() == reg.GetReg() && (pMir2Lir->IsLive(reg) || rl.home); |
| } |
| |
| bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { |
| DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64); |
| // Unused - RegLocation rl_src_unsafe = info->args[0]; |
| RegLocation rl_src_obj = info->args[1]; // Object - known non-null |
| RegLocation rl_src_offset = info->args[2]; // long low |
| if (!cu_->target64) { |
| rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] |
| } |
| RegLocation rl_src_expected = info->args[4]; // int, long or Object |
| // If is_long, high half is in info->args[5] |
| RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object |
| // If is_long, high half is in info->args[7] |
| |
| if (is_long && cu_->target64) { |
| // RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX. |
| FlushReg(rs_r0q); |
| Clobber(rs_r0q); |
| LockTemp(rs_r0q); |
| |
| RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); |
| RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg); |
| RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg); |
| LoadValueDirectWide(rl_src_expected, rs_r0q); |
| NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, |
| rl_new_value.reg.GetReg()); |
| |
| // After a store we need to insert barrier in case of potential load. Since the |
| // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated. |
| GenMemBarrier(kAnyAny); |
| |
| FreeTemp(rs_r0q); |
| } else if (is_long) { |
| // TODO: avoid unnecessary loads of SI and DI when the values are in registers. |
| // TODO: CFI support. |
| FlushAllRegs(); |
| LockCallTemps(); |
| RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_rAX, rs_rDX); |
| RegStorage r_tmp2 = RegStorage::MakeRegPair(rs_rBX, rs_rCX); |
| LoadValueDirectWideFixed(rl_src_expected, r_tmp1); |
| LoadValueDirectWideFixed(rl_src_new_value, r_tmp2); |
| // FIXME: needs 64-bit update. |
| const bool obj_in_di = IsInReg(this, rl_src_obj, rs_rDI); |
| const bool obj_in_si = IsInReg(this, rl_src_obj, rs_rSI); |
| DCHECK(!obj_in_si || !obj_in_di); |
| const bool off_in_di = IsInReg(this, rl_src_offset, rs_rDI); |
| const bool off_in_si = IsInReg(this, rl_src_offset, rs_rSI); |
| DCHECK(!off_in_si || !off_in_di); |
| // If obj/offset is in a reg, use that reg. Otherwise, use the empty reg. |
| RegStorage rs_obj = obj_in_di ? rs_rDI : obj_in_si ? rs_rSI : !off_in_di ? rs_rDI : rs_rSI; |
| RegStorage rs_off = off_in_si ? rs_rSI : off_in_di ? rs_rDI : !obj_in_si ? rs_rSI : rs_rDI; |
| bool push_di = (!obj_in_di && !off_in_di) && (rs_obj == rs_rDI || rs_off == rs_rDI); |
| bool push_si = (!obj_in_si && !off_in_si) && (rs_obj == rs_rSI || rs_off == rs_rSI); |
| if (push_di) { |
| NewLIR1(kX86Push32R, rs_rDI.GetReg()); |
| MarkTemp(rs_rDI); |
| LockTemp(rs_rDI); |
| } |
| if (push_si) { |
| NewLIR1(kX86Push32R, rs_rSI.GetReg()); |
| MarkTemp(rs_rSI); |
| LockTemp(rs_rSI); |
| } |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); |
| const size_t push_offset = (push_si ? 4u : 0u) + (push_di ? 4u : 0u); |
| if (!obj_in_si && !obj_in_di) { |
| LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_obj.s_reg_low) + push_offset, rs_obj); |
| // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it. |
| DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info)); |
| int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u; |
| AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false); |
| } |
| if (!off_in_si && !off_in_di) { |
| LoadWordDisp(rs_rX86_SP, SRegOffset(rl_src_offset.s_reg_low) + push_offset, rs_off); |
| // Dalvik register annotation in LoadBaseIndexedDisp() used wrong offset. Fix it. |
| DCHECK(!DECODE_ALIAS_INFO_WIDE(last_lir_insn_->flags.alias_info)); |
| int reg_id = DECODE_ALIAS_INFO_REG(last_lir_insn_->flags.alias_info) - push_offset / 4u; |
| AnnotateDalvikRegAccess(last_lir_insn_, reg_id, true, false); |
| } |
| NewLIR4(kX86LockCmpxchg64A, rs_obj.GetReg(), rs_off.GetReg(), 0, 0); |
| |
| // After a store we need to insert barrier to prevent reordering with either |
| // earlier or later memory accesses. Since |
| // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated, |
| // and it will be associated with the cmpxchg instruction, preventing both. |
| GenMemBarrier(kAnyAny); |
| |
| if (push_si) { |
| FreeTemp(rs_rSI); |
| UnmarkTemp(rs_rSI); |
| NewLIR1(kX86Pop32R, rs_rSI.GetReg()); |
| } |
| if (push_di) { |
| FreeTemp(rs_rDI); |
| UnmarkTemp(rs_rDI); |
| NewLIR1(kX86Pop32R, rs_rDI.GetReg()); |
| } |
| FreeCallTemps(); |
| } else { |
| // EAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in EAX. |
| FlushReg(rs_r0); |
| Clobber(rs_r0); |
| LockTemp(rs_r0); |
| |
| RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); |
| RegLocation rl_new_value = LoadValue(rl_src_new_value, LocToRegClass(rl_src_new_value)); |
| |
| if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) { |
| // Mark card for object assuming new value is stored. |
| FreeTemp(rs_r0); // Temporarily release EAX for MarkGCCard(). |
| MarkGCCard(rl_new_value.reg, rl_object.reg); |
| LockTemp(rs_r0); |
| } |
| |
| RegLocation rl_offset; |
| if (cu_->target64) { |
| rl_offset = LoadValueWide(rl_src_offset, kCoreReg); |
| } else { |
| rl_offset = LoadValue(rl_src_offset, kCoreReg); |
| } |
| LoadValueDirect(rl_src_expected, rs_r0); |
| NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, |
| rl_new_value.reg.GetReg()); |
| |
| // After a store we need to insert barrier to prevent reordering with either |
| // earlier or later memory accesses. Since |
| // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated, |
| // and it will be associated with the cmpxchg instruction, preventing both. |
| GenMemBarrier(kAnyAny); |
| |
| FreeTemp(rs_r0); |
| } |
| |
| // Convert ZF to boolean |
| RegLocation rl_dest = InlineTarget(info); // boolean place for result |
| RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| RegStorage result_reg = rl_result.reg; |
| |
| // For 32-bit, SETcc only works with EAX..EDX. |
| if (!IsByteRegister(result_reg)) { |
| result_reg = AllocateByteRegister(); |
| } |
| NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ); |
| NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg()); |
| if (IsTemp(result_reg)) { |
| FreeTemp(result_reg); |
| } |
| StoreValue(rl_dest, rl_result); |
| return true; |
| } |
| |
| void X86Mir2Lir::SwapBits(RegStorage result_reg, int shift, int32_t value) { |
| RegStorage r_temp = AllocTemp(); |
| OpRegCopy(r_temp, result_reg); |
| OpRegImm(kOpLsr, result_reg, shift); |
| OpRegImm(kOpAnd, r_temp, value); |
| OpRegImm(kOpAnd, result_reg, value); |
| OpRegImm(kOpLsl, r_temp, shift); |
| OpRegReg(kOpOr, result_reg, r_temp); |
| FreeTemp(r_temp); |
| } |
| |
| void X86Mir2Lir::SwapBits64(RegStorage result_reg, int shift, int64_t value) { |
| RegStorage r_temp = AllocTempWide(); |
| OpRegCopy(r_temp, result_reg); |
| OpRegImm(kOpLsr, result_reg, shift); |
| RegStorage r_value = AllocTempWide(); |
| LoadConstantWide(r_value, value); |
| OpRegReg(kOpAnd, r_temp, r_value); |
| OpRegReg(kOpAnd, result_reg, r_value); |
| OpRegImm(kOpLsl, r_temp, shift); |
| OpRegReg(kOpOr, result_reg, r_temp); |
| FreeTemp(r_temp); |
| FreeTemp(r_value); |
| } |
| |
| bool X86Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) { |
| RegLocation rl_src_i = info->args[0]; |
| RegLocation rl_i = (size == k64) ? LoadValueWide(rl_src_i, kCoreReg) |
| : LoadValue(rl_src_i, kCoreReg); |
| RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info); |
| RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| if (size == k64) { |
| if (cu_->instruction_set == kX86_64) { |
| /* Use one bswap instruction to reverse byte order first and then use 3 rounds of |
| swapping bits to reverse bits in a long number x. Using bswap to save instructions |
| compared to generic luni implementation which has 5 rounds of swapping bits. |
| x = bswap x |
| x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555; |
| x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333; |
| x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F; |
| */ |
| OpRegReg(kOpRev, rl_result.reg, rl_i.reg); |
| SwapBits64(rl_result.reg, 1, 0x5555555555555555); |
| SwapBits64(rl_result.reg, 2, 0x3333333333333333); |
| SwapBits64(rl_result.reg, 4, 0x0f0f0f0f0f0f0f0f); |
| StoreValueWide(rl_dest, rl_result); |
| return true; |
| } |
| RegStorage r_i_low = rl_i.reg.GetLow(); |
| if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) { |
| // First REV shall clobber rl_result.reg.GetLowReg(), save the value in a temp for the second |
| // REV. |
| r_i_low = AllocTemp(); |
| OpRegCopy(r_i_low, rl_i.reg); |
| } |
| OpRegReg(kOpRev, rl_result.reg.GetLow(), rl_i.reg.GetHigh()); |
| OpRegReg(kOpRev, rl_result.reg.GetHigh(), r_i_low); |
| if (rl_i.reg.GetLowReg() == rl_result.reg.GetLowReg()) { |
| FreeTemp(r_i_low); |
| } |
| SwapBits(rl_result.reg.GetLow(), 1, 0x55555555); |
| SwapBits(rl_result.reg.GetLow(), 2, 0x33333333); |
| SwapBits(rl_result.reg.GetLow(), 4, 0x0f0f0f0f); |
| SwapBits(rl_result.reg.GetHigh(), 1, 0x55555555); |
| SwapBits(rl_result.reg.GetHigh(), 2, 0x33333333); |
| SwapBits(rl_result.reg.GetHigh(), 4, 0x0f0f0f0f); |
| StoreValueWide(rl_dest, rl_result); |
| } else { |
| OpRegReg(kOpRev, rl_result.reg, rl_i.reg); |
| SwapBits(rl_result.reg, 1, 0x55555555); |
| SwapBits(rl_result.reg, 2, 0x33333333); |
| SwapBits(rl_result.reg, 4, 0x0f0f0f0f); |
| StoreValue(rl_dest, rl_result); |
| } |
| return true; |
| } |
| |
| LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { |
| CHECK(base_of_code_ != nullptr); |
| |
| // Address the start of the method |
| RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); |
| if (rl_method.wide) { |
| LoadValueDirectWideFixed(rl_method, reg); |
| } else { |
| LoadValueDirectFixed(rl_method, reg); |
| } |
| store_method_addr_used_ = true; |
| |
| // Load the proper value from the literal area. |
| // We don't know the proper offset for the value, so pick one that will force |
| // 4 byte offset. We will fix this up in the assembler later to have the right |
| // value. |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); |
| LIR *res = RawLIR(current_dalvik_offset_, kX86Mov32RM, reg.GetReg(), reg.GetReg(), 256, |
| 0, 0, target); |
| res->target = target; |
| res->flags.fixup = kFixupLoad; |
| store_method_addr_used_ = true; |
| return res; |
| } |
| |
| LIR* X86Mir2Lir::OpVldm(RegStorage r_base, int count) { |
| LOG(FATAL) << "Unexpected use of OpVldm for x86"; |
| return NULL; |
| } |
| |
| LIR* X86Mir2Lir::OpVstm(RegStorage r_base, int count) { |
| LOG(FATAL) << "Unexpected use of OpVstm for x86"; |
| return NULL; |
| } |
| |
| void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, |
| RegLocation rl_result, int lit, |
| int first_bit, int second_bit) { |
| RegStorage t_reg = AllocTemp(); |
| OpRegRegImm(kOpLsl, t_reg, rl_src.reg, second_bit - first_bit); |
| OpRegRegReg(kOpAdd, rl_result.reg, rl_src.reg, t_reg); |
| FreeTemp(t_reg); |
| if (first_bit != 0) { |
| OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit); |
| } |
| } |
| |
| void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) { |
| if (cu_->target64) { |
| DCHECK(reg.Is64Bit()); |
| |
| NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0); |
| } else { |
| DCHECK(reg.IsPair()); |
| |
| // We are not supposed to clobber the incoming storage, so allocate a temporary. |
| RegStorage t_reg = AllocTemp(); |
| // Doing an OR is a quick way to check if both registers are zero. This will set the flags. |
| OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh()); |
| // The temp is no longer needed so free it at this time. |
| FreeTemp(t_reg); |
| } |
| |
| // In case of zero, throw ArithmeticException. |
| GenDivZeroCheck(kCondEq); |
| } |
| |
| void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index, |
| RegStorage array_base, |
| int len_offset) { |
| class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath { |
| public: |
| ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch, |
| RegStorage index, RegStorage array_base, int32_t len_offset) |
| : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch), |
| index_(index), array_base_(array_base), len_offset_(len_offset) { |
| } |
| |
| void Compile() OVERRIDE { |
| m2l_->ResetRegPool(); |
| m2l_->ResetDefTracking(); |
| GenerateTargetLabel(kPseudoThrowTarget); |
| |
| RegStorage new_index = index_; |
| // Move index out of kArg1, either directly to kArg0, or to kArg2. |
| // TODO: clean-up to check not a number but with type |
| if (index_ == m2l_->TargetReg(kArg1, kNotWide)) { |
| if (array_base_ == m2l_->TargetReg(kArg0, kRef)) { |
| m2l_->OpRegCopy(m2l_->TargetReg(kArg2, kNotWide), index_); |
| new_index = m2l_->TargetReg(kArg2, kNotWide); |
| } else { |
| m2l_->OpRegCopy(m2l_->TargetReg(kArg0, kNotWide), index_); |
| new_index = m2l_->TargetReg(kArg0, kNotWide); |
| } |
| } |
| // Load array length to kArg1. |
| X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_); |
| x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_); |
| x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, new_index, |
| m2l_->TargetReg(kArg1, kNotWide), true); |
| } |
| |
| private: |
| const RegStorage index_; |
| const RegStorage array_base_; |
| const int32_t len_offset_; |
| }; |
| |
| OpRegMem(kOpCmp, index, array_base, len_offset); |
| MarkPossibleNullPointerException(0); |
| LIR* branch = OpCondBranch(kCondUge, nullptr); |
| AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch, |
| index, array_base, len_offset)); |
| } |
| |
| void X86Mir2Lir::GenArrayBoundsCheck(int32_t index, |
| RegStorage array_base, |
| int32_t len_offset) { |
| class ArrayBoundsCheckSlowPath : public Mir2Lir::LIRSlowPath { |
| public: |
| ArrayBoundsCheckSlowPath(Mir2Lir* m2l, LIR* branch, |
| int32_t index, RegStorage array_base, int32_t len_offset) |
| : LIRSlowPath(m2l, m2l->GetCurrentDexPc(), branch), |
| index_(index), array_base_(array_base), len_offset_(len_offset) { |
| } |
| |
| void Compile() OVERRIDE { |
| m2l_->ResetRegPool(); |
| m2l_->ResetDefTracking(); |
| GenerateTargetLabel(kPseudoThrowTarget); |
| |
| // Load array length to kArg1. |
| X86Mir2Lir* x86_m2l = static_cast<X86Mir2Lir*>(m2l_); |
| x86_m2l->OpRegMem(kOpMov, m2l_->TargetReg(kArg1, kNotWide), array_base_, len_offset_); |
| x86_m2l->LoadConstant(m2l_->TargetReg(kArg0, kNotWide), index_); |
| x86_m2l->CallRuntimeHelperRegReg(kQuickThrowArrayBounds, m2l_->TargetReg(kArg0, kNotWide), |
| m2l_->TargetReg(kArg1, kNotWide), true); |
| } |
| |
| private: |
| const int32_t index_; |
| const RegStorage array_base_; |
| const int32_t len_offset_; |
| }; |
| |
| NewLIR3(IS_SIMM8(index) ? kX86Cmp32MI8 : kX86Cmp32MI, array_base.GetReg(), len_offset, index); |
| MarkPossibleNullPointerException(0); |
| LIR* branch = OpCondBranch(kCondLs, nullptr); |
| AddSlowPath(new (arena_) ArrayBoundsCheckSlowPath(this, branch, |
| index, array_base, len_offset)); |
| } |
| |
| // Test suspend flag, return target of taken suspend branch |
| LIR* X86Mir2Lir::OpTestSuspend(LIR* target) { |
| if (cu_->target64) { |
| OpTlsCmp(Thread::ThreadFlagsOffset<8>(), 0); |
| } else { |
| OpTlsCmp(Thread::ThreadFlagsOffset<4>(), 0); |
| } |
| return OpCondBranch((target == NULL) ? kCondNe : kCondEq, target); |
| } |
| |
| // Decrement register and branch on condition |
| LIR* X86Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) { |
| OpRegImm(kOpSub, reg, 1); |
| return OpCondBranch(c_code, target); |
| } |
| |
| bool X86Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, |
| RegLocation rl_src, RegLocation rl_dest, int lit) { |
| LOG(FATAL) << "Unexpected use of smallLiteralDive in x86"; |
| return false; |
| } |
| |
| bool X86Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) { |
| LOG(FATAL) << "Unexpected use of easyMultiply in x86"; |
| return false; |
| } |
| |
| LIR* X86Mir2Lir::OpIT(ConditionCode cond, const char* guide) { |
| LOG(FATAL) << "Unexpected use of OpIT in x86"; |
| return NULL; |
| } |
| |
| void X86Mir2Lir::OpEndIT(LIR* it) { |
| LOG(FATAL) << "Unexpected use of OpEndIT in x86"; |
| } |
| |
| void X86Mir2Lir::GenImulRegImm(RegStorage dest, RegStorage src, int val) { |
| switch (val) { |
| case 0: |
| NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg()); |
| break; |
| case 1: |
| OpRegCopy(dest, src); |
| break; |
| default: |
| OpRegRegImm(kOpMul, dest, src, val); |
| break; |
| } |
| } |
| |
| void X86Mir2Lir::GenImulMemImm(RegStorage dest, int sreg, int displacement, int val) { |
| // All memory accesses below reference dalvik regs. |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); |
| |
| LIR *m; |
| switch (val) { |
| case 0: |
| NewLIR2(kX86Xor32RR, dest.GetReg(), dest.GetReg()); |
| break; |
| case 1: |
| LoadBaseDisp(rs_rX86_SP, displacement, dest, k32, kNotVolatile); |
| break; |
| default: |
| m = NewLIR4(IS_SIMM8(val) ? kX86Imul32RMI8 : kX86Imul32RMI, dest.GetReg(), |
| rs_rX86_SP.GetReg(), displacement, val); |
| AnnotateDalvikRegAccess(m, displacement >> 2, true /* is_load */, true /* is_64bit */); |
| break; |
| } |
| } |
| |
| void X86Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, |
| RegLocation rl_src2, int flags) { |
| if (!cu_->target64) { |
| // Some x86 32b ops are fallback. |
| switch (opcode) { |
| case Instruction::NOT_LONG: |
| case Instruction::DIV_LONG: |
| case Instruction::DIV_LONG_2ADDR: |
| case Instruction::REM_LONG: |
| case Instruction::REM_LONG_2ADDR: |
| Mir2Lir::GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags); |
| return; |
| |
| default: |
| // Everything else we can handle. |
| break; |
| } |
| } |
| |
| switch (opcode) { |
| case Instruction::NOT_LONG: |
| GenNotLong(rl_dest, rl_src2); |
| return; |
| |
| case Instruction::ADD_LONG: |
| case Instruction::ADD_LONG_2ADDR: |
| GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); |
| return; |
| |
| case Instruction::SUB_LONG: |
| case Instruction::SUB_LONG_2ADDR: |
| GenLongArith(rl_dest, rl_src1, rl_src2, opcode, false); |
| return; |
| |
| case Instruction::MUL_LONG: |
| case Instruction::MUL_LONG_2ADDR: |
| GenMulLong(opcode, rl_dest, rl_src1, rl_src2, flags); |
| return; |
| |
| case Instruction::DIV_LONG: |
| case Instruction::DIV_LONG_2ADDR: |
| GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags); |
| return; |
| |
| case Instruction::REM_LONG: |
| case Instruction::REM_LONG_2ADDR: |
| GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags); |
| return; |
| |
| case Instruction::AND_LONG_2ADDR: |
| case Instruction::AND_LONG: |
| GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); |
| return; |
| |
| case Instruction::OR_LONG: |
| case Instruction::OR_LONG_2ADDR: |
| GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); |
| return; |
| |
| case Instruction::XOR_LONG: |
| case Instruction::XOR_LONG_2ADDR: |
| GenLongArith(rl_dest, rl_src1, rl_src2, opcode, true); |
| return; |
| |
| case Instruction::NEG_LONG: |
| GenNegLong(rl_dest, rl_src2); |
| return; |
| |
| default: |
| LOG(FATAL) << "Invalid long arith op"; |
| return; |
| } |
| } |
| |
| bool X86Mir2Lir::GenMulLongConst(RegLocation rl_dest, RegLocation rl_src1, int64_t val, int flags) { |
| // All memory accesses below reference dalvik regs. |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); |
| |
| if (val == 0) { |
| RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| if (cu_->target64) { |
| OpRegReg(kOpXor, rl_result.reg, rl_result.reg); |
| } else { |
| OpRegReg(kOpXor, rl_result.reg.GetLow(), rl_result.reg.GetLow()); |
| OpRegReg(kOpXor, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); |
| } |
| StoreValueWide(rl_dest, rl_result); |
| return true; |
| } else if (val == 1) { |
| StoreValueWide(rl_dest, rl_src1); |
| return true; |
| } else if (val == 2) { |
| GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src1, rl_src1, flags); |
| return true; |
| } else if (IsPowerOfTwo(val)) { |
| int shift_amount = LowestSetBit(val); |
| if (!PartiallyIntersects(rl_src1, rl_dest)) { |
| rl_src1 = LoadValueWide(rl_src1, kCoreReg); |
| RegLocation rl_result = GenShiftImmOpLong(Instruction::SHL_LONG, rl_dest, rl_src1, |
| shift_amount, flags); |
| StoreValueWide(rl_dest, rl_result); |
| return true; |
| } |
| } |
| |
| // Okay, on 32b just bite the bullet and do it, still better than the general case. |
| if (!cu_->target64) { |
| int32_t val_lo = Low32Bits(val); |
| int32_t val_hi = High32Bits(val); |
| // Prepare for explicit register usage. |
| ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2); |
| rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg); |
| bool src1_in_reg = rl_src1.location == kLocPhysReg; |
| int displacement = SRegOffset(rl_src1.s_reg_low); |
| |
| // ECX <- 1H * 2L |
| // EAX <- 1L * 2H |
| if (src1_in_reg) { |
| GenImulRegImm(rs_r1, rl_src1.reg.GetHigh(), val_lo); |
| GenImulRegImm(rs_r0, rl_src1.reg.GetLow(), val_hi); |
| } else { |
| GenImulMemImm(rs_r1, GetSRegHi(rl_src1.s_reg_low), displacement + HIWORD_OFFSET, val_lo); |
| GenImulMemImm(rs_r0, rl_src1.s_reg_low, displacement + LOWORD_OFFSET, val_hi); |
| } |
| |
| // ECX <- ECX + EAX (2H * 1L) + (1H * 2L) |
| NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg()); |
| |
| // EAX <- 2L |
| LoadConstantNoClobber(rs_r0, val_lo); |
| |
| // EDX:EAX <- 2L * 1L (double precision) |
| if (src1_in_reg) { |
| NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg()); |
| } else { |
| LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET); |
| AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is_64bit */); |
| } |
| |
| // EDX <- EDX + ECX (add high words) |
| NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg()); |
| |
| // Result is EDX:EAX |
| RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, |
| RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG}; |
| StoreValueWide(rl_dest, rl_result); |
| return true; |
| } |
| return false; |
| } |
| |
| void X86Mir2Lir::GenMulLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, |
| RegLocation rl_src2, int flags) { |
| if (rl_src1.is_const) { |
| std::swap(rl_src1, rl_src2); |
| } |
| |
| if (rl_src2.is_const) { |
| if (GenMulLongConst(rl_dest, rl_src1, mir_graph_->ConstantValueWide(rl_src2), flags)) { |
| return; |
| } |
| } |
| |
| // All memory accesses below reference dalvik regs. |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); |
| |
| if (cu_->target64) { |
| rl_src1 = LoadValueWide(rl_src1, kCoreReg); |
| rl_src2 = LoadValueWide(rl_src2, kCoreReg); |
| RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() && |
| rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { |
| NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); |
| } else if (rl_result.reg.GetReg() != rl_src1.reg.GetReg() && |
| rl_result.reg.GetReg() == rl_src2.reg.GetReg()) { |
| NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src1.reg.GetReg()); |
| } else if (rl_result.reg.GetReg() == rl_src1.reg.GetReg() && |
| rl_result.reg.GetReg() != rl_src2.reg.GetReg()) { |
| NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg()); |
| } else { |
| OpRegCopy(rl_result.reg, rl_src1.reg); |
| NewLIR2(kX86Imul64RR, rl_result.reg.GetReg(), rl_src2.reg.GetReg()); |
| } |
| StoreValueWide(rl_dest, rl_result); |
| return; |
| } |
| |
| // Not multiplying by a constant. Do it the hard way |
| // Check for V*V. We can eliminate a multiply in that case, as 2L*1H == 2H*1L. |
| bool is_square = mir_graph_->SRegToVReg(rl_src1.s_reg_low) == |
| mir_graph_->SRegToVReg(rl_src2.s_reg_low); |
| |
| // Prepare for explicit register usage. |
| ExplicitTempRegisterLock(this, 3, &rs_r0, &rs_r1, &rs_r2); |
| rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg); |
| rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg); |
| |
| // At this point, the VRs are in their home locations. |
| bool src1_in_reg = rl_src1.location == kLocPhysReg; |
| bool src2_in_reg = rl_src2.location == kLocPhysReg; |
| |
| // ECX <- 1H |
| if (src1_in_reg) { |
| NewLIR2(kX86Mov32RR, rs_r1.GetReg(), rl_src1.reg.GetHighReg()); |
| } else { |
| LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src1.s_reg_low) + HIWORD_OFFSET, rs_r1, k32, |
| kNotVolatile); |
| } |
| |
| if (is_square) { |
| // Take advantage of the fact that the values are the same. |
| // ECX <- ECX * 2L (1H * 2L) |
| if (src2_in_reg) { |
| NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg()); |
| } else { |
| int displacement = SRegOffset(rl_src2.s_reg_low); |
| LIR* m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(), |
| displacement + LOWORD_OFFSET); |
| AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is_64bit */); |
| } |
| |
| // ECX <- 2*ECX (2H * 1L) + (1H * 2L) |
| NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r1.GetReg()); |
| } else { |
| // EAX <- 2H |
| if (src2_in_reg) { |
| NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetHighReg()); |
| } else { |
| LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + HIWORD_OFFSET, rs_r0, k32, |
| kNotVolatile); |
| } |
| |
| // EAX <- EAX * 1L (2H * 1L) |
| if (src1_in_reg) { |
| NewLIR2(kX86Imul32RR, rs_r0.GetReg(), rl_src1.reg.GetLowReg()); |
| } else { |
| int displacement = SRegOffset(rl_src1.s_reg_low); |
| LIR *m = NewLIR3(kX86Imul32RM, rs_r0.GetReg(), rs_rX86_SP.GetReg(), |
| displacement + LOWORD_OFFSET); |
| AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is_64bit */); |
| } |
| |
| // ECX <- ECX * 2L (1H * 2L) |
| if (src2_in_reg) { |
| NewLIR2(kX86Imul32RR, rs_r1.GetReg(), rl_src2.reg.GetLowReg()); |
| } else { |
| int displacement = SRegOffset(rl_src2.s_reg_low); |
| LIR *m = NewLIR3(kX86Imul32RM, rs_r1.GetReg(), rs_rX86_SP.GetReg(), |
| displacement + LOWORD_OFFSET); |
| AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is_64bit */); |
| } |
| |
| // ECX <- ECX + EAX (2H * 1L) + (1H * 2L) |
| NewLIR2(kX86Add32RR, rs_r1.GetReg(), rs_r0.GetReg()); |
| } |
| |
| // EAX <- 2L |
| if (src2_in_reg) { |
| NewLIR2(kX86Mov32RR, rs_r0.GetReg(), rl_src2.reg.GetLowReg()); |
| } else { |
| LoadBaseDisp(rs_rX86_SP, SRegOffset(rl_src2.s_reg_low) + LOWORD_OFFSET, rs_r0, k32, |
| kNotVolatile); |
| } |
| |
| // EDX:EAX <- 2L * 1L (double precision) |
| if (src1_in_reg) { |
| NewLIR1(kX86Mul32DaR, rl_src1.reg.GetLowReg()); |
| } else { |
| int displacement = SRegOffset(rl_src1.s_reg_low); |
| LIR *m = NewLIR2(kX86Mul32DaM, rs_rX86_SP.GetReg(), displacement + LOWORD_OFFSET); |
| AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is_64bit */); |
| } |
| |
| // EDX <- EDX + ECX (add high words) |
| NewLIR2(kX86Add32RR, rs_r2.GetReg(), rs_r1.GetReg()); |
| |
| // Result is EDX:EAX |
| RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, |
| RegStorage::MakeRegPair(rs_r0, rs_r2), INVALID_SREG, INVALID_SREG}; |
| StoreValueWide(rl_dest, rl_result); |
| } |
| |
| void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, |
| Instruction::Code op) { |
| DCHECK_EQ(rl_dest.location, kLocPhysReg); |
| X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false); |
| if (rl_src.location == kLocPhysReg) { |
| // Both operands are in registers. |
| // But we must ensure that rl_src is in pair |
| if (cu_->target64) { |
| NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg()); |
| } else { |
| rl_src = LoadValueWide(rl_src, kCoreReg); |
| if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) { |
| // The registers are the same, so we would clobber it before the use. |
| RegStorage temp_reg = AllocTemp(); |
| OpRegCopy(temp_reg, rl_dest.reg); |
| rl_src.reg.SetHighReg(temp_reg.GetReg()); |
| } |
| NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg()); |
| |
| x86op = GetOpcode(op, rl_dest, rl_src, true); |
| NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg()); |
| } |
| return; |
| } |
| |
| // RHS is in memory. |
| DCHECK((rl_src.location == kLocDalvikFrame) || |
| (rl_src.location == kLocCompilerTemp)); |
| int r_base = rs_rX86_SP.GetReg(); |
| int displacement = SRegOffset(rl_src.s_reg_low); |
| |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); |
| LIR *lir = NewLIR3(x86op, cu_->target64 ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(), |
| r_base, displacement + LOWORD_OFFSET); |
| AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is64bit */); |
| if (!cu_->target64) { |
| x86op = GetOpcode(op, rl_dest, rl_src, true); |
| lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET); |
| AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is64bit */); |
| } |
| } |
| |
| void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { |
| rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); |
| if (rl_dest.location == kLocPhysReg) { |
| // Ensure we are in a register pair |
| RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| |
| rl_src = UpdateLocWideTyped(rl_src, kCoreReg); |
| GenLongRegOrMemOp(rl_result, rl_src, op); |
| StoreFinalValueWide(rl_dest, rl_result); |
| return; |
| } else if (!cu_->target64 && Intersects(rl_src, rl_dest)) { |
| // Handle the case when src and dest are intersect. |
| rl_src = LoadValueWide(rl_src, kCoreReg); |
| RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| rl_src = UpdateLocWideTyped(rl_src, kCoreReg); |
| GenLongRegOrMemOp(rl_result, rl_src, op); |
| StoreFinalValueWide(rl_dest, rl_result); |
| return; |
| } |
| |
| // It wasn't in registers, so it better be in memory. |
| DCHECK((rl_dest.location == kLocDalvikFrame) || |
| (rl_dest.location == kLocCompilerTemp)); |
| rl_src = LoadValueWide(rl_src, kCoreReg); |
| |
| // Operate directly into memory. |
| X86OpCode x86op = GetOpcode(op, rl_dest, rl_src, false); |
| int r_base = rs_rX86_SP.GetReg(); |
| int displacement = SRegOffset(rl_dest.s_reg_low); |
| |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); |
| LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, |
| cu_->target64 ? rl_src.reg.GetReg() : rl_src.reg.GetLowReg()); |
| AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is64bit */); |
| AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, |
| false /* is_load */, true /* is64bit */); |
| if (!cu_->target64) { |
| x86op = GetOpcode(op, rl_dest, rl_src, true); |
| lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg()); |
| AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is64bit */); |
| AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, |
| false /* is_load */, true /* is64bit */); |
| } |
| } |
| |
| void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1, |
| RegLocation rl_src2, Instruction::Code op, |
| bool is_commutative) { |
| // Is this really a 2 operand operation? |
| switch (op) { |
| case Instruction::ADD_LONG_2ADDR: |
| case Instruction::SUB_LONG_2ADDR: |
| case Instruction::AND_LONG_2ADDR: |
| case Instruction::OR_LONG_2ADDR: |
| case Instruction::XOR_LONG_2ADDR: |
| if (GenerateTwoOperandInstructions()) { |
| GenLongArith(rl_dest, rl_src2, op); |
| return; |
| } |
| break; |
| |
| default: |
| break; |
| } |
| |
| if (rl_dest.location == kLocPhysReg) { |
| RegLocation rl_result = LoadValueWide(rl_src1, kCoreReg); |
| |
| // We are about to clobber the LHS, so it needs to be a temp. |
| rl_result = ForceTempWide(rl_result); |
| |
| // Perform the operation using the RHS. |
| rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg); |
| GenLongRegOrMemOp(rl_result, rl_src2, op); |
| |
| // And now record that the result is in the temp. |
| StoreFinalValueWide(rl_dest, rl_result); |
| return; |
| } |
| |
| // It wasn't in registers, so it better be in memory. |
| DCHECK((rl_dest.location == kLocDalvikFrame) || |
| (rl_dest.location == kLocCompilerTemp)); |
| rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg); |
| rl_src2 = UpdateLocWideTyped(rl_src2, kCoreReg); |
| |
| // Get one of the source operands into temporary register. |
| rl_src1 = LoadValueWide(rl_src1, kCoreReg); |
| if (cu_->target64) { |
| if (IsTemp(rl_src1.reg)) { |
| GenLongRegOrMemOp(rl_src1, rl_src2, op); |
| } else if (is_commutative) { |
| rl_src2 = LoadValueWide(rl_src2, kCoreReg); |
| // We need at least one of them to be a temporary. |
| if (!IsTemp(rl_src2.reg)) { |
| rl_src1 = ForceTempWide(rl_src1); |
| GenLongRegOrMemOp(rl_src1, rl_src2, op); |
| } else { |
| GenLongRegOrMemOp(rl_src2, rl_src1, op); |
| StoreFinalValueWide(rl_dest, rl_src2); |
| return; |
| } |
| } else { |
| // Need LHS to be the temp. |
| rl_src1 = ForceTempWide(rl_src1); |
| GenLongRegOrMemOp(rl_src1, rl_src2, op); |
| } |
| } else { |
| if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) { |
| GenLongRegOrMemOp(rl_src1, rl_src2, op); |
| } else if (is_commutative) { |
| rl_src2 = LoadValueWide(rl_src2, kCoreReg); |
| // We need at least one of them to be a temporary. |
| if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) { |
| rl_src1 = ForceTempWide(rl_src1); |
| GenLongRegOrMemOp(rl_src1, rl_src2, op); |
| } else { |
| GenLongRegOrMemOp(rl_src2, rl_src1, op); |
| StoreFinalValueWide(rl_dest, rl_src2); |
| return; |
| } |
| } else { |
| // Need LHS to be the temp. |
| rl_src1 = ForceTempWide(rl_src1); |
| GenLongRegOrMemOp(rl_src1, rl_src2, op); |
| } |
| } |
| |
| StoreFinalValueWide(rl_dest, rl_src1); |
| } |
| |
| void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { |
| if (cu_->target64) { |
| rl_src = LoadValueWide(rl_src, kCoreReg); |
| RegLocation rl_result; |
| rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| OpRegCopy(rl_result.reg, rl_src.reg); |
| OpReg(kOpNot, rl_result.reg); |
| StoreValueWide(rl_dest, rl_result); |
| } else { |
| LOG(FATAL) << "Unexpected use GenNotLong()"; |
| } |
| } |
| |
| void X86Mir2Lir::GenDivRemLongLit(RegLocation rl_dest, RegLocation rl_src, |
| int64_t imm, bool is_div) { |
| if (imm == 0) { |
| GenDivZeroException(); |
| } else if (imm == 1) { |
| if (is_div) { |
| // x / 1 == x. |
| StoreValueWide(rl_dest, rl_src); |
| } else { |
| // x % 1 == 0. |
| RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| LoadConstantWide(rl_result.reg, 0); |
| StoreValueWide(rl_dest, rl_result); |
| } |
| } else if (imm == -1) { // handle 0x8000000000000000 / -1 special case. |
| if (is_div) { |
| rl_src = LoadValueWide(rl_src, kCoreReg); |
| RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| RegStorage rs_temp = AllocTempWide(); |
| |
| OpRegCopy(rl_result.reg, rl_src.reg); |
| LoadConstantWide(rs_temp, 0x8000000000000000); |
| |
| // If x == MIN_LONG, return MIN_LONG. |
| OpRegReg(kOpCmp, rl_src.reg, rs_temp); |
| LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondEq); |
| |
| // For x != MIN_LONG, x / -1 == -x. |
| OpReg(kOpNeg, rl_result.reg); |
| |
| minint_branch->target = NewLIR0(kPseudoTargetLabel); |
| FreeTemp(rs_temp); |
| StoreValueWide(rl_dest, rl_result); |
| } else { |
| // x % -1 == 0. |
| RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| LoadConstantWide(rl_result.reg, 0); |
| StoreValueWide(rl_dest, rl_result); |
| } |
| } else if (is_div && IsPowerOfTwo(std::abs(imm))) { |
| // Division using shifting. |
| rl_src = LoadValueWide(rl_src, kCoreReg); |
| RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| if (IsSameReg(rl_result.reg, rl_src.reg)) { |
| RegStorage rs_temp = AllocTypedTempWide(false, kCoreReg); |
| rl_result.reg.SetReg(rs_temp.GetReg()); |
| } |
| LoadConstantWide(rl_result.reg, std::abs(imm) - 1); |
| OpRegReg(kOpAdd, rl_result.reg, rl_src.reg); |
| NewLIR2(kX86Test64RR, rl_src.reg.GetReg(), rl_src.reg.GetReg()); |
| OpCondRegReg(kOpCmov, kCondPl, rl_result.reg, rl_src.reg); |
| int shift_amount = LowestSetBit(imm); |
| OpRegImm(kOpAsr, rl_result.reg, shift_amount); |
| if (imm < 0) { |
| OpReg(kOpNeg, rl_result.reg); |
| } |
| StoreValueWide(rl_dest, rl_result); |
| } else { |
| CHECK(imm <= -2 || imm >= 2); |
| |
| FlushReg(rs_r0q); |
| Clobber(rs_r0q); |
| LockTemp(rs_r0q); |
| FlushReg(rs_r2q); |
| Clobber(rs_r2q); |
| LockTemp(rs_r2q); |
| |
| RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, |
| is_div ? rs_r2q : rs_r0q, INVALID_SREG, INVALID_SREG}; |
| |
| // Use H.S.Warren's Hacker's Delight Chapter 10 and |
| // T,Grablund, P.L.Montogomery's Division by invariant integers using multiplication. |
| int64_t magic; |
| int shift; |
| CalculateMagicAndShift(imm, magic, shift, true /* is_long */); |
| |
| /* |
| * For imm >= 2, |
| * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n > 0 |
| * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1, while n < 0. |
| * For imm <= -2, |
| * int(n/imm) = ceil(n/imm) = floor(M*n/2^S) +1 , while n > 0 |
| * int(n/imm) = floor(n/imm) = floor(M*n/2^S), while n < 0. |
| * We implement this algorithm in the following way: |
| * 1. multiply magic number m and numerator n, get the higher 64bit result in RDX |
| * 2. if imm > 0 and magic < 0, add numerator to RDX |
| * if imm < 0 and magic > 0, sub numerator from RDX |
| * 3. if S !=0, SAR S bits for RDX |
| * 4. add 1 to RDX if RDX < 0 |
| * 5. Thus, RDX is the quotient |
| */ |
| |
| // RAX = magic. |
| LoadConstantWide(rs_r0q, magic); |
| |
| // Multiply by numerator. |
| RegStorage numerator_reg; |
| if (!is_div || (imm > 0 && magic < 0) || (imm < 0 && magic > 0)) { |
| // We will need the value later. |
| rl_src = LoadValueWide(rl_src, kCoreReg); |
| numerator_reg = rl_src.reg; |
| |
| // RDX:RAX = magic * numerator. |
| NewLIR1(kX86Imul64DaR, numerator_reg.GetReg()); |
| } else { |
| // Only need this once. Multiply directly from the value. |
| rl_src = UpdateLocWideTyped(rl_src, kCoreReg); |
| if (rl_src.location != kLocPhysReg) { |
| // Okay, we can do this from memory. |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); |
| int displacement = SRegOffset(rl_src.s_reg_low); |
| // RDX:RAX = magic * numerator. |
| LIR *m = NewLIR2(kX86Imul64DaM, rs_rX86_SP.GetReg(), displacement); |
| AnnotateDalvikRegAccess(m, displacement >> 2, |
| true /* is_load */, true /* is_64bit */); |
| } else { |
| // RDX:RAX = magic * numerator. |
| NewLIR1(kX86Imul64DaR, rl_src.reg.GetReg()); |
| } |
| } |
| |
| if (imm > 0 && magic < 0) { |
| // Add numerator to RDX. |
| DCHECK(numerator_reg.Valid()); |
| OpRegReg(kOpAdd, rs_r2q, numerator_reg); |
| } else if (imm < 0 && magic > 0) { |
| DCHECK(numerator_reg.Valid()); |
| OpRegReg(kOpSub, rs_r2q, numerator_reg); |
| } |
| |
| // Do we need the shift? |
| if (shift != 0) { |
| // Shift RDX by 'shift' bits. |
| OpRegImm(kOpAsr, rs_r2q, shift); |
| } |
| |
| // Move RDX to RAX. |
| OpRegCopyWide(rs_r0q, rs_r2q); |
| |
| // Move sign bit to bit 0, zeroing the rest. |
| OpRegImm(kOpLsr, rs_r2q, 63); |
| |
| // RDX = RDX + RAX. |
| OpRegReg(kOpAdd, rs_r2q, rs_r0q); |
| |
| // Quotient is in RDX. |
| if (!is_div) { |
| // We need to compute the remainder. |
| // Remainder is divisor - (quotient * imm). |
| DCHECK(numerator_reg.Valid()); |
| OpRegCopyWide(rs_r0q, numerator_reg); |
| |
| // Imul doesn't support 64-bit imms. |
| if (imm > std::numeric_limits<int32_t>::max() || |
| imm < std::numeric_limits<int32_t>::min()) { |
| RegStorage rs_temp = AllocTempWide(); |
| LoadConstantWide(rs_temp, imm); |
| |
| // RAX = numerator * imm. |
| NewLIR2(kX86Imul64RR, rs_r2q.GetReg(), rs_temp.GetReg()); |
| |
| FreeTemp(rs_temp); |
| } else { |
| // RAX = numerator * imm. |
| int short_imm = static_cast<int>(imm); |
| NewLIR3(kX86Imul64RRI, rs_r2q.GetReg(), rs_r2q.GetReg(), short_imm); |
| } |
| |
| // RAX -= RDX. |
| OpRegReg(kOpSub, rs_r0q, rs_r2q); |
| |
| // Result in RAX. |
| } else { |
| // Result in RDX. |
| } |
| StoreValueWide(rl_dest, rl_result); |
| FreeTemp(rs_r0q); |
| FreeTemp(rs_r2q); |
| } |
| } |
| |
| void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, |
| RegLocation rl_src2, bool is_div, int flags) { |
| if (!cu_->target64) { |
| LOG(FATAL) << "Unexpected use GenDivRemLong()"; |
| return; |
| } |
| |
| if (rl_src2.is_const) { |
| DCHECK(rl_src2.wide); |
| int64_t imm = mir_graph_->ConstantValueWide(rl_src2); |
| GenDivRemLongLit(rl_dest, rl_src1, imm, is_div); |
| return; |
| } |
| |
| // We have to use fixed registers, so flush all the temps. |
| // Prepare for explicit register usage. |
| ExplicitTempRegisterLock(this, 4, &rs_r0q, &rs_r1q, &rs_r2q, &rs_r6q); |
| |
| // Load LHS into RAX. |
| LoadValueDirectWideFixed(rl_src1, rs_r0q); |
| |
| // Load RHS into RCX. |
| LoadValueDirectWideFixed(rl_src2, rs_r1q); |
| |
| // Copy LHS sign bit into RDX. |
| NewLIR0(kx86Cqo64Da); |
| |
| // Handle division by zero case. |
| if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) { |
| GenDivZeroCheckWide(rs_r1q); |
| } |
| |
| // Have to catch 0x8000000000000000/-1 case, or we will get an exception! |
| NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1); |
| LIR* minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); |
| |
| // RHS is -1. |
| LoadConstantWide(rs_r6q, 0x8000000000000000); |
| NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r6q.GetReg()); |
| LIR *minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); |
| |
| // In 0x8000000000000000/-1 case. |
| if (!is_div) { |
| // For DIV, RAX is already right. For REM, we need RDX 0. |
| NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg()); |
| } |
| LIR* done = NewLIR1(kX86Jmp8, 0); |
| |
| // Expected case. |
| minus_one_branch->target = NewLIR0(kPseudoTargetLabel); |
| minint_branch->target = minus_one_branch->target; |
| NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg()); |
| done->target = NewLIR0(kPseudoTargetLabel); |
| |
| // Result is in RAX for div and RDX for rem. |
| RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG}; |
| if (!is_div) { |
| rl_result.reg.SetReg(r2q); |
| } |
| |
| StoreValueWide(rl_dest, rl_result); |
| } |
| |
| void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { |
| rl_src = LoadValueWide(rl_src, kCoreReg); |
| RegLocation rl_result; |
| if (cu_->target64) { |
| rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| OpRegReg(kOpNeg, rl_result.reg, rl_src.reg); |
| } else { |
| rl_result = ForceTempWide(rl_src); |
| if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) && |
| ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) { |
| // The registers are the same, so we would clobber it before the use. |
| RegStorage temp_reg = AllocTemp(); |
| OpRegCopy(temp_reg, rl_result.reg); |
| rl_result.reg.SetHighReg(temp_reg.GetReg()); |
| } |
| OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow()); // rLow = -rLow |
| OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0); // rHigh = rHigh + CF |
| OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); // rHigh = -rHigh |
| } |
| StoreValueWide(rl_dest, rl_result); |
| } |
| |
| void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<4> thread_offset) { |
| DCHECK_EQ(kX86, cu_->instruction_set); |
| X86OpCode opcode = kX86Bkpt; |
| switch (op) { |
| case kOpCmp: opcode = kX86Cmp32RT; break; |
| case kOpMov: opcode = kX86Mov32RT; break; |
| default: |
| LOG(FATAL) << "Bad opcode: " << op; |
| break; |
| } |
| NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value()); |
| } |
| |
| void X86Mir2Lir::OpRegThreadMem(OpKind op, RegStorage r_dest, ThreadOffset<8> thread_offset) { |
| DCHECK_EQ(kX86_64, cu_->instruction_set); |
| X86OpCode opcode = kX86Bkpt; |
| if (cu_->target64 && r_dest.Is64BitSolo()) { |
| switch (op) { |
| case kOpCmp: opcode = kX86Cmp64RT; break; |
| case kOpMov: opcode = kX86Mov64RT; break; |
| default: |
| LOG(FATAL) << "Bad opcode(OpRegThreadMem 64): " << op; |
| break; |
| } |
| } else { |
| switch (op) { |
| case kOpCmp: opcode = kX86Cmp32RT; break; |
| case kOpMov: opcode = kX86Mov32RT; break; |
| default: |
| LOG(FATAL) << "Bad opcode: " << op; |
| break; |
| } |
| } |
| NewLIR2(opcode, r_dest.GetReg(), thread_offset.Int32Value()); |
| } |
| |
| /* |
| * Generate array load |
| */ |
| void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, |
| RegLocation rl_index, RegLocation rl_dest, int scale) { |
| RegisterClass reg_class = RegClassBySize(size); |
| int len_offset = mirror::Array::LengthOffset().Int32Value(); |
| RegLocation rl_result; |
| rl_array = LoadValue(rl_array, kRefReg); |
| |
| int data_offset; |
| if (size == k64 || size == kDouble) { |
| data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); |
| } else { |
| data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); |
| } |
| |
| bool constant_index = rl_index.is_const; |
| int32_t constant_index_value = 0; |
| if (!constant_index) { |
| rl_index = LoadValue(rl_index, kCoreReg); |
| } else { |
| constant_index_value = mir_graph_->ConstantValue(rl_index); |
| // If index is constant, just fold it into the data offset |
| data_offset += constant_index_value << scale; |
| // treat as non array below |
| rl_index.reg = RegStorage::InvalidReg(); |
| } |
| |
| /* null object? */ |
| GenNullCheck(rl_array.reg, opt_flags); |
| |
| if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { |
| if (constant_index) { |
| GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset); |
| } else { |
| GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset); |
| } |
| } |
| rl_result = EvalLoc(rl_dest, reg_class, true); |
| LoadBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_result.reg, size); |
| if ((size == k64) || (size == kDouble)) { |
| StoreValueWide(rl_dest, rl_result); |
| } else { |
| StoreValue(rl_dest, rl_result); |
| } |
| } |
| |
| /* |
| * Generate array store |
| * |
| */ |
| void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, |
| RegLocation rl_index, RegLocation rl_src, int scale, bool card_mark) { |
| RegisterClass reg_class = RegClassBySize(size); |
| int len_offset = mirror::Array::LengthOffset().Int32Value(); |
| int data_offset; |
| |
| if (size == k64 || size == kDouble) { |
| data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value(); |
| } else { |
| data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); |
| } |
| |
| rl_array = LoadValue(rl_array, kRefReg); |
| bool constant_index = rl_index.is_const; |
| int32_t constant_index_value = 0; |
| if (!constant_index) { |
| rl_index = LoadValue(rl_index, kCoreReg); |
| } else { |
| // If index is constant, just fold it into the data offset |
| constant_index_value = mir_graph_->ConstantValue(rl_index); |
| data_offset += constant_index_value << scale; |
| // treat as non array below |
| rl_index.reg = RegStorage::InvalidReg(); |
| } |
| |
| /* null object? */ |
| GenNullCheck(rl_array.reg, opt_flags); |
| |
| if (!(opt_flags & MIR_IGNORE_RANGE_CHECK)) { |
| if (constant_index) { |
| GenArrayBoundsCheck(constant_index_value, rl_array.reg, len_offset); |
| } else { |
| GenArrayBoundsCheck(rl_index.reg, rl_array.reg, len_offset); |
| } |
| } |
| if ((size == k64) || (size == kDouble)) { |
| rl_src = LoadValueWide(rl_src, reg_class); |
| } else { |
| rl_src = LoadValue(rl_src, reg_class); |
| } |
| // If the src reg can't be byte accessed, move it to a temp first. |
| if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) { |
| RegStorage temp = AllocTemp(); |
| OpRegCopy(temp, rl_src.reg); |
| StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size, opt_flags); |
| } else { |
| StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size, opt_flags); |
| } |
| if (card_mark) { |
| // Free rl_index if its a temp. Ensures there are 2 free regs for card mark. |
| if (!constant_index) { |
| FreeTemp(rl_index.reg); |
| } |
| MarkGCCard(rl_src.reg, rl_array.reg); |
| } |
| } |
| |
| RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, |
| RegLocation rl_src, int shift_amount, int flags) { |
| RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| if (cu_->target64) { |
| OpKind op = static_cast<OpKind>(0); /* Make gcc happy */ |
| switch (opcode) { |
| case Instruction::SHL_LONG: |
| case Instruction::SHL_LONG_2ADDR: |
| op = kOpLsl; |
| break; |
| case Instruction::SHR_LONG: |
| case Instruction::SHR_LONG_2ADDR: |
| op = kOpAsr; |
| break; |
| case Instruction::USHR_LONG: |
| case Instruction::USHR_LONG_2ADDR: |
| op = kOpLsr; |
| break; |
| default: |
| LOG(FATAL) << "Unexpected case"; |
| } |
| OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount); |
| } else { |
| switch (opcode) { |
| case Instruction::SHL_LONG: |
| case Instruction::SHL_LONG_2ADDR: |
| DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. |
| if (shift_amount == 32) { |
| OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); |
| LoadConstant(rl_result.reg.GetLow(), 0); |
| } else if (shift_amount > 31) { |
| OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); |
| NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32); |
| LoadConstant(rl_result.reg.GetLow(), 0); |
| } else { |
| OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); |
| OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); |
| NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), |
| shift_amount); |
| NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount); |
| } |
| break; |
| case Instruction::SHR_LONG: |
| case Instruction::SHR_LONG_2ADDR: |
| if (shift_amount == 32) { |
| OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); |
| OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); |
| NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); |
| } else if (shift_amount > 31) { |
| OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); |
| OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); |
| NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32); |
| NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); |
| } else { |
| OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); |
| OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); |
| NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), |
| shift_amount); |
| NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount); |
| } |
| break; |
| case Instruction::USHR_LONG: |
| case Instruction::USHR_LONG_2ADDR: |
| if (shift_amount == 32) { |
| OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); |
| LoadConstant(rl_result.reg.GetHigh(), 0); |
| } else if (shift_amount > 31) { |
| OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); |
| NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32); |
| LoadConstant(rl_result.reg.GetHigh(), 0); |
| } else { |
| OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetLow()); |
| OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); |
| NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), |
| shift_amount); |
| NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount); |
| } |
| break; |
| default: |
| LOG(FATAL) << "Unexpected case"; |
| } |
| } |
| return rl_result; |
| } |
| |
| void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, |
| RegLocation rl_src, RegLocation rl_shift, int flags) { |
| // Per spec, we only care about low 6 bits of shift amount. |
| int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f; |
| if (shift_amount == 0) { |
| rl_src = LoadValueWide(rl_src, kCoreReg); |
| StoreValueWide(rl_dest, rl_src); |
| return; |
| } else if (shift_amount == 1 && |
| (opcode == Instruction::SHL_LONG || opcode == Instruction::SHL_LONG_2ADDR)) { |
| // Need to handle this here to avoid calling StoreValueWide twice. |
| GenArithOpLong(Instruction::ADD_LONG, rl_dest, rl_src, rl_src, flags); |
| return; |
| } |
| if (PartiallyIntersects(rl_src, rl_dest)) { |
| GenShiftOpLong(opcode, rl_dest, rl_src, rl_shift); |
| return; |
| } |
| rl_src = LoadValueWide(rl_src, kCoreReg); |
| RegLocation rl_result = GenShiftImmOpLong(opcode, rl_dest, rl_src, shift_amount, flags); |
| StoreValueWide(rl_dest, rl_result); |
| } |
| |
| void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, |
| RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, |
| int flags) { |
| bool isConstSuccess = false; |
| switch (opcode) { |
| case Instruction::ADD_LONG: |
| case Instruction::AND_LONG: |
| case Instruction::OR_LONG: |
| case Instruction::XOR_LONG: |
| if (rl_src2.is_const) { |
| isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); |
| } else { |
| DCHECK(rl_src1.is_const); |
| isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); |
| } |
| break; |
| case Instruction::SUB_LONG: |
| case Instruction::SUB_LONG_2ADDR: |
| if (rl_src2.is_const) { |
| isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); |
| } else { |
| GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags); |
| isConstSuccess = true; |
| } |
| break; |
| case Instruction::ADD_LONG_2ADDR: |
| case Instruction::OR_LONG_2ADDR: |
| case Instruction::XOR_LONG_2ADDR: |
| case Instruction::AND_LONG_2ADDR: |
| if (rl_src2.is_const) { |
| if (GenerateTwoOperandInstructions()) { |
| isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode); |
| } else { |
| isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); |
| } |
| } else { |
| DCHECK(rl_src1.is_const); |
| isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); |
| } |
| break; |
| default: |
| isConstSuccess = false; |
| break; |
| } |
| |
| if (!isConstSuccess) { |
| // Default - bail to non-const handler. |
| GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags); |
| } |
| } |
| |
| bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) { |
| switch (op) { |
| case Instruction::AND_LONG_2ADDR: |
| case Instruction::AND_LONG: |
| return value == -1; |
| case Instruction::OR_LONG: |
| case Instruction::OR_LONG_2ADDR: |
| case Instruction::XOR_LONG: |
| case Instruction::XOR_LONG_2ADDR: |
| return value == 0; |
| default: |
| return false; |
| } |
| } |
| |
| X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocation rhs, |
| bool is_high_op) { |
| bool rhs_in_mem = rhs.location != kLocPhysReg; |
| bool dest_in_mem = dest.location != kLocPhysReg; |
| bool is64Bit = cu_->target64; |
| DCHECK(!rhs_in_mem || !dest_in_mem); |
| switch (op) { |
| case Instruction::ADD_LONG: |
| case Instruction::ADD_LONG_2ADDR: |
| if (dest_in_mem) { |
| return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR; |
| } else if (rhs_in_mem) { |
| return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM; |
| } |
| return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR; |
| case Instruction::SUB_LONG: |
| case Instruction::SUB_LONG_2ADDR: |
| if (dest_in_mem) { |
| return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR; |
| } else if (rhs_in_mem) { |
| return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM; |
| } |
| return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR; |
| case Instruction::AND_LONG_2ADDR: |
| case Instruction::AND_LONG: |
| if (dest_in_mem) { |
| return is64Bit ? kX86And64MR : kX86And32MR; |
| } |
| if (is64Bit) { |
| return rhs_in_mem ? kX86And64RM : kX86And64RR; |
| } |
| return rhs_in_mem ? kX86And32RM : kX86And32RR; |
| case Instruction::OR_LONG: |
| case Instruction::OR_LONG_2ADDR: |
| if (dest_in_mem) { |
| return is64Bit ? kX86Or64MR : kX86Or32MR; |
| } |
| if (is64Bit) { |
| return rhs_in_mem ? kX86Or64RM : kX86Or64RR; |
| } |
| return rhs_in_mem ? kX86Or32RM : kX86Or32RR; |
| case Instruction::XOR_LONG: |
| case Instruction::XOR_LONG_2ADDR: |
| if (dest_in_mem) { |
| return is64Bit ? kX86Xor64MR : kX86Xor32MR; |
| } |
| if (is64Bit) { |
| return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR; |
| } |
| return rhs_in_mem ? kX86Xor32RM : kX86Xor32RR; |
| default: |
| LOG(FATAL) << "Unexpected opcode: " << op; |
| return kX86Add32RR; |
| } |
| } |
| |
| X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, |
| int32_t value) { |
| bool in_mem = loc.location != kLocPhysReg; |
| bool is64Bit = cu_->target64; |
| bool byte_imm = IS_SIMM8(value); |
| DCHECK(in_mem || !loc.reg.IsFloat()); |
| switch (op) { |
| case Instruction::ADD_LONG: |
| case Instruction::ADD_LONG_2ADDR: |
| if (byte_imm) { |
| if (in_mem) { |
| return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8; |
| } |
| return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8; |
| } |
| if (in_mem) { |
| return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI; |
| } |
| return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI; |
| case Instruction::SUB_LONG: |
| case Instruction::SUB_LONG_2ADDR: |
| if (byte_imm) { |
| if (in_mem) { |
| return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8; |
| } |
| return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8; |
| } |
| if (in_mem) { |
| return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI; |
| } |
| return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI; |
| case Instruction::AND_LONG_2ADDR: |
| case Instruction::AND_LONG: |
| if (byte_imm) { |
| if (is64Bit) { |
| return in_mem ? kX86And64MI8 : kX86And64RI8; |
| } |
| return in_mem ? kX86And32MI8 : kX86And32RI8; |
| } |
| if (is64Bit) { |
| return in_mem ? kX86And64MI : kX86And64RI; |
| } |
| return in_mem ? kX86And32MI : kX86And32RI; |
| case Instruction::OR_LONG: |
| case Instruction::OR_LONG_2ADDR: |
| if (byte_imm) { |
| if (is64Bit) { |
| return in_mem ? kX86Or64MI8 : kX86Or64RI8; |
| } |
| return in_mem ? kX86Or32MI8 : kX86Or32RI8; |
| } |
| if (is64Bit) { |
| return in_mem ? kX86Or64MI : kX86Or64RI; |
| } |
| return in_mem ? kX86Or32MI : kX86Or32RI; |
| case Instruction::XOR_LONG: |
| case Instruction::XOR_LONG_2ADDR: |
| if (byte_imm) { |
| if (is64Bit) { |
| return in_mem ? kX86Xor64MI8 : kX86Xor64RI8; |
| } |
| return in_mem ? kX86Xor32MI8 : kX86Xor32RI8; |
| } |
| if (is64Bit) { |
| return in_mem ? kX86Xor64MI : kX86Xor64RI; |
| } |
| return in_mem ? kX86Xor32MI : kX86Xor32RI; |
| default: |
| LOG(FATAL) << "Unexpected opcode: " << op; |
| return kX86Add32MI; |
| } |
| } |
| |
| bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { |
| DCHECK(rl_src.is_const); |
| int64_t val = mir_graph_->ConstantValueWide(rl_src); |
| |
| if (cu_->target64) { |
| // We can do with imm only if it fits 32 bit |
| if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) { |
| return false; |
| } |
| |
| rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); |
| |
| if ((rl_dest.location == kLocDalvikFrame) || |
| (rl_dest.location == kLocCompilerTemp)) { |
| int r_base = rs_rX86_SP.GetReg(); |
| int displacement = SRegOffset(rl_dest.s_reg_low); |
| |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); |
| X86OpCode x86op = GetOpcode(op, rl_dest, false, val); |
| LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val); |
| AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is64bit */); |
| AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, |
| false /* is_load */, true /* is64bit */); |
| return true; |
| } |
| |
| RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| DCHECK_EQ(rl_result.location, kLocPhysReg); |
| DCHECK(!rl_result.reg.IsFloat()); |
| |
| X86OpCode x86op = GetOpcode(op, rl_result, false, val); |
| NewLIR2(x86op, rl_result.reg.GetReg(), val); |
| |
| StoreValueWide(rl_dest, rl_result); |
| return true; |
| } |
| |
| int32_t val_lo = Low32Bits(val); |
| int32_t val_hi = High32Bits(val); |
| rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); |
| |
| // Can we just do this into memory? |
| if ((rl_dest.location == kLocDalvikFrame) || |
| (rl_dest.location == kLocCompilerTemp)) { |
| int r_base = rs_rX86_SP.GetReg(); |
| int displacement = SRegOffset(rl_dest.s_reg_low); |
| |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); |
| if (!IsNoOp(op, val_lo)) { |
| X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); |
| LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val_lo); |
| AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is64bit */); |
| AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, |
| false /* is_load */, true /* is64bit */); |
| } |
| if (!IsNoOp(op, val_hi)) { |
| X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi); |
| LIR *lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, val_hi); |
| AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is64bit */); |
| AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, |
| false /* is_load */, true /* is64bit */); |
| } |
| return true; |
| } |
| |
| RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| DCHECK_EQ(rl_result.location, kLocPhysReg); |
| DCHECK(!rl_result.reg.IsFloat()); |
| |
| if (!IsNoOp(op, val_lo)) { |
| X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo); |
| NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo); |
| } |
| if (!IsNoOp(op, val_hi)) { |
| X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi); |
| NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi); |
| } |
| StoreValueWide(rl_dest, rl_result); |
| return true; |
| } |
| |
| bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, |
| RegLocation rl_src2, Instruction::Code op) { |
| DCHECK(rl_src2.is_const); |
| int64_t val = mir_graph_->ConstantValueWide(rl_src2); |
| |
| if (cu_->target64) { |
| // We can do with imm only if it fits 32 bit |
| if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) { |
| return false; |
| } |
| if (rl_dest.location == kLocPhysReg && |
| rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) { |
| X86OpCode x86op = GetOpcode(op, rl_dest, false, val); |
| OpRegCopy(rl_dest.reg, rl_src1.reg); |
| NewLIR2(x86op, rl_dest.reg.GetReg(), val); |
| StoreFinalValueWide(rl_dest, rl_dest); |
| return true; |
| } |
| |
| rl_src1 = LoadValueWide(rl_src1, kCoreReg); |
| // We need the values to be in a temporary |
| RegLocation rl_result = ForceTempWide(rl_src1); |
| |
| X86OpCode x86op = GetOpcode(op, rl_result, false, val); |
| NewLIR2(x86op, rl_result.reg.GetReg(), val); |
| |
| StoreFinalValueWide(rl_dest, rl_result); |
| return true; |
| } |
| |
| int32_t val_lo = Low32Bits(val); |
| int32_t val_hi = High32Bits(val); |
| rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); |
| rl_src1 = UpdateLocWideTyped(rl_src1, kCoreReg); |
| |
| // Can we do this directly into the destination registers? |
| if (rl_dest.location == kLocPhysReg && rl_src1.location == kLocPhysReg && |
| rl_dest.reg.GetLowReg() == rl_src1.reg.GetLowReg() && |
| rl_dest.reg.GetHighReg() == rl_src1.reg.GetHighReg() && !rl_dest.reg.IsFloat()) { |
| if (!IsNoOp(op, val_lo)) { |
| X86OpCode x86op = GetOpcode(op, rl_dest, false, val_lo); |
| NewLIR2(x86op, rl_dest.reg.GetLowReg(), val_lo); |
| } |
| if (!IsNoOp(op, val_hi)) { |
| X86OpCode x86op = GetOpcode(op, rl_dest, true, val_hi); |
| NewLIR2(x86op, rl_dest.reg.GetHighReg(), val_hi); |
| } |
| |
| StoreFinalValueWide(rl_dest, rl_dest); |
| return true; |
| } |
| |
| rl_src1 = LoadValueWide(rl_src1, kCoreReg); |
| DCHECK_EQ(rl_src1.location, kLocPhysReg); |
| |
| // We need the values to be in a temporary |
| RegLocation rl_result = ForceTempWide(rl_src1); |
| if (!IsNoOp(op, val_lo)) { |
| X86OpCode x86op = GetOpcode(op, rl_result, false, val_lo); |
| NewLIR2(x86op, rl_result.reg.GetLowReg(), val_lo); |
| } |
| if (!IsNoOp(op, val_hi)) { |
| X86OpCode x86op = GetOpcode(op, rl_result, true, val_hi); |
| NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi); |
| } |
| |
| StoreFinalValueWide(rl_dest, rl_result); |
| return true; |
| } |
| |
| // For final classes there are no sub-classes to check and so we can answer the instance-of |
| // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86. |
| void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, |
| RegLocation rl_dest, RegLocation rl_src) { |
| RegLocation object = LoadValue(rl_src, kRefReg); |
| RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| RegStorage result_reg = rl_result.reg; |
| |
| // For 32-bit, SETcc only works with EAX..EDX. |
| RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg; |
| if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) { |
| result_reg = AllocateByteRegister(); |
| } |
| |
| // Assume that there is no match. |
| LoadConstant(result_reg, 0); |
| LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL); |
| |
| // We will use this register to compare to memory below. |
| // References are 32 bit in memory, and 64 bit in registers (in 64 bit mode). |
| // For this reason, force allocation of a 32 bit register to use, so that the |
| // compare to memory will be done using a 32 bit comparision. |
| // The LoadRefDisp(s) below will work normally, even in 64 bit mode. |
| RegStorage check_class = AllocTemp(); |
| |
| // If Method* is already in a register, we can save a copy. |
| RegLocation rl_method = mir_graph_->GetMethodLoc(); |
| int32_t offset_of_type = mirror::Array::DataOffset( |
| sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + |
| (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx); |
| |
| if (rl_method.location == kLocPhysReg) { |
| if (use_declaring_class) { |
| LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), |
| check_class, kNotVolatile); |
| } else { |
| LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), |
| check_class, kNotVolatile); |
| LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile); |
| } |
| } else { |
| LoadCurrMethodDirect(check_class); |
| if (use_declaring_class) { |
| LoadRefDisp(check_class, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), |
| check_class, kNotVolatile); |
| } else { |
| LoadRefDisp(check_class, mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), |
| check_class, kNotVolatile); |
| LoadRefDisp(check_class, offset_of_type, check_class, kNotVolatile); |
| } |
| } |
| |
| // Compare the computed class to the class in the object. |
| DCHECK_EQ(object.location, kLocPhysReg); |
| OpRegMem(kOpCmp, check_class, object.reg, mirror::Object::ClassOffset().Int32Value()); |
| |
| // Set the low byte of the result to 0 or 1 from the compare condition code. |
| NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondEq); |
| |
| LIR* target = NewLIR0(kPseudoTargetLabel); |
| null_branchover->target = target; |
| FreeTemp(check_class); |
| if (IsTemp(result_reg)) { |
| OpRegCopy(rl_result.reg, result_reg); |
| FreeTemp(result_reg); |
| } |
| StoreValue(rl_dest, rl_result); |
| } |
| |
| void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, |
| RegLocation rl_lhs, RegLocation rl_rhs, int flags) { |
| OpKind op = kOpBkpt; |
| bool is_div_rem = false; |
| bool unary = false; |
| bool shift_op = false; |
| bool is_two_addr = false; |
| RegLocation rl_result; |
| switch (opcode) { |
| case Instruction::NEG_INT: |
| op = kOpNeg; |
| unary = true; |
| break; |
| case Instruction::NOT_INT: |
| op = kOpMvn; |
| unary = true; |
| break; |
| case Instruction::ADD_INT_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::ADD_INT: |
| op = kOpAdd; |
| break; |
| case Instruction::SUB_INT_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::SUB_INT: |
| op = kOpSub; |
| break; |
| case Instruction::MUL_INT_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::MUL_INT: |
| op = kOpMul; |
| break; |
| case Instruction::DIV_INT_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::DIV_INT: |
| op = kOpDiv; |
| is_div_rem = true; |
| break; |
| /* NOTE: returns in kArg1 */ |
| case Instruction::REM_INT_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::REM_INT: |
| op = kOpRem; |
| is_div_rem = true; |
| break; |
| case Instruction::AND_INT_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::AND_INT: |
| op = kOpAnd; |
| break; |
| case Instruction::OR_INT_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::OR_INT: |
| op = kOpOr; |
| break; |
| case Instruction::XOR_INT_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::XOR_INT: |
| op = kOpXor; |
| break; |
| case Instruction::SHL_INT_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::SHL_INT: |
| shift_op = true; |
| op = kOpLsl; |
| break; |
| case Instruction::SHR_INT_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::SHR_INT: |
| shift_op = true; |
| op = kOpAsr; |
| break; |
| case Instruction::USHR_INT_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::USHR_INT: |
| shift_op = true; |
| op = kOpLsr; |
| break; |
| default: |
| LOG(FATAL) << "Invalid word arith op: " << opcode; |
| } |
| |
| // Can we convert to a two address instruction? |
| if (!is_two_addr && |
| (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == |
| mir_graph_->SRegToVReg(rl_lhs.s_reg_low))) { |
| is_two_addr = true; |
| } |
| |
| if (!GenerateTwoOperandInstructions()) { |
| is_two_addr = false; |
| } |
| |
| // Get the div/rem stuff out of the way. |
| if (is_div_rem) { |
| rl_result = GenDivRem(rl_dest, rl_lhs, rl_rhs, op == kOpDiv, flags); |
| StoreValue(rl_dest, rl_result); |
| return; |
| } |
| |
| // If we generate any memory access below, it will reference a dalvik reg. |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); |
| |
| if (unary) { |
| rl_lhs = LoadValue(rl_lhs, kCoreReg); |
| rl_result = UpdateLocTyped(rl_dest, kCoreReg); |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| OpRegReg(op, rl_result.reg, rl_lhs.reg); |
| } else { |
| if (shift_op) { |
| // X86 doesn't require masking and must use ECX. |
| RegStorage t_reg = TargetReg(kCount, kNotWide); // rCX |
| LoadValueDirectFixed(rl_rhs, t_reg); |
| if (is_two_addr) { |
| // Can we do this directly into memory? |
| rl_result = UpdateLocTyped(rl_dest, kCoreReg); |
| if (rl_result.location != kLocPhysReg) { |
| // Okay, we can do this into memory |
| OpMemReg(op, rl_result, t_reg.GetReg()); |
| FreeTemp(t_reg); |
| return; |
| } else if (!rl_result.reg.IsFloat()) { |
| // Can do this directly into the result register |
| OpRegReg(op, rl_result.reg, t_reg); |
| FreeTemp(t_reg); |
| StoreFinalValue(rl_dest, rl_result); |
| return; |
| } |
| } |
| // Three address form, or we can't do directly. |
| rl_lhs = LoadValue(rl_lhs, kCoreReg); |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| OpRegRegReg(op, rl_result.reg, rl_lhs.reg, t_reg); |
| FreeTemp(t_reg); |
| } else { |
| // Multiply is 3 operand only (sort of). |
| if (is_two_addr && op != kOpMul) { |
| // Can we do this directly into memory? |
| rl_result = UpdateLocTyped(rl_dest, kCoreReg); |
| if (rl_result.location == kLocPhysReg) { |
| // Ensure res is in a core reg |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| // Can we do this from memory directly? |
| rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg); |
| if (rl_rhs.location != kLocPhysReg) { |
| OpRegMem(op, rl_result.reg, rl_rhs); |
| StoreFinalValue(rl_dest, rl_result); |
| return; |
| } else if (!rl_rhs.reg.IsFloat()) { |
| OpRegReg(op, rl_result.reg, rl_rhs.reg); |
| StoreFinalValue(rl_dest, rl_result); |
| return; |
| } |
| } |
| rl_rhs = LoadValue(rl_rhs, kCoreReg); |
| // It might happen rl_rhs and rl_dest are the same VR |
| // in this case rl_dest is in reg after LoadValue while |
| // rl_result is not updated yet, so do this |
| rl_result = UpdateLocTyped(rl_dest, kCoreReg); |
| if (rl_result.location != kLocPhysReg) { |
| // Okay, we can do this into memory. |
| OpMemReg(op, rl_result, rl_rhs.reg.GetReg()); |
| return; |
| } else if (!rl_result.reg.IsFloat()) { |
| // Can do this directly into the result register. |
| OpRegReg(op, rl_result.reg, rl_rhs.reg); |
| StoreFinalValue(rl_dest, rl_result); |
| return; |
| } else { |
| rl_lhs = LoadValue(rl_lhs, kCoreReg); |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg); |
| } |
| } else { |
| // Try to use reg/memory instructions. |
| rl_lhs = UpdateLocTyped(rl_lhs, kCoreReg); |
| rl_rhs = UpdateLocTyped(rl_rhs, kCoreReg); |
| // We can't optimize with FP registers. |
| if (!IsOperationSafeWithoutTemps(rl_lhs, rl_rhs)) { |
| // Something is difficult, so fall back to the standard case. |
| rl_lhs = LoadValue(rl_lhs, kCoreReg); |
| rl_rhs = LoadValue(rl_rhs, kCoreReg); |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg); |
| } else { |
| // We can optimize by moving to result and using memory operands. |
| if (rl_rhs.location != kLocPhysReg) { |
| // Force LHS into result. |
| // We should be careful with order here |
| // If rl_dest and rl_lhs points to the same VR we should load first |
| // If the are different we should find a register first for dest |
| if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == |
| mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) { |
| rl_lhs = LoadValue(rl_lhs, kCoreReg); |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| // No-op if these are the same. |
| OpRegCopy(rl_result.reg, rl_lhs.reg); |
| } else { |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| LoadValueDirect(rl_lhs, rl_result.reg); |
| } |
| OpRegMem(op, rl_result.reg, rl_rhs); |
| } else if (rl_lhs.location != kLocPhysReg) { |
| // RHS is in a register; LHS is in memory. |
| if (op != kOpSub) { |
| // Force RHS into result and operate on memory. |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| OpRegCopy(rl_result.reg, rl_rhs.reg); |
| OpRegMem(op, rl_result.reg, rl_lhs); |
| } else { |
| // Subtraction isn't commutative. |
| rl_lhs = LoadValue(rl_lhs, kCoreReg); |
| rl_rhs = LoadValue(rl_rhs, kCoreReg); |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg); |
| } |
| } else { |
| // Both are in registers. |
| rl_lhs = LoadValue(rl_lhs, kCoreReg); |
| rl_rhs = LoadValue(rl_rhs, kCoreReg); |
| rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| OpRegRegReg(op, rl_result.reg, rl_lhs.reg, rl_rhs.reg); |
| } |
| } |
| } |
| } |
| } |
| StoreValue(rl_dest, rl_result); |
| } |
| |
| bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_rhs) { |
| // If we have non-core registers, then we can't do good things. |
| if (rl_lhs.location == kLocPhysReg && rl_lhs.reg.IsFloat()) { |
| return false; |
| } |
| if (rl_rhs.location == kLocPhysReg && rl_rhs.reg.IsFloat()) { |
| return false; |
| } |
| |
| // Everything will be fine :-). |
| return true; |
| } |
| |
| void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { |
| if (!cu_->target64) { |
| Mir2Lir::GenIntToLong(rl_dest, rl_src); |
| return; |
| } |
| rl_src = UpdateLocTyped(rl_src, kCoreReg); |
| RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); |
| if (rl_src.location == kLocPhysReg) { |
| NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); |
| } else { |
| int displacement = SRegOffset(rl_src.s_reg_low); |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); |
| LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(), |
| displacement + LOWORD_OFFSET); |
| AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, |
| true /* is_load */, true /* is_64bit */); |
| } |
| StoreValueWide(rl_dest, rl_result); |
| } |
| |
| void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, |
| RegLocation rl_src1, RegLocation rl_shift) { |
| if (!cu_->target64) { |
| // Long shift operations in 32-bit. Use shld or shrd to create a 32-bit register filled from |
| // the other half, shift the other half, if the shift amount is less than 32 we're done, |
| // otherwise move one register to the other and place zero or sign bits in the other. |
| LIR* branch; |
| FlushAllRegs(); |
| LockCallTemps(); |
| LoadValueDirectFixed(rl_shift, rs_rCX); |
| RegStorage r_tmp = RegStorage::MakeRegPair(rs_rAX, rs_rDX); |
| LoadValueDirectWideFixed(rl_src1, r_tmp); |
| switch (opcode) { |
| case Instruction::SHL_LONG: |
| case Instruction::SHL_LONG_2ADDR: |
| NewLIR3(kX86Shld32RRC, r_tmp.GetHighReg(), r_tmp.GetLowReg(), rs_rCX.GetReg()); |
| NewLIR2(kX86Sal32RC, r_tmp.GetLowReg(), rs_rCX.GetReg()); |
| NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32); |
| branch = NewLIR2(kX86Jcc8, 0, kX86CondZ); |
| OpRegCopy(r_tmp.GetHigh(), r_tmp.GetLow()); |
| LoadConstant(r_tmp.GetLow(), 0); |
| branch->target = NewLIR0(kPseudoTargetLabel); |
| break; |
| case Instruction::SHR_LONG: |
| case Instruction::SHR_LONG_2ADDR: |
| NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(), rs_rCX.GetReg()); |
| NewLIR2(kX86Sar32RC, r_tmp.GetHighReg(), rs_rCX.GetReg()); |
| NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32); |
| branch = NewLIR2(kX86Jcc8, 0, kX86CondZ); |
| OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh()); |
| NewLIR2(kX86Sar32RI, r_tmp.GetHighReg(), 31); |
| branch->target = NewLIR0(kPseudoTargetLabel); |
| break; |
| case Instruction::USHR_LONG: |
| case Instruction::USHR_LONG_2ADDR: |
| NewLIR3(kX86Shrd32RRC, r_tmp.GetLowReg(), r_tmp.GetHighReg(), |
| rs_rCX.GetReg()); |
| NewLIR2(kX86Shr32RC, r_tmp.GetHighReg(), rs_rCX.GetReg()); |
| NewLIR2(kX86Test8RI, rs_rCX.GetReg(), 32); |
| branch = NewLIR2(kX86Jcc8, 0, kX86CondZ); |
| OpRegCopy(r_tmp.GetLow(), r_tmp.GetHigh()); |
| LoadConstant(r_tmp.GetHigh(), 0); |
| branch->target = NewLIR0(kPseudoTargetLabel); |
| break; |
| default: |
| LOG(FATAL) << "Unexpected case: " << opcode; |
| return; |
| } |
| RegLocation rl_result = LocCReturnWide(); |
| StoreValueWide(rl_dest, rl_result); |
| return; |
| } |
| |
| bool is_two_addr = false; |
| OpKind op = kOpBkpt; |
| RegLocation rl_result; |
| |
| switch (opcode) { |
| case Instruction::SHL_LONG_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::SHL_LONG: |
| op = kOpLsl; |
| break; |
| case Instruction::SHR_LONG_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::SHR_LONG: |
| op = kOpAsr; |
| break; |
| case Instruction::USHR_LONG_2ADDR: |
| is_two_addr = true; |
| FALLTHROUGH_INTENDED; |
| case Instruction::USHR_LONG: |
| op = kOpLsr; |
| break; |
| default: |
| op = kOpBkpt; |
| } |
| |
| // X86 doesn't require masking and must use ECX. |
| RegStorage t_reg = TargetReg(kCount, kNotWide); // rCX |
| LoadValueDirectFixed(rl_shift, t_reg); |
| if (is_two_addr) { |
| // Can we do this directly into memory? |
| rl_result = UpdateLocWideTyped(rl_dest, kCoreReg); |
| if (rl_result.location != kLocPhysReg) { |
| // Okay, we can do this into memory |
| ScopedMemRefType mem_ref_type(this, ResourceMask::kDalvikReg); |
| OpMemReg(op, rl_result, t_reg.GetReg()); |
| } else if (!rl_result.reg.IsFloat()) { |
| // Can do this directly into the result register |
| OpRegReg(op, rl_result.reg, t_reg); |
| StoreFinalValueWide(rl_dest, rl_result); |
| } |
| } else { |
| // Three address form, or we can't do directly. |
| rl_src1 = LoadValueWide(rl_src1, kCoreReg); |
| rl_result = EvalLocWide(rl_dest, kCoreReg, true); |
| OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg); |
| StoreFinalValueWide(rl_dest, rl_result); |
| } |
| |
| FreeTemp(t_reg); |
| } |
| |
| } // namespace art |