diff options
| author | 2014-08-07 21:40:57 +0000 | |
|---|---|---|
| committer | 2014-08-07 15:32:16 +0000 | |
| commit | 25904e30e8bb2d131cbcfa67b1e5481bbf35cffd (patch) | |
| tree | 784a6e20550001c377ee27442e521ab78cd06496 | |
| parent | 52fed5a8f31dac64d09ede03703f40b712a7d766 (diff) | |
| parent | 947717a2b085f36ea007ac64f728e19ff1c8db0b (diff) | |
Merge "Add arraycopy intrinsic for arm and arm64."
| -rw-r--r-- | compiler/dex/quick/arm/codegen_arm.h | 1 | ||||
| -rw-r--r-- | compiler/dex/quick/arm/int_arm.cc | 94 | ||||
| -rw-r--r-- | compiler/dex/quick/arm64/codegen_arm64.h | 1 | ||||
| -rw-r--r-- | compiler/dex/quick/arm64/int_arm64.cc | 116 |
4 files changed, 212 insertions, 0 deletions
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index 072acbeaa7..cd6c9cc1e1 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -108,6 +108,7 @@ class ArmMir2Lir FINAL : public Mir2Lir { bool GenInlinedSqrt(CallInfo* info); bool GenInlinedPeek(CallInfo* info, OpSize size); bool GenInlinedPoke(CallInfo* info, OpSize size); + bool GenInlinedArrayCopyCharArray(CallInfo* info) OVERRIDE; RegLocation GenDivRem(RegLocation rl_dest, RegStorage reg_lo, RegStorage reg_hi, bool is_div); RegLocation GenDivRemLit(RegLocation rl_dest, RegStorage reg_lo, int lit, bool is_div); void GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2); diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 6711ab36a1..b9a17cceb9 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -949,6 +949,100 @@ bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { return true; } +bool ArmMir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) { + constexpr int kLargeArrayThreshold = 256; + + RegLocation rl_src = info->args[0]; + RegLocation rl_src_pos = info->args[1]; + RegLocation rl_dst = info->args[2]; + RegLocation rl_dst_pos = info->args[3]; + RegLocation rl_length = info->args[4]; + // Compile time check, handle exception by non-inline method to reduce related meta-data. + if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) || + (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) || + (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) { + return false; + } + + ClobberCallerSave(); + LockCallTemps(); // Prepare for explicit register usage. + LockTemp(rs_r12); + RegStorage rs_src = rs_r0; + RegStorage rs_dst = rs_r1; + LoadValueDirectFixed(rl_src, rs_src); + LoadValueDirectFixed(rl_dst, rs_dst); + + // Handle null pointer exception in slow-path. 
+ LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr); + LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr); + // Handle potential overlapping in slow-path. + LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr); + // Handle exception or big length in slow-path. + RegStorage rs_length = rs_r2; + LoadValueDirectFixed(rl_length, rs_length); + LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr); + // Src bounds check. + RegStorage rs_pos = rs_r3; + RegStorage rs_arr_length = rs_r12; + LoadValueDirectFixed(rl_src_pos, rs_pos); + LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr); + Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length); + OpRegReg(kOpSub, rs_arr_length, rs_pos); + LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr); + // Dst bounds check. + LoadValueDirectFixed(rl_dst_pos, rs_pos); + LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_pos, 0, nullptr); + Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length); + OpRegReg(kOpSub, rs_arr_length, rs_pos); + LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr); + + // Everything is checked now. + OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value()); + OpRegReg(kOpAdd, rs_dst, rs_pos); + OpRegReg(kOpAdd, rs_dst, rs_pos); + OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value()); + LoadValueDirectFixed(rl_src_pos, rs_pos); + OpRegReg(kOpAdd, rs_src, rs_pos); + OpRegReg(kOpAdd, rs_src, rs_pos); + + RegStorage rs_tmp = rs_pos; + OpRegRegImm(kOpLsl, rs_length, rs_length, 1); + + // Copy one element. + OpRegRegImm(kOpAnd, rs_tmp, rs_length, 2); + LIR* jmp_to_begin_loop = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr); + OpRegImm(kOpSub, rs_length, 2); + LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf); + StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf); + + // Copy two elements. 
+ LIR *begin_loop = NewLIR0(kPseudoTargetLabel); + LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr); + OpRegImm(kOpSub, rs_length, 4); + LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32); + StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32); + OpUnconditionalBranch(begin_loop); + + LIR *check_failed = NewLIR0(kPseudoTargetLabel); + LIR* launchpad_branch = OpUnconditionalBranch(nullptr); + LIR* return_point = NewLIR0(kPseudoTargetLabel); + + src_check_branch->target = check_failed; + dst_check_branch->target = check_failed; + src_dst_same->target = check_failed; + len_neg_or_too_big->target = check_failed; + src_pos_negative->target = check_failed; + src_bad_len->target = check_failed; + dst_pos_negative->target = check_failed; + dst_bad_len->target = check_failed; + jmp_to_begin_loop->target = begin_loop; + jmp_to_ret->target = return_point; + + AddIntrinsicSlowPath(info, launchpad_branch, return_point); + + return true; +} + LIR* ArmMir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { return RawLIR(current_dalvik_offset_, kThumb2LdrPcRel12, reg.GetReg(), 0, 0, 0, 0, target); } diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index 2cd24c6874..3e1c18baf4 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -168,6 +168,7 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { bool GenInlinedPeek(CallInfo* info, OpSize size) OVERRIDE; bool GenInlinedPoke(CallInfo* info, OpSize size) OVERRIDE; bool GenInlinedAbsLong(CallInfo* info) OVERRIDE; + bool GenInlinedArrayCopyCharArray(CallInfo* info) OVERRIDE; void GenIntToLong(RegLocation rl_dest, RegLocation rl_src) OVERRIDE; void GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) OVERRIDE; diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 147fee8436..d00c57dee9 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ 
b/compiler/dex/quick/arm64/int_arm64.cc @@ -272,6 +272,7 @@ LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_ ArmOpcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0); branch = NewLIR2(opcode | wide, reg.GetReg(), 0); } + // TODO: Use tbz/tbnz for < 0 or >= 0. } if (branch == nullptr) { @@ -788,6 +789,121 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { return true; } +bool Arm64Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) { + constexpr int kLargeArrayThreshold = 512; + + RegLocation rl_src = info->args[0]; + RegLocation rl_src_pos = info->args[1]; + RegLocation rl_dst = info->args[2]; + RegLocation rl_dst_pos = info->args[3]; + RegLocation rl_length = info->args[4]; + // Compile time check, handle exception by non-inline method to reduce related meta-data. + if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) || + (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) || + (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) { + return false; + } + + ClobberCallerSave(); + LockCallTemps(); // Prepare for explicit register usage. + RegStorage rs_src = rs_x0; + RegStorage rs_dst = rs_x1; + LoadValueDirectFixed(rl_src, rs_src); + LoadValueDirectFixed(rl_dst, rs_dst); + + // Handle null pointer exception in slow-path. + LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr); + LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr); + // Handle potential overlapping in slow-path. + // TUNING: Support overlapping cases. + LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr); + // Handle exception or big length in slow-path. + RegStorage rs_length = rs_w2; + LoadValueDirectFixed(rl_length, rs_length); + LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr); + // Src bounds check. 
+ RegStorage rs_src_pos = rs_w3; + RegStorage rs_arr_length = rs_w4; + LoadValueDirectFixed(rl_src_pos, rs_src_pos); + LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_src_pos, 0, nullptr); + Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length); + OpRegReg(kOpSub, rs_arr_length, rs_src_pos); + LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr); + // Dst bounds check. + RegStorage rs_dst_pos = rs_w5; + LoadValueDirectFixed(rl_dst_pos, rs_dst_pos); + LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_dst_pos, 0, nullptr); + Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length); + OpRegReg(kOpSub, rs_arr_length, rs_dst_pos); + LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr); + + // Everything is checked now. + // Set rs_src to the address of the first element to be copied. + rs_src_pos = As64BitReg(rs_src_pos); + OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value()); + OpRegRegImm(kOpLsl, rs_src_pos, rs_src_pos, 1); + OpRegReg(kOpAdd, rs_src, rs_src_pos); + // Set rs_dst to the address of the first element to be copied. + rs_dst_pos = As64BitReg(rs_dst_pos); + OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value()); + OpRegRegImm(kOpLsl, rs_dst_pos, rs_dst_pos, 1); + OpRegReg(kOpAdd, rs_dst, rs_dst_pos); + + // rs_arr_length won't be used anymore. + RegStorage rs_tmp = rs_arr_length; + // Use 64-bit view since rs_length will be used as index. + rs_length = As64BitReg(rs_length); + OpRegRegImm(kOpLsl, rs_length, rs_length, 1); + + // Copy one element. + OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 2); + LIR* jmp_to_copy_two = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr); + OpRegImm(kOpSub, rs_length, 2); + LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf); + StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf); + + // Copy two elements. 
+ LIR *copy_two = NewLIR0(kPseudoTargetLabel); + OpRegRegImm(kOpAnd, rs_tmp, As32BitReg(rs_length), 4); + LIR* jmp_to_copy_four = OpCmpImmBranch(kCondEq, rs_tmp, 0, nullptr); + OpRegImm(kOpSub, rs_length, 4); + LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32); + StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32); + + // Copy four elements. + LIR *copy_four = NewLIR0(kPseudoTargetLabel); + LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr); + LIR *begin_loop = NewLIR0(kPseudoTargetLabel); + OpRegImm(kOpSub, rs_length, 8); + rs_tmp = As64BitReg(rs_tmp); + LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k64); + StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k64); + LIR* jmp_to_loop = OpCmpImmBranch(kCondNe, rs_length, 0, nullptr); + LIR* loop_finished = OpUnconditionalBranch(nullptr); + + LIR *check_failed = NewLIR0(kPseudoTargetLabel); + LIR* launchpad_branch = OpUnconditionalBranch(nullptr); + LIR* return_point = NewLIR0(kPseudoTargetLabel); + + src_check_branch->target = check_failed; + dst_check_branch->target = check_failed; + src_dst_same->target = check_failed; + len_neg_or_too_big->target = check_failed; + src_pos_negative->target = check_failed; + src_bad_len->target = check_failed; + dst_pos_negative->target = check_failed; + dst_bad_len->target = check_failed; + jmp_to_copy_two->target = copy_two; + jmp_to_copy_four->target = copy_four; + jmp_to_ret->target = return_point; + jmp_to_loop->target = begin_loop; + loop_finished->target = return_point; + + AddIntrinsicSlowPath(info, launchpad_branch, return_point); + + return true; +} + LIR* Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral); return RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp), reg.GetReg(), 0, 0, 0, 0, target); |