Diffstat (limited to 'compiler'): 25 files changed, 452 insertions(+), 182 deletions(-)
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index 45cf2fba7f..e11f61a285 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -293,16 +293,12 @@ class CommonCompilerTest : public CommonRuntimeTest { ASSERT_LE(instruction_set_features, runtime_features); #elif defined(__aarch64__) instruction_set = kArm64; - // TODO: arm64 compilation support. - compiler_options_->SetCompilerFilter(CompilerOptions::kInterpretOnly); #elif defined(__mips__) instruction_set = kMips; #elif defined(__i386__) instruction_set = kX86; #elif defined(__x86_64__) instruction_set = kX86_64; - // TODO: x86_64 compilation support. - compiler_options_->SetCompilerFilter(CompilerOptions::kInterpretOnly); #endif runtime_->SetInstructionSet(instruction_set); diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index 07bbf15e00..dc6043dd65 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -868,7 +868,9 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, cu.disable_opt |= (1 << kLoadStoreElimination); } else if (cu.instruction_set == kArm64) { // TODO(Arm64): enable optimizations once backend is mature enough. - cu.disable_opt = ~(uint32_t)0; + cu.disable_opt = ~((1 << kSuppressMethodInlining) | + (1 << kNullCheckElimination) | + (1 << kPromoteRegs)); } cu.StartTimingSplit("BuildMIRGraph"); diff --git a/compiler/dex/quick/arm64/assemble_arm64.cc b/compiler/dex/quick/arm64/assemble_arm64.cc index 284593bf2c..e10f7cfe67 100644 --- a/compiler/dex/quick/arm64/assemble_arm64.cc +++ b/compiler/dex/quick/arm64/assemble_arm64.cc @@ -170,7 +170,7 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { ENCODING_MAP(WIDE(kA64Cbz2rt), SF_VARIANTS(0x34000000), kFmtRegR, 4, 0, kFmtBitBlt, 23, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, - IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP, + IS_BINARY_OP | REG_USE0 | IS_BRANCH | NEEDS_FIXUP, "cbz", "!0r, !1t", kFixupCBxZ), ENCODING_MAP(WIDE(kA64Cmn3rro), SF_VARIANTS(0x2b00001f), kFmtRegR, 9, 5, kFmtRegR, 20, 16, kFmtShift, -1, -1, @@ -288,7 +288,7 @@ const ArmEncodingMap Arm64Mir2Lir::EncodingMap[kA64Last] = { kFmtRegW, 4, 0, kFmtRegS, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fmov", "!0w, !1s", kFixupNone), - ENCODING_MAP(kA64Fmov2xS, NO_VARIANTS(0x9e6e0000), + ENCODING_MAP(kA64Fmov2xS, NO_VARIANTS(0x9e660000), kFmtRegX, 4, 0, kFmtRegD, 9, 5, kFmtUnused, -1, -1, kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1, "fmov", "!0x, !1S", kFixupNone), diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index b070c8a289..7d75da91d8 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -78,7 +78,8 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { OVERRIDE; LIR* LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, OpSize size) OVERRIDE; - LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest) OVERRIDE; + LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale) + OVERRIDE; LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_dest, OpSize size) OVERRIDE; LIR* LoadConstantNoClobber(RegStorage r_dest, int value); @@ -89,7 +90,8 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { VolatileKind is_volatile) OVERRIDE; LIR* StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, OpSize size) OVERRIDE; - 
LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src) OVERRIDE; + LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale) + OVERRIDE; LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_src, OpSize size) OVERRIDE; void MarkGCCard(RegStorage val_reg, RegStorage tgt_addr_reg) OVERRIDE; @@ -177,6 +179,7 @@ class Arm64Mir2Lir FINAL : public Mir2Lir { RegLocation rl_src2); void GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src); bool GenInlinedReverseBits(CallInfo* info, OpSize size); + bool GenInlinedAbsDouble(CallInfo* info) OVERRIDE; bool GenInlinedCas(CallInfo* info, bool is_long, bool is_object); bool GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long); bool GenInlinedMinMaxFP(CallInfo* info, bool is_min, bool is_double); diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc index 0f9de5b604..6594c4b7a7 100644 --- a/compiler/dex/quick/arm64/fp_arm64.cc +++ b/compiler/dex/quick/arm64/fp_arm64.cc @@ -323,6 +323,16 @@ void Arm64Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { StoreValueWide(rl_dest, rl_result); } +bool Arm64Mir2Lir::GenInlinedAbsDouble(CallInfo* info) { + RegLocation rl_src = info->args[0]; + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_dest = InlineTargetWide(info); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + NewLIR4(WIDE(kA64Ubfm4rrdd), rl_result.reg.GetReg(), rl_src.reg.GetReg(), 0, 62); + StoreValueWide(rl_dest, rl_result); + return true; +} + bool Arm64Mir2Lir::GenInlinedSqrt(CallInfo* info) { RegLocation rl_src = info->args[0]; RegLocation rl_dest = InlineTargetWide(info); // double place for result diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index bab549955c..e8f5cb9f09 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -451,9 +451,8 @@ bool Arm64Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) { bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { RegLocation rl_src_address = info->args[0]; // long address - rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] ? - RegLocation rl_dest = InlineTarget(info); - RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg + RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info); + RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile); @@ -468,9 +467,8 @@ bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) { bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) { RegLocation rl_src_address = info->args[0]; // long address - rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[1] RegLocation rl_src_value = info->args[2]; // [size] value - RegLocation rl_address = LoadValue(rl_src_address, kCoreReg); // kRefReg + RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg); RegLocation rl_value; if (size == k64) { @@ -497,11 +495,9 @@ void Arm64Mir2Lir::OpTlsCmp(ThreadOffset<8> offset, int val) { bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { DCHECK_EQ(cu_->instruction_set, kArm64); - ArmOpcode wide = is_long ? 
WIDE(0) : UNWIDE(0); // Unused - RegLocation rl_src_unsafe = info->args[0]; RegLocation rl_src_obj = info->args[1]; // Object - known non-null RegLocation rl_src_offset = info->args[2]; // long low - rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] //TODO: do we really need this RegLocation rl_src_expected = info->args[4]; // int, long or Object // If is_long, high half is in info->args[5] RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object @@ -510,7 +506,7 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // Load Object and offset RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); - RegLocation rl_offset = LoadValue(rl_src_offset, kRefReg); + RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg); RegLocation rl_new_value; RegLocation rl_expected; @@ -542,28 +538,38 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // result = tmp != 0; RegStorage r_tmp; + RegStorage r_tmp_stored; + RegStorage rl_new_value_stored = rl_new_value.reg; + ArmOpcode wide = UNWIDE(0); if (is_long) { - r_tmp = AllocTempWide(); + r_tmp_stored = r_tmp = AllocTempWide(); + wide = WIDE(0); } else if (is_object) { + // References use 64-bit registers, but are stored as compressed 32-bit values. + // This means r_tmp_stored != r_tmp. r_tmp = AllocTempRef(); + r_tmp_stored = As32BitReg(r_tmp); + rl_new_value_stored = As32BitReg(rl_new_value_stored); } else { - r_tmp = AllocTemp(); + r_tmp_stored = r_tmp = AllocTemp(); } + RegStorage r_tmp32 = (r_tmp.Is32Bit()) ? r_tmp : As32BitReg(r_tmp); LIR* loop = NewLIR0(kPseudoTargetLabel); - NewLIR2(kA64Ldaxr2rX | wide, r_tmp.GetReg(), r_ptr.GetReg()); + NewLIR2(kA64Ldaxr2rX | wide, r_tmp_stored.GetReg(), r_ptr.GetReg()); OpRegReg(kOpCmp, r_tmp, rl_expected.reg); DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); LIR* early_exit = OpCondBranch(kCondNe, NULL); - - NewLIR3(kA64Stlxr3wrX | wide, As32BitReg(r_tmp).GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg()); - NewLIR3(kA64Cmp3RdT, As32BitReg(r_tmp).GetReg(), 0, ENCODE_NO_SHIFT); + NewLIR3(kA64Stlxr3wrX | wide, r_tmp32.GetReg(), rl_new_value_stored.GetReg(), r_ptr.GetReg()); + NewLIR3(kA64Cmp3RdT, r_tmp32.GetReg(), 0, ENCODE_NO_SHIFT); DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode)); OpCondBranch(kCondNe, loop); + LIR* exit_loop = NewLIR0(kPseudoTargetLabel); + early_exit->target = exit_loop; + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - LIR* exit = NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe); - early_exit->target = exit; + NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe); FreeTemp(r_tmp); // Now unneeded. FreeTemp(r_ptr); // Now unneeded. 
@@ -817,7 +823,7 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, FreeTemp(reg_len); } if (rl_result.ref) { - LoadRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg); + LoadRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale); } else { LoadBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_result.reg, scale, size); } @@ -914,7 +920,7 @@ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, FreeTemp(reg_len); } if (rl_src.ref) { - StoreRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg); + StoreRefIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale); } else { StoreBaseIndexed(reg_ptr, As64BitReg(rl_index.reg), rl_src.reg, scale, size); } diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 6985de6574..ef9dbddbde 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -1056,8 +1056,8 @@ int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn(); int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + 1); - // Fisrt of all, check whether it make sense to use bulk copying - // Optimization is aplicable only for range case + // First of all, check whether it makes sense to use bulk copying. + // Bulk copying is done only for the range case. // TODO: make a constant instead of 2 if (info->is_range && regs_left_to_pass_via_stack >= 2) { // Scan the rest of the args - if in phys_reg flush to memory @@ -1141,7 +1141,6 @@ int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, LoadValueDirectWideFixed(rl_arg, regWide); StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64, kNotVolatile); } - i++; } else { if (rl_arg.location == kLocPhysReg) { if (rl_arg.ref) { @@ -1163,6 +1162,9 @@ int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, direct_code, direct_method, type); } + if (rl_arg.wide) { + i++; + } } } @@ -1174,12 +1176,14 @@ int Arm64Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, if (reg.Valid()) { if (rl_arg.wide) { LoadValueDirectWideFixed(rl_arg, reg); - i++; } else { LoadValueDirectFixed(rl_arg, reg); } call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, - direct_code, direct_method, type); + direct_code, direct_method, type); + } + if (rl_arg.wide) { + i++; } } diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index aaee91b817..22a4ec4d49 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -893,7 +893,14 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto int expected_scale = 0; ArmOpcode opcode = kA64Brk1d; r_base = Check64BitReg(r_base); - r_index = Check64BitReg(r_index); + + // TODO(Arm64): The sign extension of r_index should be carried out by using an extended + // register offset load (rather than doing the sign extension in a separate instruction). + if (r_index.Is32Bit()) { + // Assemble: ``sxtw xN, wN''. 
+ r_index = As64BitReg(r_index); + NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31); + } if (r_dest.IsFloat()) { if (r_dest.IsDouble()) { @@ -920,8 +927,8 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto opcode = WIDE(kA64Ldr4rXxG); expected_scale = 3; break; - case kSingle: - case k32: + case kSingle: // Intentional fall-through. + case k32: // Intentional fall-through. case kReference: r_dest = Check32BitReg(r_dest); opcode = kA64Ldr4rXxG; @@ -963,8 +970,9 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto return load; } -LIR* Arm64Mir2Lir::LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest) { - return LoadBaseIndexed(r_base, r_index, As32BitReg(r_dest), 2, kReference); +LIR* Arm64Mir2Lir::LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, + int scale) { + return LoadBaseIndexed(r_base, r_index, As32BitReg(r_dest), scale, kReference); } LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, @@ -973,7 +981,14 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt int expected_scale = 0; ArmOpcode opcode = kA64Brk1d; r_base = Check64BitReg(r_base); - r_index = Check64BitReg(r_index); + + // TODO(Arm64): The sign extension of r_index should be carried out by using an extended + // register offset store (rather than doing the sign extension in a separate instruction). + if (r_index.Is32Bit()) { + // Assemble: ``sxtw xN, wN''. + r_index = As64BitReg(r_index); + NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31); + } if (r_src.IsFloat()) { if (r_src.IsDouble()) { @@ -1035,8 +1050,9 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt return store; } -LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src) { - return StoreBaseIndexed(r_base, r_index, As32BitReg(r_src), 2, kReference); +LIR* Arm64Mir2Lir::StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, + int scale) { + return StoreBaseIndexed(r_base, r_index, As32BitReg(r_src), scale, kReference); } /* diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index b699bd3bf2..36456bc4b0 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -34,6 +34,59 @@ namespace art { namespace { // anonymous namespace +static constexpr bool kIntrinsicIsStatic[] = { + true, // kIntrinsicDoubleCvt + true, // kIntrinsicFloatCvt + true, // kIntrinsicReverseBits + true, // kIntrinsicReverseBytes + true, // kIntrinsicAbsInt + true, // kIntrinsicAbsLong + true, // kIntrinsicAbsFloat + true, // kIntrinsicAbsDouble + true, // kIntrinsicMinMaxInt + true, // kIntrinsicMinMaxLong + true, // kIntrinsicMinMaxFloat + true, // kIntrinsicMinMaxDouble + true, // kIntrinsicSqrt + false, // kIntrinsicCharAt + false, // kIntrinsicCompareTo + false, // kIntrinsicIsEmptyOrLength + false, // kIntrinsicIndexOf + true, // kIntrinsicCurrentThread + true, // kIntrinsicPeek + true, // kIntrinsicPoke + false, // kIntrinsicCas + false, // kIntrinsicUnsafeGet + false, // kIntrinsicUnsafePut + true, // kIntrinsicSystemArrayCopyCharArray +}; +COMPILE_ASSERT(arraysize(kIntrinsicIsStatic) == kInlineOpNop, check_arraysize_kIntrinsicIsStatic); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicDoubleCvt], DoubleCvt_must_be_static); 
+COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicFloatCvt], FloatCvt_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicReverseBits], ReverseBits_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicReverseBytes], ReverseBytes_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicAbsInt], AbsInt_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicAbsLong], AbsLong_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicAbsFloat], AbsFloat_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicAbsDouble], AbsDouble_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicMinMaxInt], MinMaxInt_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicMinMaxLong], MinMaxLong_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicMinMaxFloat], MinMaxFloat_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicMinMaxDouble], MinMaxDouble_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicSqrt], Sqrt_must_be_static); +COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicCharAt], CharAt_must_not_be_static); +COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicCompareTo], CompareTo_must_not_be_static); +COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicIsEmptyOrLength], IsEmptyOrLength_must_not_be_static); +COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicIndexOf], IndexOf_must_not_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicCurrentThread], CurrentThread_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicPeek], Peek_must_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicPoke], Poke_must_be_static); +COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicCas], Cas_must_not_be_static); +COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicUnsafeGet], UnsafeGet_must_not_be_static); +COMPILE_ASSERT(!kIntrinsicIsStatic[kIntrinsicUnsafePut], UnsafePut_must_not_be_static); +COMPILE_ASSERT(kIntrinsicIsStatic[kIntrinsicSystemArrayCopyCharArray], + SystemArrayCopyCharArray_must_not_be_static); + MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke, MIR* move_return) { MIR* insn = mir_graph->NewMIR(); insn->offset = invoke->offset; @@ -84,6 +137,8 @@ const char* const DexFileMethodInliner::kClassCacheNames[] = { "Ljava/lang/Thread;", // kClassCacheJavaLangThread "Llibcore/io/Memory;", // kClassCacheLibcoreIoMemory "Lsun/misc/Unsafe;", // kClassCacheSunMiscUnsafe + "Ljava/lang/System;", // kClassCacheJavaLangSystem + "[C" // kClassCacheJavaLangCharArray }; const char* const DexFileMethodInliner::kNameCacheNames[] = { @@ -129,6 +184,7 @@ const char* const DexFileMethodInliner::kNameCacheNames[] = { "putObject", // kNameCachePutObject "putObjectVolatile", // kNameCachePutObjectVolatile "putOrderedObject", // kNameCachePutOrderedObject + "arraycopy", // kNameCacheArrayCopy }; const DexFileMethodInliner::ProtoDef DexFileMethodInliner::kProtoCacheDefs[] = { @@ -204,6 +260,9 @@ const DexFileMethodInliner::ProtoDef DexFileMethodInliner::kProtoCacheDefs[] = { // kProtoCacheObjectJObject_V { kClassCacheVoid, 3, { kClassCacheJavaLangObject, kClassCacheLong, kClassCacheJavaLangObject } }, + // kProtoCacheCharArrayICharArrayII_V + { kClassCacheVoid, 5, {kClassCacheJavaLangCharArray, kClassCacheInt, + kClassCacheJavaLangCharArray, kClassCacheInt, kClassCacheInt}} }; const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods[] = { @@ -291,6 +350,10 @@ const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods UNSAFE_GET_PUT(Object, Object, kIntrinsicFlagIsObject), #undef UNSAFE_GET_PUT + 
INTRINSIC(JavaLangSystem, ArrayCopy, CharArrayICharArrayII_V , kIntrinsicSystemArrayCopyCharArray, + 0), + + #undef INTRINSIC }; @@ -334,6 +397,10 @@ bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) { } intrinsic = it->second; } + if (kIntrinsicIsStatic[intrinsic.opcode] != (info->type == kStatic)) { + // Invoke type mismatch. + return false; + } switch (intrinsic.opcode) { case kIntrinsicDoubleCvt: return backend->GenInlinedDoubleCvt(info); @@ -387,6 +454,8 @@ bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) { intrinsic.d.data & kIntrinsicFlagIsObject, intrinsic.d.data & kIntrinsicFlagIsVolatile, intrinsic.d.data & kIntrinsicFlagIsOrdered); + case kIntrinsicSystemArrayCopyCharArray: + return backend->GenInlinedArrayCopyCharArray(info); default: LOG(FATAL) << "Unexpected intrinsic opcode: " << intrinsic.opcode; return false; // avoid warning "control reaches end of non-void function" diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h index c7a3b83260..5b3b104150 100644 --- a/compiler/dex/quick/dex_file_method_inliner.h +++ b/compiler/dex/quick/dex_file_method_inliner.h @@ -118,6 +118,8 @@ class DexFileMethodInliner { kClassCacheJavaLangThread, kClassCacheLibcoreIoMemory, kClassCacheSunMiscUnsafe, + kClassCacheJavaLangSystem, + kClassCacheJavaLangCharArray, kClassCacheLast }; @@ -170,6 +172,7 @@ class DexFileMethodInliner { kNameCachePutObject, kNameCachePutObjectVolatile, kNameCachePutOrderedObject, + kNameCacheArrayCopy, kNameCacheLast }; @@ -214,6 +217,7 @@ class DexFileMethodInliner { kProtoCacheObjectJJ_V, kProtoCacheObjectJ_Object, kProtoCacheObjectJObject_V, + kProtoCacheCharArrayICharArrayII_V, kProtoCacheLast }; diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 6c670cdeba..02f39ac180 100755 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -1280,7 +1280,7 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { } Load32Disp(rl_obj.reg, offset_offset, reg_off); MarkPossibleNullPointerException(info->opt_flags); - Load32Disp(rl_obj.reg, value_offset, reg_ptr); + LoadRefDisp(rl_obj.reg, value_offset, reg_ptr, kNotVolatile); if (range_check) { // Set up a slow path to allow retry in case of bounds violation */ OpRegReg(kOpCmp, rl_idx.reg, reg_max); @@ -1367,8 +1367,8 @@ bool Mir2Lir::GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty) { } bool Mir2Lir::GenInlinedReverseBytes(CallInfo* info, OpSize size) { - if (cu_->instruction_set == kMips) { - // TODO - add Mips implementation + if (cu_->instruction_set == kMips || cu_->instruction_set == kArm64) { + // TODO - add Mips implementation; Enable Arm64. return false; } RegLocation rl_src_i = info->args[0]; @@ -1499,14 +1499,8 @@ bool Mir2Lir::GenInlinedAbsDouble(CallInfo* info) { RegLocation rl_dest = InlineTargetWide(info); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - if (cu_->instruction_set == kArm64) { - // TODO - Can ecode ? 
UBXF otherwise - // OpRegRegImm(kOpAnd, rl_result.reg, 0x7fffffffffffffff); - return false; - } else { - OpRegCopyWide(rl_result.reg, rl_src.reg); - OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff); - } + OpRegCopyWide(rl_result.reg, rl_src.reg); + OpRegImm(kOpAnd, rl_result.reg.GetHigh(), 0x7fffffff); StoreValueWide(rl_dest, rl_result); return true; } @@ -1533,6 +1527,11 @@ bool Mir2Lir::GenInlinedDoubleCvt(CallInfo* info) { return true; } +bool Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) { + return false; +} + + /* * Fast String.indexOf(I) & (II). Tests for simple case of char <= 0xFFFF, * otherwise bails to standard library code. @@ -1651,7 +1650,8 @@ bool Mir2Lir::GenInlinedCurrentThread(CallInfo* info) { break; case kArm64: - Load32Disp(TargetPtrReg(kSelf), Thread::PeerOffset<8>().Int32Value(), rl_result.reg); + LoadRefDisp(TargetPtrReg(kSelf), Thread::PeerOffset<8>().Int32Value(), rl_result.reg, + kNotVolatile); break; case kX86: @@ -1685,10 +1685,11 @@ bool Mir2Lir::GenInlinedUnsafeGet(CallInfo* info, RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); - RegLocation rl_result = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kCoreReg, true); + RegLocation rl_result = EvalLoc(rl_dest, LocToRegClass(rl_dest), true); if (is_long) { - if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { - LoadBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_result.reg, k64); + if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64 + || cu_->instruction_set == kArm64) { + LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k64); } else { RegStorage rl_temp_offset = AllocTemp(); OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg); @@ -1696,7 +1697,11 @@ bool Mir2Lir::GenInlinedUnsafeGet(CallInfo* info, FreeTemp(rl_temp_offset); } } else { - LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k32); + if (rl_result.ref) { + LoadRefIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0); + } else { + LoadBaseIndexed(rl_object.reg, rl_offset.reg, rl_result.reg, 0, k32); + } } if (is_volatile) { @@ -1734,8 +1739,9 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, RegLocation rl_value; if (is_long) { rl_value = LoadValueWide(rl_src_value, kCoreReg); - if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { - StoreBaseIndexedDisp(rl_object.reg, rl_offset.reg, 0, 0, rl_value.reg, k64); + if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64 + || cu_->instruction_set == kArm64) { + StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k64); } else { RegStorage rl_temp_offset = AllocTemp(); OpRegRegReg(kOpAdd, rl_temp_offset, rl_object.reg, rl_offset.reg); @@ -1744,7 +1750,11 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, } } else { rl_value = LoadValue(rl_src_value); - StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32); + if (rl_value.ref) { + StoreRefIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0); + } else { + StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32); + } } // Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard. @@ -1772,12 +1782,9 @@ void Mir2Lir::GenInvoke(CallInfo* info) { return; } DCHECK(cu_->compiler_driver->GetMethodInlinerMap() != nullptr); - // Temporary disable intrinsics for Arm64. We will enable them later step by step. 
- if (cu_->instruction_set != kArm64) { - if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file) - ->GenIntrinsic(this, info)) { - return; - } + if (cu_->compiler_driver->GetMethodInlinerMap()->GetMethodInliner(cu_->dex_file) + ->GenIntrinsic(this, info)) { + return; } GenInvokeNoInline(info); } diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index 6768790d19..48855012c3 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -992,6 +992,7 @@ class Mir2Lir : public Backend { virtual bool GenInlinedAbsDouble(CallInfo* info); bool GenInlinedFloatCvt(CallInfo* info); bool GenInlinedDoubleCvt(CallInfo* info); + virtual bool GenInlinedArrayCopyCharArray(CallInfo* info); virtual bool GenInlinedIndexOf(CallInfo* info, bool zero_based); bool GenInlinedStringCompareTo(CallInfo* info); bool GenInlinedCurrentThread(CallInfo* info); @@ -1023,8 +1024,9 @@ class Mir2Lir : public Backend { return LoadBaseDisp(r_base, displacement, r_dest, kReference, is_volatile); } // Load a reference at base + index and decompress into register. - virtual LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest) { - return LoadBaseIndexed(r_base, r_index, r_dest, 2, kReference); + virtual LIR* LoadRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, + int scale) { + return LoadBaseIndexed(r_base, r_index, r_dest, scale, kReference); } // Load Dalvik value with 32-bit memory storage. If compressed object reference, decompress. virtual RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind); @@ -1050,8 +1052,9 @@ class Mir2Lir : public Backend { return StoreBaseDisp(r_base, displacement, r_src, kReference, is_volatile); } // Store an uncompressed reference into a compressed 32-bit container by index. - virtual LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src) { - return StoreBaseIndexed(r_base, r_index, r_src, 2, kReference); + virtual LIR* StoreRefIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, + int scale) { + return StoreBaseIndexed(r_base, r_index, r_src, scale, kReference); } // Store 32 bits, regardless of target. 
virtual LIR* Store32Disp(RegStorage r_base, int displacement, RegStorage r_src) { diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 879cf93bf1..7baf2d9663 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -402,6 +402,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, EXT_0F_ENCODING_MAP(Pxor, 0x66, 0xEF, REG_DEF0_USE0), EXT_0F_ENCODING2_MAP(Phaddw, 0x66, 0x38, 0x01, REG_DEF0_USE0), EXT_0F_ENCODING2_MAP(Phaddd, 0x66, 0x38, 0x02, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Haddpd, 0x66, 0x7C, REG_DEF0_USE0), + EXT_0F_ENCODING_MAP(Haddps, 0xF2, 0x7C, REG_DEF0_USE0), { kX86PextrbRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x3A, 0x14, 0, 0, 1, false }, "PextbRRI", "!0r,!1r,!2d" }, { kX86PextrwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC5, 0x00, 0, 0, 1, false }, "PextwRRI", "!0r,!1r,!2d" }, @@ -410,6 +412,9 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86PshuflwRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0xF2, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuflwRRI", "!0r,!1r,!2d" }, { kX86PshufdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x70, 0, 0, 0, 1, false }, "PshuffRRI", "!0r,!1r,!2d" }, + { kX86ShufpsRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x00, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "kX86ShufpsRRI", "!0r,!1r,!2d" }, + { kX86ShufpdRRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0xC6, 0, 0, 0, 1, false }, "kX86ShufpdRRI", "!0r,!1r,!2d" }, + { kX86PsrawRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 4, 0, 1, false }, "PsrawRI", "!0r,!1d" }, { kX86PsradRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x72, 0, 4, 0, 1, false }, "PsradRI", "!0r,!1d" }, { kX86PsrlwRI, kRegImm, IS_BINARY_OP | REG_DEF0_USE0, { 0x66, 0, 0x0F, 0x71, 0, 2, 0, 1, false }, "PsrlwRI", "!0r,!1d" }, @@ -429,7 +434,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Fst64M, kMem, IS_STORE | IS_UNARY_OP | REG_USE0 | USE_FP_STACK, { 0x0, 0, 0xDD, 0x00, 0, 2, 0, 0, false }, "Fstd64M", "[!0r,!1d]" }, { kX86Fprem, kNullary, NO_OPERAND | USE_FP_STACK, { 0xD9, 0, 0xF8, 0, 0, 0, 0, 0, false }, "Fprem64", "" }, { kX86Fucompp, kNullary, NO_OPERAND | USE_FP_STACK, { 0xDA, 0, 0xE9, 0, 0, 0, 0, 0, false }, "Fucompp", "" }, - { kX86Fstsw16R, kNullary, NO_OPERAND | USE_FP_STACK, { 0x9B, 0xDF, 0xE0, 0, 0, 0, 0, 0, false }, "Fstsw16R", "ax" }, + { kX86Fstsw16R, kNullary, NO_OPERAND | REG_DEFA | USE_FP_STACK, { 0x9B, 0xDF, 0xE0, 0, 0, 0, 0, 0, false }, "Fstsw16R", "ax" }, EXT_0F_ENCODING_MAP(Mova128, 0x66, 0x6F, REG_DEF0), { kX86Mova128MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x6F, 0, 0, 0, 0, false }, "Mova128MR", "[!0r+!1d],!2r" }, diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 123fe90d03..55e5993dce 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -517,6 +517,7 @@ class X86Mir2Lir : public Mir2Lir { * @returns true if a register is byte addressable. */ bool IsByteRegister(RegStorage reg); + bool GenInlinedArrayCopyCharArray(CallInfo* info) OVERRIDE; /* * @brief generate inline code for fast case of Strng.indexOf. 
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 5372512589..cf29e52bb8 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -2321,7 +2321,7 @@ void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, // For 32-bit, SETcc only works with EAX..EDX. RegStorage object_32reg = object.reg.Is64Bit() ? As32BitReg(object.reg) : object.reg; - if (result_reg == object_32reg || !IsByteRegister(result_reg)) { + if (result_reg.GetRegNum() == object_32reg.GetRegNum() || !IsByteRegister(result_reg)) { result_reg = AllocateByteRegister(); } diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 72e47d06b1..43882c2e02 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -1023,6 +1023,123 @@ void X86Mir2Lir::InstallLiteralPools() { Mir2Lir::InstallLiteralPools(); } +bool X86Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) { + if (cu_->target64) { + // TODO: Implement ArrayCOpy intrinsic for x86_64 + return false; + } + + RegLocation rl_src = info->args[0]; + RegLocation rl_srcPos = info->args[1]; + RegLocation rl_dst = info->args[2]; + RegLocation rl_dstPos = info->args[3]; + RegLocation rl_length = info->args[4]; + if (rl_srcPos.is_const && (mir_graph_->ConstantValue(rl_srcPos) < 0)) { + return false; + } + if (rl_dstPos.is_const && (mir_graph_->ConstantValue(rl_dstPos) < 0)) { + return false; + } + ClobberCallerSave(); + LockCallTemps(); // Using fixed registers + LoadValueDirectFixed(rl_src , rs_rAX); + LoadValueDirectFixed(rl_dst , rs_rCX); + LIR* src_dst_same = OpCmpBranch(kCondEq, rs_rAX , rs_rCX, nullptr); + LIR* src_null_branch = OpCmpImmBranch(kCondEq, rs_rAX , 0, nullptr); + LIR* dst_null_branch = OpCmpImmBranch(kCondEq, rs_rCX , 0, nullptr); + LoadValueDirectFixed(rl_length , rs_rDX); + LIR* len_negative = OpCmpImmBranch(kCondLt, rs_rDX , 0, nullptr); + LIR* len_too_big = OpCmpImmBranch(kCondGt, rs_rDX , 128, nullptr); + LoadValueDirectFixed(rl_src , rs_rAX); + LoadWordDisp(rs_rAX , mirror::Array::LengthOffset().Int32Value(), rs_rAX); + LIR* src_bad_len = nullptr; + LIR* srcPos_negative = nullptr; + if (!rl_srcPos.is_const) { + LoadValueDirectFixed(rl_srcPos , rs_rBX); + srcPos_negative = OpCmpImmBranch(kCondLt, rs_rBX , 0, nullptr); + OpRegReg(kOpAdd, rs_rBX, rs_rDX); + src_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rBX, nullptr); + } else { + int pos_val = mir_graph_->ConstantValue(rl_srcPos.orig_sreg); + if (pos_val == 0) { + src_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rDX, nullptr); + } else { + OpRegRegImm(kOpAdd, rs_rBX, rs_rDX, pos_val); + src_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rBX, nullptr); + } + } + LIR* dstPos_negative = nullptr; + LIR* dst_bad_len = nullptr; + LoadValueDirectFixed(rl_dst, rs_rAX); + LoadWordDisp(rs_rAX, mirror::Array::LengthOffset().Int32Value(), rs_rAX); + if (!rl_dstPos.is_const) { + LoadValueDirectFixed(rl_dstPos , rs_rBX); + dstPos_negative = OpCmpImmBranch(kCondLt, rs_rBX , 0, nullptr); + OpRegRegReg(kOpAdd, rs_rBX, rs_rBX, rs_rDX); + dst_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rBX, nullptr); + } else { + int pos_val = mir_graph_->ConstantValue(rl_dstPos.orig_sreg); + if (pos_val == 0) { + dst_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rDX, nullptr); + } else { + OpRegRegImm(kOpAdd, rs_rBX, rs_rDX, pos_val); + dst_bad_len = OpCmpBranch(kCondLt, rs_rAX , rs_rBX, nullptr); + } + } + // everything is checked now + LoadValueDirectFixed(rl_src , rs_rAX); + 
LoadValueDirectFixed(rl_dst , rs_rBX); + LoadValueDirectFixed(rl_srcPos , rs_rCX); + NewLIR5(kX86Lea32RA, rs_rAX.GetReg(), rs_rAX.GetReg(), + rs_rCX.GetReg() , 1, mirror::Array::DataOffset(2).Int32Value()); + // RAX now holds the address of the first src element to be copied + + LoadValueDirectFixed(rl_dstPos , rs_rCX); + NewLIR5(kX86Lea32RA, rs_rBX.GetReg(), rs_rBX.GetReg(), + rs_rCX.GetReg() , 1, mirror::Array::DataOffset(2).Int32Value() ); + // RBX now holds the address of the first dst element to be copied + + // check if the number of elements to be copied is odd or even. If odd + // then copy the first element (so that the remaining number of elements + // is even). + LoadValueDirectFixed(rl_length , rs_rCX); + OpRegImm(kOpAnd, rs_rCX, 1); + LIR* jmp_to_begin_loop = OpCmpImmBranch(kCondEq, rs_rCX, 0, nullptr); + OpRegImm(kOpSub, rs_rDX, 1); + LoadBaseIndexedDisp(rs_rAX, rs_rDX, 1, 0, rs_rCX, kSignedHalf); + StoreBaseIndexedDisp(rs_rBX, rs_rDX, 1, 0, rs_rCX, kSignedHalf); + + // since the remaining number of elements is even, we will copy by + // two elements at a time. + LIR *beginLoop = NewLIR0(kPseudoTargetLabel); + LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_rDX , 0, nullptr); + OpRegImm(kOpSub, rs_rDX, 2); + LoadBaseIndexedDisp(rs_rAX, rs_rDX, 1, 0, rs_rCX, kSingle); + StoreBaseIndexedDisp(rs_rBX, rs_rDX, 1, 0, rs_rCX, kSingle); + OpUnconditionalBranch(beginLoop); + LIR *check_failed = NewLIR0(kPseudoTargetLabel); + LIR* launchpad_branch = OpUnconditionalBranch(nullptr); + LIR *return_point = NewLIR0(kPseudoTargetLabel); + jmp_to_ret->target = return_point; + jmp_to_begin_loop->target = beginLoop; + src_dst_same->target = check_failed; + len_negative->target = check_failed; + len_too_big->target = check_failed; + src_null_branch->target = check_failed; + if (srcPos_negative != nullptr) + srcPos_negative ->target = check_failed; + if (src_bad_len != nullptr) + src_bad_len->target = check_failed; + dst_null_branch->target = check_failed; + if (dstPos_negative != nullptr) + dstPos_negative->target = check_failed; + if (dst_bad_len != nullptr) + dst_bad_len->target = check_failed; + AddIntrinsicSlowPath(info, launchpad_branch, return_point); + return true; +} + + /* * Fast string.index_of(I) & (II). Inline check for simple case of char <= 0xffff, * otherwise bails to standard library code. diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index ff243ce149..e271e9d100 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -564,11 +564,15 @@ enum X86OpCode { Binary0fOpCode(kX86Pxor), // parallel XOR 128 bits x 1 Binary0fOpCode(kX86Phaddw), // parallel horizontal addition 16 bits x 8 Binary0fOpCode(kX86Phaddd), // parallel horizontal addition 32 bits x 4 + Binary0fOpCode(kX86Haddpd), // parallel FP horizontal addition 64 bits x 2 + Binary0fOpCode(kX86Haddps), // parallel FP horizontal addition 32 bits x 4 kX86PextrbRRI, // Extract 8 bits from XMM into GPR kX86PextrwRRI, // Extract 16 bits from XMM into GPR kX86PextrdRRI, // Extract 32 bits from XMM into GPR kX86PshuflwRRI, // Shuffle 16 bits in lower 64 bits of XMM. kX86PshufdRRI, // Shuffle 32 bits in XMM. + kX86ShufpsRRI, // FP Shuffle 32 bits in XMM. + kX86ShufpdRRI, // FP Shuffle 64 bits in XMM. 
kX86PsrawRI, // signed right shift of floating point registers 16 bits x 8 kX86PsradRI, // signed right shift of floating point registers 32 bits x 4 kX86PsrlwRI, // logical right shift of floating point registers 16 bits x 8 diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 96625c5dac..770ae89ca2 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1461,6 +1461,18 @@ static bool SkipClass(jobject class_loader, const DexFile& dex_file, mirror::Cla return false; } +static void CheckAndClearResolveException(Thread* self) + SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) { + CHECK(self->IsExceptionPending()); + mirror::Throwable* exception = self->GetException(nullptr); + std::string descriptor = exception->GetClass()->GetDescriptor(); + if (descriptor != "Ljava/lang/IncompatibleClassChangeError;" && + descriptor != "Ljava/lang/NoClassDefFoundError;") { + LOG(FATAL) << "Unexpected exeption " << exception->Dump(); + } + self->ClearException(); +} + static void ResolveClassFieldsAndMethods(const ParallelCompilationManager* manager, size_t class_def_index) LOCKS_EXCLUDED(Locks::mutator_lock_) { @@ -1496,8 +1508,7 @@ static void ResolveClassFieldsAndMethods(const ParallelCompilationManager* manag if (klass == NULL) { // Class couldn't be resolved, for example, super-class is in a different dex file. Don't // attempt to resolve methods and fields when there is no declaring class. - CHECK(soa.Self()->IsExceptionPending()); - soa.Self()->ClearException(); + CheckAndClearResolveException(soa.Self()); resolve_fields_and_methods = false; } else { resolve_fields_and_methods = manager->GetCompiler()->IsImage(); @@ -1516,8 +1527,7 @@ static void ResolveClassFieldsAndMethods(const ParallelCompilationManager* manag mirror::ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(), dex_cache, class_loader, true); if (field == NULL) { - CHECK(soa.Self()->IsExceptionPending()); - soa.Self()->ClearException(); + CheckAndClearResolveException(soa.Self()); } } it.Next(); @@ -1532,8 +1542,7 @@ static void ResolveClassFieldsAndMethods(const ParallelCompilationManager* manag mirror::ArtField* field = class_linker->ResolveField(dex_file, it.GetMemberIndex(), dex_cache, class_loader, false); if (field == NULL) { - CHECK(soa.Self()->IsExceptionPending()); - soa.Self()->ClearException(); + CheckAndClearResolveException(soa.Self()); } } it.Next(); @@ -1545,8 +1554,7 @@ static void ResolveClassFieldsAndMethods(const ParallelCompilationManager* manag NullHandle<mirror::ArtMethod>(), it.GetMethodInvokeType(class_def)); if (method == NULL) { - CHECK(soa.Self()->IsExceptionPending()); - soa.Self()->ClearException(); + CheckAndClearResolveException(soa.Self()); } it.Next(); } @@ -1556,8 +1564,7 @@ static void ResolveClassFieldsAndMethods(const ParallelCompilationManager* manag NullHandle<mirror::ArtMethod>(), it.GetMethodInvokeType(class_def)); if (method == NULL) { - CHECK(soa.Self()->IsExceptionPending()); - soa.Self()->ClearException(); + CheckAndClearResolveException(soa.Self()); } it.Next(); } diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index fb3341ba71..92b2feeb7f 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -43,6 +43,7 @@ class CompilerOptions { static const size_t kDefaultNumDexMethodsThreshold = 900; static constexpr double kDefaultTopKProfileThreshold = 90.0; static const bool kDefaultIncludeDebugSymbols = kIsDebugBuild; + static const 
bool kDefaultIncludePatchInformation = false; CompilerOptions() : compiler_filter_(kDefaultCompilerFilter), @@ -52,6 +53,7 @@ class CompilerOptions { tiny_method_threshold_(kDefaultTinyMethodThreshold), num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold), generate_gdb_information_(false), + include_patch_information_(kDefaultIncludePatchInformation), top_k_profile_threshold_(kDefaultTopKProfileThreshold), include_debug_symbols_(kDefaultIncludeDebugSymbols), explicit_null_checks_(true), @@ -69,6 +71,7 @@ class CompilerOptions { size_t tiny_method_threshold, size_t num_dex_methods_threshold, bool generate_gdb_information, + bool include_patch_information, double top_k_profile_threshold, bool include_debug_symbols, bool explicit_null_checks, @@ -85,6 +88,7 @@ class CompilerOptions { tiny_method_threshold_(tiny_method_threshold), num_dex_methods_threshold_(num_dex_methods_threshold), generate_gdb_information_(generate_gdb_information), + include_patch_information_(include_patch_information), top_k_profile_threshold_(top_k_profile_threshold), include_debug_symbols_(include_debug_symbols), explicit_null_checks_(explicit_null_checks), @@ -188,6 +192,10 @@ class CompilerOptions { return generate_gdb_information_; } + bool GetIncludePatchInformation() const { + return include_patch_information_; + } + private: CompilerFilter compiler_filter_; size_t huge_method_threshold_; @@ -196,6 +204,7 @@ class CompilerOptions { size_t tiny_method_threshold_; size_t num_dex_methods_threshold_; bool generate_gdb_information_; + bool include_patch_information_; // When using a profile file only the top K% of the profiled samples will be compiled. double top_k_profile_threshold_; bool include_debug_symbols_; diff --git a/compiler/elf_fixup.cc b/compiler/elf_fixup.cc index 404e3f8692..60f76efed0 100644 --- a/compiler/elf_fixup.cc +++ b/compiler/elf_fixup.cc @@ -69,97 +69,7 @@ bool ElfFixup::FixupDynamic(ElfFile& elf_file, uintptr_t base_address) { for (Elf32_Word i = 0; i < elf_file.GetDynamicNum(); i++) { Elf32_Dyn& elf_dyn = elf_file.GetDynamic(i); Elf32_Word d_tag = elf_dyn.d_tag; - bool elf_dyn_needs_fixup = false; - switch (d_tag) { - // case 1: well known d_tag values that imply Elf32_Dyn.d_un contains an address in d_ptr - case DT_PLTGOT: - case DT_HASH: - case DT_STRTAB: - case DT_SYMTAB: - case DT_RELA: - case DT_INIT: - case DT_FINI: - case DT_REL: - case DT_DEBUG: - case DT_JMPREL: { - elf_dyn_needs_fixup = true; - break; - } - // d_val or ignored values - case DT_NULL: - case DT_NEEDED: - case DT_PLTRELSZ: - case DT_RELASZ: - case DT_RELAENT: - case DT_STRSZ: - case DT_SYMENT: - case DT_SONAME: - case DT_RPATH: - case DT_SYMBOLIC: - case DT_RELSZ: - case DT_RELENT: - case DT_PLTREL: - case DT_TEXTREL: - case DT_BIND_NOW: - case DT_INIT_ARRAYSZ: - case DT_FINI_ARRAYSZ: - case DT_RUNPATH: - case DT_FLAGS: { - break; - } - // boundary values that should not be used - case DT_ENCODING: - case DT_LOOS: - case DT_HIOS: - case DT_LOPROC: - case DT_HIPROC: { - LOG(FATAL) << "Illegal d_tag value 0x" << std::hex << d_tag; - break; - } - default: { - // case 2: "regular" DT_* ranges where even d_tag values imply an address in d_ptr - if ((DT_ENCODING < d_tag && d_tag < DT_LOOS) - || (DT_LOOS < d_tag && d_tag < DT_HIOS) - || (DT_LOPROC < d_tag && d_tag < DT_HIPROC)) { - // Special case for MIPS which breaks the regular rules between DT_LOPROC and DT_HIPROC - if (elf_file.GetHeader().e_machine == EM_MIPS) { - switch (d_tag) { - case DT_MIPS_RLD_VERSION: - case DT_MIPS_TIME_STAMP: - case DT_MIPS_ICHECKSUM: - 
case DT_MIPS_IVERSION: - case DT_MIPS_FLAGS: - case DT_MIPS_LOCAL_GOTNO: - case DT_MIPS_CONFLICTNO: - case DT_MIPS_LIBLISTNO: - case DT_MIPS_SYMTABNO: - case DT_MIPS_UNREFEXTNO: - case DT_MIPS_GOTSYM: - case DT_MIPS_HIPAGENO: { - break; - } - case DT_MIPS_BASE_ADDRESS: - case DT_MIPS_CONFLICT: - case DT_MIPS_LIBLIST: - case DT_MIPS_RLD_MAP: { - elf_dyn_needs_fixup = true; - break; - } - default: { - LOG(FATAL) << "Unknown MIPS d_tag value 0x" << std::hex << d_tag; - break; - } - } - } else if ((elf_dyn.d_tag % 2) == 0) { - elf_dyn_needs_fixup = true; - } - } else { - LOG(FATAL) << "Unknown d_tag value 0x" << std::hex << d_tag; - } - break; - } - } - if (elf_dyn_needs_fixup) { + if (IsDynamicSectionPointer(d_tag, elf_file.GetHeader().e_machine)) { uint32_t d_ptr = elf_dyn.d_un.d_ptr; if (DEBUG_FIXUP) { LOG(INFO) << StringPrintf("In %s moving Elf32_Dyn[%d] from 0x%08x to 0x%08" PRIxPTR, diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index e4dcaa7426..42743862fe 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -803,6 +803,25 @@ bool ElfWriterQuick::Create(File* elf_file, return elf_writer.Write(oat_writer, dex_files, android_root, is_host); } +// Add patch information to this section. Each patch is a Elf32_Word that +// identifies an offset from the start of the text section +void ElfWriterQuick::ReservePatchSpace(std::vector<uint8_t>* buffer, bool debug) { + size_t size = + compiler_driver_->GetCodeToPatch().size() + + compiler_driver_->GetMethodsToPatch().size() + + compiler_driver_->GetClassesToPatch().size(); + if (size == 0) { + if (debug) { + LOG(INFO) << "No patches to record"; + } + return; + } + buffer->resize(size * sizeof(uintptr_t)); + if (debug) { + LOG(INFO) << "Patches reserved for " << size; + } +} + bool ElfWriterQuick::Write(OatWriter* oat_writer, const std::vector<const DexFile*>& dex_files_unused, const std::string& android_root_unused, @@ -836,6 +855,13 @@ bool ElfWriterQuick::Write(OatWriter* oat_writer, builder.RegisterRawSection(debug_str); } + if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) { + ElfRawSectionBuilder oat_patches(".oat_patches", SHT_OAT_PATCH, 0, NULL, 0, + sizeof(size_t), sizeof(size_t)); + ReservePatchSpace(oat_patches.GetBuffer(), debug); + builder.RegisterRawSection(oat_patches); + } + return builder.Write(); } diff --git a/compiler/elf_writer_quick.h b/compiler/elf_writer_quick.h index 6eb5d68817..a0d36df471 100644 --- a/compiler/elf_writer_quick.h +++ b/compiler/elf_writer_quick.h @@ -51,6 +51,7 @@ class ElfWriterQuick FINAL : public ElfWriter { void AddDebugSymbols(ElfBuilder& builder, OatWriter* oat_writer, bool debug); + void ReservePatchSpace(std::vector<uint8_t>* buffer, bool debug); class ElfSectionBuilder { public: diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 6e5f19a8c5..acfa607f39 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -27,6 +27,8 @@ #include "compiled_method.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" +#include "elf_file.h" +#include "elf_utils.h" #include "elf_writer.h" #include "gc/accounting/card_table-inl.h" #include "gc/accounting/heap_bitmap.h" @@ -138,7 +140,8 @@ bool ImageWriter::Write(const std::string& image_filename, ElfWriter::GetOatElfInformation(oat_file.get(), oat_loaded_size, oat_data_offset); CalculateNewObjectOffsets(oat_loaded_size, oat_data_offset); CopyAndFixupObjects(); - PatchOatCodeAndMethods(); + + PatchOatCodeAndMethods(oat_file.get()); 
Thread::Current()->TransitionFromRunnableToSuspended(kNative); std::unique_ptr<File> image_file(OS::CreateEmptyFile(image_filename.c_str())); @@ -782,7 +785,25 @@ static Class* GetTargetType(const CompilerDriver::TypePatchInformation* patch) return klass; } -void ImageWriter::PatchOatCodeAndMethods() { +void ImageWriter::PatchOatCodeAndMethods(File* elf_file) { + std::vector<uintptr_t> patches; + std::set<uintptr_t> patches_set; + auto maybe_push = [&patches, &patches_set] (uintptr_t p) { + if (patches_set.find(p) == patches_set.end()) { + patches.push_back(p); + patches_set.insert(p); + } + }; + const bool add_patches = compiler_driver_.GetCompilerOptions().GetIncludePatchInformation(); + if (add_patches) { + // TODO if we are adding patches the resulting ELF file might have a potentially rather large + // amount of free space where patches might have been placed. We should adjust the ELF file to + // get rid of this excess space. + patches.reserve(compiler_driver_.GetCodeToPatch().size() + + compiler_driver_.GetMethodsToPatch().size() + + compiler_driver_.GetClassesToPatch().size()); + } + uintptr_t loc = 0; Thread* self = Thread::Current(); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); const char* old_cause = self->StartAssertNoThreadSuspension("ImageWriter"); @@ -828,14 +849,20 @@ void ImageWriter::PatchOatCodeAndMethods() { } else { value = PointerToLowMemUInt32(GetOatAddress(code_offset)); } - SetPatchLocation(patch, value); + SetPatchLocation(patch, value, &loc); + if (add_patches && !patch->AsCall()->IsRelative()) { + maybe_push(loc); + } } const CallPatches& methods_to_patch = compiler_driver_.GetMethodsToPatch(); for (size_t i = 0; i < methods_to_patch.size(); i++) { const CompilerDriver::CallPatchInformation* patch = methods_to_patch[i]; ArtMethod* target = GetTargetMethod(patch); - SetPatchLocation(patch, PointerToLowMemUInt32(GetImageAddress(target))); + SetPatchLocation(patch, PointerToLowMemUInt32(GetImageAddress(target)), &loc); + if (add_patches && !patch->AsCall()->IsRelative()) { + maybe_push(loc); + } } const std::vector<const CompilerDriver::TypePatchInformation*>& classes_to_patch = @@ -843,16 +870,50 @@ void ImageWriter::PatchOatCodeAndMethods() { for (size_t i = 0; i < classes_to_patch.size(); i++) { const CompilerDriver::TypePatchInformation* patch = classes_to_patch[i]; Class* target = GetTargetType(patch); - SetPatchLocation(patch, PointerToLowMemUInt32(GetImageAddress(target))); + SetPatchLocation(patch, PointerToLowMemUInt32(GetImageAddress(target)), &loc); + if (add_patches) { + maybe_push(loc); + } } // Update the image header with the new checksum after patching ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin()); image_header->SetOatChecksum(oat_file_->GetOatHeader().GetChecksum()); self->EndAssertNoThreadSuspension(old_cause); + + // Update the ElfFiles SHT_OAT_PATCH section to include the patches. + if (add_patches) { + std::string err; + // TODO we are mapping in the contents of this file twice. We should be able + // to do it only once, which would be better. 
+ std::unique_ptr<ElfFile> file(ElfFile::Open(elf_file, true, false, &err)); + if (file == nullptr) { + LOG(ERROR) << err; + } + Elf32_Shdr* shdr = file->FindSectionByName(".oat_patches"); + if (shdr != nullptr) { + CHECK_EQ(shdr, file->FindSectionByType(SHT_OAT_PATCH)) + << "Incorrect type for .oat_patches section"; + CHECK_LE(patches.size() * sizeof(uintptr_t), shdr->sh_size) + << "We got more patches than anticipated"; + CHECK_LE(reinterpret_cast<uintptr_t>(file->Begin()) + shdr->sh_offset + shdr->sh_size, + reinterpret_cast<uintptr_t>(file->End())) << "section is too large"; + CHECK(shdr == &file->GetSectionHeader(file->GetSectionHeaderNum() - 1) || + shdr->sh_offset + shdr->sh_size <= (shdr + 1)->sh_offset) + << "Section overlaps onto next section"; + // It's mmap'd so we can just memcpy. + memcpy(file->Begin() + shdr->sh_offset, patches.data(), patches.size()*sizeof(uintptr_t)); + // TODO We should fill in the newly empty space between the last patch and the start of the + // next section by moving the following sections down if possible. + shdr->sh_size = patches.size() * sizeof(uintptr_t); + } else { + LOG(ERROR) << "Unable to find section header for SHT_OAT_PATCH"; + } + } } -void ImageWriter::SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value) { +void ImageWriter::SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value, + uintptr_t* patched_ptr) { ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); const void* quick_oat_code = class_linker->GetQuickOatCodeFor(patch->GetDexFile(), patch->GetReferrerClassDefIdx(), @@ -885,6 +946,14 @@ void ImageWriter::SetPatchLocation(const CompilerDriver::PatchInformation* patch } *patch_location = value; oat_header.UpdateChecksum(patch_location, sizeof(value)); + + uintptr_t loc = reinterpret_cast<uintptr_t>(patch_location) - + (reinterpret_cast<uintptr_t>(oat_file_->Begin()) + oat_header.GetExecutableOffset()); + CHECK_GT(reinterpret_cast<uintptr_t>(patch_location), + reinterpret_cast<uintptr_t>(oat_file_->Begin()) + oat_header.GetExecutableOffset()); + CHECK_LT(loc, oat_file_->Size() - oat_header.GetExecutableOffset()); + + *patched_ptr = loc; } } // namespace art diff --git a/compiler/image_writer.h b/compiler/image_writer.h index aff155affc..2bcb41e3fe 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -150,9 +150,10 @@ class ImageWriter { SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); // Patches references in OatFile to expect runtime addresses. 
- void PatchOatCodeAndMethods() + void PatchOatCodeAndMethods(File* elf_file) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); - void SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value) + void SetPatchLocation(const CompilerDriver::PatchInformation* patch, uint32_t value, + uintptr_t* patched_location) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_); const CompilerDriver& compiler_driver_; diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 25b489ba79..b4d863bf32 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -61,8 +61,8 @@ class JniCompilerTest : public CommonCompilerTest { method = c->FindVirtualMethod(method_name, method_sig); } ASSERT_TRUE(method != nullptr) << method_name << " " << method_sig; - if (method->GetEntryPointFromQuickCompiledCode() == nullptr) { - ASSERT_TRUE(method->GetEntryPointFromPortableCompiledCode() == nullptr); + if (method->GetEntryPointFromQuickCompiledCode() == nullptr || + method->GetEntryPointFromQuickCompiledCode() == class_linker_->GetQuickGenericJniTrampoline()) { CompileMethod(method); ASSERT_TRUE(method->GetEntryPointFromQuickCompiledCode() != nullptr) << method_name << " " << method_sig; |