diff options
-rw-r--r-- | compiler/dex/quick/arm/arm_dex_file_method_inliner.cc | 4 | ||||
-rw-r--r-- | compiler/dex/quick/arm/arm_lir.h | 10 | ||||
-rw-r--r-- | compiler/dex/quick/arm/assemble_arm.cc | 13 | ||||
-rw-r--r-- | compiler/dex/quick/arm/int_arm.cc | 135 | ||||
-rw-r--r-- | compiler/dex/quick/mir_to_lir.h | 2 | ||||
-rw-r--r-- | disassembler/disassembler_arm.cc | 47 |
6 files changed, 166 insertions, 45 deletions
diff --git a/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc b/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc index 257b2c4e94..59f7202f33 100644 --- a/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc +++ b/compiler/dex/quick/arm/arm_dex_file_method_inliner.cc @@ -66,8 +66,8 @@ const DexFileMethodInliner::IntrinsicDef ArmDexFileMethodInliner::kIntrinsicMeth INTRINSIC(SunMiscUnsafe, CompareAndSwapInt, ObjectJII_Z, kIntrinsicCas, kIntrinsicFlagNone), - // INTRINSIC(SunMiscUnsafe, CompareAndSwapLong, ObjectJJJ_Z, kIntrinsicCas, - // kIntrinsicFlagIsLong), + INTRINSIC(SunMiscUnsafe, CompareAndSwapLong, ObjectJJJ_Z, kIntrinsicCas, + kIntrinsicFlagIsLong), INTRINSIC(SunMiscUnsafe, CompareAndSwapObject, ObjectJObjectObject_Z, kIntrinsicCas, kIntrinsicFlagIsObject), diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h index 8cd7c9440f..395c78828e 100644 --- a/compiler/dex/quick/arm/arm_lir.h +++ b/compiler/dex/quick/arm/arm_lir.h @@ -426,9 +426,11 @@ enum ArmOpcode { kThumb2Vmovd_IMM8, // vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12] [10110000] imm4l[3-0]. kThumb2Mla, // mla [111110110000] rn[19-16] ra[15-12] rd[7-4] [0000] rm[3-0]. kThumb2Umull, // umull [111110111010] rn[19-16], rdlo[15-12] rdhi[11-8] [0000] rm[3-0]. - kThumb2Ldrex, // ldrex [111010000101] rn[19-16] rt[11-8] [1111] imm8[7-0]. - kThumb2Strex, // strex [111010000100] rn[19-16] rt[11-8] rd[11-8] imm8[7-0]. - kThumb2Clrex, // clrex [111100111011111110000111100101111]. + kThumb2Ldrex, // ldrex [111010000101] rn[19-16] rt[15-12] [1111] imm8[7-0]. + kThumb2Ldrexd, // ldrexd [111010001101] rn[19-16] rt[15-12] rt2[11-8] [11111111]. + kThumb2Strex, // strex [111010000100] rn[19-16] rt[15-12] rd[11-8] imm8[7-0]. + kThumb2Strexd, // strexd [111010001100] rn[19-16] rt[15-12] rt2[11-8] [0111] Rd[3-0]. + kThumb2Clrex, // clrex [11110011101111111000111100101111]. kThumb2Bfi, // bfi [111100110110] rn[19-16] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0]. kThumb2Bfc, // bfc [11110011011011110] [0] imm3[14-12] rd[11-8] imm2[7-6] [0] msb[4-0]. kThumb2Dmb, // dmb [1111001110111111100011110101] option[3-0]. @@ -447,7 +449,7 @@ enum ArmOpcode { kThumb2MovImm16HST, // Special purpose version for switch table use. kThumb2LdmiaWB, // ldmia [111010011001[ rn[19..16] mask[15..0]. kThumb2SubsRRI12, // setflags encoding. - kThumb2OrrRRRs, // orrx [111010100101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. + kThumb2OrrRRRs, // orrs [111010100101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. kThumb2Push1, // t3 encoding of push. kThumb2Pop1, // t3 encoding of pop. kThumb2RsubRRR, // rsb [111010111101] rn[19..16] [0000] rd[11..8] [0000] rm[3..0]. diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc index 1c81a5aa0a..b3236ae23e 100644 --- a/compiler/dex/quick/arm/assemble_arm.cc +++ b/compiler/dex/quick/arm/assemble_arm.cc @@ -877,8 +877,7 @@ const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = { "vmov.f64", "!0S, #0x!1h", 4, kFixupNone), ENCODING_MAP(kThumb2Mla, 0xfb000000, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, - kFmtBitBlt, 15, 12, - IS_QUAD_OP | REG_DEF0 | REG_USE1 | REG_USE2 | REG_USE3, + kFmtBitBlt, 15, 12, IS_QUAD_OP | REG_DEF0_USE123, "mla", "!0C, !1C, !2C, !3C", 4, kFixupNone), ENCODING_MAP(kThumb2Umull, 0xfba00000, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, @@ -889,10 +888,18 @@ const ArmEncodingMap ArmMir2Lir::EncodingMap[kArmLast] = { kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD, "ldrex", "!0C, [!1C, #!2E]", 4, kFixupNone), + ENCODING_MAP(kThumb2Ldrexd, 0xe8d0007f, + kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, + kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2 | IS_LOAD, + "ldrexd", "!0C, !1C, [!2C]", 4, kFixupNone), ENCODING_MAP(kThumb2Strex, 0xe8400000, kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE, - "strex", "!0C,!1C, [!2C, #!2E]", 4, kFixupNone), + "strex", "!0C, !1C, [!2C, #!2E]", 4, kFixupNone), + ENCODING_MAP(kThumb2Strexd, 0xe8c00070, + kFmtBitBlt, 3, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, + kFmtBitBlt, 19, 16, IS_QUAD_OP | REG_DEF0_USE123 | IS_STORE, + "strexd", "!0C, !1C, !2C, [!3C]", 4, kFixupNone), ENCODING_MAP(kThumb2Clrex, 0xf3bf8f2f, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1, NO_OPERAND, diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 97271794f7..e839fe5c5d 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -561,22 +561,67 @@ void ArmMir2Lir::OpTlsCmp(ThreadOffset offset, int val) { } bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { - DCHECK(!is_long); // not supported yet DCHECK_EQ(cu_->instruction_set, kThumb2); // Unused - RegLocation rl_src_unsafe = info->args[0]; RegLocation rl_src_obj = info->args[1]; // Object - known non-null RegLocation rl_src_offset = info->args[2]; // long low rl_src_offset.wide = 0; // ignore high half in info->args[3] RegLocation rl_src_expected = info->args[4]; // int, long or Object - RegLocation rl_src_new_value = info->args[5]; // int, long or Object + // If is_long, high half is in info->args[5] + RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object + // If is_long, high half is in info->args[7] RegLocation rl_dest = InlineTarget(info); // boolean place for result + // We have only 5 temporary registers available and actually only 4 if the InlineTarget + // above locked one of the temps. For a straightforward CAS64 we need 7 registers: + // r_ptr (1), new_value (2), expected(2) and ldrexd result (2). If neither expected nor + // new_value is in a non-temp core register we shall reload them in the ldrex/strex loop + // into the same temps, reducing the number of required temps down to 5. We shall work + // around the potentially locked temp by using LR for r_ptr, unconditionally. + // TODO: Pass information about the need for more temps to the stack frame generation + // code so that we can rely on being able to allocate enough temps. + DCHECK(!reg_pool_->core_regs[rARM_LR].is_temp); + MarkTemp(rARM_LR); + FreeTemp(rARM_LR); + LockTemp(rARM_LR); + bool load_early = true; + if (is_long) { + bool expected_is_core_reg = + rl_src_expected.location == kLocPhysReg && !IsFpReg(rl_src_expected.low_reg); + bool new_value_is_core_reg = + rl_src_new_value.location == kLocPhysReg && !IsFpReg(rl_src_new_value.low_reg); + bool expected_is_good_reg = expected_is_core_reg && !IsTemp(rl_src_expected.low_reg); + bool new_value_is_good_reg = new_value_is_core_reg && !IsTemp(rl_src_new_value.low_reg); + + if (!expected_is_good_reg && !new_value_is_good_reg) { + // None of expected/new_value is non-temp reg, need to load both late + load_early = false; + // Make sure they are not in the temp regs and the load will not be skipped. + if (expected_is_core_reg) { + FlushRegWide(rl_src_expected.low_reg, rl_src_expected.high_reg); + ClobberSReg(rl_src_expected.s_reg_low); + ClobberSReg(GetSRegHi(rl_src_expected.s_reg_low)); + rl_src_expected.location = kLocDalvikFrame; + } + if (new_value_is_core_reg) { + FlushRegWide(rl_src_new_value.low_reg, rl_src_new_value.high_reg); + ClobberSReg(rl_src_new_value.s_reg_low); + ClobberSReg(GetSRegHi(rl_src_new_value.s_reg_low)); + rl_src_new_value.location = kLocDalvikFrame; + } + } + } // Release store semantics, get the barrier out of the way. TODO: revisit GenMemBarrier(kStoreLoad); RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); - RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg); + RegLocation rl_new_value; + if (!is_long) { + rl_new_value = LoadValue(rl_src_new_value, kCoreReg); + } else if (load_early) { + rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg); + } if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) { // Mark card for object assuming new value is stored. @@ -585,7 +630,7 @@ bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); - int r_ptr = AllocTemp(); + int r_ptr = rARM_LR; OpRegRegReg(kOpAdd, r_ptr, rl_object.low_reg, rl_offset.low_reg); // Free now unneeded rl_object and rl_offset to give more temps. @@ -594,29 +639,77 @@ bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { ClobberSReg(rl_offset.s_reg_low); FreeTemp(rl_offset.low_reg); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - LoadConstant(rl_result.low_reg, 0); // r_result := 0 + RegLocation rl_expected; + if (!is_long) { + rl_expected = LoadValue(rl_src_expected, kCoreReg); + } else if (load_early) { + rl_expected = LoadValueWide(rl_src_expected, kCoreReg); + } else { + rl_new_value.low_reg = rl_expected.low_reg = AllocTemp(); + rl_new_value.high_reg = rl_expected.high_reg = AllocTemp(); + } + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + // result = tmp != 0; - // while ([r_ptr] == rExpected && r_result == 0) { - // [r_ptr] <- r_new_value && r_result := success ? 0 : 1 - // r_result ^= 1 - // } - int r_old_value = AllocTemp(); + int r_tmp = AllocTemp(); LIR* target = NewLIR0(kPseudoTargetLabel); - NewLIR3(kThumb2Ldrex, r_old_value, r_ptr, 0); - - RegLocation rl_expected = LoadValue(rl_src_expected, kCoreReg); - OpRegReg(kOpCmp, r_old_value, rl_expected.low_reg); - FreeTemp(r_old_value); // Now unneeded. - OpIT(kCondEq, "TT"); - NewLIR4(kThumb2Strex /* eq */, rl_result.low_reg, rl_new_value.low_reg, r_ptr, 0); - FreeTemp(r_ptr); // Now unneeded. - OpRegImm(kOpXor /* eq */, rl_result.low_reg, 1); - OpRegImm(kOpCmp /* eq */, rl_result.low_reg, 0); + + if (is_long) { + int r_tmp_high = AllocTemp(); + if (!load_early) { + LoadValueDirectWide(rl_src_expected, rl_expected.low_reg, rl_expected.high_reg); + } + NewLIR3(kThumb2Ldrexd, r_tmp, r_tmp_high, r_ptr); + OpRegReg(kOpSub, r_tmp, rl_expected.low_reg); + OpRegReg(kOpSub, r_tmp_high, rl_expected.high_reg); + if (!load_early) { + LoadValueDirectWide(rl_src_new_value, rl_new_value.low_reg, rl_new_value.high_reg); + } + // Make sure we use ORR that sets the ccode + if (ARM_LOWREG(r_tmp) && ARM_LOWREG(r_tmp_high)) { + NewLIR2(kThumbOrr, r_tmp, r_tmp_high); + } else { + NewLIR4(kThumb2OrrRRRs, r_tmp, r_tmp, r_tmp_high, 0); + } + FreeTemp(r_tmp_high); // Now unneeded + + DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + OpIT(kCondEq, "T"); + NewLIR4(kThumb2Strexd /* eq */, r_tmp, rl_new_value.low_reg, rl_new_value.high_reg, r_ptr); + + } else { + NewLIR3(kThumb2Ldrex, r_tmp, r_ptr, 0); + OpRegReg(kOpSub, r_tmp, rl_expected.low_reg); + DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + OpIT(kCondEq, "T"); + NewLIR4(kThumb2Strex /* eq */, r_tmp, rl_new_value.low_reg, r_ptr, 0); + } + + // Still one conditional left from OpIT(kCondEq, "T") from either branch + OpRegImm(kOpCmp /* eq */, r_tmp, 1); OpCondBranch(kCondEq, target); + if (!load_early) { + FreeTemp(rl_expected.low_reg); // Now unneeded. + FreeTemp(rl_expected.high_reg); // Now unneeded. + } + + // result := (tmp1 != 0) ? 0 : 1; + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegRegImm(kOpRsub, rl_result.low_reg, r_tmp, 1); + DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); + OpIT(kCondCc, ""); + LoadConstant(rl_result.low_reg, 0); /* cc */ + FreeTemp(r_tmp); // Now unneeded. + StoreValue(rl_dest, rl_result); + // Now, restore lr to its non-temp status. + Clobber(rARM_LR); + UnmarkTemp(rARM_LR); return true; } diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index ad9b0de3d5..f8a2d03ec8 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -87,6 +87,7 @@ typedef uint32_t CodeOffset; // Native code offset in bytes. #define REG_DEF0_USE01 (REG_DEF0 | REG_USE01) #define REG_DEF0_USE0 (REG_DEF0 | REG_USE0) #define REG_DEF0_USE12 (REG_DEF0 | REG_USE12) +#define REG_DEF0_USE123 (REG_DEF0 | REG_USE123) #define REG_DEF0_USE1 (REG_DEF0 | REG_USE1) #define REG_DEF0_USE2 (REG_DEF0 | REG_USE2) #define REG_DEFAD_USEAD (REG_DEFAD_USEA | REG_USED) @@ -98,6 +99,7 @@ typedef uint32_t CodeOffset; // Native code offset in bytes. #define REG_USE02 (REG_USE0 | REG_USE2) #define REG_USE12 (REG_USE1 | REG_USE2) #define REG_USE23 (REG_USE2 | REG_USE3) +#define REG_USE123 (REG_USE1 | REG_USE2 | REG_USE3) struct BasicBlock; struct CallInfo; diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc index 936fb07728..90d84d5a72 100644 --- a/disassembler/disassembler_arm.cc +++ b/disassembler/disassembler_arm.cc @@ -440,18 +440,34 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) if (op3 == 0) { // op3 is 00, op4 is 00 opcode << "strex"; args << Rd << ", " << Rt << ", [" << Rn << ", #" << (imm8 << 2) << "]"; + if (Rd.r == 13 || Rd.r == 15 || Rt.r == 13 || Rt.r == 15 || Rn.r == 15 || + Rd.r == Rn.r || Rd.r == Rt.r) { + args << " (UNPREDICTABLE)"; + } } else { // op3 is 01, op4 is 00 // this is one of strexb, strexh or strexd int op5 = (instr >> 4) & 0xf; switch (op5) { case 4: - opcode << "strexb"; - break; case 5: - opcode << "strexh"; + opcode << ((op5 == 4) ? "strexb" : "strexh"); + Rd = ArmRegister(instr, 0); + args << Rd << ", " << Rt << ", [" << Rn << "]"; + if (Rd.r == 13 || Rd.r == 15 || Rt.r == 13 || Rt.r == 15 || Rn.r == 15 || + Rd.r == Rn.r || Rd.r == Rt.r || (instr & 0xf00) != 0xf00) { + args << " (UNPREDICTABLE)"; + } break; case 7: opcode << "strexd"; + ArmRegister Rt2 = Rd; + Rd = ArmRegister(instr, 0); + args << Rd << ", " << Rt << ", " << Rt2 << ", [" << Rn << "]"; + if (Rd.r == 13 || Rd.r == 15 || Rt.r == 13 || Rt.r == 15 || + Rt2.r == 13 || Rt2.r == 15 || Rn.r == 15 || + Rd.r == Rn.r || Rd.r == Rt.r || Rd.r == Rt2.r) { + args << " (UNPREDICTABLE)"; + } break; } } @@ -460,6 +476,9 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) if (op3 == 0) { // op3 is 00, op4 is 01 opcode << "ldrex"; args << Rt << ", [" << Rn << ", #" << (imm8 << 2) << "]"; + if (Rt.r == 13 || Rt.r == 15 || Rn.r == 15 || (instr & 0xf00) != 0xf00) { + args << " (UNPREDICTABLE)"; + } } else { // op3 is 01, op4 is 01 // this is one of strexb, strexh or strexd int op5 = (instr >> 4) & 0xf; @@ -471,13 +490,20 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) opcode << "tbh"; break; case 4: - opcode << "ldrexb"; - break; case 5: - opcode << "ldrexh"; + opcode << ((op5 == 4) ? "ldrexb" : "ldrexh"); + args << Rt << ", [" << Rn << "]"; + if (Rt.r == 13 || Rt.r == 15 || Rn.r == 15 || (instr & 0xf0f) != 0xf0f) { + args << " (UNPREDICTABLE)"; + } break; case 7: opcode << "ldrexd"; + args << Rt << ", " << Rd /* Rt2 */ << ", [" << Rn << "]"; + if (Rt.r == 13 || Rt.r == 15 || Rd.r == 13 /* Rt2 */ || Rd.r == 15 /* Rt2 */ || + Rn.r == 15 || (instr & 0x00f) != 0x00f) { + args << " (UNPREDICTABLE)"; + } break; } } @@ -507,15 +533,6 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) } } - - if (op3 == 0 && op4 == 0) { // STREX - ArmRegister Rd(instr, 8); - opcode << "strex"; - args << Rd << ", " << Rt << ", [" << Rn << ", #" << (imm8 << 2) << "]"; - } else if (op3 == 0 && op4 == 1) { // LDREX - opcode << "ldrex"; - args << Rt << ", [" << Rn << ", #" << (imm8 << 2) << "]"; - } } else if ((op2 & 0x60) == 0x20) { // 01x xxxx // Data-processing (shifted register) // |111|1110|0000|0|0000|1111|1100|00|00|0000| |