Diffstat (limited to 'compiler/dex/quick')
| -rw-r--r-- | compiler/dex/quick/arm/int_arm.cc          | 15 |
| -rw-r--r-- | compiler/dex/quick/gen_common.cc           | 31 |
| -rw-r--r-- | compiler/dex/quick/gen_invoke.cc           | 20 |
| -rw-r--r-- | compiler/dex/quick/local_optimizations.cc  |  8 |
| -rw-r--r-- | compiler/dex/quick/mir_to_lir-inl.h        |  4 |
| -rw-r--r-- | compiler/dex/quick/mir_to_lir.cc           |  7 |
| -rw-r--r-- | compiler/dex/quick/mir_to_lir.h            | 10 |
| -rw-r--r-- | compiler/dex/quick/x86/assemble_x86.cc     |  1 |
| -rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h       |  2 |
| -rw-r--r-- | compiler/dex/quick/x86/int_x86.cc          | 12 |
| -rw-r--r-- | compiler/dex/quick/x86/target_x86.cc       | 48 |
| -rw-r--r-- | compiler/dex/quick/x86/x86_lir.h           |  1 |
12 files changed, 140 insertions, 19 deletions
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 1d959fa9de..93d48d6650 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -777,6 +777,9 @@ LIR* ArmMir2Lir::OpDecAndBranch(ConditionCode c_code, int reg, LIR* target) {
 
 void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
 #if ANDROID_SMP != 0
+  // Start off with using the last LIR as the barrier. If it is not enough, then we will generate one.
+  LIR* barrier = last_lir_insn_;
+
   int dmb_flavor;
   // TODO: revisit Arm barrier kinds
   switch (barrier_kind) {
@@ -789,8 +792,16 @@ void ArmMir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
       dmb_flavor = kSY;  // quiet gcc.
       break;
   }
-  LIR* dmb = NewLIR1(kThumb2Dmb, dmb_flavor);
-  dmb->u.m.def_mask = ENCODE_ALL;
+
+  // If the same barrier already exists, don't generate another.
+  if (barrier == nullptr
+      || (barrier != nullptr && (barrier->opcode != kThumb2Dmb || barrier->operands[0] != dmb_flavor))) {
+    barrier = NewLIR1(kThumb2Dmb, dmb_flavor);
+  }
+
+  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
+  DCHECK(!barrier->flags.use_def_invalid);
+  barrier->u.m.def_mask = ENCODE_ALL;
 #endif
 }
 
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 8c3a11fb6c..323597729e 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -479,6 +479,7 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double,
       rl_src = LoadValue(rl_src, kAnyReg);
     }
     if (field_info.IsVolatile()) {
+      // There might have been a store before this volatile one so insert StoreStore barrier.
       GenMemBarrier(kStoreStore);
     }
     if (is_long_or_double) {
@@ -488,6 +489,7 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double,
       StoreWordDisp(r_base, field_info.FieldOffset().Int32Value(), rl_src.reg.GetReg());
     }
     if (field_info.IsVolatile()) {
+      // A load might follow the volatile store so insert a StoreLoad barrier.
       GenMemBarrier(kStoreLoad);
     }
     if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) {
@@ -559,9 +561,7 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest,
     }
     // r_base now holds static storage base
     RegLocation rl_result = EvalLoc(rl_dest, kAnyReg, true);
-    if (field_info.IsVolatile()) {
-      GenMemBarrier(kLoadLoad);
-    }
+
     if (is_long_or_double) {
       LoadBaseDispWide(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg.GetReg(),
                        rl_result.reg.GetHighReg(), INVALID_SREG);
@@ -569,6 +569,14 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest,
       LoadWordDisp(r_base, field_info.FieldOffset().Int32Value(), rl_result.reg.GetReg());
     }
     FreeTemp(r_base);
+
+    if (field_info.IsVolatile()) {
+      // Without context sensitive analysis, we must issue the most conservative barriers.
+      // In this case, either a load or store may follow so we issue both barriers.
+      GenMemBarrier(kLoadLoad);
+      GenMemBarrier(kLoadStore);
+    }
+
     if (is_long_or_double) {
       StoreValueWide(rl_dest, rl_result);
     } else {
@@ -716,7 +724,10 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size,
                          rl_result.reg.GetHighReg(), rl_obj.s_reg_low);
         MarkPossibleNullPointerException(opt_flags);
         if (field_info.IsVolatile()) {
+          // Without context sensitive analysis, we must issue the most conservative barriers.
+          // In this case, either a load or store may follow so we issue both barriers.
           GenMemBarrier(kLoadLoad);
+          GenMemBarrier(kLoadStore);
         }
       } else {
         int reg_ptr = AllocTemp();
@@ -725,7 +736,10 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size,
         LoadBaseDispWide(reg_ptr, 0, rl_result.reg.GetReg(), rl_result.reg.GetHighReg(),
                          INVALID_SREG);
         if (field_info.IsVolatile()) {
+          // Without context sensitive analysis, we must issue the most conservative barriers.
+          // In this case, either a load or store may follow so we issue both barriers.
           GenMemBarrier(kLoadLoad);
+          GenMemBarrier(kLoadStore);
         }
         FreeTemp(reg_ptr);
       }
@@ -737,7 +751,10 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size,
                  rl_result.reg.GetReg(), kWord, rl_obj.s_reg_low);
     MarkPossibleNullPointerException(opt_flags);
     if (field_info.IsVolatile()) {
+      // Without context sensitive analysis, we must issue the most conservative barriers.
+      // In this case, either a load or store may follow so we issue both barriers.
       GenMemBarrier(kLoadLoad);
+      GenMemBarrier(kLoadStore);
     }
     StoreValue(rl_dest, rl_result);
   }
@@ -773,25 +790,29 @@ void Mir2Lir::GenIPut(MIR* mir, int opt_flags, OpSize size,
       reg_ptr = AllocTemp();
       OpRegRegImm(kOpAdd, reg_ptr, rl_obj.reg.GetReg(), field_info.FieldOffset().Int32Value());
       if (field_info.IsVolatile()) {
+        // There might have been a store before this volatile one so insert StoreStore barrier.
        GenMemBarrier(kStoreStore);
       }
       StoreBaseDispWide(reg_ptr, 0, rl_src.reg.GetReg(), rl_src.reg.GetHighReg());
       MarkPossibleNullPointerException(opt_flags);
       if (field_info.IsVolatile()) {
-        GenMemBarrier(kLoadLoad);
+        // A load might follow the volatile store so insert a StoreLoad barrier.
+        GenMemBarrier(kStoreLoad);
       }
       FreeTemp(reg_ptr);
     } else {
       rl_src = LoadValue(rl_src, reg_class);
       GenNullCheck(rl_obj.reg.GetReg(), opt_flags);
       if (field_info.IsVolatile()) {
+        // There might have been a store before this volatile one so insert StoreStore barrier.
         GenMemBarrier(kStoreStore);
       }
       StoreBaseDisp(rl_obj.reg.GetReg(), field_info.FieldOffset().Int32Value(),
                     rl_src.reg.GetReg(), kWord);
       MarkPossibleNullPointerException(opt_flags);
       if (field_info.IsVolatile()) {
-        GenMemBarrier(kLoadLoad);
+        // A load might follow the volatile store so insert a StoreLoad barrier.
+        GenMemBarrier(kStoreLoad);
       }
       if (is_object && !mir_graph_->IsConstantNullRef(rl_src)) {
         MarkGCCard(rl_src.reg.GetReg(), rl_obj.reg.GetReg());
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index f3c5a34c3d..9e50749a1d 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -1356,18 +1356,27 @@ bool Mir2Lir::GenInlinedUnsafeGet(CallInfo* info,
   RegLocation rl_src_offset = info->args[2];  // long low
   rl_src_offset.wide = 0;  // ignore high half in info->args[3]
   RegLocation rl_dest = is_long ? InlineTargetWide(info) : InlineTarget(info);  // result reg
-  if (is_volatile) {
-    GenMemBarrier(kLoadLoad);
-  }
+
   RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
   RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (is_long) {
     OpRegReg(kOpAdd, rl_object.reg.GetReg(), rl_offset.reg.GetReg());
     LoadBaseDispWide(rl_object.reg.GetReg(), 0, rl_result.reg.GetReg(),
                      rl_result.reg.GetHighReg(), INVALID_SREG);
-    StoreValueWide(rl_dest, rl_result);
   } else {
     LoadBaseIndexed(rl_object.reg.GetReg(), rl_offset.reg.GetReg(), rl_result.reg.GetReg(), 0, kWord);
+  }
+
+  if (is_volatile) {
+    // Without context sensitive analysis, we must issue the most conservative barriers.
+    // In this case, either a load or store may follow so we issue both barriers.
+    GenMemBarrier(kLoadLoad);
+    GenMemBarrier(kLoadStore);
+  }
+
+  if (is_long) {
+    StoreValueWide(rl_dest, rl_result);
+  } else {
     StoreValue(rl_dest, rl_result);
   }
   return true;
@@ -1385,6 +1394,7 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long,
   rl_src_offset.wide = 0;  // ignore high half in info->args[3]
   RegLocation rl_src_value = info->args[4];  // value to store
   if (is_volatile || is_ordered) {
+    // There might have been a store before this volatile one so insert StoreStore barrier.
     GenMemBarrier(kStoreStore);
   }
   RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
@@ -1401,7 +1411,9 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long,
 
   // Free up the temp early, to ensure x86 doesn't run out of temporaries in MarkGCCard.
   FreeTemp(rl_offset.reg.GetReg());
+
   if (is_volatile) {
+    // A load might follow the volatile store so insert a StoreLoad barrier.
     GenMemBarrier(kStoreLoad);
   }
   if (is_object) {
diff --git a/compiler/dex/quick/local_optimizations.cc b/compiler/dex/quick/local_optimizations.cc
index 6df91e674a..dd4af9c15f 100644
--- a/compiler/dex/quick/local_optimizations.cc
+++ b/compiler/dex/quick/local_optimizations.cc
@@ -92,7 +92,10 @@ void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) {
         ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) ||  // Skip wide loads.
         ((target_flags & (REG_USE0 | REG_USE1 | REG_USE2)) ==
          (REG_USE0 | REG_USE1 | REG_USE2)) ||  // Skip wide stores.
-        !(target_flags & (IS_LOAD | IS_STORE))) {
+        // Skip instructions that are neither loads or stores.
+        !(target_flags & (IS_LOAD | IS_STORE)) ||
+        // Skip instructions that do both load and store.
+        ((target_flags & (IS_STORE | IS_LOAD)) == (IS_STORE | IS_LOAD))) {
       continue;
     }
 
@@ -293,7 +296,8 @@ void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) {
     /* Skip non-interesting instructions */
     if (!(target_flags & IS_LOAD) ||
         (this_lir->flags.is_nop == true) ||
-        ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1))) {
+        ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) ||
+        ((target_flags & (IS_STORE | IS_LOAD)) == (IS_STORE | IS_LOAD))) {
       continue;
     }
 
diff --git a/compiler/dex/quick/mir_to_lir-inl.h b/compiler/dex/quick/mir_to_lir-inl.h
index 8b1f81d47f..b2362fc549 100644
--- a/compiler/dex/quick/mir_to_lir-inl.h
+++ b/compiler/dex/quick/mir_to_lir-inl.h
@@ -192,6 +192,10 @@ inline void Mir2Lir::SetupResourceMasks(LIR* lir) {
     SetupRegMask(&lir->u.m.def_mask, lir->operands[1]);
   }
 
+  if (flags & REG_DEF2) {
+    SetupRegMask(&lir->u.m.def_mask, lir->operands[2]);
+  }
+
   if (flags & REG_USE0) {
     SetupRegMask(&lir->u.m.use_mask, lir->operands[0]);
   }
diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 82664e2a90..657c3d98c4 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc
@@ -138,7 +138,10 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) {
     LoadBaseDisp(reg_obj, data.field_offset, rl_dest.reg.GetReg(), kWord, INVALID_SREG);
   }
   if (data.is_volatile) {
+    // Without context sensitive analysis, we must issue the most conservative barriers.
+    // In this case, either a load or store may follow so we issue both barriers.
     GenMemBarrier(kLoadLoad);
+    GenMemBarrier(kLoadStore);
   }
   return true;
 }
@@ -160,6 +163,7 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) {
   int reg_obj = LoadArg(data.object_arg);
   int reg_src = LoadArg(data.src_arg, wide);
   if (data.is_volatile) {
+    // There might have been a store before this volatile one so insert StoreStore barrier.
     GenMemBarrier(kStoreStore);
   }
   if (wide) {
@@ -170,7 +174,8 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) {
     StoreBaseDisp(reg_obj, data.field_offset, reg_src, kWord);
   }
   if (data.is_volatile) {
-    GenMemBarrier(kLoadLoad);
+    // A load might follow the volatile store so insert a StoreLoad barrier.
+    GenMemBarrier(kStoreLoad);
   }
   if (data.op_variant == InlineMethodAnalyser::IPutVariant(Instruction::IPUT_OBJECT)) {
     MarkGCCard(reg_src, reg_obj);
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 5a1f6cd034..51fe8f1c81 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -56,6 +56,7 @@ typedef uint32_t CodeOffset;  // Native code offset in bytes.
 #define NO_OPERAND         (1ULL << kNoOperand)
 #define REG_DEF0           (1ULL << kRegDef0)
 #define REG_DEF1           (1ULL << kRegDef1)
+#define REG_DEF2           (1ULL << kRegDef2)
 #define REG_DEFA           (1ULL << kRegDefA)
 #define REG_DEFD           (1ULL << kRegDefD)
 #define REG_DEF_FPCS_LIST0 (1ULL << kRegDefFPCSList0)
@@ -953,7 +954,16 @@ class Mir2Lir : public Backend {
      */
     virtual void GenSelect(BasicBlock* bb, MIR* mir) = 0;
 
+    /**
+     * @brief Used to generate a memory barrier in an architecture specific way.
+     * @details The last generated LIR will be considered for use as barrier. Namely,
+     * if the last LIR can be updated in a way where it will serve the semantics of
+     * barrier, then it will be used as such. Otherwise, a new LIR will be generated
+     * that can keep the semantics.
+     * @param barrier_kind The kind of memory barrier to generate.
+     */
     virtual void GenMemBarrier(MemBarrierKind barrier_kind) = 0;
+
     virtual void GenMoveException(RegLocation rl_dest) = 0;
     virtual void GenMultiplyByTwoBitMultiplier(RegLocation rl_src, RegLocation rl_result,
                                                int lit, int first_bit,
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 9cafcee58d..e7a1a69b52 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -344,6 +344,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
   { kX86LockCmpxchgAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014 | REG_DEFA_USEA | SETS_CCODES, { 0xF0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Lock Cmpxchg", "[!0r+!1r<<!2d+!3d],!4r" },
   { kX86LockCmpxchg8bM, kMem, IS_STORE | IS_BINARY_OP | REG_USE0 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0 }, "Lock Cmpxchg8b", "[!0r+!1d]" },
   { kX86LockCmpxchg8bA, kArray, IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0 }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" },
+  { kX86XchgMR, kMemReg, IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02, { 0, 0, 0x87, 0, 0, 0, 0, 0 }, "Xchg", "[!0r+!1d],!2r" },
 
   EXT_0F_ENCODING_MAP(Movzx8,  0x00, 0xB6, REG_DEF0),
   EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0),
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 275a2d934a..f2654b9d74 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -372,6 +372,8 @@ class X86Mir2Lir : public Mir2Lir {
     void OpVectorRegCopyWide(uint8_t fp_reg, uint8_t low_reg, uint8_t high_reg);
     void GenConstWide(RegLocation rl_dest, int64_t value);
 
+    static bool ProvidesFullMemoryBarrier(X86OpCode opcode);
+
     /*
      * @brief generate inline code for fast case of Strng.indexOf.
      * @param info Call parameters
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index c929265668..14278a4abf 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -772,6 +772,11 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
                               : (SRegOffset(rl_src_offset.s_reg_low) + push_offset));
     LoadWordDisp(TargetReg(kSp), srcOffsetSp, rSI);
     NewLIR4(kX86LockCmpxchg8bA, rDI, rSI, 0, 0);
+
+    // After a store we need to insert barrier in case of potential load. Since the
+    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
+    GenMemBarrier(kStoreLoad);
+
     FreeTemp(rSI);
     UnmarkTemp(rSI);
     NewLIR1(kX86Pop32R, rSI);
@@ -784,9 +789,6 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
     FlushReg(r0);
     LockTemp(r0);
 
-    // Release store semantics, get the barrier out of the way. TODO: revisit
-    GenMemBarrier(kStoreLoad);
-
     RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
     RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
 
@@ -801,6 +803,10 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
     LoadValueDirect(rl_src_expected, r0);
     NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0,
             rl_new_value.reg.GetReg());
+    // After a store we need to insert barrier in case of potential load. Since the
+    // locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
+    GenMemBarrier(kStoreLoad);
+
     FreeTemp(r0);
   }
 
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 9994927d08..9588b17bcf 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -425,10 +425,54 @@ void X86Mir2Lir::FreeCallTemps() {
   FreeTemp(rX86_ARG3);
 }
 
+bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
+  switch (opcode) {
+    case kX86LockCmpxchgMR:
+    case kX86LockCmpxchgAR:
+    case kX86LockCmpxchg8bM:
+    case kX86LockCmpxchg8bA:
+    case kX86XchgMR:
+    case kX86Mfence:
+      // Atomic memory instructions provide full barrier.
+      return true;
+    default:
+      break;
+  }
+
+  // Conservative if cannot prove it provides full barrier.
+  return false;
+}
+
 void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
 #if ANDROID_SMP != 0
-  // TODO: optimize fences
-  NewLIR0(kX86Mfence);
+  // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
+  LIR* mem_barrier = last_lir_insn_;
+
+  /*
+   * According to the JSR-133 Cookbook, for x86 only StoreLoad barriers need memory fence. All other barriers
+   * (LoadLoad, LoadStore, StoreStore) are nops due to the x86 memory model. For those cases, all we need
+   * to ensure is that there is a scheduling barrier in place.
+   */
+  if (barrier_kind == kStoreLoad) {
+    // If no LIR exists already that can be used a barrier, then generate an mfence.
+    if (mem_barrier == nullptr) {
+      mem_barrier = NewLIR0(kX86Mfence);
+    }
+
+    // If last instruction does not provide full barrier, then insert an mfence.
+    if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
+      mem_barrier = NewLIR0(kX86Mfence);
+    }
+  }
+
+  // Now ensure that a scheduling barrier is in place.
+  if (mem_barrier == nullptr) {
+    GenBarrier();
+  } else {
+    // Mark as a scheduling barrier.
+    DCHECK(!mem_barrier->flags.use_def_invalid);
+    mem_barrier->u.m.def_mask = ENCODE_ALL;
+  }
 #endif
 }
 
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index abe1b3d947..b72e60d7ac 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -388,6 +388,7 @@ enum X86OpCode {
   kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR,  // compare and exchange
   kX86LockCmpxchgMR, kX86LockCmpxchgAR,  // locked compare and exchange
   kX86LockCmpxchg8bM, kX86LockCmpxchg8bA,  // locked compare and exchange
+  kX86XchgMR,  // exchange memory with register (automatically locked)
   Binary0fOpCode(kX86Movzx8),   // zero-extend 8-bit value
   Binary0fOpCode(kX86Movzx16),  // zero-extend 16-bit value
   Binary0fOpCode(kX86Movsx8),   // sign-extend 8-bit value
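
Editor note: the x86 rewrite of GenMemBarrier above rests on two facts: under the x86 memory model only a kStoreLoad barrier needs an actual fence, and a preceding locked instruction (lock cmpxchg, or xchg with a memory operand) already has full-fence semantics, so the mfence can be elided when last_lir_insn_ already provides it; the ARM change applies the same reuse idea by not emitting a second identical dmb. Below is a minimal standalone sketch of that decision logic only, under simplified assumptions; Op, Barrier, ProvidesFullBarrier and NeedsExplicitFence are illustrative stand-ins, not ART's LIR machinery.

// Standalone sketch only: models the "is the last emitted instruction already a
// full barrier?" check in the spirit of X86Mir2Lir::ProvidesFullMemoryBarrier.
#include <cstdio>
#include <vector>

enum class Op { kMov, kLockCmpxchg, kXchgMemReg, kMfence };
enum class Barrier { kLoadLoad, kLoadStore, kStoreStore, kStoreLoad };

// Locked read-modify-write instructions and mfence already order all earlier
// loads and stores on x86.
static bool ProvidesFullBarrier(Op op) {
  return op == Op::kLockCmpxchg || op == Op::kXchgMemReg || op == Op::kMfence;
}

// An explicit mfence is required only for StoreLoad, and only when the last
// emitted instruction is not already a full barrier.
static bool NeedsExplicitFence(const std::vector<Op>& code, Barrier kind) {
  if (kind != Barrier::kStoreLoad) {
    return false;  // LoadLoad/LoadStore/StoreStore are no-ops on x86.
  }
  return code.empty() || !ProvidesFullBarrier(code.back());
}

int main() {
  std::vector<Op> after_cas = {Op::kMov, Op::kLockCmpxchg};
  std::vector<Op> after_plain_store = {Op::kMov};
  std::printf("StoreLoad after lock cmpxchg needs mfence: %d\n",
              NeedsExplicitFence(after_cas, Barrier::kStoreLoad));          // 0
  std::printf("StoreLoad after a plain store needs mfence: %d\n",
              NeedsExplicitFence(after_plain_store, Barrier::kStoreLoad));  // 1
  std::printf("StoreStore after a plain store needs mfence: %d\n",
              NeedsExplicitFence(after_plain_store, Barrier::kStoreStore)); // 0
  return 0;
}

This is why the GenInlinedCas hunks can ask for kStoreLoad right after the locked cmpxchg: the request degenerates into a scheduling barrier (def_mask = ENCODE_ALL) rather than a second fence instruction.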
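Editor note: the barrier placement that gen_common.cc, gen_invoke.cc, and mir_to_lir.cc now generate follows the JSR-133 cookbook pairs for Java volatile accesses: StoreStore before a volatile store and StoreLoad after it, plus LoadLoad and LoadStore after a volatile load. As a rough C++11 analogy only (illustrative types and names, not ART's generated code), the same placement can be written with std::atomic_thread_fence, where a release fence roughly covers StoreStore/LoadStore, an acquire fence covers LoadLoad/LoadStore, and a seq_cst fence supplies the StoreLoad ordering that becomes mfence on x86.

// Illustrative analogy only: Shared, payload, ready and both functions are
// hypothetical; they mirror the fence placement, not ART's emitted code.
#include <atomic>

struct Shared {
  int payload = 0;
  std::atomic<int> ready{0};  // stands in for the Java volatile field
};

void VolatileStyleStore(Shared* s, int value) {
  s->payload = value;
  std::atomic_thread_fence(std::memory_order_release);  // ~ StoreStore (and LoadStore)
  s->ready.store(1, std::memory_order_relaxed);         // the volatile store itself
  std::atomic_thread_fence(std::memory_order_seq_cst);  // ~ StoreLoad (mfence on x86)
}

int VolatileStyleLoad(const Shared* s) {
  int flag = s->ready.load(std::memory_order_relaxed);  // the volatile load itself
  std::atomic_thread_fence(std::memory_order_acquire);  // ~ LoadLoad + LoadStore
  return flag ? s->payload : -1;
}

On x86 only the seq_cst fence produces an actual instruction, which is exactly why the patch can drop the explicit mfence for every barrier kind except kStoreLoad while still marking the last LIR as a scheduling barrier.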