diff options
| -rw-r--r-- | compiler/dex/compiler_enums.h | 10 | ||||
| -rw-r--r-- | compiler/dex/mir_analysis.cc | 3 | ||||
| -rw-r--r-- | compiler/dex/mir_dataflow.cc | 3 | ||||
| -rw-r--r-- | compiler/dex/mir_graph.cc | 4 | ||||
| -rw-r--r-- | compiler/dex/mir_graph.h | 1 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/assemble_x86.cc | 6 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 2 | ||||
| -rwxr-xr-x | compiler/dex/quick/x86/int_x86.cc | 4 | ||||
| -rwxr-xr-x | compiler/dex/quick/x86/target_x86.cc | 6 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/utility_x86.cc | 31 | ||||
| -rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 11 | ||||
| -rw-r--r-- | disassembler/disassembler_x86.cc | 1 |
12 files changed, 73 insertions, 9 deletions
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 47cb4245d7..5263b8de7e 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -234,6 +234,10 @@ enum ExtendedMIROpcode { // @note: All currently reserved vector registers are returned to the temporary pool. kMirOpReturnVectorRegisters, + // @brief Create a memory barrier. + // vA: a constant defined by enum MemBarrierKind. + kMirOpMemBarrier, + kMirOpLast, }; @@ -249,6 +253,7 @@ enum MIROptimizationFlagPositions { kMIRIgnoreSuspendCheck, kMIRDup, kMIRMark, // Temporary node mark. + kMIRStoreNonTemporal, kMIRLastMIRFlag, }; @@ -453,12 +458,15 @@ std::ostream& operator<<(std::ostream& os, const DividePattern& pattern); * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load. * -# Use StoreStore barrier after all stores but before return from any constructor whose * class has final fields. + * -# Use NTStoreStore to order non-temporal stores with respect to all later + * store-to-memory instructions. Only generated together with non-temporal stores. 
*/ enum MemBarrierKind { kAnyStore, kLoadAny, kStoreStore, - kAnyAny + kAnyAny, + kNTStoreStore, }; std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind); diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index 0b05bbea18..a8af92cbe6 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -889,6 +889,9 @@ const uint32_t MIRGraph::analysis_attributes_[kMirOpLast] = { // 129 MirOpReturnVectorRegisters AN_NONE, + + // 130 MirOpMemBarrier + AN_NONE, }; struct MethodStats { diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index 55ccf64b90..a964cc720f 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -888,6 +888,9 @@ const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { // 129 MirOpReturnVectorRegisters 0, + + // 130 MirOpMemBarrier + 0, }; /* Return the base virtual register for a SSA name */ diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 6a20908a2f..49e5c7682c 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -67,6 +67,7 @@ const char* MIRGraph::extended_mir_op_names_[kMirOpLast - kMirOpFirst] = { "PackedSet", "ReserveVectorRegisters", "ReturnVectorRegisters", + "MemBarrier", }; MIRGraph::MIRGraph(CompilationUnit* cu, ArenaAllocator* arena) @@ -941,7 +942,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff mir->next ? " | " : " "); } } else { - fprintf(file, " {%04x %s %s %s %s\\l}%s\\\n", mir->offset, + fprintf(file, " {%04x %s %s %s %s %s\\l}%s\\\n", mir->offset, mir->ssa_rep ? GetDalvikDisassembly(mir) : !MIR::DecodedInstruction::IsPseudoMirOp(opcode) ? Instruction::Name(mir->dalvikInsn.opcode) : @@ -949,6 +950,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ", (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? 
" no_nullcheck" : " ", (mir->optimization_flags & MIR_IGNORE_SUSPEND_CHECK) != 0 ? " no_suspendcheck" : " ", + (mir->optimization_flags & MIR_STORE_NON_TEMPORAL) != 0 ? " non_temporal" : " ", mir->next ? " | " : " "); } } diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 4e0dfc1061..2943b9d9b3 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -195,6 +195,7 @@ enum OatMethodAttributes { #define MIR_CALLEE (1 << kMIRCallee) #define MIR_IGNORE_SUSPEND_CHECK (1 << kMIRIgnoreSuspendCheck) #define MIR_DUP (1 << kMIRDup) +#define MIR_STORE_NON_TEMPORAL (1 << kMIRStoreNonTemporal) #define BLOCK_NAME_LEN 80 diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 6173163aa6..46f5dd332b 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -188,6 +188,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Mov32MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32MR", "[!0r+!1d],!2r" }, { kX86Mov32AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Movnti32MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0F, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Movnti32MR", "[!0r+!1d],!2r" }, + { kX86Movnti32AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0F, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Movnti32AR", "[!0r+!1r<<!2d+!3d],!4r" }, { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov32TR", "fs:[!0d],!1r" }, { kX86Mov32RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RR", "!0r,!1r" }, { kX86Mov32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov32RM", "!0r,[!1r+!2d]" }, @@ -203,6 +205,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Mov64MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { REX_W, 
0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64MR", "[!0r+!1d],!2r" }, { kX86Mov64AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { REX_W, 0, 0x89, 0, 0, 0, 0, 0, false }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Movnti64MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x0F, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Movnti64MR", "[!0r+!1d],!2r" }, + { kX86Movnti64AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0F, 0, 0xC3, 0, 0, 0, 0, 0, false }, "Movnti64AR", "[!0r+!1r<<!2d+!3d],!4r" }, { kX86Mov64TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, REX_W, 0x89, 0, 0, 0, 0, 0, false }, "Mov64TR", "fs:[!0d],!1r" }, { kX86Mov64RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RR", "!0r,!1r" }, { kX86Mov64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0, false }, "Mov64RM", "!0r,[!1r+!2d]" }, @@ -486,7 +490,9 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, // TODO: load/store? // Encode the modrm opcode as an extra opcode byte to avoid computation during assembly. 
+ { kX86Lfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 5, 0, 0, false }, "Lfence", "" }, { kX86Mfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" }, + { kX86Sfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 7, 0, 0, false }, "Sfence", "" }, EXT_0F_ENCODING_MAP(Imul16, 0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), EXT_0F_ENCODING_MAP(Imul32, 0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 9c50121b0a..aeeaea24d5 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -408,7 +408,7 @@ class X86Mir2Lir : public Mir2Lir { LIR* LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, RegStorage r_dest, OpSize size); LIR* StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, int displacement, - RegStorage r_src, OpSize size); + RegStorage r_src, OpSize size, int opt_flags = 0); RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num); diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index cc515384d9..d08ea7c3a0 100755 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -2324,9 +2324,9 @@ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) { RegStorage temp = AllocTemp(); OpRegCopy(temp, rl_src.reg); - StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size); + StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size, opt_flags); } else { - StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size); + StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, rl_src.reg, size, opt_flags); } if (card_mark) { // Free rl_index if its a temp. Ensures there are 2 free regs for card mark. 
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 604f4bf516..c43a1ff10b 100755 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -597,6 +597,9 @@ bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { mem_barrier = NewLIR0(kX86Mfence); ret = true; } + } else if (barrier_kind == kNTStoreStore) { + mem_barrier = NewLIR0(kX86Sfence); + ret = true; } // Now ensure that a scheduling barrier is in place. @@ -1530,6 +1533,9 @@ void X86Mir2Lir::GenMachineSpecificExtendedMethodMIR(BasicBlock* bb, MIR* mir) { case kMirOpPackedSet: GenSetVector(bb, mir); break; + case kMirOpMemBarrier: + GenMemBarrier(static_cast<MemBarrierKind>(mir->dalvikInsn.vA)); + break; default: break; } diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index f159beb640..037dfedfe7 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -779,15 +779,20 @@ LIR* X86Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_ } LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int scale, - int displacement, RegStorage r_src, OpSize size) { + int displacement, RegStorage r_src, OpSize size, + int opt_flags) { LIR *store = NULL; LIR *store2 = NULL; bool is_array = r_index.Valid(); bool pair = r_src.IsPair(); bool is64bit = (size == k64) || (size == kDouble); + bool consider_non_temporal = false; + X86OpCode opcode = kX86Nop; switch (size) { case k64: + consider_non_temporal = true; + // Fall through! case kDouble: if (r_src.IsFloat()) { opcode = is_array ? kX86MovsdAR : kX86MovsdMR; @@ -804,6 +809,7 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int opcode = is_array ? 
kX86Mov64AR : kX86Mov64MR; CHECK_EQ(is_array, false); CHECK_EQ(r_src.IsFloat(), false); + consider_non_temporal = true; break; } // else fall-through to k32 case case k32: @@ -815,6 +821,7 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int DCHECK(r_src.IsSingle()); } DCHECK_EQ((displacement & 0x3), 0); + consider_non_temporal = true; break; case kUnsignedHalf: case kSignedHalf: @@ -829,6 +836,28 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int LOG(FATAL) << "Bad case in StoreBaseIndexedDispBody"; } + // Handle non temporal hint here. + if (consider_non_temporal && ((opt_flags & MIR_STORE_NON_TEMPORAL) != 0)) { + switch (opcode) { + // We currently only handle 32/64 bit moves here. + case kX86Mov64AR: + opcode = kX86Movnti64AR; + break; + case kX86Mov64MR: + opcode = kX86Movnti64MR; + break; + case kX86Mov32AR: + opcode = kX86Movnti32AR; + break; + case kX86Mov32MR: + opcode = kX86Movnti32MR; + break; + default: + // Do nothing here. 
+ break; + } + } + if (!is_array) { if (!pair) { store = NewLIR3(opcode, r_base.GetReg(), displacement + LOWORD_OFFSET, r_src.GetReg()); diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 9620cd1296..15fe7e298f 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -440,12 +440,12 @@ enum X86OpCode { kX86Mov16MR, kX86Mov16AR, kX86Mov16TR, kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT, kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI, - kX86Mov32MR, kX86Mov32AR, kX86Mov32TR, + kX86Mov32MR, kX86Mov32AR, kX86Movnti32MR, kX86Movnti32AR, kX86Mov32TR, kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT, kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, kX86Lea32RM, kX86Lea32RA, - kX86Mov64MR, kX86Mov64AR, kX86Mov64TR, + kX86Mov64MR, kX86Mov64AR, kX86Movnti64MR, kX86Movnti64AR, kX86Mov64TR, kX86Mov64RR, kX86Mov64RM, kX86Mov64RA, kX86Mov64RT, kX86Mov64RI32, kX86Mov64RI64, kX86Mov64MI, kX86Mov64AI, kX86Mov64TI, kX86Lea64RM, @@ -620,7 +620,12 @@ enum X86OpCode { kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR, // move into reg from xmm kX86MovsxdRR, kX86MovsxdRM, kX86MovsxdRA, // move 32 bit to 64 bit with sign extension kX86Set8R, kX86Set8M, kX86Set8A, // set byte depending on condition operand - kX86Mfence, // memory barrier + kX86Lfence, // memory barrier to serialize all previous + // load-from-memory instructions + kX86Mfence, // memory barrier to serialize all previous + // load-from-memory and store-to-memory instructions + kX86Sfence, // memory barrier to serialize all previous + // store-to-memory instructions Binary0fOpCode(kX86Imul16), // 16bit multiply Binary0fOpCode(kX86Imul32), // 32bit multiply Binary0fOpCode(kX86Imul64), // 64bit multiply diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index 0bf758efb9..7551add41c 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -764,6 +764,7 @@ DISASSEMBLER_ENTRY(cmp, case 0xB7: opcode 
<< "movzxw"; has_modrm = true; load = true; break; case 0xBE: opcode << "movsxb"; has_modrm = true; load = true; byte_second_operand = true; rex |= (rex == 0 ? 0 : 0b1000); break; case 0xBF: opcode << "movsxw"; has_modrm = true; load = true; break; + case 0xC3: opcode << "movnti"; store = true; has_modrm = true; break; case 0xC5: if (prefix[2] == 0x66) { opcode << "pextrw"; |